diff --git a/commands/README.md b/commands/README.md index 96ceeb4..c88963c 100644 --- a/commands/README.md +++ b/commands/README.md @@ -28,6 +28,7 @@ Slash commands for quick actions. User-invoked (type `/command` to trigger). | `/programmer` | | Code development tasks | | `/gcal` | `/calendar`, `/cal` | Google Calendar access | | `/usage` | `/stats` | View usage statistics | +| `/external` | `/llm`, `/ext` | Toggle and use external LLM mode | ### Kubernetes (`/k8s:*`) diff --git a/commands/external.md b/commands/external.md new file mode 100644 index 0000000..5975941 --- /dev/null +++ b/commands/external.md @@ -0,0 +1,89 @@ +--- +name: external +description: Toggle and use external LLM mode (GPT-5.2, Gemini, etc.) +aliases: [llm, ext, external-llm] +--- + +# External LLM Mode + +Route requests to external LLMs via opencode or gemini CLI. + +## Usage + +``` +/external # Show current status +/external on [reason] # Enable external mode +/external off # Disable external mode +/external invoke # Send prompt to default model +/external invoke --model # Send to specific model +/external invoke --task # Route by task type +/external models # List available models +``` + +## Implementation + +### Status +```bash +~/.claude/mcp/llm-router/toggle.py status +``` + +### Toggle On/Off +```bash +~/.claude/mcp/llm-router/toggle.py on --reason "reason" +~/.claude/mcp/llm-router/toggle.py off +``` + +### Invoke +```bash +~/.claude/mcp/llm-router/invoke.py --model MODEL -p "prompt" [--json] +~/.claude/mcp/llm-router/invoke.py --task TASK -p "prompt" [--json] +``` + +## Available Models by Tier + +### Frontier (strongest) +| Model | Provider | Best For | +|-------|----------|----------| +| `github-copilot/gpt-5.2` | opencode | reasoning, fallback | +| `github-copilot/gemini-3-pro-preview` | opencode | long context, reasoning | +| `gemini/gemini-2.5-pro` | gemini | long context, reasoning | + +### Mid-tier (general purpose) +| Model | Provider | Best For | 
+|-------|----------|----------| +| `github-copilot/claude-sonnet-4.5` | opencode | general, fallback | +| `github-copilot/gemini-3-flash-preview` | opencode | fast | +| `github-copilot/gpt-5-mini` | opencode | fast, general | +| `opencode/grok-code` | opencode | code generation | +| `gemini/gemini-2.5-flash` | gemini | fast | + +### Lightweight (simple tasks) +| Model | Provider | Best For | +|-------|----------|----------| +| `github-copilot/claude-haiku-4.5` | opencode | simple tasks | + +## Task Routing + +| Task | Routes To | Tier | +|------|-----------|------| +| `reasoning` | github-copilot/gpt-5.2 | frontier | +| `code-generation` | github-copilot/gemini-3-pro-preview | frontier | +| `long-context` | gemini/gemini-2.5-pro | frontier | +| `fast` | github-copilot/gemini-3-flash-preview | mid-tier | +| `general` (default) | github-copilot/claude-sonnet-4.5 | mid-tier | + +## State Files + +- Mode state: `~/.claude/state/external-mode.json` +- Model policy: `~/.claude/state/model-policy.json` + +## Examples + +``` +/external on testing # Enable for testing +/external invoke "Explain k8s pods" # Use default model (mid-tier) +/external invoke --model github-copilot/gpt-5.2 "Complex analysis" # frontier +/external invoke --task code-generation "Write a Python function" # routes to frontier +/external invoke --task fast "Quick question" # routes to mid-tier +/external off # Back to Claude +``` diff --git a/mcp/llm-router/invoke.py b/mcp/llm-router/invoke.py index 2228d66..e21a26f 100755 --- a/mcp/llm-router/invoke.py +++ b/mcp/llm-router/invoke.py @@ -38,7 +38,7 @@ def resolve_model(args: argparse.Namespace, policy: dict) -> str: return policy.get("task_routing", {}).get("default", "copilot/sonnet-4.5") -def invoke(model: str, prompt: str, policy: dict) -> str: +def invoke(model: str, prompt: str, policy: dict, timeout: int = 600) -> str: """Invoke the appropriate provider for the given model.""" external_models = policy.get("external_models", {}) @@ -53,11 +53,11 @@ def
invoke(model: str, prompt: str, policy: dict) -> str: if cli == "opencode": sys.path.insert(0, str(ROUTER_DIR)) from providers.opencode import invoke as opencode_invoke - return opencode_invoke(cli_args, prompt) + return opencode_invoke(cli_args, prompt, timeout=timeout) elif cli == "gemini": sys.path.insert(0, str(ROUTER_DIR)) from providers.gemini import invoke as gemini_invoke - return gemini_invoke(cli_args, prompt) + return gemini_invoke(cli_args, prompt, timeout=timeout) else: raise ValueError(f"Unknown CLI: {cli}") @@ -88,8 +88,8 @@ def main(): parser.add_argument( "--timeout", type=int, - default=300, - help="Timeout in seconds (default: 300)" + default=600, + help="Timeout in seconds (default: 600)" ) args = parser.parse_args() @@ -97,7 +97,7 @@ def main(): try: policy = load_policy() model = resolve_model(args, policy) - result = invoke(model, args.prompt, policy) + result = invoke(model, args.prompt, policy, timeout=args.timeout) if args.json: output = { diff --git a/state/component-registry.json b/state/component-registry.json index 6c72a2f..8d1ce06 100644 --- a/state/component-registry.json +++ b/state/component-registry.json @@ -199,6 +199,15 @@ ], "invokes": "skill:usage" }, + "/external": { + "description": "Toggle and use external LLM mode (GPT-5.2, Gemini, etc.)", + "aliases": [ + "/llm", + "/ext", + "/external-llm" + ], + "invokes": "command:external" + }, "/README": { "description": "TODO", "aliases": [], diff --git a/state/kb.json b/state/kb.json index 6e60f40..4ac09ac 100644 --- a/state/kb.json +++ b/state/kb.json @@ -1 +1,171 @@ -{"infra":{"cluster":"k0s","nodes":3,"arch":"arm64"},"svc":{"gitops":"argocd","mon":"prometheus","alerts":"alertmanager"},"net":{},"hw":{"pi5_8gb":2,"pi3_1gb":1}} \ No newline at end of file +{ + "infra": { + "cluster": "k0s", + "nodes": 3, + "arch": "arm64", + "storage": "longhorn", + "storage_class": "longhorn", + "backup": "longhorn-backup + minio-to-mega" + }, + "hw": { + "pi5_8gb": 2, + "pi3_1gb": 1, + "roles": { 
+ "control_plane": "pi5", + "workers": ["pi5", "pi3"] + } + }, + "net": { + "metallb_pool": "192.168.153.240-192.168.153.254", + "ingress_nginx_ip": "192.168.153.240", + "ingress_haproxy_ip": "192.168.153.241", + "tailnet": "taildb3494.ts.net", + "dns_pattern": "...nip.io" + }, + "svc": { + "gitops": "argocd", + "monitoring": { + "metrics": "kube-prometheus-stack", + "logs": "loki-stack", + "alerts": "alertmanager", + "dashboards": "grafana" + }, + "ingress": ["nginx-ingress-controller", "haproxy-ingress"], + "storage": ["longhorn", "local-path-storage", "minio"], + "networking": ["metallb", "tailscale-operator"] + }, + "apps": { + "ai_stack": { + "namespace": "ai-stack", + "components": ["open-webui", "ollama", "litellm", "searxng", "n8n", "vllm"], + "models": ["gpt-oss:120b", "qwen3-coder"], + "ollama_host": "100.85.116.57:11434" + }, + "home": ["home-assistant", "pihole", "plex"], + "infra": ["gitea", "docker-registry", "kubernetes-dashboard"], + "other": ["ghost", "tor-controller", "speedtest-tracker"] + }, + "namespaces": [ + "ai-stack", "argocd", "monitoring", "loki-system", "longhorn-system", + "metallb-system", "minio", "nginx-ingress-controller", "tailscale-operator", + "gitea", "home-assistant", "pihole", "pihole2", "plex", "ghost", + "kubernetes-dashboard", "docker-registry", "k8s-agent", "tools", "vpa" + ], + "urls": { + "grafana": "grafana.monitoring.192.168.153.240.nip.io", + "longhorn": "ui.longhorn-system.192.168.153.240.nip.io", + "open_webui": "oi.ai-stack.192.168.153.240.nip.io", + "searxng": "sx.ai-stack.192.168.153.240.nip.io", + "n8n": "n8n.ai-stack.192.168.153.240.nip.io", + "minio_console": "console.minio.192.168.153.240.nip.io", + "pihole": "pihole.192.168.153.240.nip.io", + "k8s_dashboard": "dashboard.kubernetes-dashboards.192.168.153.240.nip.io", + "home_assistant": "ha.home-assistant.192.168.153.241.nip.io", + "plex": "player.plex.192.168.153.246.nip.io" + }, + "external_llm": { + "description": "Route requests to external LLMs via 
opencode or gemini CLI", + "state_file": "~/.claude/state/external-mode.json", + "router_dir": "~/.claude/mcp/llm-router/", + "commands": { + "toggle_on": "~/.claude/mcp/llm-router/toggle.py on --reason 'reason'", + "toggle_off": "~/.claude/mcp/llm-router/toggle.py off", + "status": "~/.claude/mcp/llm-router/toggle.py status", + "invoke": "~/.claude/mcp/llm-router/invoke.py --model MODEL -p 'prompt'" + }, + "providers": ["opencode", "gemini"], + "tiers": { + "frontier": ["github-copilot/gpt-5.2", "github-copilot/gemini-3-pro-preview", "gemini/gemini-2.5-pro"], + "mid-tier": ["github-copilot/gpt-5-mini", "github-copilot/claude-sonnet-4.5", "github-copilot/gemini-3-flash-preview", "opencode/grok-code", "gemini/gemini-2.5-flash"], + "lightweight": ["opencode/gpt-5-nano", "zai-coding-plan/glm-4.5-air", "github-copilot/claude-haiku-4.5"] + }, + "task_routing": { + "reasoning": "github-copilot/gpt-5.2", + "code-generation": "github-copilot/gemini-3-pro-preview", + "long-context": "gemini/gemini-2.5-pro", + "fast": "github-copilot/gemini-3-flash-preview", + "default": "github-copilot/claude-sonnet-4.5" + }, + "notes": { + "opencode_path": "/home/linuxbrew/.linuxbrew/bin/opencode (NOT /usr/bin/opencode which crashes)", + "o3_removed": "github-copilot/o3 not available via GitHub Copilot" + } + }, + "workstation": { + "hostname": "willlaptop", + "ip": "192.168.153.117", + "os": "Arch Linux", + "desktop": "GNOME", + "shell": "fish", + "terminal": ["ghostty", "alacritty", "gnome-console"], + "network": "systemd-networkd + iwd", + "theme": "Dracula", + "editors": ["vscode", "zed", "vim"], + "browsers": ["firefox", "chromium", "google-chrome", "zen-browser", "epiphany"], + "virtualization": ["docker", "podman", "distrobox", "virt-manager", "virtualbox", "gnome-boxes"], + "k8s_tools": ["k9s", "k0s-bin", "k0sctl-bin", "argocd", "krew", "kubecolor"], + "dev_langs": ["go", "rust", "python", "typescript", "zig", "bun", "node/npm/pnpm"], + "ai_local": { + "ollama": true, + 
"llama_swap": true, + "models": ["Qwen3-4b", "Gemma3-4b"] + }, + "backup": ["restic", "timeshift", "btrbk", "chezmoi"], + "dotfiles": "chezmoi" + }, + "repos": { + "willlaptop": { + "path": "~/Code/active/devops/willlaptop", + "remote": "git@gitea-gitea-ssh.taildb3494.ts.net:will/willlaptop.git", + "purpose": "Workstation provisioning and config", + "structure": { + "ansible/": "Machine provisioning playbooks", + "ansible/roles/common/": "Hostname, network, users, SSH config", + "ansible/roles/packages/": "Package installation (pacman, AUR, flatpak, appimage)", + "ansible/roles/packages/files/": "Package lists (pkglist.txt, aur_pkglist.txt, etc)", + "docker/": "Local Docker stacks", + "scripts/": "Utility scripts (backup, sync, networking)", + "MCP/": "MCP server configs", + "local_ollama/": "Local Ollama data" + }, + "ansible_tags": ["network", "wifi", "ethernet", "users", "sshd", "pacman", "aur", "flatpak", "appimage"], + "docker_stacks": ["file_browser", "minio-longhorn-backup", "rancher-cleanup"], + "scripts": ["bridge-up.sh", "chezmoi-sync.sh", "curl-s3.sh", "kvm-bridge-setup.sh", + "rclone-sync.sh", "restic-backup.sh", "restic-clean.sh"] + }, + "homelab": { + "path": "~/Code/active/devops/homelab/homelab", + "remote": "git@github.com:will666/homelab.git", + "symlink": "~/.claude/repos/homelab", + "structure": { + "ansible/": "Ansible playbooks and templates for node provisioning", + "argocd/": "ArgoCD Application manifests (one per service)", + "charts/": "Helm values and raw manifests per service", + "charts/<service>/values.yaml": "Helm chart values override", + "charts/<service>/manifests/": "Raw K8s manifests (non-Helm resources)", + "docker/": "Docker Compose stacks for non-K8s workloads" + }, + "charts": [ + "ai-stack", "argocd", "argo-workflow", "cdi-operator", + "cloudflare-tunnel-ingress-controller", "docker-registry", "ghost", + "gitea", "haproxy-ingress", "harbor", "home-assistant", "k0s-backup", + "k8s-agent-dashboard", "kube-prometheus-stack",
"kubernetes-dashboard", + "kubevirt", "local-path-storage", "loki-stack", "longhorn", + "longhorn-backup", "metallb", "minio", "minio-to-mega-backup", + "nfs-server-longhorn", "nginx-ingress-controller", "pihole", "pihole2", + "plex", "speedtest-tracker", "squareffect", "squareserver", + "tailscale-operator", "tools", "tor-controller", "traefik-ingress-controller", + "willlaptop-backup", "willlaptop-monitoring", "wills-portal" + ], + "docker_stacks": [ + "protonvpn-proxy", "squareffect", "squareserver", "stable-diffusion-webui" + ], + "conventions": { + "argocd_app": "argocd/<service>.yaml points to charts/<service>/", + "helm_values": "charts/<service>/values.yaml for Helm overrides", + "raw_manifests": "charts/<service>/manifests/ for non-Helm K8s resources", + "naming": "ArgoCD app name = namespace name (usually)" + } + } + } +} diff --git a/state/model-policy.json b/state/model-policy.json index 29b006f..6bf8d32 100644 --- a/state/model-policy.json +++ b/state/model-policy.json @@ -119,72 +119,79 @@ "cli": "opencode", "cli_args": ["-m", "github-copilot/gpt-5.2"], "use_cases": ["reasoning", "fallback"], - "tier": "opus-equivalent" + "tier": "frontier" }, "github-copilot/claude-sonnet-4.5": { "cli": "opencode", "cli_args": ["-m", "github-copilot/claude-sonnet-4.5"], "use_cases": ["general", "fallback"], - "tier": "sonnet-equivalent" + "tier": "mid-tier" }, "github-copilot/claude-haiku-4.5": { "cli": "opencode", "cli_args": ["-m", "github-copilot/claude-haiku-4.5"], "use_cases": ["simple"], - "tier": "haiku-equivalent" - }, - "zai-coding-plan/glm-4.7": { - "cli": "opencode", - "cli_args": ["-m", "zai-coding-plan/glm-4.7"], - "use_cases": ["code-generation"], - "tier": "opus-equivalent" + "tier": "lightweight" }, "github-copilot/gemini-3-pro-preview": { "cli": "opencode", "cli_args": ["-m", "github-copilot/gemini-3-pro-preview"], "use_cases": ["long-context", "reasoning"], - "tier": "opus-equivalent" + "tier": "frontier" }, "github-copilot/gemini-3-flash-preview": { "cli": "opencode", "cli_args":
["-m", "github-copilot/gemini-3-flash-preview"], "use_cases": ["fast", "general"], - "tier": "sonnet-equivalent" - }, - "github-copilot/o3": { - "cli": "opencode", - "cli_args": ["-m", "github-copilot/o3"], - "use_cases": ["complex-reasoning"], - "tier": "sonnet-equivalent" - }, - "opencode/big-pickle": { - "cli": "opencode", - "cli_args": ["-m", "opencode/big-pickle"], - "use_cases": ["general"], - "tier": "sonnet-equivalent" + "tier": "mid-tier" }, "gemini/gemini-2.5-pro": { "cli": "gemini", "cli_args": ["-m", "gemini-2.5-pro"], "use_cases": ["long-context", "reasoning"], - "tier": "sonnet-equivalent" + "tier": "frontier" }, "gemini/gemini-2.5-flash": { "cli": "gemini", "cli_args": ["-m", "gemini-2.5-flash"], "use_cases": ["fast", "general"], - "tier": "haiku-equivalent" + "tier": "mid-tier" + }, + "github-copilot/gpt-5-mini": { + "cli": "opencode", + "cli_args": ["-m", "github-copilot/gpt-5-mini"], + "use_cases": ["fast", "general"], + "tier": "mid-tier" + }, + "opencode/gpt-5-nano": { + "cli": "opencode", + "cli_args": ["-m", "opencode/gpt-5-nano"], + "use_cases": ["fast", "simple"], + "tier": "lightweight" + }, + "zai-coding-plan/glm-4.5-air": { + "cli": "opencode", + "cli_args": ["-m", "zai-coding-plan/glm-4.5-air"], + "use_cases": ["simple", "fast"], + "tier": "lightweight" + }, + "opencode/grok-code": { + "cli": "opencode", + "cli_args": ["-m", "opencode/grok-code"], + "use_cases": ["code-generation", "general"], + "tier": "mid-tier" } }, - "claude_to_external_map": { - "opus": "github-copilot/gpt-5.2", - "sonnet": "github-copilot/claude-sonnet-4.5", - "haiku": "github-copilot/claude-haiku-4.5" + "tier_to_external_map": { + "frontier": "github-copilot/gpt-5.2", + "mid-tier": "github-copilot/gpt-5-mini", + "lightweight": "opencode/gpt-5-nano" }, "task_routing": { "reasoning": "github-copilot/gpt-5.2", - "code-generation": "zai-coding-plan/glm-4.7", + "code-generation": "github-copilot/gemini-3-pro-preview", "long-context": "gemini/gemini-2.5-pro", + "fast": 
"github-copilot/gemini-3-flash-preview", "default": "github-copilot/claude-sonnet-4.5" } } diff --git a/state/personal-assistant/general-instructions.json b/state/personal-assistant/general-instructions.json index 13bd9c9..ec67704 100644 --- a/state/personal-assistant/general-instructions.json +++ b/state/personal-assistant/general-instructions.json @@ -20,6 +20,18 @@ "status": "active", "added": "2026-01-04" }, + { + "id": "f6a7b8c9-0123-45fa-1234-666666666666", + "instruction": "After reinstalling gmail-mcp package, run ~/.claude/patches/apply-gmail-auth-patch.sh to restore auto re-auth on token expiry.", + "status": "active", + "added": "2026-01-09" + }, + { + "id": "a7b8c9d0-1234-56ab-2345-777777777777", + "instruction": "Homelab repo is at ~/Code/active/devops/homelab/homelab (canonical). ~/.claude/repos/homelab is a symlink to it. Always use the canonical path for new work.", + "status": "active", + "added": "2026-01-09" + }, { "id": "b2c3d4e5-6789-01bc-def0-222222222222", "instruction": "Git workflow: See CLAUDE.md for full process. Use rebase merges, not merge commits.",