- Rename tiers: opus/sonnet/haiku → frontier/mid-tier/lightweight
- Align with industry benchmarks (MMLU, GPQA, Chatbot Arena)
- Add /external command for LLM mode control
- Fix invoke.py timeout passthrough (now 600s default)

Tier changes:
- Promote gemini-2.5-pro to frontier (benchmark-validated)
- Demote glm-4.7 to mid-tier, then remove it entirely (unreliable)
- Promote gemini-2.5-flash to mid-tier

New models added:
- gpt-5-mini, gpt-5-nano (GPT family coverage)
- grok-code (Grok/X family)
- glm-4.5-air (lightweight GLM)

Removed (redundant/unreliable):
- o3 (not available)
- glm-4.7 (timeouts)
- gpt-4o, big-pickle, glm-4.5-flash (redundant)

Final: 11 models across 3 tiers, 4 model families

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
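As a quick way to exercise the timeout passthrough, the flag can be passed straight through on the command line (invocation shape taken from the script's own usage block below; the 120-second value is purely illustrative):

    invoke.py --task reasoning -p "prompt" --timeout 120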
128 lines · 3.5 KiB · Python · Executable File
#!/usr/bin/env python3
"""
Invoke external LLM via configured provider.

Usage:
    invoke.py --model copilot/gpt-5.2 -p "prompt"
    invoke.py --task reasoning -p "prompt"
    invoke.py --task code-generation -p "prompt" --json

Model selection priority:
    1. Explicit --model flag
    2. Task-based routing (--task flag)
    3. Default from policy
"""
import argparse
import json
import sys
from pathlib import Path

STATE_DIR = Path.home() / ".claude/state"
ROUTER_DIR = Path(__file__).parent


def load_policy() -> dict:
    """Load model policy from state file."""
    policy_file = STATE_DIR / "model-policy.json"
    with open(policy_file) as f:
        return json.load(f)
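

# model-policy.json is assumed to look roughly like this (a sketch inferred from
# the lookups in resolve_model() and invoke() below; the key names are real,
# but the model entries and cli_args values are illustrative):
#
# {
#   "task_routing": {
#     "default": "copilot/sonnet-4.5",
#     "reasoning": "copilot/gpt-5.2"
#   },
#   "external_models": {
#     "copilot/gpt-5.2": {"cli": "opencode", "cli_args": ["--model", "gpt-5.2"]},
#     "gemini/gemini-2.5-pro": {"cli": "gemini", "cli_args": ["--model", "gemini-2.5-pro"]}
#   }
# }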


def resolve_model(args: argparse.Namespace, policy: dict) -> str:
    """Determine which model to use based on args and policy."""
    if args.model:
        return args.model
    if args.task and args.task in policy.get("task_routing", {}):
        return policy["task_routing"][args.task]
    return policy.get("task_routing", {}).get("default", "copilot/sonnet-4.5")
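

# Worked examples of the priority order (against the sketch policy above):
#   --model copilot/gpt-5.2 --task reasoning  -> copilot/gpt-5.2 (explicit flag wins)
#   --task reasoning                          -> task_routing["reasoning"]
#   no flags                                  -> task_routing["default"], falling back
#                                                to the hardcoded copilot/sonnet-4.5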


def invoke(model: str, prompt: str, policy: dict, timeout: int = 600) -> str:
    """Invoke the appropriate provider for the given model."""
    external_models = policy.get("external_models", {})

    if model not in external_models:
        raise ValueError(f"Unknown model: {model}. Available: {list(external_models.keys())}")

    model_config = external_models[model]
    cli = model_config["cli"]
    cli_args = model_config.get("cli_args", [])

    # Import and invoke the appropriate provider (providers/ sits next to this script)
    sys.path.insert(0, str(ROUTER_DIR))
    if cli == "opencode":
        from providers.opencode import invoke as opencode_invoke
        return opencode_invoke(cli_args, prompt, timeout=timeout)
    elif cli == "gemini":
        from providers.gemini import invoke as gemini_invoke
        return gemini_invoke(cli_args, prompt, timeout=timeout)
    else:
        raise ValueError(f"Unknown CLI: {cli}")
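

# Both provider modules are expected to expose the same entry point, roughly:
#     def invoke(cli_args: list[str], prompt: str, timeout: int = 600) -> str
# (signature inferred from the call sites above; presumably each provider shells
# out to its CLI with cli_args plus the prompt and returns the captured stdout)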


def main():
    parser = argparse.ArgumentParser(
        description="Invoke external LLM via configured provider"
    )
    parser.add_argument(
        "-p", "--prompt",
        required=True,
        help="Prompt text"
    )
    parser.add_argument(
        "--model",
        help="Explicit model (e.g., copilot/gpt-5.2)"
    )
    parser.add_argument(
        "--task",
        choices=["reasoning", "code-generation", "long-context", "general"],
        help="Task type for automatic model routing"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON with model info"
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=600,
        help="Timeout in seconds (default: 600)"
    )

    args = parser.parse_args()

    try:
        policy = load_policy()
        model = resolve_model(args, policy)
        result = invoke(model, args.prompt, policy, timeout=args.timeout)

        if args.json:
            output = {
                "model": model,
                "response": result,
                "success": True
            }
            print(json.dumps(output, indent=2))
        else:
            print(result)

    except Exception as e:
        if args.json:
            output = {
                "model": args.model or "unknown",
                "error": str(e),
                "success": False
            }
            print(json.dumps(output, indent=2))
        else:
            print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()