feat(external-llm): standardize tiers and optimize model selection
- Rename tiers: opus/sonnet/haiku → frontier/mid-tier/lightweight
- Align with industry benchmarks (MMLU, GPQA, Chatbot Arena)
- Add /external command for LLM mode control
- Fix invoke.py timeout passthrough (now 600s default)

Tier changes:
- Promote gemini-2.5-pro to frontier (benchmark-validated)
- Demote glm-4.7 to mid-tier, then remove it (unreliable)
- Promote gemini-2.5-flash to mid-tier

New models added:
- gpt-5-mini, gpt-5-nano (GPT family coverage)
- grok-code (Grok/X family)
- glm-4.5-air (lightweight GLM)

Removed (redundant/unreliable):
- o3 (not available)
- glm-4.7 (timeouts)
- gpt-4o, big-pickle, glm-4.5-flash (redundant)

Final: 11 models across 3 tiers, 4 model families

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -38,7 +38,7 @@ def resolve_model(args: argparse.Namespace, policy: dict) -> str:
|
||||
return policy.get("task_routing", {}).get("default", "copilot/sonnet-4.5")
|
||||
|
||||
|
||||
def invoke(model: str, prompt: str, policy: dict) -> str:
|
||||
def invoke(model: str, prompt: str, policy: dict, timeout: int = 600) -> str:
|
||||
"""Invoke the appropriate provider for the given model."""
|
||||
external_models = policy.get("external_models", {})
|
||||
|
||||
@@ -53,11 +53,11 @@ def invoke(model: str, prompt: str, policy: dict) -> str:
|
||||
if cli == "opencode":
|
||||
sys.path.insert(0, str(ROUTER_DIR))
|
||||
from providers.opencode import invoke as opencode_invoke
|
||||
return opencode_invoke(cli_args, prompt)
|
||||
return opencode_invoke(cli_args, prompt, timeout=timeout)
|
||||
elif cli == "gemini":
|
||||
sys.path.insert(0, str(ROUTER_DIR))
|
||||
from providers.gemini import invoke as gemini_invoke
|
||||
return gemini_invoke(cli_args, prompt)
|
||||
return gemini_invoke(cli_args, prompt, timeout=timeout)
|
||||
else:
|
||||
raise ValueError(f"Unknown CLI: {cli}")
|
||||
|
||||
@@ -88,8 +88,8 @@ def main():
|
||||
parser.add_argument(
|
||||
"--timeout",
|
||||
type=int,
|
||||
default=300,
|
||||
help="Timeout in seconds (default: 300)"
|
||||
default=600,
|
||||
help="Timeout in seconds (default: 600)"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -97,7 +97,7 @@ def main():
|
||||
try:
|
||||
policy = load_policy()
|
||||
model = resolve_model(args, policy)
|
||||
result = invoke(model, args.prompt, policy)
|
||||
result = invoke(model, args.prompt, policy, timeout=args.timeout)
|
||||
|
||||
if args.json:
|
||||
output = {
|
||||
|
||||
Reference in New Issue
Block a user