From 6b56d9e2233b92b0cd323eaf1b3bce5b09e71e44 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Thu, 19 Feb 2026 11:45:55 -0800 Subject: [PATCH] feat(models): add auth profile cooldown for api key pools --- README.md | 3 +- docs/plans/2026-02-15-openclaw-gap-roadmap.md | 8 ++- docs/plans/state.json | 18 ++++++ src/config/schema.test.ts | 30 ++++++++++ src/config/schema.ts | 2 + src/daemon/clientFactory.test.ts | 11 ++++ src/daemon/models.ts | 22 +++++--- src/models/rotating.test.ts | 47 ++++++++++++++++ src/models/rotating.ts | 56 ++++++++++++++++--- 9 files changed, 175 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 8a0e047..ce316bf 100644 --- a/README.md +++ b/README.md @@ -258,6 +258,7 @@ models: model: claude-opus-4-5-20251101 api_key: sk-ant-api03-... # api_keys: [sk-ant-primary-..., sk-ant-secondary-...] # Optional rotation pool + # auth_profile_cooldown_ms: 30000 # Optional cooldown before retrying a failed key profile local: provider: ollama model: qwen2.5:14b @@ -321,7 +322,7 @@ models: Each tier can optionally specify `auth_mode` (`auto` | `api_key` | `oauth`) to control whether Flynn uses API keys vs OAuth/token auth for that provider. `use_oauth: true` remains supported as a compatibility alias for `auth_mode: oauth`. -When multiple keys are configured via `api_keys`, Flynn rotates across keys on provider failures and sticks to the last successful key profile until it fails. +When multiple keys are configured via `api_keys`, Flynn rotates across key profiles on provider failures and sticks to the last successful profile until it fails. Set `auth_profile_cooldown_ms` to temporarily cool down failing profiles before retrying them. Note: with `provider: openai` + `auth_mode: oauth` (Codex backend), Flynn currently does not send tool definitions to the provider. Tool execution is therefore unavailable in that mode, and any textual `tool_use` output should be treated as non-executable model text. 
diff --git a/docs/plans/2026-02-15-openclaw-gap-roadmap.md b/docs/plans/2026-02-15-openclaw-gap-roadmap.md index 1a0c93e..8ad2733 100644 --- a/docs/plans/2026-02-15-openclaw-gap-roadmap.md +++ b/docs/plans/2026-02-15-openclaw-gap-roadmap.md @@ -332,6 +332,8 @@ These are substantial UX/ecosystem projects or highly platform-specific; defer u ## Suggested Next Execution Order -1) Auth profile rotation/stickiness before provider fallback -2) Queue/run-control polish (interrupt preemption telemetry + UX) -3) Daily memory continuity tuning (if continuity quality is still lacking) +1) Queue/run-control polish (interrupt preemption telemetry + UX) +2) Daily memory continuity tuning (if continuity quality is still lacking) +3) Auth-profile expansion beyond API-key pools (if needed) + +Note: API-key pool auth profile cooldown/backoff (`auth_profile_cooldown_ms`) shipped on 2026-02-19. diff --git a/docs/plans/state.json b/docs/plans/state.json index b12da3f..b055e19 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -5775,6 +5775,24 @@ "docs/plans/state.json" ], "test_status": "pnpm test:run src/security/elevation.test.ts src/gateway/handlers/agent.test.ts src/frontends/tui/minimal.test.ts src/backends/native/agent.test.ts src/daemon/routing.test.ts src/commands/builtin/index.test.ts + pnpm typecheck passing" + }, + "auth-profile-cooldown-for-key-pools": { + "status": "completed", + "date": "2026-02-19", + "updated": "2026-02-19", + "summary": "Added per-tier/profile cooldown support for rotated API-key auth pools via `auth_profile_cooldown_ms`. 
`RotatingModelClient` now applies temporary backoff to failing profiles while preserving sticky-success behavior, and model client factory wiring now passes cooldown for providers using `api_keys` pools.", + "files_modified": [ + "src/models/rotating.ts", + "src/models/rotating.test.ts", + "src/daemon/models.ts", + "src/daemon/clientFactory.test.ts", + "src/config/schema.ts", + "src/config/schema.test.ts", + "README.md", + "docs/plans/2026-02-15-openclaw-gap-roadmap.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/models/rotating.test.ts src/daemon/clientFactory.test.ts src/config/schema.test.ts + pnpm typecheck passing" } }, "overall_progress": { diff --git a/src/config/schema.test.ts b/src/config/schema.test.ts index 566ecc4..a88aea8 100644 --- a/src/config/schema.test.ts +++ b/src/config/schema.test.ts @@ -599,6 +599,36 @@ describe('configSchema — models auth_mode', () => { }); expect(result.models.default.api_keys).toEqual(['sk-1', 'sk-2']); }); + + it('accepts auth_profile_cooldown_ms per model tier', () => { + const result = configSchema.parse({ + ...minimalConfig, + models: { + default: { + provider: 'openai', + model: 'gpt-4o', + api_keys: ['sk-1', 'sk-2'], + auth_profile_cooldown_ms: 30000, + }, + }, + }); + expect(result.models.default.auth_profile_cooldown_ms).toBe(30000); + }); + + it('rejects invalid auth_profile_cooldown_ms values', () => { + expect(() => { + configSchema.parse({ + ...minimalConfig, + models: { + default: { + provider: 'openai', + model: 'gpt-4o', + auth_profile_cooldown_ms: -1, + }, + }, + }); + }).toThrow(/auth_profile_cooldown_ms/i); + }); }); describe('configSchema — matrix', () => { diff --git a/src/config/schema.ts b/src/config/schema.ts index cc00aac..240fde4 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -144,6 +144,8 @@ const modelConfigBaseSchema = z.object({ endpoint: z.string().optional(), api_key: z.string().optional(), api_keys: z.array(z.string().min(1)).optional(), + /** Cooldown 
(ms) before retrying a failed key/token profile in rotation pools. */ + auth_profile_cooldown_ms: z.number().min(0).max(3_600_000).optional(), auth_token: z.string().optional(), /** Credential selection strategy for this tier (provider-specific). */ auth_mode: z.enum(['auto', 'api_key', 'oauth']).optional(), diff --git a/src/daemon/clientFactory.test.ts b/src/daemon/clientFactory.test.ts index 6240752..1be2961 100644 --- a/src/daemon/clientFactory.test.ts +++ b/src/daemon/clientFactory.test.ts @@ -58,6 +58,17 @@ describe('createClientFromConfig', () => { expect(client.constructor.name).toBe('RotatingModelClient'); }); + it('supports auth_profile_cooldown_ms with api_keys pools', async () => { + const { createClientFromConfig } = await loadFactory(); + const client = createClientFromConfig({ + provider: 'openai', + model: 'gpt-4o', + api_keys: ['sk-1', 'sk-2'], + auth_profile_cooldown_ms: 30_000, + }); + expect(client.constructor.name).toBe('RotatingModelClient'); + }); + it('creates OllamaClient for ollama provider', async () => { const { createClientFromConfig } = await loadFactory(); const client = createClientFromConfig({ diff --git a/src/daemon/models.ts b/src/daemon/models.ts index 99a444d..f25221a 100644 --- a/src/daemon/models.ts +++ b/src/daemon/models.ts @@ -50,11 +50,15 @@ function resolveApiKeyPool(cfg: ModelConfig, envVar?: string): string[] { function createApiKeyClient( keys: string[], build: (apiKey: string) => ModelClient, + options?: { cooldownMs?: number }, ): ModelClient { if (keys.length === 1) { return build(keys[0]); } - return new RotatingModelClient(keys.map((key) => build(key))); + return new RotatingModelClient( + keys.map((key) => build(key)), + { cooldownMs: options?.cooldownMs ?? 
0 }, + ); } function resolveZaiCredential(cfg: ModelConfig): string { @@ -113,7 +117,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient { return createApiKeyClient(allKeys, (apiKey) => new AnthropicClient({ model: cfg.model, apiKey, - })); + }), { cooldownMs: cfg.auth_profile_cooldown_ms }); } // auto: prefer API keys, then token @@ -126,7 +130,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient { return createApiKeyClient(allKeys, (apiKey) => new AnthropicClient({ model: cfg.model, apiKey, - })); + }), { cooldownMs: cfg.auth_profile_cooldown_ms }); } const token = cfg.auth_token ?? getAnthropicAuthToken(); @@ -176,7 +180,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient { return createApiKeyClient(allKeys, (apiKey) => new OpenAIClient({ model: cfg.model, apiKey, - })); + }), { cooldownMs: cfg.auth_profile_cooldown_ms }); } // auto: prefer API keys, then OAuth @@ -189,7 +193,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient { return createApiKeyClient(allKeys, (apiKey) => new OpenAIClient({ model: cfg.model, apiKey, - })); + }), { cooldownMs: cfg.auth_profile_cooldown_ms }); } const existing = loadStoredOpenAIAuth(); @@ -235,7 +239,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient { model: cfg.model, apiKey, baseURL: cfg.endpoint ?? 'https://openrouter.ai/api/v1', - })); + }), { cooldownMs: cfg.auth_profile_cooldown_ms }); } case 'vercel': return new OpenAIClient({ @@ -261,7 +265,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient { model: cfg.model, apiKey, baseURL: cfg.endpoint ?? 'https://api.x.ai/v1', - })); + }), { cooldownMs: cfg.auth_profile_cooldown_ms }); } case 'minimax': { @@ -275,7 +279,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient { model: cfg.model, apiKey, baseURL: cfg.endpoint ?? 
'https://api.minimax.io/v1', - })); + }), { cooldownMs: cfg.auth_profile_cooldown_ms }); } case 'moonshot': { @@ -289,7 +293,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient { model: cfg.model, apiKey, baseURL: cfg.endpoint ?? 'https://api.moonshot.cn/v1', - })); + }), { cooldownMs: cfg.auth_profile_cooldown_ms }); } case 'bedrock': return new BedrockClient({ diff --git a/src/models/rotating.test.ts b/src/models/rotating.test.ts index b62b7e2..728a979 100644 --- a/src/models/rotating.test.ts +++ b/src/models/rotating.test.ts @@ -34,4 +34,51 @@ describe('RotatingModelClient', () => { expect(first.chat).toHaveBeenCalledTimes(1); expect(second.chat).toHaveBeenCalledTimes(2); }); + + it('applies cooldown to failed profiles before retrying them', async () => { + let nowMs = 1_000; + const now = () => nowMs; + + const first = makeClient(vi.fn() + .mockRejectedValueOnce(new Error('429')) + .mockResolvedValue({ content: 'first-ok' })); + const second = makeClient(vi.fn() + .mockResolvedValueOnce({ content: 'second-ok' }) + .mockResolvedValueOnce({ content: 'second-ok' }) + .mockRejectedValueOnce(new Error('temp-2')) + .mockResolvedValue({ content: 'second-ok' })); + const rotating = new RotatingModelClient([first, second], { cooldownMs: 10_000, now }); + + const r1 = await rotating.chat({ messages: [{ role: 'user', content: 'a' }] }); + expect(r1.content).toBe('second-ok'); + + nowMs += 1_000; + const r2 = await rotating.chat({ messages: [{ role: 'user', content: 'b' }] }); + expect(r2.content).toBe('second-ok'); + + nowMs += 11_000; + const r3 = await rotating.chat({ messages: [{ role: 'user', content: 'c' }] }); + expect(r3.content).toBe('first-ok'); + + expect(first.chat).toHaveBeenCalledTimes(2); + expect(second.chat).toHaveBeenCalledTimes(3); + }); + + it('still attempts cooling profiles when all profiles are cooling down', async () => { + let nowMs = 1_000; + const now = () => nowMs; + const first = makeClient(vi.fn().mockRejectedValue(new 
Error('down-1'))); + const second = makeClient(vi.fn().mockRejectedValue(new Error('down-2'))); + const rotating = new RotatingModelClient([first, second], { cooldownMs: 30_000, now }); + + await expect(rotating.chat({ messages: [{ role: 'user', content: 'a' }] })) + .rejects.toThrow(/all auth profiles failed/i); + + nowMs += 100; + await expect(rotating.chat({ messages: [{ role: 'user', content: 'b' }] })) + .rejects.toThrow(/all auth profiles failed/i); + + expect(first.chat).toHaveBeenCalledTimes(2); + expect(second.chat).toHaveBeenCalledTimes(2); + }); }); diff --git a/src/models/rotating.ts b/src/models/rotating.ts index 2d67be6..b12b0f1 100644 --- a/src/models/rotating.ts +++ b/src/models/rotating.ts @@ -1,33 +1,46 @@ import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient } from './types.js'; +export interface RotatingModelClientOptions { + /** Cooldown period applied to a profile after a failed attempt. */ + cooldownMs?: number; + /** Injectable clock for deterministic tests. */ + now?: () => number; +} + /** * Model client wrapper that rotates across equivalent auth profiles (e.g. API keys). * Sticky-by-success behavior: keep using the last successful profile until it fails. */ export class RotatingModelClient implements ModelClient { private readonly clients: ModelClient[]; + private readonly cooldownMs: number; + private readonly now: () => number; + private readonly cooldownUntilMs: number[]; private currentIndex = 0; - constructor(clients: ModelClient[]) { + constructor(clients: ModelClient[], options: RotatingModelClientOptions = {}) { if (clients.length === 0) { throw new Error('RotatingModelClient requires at least one client'); } this.clients = clients; + this.cooldownMs = Math.max(0, options.cooldownMs ?? 0); + this.now = options.now ?? 
(() => Date.now()); + this.cooldownUntilMs = Array.from({ length: clients.length }, () => 0); } async chat(request: ChatRequest): Promise<ChatResponse> { - const start = this.currentIndex; const errors: Error[] = []; - - for (let offset = 0; offset < this.clients.length; offset += 1) { - const index = (start + offset) % this.clients.length; + const order = this.getAttemptOrder(); + for (const index of order) { const client = this.clients[index]; try { const response = await client.chat(request); this.currentIndex = index; + this.cooldownUntilMs[index] = 0; return response; } catch (error) { errors.push(error instanceof Error ? error : new Error(String(error))); + this.applyCooldown(index); } } @@ -35,10 +48,8 @@ export class RotatingModelClient implements ModelClient { } async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> { - const start = this.currentIndex; - - for (let offset = 0; offset < this.clients.length; offset += 1) { - const index = (start + offset) % this.clients.length; + const order = this.getAttemptOrder(); + for (const index of order) { const client = this.clients[index]; if (!client.chatStream) { continue; @@ -48,6 +59,7 @@ export class RotatingModelClient implements ModelClient { for await (const event of client.chatStream(request)) { if (event.type === 'error') { failed = true; + this.applyCooldown(index); break; } yield event; @@ -55,10 +67,36 @@ export class RotatingModelClient implements ModelClient { if (!failed) { this.currentIndex = index; + this.cooldownUntilMs[index] = 0; return; } } yield { type: 'error', error: new Error('All auth profiles failed for streaming') }; } + + private getAttemptOrder(): number[] { + const now = this.now(); + const available: number[] = []; + const cooling: number[] = []; + + for (let offset = 0; offset < this.clients.length; offset += 1) { + const index = (this.currentIndex + offset) % this.clients.length; + if (this.cooldownUntilMs[index] <= now) { + available.push(index); + } else { + cooling.push(index); + } + } + + 
// If all profiles are cooling down, still attempt them in sticky order. + return available.length > 0 ? [...available, ...cooling] : cooling; + } + + private applyCooldown(index: number): void { + if (this.cooldownMs <= 0) { + return; + } + this.cooldownUntilMs[index] = this.now() + this.cooldownMs; + } }