feat(models): add auth profile cooldown for api key pools
This commit is contained in:
@@ -258,6 +258,7 @@ models:
|
||||
model: claude-opus-4-5-20251101
|
||||
api_key: sk-ant-api03-...
|
||||
# api_keys: [sk-ant-primary-..., sk-ant-secondary-...] # Optional rotation pool
|
||||
# auth_profile_cooldown_ms: 30000 # Optional cooldown before retrying a failed key profile
|
||||
local:
|
||||
provider: ollama
|
||||
model: qwen2.5:14b
|
||||
@@ -321,7 +322,7 @@ models:
|
||||
|
||||
Each tier can optionally specify `auth_mode` (`auto` | `api_key` | `oauth`) to control whether Flynn uses API keys vs OAuth/token auth for that provider. `use_oauth: true` remains supported as a compatibility alias for `auth_mode: oauth`.
|
||||
|
||||
When multiple keys are configured via `api_keys`, Flynn rotates across keys on provider failures and sticks to the last successful key profile until it fails.
|
||||
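When multiple keys are configured via `api_keys`, Flynn rotates across key profiles on provider failures and sticks to the last successful profile until it fails. Set `auth_profile_cooldown_ms` (milliseconds, 0–3600000; leaving it unset or using `0` disables the cooldown) to deprioritize a failing profile for that long: profiles that are not cooling down are tried first, and cooling profiles are still attempted as a last resort before the request fails.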
|
||||
|
||||
Note: with `provider: openai` + `auth_mode: oauth` (Codex backend), Flynn currently does not send tool definitions to the provider. Tool execution is therefore unavailable in that mode, and any textual `tool_use` output should be treated as non-executable model text.
|
||||
|
||||
|
||||
@@ -332,6 +332,8 @@ These are substantial UX/ecosystem projects or highly platform-specific; defer u
|
||||
|
||||
## Suggested Next Execution Order
|
||||
|
||||
1) Auth profile rotation/stickiness before provider fallback
|
||||
2) Queue/run-control polish (interrupt preemption telemetry + UX)
|
||||
3) Daily memory continuity tuning (if continuity quality is still lacking)
|
||||
1) Queue/run-control polish (interrupt preemption telemetry + UX)
|
||||
2) Daily memory continuity tuning (if continuity quality is still lacking)
|
||||
3) Auth-profile expansion beyond API-key pools (if needed)
|
||||
|
||||
Note: API-key pool auth profile cooldown/backoff (`auth_profile_cooldown_ms`) shipped on 2026-02-19.
|
||||
|
||||
@@ -5775,6 +5775,24 @@
|
||||
"docs/plans/state.json"
|
||||
],
|
||||
"test_status": "pnpm test:run src/security/elevation.test.ts src/gateway/handlers/agent.test.ts src/frontends/tui/minimal.test.ts src/backends/native/agent.test.ts src/daemon/routing.test.ts src/commands/builtin/index.test.ts + pnpm typecheck passing"
|
||||
},
|
||||
"auth-profile-cooldown-for-key-pools": {
|
||||
"status": "completed",
|
||||
"date": "2026-02-19",
|
||||
"updated": "2026-02-19",
|
||||
"summary": "Added per-tier/profile cooldown support for rotated API-key auth pools via `auth_profile_cooldown_ms`. `RotatingModelClient` now applies temporary backoff to failing profiles while preserving sticky-success behavior, and model client factory wiring now passes cooldown for providers using `api_keys` pools.",
|
||||
"files_modified": [
|
||||
"src/models/rotating.ts",
|
||||
"src/models/rotating.test.ts",
|
||||
"src/daemon/models.ts",
|
||||
"src/daemon/clientFactory.test.ts",
|
||||
"src/config/schema.ts",
|
||||
"src/config/schema.test.ts",
|
||||
"README.md",
|
||||
"docs/plans/2026-02-15-openclaw-gap-roadmap.md",
|
||||
"docs/plans/state.json"
|
||||
],
|
||||
"test_status": "pnpm test:run src/models/rotating.test.ts src/daemon/clientFactory.test.ts src/config/schema.test.ts + pnpm typecheck passing"
|
||||
}
|
||||
},
|
||||
"overall_progress": {
|
||||
|
||||
@@ -599,6 +599,36 @@ describe('configSchema — models auth_mode', () => {
|
||||
});
|
||||
expect(result.models.default.api_keys).toEqual(['sk-1', 'sk-2']);
|
||||
});
|
||||
|
||||
it('accepts auth_profile_cooldown_ms per model tier', () => {
|
||||
const result = configSchema.parse({
|
||||
...minimalConfig,
|
||||
models: {
|
||||
default: {
|
||||
provider: 'openai',
|
||||
model: 'gpt-4o',
|
||||
api_keys: ['sk-1', 'sk-2'],
|
||||
auth_profile_cooldown_ms: 30000,
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(result.models.default.auth_profile_cooldown_ms).toBe(30000);
|
||||
});
|
||||
|
||||
it('rejects invalid auth_profile_cooldown_ms values', () => {
|
||||
expect(() => {
|
||||
configSchema.parse({
|
||||
...minimalConfig,
|
||||
models: {
|
||||
default: {
|
||||
provider: 'openai',
|
||||
model: 'gpt-4o',
|
||||
auth_profile_cooldown_ms: -1,
|
||||
},
|
||||
},
|
||||
});
|
||||
}).toThrow(/auth_profile_cooldown_ms/i);
|
||||
});
|
||||
});
|
||||
|
||||
describe('configSchema — matrix', () => {
|
||||
|
||||
@@ -144,6 +144,8 @@ const modelConfigBaseSchema = z.object({
|
||||
endpoint: z.string().optional(),
|
||||
api_key: z.string().optional(),
|
||||
api_keys: z.array(z.string().min(1)).optional(),
|
||||
/** Cooldown (ms) before retrying a failed key/token profile in rotation pools. */
|
||||
auth_profile_cooldown_ms: z.number().min(0).max(3_600_000).optional(),
|
||||
auth_token: z.string().optional(),
|
||||
/** Credential selection strategy for this tier (provider-specific). */
|
||||
auth_mode: z.enum(['auto', 'api_key', 'oauth']).optional(),
|
||||
|
||||
@@ -58,6 +58,17 @@ describe('createClientFromConfig', () => {
|
||||
expect(client.constructor.name).toBe('RotatingModelClient');
|
||||
});
|
||||
|
||||
it('supports auth_profile_cooldown_ms with api_keys pools', async () => {
|
||||
const { createClientFromConfig } = await loadFactory();
|
||||
const client = createClientFromConfig({
|
||||
provider: 'openai',
|
||||
model: 'gpt-4o',
|
||||
api_keys: ['sk-1', 'sk-2'],
|
||||
auth_profile_cooldown_ms: 30_000,
|
||||
});
|
||||
expect(client.constructor.name).toBe('RotatingModelClient');
|
||||
});
|
||||
|
||||
it('creates OllamaClient for ollama provider', async () => {
|
||||
const { createClientFromConfig } = await loadFactory();
|
||||
const client = createClientFromConfig({
|
||||
|
||||
+13
-9
@@ -50,11 +50,15 @@ function resolveApiKeyPool(cfg: ModelConfig, envVar?: string): string[] {
|
||||
/**
 * Builds a model client for a pool of API keys.
 *
 * A pool with exactly one key yields the client produced by `build` directly.
 * Any other pool size is wrapped in a `RotatingModelClient` holding one inner
 * client per key, in the order the keys were given.
 *
 * @param keys - API keys to build clients for.
 * @param build - Factory that creates a provider client for a single API key.
 * @param options - Optional settings. `cooldownMs` is forwarded to the
 *   rotating client and falls back to 0 when omitted.
 * @returns The single built client, or a rotating wrapper over all of them.
 */
function createApiKeyClient(
  keys: string[],
  build: (apiKey: string) => ModelClient,
  options?: { cooldownMs?: number },
): ModelClient {
  if (keys.length === 1) {
    return build(keys[0]);
  }
  // Forward the configured cooldown; without it the rotating client would
  // never back off from a failing key profile.
  return new RotatingModelClient(
    keys.map((key) => build(key)),
    { cooldownMs: options?.cooldownMs ?? 0 },
  );
}
|
||||
|
||||
function resolveZaiCredential(cfg: ModelConfig): string {
|
||||
@@ -113,7 +117,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
|
||||
return createApiKeyClient(allKeys, (apiKey) => new AnthropicClient({
|
||||
model: cfg.model,
|
||||
apiKey,
|
||||
}));
|
||||
}), { cooldownMs: cfg.auth_profile_cooldown_ms });
|
||||
}
|
||||
|
||||
// auto: prefer API keys, then token
|
||||
@@ -126,7 +130,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
|
||||
return createApiKeyClient(allKeys, (apiKey) => new AnthropicClient({
|
||||
model: cfg.model,
|
||||
apiKey,
|
||||
}));
|
||||
}), { cooldownMs: cfg.auth_profile_cooldown_ms });
|
||||
}
|
||||
|
||||
const token = cfg.auth_token ?? getAnthropicAuthToken();
|
||||
@@ -176,7 +180,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
|
||||
return createApiKeyClient(allKeys, (apiKey) => new OpenAIClient({
|
||||
model: cfg.model,
|
||||
apiKey,
|
||||
}));
|
||||
}), { cooldownMs: cfg.auth_profile_cooldown_ms });
|
||||
}
|
||||
|
||||
// auto: prefer API keys, then OAuth
|
||||
@@ -189,7 +193,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
|
||||
return createApiKeyClient(allKeys, (apiKey) => new OpenAIClient({
|
||||
model: cfg.model,
|
||||
apiKey,
|
||||
}));
|
||||
}), { cooldownMs: cfg.auth_profile_cooldown_ms });
|
||||
}
|
||||
|
||||
const existing = loadStoredOpenAIAuth();
|
||||
@@ -235,7 +239,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
|
||||
model: cfg.model,
|
||||
apiKey,
|
||||
baseURL: cfg.endpoint ?? 'https://openrouter.ai/api/v1',
|
||||
}));
|
||||
}), { cooldownMs: cfg.auth_profile_cooldown_ms });
|
||||
}
|
||||
case 'vercel':
|
||||
return new OpenAIClient({
|
||||
@@ -261,7 +265,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
|
||||
model: cfg.model,
|
||||
apiKey,
|
||||
baseURL: cfg.endpoint ?? 'https://api.x.ai/v1',
|
||||
}));
|
||||
}), { cooldownMs: cfg.auth_profile_cooldown_ms });
|
||||
}
|
||||
case 'minimax':
|
||||
{
|
||||
@@ -275,7 +279,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
|
||||
model: cfg.model,
|
||||
apiKey,
|
||||
baseURL: cfg.endpoint ?? 'https://api.minimax.io/v1',
|
||||
}));
|
||||
}), { cooldownMs: cfg.auth_profile_cooldown_ms });
|
||||
}
|
||||
case 'moonshot':
|
||||
{
|
||||
@@ -289,7 +293,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
|
||||
model: cfg.model,
|
||||
apiKey,
|
||||
baseURL: cfg.endpoint ?? 'https://api.moonshot.cn/v1',
|
||||
}));
|
||||
}), { cooldownMs: cfg.auth_profile_cooldown_ms });
|
||||
}
|
||||
case 'bedrock':
|
||||
return new BedrockClient({
|
||||
|
||||
@@ -34,4 +34,51 @@ describe('RotatingModelClient', () => {
|
||||
expect(first.chat).toHaveBeenCalledTimes(1);
|
||||
expect(second.chat).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it('applies cooldown to failed profiles before retrying them', async () => {
|
||||
let nowMs = 1_000;
|
||||
const now = () => nowMs;
|
||||
|
||||
const first = makeClient(vi.fn()
|
||||
.mockRejectedValueOnce(new Error('429'))
|
||||
.mockResolvedValue({ content: 'first-ok' }));
|
||||
const second = makeClient(vi.fn()
|
||||
.mockResolvedValueOnce({ content: 'second-ok' })
|
||||
.mockResolvedValueOnce({ content: 'second-ok' })
|
||||
.mockRejectedValueOnce(new Error('temp-2'))
|
||||
.mockResolvedValue({ content: 'second-ok' }));
|
||||
const rotating = new RotatingModelClient([first, second], { cooldownMs: 10_000, now });
|
||||
|
||||
const r1 = await rotating.chat({ messages: [{ role: 'user', content: 'a' }] });
|
||||
expect(r1.content).toBe('second-ok');
|
||||
|
||||
nowMs += 1_000;
|
||||
const r2 = await rotating.chat({ messages: [{ role: 'user', content: 'b' }] });
|
||||
expect(r2.content).toBe('second-ok');
|
||||
|
||||
nowMs += 11_000;
|
||||
const r3 = await rotating.chat({ messages: [{ role: 'user', content: 'c' }] });
|
||||
expect(r3.content).toBe('first-ok');
|
||||
|
||||
expect(first.chat).toHaveBeenCalledTimes(2);
|
||||
expect(second.chat).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
it('still attempts cooling profiles when all profiles are cooling down', async () => {
|
||||
let nowMs = 1_000;
|
||||
const now = () => nowMs;
|
||||
const first = makeClient(vi.fn().mockRejectedValue(new Error('down-1')));
|
||||
const second = makeClient(vi.fn().mockRejectedValue(new Error('down-2')));
|
||||
const rotating = new RotatingModelClient([first, second], { cooldownMs: 30_000, now });
|
||||
|
||||
await expect(rotating.chat({ messages: [{ role: 'user', content: 'a' }] }))
|
||||
.rejects.toThrow(/all auth profiles failed/i);
|
||||
|
||||
nowMs += 100;
|
||||
await expect(rotating.chat({ messages: [{ role: 'user', content: 'b' }] }))
|
||||
.rejects.toThrow(/all auth profiles failed/i);
|
||||
|
||||
expect(first.chat).toHaveBeenCalledTimes(2);
|
||||
expect(second.chat).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
+47
-9
@@ -1,33 +1,46 @@
|
||||
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient } from './types.js';
|
||||
|
||||
/** Tuning knobs accepted by `RotatingModelClient`. */
export interface RotatingModelClientOptions {
  /**
   * How long, in milliseconds, a profile stays in cooldown after one of its
   * attempts fails. Omitting it (or passing 0) turns the cooldown off.
   */
  cooldownMs?: number;
  /**
   * Clock returning the current time in milliseconds. Exists so tests can
   * drive time deterministically; the wall clock is used when omitted.
   */
  now?: () => number;
}
|
||||
|
||||
/**
|
||||
* Model client wrapper that rotates across equivalent auth profiles (e.g. API keys).
|
||||
* Sticky-by-success behavior: keep using the last successful profile until it fails.
|
||||
*/
|
||||
export class RotatingModelClient implements ModelClient {
|
||||
private readonly clients: ModelClient[];
|
||||
private readonly cooldownMs: number;
|
||||
private readonly now: () => number;
|
||||
private readonly cooldownUntilMs: number[];
|
||||
private currentIndex = 0;
|
||||
|
||||
/**
 * Creates a rotating wrapper over equivalent auth profiles.
 *
 * @param clients - Inner clients, one per auth profile; must not be empty.
 * @param options - Optional cooldown length and injectable clock.
 * @throws Error when `clients` is empty.
 */
constructor(clients: ModelClient[], options: RotatingModelClientOptions = {}) {
  if (clients.length === 0) {
    throw new Error('RotatingModelClient requires at least one client');
  }
  this.clients = clients;
  // Negative and NaN cooldowns both collapse to 0 (disabled). Math.max(0, NaN)
  // would keep NaN and later yield NaN expiry timestamps.
  const requestedCooldownMs = options.cooldownMs ?? 0;
  this.cooldownMs = requestedCooldownMs > 0 ? requestedCooldownMs : 0;
  this.now = options.now ?? (() => Date.now());
  // One expiry timestamp per profile; 0 means the profile is not cooling down.
  this.cooldownUntilMs = Array.from({ length: clients.length }, () => 0);
}
|
||||
|
||||
async chat(request: ChatRequest): Promise<ChatResponse> {
|
||||
const start = this.currentIndex;
|
||||
const errors: Error[] = [];
|
||||
|
||||
for (let offset = 0; offset < this.clients.length; offset += 1) {
|
||||
const index = (start + offset) % this.clients.length;
|
||||
const order = this.getAttemptOrder();
|
||||
for (const index of order) {
|
||||
const client = this.clients[index];
|
||||
try {
|
||||
const response = await client.chat(request);
|
||||
this.currentIndex = index;
|
||||
this.cooldownUntilMs[index] = 0;
|
||||
return response;
|
||||
} catch (error) {
|
||||
errors.push(error instanceof Error ? error : new Error(String(error)));
|
||||
this.applyCooldown(index);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,10 +48,8 @@ export class RotatingModelClient implements ModelClient {
|
||||
}
|
||||
|
||||
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
|
||||
const start = this.currentIndex;
|
||||
|
||||
for (let offset = 0; offset < this.clients.length; offset += 1) {
|
||||
const index = (start + offset) % this.clients.length;
|
||||
const order = this.getAttemptOrder();
|
||||
for (const index of order) {
|
||||
const client = this.clients[index];
|
||||
if (!client.chatStream) {
|
||||
continue;
|
||||
@@ -48,6 +59,7 @@ export class RotatingModelClient implements ModelClient {
|
||||
for await (const event of client.chatStream(request)) {
|
||||
if (event.type === 'error') {
|
||||
failed = true;
|
||||
this.applyCooldown(index);
|
||||
break;
|
||||
}
|
||||
yield event;
|
||||
@@ -55,10 +67,36 @@ export class RotatingModelClient implements ModelClient {
|
||||
|
||||
if (!failed) {
|
||||
this.currentIndex = index;
|
||||
this.cooldownUntilMs[index] = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
yield { type: 'error', error: new Error('All auth profiles failed for streaming') };
|
||||
}
|
||||
|
||||
/**
 * Computes the order in which profile indices are tried for one request.
 *
 * Indices are walked as a rotation that starts at the sticky
 * (`currentIndex`) profile. Profiles whose cooldown has expired come first;
 * profiles still cooling down follow, so they are only reached once every
 * available profile has been tried. Both groups keep the rotation order.
 *
 * @returns Every profile index exactly once, available ones before cooling ones.
 */
private getAttemptOrder(): number[] {
  // Read the clock once so every profile is judged against the same instant.
  const timestamp = this.now();
  const total = this.clients.length;
  const rotation = Array.from(
    { length: total },
    (_, step) => (this.currentIndex + step) % total,
  );
  const isReady = (slot: number): boolean => this.cooldownUntilMs[slot] <= timestamp;

  const ready = rotation.filter((slot) => isReady(slot));
  const stillCooling = rotation.filter((slot) => !isReady(slot));

  // When nothing is ready this is just the cooling list in sticky order.
  return [...ready, ...stillCooling];
}
|
||||
|
||||
/**
 * Starts (or restarts) the cooldown window for one profile after a failure.
 *
 * Does nothing when the configured cooldown is zero or negative.
 *
 * @param index - Position of the failed profile in the client list.
 */
private applyCooldown(index: number): void {
  const cooldownDisabled = this.cooldownMs <= 0;
  if (cooldownDisabled) {
    return;
  }
  const retryNotBeforeMs = this.now() + this.cooldownMs;
  this.cooldownUntilMs[index] = retryNotBeforeMs;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user