From 6b56d9e2233b92b0cd323eaf1b3bce5b09e71e44 Mon Sep 17 00:00:00 2001
From: William Valentin <william.valentin.info@gmail.com>
Date: Thu, 19 Feb 2026 11:45:55 -0800
Subject: [PATCH] feat(models): add auth profile cooldown for api key pools

---
 README.md                                     |  3 +-
 docs/plans/2026-02-15-openclaw-gap-roadmap.md |  8 ++-
 docs/plans/state.json                         | 18 ++++++
 src/config/schema.test.ts                     | 30 ++++++++++
 src/config/schema.ts                          |  2 +
 src/daemon/clientFactory.test.ts              | 11 ++++
 src/daemon/models.ts                          | 22 +++++---
 src/models/rotating.test.ts                   | 47 ++++++++++++++++
 src/models/rotating.ts                        | 56 ++++++++++++++++---
 9 files changed, 175 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 8a0e047..ce316bf 100644
--- a/README.md
+++ b/README.md
@@ -258,6 +258,7 @@ models:
     model: claude-opus-4-5-20251101
     api_key: sk-ant-api03-...
     # api_keys: [sk-ant-primary-..., sk-ant-secondary-...]  # Optional rotation pool
+    # auth_profile_cooldown_ms: 30000  # Optional cooldown (ms) during which a failed key profile is tried last
   local:
     provider: ollama
     model: qwen2.5:14b
@@ -321,7 +322,7 @@ models:
 
 Each tier can optionally specify `auth_mode` (`auto` | `api_key` | `oauth`) to control whether Flynn uses API keys vs OAuth/token auth for that provider. `use_oauth: true` remains supported as a compatibility alias for `auth_mode: oauth`.
 
-When multiple keys are configured via `api_keys`, Flynn rotates across keys on provider failures and sticks to the last successful key profile until it fails.
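+When multiple keys are configured via `api_keys`, Flynn rotates across key profiles on provider failures and sticks to the last successful profile until it fails. Set `auth_profile_cooldown_ms` (milliseconds, 0 to 3600000; omitted or 0 disables the cooldown) to temporarily cool down failing profiles: a cooling profile is tried only after every non-cooling profile has failed, and if all profiles are cooling down they are still attempted rather than skipped.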
 
 Note: with `provider: openai` + `auth_mode: oauth` (Codex backend), Flynn currently does not send tool definitions to the provider. Tool execution is therefore unavailable in that mode, and any textual `tool_use` output should be treated as non-executable model text.
 
diff --git a/docs/plans/2026-02-15-openclaw-gap-roadmap.md b/docs/plans/2026-02-15-openclaw-gap-roadmap.md
index 1a0c93e..8ad2733 100644
--- a/docs/plans/2026-02-15-openclaw-gap-roadmap.md
+++ b/docs/plans/2026-02-15-openclaw-gap-roadmap.md
@@ -332,6 +332,8 @@ These are substantial UX/ecosystem projects or highly platform-specific; defer u
 
 ## Suggested Next Execution Order
 
-1) Auth profile rotation/stickiness before provider fallback
-2) Queue/run-control polish (interrupt preemption telemetry + UX)
-3) Daily memory continuity tuning (if continuity quality is still lacking)
+1) Queue/run-control polish (interrupt preemption telemetry + UX)
+2) Daily memory continuity tuning (if continuity quality is still lacking)
+3) Auth-profile expansion beyond API-key pools (if needed)
+
+Note: API-key pool auth profile cooldown/backoff (`auth_profile_cooldown_ms`) shipped on 2026-02-19.
diff --git a/docs/plans/state.json b/docs/plans/state.json
index b12da3f..b055e19 100644
--- a/docs/plans/state.json
+++ b/docs/plans/state.json
@@ -5775,6 +5775,24 @@
         "docs/plans/state.json"
       ],
       "test_status": "pnpm test:run src/security/elevation.test.ts src/gateway/handlers/agent.test.ts src/frontends/tui/minimal.test.ts src/backends/native/agent.test.ts src/daemon/routing.test.ts src/commands/builtin/index.test.ts + pnpm typecheck passing"
+    },
+    "auth-profile-cooldown-for-key-pools": {
+      "status": "completed",
+      "date": "2026-02-19",
+      "updated": "2026-02-19",
+      "summary": "Added per-tier/profile cooldown support for rotated API-key auth pools via `auth_profile_cooldown_ms`. `RotatingModelClient` now applies temporary backoff to failing profiles while preserving sticky-success behavior, and model client factory wiring now passes cooldown for providers using `api_keys` pools.",
+      "files_modified": [
+        "src/models/rotating.ts",
+        "src/models/rotating.test.ts",
+        "src/daemon/models.ts",
+        "src/daemon/clientFactory.test.ts",
+        "src/config/schema.ts",
+        "src/config/schema.test.ts",
+        "README.md",
+        "docs/plans/2026-02-15-openclaw-gap-roadmap.md",
+        "docs/plans/state.json"
+      ],
+      "test_status": "pnpm test:run src/models/rotating.test.ts src/daemon/clientFactory.test.ts src/config/schema.test.ts + pnpm typecheck passing"
     }
   },
   "overall_progress": {
diff --git a/src/config/schema.test.ts b/src/config/schema.test.ts
index 566ecc4..a88aea8 100644
--- a/src/config/schema.test.ts
+++ b/src/config/schema.test.ts
@@ -599,6 +599,36 @@ describe('configSchema — models auth_mode', () => {
     });
     expect(result.models.default.api_keys).toEqual(['sk-1', 'sk-2']);
   });
+
+  it('accepts auth_profile_cooldown_ms per model tier', () => {
+    const result = configSchema.parse({
+      ...minimalConfig,
+      models: {
+        default: {
+          provider: 'openai',
+          model: 'gpt-4o',
+          api_keys: ['sk-1', 'sk-2'],
+          auth_profile_cooldown_ms: 30000,
+        },
+      },
+    });
+    expect(result.models.default.auth_profile_cooldown_ms).toBe(30000);
+  });
+
+  it('rejects invalid auth_profile_cooldown_ms values', () => {
+    expect(() => {
+      configSchema.parse({
+        ...minimalConfig,
+        models: {
+          default: {
+            provider: 'openai',
+            model: 'gpt-4o',
+            auth_profile_cooldown_ms: -1,
+          },
+        },
+      });
+    }).toThrow(/auth_profile_cooldown_ms/i);
+  });
 });
 
 describe('configSchema — matrix', () => {
diff --git a/src/config/schema.ts b/src/config/schema.ts
index cc00aac..240fde4 100644
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -144,6 +144,8 @@ const modelConfigBaseSchema = z.object({
   endpoint: z.string().optional(),
   api_key: z.string().optional(),
   api_keys: z.array(z.string().min(1)).optional(),
+  /** Cooldown in ms (0 to 3,600,000, i.e. at most one hour) during which a failed auth profile in a rotation pool is tried last; 0 or omitted disables it. */
+  auth_profile_cooldown_ms: z.number().min(0).max(3_600_000).optional(),
   auth_token: z.string().optional(),
   /** Credential selection strategy for this tier (provider-specific). */
   auth_mode: z.enum(['auto', 'api_key', 'oauth']).optional(),
diff --git a/src/daemon/clientFactory.test.ts b/src/daemon/clientFactory.test.ts
index 6240752..1be2961 100644
--- a/src/daemon/clientFactory.test.ts
+++ b/src/daemon/clientFactory.test.ts
@@ -58,6 +58,17 @@ describe('createClientFromConfig', () => {
     expect(client.constructor.name).toBe('RotatingModelClient');
   });
 
+  it('supports auth_profile_cooldown_ms with api_keys pools', async () => {
+    const { createClientFromConfig } = await loadFactory();
+    const client = createClientFromConfig({
+      provider: 'openai',
+      model: 'gpt-4o',
+      api_keys: ['sk-1', 'sk-2'],
+      auth_profile_cooldown_ms: 30_000,
+    });
+    expect(client.constructor.name).toBe('RotatingModelClient');
+  });
+
   it('creates OllamaClient for ollama provider', async () => {
     const { createClientFromConfig } = await loadFactory();
     const client = createClientFromConfig({
diff --git a/src/daemon/models.ts b/src/daemon/models.ts
index 99a444d..f25221a 100644
--- a/src/daemon/models.ts
+++ b/src/daemon/models.ts
@@ -50,11 +50,15 @@ function resolveApiKeyPool(cfg: ModelConfig, envVar?: string): string[] {
 function createApiKeyClient(
   keys: string[],
   build: (apiKey: string) => ModelClient,
+  options?: { cooldownMs?: number },
 ): ModelClient {
   if (keys.length === 1) {
     return build(keys[0]);
   }
-  return new RotatingModelClient(keys.map((key) => build(key)));
+  return new RotatingModelClient(
+    keys.map((key) => build(key)),
+    { cooldownMs: options?.cooldownMs ?? 0 },
+  );
 }
 
 function resolveZaiCredential(cfg: ModelConfig): string {
@@ -113,7 +117,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
         return createApiKeyClient(allKeys, (apiKey) => new AnthropicClient({
           model: cfg.model,
           apiKey,
-        }));
+        }), { cooldownMs: cfg.auth_profile_cooldown_ms });
       }
 
       // auto: prefer API keys, then token
@@ -126,7 +130,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
         return createApiKeyClient(allKeys, (apiKey) => new AnthropicClient({
           model: cfg.model,
           apiKey,
-        }));
+        }), { cooldownMs: cfg.auth_profile_cooldown_ms });
       }
 
       const token = cfg.auth_token ?? getAnthropicAuthToken();
@@ -176,7 +180,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
         return createApiKeyClient(allKeys, (apiKey) => new OpenAIClient({
           model: cfg.model,
           apiKey,
-        }));
+        }), { cooldownMs: cfg.auth_profile_cooldown_ms });
       }
 
       // auto: prefer API keys, then OAuth
@@ -189,7 +193,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
         return createApiKeyClient(allKeys, (apiKey) => new OpenAIClient({
           model: cfg.model,
           apiKey,
-        }));
+        }), { cooldownMs: cfg.auth_profile_cooldown_ms });
       }
 
       const existing = loadStoredOpenAIAuth();
@@ -235,7 +239,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
         model: cfg.model,
         apiKey,
         baseURL: cfg.endpoint ?? 'https://openrouter.ai/api/v1',
-      }));
+      }), { cooldownMs: cfg.auth_profile_cooldown_ms });
     }
     case 'vercel':
       return new OpenAIClient({
@@ -261,7 +265,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
         model: cfg.model,
         apiKey,
         baseURL: cfg.endpoint ?? 'https://api.x.ai/v1',
-      }));
+      }), { cooldownMs: cfg.auth_profile_cooldown_ms });
     }
     case 'minimax':
     {
@@ -275,7 +279,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
         model: cfg.model,
         apiKey,
         baseURL: cfg.endpoint ?? 'https://api.minimax.io/v1',
-      }));
+      }), { cooldownMs: cfg.auth_profile_cooldown_ms });
     }
     case 'moonshot':
     {
@@ -289,7 +293,7 @@ export function createClientFromConfig(cfg: ModelConfig): ModelClient {
         model: cfg.model,
         apiKey,
         baseURL: cfg.endpoint ?? 'https://api.moonshot.cn/v1',
-      }));
+      }), { cooldownMs: cfg.auth_profile_cooldown_ms });
     }
     case 'bedrock':
       return new BedrockClient({
diff --git a/src/models/rotating.test.ts b/src/models/rotating.test.ts
index b62b7e2..728a979 100644
--- a/src/models/rotating.test.ts
+++ b/src/models/rotating.test.ts
@@ -34,4 +34,51 @@ describe('RotatingModelClient', () => {
     expect(first.chat).toHaveBeenCalledTimes(1);
     expect(second.chat).toHaveBeenCalledTimes(2);
   });
+
+  it('applies cooldown to failed profiles before retrying them', async () => {
+    let nowMs = 1_000;
+    const now = () => nowMs;
+
+    const first = makeClient(vi.fn()
+      .mockRejectedValueOnce(new Error('429'))
+      .mockResolvedValue({ content: 'first-ok' }));
+    const second = makeClient(vi.fn()
+      .mockResolvedValueOnce({ content: 'second-ok' })
+      .mockResolvedValueOnce({ content: 'second-ok' })
+      .mockRejectedValueOnce(new Error('temp-2'))
+      .mockResolvedValue({ content: 'second-ok' }));
+    const rotating = new RotatingModelClient([first, second], { cooldownMs: 10_000, now });
+
+    const r1 = await rotating.chat({ messages: [{ role: 'user', content: 'a' }] });
+    expect(r1.content).toBe('second-ok');
+
+    nowMs += 1_000;
+    const r2 = await rotating.chat({ messages: [{ role: 'user', content: 'b' }] });
+    expect(r2.content).toBe('second-ok');
+
+    nowMs += 11_000;
+    const r3 = await rotating.chat({ messages: [{ role: 'user', content: 'c' }] });
+    expect(r3.content).toBe('first-ok');
+
+    expect(first.chat).toHaveBeenCalledTimes(2);
+    expect(second.chat).toHaveBeenCalledTimes(3);
+  });
+
+  it('still attempts cooling profiles when all profiles are cooling down', async () => {
+    let nowMs = 1_000;
+    const now = () => nowMs;
+    const first = makeClient(vi.fn().mockRejectedValue(new Error('down-1')));
+    const second = makeClient(vi.fn().mockRejectedValue(new Error('down-2')));
+    const rotating = new RotatingModelClient([first, second], { cooldownMs: 30_000, now });
+
+    await expect(rotating.chat({ messages: [{ role: 'user', content: 'a' }] }))
+      .rejects.toThrow(/all auth profiles failed/i);
+
+    nowMs += 100;
+    await expect(rotating.chat({ messages: [{ role: 'user', content: 'b' }] }))
+      .rejects.toThrow(/all auth profiles failed/i);
+
+    expect(first.chat).toHaveBeenCalledTimes(2);
+    expect(second.chat).toHaveBeenCalledTimes(2);
+  });
 });
diff --git a/src/models/rotating.ts b/src/models/rotating.ts
index 2d67be6..b12b0f1 100644
--- a/src/models/rotating.ts
+++ b/src/models/rotating.ts
@@ -1,33 +1,46 @@
 import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient } from './types.js';
 
+export interface RotatingModelClientOptions {
+  /** Cooldown in milliseconds applied to a profile after a failed attempt; omitted, zero, or negative values disable cooldown. */
+  cooldownMs?: number;
+  /** Injectable clock returning the current time in milliseconds (defaults to `Date.now`); lets tests control time deterministically. */
+  now?: () => number;
+}
+
 /**
  * Model client wrapper that rotates across equivalent auth profiles (e.g. API keys).
  * Sticky-by-success behavior: keep using the last successful profile until it fails.
  */
 export class RotatingModelClient implements ModelClient {
   private readonly clients: ModelClient[];
+  private readonly cooldownMs: number;
+  private readonly now: () => number;
+  private readonly cooldownUntilMs: number[];
   private currentIndex = 0;
 
-  constructor(clients: ModelClient[]) {
+  constructor(clients: ModelClient[], options: RotatingModelClientOptions = {}) {
     if (clients.length === 0) {
       throw new Error('RotatingModelClient requires at least one client');
     }
     this.clients = clients;
+    this.cooldownMs = Math.max(0, options.cooldownMs ?? 0);
+    this.now = options.now ?? (() => Date.now());
+    this.cooldownUntilMs = Array.from({ length: clients.length }, () => 0);
   }
 
   async chat(request: ChatRequest): Promise<ChatResponse> {
-    const start = this.currentIndex;
     const errors: Error[] = [];
-
-    for (let offset = 0; offset < this.clients.length; offset += 1) {
-      const index = (start + offset) % this.clients.length;
+    const order = this.getAttemptOrder();
+    for (const index of order) {
       const client = this.clients[index];
       try {
         const response = await client.chat(request);
         this.currentIndex = index;
+        this.cooldownUntilMs[index] = 0;
         return response;
       } catch (error) {
         errors.push(error instanceof Error ? error : new Error(String(error)));
+        this.applyCooldown(index);
       }
     }
 
@@ -35,10 +48,8 @@ export class RotatingModelClient implements ModelClient {
   }
 
   async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
-    const start = this.currentIndex;
-
-    for (let offset = 0; offset < this.clients.length; offset += 1) {
-      const index = (start + offset) % this.clients.length;
+    const order = this.getAttemptOrder();
+    for (const index of order) {
       const client = this.clients[index];
       if (!client.chatStream) {
         continue;
@@ -48,6 +59,7 @@ export class RotatingModelClient implements ModelClient {
       for await (const event of client.chatStream(request)) {
         if (event.type === 'error') {
           failed = true;
+          this.applyCooldown(index);
           break;
         }
         yield event;
@@ -55,10 +67,36 @@ export class RotatingModelClient implements ModelClient {
 
       if (!failed) {
         this.currentIndex = index;
+        this.cooldownUntilMs[index] = 0;
         return;
       }
     }
 
     yield { type: 'error', error: new Error('All auth profiles failed for streaming') };
   }
+
+  private getAttemptOrder(): number[] {
+    const now = this.now();
+    const available: number[] = [];
+    const cooling: number[] = [];
+
+    for (let offset = 0; offset < this.clients.length; offset += 1) {
+      const index = (this.currentIndex + offset) % this.clients.length;
+      if (this.cooldownUntilMs[index] <= now) {
+        available.push(index);
+      } else {
+        cooling.push(index);
+      }
+    }
+
+    // Cooling profiles are tried last rather than skipped; if every profile is cooling, attempt them all in sticky order.
+    return available.length > 0 ? [...available, ...cooling] : cooling;
+  }
+
+  private applyCooldown(index: number): void {
+    if (this.cooldownMs <= 0) {
+      return;
+    }
+    this.cooldownUntilMs[index] = this.now() + this.cooldownMs;
+  }
 }