feat(models): add auth profile cooldown for api key pools

This commit is contained in:
William Valentin
2026-02-19 11:45:55 -08:00
parent baa53f91d9
commit 6b56d9e223
9 changed files with 175 additions and 22 deletions
+47
View File
@@ -34,4 +34,51 @@ describe('RotatingModelClient', () => {
expect(first.chat).toHaveBeenCalledTimes(1);
expect(second.chat).toHaveBeenCalledTimes(2);
});
// Scenario: `first` fails once and is put on a 10s cooldown; `second` serves requests until it
// fails itself, at which point `first` (whose cooldown has elapsed by then) takes over.
it('applies cooldown to failed profiles before retrying them', async () => {
// Manually advanced fake clock so cooldown expiry is deterministic.
let nowMs = 1_000;
const now = () => nowMs;
// `first`: rejects on its first call only, succeeds afterwards.
const first = makeClient(vi.fn()
.mockRejectedValueOnce(new Error('429'))
.mockResolvedValue({ content: 'first-ok' }));
// `second`: succeeds twice, rejects on its third call, succeeds afterwards.
const second = makeClient(vi.fn()
.mockResolvedValueOnce({ content: 'second-ok' })
.mockResolvedValueOnce({ content: 'second-ok' })
.mockRejectedValueOnce(new Error('temp-2'))
.mockResolvedValue({ content: 'second-ok' }));
const rotating = new RotatingModelClient([first, second], { cooldownMs: 10_000, now });
// t=1_000: `first` is attempted first and rejects (cooling until 11_000); `second` answers.
const r1 = await rotating.chat({ messages: [{ role: 'user', content: 'a' }] });
expect(r1.content).toBe('second-ok');
// t=2_000: `first` is still inside its cooldown window; `second` answers again.
nowMs += 1_000;
const r2 = await rotating.chat({ messages: [{ role: 'user', content: 'b' }] });
expect(r2.content).toBe('second-ok');
// t=13_000: the cooldown of `first` has elapsed. `second` rejects on its third call, so the
// request falls through to `first`, which now succeeds.
nowMs += 11_000;
const r3 = await rotating.chat({ messages: [{ role: 'user', content: 'c' }] });
expect(r3.content).toBe('first-ok');
// `first`: one failure + one success. `second`: two successes + one failure.
expect(first.chat).toHaveBeenCalledTimes(2);
expect(second.chat).toHaveBeenCalledTimes(3);
// NOTE(review): sticky-by-success rotation alone appears to produce this same call sequence,
// so this test may not fail if the cooldown logic were removed — confirm and consider a case
// where the cooling profile would otherwise be tried ahead of an available one.
});
// Scenario: every profile always fails, so after the first request all of them are cooling down.
// A cooldown must only reorder attempts, never skip them: the second request still tries both.
it('still attempts cooling profiles when all profiles are cooling down', async () => {
// Manually advanced fake clock so the second request lands well inside the cooldown window.
let nowMs = 1_000;
const now = () => nowMs;
const first = makeClient(vi.fn().mockRejectedValue(new Error('down-1')));
const second = makeClient(vi.fn().mockRejectedValue(new Error('down-2')));
const rotating = new RotatingModelClient([first, second], { cooldownMs: 30_000, now });
// First request: both profiles are attempted, both reject, and each is put on cooldown.
await expect(rotating.chat({ messages: [{ role: 'user', content: 'a' }] }))
.rejects.toThrow(/all auth profiles failed/i);
// Only 100 ms later — far less than the 30_000 ms cooldown — so both are still cooling.
nowMs += 100;
await expect(rotating.chat({ messages: [{ role: 'user', content: 'b' }] }))
.rejects.toThrow(/all auth profiles failed/i);
// Each profile was called once per request, i.e. cooling profiles were not skipped.
expect(first.chat).toHaveBeenCalledTimes(2);
expect(second.chat).toHaveBeenCalledTimes(2);
});
});
+47 -9
View File
@@ -1,33 +1,46 @@
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient } from './types.js';
/** Optional tuning knobs for {@link RotatingModelClient}. */
export interface RotatingModelClientOptions {
/**
 * Cooldown period, in milliseconds, applied to a profile after a failed attempt.
 * While cooling, the profile is ordered after profiles that are not cooling.
 * Defaults to 0 when omitted; negative values are clamped to 0. A value of 0 disables cooldown.
 */
cooldownMs?: number;
/**
 * Injectable clock for deterministic tests. Must return the current time in milliseconds.
 * Defaults to `Date.now()`.
 */
now?: () => number;
}
/**
* Model client wrapper that rotates across equivalent auth profiles (e.g. API keys).
* Sticky-by-success behavior: keep using the last successful profile until it fails.
*/
export class RotatingModelClient implements ModelClient {
private readonly clients: ModelClient[];
private readonly cooldownMs: number;
private readonly now: () => number;
private readonly cooldownUntilMs: number[];
private currentIndex = 0;
constructor(clients: ModelClient[]) {
constructor(clients: ModelClient[], options: RotatingModelClientOptions = {}) {
if (clients.length === 0) {
throw new Error('RotatingModelClient requires at least one client');
}
this.clients = clients;
this.cooldownMs = Math.max(0, options.cooldownMs ?? 0);
this.now = options.now ?? (() => Date.now());
this.cooldownUntilMs = Array.from({ length: clients.length }, () => 0);
}
async chat(request: ChatRequest): Promise<ChatResponse> {
const start = this.currentIndex;
const errors: Error[] = [];
for (let offset = 0; offset < this.clients.length; offset += 1) {
const index = (start + offset) % this.clients.length;
const order = this.getAttemptOrder();
for (const index of order) {
const client = this.clients[index];
try {
const response = await client.chat(request);
this.currentIndex = index;
this.cooldownUntilMs[index] = 0;
return response;
} catch (error) {
errors.push(error instanceof Error ? error : new Error(String(error)));
this.applyCooldown(index);
}
}
@@ -35,10 +48,8 @@ export class RotatingModelClient implements ModelClient {
}
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
const start = this.currentIndex;
for (let offset = 0; offset < this.clients.length; offset += 1) {
const index = (start + offset) % this.clients.length;
const order = this.getAttemptOrder();
for (const index of order) {
const client = this.clients[index];
if (!client.chatStream) {
continue;
@@ -48,6 +59,7 @@ export class RotatingModelClient implements ModelClient {
for await (const event of client.chatStream(request)) {
if (event.type === 'error') {
failed = true;
this.applyCooldown(index);
break;
}
yield event;
@@ -55,10 +67,36 @@ export class RotatingModelClient implements ModelClient {
if (!failed) {
this.currentIndex = index;
this.cooldownUntilMs[index] = 0;
return;
}
}
yield { type: 'error', error: new Error('All auth profiles failed for streaming') };
}
/**
 * Computes the order in which profile indices are tried for a single request.
 *
 * Every index is visited exactly once, starting at `currentIndex` and wrapping around.
 * Indices whose cooldown deadline is at or before the current clock reading come first;
 * indices still cooling down follow. Rotation order is kept inside each group.
 *
 * @returns All profile indices, ready ones first, cooling ones last.
 */
private getAttemptOrder(): number[] {
  const currentTime = this.now();
  const total = this.clients.length;

  // Sticky rotation: begin at the last successful profile and wrap around once.
  const rotation = Array.from(
    { length: total },
    (_unused, step) => (this.currentIndex + step) % total,
  );

  const isReady = (profile: number): boolean => this.cooldownUntilMs[profile] <= currentTime;
  const ready = rotation.filter((profile) => isReady(profile));
  const stillCooling = rotation.filter((profile) => !isReady(profile));

  // Cooling profiles are never dropped: if every profile is cooling down, the result is
  // simply all of them in sticky order, so a request is still attempted.
  return [...ready, ...stillCooling];
}
/**
 * Records that the profile at `index` just failed by setting its cooldown deadline to
 * the current clock reading plus `cooldownMs`.
 *
 * Does nothing when `cooldownMs` is zero or negative, i.e. when cooldown is disabled.
 *
 * @param index Position of the failed profile in `clients`.
 */
private applyCooldown(index: number): void {
  const cooldownDisabled = this.cooldownMs <= 0;
  if (!cooldownDisabled) {
    const resumeAtMs = this.now() + this.cooldownMs;
    this.cooldownUntilMs[index] = resumeAtMs;
  }
}
}