feat(models): add auth profile cooldown for api key pools
This commit is contained in:
+47
-9
@@ -1,33 +1,46 @@
|
||||
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient } from './types.js';
|
||||
|
||||
export interface RotatingModelClientOptions {
|
||||
/** Cooldown period applied to a profile after a failed attempt. */
|
||||
cooldownMs?: number;
|
||||
/** Injectable clock for deterministic tests. */
|
||||
now?: () => number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Model client wrapper that rotates across equivalent auth profiles (e.g. API keys).
|
||||
* Sticky-by-success behavior: keep using the last successful profile until it fails.
|
||||
*/
|
||||
export class RotatingModelClient implements ModelClient {
|
||||
private readonly clients: ModelClient[];
|
||||
private readonly cooldownMs: number;
|
||||
private readonly now: () => number;
|
||||
private readonly cooldownUntilMs: number[];
|
||||
private currentIndex = 0;
|
||||
|
||||
constructor(clients: ModelClient[]) {
|
||||
constructor(clients: ModelClient[], options: RotatingModelClientOptions = {}) {
|
||||
if (clients.length === 0) {
|
||||
throw new Error('RotatingModelClient requires at least one client');
|
||||
}
|
||||
this.clients = clients;
|
||||
this.cooldownMs = Math.max(0, options.cooldownMs ?? 0);
|
||||
this.now = options.now ?? (() => Date.now());
|
||||
this.cooldownUntilMs = Array.from({ length: clients.length }, () => 0);
|
||||
}
|
||||
|
||||
async chat(request: ChatRequest): Promise<ChatResponse> {
|
||||
const start = this.currentIndex;
|
||||
const errors: Error[] = [];
|
||||
|
||||
for (let offset = 0; offset < this.clients.length; offset += 1) {
|
||||
const index = (start + offset) % this.clients.length;
|
||||
const order = this.getAttemptOrder();
|
||||
for (const index of order) {
|
||||
const client = this.clients[index];
|
||||
try {
|
||||
const response = await client.chat(request);
|
||||
this.currentIndex = index;
|
||||
this.cooldownUntilMs[index] = 0;
|
||||
return response;
|
||||
} catch (error) {
|
||||
errors.push(error instanceof Error ? error : new Error(String(error)));
|
||||
this.applyCooldown(index);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,10 +48,8 @@ export class RotatingModelClient implements ModelClient {
|
||||
}
|
||||
|
||||
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
|
||||
const start = this.currentIndex;
|
||||
|
||||
for (let offset = 0; offset < this.clients.length; offset += 1) {
|
||||
const index = (start + offset) % this.clients.length;
|
||||
const order = this.getAttemptOrder();
|
||||
for (const index of order) {
|
||||
const client = this.clients[index];
|
||||
if (!client.chatStream) {
|
||||
continue;
|
||||
@@ -48,6 +59,7 @@ export class RotatingModelClient implements ModelClient {
|
||||
for await (const event of client.chatStream(request)) {
|
||||
if (event.type === 'error') {
|
||||
failed = true;
|
||||
this.applyCooldown(index);
|
||||
break;
|
||||
}
|
||||
yield event;
|
||||
@@ -55,10 +67,36 @@ export class RotatingModelClient implements ModelClient {
|
||||
|
||||
if (!failed) {
|
||||
this.currentIndex = index;
|
||||
this.cooldownUntilMs[index] = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
yield { type: 'error', error: new Error('All auth profiles failed for streaming') };
|
||||
}
|
||||
|
||||
private getAttemptOrder(): number[] {
|
||||
const now = this.now();
|
||||
const available: number[] = [];
|
||||
const cooling: number[] = [];
|
||||
|
||||
for (let offset = 0; offset < this.clients.length; offset += 1) {
|
||||
const index = (this.currentIndex + offset) % this.clients.length;
|
||||
if (this.cooldownUntilMs[index] <= now) {
|
||||
available.push(index);
|
||||
} else {
|
||||
cooling.push(index);
|
||||
}
|
||||
}
|
||||
|
||||
// If all profiles are cooling down, still attempt them in sticky order.
|
||||
return available.length > 0 ? [...available, ...cooling] : cooling;
|
||||
}
|
||||
|
||||
private applyCooldown(index: number): void {
|
||||
if (this.cooldownMs <= 0) {
|
||||
return;
|
||||
}
|
||||
this.cooldownUntilMs[index] = this.now() + this.cooldownMs;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user