fix(router): align fallback semantics and oauth provider behavior

This commit is contained in:
William Valentin
2026-02-23 17:11:15 -08:00
parent 00b2d646f7
commit 092a9baeae
10 changed files with 118 additions and 32 deletions
+4
View File
@@ -23,8 +23,12 @@ describe('supportsAudioInput', () => {
'ollama',
'llamacpp',
'openrouter',
'vercel',
'zhipuai',
'xai',
'minimax',
'moonshot',
'synthetic',
] as const;
for (const provider of nonAudioProviders) {
+15 -1
View File
@@ -5,7 +5,21 @@
* Models that don't will receive a Whisper transcript as text instead.
*/
export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai';
/**
 * Identifier of a model provider, expressed as a closed union of string
 * literals. A value outside this list is rejected by the type checker.
 *
 * NOTE(review): how each provider maps to a concrete client or to audio
 * capability is not visible in this block — confirm against the consumers
 * of this type before relying on any particular member.
 */
export type ModelProvider =
| 'anthropic'
| 'openai'
| 'gemini'
| 'ollama'
| 'llamacpp'
| 'openrouter'
| 'vercel'
| 'bedrock'
| 'github'
| 'zhipuai'
| 'xai'
| 'minimax'
| 'moonshot'
| 'synthetic';
/**
* Models known to support native audio input via their API.
+7 -21
View File
@@ -70,24 +70,12 @@ describe('OpenAIClient OAuth (Codex)', () => {
expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 });
});
it('adds provider warning when tools are requested in OAuth mode', async () => {
const sse = makeSse([
{ event: 'response.output_text.delta', data: { type: 'response.output_text.delta', delta: 'result body' } },
{ event: 'response.completed', data: { type: 'response.completed', response: { usage: { input_tokens: 1, output_tokens: 1 } } } },
]);
globalThis.fetch = vi.fn(async () => {
const stream = new ReadableStream({
start(controller) {
controller.enqueue(new TextEncoder().encode(sse));
controller.close();
},
});
return new Response(stream, { status: 200 });
}) as typeof fetch;
it('throws when tools are requested in OAuth mode', async () => {
const fetchSpy = vi.fn();
globalThis.fetch = fetchSpy as unknown as typeof fetch;
const client = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true });
const resp = await client.chat({
await expect(client.chat({
system: 'You are helpful.',
messages: [{ role: 'user', content: 'use tools' }],
tools: [{
@@ -99,10 +87,8 @@ describe('OpenAIClient OAuth (Codex)', () => {
required: ['id'],
},
}],
});
})).rejects.toThrow('does not support tool execution');
expect(resp.content).toContain('[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.');
expect(resp.content).toContain('Requested tools were not sent to the provider');
expect(resp.content).toContain('result body');
expect(fetchSpy).not.toHaveBeenCalled();
});
});
+6
View File
@@ -233,6 +233,12 @@ export class OpenAIClient implements ModelClient {
async chat(request: ChatRequest): Promise<ChatResponse> {
if (this.useOAuth) {
if (request.tools && request.tools.length > 0) {
throw new Error(
'OpenAI OAuth (Codex backend) does not support tool execution. ' +
'Use auth_mode=api_key for tool loops or configure a fallback provider that supports tools.',
);
}
return this.chatViaOAuthCodex(request);
}
+51
View File
@@ -47,6 +47,57 @@ describe('ModelRouter', () => {
expect(fallbackClient.chat).toHaveBeenCalled();
});
it('skips duplicate fallback clients that already failed as primary', async () => {
  // The very same client instance is registered both as the default client
  // and as the first entry of the global fallback chain.
  // NOTE(review): the `true` flag presumably makes the mock's chat() fail — confirm in createMockClient.
  const brokenClient = createMockClient('primary', true);
  const healthyClient = createMockClient('fallback');

  const router = new ModelRouter({
    default: brokenClient,
    fallbackChain: [brokenClient, healthyClient],
  });

  const { content } = await router.chat({ messages: [{ role: 'user', content: 'Hi' }] });

  // The reply must come from the second chain entry, and the shared instance
  // must have been invoked only once (as primary, not again as a fallback).
  expect(content).toBe('Response from fallback');
  expect(brokenClient.chat).toHaveBeenCalledTimes(1);
  expect(healthyClient.chat).toHaveBeenCalledTimes(1);
});
it('applies retry policy to fallback clients', async () => {
  const brokenPrimary = createMockClient('primary', true);

  // Fallback whose first chat() call throws and whose subsequent calls succeed.
  let callCount = 0;
  const flakyFallback: ModelClient = {
    chat: vi.fn().mockImplementation(async () => {
      callCount += 1;
      if (callCount !== 1) {
        return {
          content: 'Recovered fallback',
          stopReason: 'end_turn',
          usage: { inputTokens: 1, outputTokens: 1 },
        } satisfies ChatResponse;
      }
      throw new Error('transient');
    }),
  };

  // One retry with 1 ms delays keeps the test fast; no error pattern is
  // excluded from retrying.
  const router = new ModelRouter({
    default: brokenPrimary,
    fallbackChain: [flakyFallback],
    retryConfig: {
      maxRetries: 1,
      initialDelayMs: 1,
      backoffMultiplier: 1,
      maxDelayMs: 1,
      nonRetryablePatterns: [],
    },
  });

  const { content } = await router.chat({
    messages: [{ role: 'user', content: 'retry fallback' }],
  });

  // Two invocations: the initial failing attempt plus the successful retry.
  expect(content).toBe('Recovered fallback');
  expect(flakyFallback.chat).toHaveBeenCalledTimes(2);
});
it('throws when all providers fail', async () => {
const failing1 = createMockClient('primary', true);
const failing2 = createMockClient('fallback', true);
+28 -3
View File
@@ -87,6 +87,8 @@ export class ModelRouter implements ModelClient {
const useTier = tier ?? this.currentTier;
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
const errors: Error[] = [];
const attemptedClients = new Set<ModelClient>();
attemptedClients.add(primaryClient);
// Try primary client (with retry if configured)
try {
@@ -109,11 +111,20 @@ export class ModelRouter implements ModelClient {
// Try tier-specific fallbacks first
const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
for (let i = 0; i < tierFallbackList.length; i++) {
const fallbackClient = tierFallbackList[i];
if (attemptedClients.has(fallbackClient)) {
continue;
}
this.throwIfAborted();
try {
const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`;
logger.debug(reason);
const response = await tierFallbackList[i].chat(request);
attemptedClients.add(fallbackClient);
const response = this.retryConfig
? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `tier fallback #${i + 1}`, {
shouldAbort: () => this.abortRequested,
})
: await fallbackClient.chat(request);
return { ...response, fallback: true, fallbackReason: reason };
} catch (error) {
errors.push(error instanceof Error ? error : new Error(String(error)));
@@ -123,12 +134,20 @@ export class ModelRouter implements ModelClient {
// Then try global fallback chain
for (let i = 0; i < this.fallbackChain.length; i++) {
this.throwIfAborted();
const fallbackClient = this.fallbackChain[i];
if (attemptedClients.has(fallbackClient)) {
continue;
}
this.throwIfAborted();
try {
const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`;
logger.debug(reason);
const response = await fallbackClient.chat(request);
attemptedClients.add(fallbackClient);
const response = this.retryConfig
? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `global fallback #${i + 1}`, {
shouldAbort: () => this.abortRequested,
})
: await fallbackClient.chat(request);
return { ...response, fallback: true, fallbackReason: reason };
} catch (error) {
errors.push(error instanceof Error ? error : new Error(String(error)));
@@ -143,6 +162,8 @@ export class ModelRouter implements ModelClient {
const useTier = tier ?? this.currentTier;
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
let primaryError: string | undefined;
const attemptedClients = new Set<ModelClient>();
attemptedClients.add(primaryClient);
if (primaryClient.chatStream) {
let hasError = false;
@@ -170,11 +191,13 @@ export class ModelRouter implements ModelClient {
const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
for (let i = 0; i < tierFallbackList.length; i++) {
const fallbackClient = tierFallbackList[i];
if (attemptedClients.has(fallbackClient)) {continue;}
if (!fallbackClient.chatStream) {continue;}
const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`;
logger.debug(reason);
yield { type: 'fallback_warning', fallbackReason: reason };
attemptedClients.add(fallbackClient);
let hasError = false;
for await (const event of fallbackClient.chatStream(request)) {
@@ -192,11 +215,13 @@ export class ModelRouter implements ModelClient {
// Then try global fallback chain
for (let i = 0; i < this.fallbackChain.length; i++) {
const fallbackClient = this.fallbackChain[i];
if (attemptedClients.has(fallbackClient)) {continue;}
if (!fallbackClient.chatStream) {continue;}
const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`;
logger.debug(reason);
yield { type: 'fallback_warning', fallbackReason: reason };
attemptedClients.add(fallbackClient);
let hasError = false;
for await (const event of fallbackClient.chatStream(request)) {