fix(router): align fallback semantics and oauth provider behavior

This commit is contained in:
William Valentin
2026-02-23 17:11:15 -08:00
parent 00b2d646f7
commit 092a9baeae
10 changed files with 118 additions and 32 deletions
+1 -1
View File
@@ -178,7 +178,7 @@ const modelsSchema = z.object({
fast: modelConfigSchema.optional(),
default: modelConfigSchema,
complex: modelConfigSchema.optional(),
fallback_chain: z.array(z.string()).default(['anthropic']),
fallback_chain: z.array(z.string()).default([]),
local_providers: z.record(z.string(), modelConfigSchema).optional(),
thinking: thinkingSchema,
});
+2 -2
View File
@@ -161,7 +161,7 @@ describe('createAgentHandlers command fast-path', () => {
runtimeConfig: {
models: {
default: { provider: 'anthropic', model: 'claude-sonnet-4' },
fallback_chain: ['anthropic'],
fallback_chain: [],
},
} as unknown as AgentHandlerDeps['runtimeConfig'],
});
@@ -199,7 +199,7 @@ describe('createAgentHandlers command fast-path', () => {
runtimeConfig: {
models: {
default: { provider: 'anthropic', model: 'claude-sonnet-4' },
fallback_chain: ['anthropic'],
fallback_chain: [],
},
} as unknown as AgentHandlerDeps['runtimeConfig'],
});
+3 -3
View File
@@ -1497,7 +1497,7 @@ describe('config handlers', () => {
},
models: {
default: { provider: 'anthropic' as const, model: 'claude-3-haiku', api_key: 'sk-secret-key' },
fallback_chain: ['anthropic'],
fallback_chain: [],
},
backends: { claude_code: { enabled: false }, opencode: { enabled: false }, native: { enabled: true } },
hooks: { confirm: ['shell.exec'], log: [], silent: [] },
@@ -1868,7 +1868,7 @@ describe('redactConfig comprehensive credential redaction', () => {
},
complex: { provider: 'anthropic' as const, model: 'claude-opus', auth_token: 'at-complex' },
local: { provider: 'ollama' as const, model: 'llama3' },
fallback_chain: ['anthropic'],
fallback_chain: [],
local_providers: {
ollama: { provider: 'ollama' as const, model: 'llama3', api_key: 'lp-key', auth_token: 'lp-token',
fallback: { provider: 'llamacpp' as const, model: 'llama', api_key: 'lp-fb-key' },
@@ -2012,7 +2012,7 @@ describe('redactConfig comprehensive credential redaction', () => {
// models
expect(getPath(result, 'models', 'default', 'provider')).toBe('anthropic');
expect(getPath(result, 'models', 'default', 'model')).toBe('claude');
expect(getPath(result, 'models', 'fallback_chain')).toEqual(['anthropic']);
expect(getPath(result, 'models', 'fallback_chain')).toEqual([]);
// web_search
expect(getPath(result, 'web_search', 'provider')).toBe('brave');
expect(getPath(result, 'web_search', 'max_results')).toBe(5);
+1 -1
View File
@@ -11,7 +11,7 @@ function withMutableConfig(config: Config): Config & Record<string, unknown> {
function makeBaseConfig(): Config {
return {
server: { localhost: true, port: 18800 },
models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: ['anthropic'] },
models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: [] },
backends: { native: { enabled: true }, opencode: { enabled: false }, claude_code: { enabled: false } },
hooks: { confirm: [], log: [], silent: [] },
mcp: { servers: [] },
+4
View File
@@ -23,8 +23,12 @@ describe('supportsAudioInput', () => {
'ollama',
'llamacpp',
'openrouter',
'vercel',
'zhipuai',
'xai',
'minimax',
'moonshot',
'synthetic',
] as const;
for (const provider of nonAudioProviders) {
+15 -1
View File
@@ -5,7 +5,21 @@
* Models that don't will receive a Whisper transcript as text instead.
*/
export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai';
/**
 * String identifiers of the model providers known to this module.
 *
 * Members are listed alphabetically; the order carries no meaning.
 */
export type ModelProvider =
  | 'anthropic'
  | 'bedrock'
  | 'gemini'
  | 'github'
  | 'llamacpp'
  | 'minimax'
  | 'moonshot'
  | 'ollama'
  | 'openai'
  | 'openrouter'
  | 'synthetic'
  | 'vercel'
  | 'xai'
  | 'zhipuai';
/**
* Models known to support native audio input via their API.
+7 -21
View File
@@ -70,24 +70,12 @@ describe('OpenAIClient OAuth (Codex)', () => {
expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 });
});
it('adds provider warning when tools are requested in OAuth mode', async () => {
const sse = makeSse([
{ event: 'response.output_text.delta', data: { type: 'response.output_text.delta', delta: 'result body' } },
{ event: 'response.completed', data: { type: 'response.completed', response: { usage: { input_tokens: 1, output_tokens: 1 } } } },
]);
globalThis.fetch = vi.fn(async () => {
const stream = new ReadableStream({
start(controller) {
controller.enqueue(new TextEncoder().encode(sse));
controller.close();
},
});
return new Response(stream, { status: 200 });
}) as typeof fetch;
it('throws when tools are requested in OAuth mode', async () => {
const fetchSpy = vi.fn();
globalThis.fetch = fetchSpy as unknown as typeof fetch;
const client = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true });
const resp = await client.chat({
await expect(client.chat({
system: 'You are helpful.',
messages: [{ role: 'user', content: 'use tools' }],
tools: [{
@@ -99,10 +87,8 @@ describe('OpenAIClient OAuth (Codex)', () => {
required: ['id'],
},
}],
});
})).rejects.toThrow('does not support tool execution');
expect(resp.content).toContain('[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.');
expect(resp.content).toContain('Requested tools were not sent to the provider');
expect(resp.content).toContain('result body');
expect(fetchSpy).not.toHaveBeenCalled();
});
});
+6
View File
@@ -233,6 +233,12 @@ export class OpenAIClient implements ModelClient {
async chat(request: ChatRequest): Promise<ChatResponse> {
if (this.useOAuth) {
if (request.tools && request.tools.length > 0) {
throw new Error(
'OpenAI OAuth (Codex backend) does not support tool execution. ' +
'Use auth_mode=api_key for tool loops or configure a fallback provider that supports tools.',
);
}
return this.chatViaOAuthCodex(request);
}
+51
View File
@@ -47,6 +47,57 @@ describe('ModelRouter', () => {
expect(fallbackClient.chat).toHaveBeenCalled();
});
it('skips duplicate fallback clients that already failed as primary', async () => {
  // The same client instance is used as the primary and as the first entry of
  // the global fallback chain, so the router has a duplicate to skip.
  // NOTE(review): the `true` flag presumably makes the mock reject — confirm in createMockClient.
  const brokenPrimary = createMockClient('primary', true);
  const healthyFallback = createMockClient('fallback');

  const router = new ModelRouter({
    default: brokenPrimary,
    fallbackChain: [brokenPrimary, healthyFallback],
  });

  const { content } = await router.chat({ messages: [{ role: 'user', content: 'Hi' }] });

  // The reply must come from the second chain entry...
  expect(content).toBe('Response from fallback');
  // ...and the failed primary must have been called once, not a second time via the chain.
  expect(brokenPrimary.chat).toHaveBeenCalledTimes(1);
  expect(healthyFallback.chat).toHaveBeenCalledTimes(1);
});
it('applies retry policy to fallback clients', async () => {
  // NOTE(review): the `true` flag presumably makes the mock reject — confirm in createMockClient.
  const brokenPrimary = createMockClient('primary', true);

  // Fallback that rejects on its first invocation only and succeeds afterwards.
  let firstCallPending = true;
  const flakyFallback: ModelClient = {
    chat: vi.fn().mockImplementation(async () => {
      if (firstCallPending) {
        firstCallPending = false;
        throw new Error('transient');
      }
      return {
        content: 'Recovered fallback',
        stopReason: 'end_turn',
        usage: { inputTokens: 1, outputTokens: 1 },
      } satisfies ChatResponse;
    }),
  };

  // One retry with 1 ms delays keeps the test fast; no error pattern is
  // marked as non-retryable.
  const retryConfig = {
    maxRetries: 1,
    initialDelayMs: 1,
    backoffMultiplier: 1,
    maxDelayMs: 1,
    nonRetryablePatterns: [],
  };

  const router = new ModelRouter({
    default: brokenPrimary,
    fallbackChain: [flakyFallback],
    retryConfig,
  });

  const { content } = await router.chat({ messages: [{ role: 'user', content: 'retry fallback' }] });

  // Two calls = the initial failing attempt plus the single permitted retry.
  expect(content).toBe('Recovered fallback');
  expect(flakyFallback.chat).toHaveBeenCalledTimes(2);
});
it('throws when all providers fail', async () => {
const failing1 = createMockClient('primary', true);
const failing2 = createMockClient('fallback', true);
+28 -3
View File
@@ -87,6 +87,8 @@ export class ModelRouter implements ModelClient {
const useTier = tier ?? this.currentTier;
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
const errors: Error[] = [];
const attemptedClients = new Set<ModelClient>();
attemptedClients.add(primaryClient);
// Try primary client (with retry if configured)
try {
@@ -109,11 +111,20 @@ export class ModelRouter implements ModelClient {
// Try tier-specific fallbacks first
const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
for (let i = 0; i < tierFallbackList.length; i++) {
const fallbackClient = tierFallbackList[i];
if (attemptedClients.has(fallbackClient)) {
continue;
}
this.throwIfAborted();
try {
const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`;
logger.debug(reason);
const response = await tierFallbackList[i].chat(request);
attemptedClients.add(fallbackClient);
const response = this.retryConfig
? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `tier fallback #${i + 1}`, {
shouldAbort: () => this.abortRequested,
})
: await fallbackClient.chat(request);
return { ...response, fallback: true, fallbackReason: reason };
} catch (error) {
errors.push(error instanceof Error ? error : new Error(String(error)));
@@ -123,12 +134,20 @@ export class ModelRouter implements ModelClient {
// Then try global fallback chain
for (let i = 0; i < this.fallbackChain.length; i++) {
this.throwIfAborted();
const fallbackClient = this.fallbackChain[i];
if (attemptedClients.has(fallbackClient)) {
continue;
}
this.throwIfAborted();
try {
const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`;
logger.debug(reason);
const response = await fallbackClient.chat(request);
attemptedClients.add(fallbackClient);
const response = this.retryConfig
? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `global fallback #${i + 1}`, {
shouldAbort: () => this.abortRequested,
})
: await fallbackClient.chat(request);
return { ...response, fallback: true, fallbackReason: reason };
} catch (error) {
errors.push(error instanceof Error ? error : new Error(String(error)));
@@ -143,6 +162,8 @@ export class ModelRouter implements ModelClient {
const useTier = tier ?? this.currentTier;
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
let primaryError: string | undefined;
const attemptedClients = new Set<ModelClient>();
attemptedClients.add(primaryClient);
if (primaryClient.chatStream) {
let hasError = false;
@@ -170,11 +191,13 @@ export class ModelRouter implements ModelClient {
const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
for (let i = 0; i < tierFallbackList.length; i++) {
const fallbackClient = tierFallbackList[i];
if (attemptedClients.has(fallbackClient)) {continue;}
if (!fallbackClient.chatStream) {continue;}
const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`;
logger.debug(reason);
yield { type: 'fallback_warning', fallbackReason: reason };
attemptedClients.add(fallbackClient);
let hasError = false;
for await (const event of fallbackClient.chatStream(request)) {
@@ -192,11 +215,13 @@ export class ModelRouter implements ModelClient {
// Then try global fallback chain
for (let i = 0; i < this.fallbackChain.length; i++) {
const fallbackClient = this.fallbackChain[i];
if (attemptedClients.has(fallbackClient)) {continue;}
if (!fallbackClient.chatStream) {continue;}
const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`;
logger.debug(reason);
yield { type: 'fallback_warning', fallbackReason: reason };
attemptedClients.add(fallbackClient);
let hasError = false;
for await (const event of fallbackClient.chatStream(request)) {