fix(router): align fallback semantics and oauth provider behavior
This commit is contained in:
@@ -178,7 +178,7 @@ const modelsSchema = z.object({
|
||||
fast: modelConfigSchema.optional(),
|
||||
default: modelConfigSchema,
|
||||
complex: modelConfigSchema.optional(),
|
||||
fallback_chain: z.array(z.string()).default(['anthropic']),
|
||||
fallback_chain: z.array(z.string()).default([]),
|
||||
local_providers: z.record(z.string(), modelConfigSchema).optional(),
|
||||
thinking: thinkingSchema,
|
||||
});
|
||||
|
||||
@@ -161,7 +161,7 @@ describe('createAgentHandlers command fast-path', () => {
|
||||
runtimeConfig: {
|
||||
models: {
|
||||
default: { provider: 'anthropic', model: 'claude-sonnet-4' },
|
||||
fallback_chain: ['anthropic'],
|
||||
fallback_chain: [],
|
||||
},
|
||||
} as unknown as AgentHandlerDeps['runtimeConfig'],
|
||||
});
|
||||
@@ -199,7 +199,7 @@ describe('createAgentHandlers command fast-path', () => {
|
||||
runtimeConfig: {
|
||||
models: {
|
||||
default: { provider: 'anthropic', model: 'claude-sonnet-4' },
|
||||
fallback_chain: ['anthropic'],
|
||||
fallback_chain: [],
|
||||
},
|
||||
} as unknown as AgentHandlerDeps['runtimeConfig'],
|
||||
});
|
||||
|
||||
@@ -1497,7 +1497,7 @@ describe('config handlers', () => {
|
||||
},
|
||||
models: {
|
||||
default: { provider: 'anthropic' as const, model: 'claude-3-haiku', api_key: 'sk-secret-key' },
|
||||
fallback_chain: ['anthropic'],
|
||||
fallback_chain: [],
|
||||
},
|
||||
backends: { claude_code: { enabled: false }, opencode: { enabled: false }, native: { enabled: true } },
|
||||
hooks: { confirm: ['shell.exec'], log: [], silent: [] },
|
||||
@@ -1868,7 +1868,7 @@ describe('redactConfig – comprehensive credential redaction', () => {
|
||||
},
|
||||
complex: { provider: 'anthropic' as const, model: 'claude-opus', auth_token: 'at-complex' },
|
||||
local: { provider: 'ollama' as const, model: 'llama3' },
|
||||
fallback_chain: ['anthropic'],
|
||||
fallback_chain: [],
|
||||
local_providers: {
|
||||
ollama: { provider: 'ollama' as const, model: 'llama3', api_key: 'lp-key', auth_token: 'lp-token',
|
||||
fallback: { provider: 'llamacpp' as const, model: 'llama', api_key: 'lp-fb-key' },
|
||||
@@ -2012,7 +2012,7 @@ describe('redactConfig – comprehensive credential redaction', () => {
|
||||
// models
|
||||
expect(getPath(result, 'models', 'default', 'provider')).toBe('anthropic');
|
||||
expect(getPath(result, 'models', 'default', 'model')).toBe('claude');
|
||||
expect(getPath(result, 'models', 'fallback_chain')).toEqual(['anthropic']);
|
||||
expect(getPath(result, 'models', 'fallback_chain')).toEqual([]);
|
||||
// web_search
|
||||
expect(getPath(result, 'web_search', 'provider')).toBe('brave');
|
||||
expect(getPath(result, 'web_search', 'max_results')).toBe(5);
|
||||
|
||||
@@ -11,7 +11,7 @@ function withMutableConfig(config: Config): Config & Record<string, unknown> {
|
||||
function makeBaseConfig(): Config {
|
||||
return {
|
||||
server: { localhost: true, port: 18800 },
|
||||
models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: ['anthropic'] },
|
||||
models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: [] },
|
||||
backends: { native: { enabled: true }, opencode: { enabled: false }, claude_code: { enabled: false } },
|
||||
hooks: { confirm: [], log: [], silent: [] },
|
||||
mcp: { servers: [] },
|
||||
|
||||
@@ -23,8 +23,12 @@ describe('supportsAudioInput', () => {
|
||||
'ollama',
|
||||
'llamacpp',
|
||||
'openrouter',
|
||||
'vercel',
|
||||
'zhipuai',
|
||||
'xai',
|
||||
'minimax',
|
||||
'moonshot',
|
||||
'synthetic',
|
||||
] as const;
|
||||
|
||||
for (const provider of nonAudioProviders) {
|
||||
|
||||
@@ -5,7 +5,21 @@
|
||||
* Models that don't will receive a Whisper transcript as text instead.
|
||||
*/
|
||||
|
||||
export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai';
|
||||
/**
 * String-literal union of the model provider identifiers recognised by this module.
 * One member per line so that adding or removing a provider is a one-line diff.
 */
export type ModelProvider =
|
||||
| 'anthropic'
|
||||
| 'openai'
|
||||
| 'gemini'
|
||||
| 'ollama'
|
||||
| 'llamacpp'
|
||||
| 'openrouter'
|
||||
| 'vercel'
|
||||
| 'bedrock'
|
||||
| 'github'
|
||||
| 'zhipuai'
|
||||
| 'xai'
|
||||
| 'minimax'
|
||||
| 'moonshot'
|
||||
| 'synthetic';
|
||||
|
||||
/**
|
||||
* Models known to support native audio input via their API.
|
||||
|
||||
@@ -70,24 +70,12 @@ describe('OpenAIClient OAuth (Codex)', () => {
|
||||
expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 });
|
||||
});
|
||||
|
||||
it('adds provider warning when tools are requested in OAuth mode', async () => {
|
||||
const sse = makeSse([
|
||||
{ event: 'response.output_text.delta', data: { type: 'response.output_text.delta', delta: 'result body' } },
|
||||
{ event: 'response.completed', data: { type: 'response.completed', response: { usage: { input_tokens: 1, output_tokens: 1 } } } },
|
||||
]);
|
||||
|
||||
globalThis.fetch = vi.fn(async () => {
|
||||
const stream = new ReadableStream({
|
||||
start(controller) {
|
||||
controller.enqueue(new TextEncoder().encode(sse));
|
||||
controller.close();
|
||||
},
|
||||
});
|
||||
return new Response(stream, { status: 200 });
|
||||
}) as typeof fetch;
|
||||
|
||||
it('throws when tools are requested in OAuth mode', async () => {
|
||||
const fetchSpy = vi.fn();
|
||||
globalThis.fetch = fetchSpy as unknown as typeof fetch;
|
||||
const client = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true });
|
||||
const resp = await client.chat({
|
||||
|
||||
await expect(client.chat({
|
||||
system: 'You are helpful.',
|
||||
messages: [{ role: 'user', content: 'use tools' }],
|
||||
tools: [{
|
||||
@@ -99,10 +87,8 @@ describe('OpenAIClient OAuth (Codex)', () => {
|
||||
required: ['id'],
|
||||
},
|
||||
}],
|
||||
});
|
||||
})).rejects.toThrow('does not support tool execution');
|
||||
|
||||
expect(resp.content).toContain('[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.');
|
||||
expect(resp.content).toContain('Requested tools were not sent to the provider');
|
||||
expect(resp.content).toContain('result body');
|
||||
expect(fetchSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -233,6 +233,12 @@ export class OpenAIClient implements ModelClient {
|
||||
|
||||
async chat(request: ChatRequest): Promise<ChatResponse> {
|
||||
if (this.useOAuth) {
|
||||
if (request.tools && request.tools.length > 0) {
|
||||
throw new Error(
|
||||
'OpenAI OAuth (Codex backend) does not support tool execution. ' +
|
||||
'Use auth_mode=api_key for tool loops or configure a fallback provider that supports tools.',
|
||||
);
|
||||
}
|
||||
return this.chatViaOAuthCodex(request);
|
||||
}
|
||||
|
||||
|
||||
@@ -47,6 +47,57 @@ describe('ModelRouter', () => {
|
||||
expect(fallbackClient.chat).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Verifies de-duplication across the primary and the fallback chain: the same client
// instance is used as `default` and as the first chain entry, and must only be called once.
it('skips duplicate fallback clients that already failed as primary', async () => {
|
||||
// NOTE(review): the second argument presumably makes the mock's chat() reject — confirm in createMockClient.
const failingPrimary = createMockClient('primary', true);
|
||||
const fallbackClient = createMockClient('fallback');
|
||||
|
||||
const router = new ModelRouter({
|
||||
default: failingPrimary,
|
||||
// The first chain entry is the very same object as `default`; only the second entry is a distinct client.
fallbackChain: [failingPrimary, fallbackClient],
|
||||
});
|
||||
|
||||
const response = await router.chat({ messages: [{ role: 'user', content: 'Hi' }] });
|
||||
|
||||
expect(response.content).toBe('Response from fallback');
|
||||
// Exactly one call: the duplicate chain entry must not trigger a second attempt on the primary.
expect(failingPrimary.chat).toHaveBeenCalledTimes(1);
|
||||
expect(fallbackClient.chat).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// Verifies that the router's retryConfig is honoured for fallback clients too:
// a fallback that throws once and then succeeds must yield the successful response.
it('applies retry policy to fallback clients', async () => {
|
||||
const failingPrimary = createMockClient('primary', true);
|
||||
// Counts invocations of the flaky fallback's chat() so the mock can fail only on the first one.
let attempts = 0;
|
||||
const flakyFallback: ModelClient = {
|
||||
chat: vi.fn().mockImplementation(async () => {
|
||||
attempts += 1;
|
||||
// First call fails with a generic error; every later call succeeds.
if (attempts === 1) {
|
||||
throw new Error('transient');
|
||||
}
|
||||
return {
|
||||
content: 'Recovered fallback',
|
||||
stopReason: 'end_turn',
|
||||
usage: { inputTokens: 1, outputTokens: 1 },
|
||||
} satisfies ChatResponse;
|
||||
}),
|
||||
};
|
||||
|
||||
const router = new ModelRouter({
|
||||
default: failingPrimary,
|
||||
fallbackChain: [flakyFallback],
|
||||
// One retry with 1 ms delays keeps the test fast; the empty pattern list means no error message is excluded by pattern.
retryConfig: {
|
||||
maxRetries: 1,
|
||||
initialDelayMs: 1,
|
||||
backoffMultiplier: 1,
|
||||
maxDelayMs: 1,
|
||||
nonRetryablePatterns: [],
|
||||
},
|
||||
});
|
||||
|
||||
const response = await router.chat({ messages: [{ role: 'user', content: 'retry fallback' }] });
|
||||
|
||||
expect(response.content).toBe('Recovered fallback');
|
||||
// Two calls: the first throws 'transient', the retry returns the recovered response.
expect(flakyFallback.chat).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it('throws when all providers fail', async () => {
|
||||
const failing1 = createMockClient('primary', true);
|
||||
const failing2 = createMockClient('fallback', true);
|
||||
|
||||
+28
-3
@@ -87,6 +87,8 @@ export class ModelRouter implements ModelClient {
|
||||
const useTier = tier ?? this.currentTier;
|
||||
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
|
||||
const errors: Error[] = [];
|
||||
const attemptedClients = new Set<ModelClient>();
|
||||
attemptedClients.add(primaryClient);
|
||||
|
||||
// Try primary client (with retry if configured)
|
||||
try {
|
||||
@@ -109,11 +111,20 @@ export class ModelRouter implements ModelClient {
|
||||
// Try tier-specific fallbacks first
|
||||
const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
|
||||
for (let i = 0; i < tierFallbackList.length; i++) {
|
||||
const fallbackClient = tierFallbackList[i];
|
||||
if (attemptedClients.has(fallbackClient)) {
|
||||
continue;
|
||||
}
|
||||
this.throwIfAborted();
|
||||
try {
|
||||
const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`;
|
||||
logger.debug(reason);
|
||||
const response = await tierFallbackList[i].chat(request);
|
||||
attemptedClients.add(fallbackClient);
|
||||
const response = this.retryConfig
|
||||
? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `tier fallback #${i + 1}`, {
|
||||
shouldAbort: () => this.abortRequested,
|
||||
})
|
||||
: await fallbackClient.chat(request);
|
||||
return { ...response, fallback: true, fallbackReason: reason };
|
||||
} catch (error) {
|
||||
errors.push(error instanceof Error ? error : new Error(String(error)));
|
||||
@@ -123,12 +134,20 @@ export class ModelRouter implements ModelClient {
|
||||
|
||||
// Then try global fallback chain
|
||||
for (let i = 0; i < this.fallbackChain.length; i++) {
|
||||
this.throwIfAborted();
|
||||
const fallbackClient = this.fallbackChain[i];
|
||||
if (attemptedClients.has(fallbackClient)) {
|
||||
continue;
|
||||
}
|
||||
this.throwIfAborted();
|
||||
try {
|
||||
const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`;
|
||||
logger.debug(reason);
|
||||
const response = await fallbackClient.chat(request);
|
||||
attemptedClients.add(fallbackClient);
|
||||
const response = this.retryConfig
|
||||
? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `global fallback #${i + 1}`, {
|
||||
shouldAbort: () => this.abortRequested,
|
||||
})
|
||||
: await fallbackClient.chat(request);
|
||||
return { ...response, fallback: true, fallbackReason: reason };
|
||||
} catch (error) {
|
||||
errors.push(error instanceof Error ? error : new Error(String(error)));
|
||||
@@ -143,6 +162,8 @@ export class ModelRouter implements ModelClient {
|
||||
const useTier = tier ?? this.currentTier;
|
||||
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
|
||||
let primaryError: string | undefined;
|
||||
const attemptedClients = new Set<ModelClient>();
|
||||
attemptedClients.add(primaryClient);
|
||||
|
||||
if (primaryClient.chatStream) {
|
||||
let hasError = false;
|
||||
@@ -170,11 +191,13 @@ export class ModelRouter implements ModelClient {
|
||||
const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
|
||||
for (let i = 0; i < tierFallbackList.length; i++) {
|
||||
const fallbackClient = tierFallbackList[i];
|
||||
if (attemptedClients.has(fallbackClient)) {continue;}
|
||||
if (!fallbackClient.chatStream) {continue;}
|
||||
|
||||
const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`;
|
||||
logger.debug(reason);
|
||||
yield { type: 'fallback_warning', fallbackReason: reason };
|
||||
attemptedClients.add(fallbackClient);
|
||||
|
||||
let hasError = false;
|
||||
for await (const event of fallbackClient.chatStream(request)) {
|
||||
@@ -192,11 +215,13 @@ export class ModelRouter implements ModelClient {
|
||||
// Then try global fallback chain
|
||||
for (let i = 0; i < this.fallbackChain.length; i++) {
|
||||
const fallbackClient = this.fallbackChain[i];
|
||||
if (attemptedClients.has(fallbackClient)) {continue;}
|
||||
if (!fallbackClient.chatStream) {continue;}
|
||||
|
||||
const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`;
|
||||
logger.debug(reason);
|
||||
yield { type: 'fallback_warning', fallbackReason: reason };
|
||||
attemptedClients.add(fallbackClient);
|
||||
|
||||
let hasError = false;
|
||||
for await (const event of fallbackClient.chatStream(request)) {
|
||||
|
||||
Reference in New Issue
Block a user