diff --git a/src/config/schema.ts b/src/config/schema.ts index c44d950..a754e70 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -178,7 +178,7 @@ const modelsSchema = z.object({ fast: modelConfigSchema.optional(), default: modelConfigSchema, complex: modelConfigSchema.optional(), - fallback_chain: z.array(z.string()).default(['anthropic']), + fallback_chain: z.array(z.string()).default([]), local_providers: z.record(z.string(), modelConfigSchema).optional(), thinking: thinkingSchema, }); diff --git a/src/gateway/handlers/agent.test.ts b/src/gateway/handlers/agent.test.ts index da97351..8d1a50c 100644 --- a/src/gateway/handlers/agent.test.ts +++ b/src/gateway/handlers/agent.test.ts @@ -161,7 +161,7 @@ describe('createAgentHandlers command fast-path', () => { runtimeConfig: { models: { default: { provider: 'anthropic', model: 'claude-sonnet-4' }, - fallback_chain: ['anthropic'], + fallback_chain: [], }, } as unknown as AgentHandlerDeps['runtimeConfig'], }); @@ -199,7 +199,7 @@ describe('createAgentHandlers command fast-path', () => { runtimeConfig: { models: { default: { provider: 'anthropic', model: 'claude-sonnet-4' }, - fallback_chain: ['anthropic'], + fallback_chain: [], }, } as unknown as AgentHandlerDeps['runtimeConfig'], }); diff --git a/src/gateway/handlers/handlers.test.ts b/src/gateway/handlers/handlers.test.ts index 8763c7a..550c3d3 100644 --- a/src/gateway/handlers/handlers.test.ts +++ b/src/gateway/handlers/handlers.test.ts @@ -1497,7 +1497,7 @@ describe('config handlers', () => { }, models: { default: { provider: 'anthropic' as const, model: 'claude-3-haiku', api_key: 'sk-secret-key' }, - fallback_chain: ['anthropic'], + fallback_chain: [], }, backends: { claude_code: { enabled: false }, opencode: { enabled: false }, native: { enabled: true } }, hooks: { confirm: ['shell.exec'], log: [], silent: [] }, @@ -1868,7 +1868,7 @@ describe('redactConfig – comprehensive credential redaction', () => { }, complex: { provider: 'anthropic' as const, model: 'claude-opus', auth_token: 'at-complex' }, local: { provider: 'ollama' as const, model: 'llama3' }, - fallback_chain: ['anthropic'], + fallback_chain: [], local_providers: { ollama: { provider: 'ollama' as const, model: 'llama3', api_key: 'lp-key', auth_token: 'lp-token', fallback: { provider: 'llamacpp' as const, model: 'llama', api_key: 'lp-fb-key' }, @@ -2012,7 +2012,7 @@ describe('redactConfig – comprehensive credential redaction', () => { // models expect(getPath(result, 'models', 'default', 'provider')).toBe('anthropic'); expect(getPath(result, 'models', 'default', 'model')).toBe('claude'); - expect(getPath(result, 'models', 'fallback_chain')).toEqual(['anthropic']); + expect(getPath(result, 'models', 'fallback_chain')).toEqual([]); // web_search expect(getPath(result, 'web_search', 'provider')).toBe('brave'); expect(getPath(result, 'web_search', 'max_results')).toBe(5); diff --git a/src/gateway/handlers/services.test.ts b/src/gateway/handlers/services.test.ts index 6dc38f9..ca0c0c3 100644 --- a/src/gateway/handlers/services.test.ts +++ b/src/gateway/handlers/services.test.ts @@ -11,7 +11,7 @@ function withMutableConfig(config: Config): Config & Record { function makeBaseConfig(): Config { return { server: { localhost: true, port: 18800 }, - models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: ['anthropic'] }, + models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: [] }, backends: { native: { enabled: true }, opencode: { enabled: false }, claude_code: { enabled: false } }, hooks: { confirm: [], log: [], silent: [] }, mcp: { servers: [] }, diff --git a/src/models/capabilities.test.ts b/src/models/capabilities.test.ts index 824ba43..8107226 100644 --- a/src/models/capabilities.test.ts +++ b/src/models/capabilities.test.ts @@ -23,8 +23,12 @@ describe('supportsAudioInput', () => { 'ollama', 'llamacpp', 'openrouter', + 'vercel', 'zhipuai', 'xai', + 'minimax', + 'moonshot', + 'synthetic', ] as const; for (const provider of nonAudioProviders) { diff --git a/src/models/capabilities.ts b/src/models/capabilities.ts index 379cd5e..a773bf0 100644 --- a/src/models/capabilities.ts +++ b/src/models/capabilities.ts @@ -5,7 +5,21 @@ * Models that don't will receive a Whisper transcript as text instead. */ -export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai'; +export type ModelProvider = + | 'anthropic' + | 'openai' + | 'gemini' + | 'ollama' + | 'llamacpp' + | 'openrouter' + | 'vercel' + | 'bedrock' + | 'github' + | 'zhipuai' + | 'xai' + | 'minimax' + | 'moonshot' + | 'synthetic'; /** * Models known to support native audio input via their API. diff --git a/src/models/openai.oauth.test.ts b/src/models/openai.oauth.test.ts index ed6f227..2ea804b 100644 --- a/src/models/openai.oauth.test.ts +++ b/src/models/openai.oauth.test.ts @@ -70,24 +70,12 @@ describe('OpenAIClient OAuth (Codex)', () => { expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 }); }); - it('adds provider warning when tools are requested in OAuth mode', async () => { - const sse = makeSse([ - { event: 'response.output_text.delta', data: { type: 'response.output_text.delta', delta: 'result body' } }, - { event: 'response.completed', data: { type: 'response.completed', response: { usage: { input_tokens: 1, output_tokens: 1 } } } }, - ]); - - globalThis.fetch = vi.fn(async () => { - const stream = new ReadableStream({ - start(controller) { - controller.enqueue(new TextEncoder().encode(sse)); - controller.close(); - }, - }); - return new Response(stream, { status: 200 }); - }) as typeof fetch; - + it('throws when tools are requested in OAuth mode', async () => { + const fetchSpy = vi.fn(); + globalThis.fetch = fetchSpy as unknown as typeof fetch; const client = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true }); - const resp = await client.chat({ + + await expect(client.chat({ system: 'You are helpful.', messages: [{ role: 'user', content: 'use tools' }], tools: [{ @@ -99,10 +87,8 @@ describe('OpenAIClient OAuth (Codex)', () => { required: ['id'], }, }], - }); + })).rejects.toThrow('does not support tool execution'); - expect(resp.content).toContain('[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.'); - expect(resp.content).toContain('Requested tools were not sent to the provider'); - expect(resp.content).toContain('result body'); + expect(fetchSpy).not.toHaveBeenCalled(); }); }); diff --git a/src/models/openai.ts b/src/models/openai.ts index bbf566a..fc60e6e 100644 --- a/src/models/openai.ts +++ b/src/models/openai.ts @@ -233,6 +233,12 @@ export class OpenAIClient implements ModelClient { async chat(request: ChatRequest): Promise { if (this.useOAuth) { + if (request.tools && request.tools.length > 0) { + throw new Error( + 'OpenAI OAuth (Codex backend) does not support tool execution. ' + + 'Use auth_mode=api_key for tool loops or configure a fallback provider that supports tools.', + ); + } return this.chatViaOAuthCodex(request); } diff --git a/src/models/router.test.ts b/src/models/router.test.ts index cf6aee2..057868c 100644 --- a/src/models/router.test.ts +++ b/src/models/router.test.ts @@ -47,6 +47,57 @@ describe('ModelRouter', () => { expect(fallbackClient.chat).toHaveBeenCalled(); }); + it('skips duplicate fallback clients that already failed as primary', async () => { + const failingPrimary = createMockClient('primary', true); + const fallbackClient = createMockClient('fallback'); + + const router = new ModelRouter({ + default: failingPrimary, + fallbackChain: [failingPrimary, fallbackClient], + }); + + const response = await router.chat({ messages: [{ role: 'user', content: 'Hi' }] }); + + expect(response.content).toBe('Response from fallback'); + expect(failingPrimary.chat).toHaveBeenCalledTimes(1); + expect(fallbackClient.chat).toHaveBeenCalledTimes(1); + }); + + it('applies retry policy to fallback clients', async () => { + const failingPrimary = createMockClient('primary', true); + let attempts = 0; + const flakyFallback: ModelClient = { + chat: vi.fn().mockImplementation(async () => { + attempts += 1; + if (attempts === 1) { + throw new Error('transient'); + } + return { + content: 'Recovered fallback', + stopReason: 'end_turn', + usage: { inputTokens: 1, outputTokens: 1 }, + } satisfies ChatResponse; + }), + }; + + const router = new ModelRouter({ + default: failingPrimary, + fallbackChain: [flakyFallback], + retryConfig: { + maxRetries: 1, + initialDelayMs: 1, + backoffMultiplier: 1, + maxDelayMs: 1, + nonRetryablePatterns: [], + }, + }); + + const response = await router.chat({ messages: [{ role: 'user', content: 'retry fallback' }] }); + + expect(response.content).toBe('Recovered fallback'); + expect(flakyFallback.chat).toHaveBeenCalledTimes(2); + }); + it('throws when all providers fail', async () => { const failing1 = createMockClient('primary', true); const failing2 = createMockClient('fallback', true); diff --git a/src/models/router.ts b/src/models/router.ts index a05d1c7..c4ee72e 100644 --- a/src/models/router.ts +++ b/src/models/router.ts @@ -87,6 +87,8 @@ export class ModelRouter implements ModelClient { const useTier = tier ?? this.currentTier; const primaryClient = this.clients.get(useTier) ?? this.defaultClient; const errors: Error[] = []; + const attemptedClients = new Set(); + attemptedClients.add(primaryClient); // Try primary client (with retry if configured) try { @@ -109,11 +111,20 @@ export class ModelRouter implements ModelClient { // Try tier-specific fallbacks first const tierFallbackList = this.tierFallbacks.get(useTier) ?? []; for (let i = 0; i < tierFallbackList.length; i++) { + const fallbackClient = tierFallbackList[i]; + if (attemptedClients.has(fallbackClient)) { + continue; + } this.throwIfAborted(); try { const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`; logger.debug(reason); - const response = await tierFallbackList[i].chat(request); + attemptedClients.add(fallbackClient); + const response = this.retryConfig + ? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `tier fallback #${i + 1}`, { + shouldAbort: () => this.abortRequested, + }) + : await fallbackClient.chat(request); return { ...response, fallback: true, fallbackReason: reason }; } catch (error) { errors.push(error instanceof Error ? error : new Error(String(error))); @@ -123,12 +134,20 @@ export class ModelRouter implements ModelClient { // Then try global fallback chain for (let i = 0; i < this.fallbackChain.length; i++) { - this.throwIfAborted(); const fallbackClient = this.fallbackChain[i]; + if (attemptedClients.has(fallbackClient)) { + continue; + } + this.throwIfAborted(); try { const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`; logger.debug(reason); - const response = await fallbackClient.chat(request); + attemptedClients.add(fallbackClient); + const response = this.retryConfig + ? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `global fallback #${i + 1}`, { + shouldAbort: () => this.abortRequested, + }) + : await fallbackClient.chat(request); return { ...response, fallback: true, fallbackReason: reason }; } catch (error) { errors.push(error instanceof Error ? error : new Error(String(error))); @@ -143,6 +162,8 @@ export class ModelRouter implements ModelClient { const useTier = tier ?? this.currentTier; const primaryClient = this.clients.get(useTier) ?? this.defaultClient; let primaryError: string | undefined; + const attemptedClients = new Set(); + attemptedClients.add(primaryClient); if (primaryClient.chatStream) { let hasError = false; @@ -170,11 +191,13 @@ export class ModelRouter implements ModelClient { const tierFallbackList = this.tierFallbacks.get(useTier) ?? []; for (let i = 0; i < tierFallbackList.length; i++) { const fallbackClient = tierFallbackList[i]; + if (attemptedClients.has(fallbackClient)) {continue;} if (!fallbackClient.chatStream) {continue;} const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`; logger.debug(reason); yield { type: 'fallback_warning', fallbackReason: reason }; + attemptedClients.add(fallbackClient); let hasError = false; for await (const event of fallbackClient.chatStream(request)) { @@ -192,11 +215,13 @@ export class ModelRouter implements ModelClient { // Then try global fallback chain for (let i = 0; i < this.fallbackChain.length; i++) { const fallbackClient = this.fallbackChain[i]; + if (attemptedClients.has(fallbackClient)) {continue;} if (!fallbackClient.chatStream) {continue;} const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`; logger.debug(reason); yield { type: 'fallback_warning', fallbackReason: reason }; + attemptedClients.add(fallbackClient); let hasError = false; for await (const event of fallbackClient.chatStream(request)) {