fix(router): align fallback semantics and oauth provider behavior

2026-02-23 17:11:15 -08:00
parent 00b2d646f7
commit 092a9baeae
10 changed files with 118 additions and 32 deletions
@@ -178,7 +178,7 @@ const modelsSchema = z.object({
  fast: modelConfigSchema.optional(),
  default: modelConfigSchema,
  complex: modelConfigSchema.optional(),
-  fallback_chain: z.array(z.string()).default(['anthropic']),
+  fallback_chain: z.array(z.string()).default([]),
  local_providers: z.record(z.string(), modelConfigSchema).optional(),
  thinking: thinkingSchema,
 });
@@ -161,7 +161,7 @@ describe('createAgentHandlers command fast-path', () => {
      runtimeConfig: {
        models: {
          default: { provider: 'anthropic', model: 'claude-sonnet-4' },
-          fallback_chain: ['anthropic'],
+          fallback_chain: [],
        },
      } as unknown as AgentHandlerDeps['runtimeConfig'],
    });
@@ -199,7 +199,7 @@ describe('createAgentHandlers command fast-path', () => {
      runtimeConfig: {
        models: {
          default: { provider: 'anthropic', model: 'claude-sonnet-4' },
-          fallback_chain: ['anthropic'],
+          fallback_chain: [],
        },
      } as unknown as AgentHandlerDeps['runtimeConfig'],
    });
@@ -1497,7 +1497,7 @@ describe('config handlers', () => {
      },
      models: {
        default: { provider: 'anthropic' as const, model: 'claude-3-haiku', api_key: 'sk-secret-key' },
-        fallback_chain: ['anthropic'],
+        fallback_chain: [],
      },
      backends: { claude_code: { enabled: false }, opencode: { enabled: false }, native: { enabled: true } },
      hooks: { confirm: ['shell.exec'], log: [], silent: [] },
@@ -1868,7 +1868,7 @@ describe('redactConfig – comprehensive credential redaction', () => {
        },
        complex: { provider: 'anthropic' as const, model: 'claude-opus', auth_token: 'at-complex' },
        local: { provider: 'ollama' as const, model: 'llama3' },
-        fallback_chain: ['anthropic'],
+        fallback_chain: [],
        local_providers: {
          ollama: { provider: 'ollama' as const, model: 'llama3', api_key: 'lp-key', auth_token: 'lp-token',
            fallback: { provider: 'llamacpp' as const, model: 'llama', api_key: 'lp-fb-key' },
@@ -2012,7 +2012,7 @@ describe('redactConfig – comprehensive credential redaction', () => {
    // models
    expect(getPath(result, 'models', 'default', 'provider')).toBe('anthropic');
    expect(getPath(result, 'models', 'default', 'model')).toBe('claude');
-    expect(getPath(result, 'models', 'fallback_chain')).toEqual(['anthropic']);
+    expect(getPath(result, 'models', 'fallback_chain')).toEqual([]);
    // web_search
    expect(getPath(result, 'web_search', 'provider')).toBe('brave');
    expect(getPath(result, 'web_search', 'max_results')).toBe(5);
@@ -11,7 +11,7 @@ function withMutableConfig(config: Config): Config & Record<string, unknown> {
 function makeBaseConfig(): Config {
  return {
    server: { localhost: true, port: 18800 },
-    models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: ['anthropic'] },
+    models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: [] },
    backends: { native: { enabled: true }, opencode: { enabled: false }, claude_code: { enabled: false } },
    hooks: { confirm: [], log: [], silent: [] },
    mcp: { servers: [] },
@@ -23,8 +23,12 @@ describe('supportsAudioInput', () => {
      'ollama',
      'llamacpp',
      'openrouter',
      'vercel',
      'zhipuai',
      'xai',
      'minimax',
      'moonshot',
      'synthetic',
    ] as const;
    for (const provider of nonAudioProviders) {
@@ -5,7 +5,21 @@
 * Models that don't will receive a Whisper transcript as text instead.
 */
-export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai';
+export type ModelProvider =
  | 'anthropic'
  | 'openai'
  | 'gemini'
  | 'ollama'
  | 'llamacpp'
  | 'openrouter'
  | 'vercel'
  | 'bedrock'
  | 'github'
  | 'zhipuai'
  | 'xai'
  | 'minimax'
  | 'moonshot'
  | 'synthetic';
 /**
 * Models known to support native audio input via their API.
@@ -70,24 +70,12 @@ describe('OpenAIClient OAuth (Codex)', () => {
    expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 });
  });
-  it('adds provider warning when tools are requested in OAuth mode', async () => {
+  it('throws when tools are requested in OAuth mode', async () => {
-    const sse = makeSse([
+    const fetchSpy = vi.fn();
-      { event: 'response.output_text.delta', data: { type: 'response.output_text.delta', delta: 'result body' } },
+    globalThis.fetch = fetchSpy as unknown as typeof fetch;
      { event: 'response.completed', data: { type: 'response.completed', response: { usage: { input_tokens: 1, output_tokens: 1 } } } },
    ]);
    globalThis.fetch = vi.fn(async () => {
      const stream = new ReadableStream({
        start(controller) {
          controller.enqueue(new TextEncoder().encode(sse));
          controller.close();
        },
      });
      return new Response(stream, { status: 200 });
    }) as typeof fetch;
    const client = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true });
-    const resp = await client.chat({
+
    await expect(client.chat({
      system: 'You are helpful.',
      messages: [{ role: 'user', content: 'use tools' }],
      tools: [{
@@ -99,10 +87,8 @@ describe('OpenAIClient OAuth (Codex)', () => {
          required: ['id'],
        },
      }],
-    });
+    })).rejects.toThrow('does not support tool execution');
-    expect(resp.content).toContain('[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.');
+    expect(fetchSpy).not.toHaveBeenCalled();
    expect(resp.content).toContain('Requested tools were not sent to the provider');
    expect(resp.content).toContain('result body');
  });
 });
@@ -233,6 +233,12 @@ export class OpenAIClient implements ModelClient {
  async chat(request: ChatRequest): Promise<ChatResponse> {
    if (this.useOAuth) {
      if (request.tools && request.tools.length > 0) {
        throw new Error(
          'OpenAI OAuth (Codex backend) does not support tool execution. ' +
          'Use auth_mode=api_key for tool loops or configure a fallback provider that supports tools.',
        );
      }
      return this.chatViaOAuthCodex(request);
    }
@@ -47,6 +47,57 @@ describe('ModelRouter', () => {
    expect(fallbackClient.chat).toHaveBeenCalled();
  });
  // The same client instance is registered both as the default and as the first
  // entry of the global fallback chain. The assertions below pin that it is
  // invoked exactly once and that the response comes from the next chain entry.
  // NOTE(review): createMockClient(name, true) presumably builds a client whose
  // chat() rejects — the helper is defined outside this hunk; confirm.
  it('skips duplicate fallback clients that already failed as primary', async () => {
    const brokenPrimary = createMockClient('primary', true);
    const healthyFallback = createMockClient('fallback');

    const router = new ModelRouter({
      default: brokenPrimary,
      fallbackChain: [brokenPrimary, healthyFallback],
    });

    const { content } = await router.chat({
      messages: [{ role: 'user', content: 'Hi' }],
    });

    expect(content).toBe('Response from fallback');
    expect(brokenPrimary.chat).toHaveBeenCalledTimes(1);
    expect(healthyFallback.chat).toHaveBeenCalledTimes(1);
  });
  // With a retry policy configured on the router, a fallback client that throws
  // on its first call is expected to be called a second time, and the second
  // (successful) response is what the router returns.
  it('applies retry policy to fallback clients', async () => {
    const brokenPrimary = createMockClient('primary', true);

    // Throws on the first invocation only; every later invocation resolves.
    let callCount = 0;
    const flakyChat = vi.fn().mockImplementation(async () => {
      callCount += 1;
      if (callCount === 1) {
        throw new Error('transient');
      }
      return {
        content: 'Recovered fallback',
        stopReason: 'end_turn',
        usage: { inputTokens: 1, outputTokens: 1 },
      } satisfies ChatResponse;
    });
    const flakyFallback: ModelClient = { chat: flakyChat };

    const router = new ModelRouter({
      default: brokenPrimary,
      fallbackChain: [flakyFallback],
      // A single retry with minimal delay values and no non-retryable patterns.
      retryConfig: {
        maxRetries: 1,
        initialDelayMs: 1,
        backoffMultiplier: 1,
        maxDelayMs: 1,
        nonRetryablePatterns: [],
      },
    });

    const { content } = await router.chat({
      messages: [{ role: 'user', content: 'retry fallback' }],
    });

    expect(content).toBe('Recovered fallback');
    // One failed attempt plus one successful retry.
    expect(flakyChat).toHaveBeenCalledTimes(2);
  });
  it('throws when all providers fail', async () => {
    const failing1 = createMockClient('primary', true);
    const failing2 = createMockClient('fallback', true);
@@ -87,6 +87,8 @@ export class ModelRouter implements ModelClient {
    const useTier = tier ?? this.currentTier;
    const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
    const errors: Error[] = [];
    const attemptedClients = new Set<ModelClient>();
    attemptedClients.add(primaryClient);
    // Try primary client (with retry if configured)
    try {
@@ -109,11 +111,20 @@ export class ModelRouter implements ModelClient {
    // Try tier-specific fallbacks first
    const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
    for (let i = 0; i < tierFallbackList.length; i++) {
      const fallbackClient = tierFallbackList[i];
      if (attemptedClients.has(fallbackClient)) {
        continue;
      }
      this.throwIfAborted();
      try {
        const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`;
        logger.debug(reason);
-        const response = await tierFallbackList[i].chat(request);
+        attemptedClients.add(fallbackClient);
        const response = this.retryConfig
          ? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `tier fallback #${i + 1}`, {
            shouldAbort: () => this.abortRequested,
          })
          : await fallbackClient.chat(request);
        return { ...response, fallback: true, fallbackReason: reason };
      } catch (error) {
        errors.push(error instanceof Error ? error : new Error(String(error)));
@@ -123,12 +134,20 @@ export class ModelRouter implements ModelClient {
    // Then try global fallback chain
    for (let i = 0; i < this.fallbackChain.length; i++) {
      this.throwIfAborted();
      const fallbackClient = this.fallbackChain[i];
      if (attemptedClients.has(fallbackClient)) {
        continue;
      }
      this.throwIfAborted();
      try {
        const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`;
        logger.debug(reason);
-        const response = await fallbackClient.chat(request);
+        attemptedClients.add(fallbackClient);
        const response = this.retryConfig
          ? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `global fallback #${i + 1}`, {
            shouldAbort: () => this.abortRequested,
          })
          : await fallbackClient.chat(request);
        return { ...response, fallback: true, fallbackReason: reason };
      } catch (error) {
        errors.push(error instanceof Error ? error : new Error(String(error)));
@@ -143,6 +162,8 @@ export class ModelRouter implements ModelClient {
    const useTier = tier ?? this.currentTier;
    const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
    let primaryError: string | undefined;
    const attemptedClients = new Set<ModelClient>();
    attemptedClients.add(primaryClient);
    if (primaryClient.chatStream) {
      let hasError = false;
@@ -170,11 +191,13 @@ export class ModelRouter implements ModelClient {
    const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
    for (let i = 0; i < tierFallbackList.length; i++) {
      const fallbackClient = tierFallbackList[i];
      if (attemptedClients.has(fallbackClient)) {continue;}
      if (!fallbackClient.chatStream) {continue;}
      const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`;
      logger.debug(reason);
      yield { type: 'fallback_warning', fallbackReason: reason };
      attemptedClients.add(fallbackClient);
      let hasError = false;
      for await (const event of fallbackClient.chatStream(request)) {
@@ -192,11 +215,13 @@ export class ModelRouter implements ModelClient {
    // Then try global fallback chain
    for (let i = 0; i < this.fallbackChain.length; i++) {
      const fallbackClient = this.fallbackChain[i];
      if (attemptedClients.has(fallbackClient)) {continue;}
      if (!fallbackClient.chatStream) {continue;}
      const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`;
      logger.debug(reason);
      yield { type: 'fallback_warning', fallbackReason: reason };
      attemptedClients.add(fallbackClient);
      let hasError = false;
      for await (const event of fallbackClient.chatStream(request)) {