fix(router): align fallback semantics and oauth provider behavior

2026-02-23 17:11:15 -08:00
parent 00b2d646f7
commit 092a9baeae
10 changed files with 118 additions and 32 deletions
@@ -178,7 +178,7 @@ const modelsSchema = z.object({
  fast: modelConfigSchema.optional(),
  default: modelConfigSchema,
  complex: modelConfigSchema.optional(),
-  fallback_chain: z.array(z.string()).default(['anthropic']),
+  fallback_chain: z.array(z.string()).default([]),
  local_providers: z.record(z.string(), modelConfigSchema).optional(),
  thinking: thinkingSchema,
 });
@@ -161,7 +161,7 @@ describe('createAgentHandlers command fast-path', () => {
      runtimeConfig: {
        models: {
          default: { provider: 'anthropic', model: 'claude-sonnet-4' },
-          fallback_chain: ['anthropic'],
+          fallback_chain: [],
        },
      } as unknown as AgentHandlerDeps['runtimeConfig'],
    });
@@ -199,7 +199,7 @@ describe('createAgentHandlers command fast-path', () => {
      runtimeConfig: {
        models: {
          default: { provider: 'anthropic', model: 'claude-sonnet-4' },
-          fallback_chain: ['anthropic'],
+          fallback_chain: [],
        },
      } as unknown as AgentHandlerDeps['runtimeConfig'],
    });
@@ -1497,7 +1497,7 @@ describe('config handlers', () => {
      },
      models: {
        default: { provider: 'anthropic' as const, model: 'claude-3-haiku', api_key: 'sk-secret-key' },
-        fallback_chain: ['anthropic'],
+        fallback_chain: [],
      },
      backends: { claude_code: { enabled: false }, opencode: { enabled: false }, native: { enabled: true } },
      hooks: { confirm: ['shell.exec'], log: [], silent: [] },
@@ -1868,7 +1868,7 @@ describe('redactConfig – comprehensive credential redaction', () => {
        },
        complex: { provider: 'anthropic' as const, model: 'claude-opus', auth_token: 'at-complex' },
        local: { provider: 'ollama' as const, model: 'llama3' },
-        fallback_chain: ['anthropic'],
+        fallback_chain: [],
        local_providers: {
          ollama: { provider: 'ollama' as const, model: 'llama3', api_key: 'lp-key', auth_token: 'lp-token',
            fallback: { provider: 'llamacpp' as const, model: 'llama', api_key: 'lp-fb-key' },
@@ -2012,7 +2012,7 @@ describe('redactConfig – comprehensive credential redaction', () => {
    // models
    expect(getPath(result, 'models', 'default', 'provider')).toBe('anthropic');
    expect(getPath(result, 'models', 'default', 'model')).toBe('claude');
-    expect(getPath(result, 'models', 'fallback_chain')).toEqual(['anthropic']);
+    expect(getPath(result, 'models', 'fallback_chain')).toEqual([]);
    // web_search
    expect(getPath(result, 'web_search', 'provider')).toBe('brave');
    expect(getPath(result, 'web_search', 'max_results')).toBe(5);
@@ -11,7 +11,7 @@ function withMutableConfig(config: Config): Config & Record<string, unknown> {
 function makeBaseConfig(): Config {
  return {
    server: { localhost: true, port: 18800 },
-    models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: ['anthropic'] },
+    models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: [] },
    backends: { native: { enabled: true }, opencode: { enabled: false }, claude_code: { enabled: false } },
    hooks: { confirm: [], log: [], silent: [] },
    mcp: { servers: [] },
@@ -23,8 +23,12 @@ describe('supportsAudioInput', () => {
      'ollama',
      'llamacpp',
      'openrouter',
+      'vercel',
      'zhipuai',
      'xai',
+      'minimax',
+      'moonshot',
+      'synthetic',
    ] as const;

    for (const provider of nonAudioProviders) {
@@ -5,7 +5,21 @@
 * Models that don't will receive a Whisper transcript as text instead.
 */

-export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai';
+export type ModelProvider = // string-literal union of provider identifiers, one member per line
+  | 'anthropic'
+  | 'openai'
+  | 'gemini'
+  | 'ollama'
+  | 'llamacpp'
+  | 'openrouter'
+  | 'vercel' // not present in the one-line union this replaces
+  | 'bedrock'
+  | 'github'
+  | 'zhipuai'
+  | 'xai'
+  | 'minimax' // not present in the one-line union this replaces
+  | 'moonshot' // not present in the one-line union this replaces
+  | 'synthetic'; // not present in the one-line union this replaces

 /**
 * Models known to support native audio input via their API.
@@ -70,24 +70,12 @@ describe('OpenAIClient OAuth (Codex)', () => {
    expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 });
  });

-  it('adds provider warning when tools are requested in OAuth mode', async () => {
-    const sse = makeSse([
-      { event: 'response.output_text.delta', data: { type: 'response.output_text.delta', delta: 'result body' } },
-      { event: 'response.completed', data: { type: 'response.completed', response: { usage: { input_tokens: 1, output_tokens: 1 } } } },
-    ]);
-
-    globalThis.fetch = vi.fn(async () => {
-      const stream = new ReadableStream({
-        start(controller) {
-          controller.enqueue(new TextEncoder().encode(sse));
-          controller.close();
-        },
-      });
-      return new Response(stream, { status: 200 });
-    }) as typeof fetch;
-
+  it('throws when tools are requested in OAuth mode', async () => {
+    const fetchSpy = vi.fn();
+    globalThis.fetch = fetchSpy as unknown as typeof fetch;
    const client = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true });
-    const resp = await client.chat({
+
+    await expect(client.chat({
      system: 'You are helpful.',
      messages: [{ role: 'user', content: 'use tools' }],
      tools: [{
@@ -99,10 +87,8 @@ describe('OpenAIClient OAuth (Codex)', () => {
          required: ['id'],
        },
      }],
-    });
+    })).rejects.toThrow('does not support tool execution');

-    expect(resp.content).toContain('[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.');
-    expect(resp.content).toContain('Requested tools were not sent to the provider');
-    expect(resp.content).toContain('result body');
+    expect(fetchSpy).not.toHaveBeenCalled();
  });
 });
@@ -233,6 +233,12 @@ export class OpenAIClient implements ModelClient {

  async chat(request: ChatRequest): Promise<ChatResponse> {
    if (this.useOAuth) {
+      if (request.tools && request.tools.length > 0) {
+        throw new Error(
+          'OpenAI OAuth (Codex backend) does not support tool execution. ' +
+          'Use auth_mode=api_key for tool loops or configure a fallback provider that supports tools.',
+        );
+      }
      return this.chatViaOAuthCodex(request);
    }

@@ -47,6 +47,57 @@ describe('ModelRouter', () => {
    expect(fallbackClient.chat).toHaveBeenCalled();
  });

+  it('skips duplicate fallback clients that already failed as primary', async () => { // same client object is primary and first fallback entry
+    const failingPrimary = createMockClient('primary', true); // second argument presumably makes chat() reject — confirm in createMockClient
+    const fallbackClient = createMockClient('fallback');
+
+    const router = new ModelRouter({
+      default: failingPrimary,
+      fallbackChain: [failingPrimary, fallbackClient], // failingPrimary listed again on purpose: the router must not call it a second time
+    });
+
+    const response = await router.chat({ messages: [{ role: 'user', content: 'Hi' }] });
+
+    expect(response.content).toBe('Response from fallback');
+    expect(failingPrimary.chat).toHaveBeenCalledTimes(1); // no retryConfig is passed, so the only call is the primary attempt
+    expect(fallbackClient.chat).toHaveBeenCalledTimes(1);
+  });
+
+  it('applies retry policy to fallback clients', async () => { // fallback throws once, then succeeds; the router is expected to retry it
+    const failingPrimary = createMockClient('primary', true); // second argument presumably makes chat() reject — confirm in createMockClient
+    let attempts = 0; // counts calls made to flakyFallback.chat
+    const flakyFallback: ModelClient = {
+      chat: vi.fn().mockImplementation(async () => {
+        attempts += 1;
+        if (attempts === 1) { // only the first call fails
+          throw new Error('transient');
+        }
+        return {
+          content: 'Recovered fallback',
+          stopReason: 'end_turn',
+          usage: { inputTokens: 1, outputTokens: 1 },
+        } satisfies ChatResponse;
+      }),
+    };
+
+    const router = new ModelRouter({
+      default: failingPrimary,
+      fallbackChain: [flakyFallback],
+      retryConfig: {
+        maxRetries: 1, // presumably one retry after the initial attempt — confirm against withRetry
+        initialDelayMs: 1, // 1 ms delays keep the test fast
+        backoffMultiplier: 1,
+        maxDelayMs: 1,
+        nonRetryablePatterns: [], // presumably means no error message is excluded from retry — confirm
+      },
+    });
+
+    const response = await router.chat({ messages: [{ role: 'user', content: 'retry fallback' }] });
+
+    expect(response.content).toBe('Recovered fallback');
+    expect(flakyFallback.chat).toHaveBeenCalledTimes(2); // first call threw, second call returned
+  });
+
  it('throws when all providers fail', async () => {
    const failing1 = createMockClient('primary', true);
    const failing2 = createMockClient('fallback', true);
@@ -87,6 +87,8 @@ export class ModelRouter implements ModelClient {
    const useTier = tier ?? this.currentTier;
    const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
    const errors: Error[] = [];
+    const attemptedClients = new Set<ModelClient>();
+    attemptedClients.add(primaryClient);

    // Try primary client (with retry if configured)
    try {
@@ -109,11 +111,20 @@ export class ModelRouter implements ModelClient {
    // Try tier-specific fallbacks first
    const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
    for (let i = 0; i < tierFallbackList.length; i++) {
+      const fallbackClient = tierFallbackList[i];
+      if (attemptedClients.has(fallbackClient)) {
+        continue;
+      }
      this.throwIfAborted();
      try {
        const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`;
        logger.debug(reason);
-        const response = await tierFallbackList[i].chat(request);
+        attemptedClients.add(fallbackClient);
+        const response = this.retryConfig
+          ? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `tier fallback #${i + 1}`, {
+            shouldAbort: () => this.abortRequested,
+          })
+          : await fallbackClient.chat(request);
        return { ...response, fallback: true, fallbackReason: reason };
      } catch (error) {
        errors.push(error instanceof Error ? error : new Error(String(error)));
@@ -123,12 +134,20 @@ export class ModelRouter implements ModelClient {

    // Then try global fallback chain
    for (let i = 0; i < this.fallbackChain.length; i++) {
-      this.throwIfAborted();
      const fallbackClient = this.fallbackChain[i];
+      if (attemptedClients.has(fallbackClient)) {
+        continue;
+      }
+      this.throwIfAborted();
      try {
        const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`;
        logger.debug(reason);
-        const response = await fallbackClient.chat(request);
+        attemptedClients.add(fallbackClient);
+        const response = this.retryConfig
+          ? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `global fallback #${i + 1}`, {
+            shouldAbort: () => this.abortRequested,
+          })
+          : await fallbackClient.chat(request);
        return { ...response, fallback: true, fallbackReason: reason };
      } catch (error) {
        errors.push(error instanceof Error ? error : new Error(String(error)));
@@ -143,6 +162,8 @@ export class ModelRouter implements ModelClient {
    const useTier = tier ?? this.currentTier;
    const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
    let primaryError: string | undefined;
+    const attemptedClients = new Set<ModelClient>();
+    attemptedClients.add(primaryClient);

    if (primaryClient.chatStream) {
      let hasError = false;
@@ -170,11 +191,13 @@ export class ModelRouter implements ModelClient {
    const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
    for (let i = 0; i < tierFallbackList.length; i++) {
      const fallbackClient = tierFallbackList[i];
+      if (attemptedClients.has(fallbackClient)) {continue;}
      if (!fallbackClient.chatStream) {continue;}

      const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`;
      logger.debug(reason);
      yield { type: 'fallback_warning', fallbackReason: reason };
+      attemptedClients.add(fallbackClient);

      let hasError = false;
      for await (const event of fallbackClient.chatStream(request)) {
@@ -192,11 +215,13 @@ export class ModelRouter implements ModelClient {
    // Then try global fallback chain
    for (let i = 0; i < this.fallbackChain.length; i++) {
      const fallbackClient = this.fallbackChain[i];
+      if (attemptedClients.has(fallbackClient)) {continue;}
      if (!fallbackClient.chatStream) {continue;}

      const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`;
      logger.debug(reason);
      yield { type: 'fallback_warning', fallbackReason: reason };
+      attemptedClients.add(fallbackClient);

      let hasError = false;
      for await (const event of fallbackClient.chatStream(request)) {