fix(router): align fallback semantics and oauth provider behavior
This commit is contained in:
@@ -178,7 +178,7 @@ const modelsSchema = z.object({
|
|||||||
fast: modelConfigSchema.optional(),
|
fast: modelConfigSchema.optional(),
|
||||||
default: modelConfigSchema,
|
default: modelConfigSchema,
|
||||||
complex: modelConfigSchema.optional(),
|
complex: modelConfigSchema.optional(),
|
||||||
fallback_chain: z.array(z.string()).default(['anthropic']),
|
fallback_chain: z.array(z.string()).default([]),
|
||||||
local_providers: z.record(z.string(), modelConfigSchema).optional(),
|
local_providers: z.record(z.string(), modelConfigSchema).optional(),
|
||||||
thinking: thinkingSchema,
|
thinking: thinkingSchema,
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -161,7 +161,7 @@ describe('createAgentHandlers command fast-path', () => {
|
|||||||
runtimeConfig: {
|
runtimeConfig: {
|
||||||
models: {
|
models: {
|
||||||
default: { provider: 'anthropic', model: 'claude-sonnet-4' },
|
default: { provider: 'anthropic', model: 'claude-sonnet-4' },
|
||||||
fallback_chain: ['anthropic'],
|
fallback_chain: [],
|
||||||
},
|
},
|
||||||
} as unknown as AgentHandlerDeps['runtimeConfig'],
|
} as unknown as AgentHandlerDeps['runtimeConfig'],
|
||||||
});
|
});
|
||||||
@@ -199,7 +199,7 @@ describe('createAgentHandlers command fast-path', () => {
|
|||||||
runtimeConfig: {
|
runtimeConfig: {
|
||||||
models: {
|
models: {
|
||||||
default: { provider: 'anthropic', model: 'claude-sonnet-4' },
|
default: { provider: 'anthropic', model: 'claude-sonnet-4' },
|
||||||
fallback_chain: ['anthropic'],
|
fallback_chain: [],
|
||||||
},
|
},
|
||||||
} as unknown as AgentHandlerDeps['runtimeConfig'],
|
} as unknown as AgentHandlerDeps['runtimeConfig'],
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1497,7 +1497,7 @@ describe('config handlers', () => {
|
|||||||
},
|
},
|
||||||
models: {
|
models: {
|
||||||
default: { provider: 'anthropic' as const, model: 'claude-3-haiku', api_key: 'sk-secret-key' },
|
default: { provider: 'anthropic' as const, model: 'claude-3-haiku', api_key: 'sk-secret-key' },
|
||||||
fallback_chain: ['anthropic'],
|
fallback_chain: [],
|
||||||
},
|
},
|
||||||
backends: { claude_code: { enabled: false }, opencode: { enabled: false }, native: { enabled: true } },
|
backends: { claude_code: { enabled: false }, opencode: { enabled: false }, native: { enabled: true } },
|
||||||
hooks: { confirm: ['shell.exec'], log: [], silent: [] },
|
hooks: { confirm: ['shell.exec'], log: [], silent: [] },
|
||||||
@@ -1868,7 +1868,7 @@ describe('redactConfig – comprehensive credential redaction', () => {
|
|||||||
},
|
},
|
||||||
complex: { provider: 'anthropic' as const, model: 'claude-opus', auth_token: 'at-complex' },
|
complex: { provider: 'anthropic' as const, model: 'claude-opus', auth_token: 'at-complex' },
|
||||||
local: { provider: 'ollama' as const, model: 'llama3' },
|
local: { provider: 'ollama' as const, model: 'llama3' },
|
||||||
fallback_chain: ['anthropic'],
|
fallback_chain: [],
|
||||||
local_providers: {
|
local_providers: {
|
||||||
ollama: { provider: 'ollama' as const, model: 'llama3', api_key: 'lp-key', auth_token: 'lp-token',
|
ollama: { provider: 'ollama' as const, model: 'llama3', api_key: 'lp-key', auth_token: 'lp-token',
|
||||||
fallback: { provider: 'llamacpp' as const, model: 'llama', api_key: 'lp-fb-key' },
|
fallback: { provider: 'llamacpp' as const, model: 'llama', api_key: 'lp-fb-key' },
|
||||||
@@ -2012,7 +2012,7 @@ describe('redactConfig – comprehensive credential redaction', () => {
|
|||||||
// models
|
// models
|
||||||
expect(getPath(result, 'models', 'default', 'provider')).toBe('anthropic');
|
expect(getPath(result, 'models', 'default', 'provider')).toBe('anthropic');
|
||||||
expect(getPath(result, 'models', 'default', 'model')).toBe('claude');
|
expect(getPath(result, 'models', 'default', 'model')).toBe('claude');
|
||||||
expect(getPath(result, 'models', 'fallback_chain')).toEqual(['anthropic']);
|
expect(getPath(result, 'models', 'fallback_chain')).toEqual([]);
|
||||||
// web_search
|
// web_search
|
||||||
expect(getPath(result, 'web_search', 'provider')).toBe('brave');
|
expect(getPath(result, 'web_search', 'provider')).toBe('brave');
|
||||||
expect(getPath(result, 'web_search', 'max_results')).toBe(5);
|
expect(getPath(result, 'web_search', 'max_results')).toBe(5);
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ function withMutableConfig(config: Config): Config & Record<string, unknown> {
|
|||||||
function makeBaseConfig(): Config {
|
function makeBaseConfig(): Config {
|
||||||
return {
|
return {
|
||||||
server: { localhost: true, port: 18800 },
|
server: { localhost: true, port: 18800 },
|
||||||
models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: ['anthropic'] },
|
models: { default: { provider: 'anthropic', model: 'claude-sonnet-4', api_key: 'sk-test' }, fallback_chain: [] },
|
||||||
backends: { native: { enabled: true }, opencode: { enabled: false }, claude_code: { enabled: false } },
|
backends: { native: { enabled: true }, opencode: { enabled: false }, claude_code: { enabled: false } },
|
||||||
hooks: { confirm: [], log: [], silent: [] },
|
hooks: { confirm: [], log: [], silent: [] },
|
||||||
mcp: { servers: [] },
|
mcp: { servers: [] },
|
||||||
|
|||||||
@@ -23,8 +23,12 @@ describe('supportsAudioInput', () => {
|
|||||||
'ollama',
|
'ollama',
|
||||||
'llamacpp',
|
'llamacpp',
|
||||||
'openrouter',
|
'openrouter',
|
||||||
|
'vercel',
|
||||||
'zhipuai',
|
'zhipuai',
|
||||||
'xai',
|
'xai',
|
||||||
|
'minimax',
|
||||||
|
'moonshot',
|
||||||
|
'synthetic',
|
||||||
] as const;
|
] as const;
|
||||||
|
|
||||||
for (const provider of nonAudioProviders) {
|
for (const provider of nonAudioProviders) {
|
||||||
|
|||||||
@@ -5,7 +5,21 @@
|
|||||||
* Models that don't will receive a Whisper transcript as text instead.
|
* Models that don't will receive a Whisper transcript as text instead.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai';
|
export type ModelProvider =
|
||||||
|
| 'anthropic'
|
||||||
|
| 'openai'
|
||||||
|
| 'gemini'
|
||||||
|
| 'ollama'
|
||||||
|
| 'llamacpp'
|
||||||
|
| 'openrouter'
|
||||||
|
| 'vercel'
|
||||||
|
| 'bedrock'
|
||||||
|
| 'github'
|
||||||
|
| 'zhipuai'
|
||||||
|
| 'xai'
|
||||||
|
| 'minimax'
|
||||||
|
| 'moonshot'
|
||||||
|
| 'synthetic';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Models known to support native audio input via their API.
|
* Models known to support native audio input via their API.
|
||||||
|
|||||||
@@ -70,24 +70,12 @@ describe('OpenAIClient OAuth (Codex)', () => {
|
|||||||
expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 });
|
expect(resp.usage).toEqual({ inputTokens: 2, outputTokens: 2 });
|
||||||
});
|
});
|
||||||
|
|
||||||
it('adds provider warning when tools are requested in OAuth mode', async () => {
|
it('throws when tools are requested in OAuth mode', async () => {
|
||||||
const sse = makeSse([
|
const fetchSpy = vi.fn();
|
||||||
{ event: 'response.output_text.delta', data: { type: 'response.output_text.delta', delta: 'result body' } },
|
globalThis.fetch = fetchSpy as unknown as typeof fetch;
|
||||||
{ event: 'response.completed', data: { type: 'response.completed', response: { usage: { input_tokens: 1, output_tokens: 1 } } } },
|
|
||||||
]);
|
|
||||||
|
|
||||||
globalThis.fetch = vi.fn(async () => {
|
|
||||||
const stream = new ReadableStream({
|
|
||||||
start(controller) {
|
|
||||||
controller.enqueue(new TextEncoder().encode(sse));
|
|
||||||
controller.close();
|
|
||||||
},
|
|
||||||
});
|
|
||||||
return new Response(stream, { status: 200 });
|
|
||||||
}) as typeof fetch;
|
|
||||||
|
|
||||||
const client = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true });
|
const client = new OpenAIClient({ model: 'gpt-5.3-codex', useOAuth: true });
|
||||||
const resp = await client.chat({
|
|
||||||
|
await expect(client.chat({
|
||||||
system: 'You are helpful.',
|
system: 'You are helpful.',
|
||||||
messages: [{ role: 'user', content: 'use tools' }],
|
messages: [{ role: 'user', content: 'use tools' }],
|
||||||
tools: [{
|
tools: [{
|
||||||
@@ -99,10 +87,8 @@ describe('OpenAIClient OAuth (Codex)', () => {
|
|||||||
required: ['id'],
|
required: ['id'],
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
});
|
})).rejects.toThrow('does not support tool execution');
|
||||||
|
|
||||||
expect(resp.content).toContain('[provider-warning] OpenAI OAuth (Codex backend) does not support tool execution in Flynn yet.');
|
expect(fetchSpy).not.toHaveBeenCalled();
|
||||||
expect(resp.content).toContain('Requested tools were not sent to the provider');
|
|
||||||
expect(resp.content).toContain('result body');
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -233,6 +233,12 @@ export class OpenAIClient implements ModelClient {
|
|||||||
|
|
||||||
async chat(request: ChatRequest): Promise<ChatResponse> {
|
async chat(request: ChatRequest): Promise<ChatResponse> {
|
||||||
if (this.useOAuth) {
|
if (this.useOAuth) {
|
||||||
|
if (request.tools && request.tools.length > 0) {
|
||||||
|
throw new Error(
|
||||||
|
'OpenAI OAuth (Codex backend) does not support tool execution. ' +
|
||||||
|
'Use auth_mode=api_key for tool loops or configure a fallback provider that supports tools.',
|
||||||
|
);
|
||||||
|
}
|
||||||
return this.chatViaOAuthCodex(request);
|
return this.chatViaOAuthCodex(request);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -47,6 +47,57 @@ describe('ModelRouter', () => {
|
|||||||
expect(fallbackClient.chat).toHaveBeenCalled();
|
expect(fallbackClient.chat).toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('skips duplicate fallback clients that already failed as primary', async () => {
|
||||||
|
const failingPrimary = createMockClient('primary', true);
|
||||||
|
const fallbackClient = createMockClient('fallback');
|
||||||
|
|
||||||
|
const router = new ModelRouter({
|
||||||
|
default: failingPrimary,
|
||||||
|
fallbackChain: [failingPrimary, fallbackClient],
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await router.chat({ messages: [{ role: 'user', content: 'Hi' }] });
|
||||||
|
|
||||||
|
expect(response.content).toBe('Response from fallback');
|
||||||
|
expect(failingPrimary.chat).toHaveBeenCalledTimes(1);
|
||||||
|
expect(fallbackClient.chat).toHaveBeenCalledTimes(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('applies retry policy to fallback clients', async () => {
|
||||||
|
const failingPrimary = createMockClient('primary', true);
|
||||||
|
let attempts = 0;
|
||||||
|
const flakyFallback: ModelClient = {
|
||||||
|
chat: vi.fn().mockImplementation(async () => {
|
||||||
|
attempts += 1;
|
||||||
|
if (attempts === 1) {
|
||||||
|
throw new Error('transient');
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
content: 'Recovered fallback',
|
||||||
|
stopReason: 'end_turn',
|
||||||
|
usage: { inputTokens: 1, outputTokens: 1 },
|
||||||
|
} satisfies ChatResponse;
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
const router = new ModelRouter({
|
||||||
|
default: failingPrimary,
|
||||||
|
fallbackChain: [flakyFallback],
|
||||||
|
retryConfig: {
|
||||||
|
maxRetries: 1,
|
||||||
|
initialDelayMs: 1,
|
||||||
|
backoffMultiplier: 1,
|
||||||
|
maxDelayMs: 1,
|
||||||
|
nonRetryablePatterns: [],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await router.chat({ messages: [{ role: 'user', content: 'retry fallback' }] });
|
||||||
|
|
||||||
|
expect(response.content).toBe('Recovered fallback');
|
||||||
|
expect(flakyFallback.chat).toHaveBeenCalledTimes(2);
|
||||||
|
});
|
||||||
|
|
||||||
it('throws when all providers fail', async () => {
|
it('throws when all providers fail', async () => {
|
||||||
const failing1 = createMockClient('primary', true);
|
const failing1 = createMockClient('primary', true);
|
||||||
const failing2 = createMockClient('fallback', true);
|
const failing2 = createMockClient('fallback', true);
|
||||||
|
|||||||
+28
-3
@@ -87,6 +87,8 @@ export class ModelRouter implements ModelClient {
|
|||||||
const useTier = tier ?? this.currentTier;
|
const useTier = tier ?? this.currentTier;
|
||||||
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
|
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
|
||||||
const errors: Error[] = [];
|
const errors: Error[] = [];
|
||||||
|
const attemptedClients = new Set<ModelClient>();
|
||||||
|
attemptedClients.add(primaryClient);
|
||||||
|
|
||||||
// Try primary client (with retry if configured)
|
// Try primary client (with retry if configured)
|
||||||
try {
|
try {
|
||||||
@@ -109,11 +111,20 @@ export class ModelRouter implements ModelClient {
|
|||||||
// Try tier-specific fallbacks first
|
// Try tier-specific fallbacks first
|
||||||
const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
|
const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
|
||||||
for (let i = 0; i < tierFallbackList.length; i++) {
|
for (let i = 0; i < tierFallbackList.length; i++) {
|
||||||
|
const fallbackClient = tierFallbackList[i];
|
||||||
|
if (attemptedClients.has(fallbackClient)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
this.throwIfAborted();
|
this.throwIfAborted();
|
||||||
try {
|
try {
|
||||||
const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`;
|
const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`;
|
||||||
logger.debug(reason);
|
logger.debug(reason);
|
||||||
const response = await tierFallbackList[i].chat(request);
|
attemptedClients.add(fallbackClient);
|
||||||
|
const response = this.retryConfig
|
||||||
|
? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `tier fallback #${i + 1}`, {
|
||||||
|
shouldAbort: () => this.abortRequested,
|
||||||
|
})
|
||||||
|
: await fallbackClient.chat(request);
|
||||||
return { ...response, fallback: true, fallbackReason: reason };
|
return { ...response, fallback: true, fallbackReason: reason };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
errors.push(error instanceof Error ? error : new Error(String(error)));
|
errors.push(error instanceof Error ? error : new Error(String(error)));
|
||||||
@@ -123,12 +134,20 @@ export class ModelRouter implements ModelClient {
|
|||||||
|
|
||||||
// Then try global fallback chain
|
// Then try global fallback chain
|
||||||
for (let i = 0; i < this.fallbackChain.length; i++) {
|
for (let i = 0; i < this.fallbackChain.length; i++) {
|
||||||
this.throwIfAborted();
|
|
||||||
const fallbackClient = this.fallbackChain[i];
|
const fallbackClient = this.fallbackChain[i];
|
||||||
|
if (attemptedClients.has(fallbackClient)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
this.throwIfAborted();
|
||||||
try {
|
try {
|
||||||
const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`;
|
const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`;
|
||||||
logger.debug(reason);
|
logger.debug(reason);
|
||||||
const response = await fallbackClient.chat(request);
|
attemptedClients.add(fallbackClient);
|
||||||
|
const response = this.retryConfig
|
||||||
|
? await withRetry(() => fallbackClient.chat(request), this.retryConfig, `global fallback #${i + 1}`, {
|
||||||
|
shouldAbort: () => this.abortRequested,
|
||||||
|
})
|
||||||
|
: await fallbackClient.chat(request);
|
||||||
return { ...response, fallback: true, fallbackReason: reason };
|
return { ...response, fallback: true, fallbackReason: reason };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
errors.push(error instanceof Error ? error : new Error(String(error)));
|
errors.push(error instanceof Error ? error : new Error(String(error)));
|
||||||
@@ -143,6 +162,8 @@ export class ModelRouter implements ModelClient {
|
|||||||
const useTier = tier ?? this.currentTier;
|
const useTier = tier ?? this.currentTier;
|
||||||
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
|
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
|
||||||
let primaryError: string | undefined;
|
let primaryError: string | undefined;
|
||||||
|
const attemptedClients = new Set<ModelClient>();
|
||||||
|
attemptedClients.add(primaryClient);
|
||||||
|
|
||||||
if (primaryClient.chatStream) {
|
if (primaryClient.chatStream) {
|
||||||
let hasError = false;
|
let hasError = false;
|
||||||
@@ -170,11 +191,13 @@ export class ModelRouter implements ModelClient {
|
|||||||
const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
|
const tierFallbackList = this.tierFallbacks.get(useTier) ?? [];
|
||||||
for (let i = 0; i < tierFallbackList.length; i++) {
|
for (let i = 0; i < tierFallbackList.length; i++) {
|
||||||
const fallbackClient = tierFallbackList[i];
|
const fallbackClient = tierFallbackList[i];
|
||||||
|
if (attemptedClients.has(fallbackClient)) {continue;}
|
||||||
if (!fallbackClient.chatStream) {continue;}
|
if (!fallbackClient.chatStream) {continue;}
|
||||||
|
|
||||||
const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`;
|
const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`;
|
||||||
logger.debug(reason);
|
logger.debug(reason);
|
||||||
yield { type: 'fallback_warning', fallbackReason: reason };
|
yield { type: 'fallback_warning', fallbackReason: reason };
|
||||||
|
attemptedClients.add(fallbackClient);
|
||||||
|
|
||||||
let hasError = false;
|
let hasError = false;
|
||||||
for await (const event of fallbackClient.chatStream(request)) {
|
for await (const event of fallbackClient.chatStream(request)) {
|
||||||
@@ -192,11 +215,13 @@ export class ModelRouter implements ModelClient {
|
|||||||
// Then try global fallback chain
|
// Then try global fallback chain
|
||||||
for (let i = 0; i < this.fallbackChain.length; i++) {
|
for (let i = 0; i < this.fallbackChain.length; i++) {
|
||||||
const fallbackClient = this.fallbackChain[i];
|
const fallbackClient = this.fallbackChain[i];
|
||||||
|
if (attemptedClients.has(fallbackClient)) {continue;}
|
||||||
if (!fallbackClient.chatStream) {continue;}
|
if (!fallbackClient.chatStream) {continue;}
|
||||||
|
|
||||||
const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`;
|
const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`;
|
||||||
logger.debug(reason);
|
logger.debug(reason);
|
||||||
yield { type: 'fallback_warning', fallbackReason: reason };
|
yield { type: 'fallback_warning', fallbackReason: reason };
|
||||||
|
attemptedClients.add(fallbackClient);
|
||||||
|
|
||||||
let hasError = false;
|
let hasError = false;
|
||||||
for await (const event of fallbackClient.chatStream(request)) {
|
for await (const event of fallbackClient.chatStream(request)) {
|
||||||
|
|||||||
Reference in New Issue
Block a user