fix: provider-aware model routing with fallback visibility
- Extract createClientFromConfig() to dispatch on the provider field instead of hardcoding all tiers as AnthropicClient
- Add fallback/fallbackReason metadata to ChatResponse and ChatStreamEvent so callers know when a fallback model was used
- Enhance the doctor check to report the full model stack and warn on missing API keys for cloud providers
- Log fallback warnings in NativeAgent and display them in the TUI
- Support tier names and local_providers entries in fallback_chain
- Add 8 tests for createClientFromConfig covering all provider types
This commit is contained in:
@@ -41,6 +41,8 @@ describe('ModelRouter', () => {
|
||||
const response = await router.chat({ messages: [{ role: 'user', content: 'Hi' }] });
|
||||
|
||||
expect(response.content).toBe('Response from fallback');
|
||||
expect(response.fallback).toBe(true);
|
||||
expect(response.fallbackReason).toMatch(/Primary model failed/);
|
||||
expect(failingClient.chat).toHaveBeenCalled();
|
||||
expect(fallbackClient.chat).toHaveBeenCalled();
|
||||
});
|
||||
@@ -132,13 +134,18 @@ describe('ModelRouter streaming', () => {
|
||||
});
|
||||
|
||||
const chunks: string[] = [];
|
||||
let fallbackWarning: string | undefined;
|
||||
for await (const event of router.chatStream({ messages: [] })) {
|
||||
if (event.type === 'content' && event.content) {
|
||||
chunks.push(event.content);
|
||||
}
|
||||
if (event.type === 'fallback_warning') {
|
||||
fallbackWarning = event.fallbackReason;
|
||||
}
|
||||
}
|
||||
|
||||
expect(chunks).toEqual(['Fallback']);
|
||||
expect(fallbackWarning).toMatch(/Primary model failed/);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
+19
-7
@@ -58,13 +58,16 @@ export class ModelRouter implements ModelClient {
|
||||
}
|
||||
|
||||
// Try fallback chain
|
||||
for (const fallbackClient of this.fallbackChain) {
|
||||
for (let i = 0; i < this.fallbackChain.length; i++) {
|
||||
const fallbackClient = this.fallbackChain[i];
|
||||
try {
|
||||
console.log('Trying fallback model...');
|
||||
return await fallbackClient.chat(request);
|
||||
const reason = `Primary model failed (${errors[0].message}), using fallback #${i + 1}`;
|
||||
console.warn(reason);
|
||||
const response = await fallbackClient.chat(request);
|
||||
return { ...response, fallback: true, fallbackReason: reason };
|
||||
} catch (error) {
|
||||
errors.push(error instanceof Error ? error : new Error(String(error)));
|
||||
console.warn(`Fallback model failed: ${errors[errors.length - 1].message}`);
|
||||
console.warn(`Fallback model #${i + 1} failed: ${errors[errors.length - 1].message}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,30 +77,39 @@ export class ModelRouter implements ModelClient {
|
||||
async *chatStream(request: ChatRequest, tier?: ModelTier): AsyncIterable<ChatStreamEvent> {
|
||||
const useTier = tier ?? this.currentTier;
|
||||
const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
|
||||
let primaryError: string | undefined;
|
||||
|
||||
if (primaryClient.chatStream) {
|
||||
let hasError = false;
|
||||
for await (const event of primaryClient.chatStream(request)) {
|
||||
if (event.type === 'error') {
|
||||
hasError = true;
|
||||
console.warn(`Primary stream failed: ${event.error?.message}`);
|
||||
primaryError = event.error?.message ?? 'Unknown error';
|
||||
console.warn(`Primary stream failed: ${primaryError}`);
|
||||
break;
|
||||
}
|
||||
yield event;
|
||||
}
|
||||
|
||||
if (!hasError) return;
|
||||
} else {
|
||||
primaryError = 'Primary client does not support streaming';
|
||||
}
|
||||
|
||||
// Try fallback chain
|
||||
for (const fallbackClient of this.fallbackChain) {
|
||||
for (let i = 0; i < this.fallbackChain.length; i++) {
|
||||
const fallbackClient = this.fallbackChain[i];
|
||||
if (!fallbackClient.chatStream) continue;
|
||||
|
||||
const reason = `Primary model failed (${primaryError}), using fallback #${i + 1}`;
|
||||
console.warn(reason);
|
||||
yield { type: 'fallback_warning', fallbackReason: reason };
|
||||
|
||||
let hasError = false;
|
||||
for await (const event of fallbackClient.chatStream(request)) {
|
||||
if (event.type === 'error') {
|
||||
hasError = true;
|
||||
console.warn(`Fallback stream failed: ${event.error?.message}`);
|
||||
console.warn(`Fallback stream #${i + 1} failed: ${event.error?.message}`);
|
||||
break;
|
||||
}
|
||||
yield event;
|
||||
|
||||
+7
-1
@@ -55,6 +55,10 @@ export interface ChatResponse {
|
||||
stopReason: 'end_turn' | 'max_tokens' | 'stop_sequence' | 'tool_use' | string;
|
||||
usage: TokenUsage;
|
||||
toolCalls?: ModelToolCall[];
|
||||
/** Set when the response came from a fallback model, not the primary. */
|
||||
fallback?: boolean;
|
||||
/** Human-readable reason for the fallback. */
|
||||
fallbackReason?: string;
|
||||
}
|
||||
|
||||
export interface TokenUsage {
|
||||
@@ -63,11 +67,13 @@ export interface TokenUsage {
|
||||
}
|
||||
|
||||
export interface ChatStreamEvent {
|
||||
type: 'content' | 'done' | 'error' | 'tool_use';
|
||||
type: 'content' | 'done' | 'error' | 'tool_use' | 'fallback_warning';
|
||||
content?: string;
|
||||
usage?: TokenUsage;
|
||||
error?: Error;
|
||||
toolCall?: ModelToolCall;
|
||||
/** Human-readable message when primary model failed and fallback is being used. */
|
||||
fallbackReason?: string;
|
||||
}
|
||||
|
||||
export interface StreamingModelClient {
|
||||
|
||||
Reference in New Issue
Block a user