feat: implement Tier 3 features — lane queue, credential redaction, token dashboard, xAI, Voyage AI
- Lane Queue: per-session FIFO queue in gateway replacing reject-when-busy (9 tests) - Credential Redaction: redactConfig() expanded to cover 18+ secret fields (16 tests) - Web UI Token Dashboard: system.tokenUsage endpoint + Usage page with summary cards - xAI (Grok) Provider: OpenAI-compatible client with model pricing - Voyage AI Embeddings: new embedding provider with configurable dimensions (5 tests) - Update gap analysis: 90→95 match (70%→74%), Tier 3 section marked DONE - Update state.json: test count 1001→1034, add tier3_completion entry Total: 1034 tests passing across 85 files, typecheck clean
This commit is contained in:
@@ -21,6 +21,12 @@ export const MODEL_COSTS_PER_MILLION: Record<string, { input: number; output: nu
|
||||
'claude-haiku-4': { input: 0, output: 0 },
|
||||
// Local / unknown models
|
||||
'default': { input: 0, output: 0 },
|
||||
// xAI (Grok)
|
||||
'grok-3': { input: 3, output: 15 },
|
||||
'grok-3-mini': { input: 0.30, output: 0.50 },
|
||||
'grok-2': { input: 2, output: 10 },
|
||||
'grok-2-mini': { input: 0.10, output: 0.25 },
|
||||
'grok-3-fast': { input: 5, output: 25 },
|
||||
// Bedrock (Meta Llama)
|
||||
'meta.llama3-1-70b-instruct-v1:0': { input: 0.72, output: 0.72 },
|
||||
'meta.llama3-1-8b-instruct-v1:0': { input: 0.22, output: 0.22 },
|
||||
|
||||
+1
-1
@@ -60,7 +60,7 @@ export async function withRetry<T>(
|
||||
const delay = Math.min(baseDelay, config.maxDelayMs);
|
||||
const jitter = delay * (0.5 + Math.random() * 0.5); // 50-100% of delay
|
||||
|
||||
console.warn(
|
||||
console.debug(
|
||||
`[retry] ${label ?? 'operation'} attempt ${attempt + 1}/${config.maxRetries} failed: ${lastError.message}. Retrying in ${Math.round(jitter)}ms...`,
|
||||
);
|
||||
|
||||
|
||||
+10
-10
@@ -76,7 +76,7 @@ export class ModelRouter implements ModelClient {
|
||||
return await primaryClient.chat(request);
|
||||
} catch (error) {
|
||||
errors.push(error instanceof Error ? error : new Error(String(error)));
|
||||
console.warn(`Primary model failed: ${errors[0].message}`);
|
||||
console.debug(`Primary model failed: ${errors[0].message}`);
|
||||
}
|
||||
|
||||
// Try tier-specific fallbacks first
|
||||
@@ -84,12 +84,12 @@ export class ModelRouter implements ModelClient {
|
||||
for (let i = 0; i < tierFallbackList.length; i++) {
|
||||
try {
|
||||
const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`;
|
||||
console.warn(reason);
|
||||
console.debug(reason);
|
||||
const response = await tierFallbackList[i].chat(request);
|
||||
return { ...response, fallback: true, fallbackReason: reason };
|
||||
} catch (error) {
|
||||
errors.push(error instanceof Error ? error : new Error(String(error)));
|
||||
console.warn(`Tier fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
|
||||
console.debug(`Tier fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,12 +98,12 @@ export class ModelRouter implements ModelClient {
|
||||
const fallbackClient = this.fallbackChain[i];
|
||||
try {
|
||||
const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`;
|
||||
console.warn(reason);
|
||||
console.debug(reason);
|
||||
const response = await fallbackClient.chat(request);
|
||||
return { ...response, fallback: true, fallbackReason: reason };
|
||||
} catch (error) {
|
||||
errors.push(error instanceof Error ? error : new Error(String(error)));
|
||||
console.warn(`Global fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
|
||||
console.debug(`Global fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,7 +121,7 @@ export class ModelRouter implements ModelClient {
|
||||
if (event.type === 'error') {
|
||||
hasError = true;
|
||||
primaryError = event.error?.message ?? 'Unknown error';
|
||||
console.warn(`Primary stream failed: ${primaryError}`);
|
||||
console.debug(`Primary stream failed: ${primaryError}`);
|
||||
break;
|
||||
}
|
||||
yield event;
|
||||
@@ -139,14 +139,14 @@ export class ModelRouter implements ModelClient {
|
||||
if (!fallbackClient.chatStream) continue;
|
||||
|
||||
const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`;
|
||||
console.warn(reason);
|
||||
console.debug(reason);
|
||||
yield { type: 'fallback_warning', fallbackReason: reason };
|
||||
|
||||
let hasError = false;
|
||||
for await (const event of fallbackClient.chatStream(request)) {
|
||||
if (event.type === 'error') {
|
||||
hasError = true;
|
||||
console.warn(`Tier fallback stream #${i + 1} failed: ${event.error?.message}`);
|
||||
console.debug(`Tier fallback stream #${i + 1} failed: ${event.error?.message}`);
|
||||
break;
|
||||
}
|
||||
yield event;
|
||||
@@ -161,14 +161,14 @@ export class ModelRouter implements ModelClient {
|
||||
if (!fallbackClient.chatStream) continue;
|
||||
|
||||
const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`;
|
||||
console.warn(reason);
|
||||
console.debug(reason);
|
||||
yield { type: 'fallback_warning', fallbackReason: reason };
|
||||
|
||||
let hasError = false;
|
||||
for await (const event of fallbackClient.chatStream(request)) {
|
||||
if (event.type === 'error') {
|
||||
hasError = true;
|
||||
console.warn(`Global fallback stream #${i + 1} failed: ${event.error?.message}`);
|
||||
console.debug(`Global fallback stream #${i + 1} failed: ${event.error?.message}`);
|
||||
break;
|
||||
}
|
||||
yield event;
|
||||
|
||||
Reference in New Issue
Block a user