feat: implement Tier 3 features — lane queue, credential redaction, token dashboard, xAI, Voyage AI

- Lane Queue: per-session FIFO queue in gateway replacing reject-when-busy (9 tests)
- Credential Redaction: redactConfig() expanded to cover 18+ secret fields (16 tests)
- Web UI Token Dashboard: system.tokenUsage endpoint + Usage page with summary cards
- xAI (Grok) Provider: OpenAI-compatible client with model pricing
- Voyage AI Embeddings: new embedding provider with configurable dimensions (5 tests)
- Update gap analysis: match count 90→95 (70%→74%); Tier 3 section marked DONE
- Update state.json: test count 1001→1034, add tier3_completion entry

Total: 1034 tests passing across 85 files, typecheck clean
This commit is contained in:
William Valentin
2026-02-09 10:32:57 -08:00
parent 1d126cddfb
commit 9be8f76bc7
26 changed files with 1395 additions and 105 deletions
+6
View File
@@ -21,6 +21,12 @@ export const MODEL_COSTS_PER_MILLION: Record<string, { input: number; output: nu
'claude-haiku-4': { input: 0, output: 0 },
// Local / unknown models
'default': { input: 0, output: 0 },
// xAI (Grok)
'grok-3': { input: 3, output: 15 },
'grok-3-mini': { input: 0.30, output: 0.50 },
'grok-2': { input: 2, output: 10 },
'grok-2-mini': { input: 0.10, output: 0.25 },
'grok-3-fast': { input: 5, output: 25 },
// Bedrock (Meta Llama)
'meta.llama3-1-70b-instruct-v1:0': { input: 0.72, output: 0.72 },
'meta.llama3-1-8b-instruct-v1:0': { input: 0.22, output: 0.22 },
+1 -1
View File
@@ -60,7 +60,7 @@ export async function withRetry<T>(
const delay = Math.min(baseDelay, config.maxDelayMs);
const jitter = delay * (0.5 + Math.random() * 0.5); // 50-100% of delay
console.warn(
console.debug(
`[retry] ${label ?? 'operation'} attempt ${attempt + 1}/${config.maxRetries} failed: ${lastError.message}. Retrying in ${Math.round(jitter)}ms...`,
);
+10 -10
View File
@@ -76,7 +76,7 @@ export class ModelRouter implements ModelClient {
return await primaryClient.chat(request);
} catch (error) {
errors.push(error instanceof Error ? error : new Error(String(error)));
console.warn(`Primary model failed: ${errors[0].message}`);
console.debug(`Primary model failed: ${errors[0].message}`);
}
// Try tier-specific fallbacks first
@@ -84,12 +84,12 @@ export class ModelRouter implements ModelClient {
for (let i = 0; i < tierFallbackList.length; i++) {
try {
const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`;
console.warn(reason);
console.debug(reason);
const response = await tierFallbackList[i].chat(request);
return { ...response, fallback: true, fallbackReason: reason };
} catch (error) {
errors.push(error instanceof Error ? error : new Error(String(error)));
console.warn(`Tier fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
console.debug(`Tier fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
}
}
@@ -98,12 +98,12 @@ export class ModelRouter implements ModelClient {
const fallbackClient = this.fallbackChain[i];
try {
const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`;
console.warn(reason);
console.debug(reason);
const response = await fallbackClient.chat(request);
return { ...response, fallback: true, fallbackReason: reason };
} catch (error) {
errors.push(error instanceof Error ? error : new Error(String(error)));
console.warn(`Global fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
console.debug(`Global fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
}
}
@@ -121,7 +121,7 @@ export class ModelRouter implements ModelClient {
if (event.type === 'error') {
hasError = true;
primaryError = event.error?.message ?? 'Unknown error';
console.warn(`Primary stream failed: ${primaryError}`);
console.debug(`Primary stream failed: ${primaryError}`);
break;
}
yield event;
@@ -139,14 +139,14 @@ export class ModelRouter implements ModelClient {
if (!fallbackClient.chatStream) continue;
const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`;
console.warn(reason);
console.debug(reason);
yield { type: 'fallback_warning', fallbackReason: reason };
let hasError = false;
for await (const event of fallbackClient.chatStream(request)) {
if (event.type === 'error') {
hasError = true;
console.warn(`Tier fallback stream #${i + 1} failed: ${event.error?.message}`);
console.debug(`Tier fallback stream #${i + 1} failed: ${event.error?.message}`);
break;
}
yield event;
@@ -161,14 +161,14 @@ export class ModelRouter implements ModelClient {
if (!fallbackClient.chatStream) continue;
const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`;
console.warn(reason);
console.debug(reason);
yield { type: 'fallback_warning', fallbackReason: reason };
let hasError = false;
for await (const event of fallbackClient.chatStream(request)) {
if (event.type === 'error') {
hasError = true;
console.warn(`Global fallback stream #${i + 1} failed: ${event.error?.message}`);
console.debug(`Global fallback stream #${i + 1} failed: ${event.error?.message}`);
break;
}
yield event;