feat: implement Tier 3 features — lane queue, credential redaction, token dashboard, xAI, Voyage AI

- Lane Queue: per-session FIFO queue in gateway replacing reject-when-busy (9 tests)
- Credential Redaction: redactConfig() expanded to cover 18+ secret fields (16 tests)
- Web UI Token Dashboard: system.tokenUsage endpoint + Usage page with summary cards
- xAI (Grok) Provider: OpenAI-compatible client with model pricing
- Voyage AI Embeddings: new embedding provider with configurable dimensions (5 tests)
- Update gap analysis: 90→95 match (70%→74%), Tier 3 section marked DONE
- Update state.json: test count 1001→1034, add tier3_completion entry

Total: 1034 tests passing across 85 files, typecheck clean
2026-02-09 10:32:57 -08:00
parent 1d126cddfb
commit 9be8f76bc7
26 changed files with 1395 additions and 105 deletions
@@ -21,6 +21,12 @@ export const MODEL_COSTS_PER_MILLION: Record<string, { input: number; output: nu
  'claude-haiku-4': { input: 0, output: 0 },
  // Local / unknown models
  'default': { input: 0, output: 0 },
+  // xAI (Grok)
+  'grok-3': { input: 3, output: 15 },
+  'grok-3-mini': { input: 0.30, output: 0.50 },
+  'grok-2': { input: 2, output: 10 },
+  'grok-2-mini': { input: 0.10, output: 0.25 },
+  'grok-3-fast': { input: 5, output: 25 },
  // Bedrock (Meta Llama)
  'meta.llama3-1-70b-instruct-v1:0': { input: 0.72, output: 0.72 },
  'meta.llama3-1-8b-instruct-v1:0': { input: 0.22, output: 0.22 },
@@ -60,7 +60,7 @@ export async function withRetry<T>(
      const delay = Math.min(baseDelay, config.maxDelayMs);
      const jitter = delay * (0.5 + Math.random() * 0.5); // 50-100% of delay

-      console.warn(
+      console.debug(
        `[retry] ${label ?? 'operation'} attempt ${attempt + 1}/${config.maxRetries} failed: ${lastError.message}. Retrying in ${Math.round(jitter)}ms...`,
      );

@@ -76,7 +76,7 @@ export class ModelRouter implements ModelClient {
      return await primaryClient.chat(request);
    } catch (error) {
      errors.push(error instanceof Error ? error : new Error(String(error)));
-      console.warn(`Primary model failed: ${errors[0].message}`);
+      console.debug(`Primary model failed: ${errors[0].message}`);
    }

    // Try tier-specific fallbacks first
@@ -84,12 +84,12 @@ export class ModelRouter implements ModelClient {
    for (let i = 0; i < tierFallbackList.length; i++) {
      try {
        const reason = `Primary model failed (${errors[0].message}), using tier fallback #${i + 1}`;
-        console.warn(reason);
+        console.debug(reason);
        const response = await tierFallbackList[i].chat(request);
        return { ...response, fallback: true, fallbackReason: reason };
      } catch (error) {
        errors.push(error instanceof Error ? error : new Error(String(error)));
-        console.warn(`Tier fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
+        console.debug(`Tier fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
      }
    }

@@ -98,12 +98,12 @@ export class ModelRouter implements ModelClient {
      const fallbackClient = this.fallbackChain[i];
      try {
        const reason = `Primary model failed (${errors[0].message}), using global fallback #${i + 1}`;
-        console.warn(reason);
+        console.debug(reason);
        const response = await fallbackClient.chat(request);
        return { ...response, fallback: true, fallbackReason: reason };
      } catch (error) {
        errors.push(error instanceof Error ? error : new Error(String(error)));
-        console.warn(`Global fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
+        console.debug(`Global fallback #${i + 1} failed: ${errors[errors.length - 1].message}`);
      }
    }

@@ -121,7 +121,7 @@ export class ModelRouter implements ModelClient {
        if (event.type === 'error') {
          hasError = true;
          primaryError = event.error?.message ?? 'Unknown error';
-          console.warn(`Primary stream failed: ${primaryError}`);
+          console.debug(`Primary stream failed: ${primaryError}`);
          break;
        }
        yield event;
@@ -139,14 +139,14 @@ export class ModelRouter implements ModelClient {
      if (!fallbackClient.chatStream) continue;

      const reason = `Primary model failed (${primaryError}), using tier fallback #${i + 1}`;
-      console.warn(reason);
+      console.debug(reason);
      yield { type: 'fallback_warning', fallbackReason: reason };

      let hasError = false;
      for await (const event of fallbackClient.chatStream(request)) {
        if (event.type === 'error') {
          hasError = true;
-          console.warn(`Tier fallback stream #${i + 1} failed: ${event.error?.message}`);
+          console.debug(`Tier fallback stream #${i + 1} failed: ${event.error?.message}`);
          break;
        }
        yield event;
@@ -161,14 +161,14 @@ export class ModelRouter implements ModelClient {
      if (!fallbackClient.chatStream) continue;

      const reason = `Primary model failed (${primaryError}), using global fallback #${i + 1}`;
-      console.warn(reason);
+      console.debug(reason);
      yield { type: 'fallback_warning', fallbackReason: reason };

      let hasError = false;
      for await (const event of fallbackClient.chatStream(request)) {
        if (event.type === 'error') {
          hasError = true;
-          console.warn(`Global fallback stream #${i + 1} failed: ${event.error?.message}`);
+          console.debug(`Global fallback stream #${i + 1} failed: ${event.error?.message}`);
          break;
        }
        yield event;