feat: add P2 features — retry policy, prompt templating, usage tracking, tech debt cleanup

- Extract shared splitMessage() into channels/utils.ts (dedup 4 adapters)
- Add Slack user name resolution with caching (users.info API)
- Add withRetry() with exponential backoff + jitter, isRetryable() filter
- Wire retry config into ModelRouter.chat() (non-streaming only)
- Add assembleSystemPrompt() multi-file template system (SOUL/AGENTS/IDENTITY/USER/TOOLS.md)
- Add usage tracking accumulators in NativeAgent + AgentOrchestrator
- Add estimateCost() with per-model pricing table
- Add /usage TUI command with full usage report formatting
- Add retrySchema and promptSchema to config schema

Tests: 569 passing, typecheck clean
2026-02-06 15:12:35 -08:00
parent de68deb1b2
commit 4316dbd3be
24 changed files with 902 additions and 143 deletions
@@ -0,0 +1,57 @@
+import { describe, it, expect } from 'vitest';
+import { estimateCost, MODEL_COSTS_PER_MILLION } from './costs.js';
+
+// estimateCost(): unknown or local models cost nothing; known models are
+// billed per million tokens at the rates in MODEL_COSTS_PER_MILLION.
+describe('estimateCost', () => {
+  const SONNET = 'claude-sonnet-4-20250514';
+  const OPUS = 'claude-opus-4-20250514';
+  const ONE_MILLION = 1_000_000;
+
+  it('returns 0 for local/unknown models', () => {
+    const noModel = estimateCost(1000, 1000);
+    const unlistedModel = estimateCost(1000, 1000, 'some-local-model');
+
+    expect(noModel).toBe(0);
+    expect(unlistedModel).toBe(0);
+  });
+
+  it('uses default costs when model name is undefined', () => {
+    expect(estimateCost(ONE_MILLION, ONE_MILLION)).toBe(0);
+  });
+
+  it('calculates correctly for known Anthropic models', () => {
+    // Sonnet rates: $3 per million input tokens, $15 per million output tokens.
+    expect(estimateCost(ONE_MILLION, ONE_MILLION, SONNET)).toBe(18);
+  });
+
+  it('calculates correctly for claude-opus', () => {
+    // Opus rates: $15/M input, $75/M output; half a million output tokens -> $37.50.
+    expect(estimateCost(ONE_MILLION, ONE_MILLION / 2, OPUS)).toBe(52.5);
+  });
+
+  it('calculates correctly for OpenAI models', () => {
+    // gpt-4o rates: $2.50/M input, $10/M output.
+    expect(estimateCost(2 * ONE_MILLION, ONE_MILLION, 'gpt-4o')).toBe(15);
+  });
+
+  it('handles small token counts', () => {
+    // 1000 input tokens at $3/M is three tenths of a cent.
+    expect(estimateCost(1000, 0, SONNET)).toBeCloseTo(0.003);
+  });
+
+  it('handles zero tokens', () => {
+    expect(estimateCost(0, 0, SONNET)).toBe(0);
+  });
+});
+
+// Sanity checks on the pricing table itself: the zero-cost fallback exists and
+// every model the application expects to price has an entry.
+describe('MODEL_COSTS_PER_MILLION', () => {
+  it('has a default entry', () => {
+    const fallback = MODEL_COSTS_PER_MILLION['default'];
+    expect(fallback).toEqual({ input: 0, output: 0 });
+  });
+
+  it('has entries for all expected models', () => {
+    const expectedModels = [
+      'claude-sonnet-4-20250514',
+      'claude-3-5-haiku-20241022',
+      'claude-opus-4-20250514',
+      'gpt-4o',
+      'gpt-4o-mini',
+    ];
+
+    for (const model of expectedModels) {
+      expect(MODEL_COSTS_PER_MILLION[model]).toBeDefined();
+    }
+  });
+});
@@ -0,0 +1,21 @@
+/**
+ * Approximate cost in dollars per million tokens for known models.
+ *
+ * Keys are exact model identifiers. The `'default'` entry is the zero-cost
+ * fallback applied to local or unrecognised models.
+ */
+export const MODEL_COSTS_PER_MILLION: Record<string, { input: number; output: number }> = {
+  // Anthropic
+  'claude-sonnet-4-20250514': { input: 3, output: 15 },
+  'claude-3-5-haiku-20241022': { input: 0.80, output: 4 },
+  'claude-opus-4-20250514': { input: 15, output: 75 },
+  // OpenAI
+  'gpt-4o': { input: 2.50, output: 10 },
+  'gpt-4o-mini': { input: 0.15, output: 0.60 },
+  // Local / unknown models
+  'default': { input: 0, output: 0 },
+};
+
+/**
+ * Estimate the dollar cost for a given number of input/output tokens.
+ *
+ * @param inputTokens - Number of prompt (input) tokens consumed.
+ * @param outputTokens - Number of completion (output) tokens produced.
+ * @param modelName - Exact model identifier; when omitted or not present in
+ *   {@link MODEL_COSTS_PER_MILLION}, the zero-cost `'default'` entry is used.
+ * @returns Estimated cost in dollars.
+ */
+export function estimateCost(inputTokens: number, outputTokens: number, modelName?: string): number {
+  // Only own keys of the table count as known models. A bare index lookup would
+  // also resolve names inherited from Object.prototype ('toString',
+  // 'constructor', '__proto__', ...) and turn the estimate into NaN.
+  const key =
+    modelName !== undefined && Object.prototype.hasOwnProperty.call(MODEL_COSTS_PER_MILLION, modelName)
+      ? modelName
+      : 'default';
+  const costs = MODEL_COSTS_PER_MILLION[key];
+  return (inputTokens * costs.input + outputTokens * costs.output) / 1_000_000;
+}
@@ -3,6 +3,8 @@ export { OpenAIClient, type OpenAIClientConfig } from './openai.js';
 export { OllamaClient, type OllamaClientConfig } from './local/index.js';
 export { LlamaCppClient, type LlamaCppClientConfig } from './local/index.js';
 export { ModelRouter, type ModelRouterConfig, type ModelTier } from './router.js';
+export { withRetry, isRetryable, DEFAULT_RETRY_CONFIG, type RetryConfig } from './retry.js';
+export { estimateCost, MODEL_COSTS_PER_MILLION } from './costs.js';
 export type {
  Message,
  ChatRequest,
@@ -0,0 +1,169 @@
+import { describe, it, expect, vi } from 'vitest';
+import { isRetryable, withRetry, DEFAULT_RETRY_CONFIG } from './retry.js';
+import type { RetryConfig } from './retry.js';
+
+// isRetryable(): an error is retryable unless its message contains one of the
+// configured non-retryable patterns (matched case-insensitively).
+describe('isRetryable', () => {
+  // Classify a message against the default non-retryable pattern list.
+  const retryableByDefault = (message: string): boolean =>
+    isRetryable(new Error(message), DEFAULT_RETRY_CONFIG.nonRetryablePatterns);
+
+  it('returns true for generic errors', () => {
+    expect(retryableByDefault('Connection timeout')).toBe(true);
+  });
+
+  it('returns false for authentication errors', () => {
+    expect(retryableByDefault('Invalid API key: authentication failed')).toBe(false);
+  });
+
+  it('returns false for invalid_api_key errors', () => {
+    expect(retryableByDefault('Error: invalid_api_key')).toBe(false);
+  });
+
+  it('returns false for unauthorized errors', () => {
+    expect(retryableByDefault('Request unauthorized')).toBe(false);
+  });
+
+  it('returns false for invalid_request errors', () => {
+    expect(retryableByDefault('invalid_request: missing parameter')).toBe(false);
+  });
+
+  it('returns false for context_length_exceeded errors', () => {
+    expect(retryableByDefault('context_length_exceeded: max 128k tokens')).toBe(false);
+  });
+
+  it('returns false for content_policy errors', () => {
+    expect(retryableByDefault('content_policy violation detected')).toBe(false);
+  });
+
+  it('is case-insensitive when matching patterns', () => {
+    expect(retryableByDefault('AUTHENTICATION error')).toBe(false);
+  });
+
+  it('uses custom patterns when provided', () => {
+    const quotaError = new Error('quota exceeded');
+    const customPatterns = ['quota'];
+    expect(isRetryable(quotaError, customPatterns)).toBe(false);
+  });
+});
+
+// withRetry(): retries transient failures with capped exponential backoff and
+// gives up immediately on non-retryable errors.
+describe('withRetry', () => {
+  // Use minimal real delays to avoid fake-timer race conditions
+  const fastConfig: RetryConfig = {
+    maxRetries: 3,
+    initialDelayMs: 1,
+    backoffMultiplier: 1,
+    maxDelayMs: 5,
+    nonRetryablePatterns: DEFAULT_RETRY_CONFIG.nonRetryablePatterns,
+  };
+
+  it('succeeds on first attempt without delay', async () => {
+    const fn = vi.fn().mockResolvedValue('success');
+
+    const result = await withRetry(fn, fastConfig);
+
+    expect(result).toBe('success');
+    expect(fn).toHaveBeenCalledTimes(1);
+  });
+
+  it('retries on transient failure then succeeds', async () => {
+    const fn = vi.fn()
+      .mockRejectedValueOnce(new Error('timeout'))
+      .mockRejectedValueOnce(new Error('timeout'))
+      .mockResolvedValueOnce('recovered');
+
+    const result = await withRetry(fn, fastConfig, 'test-op');
+
+    expect(result).toBe('recovered');
+    expect(fn).toHaveBeenCalledTimes(3);
+  });
+
+  it('throws after maxRetries exhausted', async () => {
+    const fn = vi.fn().mockRejectedValue(new Error('persistent failure'));
+
+    await expect(withRetry(fn, fastConfig, 'test-op')).rejects.toThrow('persistent failure');
+    // 1 initial + 3 retries = 4 total
+    expect(fn).toHaveBeenCalledTimes(4);
+  });
+
+  it('does not retry non-retryable errors', async () => {
+    const fn = vi.fn().mockRejectedValue(new Error('invalid_api_key'));
+
+    await expect(withRetry(fn, fastConfig)).rejects.toThrow('invalid_api_key');
+    expect(fn).toHaveBeenCalledTimes(1);
+  });
+
+  it('does not retry authentication errors', async () => {
+    const fn = vi.fn().mockRejectedValue(new Error('Request unauthorized'));
+
+    await expect(withRetry(fn, fastConfig)).rejects.toThrow('Request unauthorized');
+    expect(fn).toHaveBeenCalledTimes(1);
+  });
+
+  it('converts non-Error throws to Error objects', async () => {
+    const fn = vi.fn().mockRejectedValue('string error');
+
+    await expect(withRetry(fn, { ...fastConfig, maxRetries: 0 })).rejects.toThrow('string error');
+  });
+
+  it('respects maxDelayMs cap', async () => {
+    const cappedConfig: RetryConfig = {
+      maxRetries: 2,
+      initialDelayMs: 1,
+      backoffMultiplier: 10,
+      maxDelayMs: 2,
+      nonRetryablePatterns: [],
+    };
+
+    let callCount = 0;
+    const fn = vi.fn().mockImplementation(() => {
+      callCount++;
+      if (callCount < 3) return Promise.reject(new Error('fail'));
+      return Promise.resolve('ok');
+    });
+
+    // If maxDelayMs weren't respected, a 10x multiplier could cause very long waits.
+    // With maxDelayMs=2ms, this completes quickly.
+    const result = await withRetry(fn, cappedConfig, 'capped-test');
+    expect(result).toBe('ok');
+    expect(fn).toHaveBeenCalledTimes(3);
+  });
+
+  it('uses default config when none provided', async () => {
+    const fn = vi.fn().mockResolvedValue('default-config');
+
+    const result = await withRetry(fn);
+
+    expect(result).toBe('default-config');
+    expect(fn).toHaveBeenCalledTimes(1);
+  });
+
+  it('increases delay exponentially between retries', async () => {
+    const config: RetryConfig = {
+      maxRetries: 2,
+      initialDelayMs: 20,
+      backoffMultiplier: 2,
+      maxDelayMs: 1000,
+      nonRetryablePatterns: [],
+    };
+
+    // Measuring wall-clock gaps is both flaky under load and too weak to prove
+    // backoff. Instead pin jitter to 100% of the computed delay and inspect the
+    // delays actually handed to setTimeout.
+    const randomSpy = vi.spyOn(Math, 'random').mockReturnValue(1);
+    const timeoutSpy = vi.spyOn(globalThis, 'setTimeout');
+
+    const fn = vi.fn()
+      .mockRejectedValueOnce(new Error('fail'))
+      .mockRejectedValueOnce(new Error('fail'))
+      .mockResolvedValueOnce('ok');
+
+    try {
+      await withRetry(fn, config, 'backoff-test');
+
+      expect(fn).toHaveBeenCalledTimes(3);
+      // First retry waits initialDelayMs (20ms); the second waits 20ms * 2 = 40ms.
+      const requestedDelays = timeoutSpy.mock.calls.map(([, ms]) => ms);
+      expect(requestedDelays).toEqual([20, 40]);
+    } finally {
+      // Always restore the globals so later tests see the real implementations.
+      randomSpy.mockRestore();
+      timeoutSpy.mockRestore();
+    }
+  });
+});
@@ -0,0 +1,71 @@
+/**
+ * Settings that control how `withRetry` re-runs a failing operation: how many
+ * retries are allowed, how the wait between attempts grows, and which errors
+ * are never retried.
+ */
+export interface RetryConfig {
+  /** Maximum number of retry attempts (default: 3). Does not count the initial attempt. */
+  maxRetries: number;
+  /** Initial delay in milliseconds before first retry (default: 1000). */
+  initialDelayMs: number;
+  /** Multiplier applied to delay after each retry (default: 2). */
+  backoffMultiplier: number;
+  /** Maximum delay in milliseconds (default: 30000). The cap is applied before jitter. */
+  maxDelayMs: number;
+  /**
+   * Errors matching these patterns should NOT be retried (e.g. auth errors, invalid requests).
+   * Each pattern is matched as a case-insensitive substring of the error message.
+   */
+  nonRetryablePatterns: string[];
+}
+
+/**
+ * Retry settings used by `withRetry` when the caller passes no config:
+ * up to 3 retries, starting at 1 second and doubling each time, capped at
+ * 30 seconds. The listed message patterns are treated as permanent failures
+ * and are not retried.
+ */
+export const DEFAULT_RETRY_CONFIG: RetryConfig = {
+  maxRetries: 3,
+  initialDelayMs: 1000,
+  backoffMultiplier: 2,
+  maxDelayMs: 30000,
+  nonRetryablePatterns: [
+    'invalid_api_key',
+    'authentication',
+    'unauthorized',
+    'invalid_request',
+    'context_length_exceeded',
+    'content_policy',
+  ],
+};
+
+/**
+ * Decide whether a failed operation is worth retrying.
+ *
+ * @param error - The error raised by the operation.
+ * @param nonRetryablePatterns - Substrings that mark an error as permanent.
+ *   Matching is case-insensitive against `error.message`. Empty patterns are
+ *   ignored.
+ * @returns `false` when the message contains any non-empty pattern, otherwise `true`.
+ */
+export function isRetryable(error: Error, nonRetryablePatterns: string[]): boolean {
+  const message = error.message.toLowerCase();
+  // An empty pattern is a substring of every message; honouring it would mark
+  // every error as permanent and silently disable retries, so skip it.
+  return !nonRetryablePatterns.some(
+    pattern => pattern !== '' && message.includes(pattern.toLowerCase()),
+  );
+}
+
+/**
+ * Run `fn`, retrying failed attempts with exponential backoff and jitter.
+ *
+ * `fn` is always invoked at least once. After a failure it is retried up to
+ * `config.maxRetries` more times, unless the error is classified as
+ * non-retryable by `isRetryable`. The wait before retry number `k` (0-based)
+ * is `initialDelayMs * backoffMultiplier^k`, capped at `maxDelayMs`, then
+ * scaled by a random factor between 50% and 100%.
+ *
+ * @param fn - The asynchronous operation to run.
+ * @param config - Retry settings; defaults to `DEFAULT_RETRY_CONFIG`.
+ * @param label - Optional name for the operation, used in the warning log.
+ * @returns The value resolved by the first successful call to `fn`.
+ * @throws The last error from `fn` (non-`Error` rejections are wrapped in an
+ *   `Error`) when it is non-retryable or the retry budget is exhausted.
+ */
+export async function withRetry<T>(
+  fn: () => Promise<T>,
+  config: RetryConfig = DEFAULT_RETRY_CONFIG,
+  label?: string,
+): Promise<T> {
+  // Normalise the retry budget: a negative or NaN value must still allow the
+  // initial attempt, and a fractional value must not cause an extra sleep after
+  // the final attempt. Infinity is preserved (retry forever).
+  const maxRetries = Number.isNaN(config.maxRetries)
+    ? 0
+    : Math.max(0, Math.floor(config.maxRetries));
+
+  for (let attempt = 0; ; attempt++) {
+    try {
+      return await fn();
+    } catch (error) {
+      const failure = error instanceof Error ? error : new Error(String(error));
+
+      // Give up on permanent errors, or once the retry budget is spent.
+      if (attempt >= maxRetries || !isRetryable(failure, config.nonRetryablePatterns)) {
+        throw failure;
+      }
+
+      // Exponential backoff, capped at maxDelayMs, then jittered to 50-100%
+      // of the capped delay.
+      const uncapped = config.initialDelayMs * Math.pow(config.backoffMultiplier, attempt);
+      const capped = Math.min(uncapped, config.maxDelayMs);
+      const waitMs = capped * (0.5 + Math.random() * 0.5);
+
+      // Report progress against total attempts (initial try + retries) so the
+      // final retry is not announced after an "N/N failed" message.
+      console.warn(
+        `[retry] ${label ?? 'operation'} attempt ${attempt + 1}/${maxRetries + 1} failed: ${failure.message}. Retrying in ${Math.round(waitMs)}ms...`,
+      );
+
+      await new Promise<void>(resolve => setTimeout(resolve, waitMs));
+    }
+  }
+}
@@ -1,4 +1,6 @@
 import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient } from './types.js';
+import { withRetry } from './retry.js';
+import type { RetryConfig } from './retry.js';

 export type ModelTier = 'fast' | 'default' | 'complex' | 'local';

@@ -8,6 +10,7 @@ export interface ModelRouterConfig {
  complex?: ModelClient;
  local?: ModelClient;
  fallbackChain: ModelClient[];
+  retryConfig?: RetryConfig;
 }

 export class ModelRouter implements ModelClient {
@@ -16,11 +19,13 @@ export class ModelRouter implements ModelClient {
  private fallbackChain: ModelClient[];
  private currentTier: ModelTier = 'default';
  private localProviderName?: string;
+  private retryConfig?: RetryConfig;

  constructor(config: ModelRouterConfig) {
    this.clients = new Map();
    this.defaultClient = config.default;
    this.fallbackChain = config.fallbackChain;
+    this.retryConfig = config.retryConfig;

    this.clients.set('default', config.default);
    if (config.fast) this.clients.set('fast', config.fast);
@@ -49,8 +54,11 @@ export class ModelRouter implements ModelClient {
    const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
    const errors: Error[] = [];

-    // Try primary client
+    // Try primary client (with retry if configured)
    try {
+      if (this.retryConfig) {
+        return await withRetry(() => primaryClient.chat(request), this.retryConfig, 'primary model');
+      }
      return await primaryClient.chat(request);
    } catch (error) {
      errors.push(error instanceof Error ? error : new Error(String(error)));