feat: add P2 features — retry policy, prompt templating, usage tracking, tech debt cleanup

- Extract shared splitMessage() into channels/utils.ts (dedup 4 adapters)
- Add Slack user name resolution with caching (users.info API)
- Add withRetry() with exponential backoff + jitter, isRetryable() filter
- Wire retry config into ModelRouter.chat() (non-streaming only)
- Add assembleSystemPrompt() multi-file template system (SOUL/AGENTS/IDENTITY/USER/TOOLS.md)
- Add usage tracking accumulators in NativeAgent + AgentOrchestrator
- Add estimateCost() with per-model pricing table
- Add /usage TUI command with full usage report formatting
- Add retrySchema and promptSchema to config schema

Tests: 569 passing, typecheck clean
2026-02-06 15:12:35 -08:00
parent de68deb1b2
commit 4316dbd3be
24 changed files with 902 additions and 143 deletions
@@ -5,6 +5,7 @@ export {
  type SubAgentRequest,
  type SubAgentResult,
  type DelegationConfig,
+  type UsageReport,
 } from './native/index.js';
 export {
  COMPACTION_SYSTEM_PROMPT,
@@ -1,4 +1,4 @@
-import type { ModelClient, Message, ChatRequest, ChatResponse, ModelToolCall } from '../../models/types.js';
+import type { ModelClient, Message, ChatRequest, ChatResponse, ModelToolCall, TokenUsage } from '../../models/types.js';
 import type { ModelRouter, ModelTier } from '../../models/router.js';
 import type { Session } from '../../session/index.js';
 import type { ToolRegistry } from '../../tools/registry.js';
@@ -39,6 +39,8 @@ export class NativeAgent {
  private toolExecutor?: ToolExecutor;
  private maxIterations: number;
  private onToolUse?: (event: ToolUseEvent) => void;
+  private _totalUsage: TokenUsage = { inputTokens: 0, outputTokens: 0 };
+  private _callCount: number = 0;

  constructor(config: NativeAgentConfig) {
    this.modelClient = config.modelClient;
@@ -79,6 +81,10 @@ export class NativeAgent {

    const response = await this.chatWithRouter(request);

+    this._totalUsage.inputTokens += response.usage.inputTokens;
+    this._totalUsage.outputTokens += response.usage.outputTokens;
+    this._callCount++;
+
    if (response.fallback) {
      console.warn(`[Flynn] ${response.fallbackReason}`);
    }
@@ -110,6 +116,10 @@ export class NativeAgent {

      const response = await this.chatWithRouter(request);

+      this._totalUsage.inputTokens += response.usage.inputTokens;
+      this._totalUsage.outputTokens += response.usage.outputTokens;
+      this._callCount++;
+
      if (response.fallback) {
        console.warn(`[Flynn] ${response.fallbackReason}`);
      }
@@ -185,6 +195,16 @@ export class NativeAgent {
    } else {
      this.inMemoryHistory = [];
    }
+    this.resetUsage();
+  }
+
+  getUsage(): { inputTokens: number; outputTokens: number; calls: number } { // snapshot of the accumulated token totals and call count
+    return { ...this._totalUsage, calls: this._callCount }; // spread copies the totals, so the caller gets a new object rather than the internal accumulator
+  }
+
+  resetUsage(): void { // zero the usage accumulators; also invoked at the end of reset()
+    this._totalUsage = { inputTokens: 0, outputTokens: 0 }; // replaces the object instead of zeroing fields in place
+    this._callCount = 0;
  }

  getHistory(): Message[] {
@@ -5,6 +5,7 @@ export {
  type SubAgentRequest,
  type SubAgentResult,
  type DelegationConfig,
+  type UsageReport,
 } from './orchestrator.js';
 export {
  COMPACTION_SYSTEM_PROMPT,
@@ -8,6 +8,7 @@ import { NativeAgent } from './agent.js';
 import type { ToolUseEvent } from './agent.js';
 import { shouldCompact } from '../../context/tokens.js';
 import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
+import { estimateCost } from '../../models/costs.js';

 // ── Public types ──────────────────────────────────────────────────────

@@ -44,6 +45,25 @@ interface TierUsageStats {
  calls: number;
 }

+/** Aggregated token and call usage for an orchestrator session (the return shape of AgentOrchestrator.getUsage()). */
+export interface UsageReport {
+  /** Usage accumulated by the primary (user-facing) agent. */
+  primary: {
+    inputTokens: number;
+    outputTokens: number;
+    calls: number;
+  };
+  /** Usage accumulated by delegated sub-agents, broken down by tier. */
+  delegation: Record<string, { inputTokens: number; outputTokens: number; calls: number }>;
+  /** Primary and delegation usage summed together. */
+  total: {
+    inputTokens: number;
+    outputTokens: number;
+    calls: number;
+    estimatedCost: number; // value produced by estimateCost(); units are not visible here — see models/costs.js
+  };
+}
+
 /** Full configuration for the AgentOrchestrator. */
 export interface OrchestratorConfig {
  modelRouter: ModelRouter;
@@ -228,9 +248,10 @@ export class AgentOrchestrator {
    return result;
  }

-  /** Reset the primary agent's conversation history. */
+  /** Reset the primary agent's conversation history and usage stats, and clear the orchestrator's per-tier usage map. */
  reset(): void {
    this._agent.reset(); // presumably NativeAgent.reset(), which also calls resetUsage() — confirm _agent's type
+    this._usageByTier.clear(); // presumably the per-tier delegation stats reported by getDelegationUsage() — confirm
  }

  /** Get the primary agent's conversation history. */
@@ -267,6 +288,36 @@ export class AgentOrchestrator {
    return result;
  }

+  /**
+   * Builds a UsageReport from the primary agent's counters plus the per-tier delegation usage, with combined totals.
+   * estimatedCost prices the combined totals (delegation included) with a single estimateCost() call for this._modelName, so it is approximate if delegation tiers use differently priced models.
+   */
+  getUsage(): UsageReport {
+    const primary = this._agent.getUsage();
+    const delegation = this.getDelegationUsage();
+
+    let totalInput = primary.inputTokens; // totals start from the primary agent's numbers
+    let totalOutput = primary.outputTokens;
+    let totalCalls = primary.calls;
+
+    for (const stats of Object.values(delegation)) { // add every delegation entry on top
+      totalInput += stats.inputTokens;
+      totalOutput += stats.outputTokens;
+      totalCalls += stats.calls;
+    }
+
+    return {
+      primary,
+      delegation,
+      total: {
+        inputTokens: totalInput,
+        outputTokens: totalOutput,
+        calls: totalCalls,
+        estimatedCost: estimateCost(totalInput, totalOutput, this._modelName), // one model name is used for all tokens, whichever tier produced them
+      },
+    };
+  }
+
  /**
   * Look up which model tier is configured for a given delegation task.
   * Convenience method so callers don't need to access the config directly.