feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)

Phase 0 — Multi-Model Delegation:
- AgentOrchestrator wraps NativeAgent with delegate() for stateless single-turn calls to any model tier (fast/default/complex/local)
- DelegationConfig maps task types (compaction, classification, etc.) to model tiers
- Delegation prompts for compaction, memory extraction, classification, and tool summarisation
- Per-tier usage tracking for cost visibility
- Config schema: agents.delegation and agents.primary_tier

Phase 1 — Context Compaction:
- Token estimation (char/4 heuristic) with context window lookup
- shouldCompact() threshold check against context window percentage
- compactHistory() splits old/recent messages, delegates summary to fast tier, returns CompactionResult
- Automatic compaction in AgentOrchestrator.process() when configured
- Force-compact via orchestrator.compact() with session persistence
- Session.replaceHistory() with atomic SQLite transaction
- /compact TUI command with feedback on compacted token counts
- Config schema: compaction.enabled, threshold_pct, keep_turns, summary_max_tokens

Tests: 385 passing across 50 files (22 new tests in 2 new test files)
2026-02-06 13:17:02 -08:00
parent f7cc87a4bb
commit 306e11bd2e
22 changed files with 1562 additions and 12 deletions
@@ -0,0 +1,309 @@
+import type { ModelRouter, ModelTier } from '../../models/router.js';
+import type { ChatRequest, Message, TokenUsage } from '../../models/types.js';
+import type { Session } from '../../session/index.js';
+import type { ToolRegistry } from '../../tools/registry.js';
+import type { ToolExecutor } from '../../tools/executor.js';
+import { NativeAgent } from './agent.js';
+import type { ToolUseEvent } from './agent.js';
+import { shouldCompact } from '../../context/tokens.js';
+import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
+
+// ── Public types ──────────────────────────────────────────────────────
+
+/**
+ * A single-turn, stateless request to a sub-agent at a specific tier.
+ *
+ * Consumed by `AgentOrchestrator.delegate()`:
+ * - `tier`: the requested model tier; `delegate()` substitutes `'default'`
+ *   when the router has no client for it.
+ * - `systemPrompt`: sent as the `system` field of the chat request.
+ * - `message`: sent as the only message of the request, with role `user`.
+ * - `maxTokens`: forwarded unchanged as the chat request's `maxTokens`.
+ */
+export interface SubAgentRequest {
+  tier: ModelTier;
+  systemPrompt: string;
+  message: string;
+  maxTokens?: number;
+  /**
+   * When true, include tools from the toolRegistry in the request.
+   * Has no effect when the orchestrator was built without a toolRegistry.
+   */
+  tools?: boolean;
+}
+
+/**
+ * Result returned from a sub-agent delegation call.
+ *
+ * - `content`: the `content` of the model response.
+ * - `usage`: the token usage reported on the model response.
+ * - `tier`: the tier the call was actually sent to; this is `'default'`
+ *   rather than the requested tier when `delegate()` had to fall back.
+ */
+export interface SubAgentResult {
+  content: string;
+  usage: TokenUsage;
+  tier: ModelTier;
+}
+
+/**
+ * Maps each delegation task to the model tier that should handle it.
+ *
+ * The property names are the valid task identifiers accepted by
+ * `AgentOrchestrator.getDelegationTier()`, which returns the tier stored
+ * under the given key.
+ */
+export interface DelegationConfig {
+  compaction: ModelTier;
+  memory_extraction: ModelTier;
+  classification: ModelTier;
+  tool_summarisation: ModelTier;
+  complex_reasoning: ModelTier;
+}
+
+/**
+ * Per-tier cumulative usage statistics.
+ *
+ * Updated after every `AgentOrchestrator.delegate()` call and exposed
+ * (as copies) through `getDelegationUsage()`:
+ * - `inputTokens` / `outputTokens`: running sums of the usage reported by
+ *   each delegated response for the tier.
+ * - `calls`: number of delegated calls recorded for the tier.
+ */
+interface TierUsageStats {
+  inputTokens: number;
+  outputTokens: number;
+  calls: number;
+}
+
+/**
+ * Full configuration for the AgentOrchestrator.
+ *
+ * - `modelRouter`: used both as the primary NativeAgent's `modelClient` and
+ *   for delegated calls.
+ * - `systemPrompt`, `toolExecutor`, `maxIterations`, `onToolUse`: passed
+ *   straight through to the primary NativeAgent.
+ * - `session`: passed to the primary NativeAgent and also used by
+ *   `compact()` to persist the compacted history.
+ * - `toolRegistry`: passed to the primary NativeAgent and also used by
+ *   `delegate()` when a request sets `tools: true`.
+ */
+export interface OrchestratorConfig {
+  modelRouter: ModelRouter;
+  systemPrompt: string;
+  session?: Session;
+  toolRegistry?: ToolRegistry;
+  toolExecutor?: ToolExecutor;
+  maxIterations?: number;
+  /** The tier used by the primary NativeAgent for user-facing conversation. */
+  primaryTier: ModelTier;
+  /** Which tier to use for each delegation task type. */
+  delegation: DelegationConfig;
+  /**
+   * Maximum nesting depth for delegation calls (safety guard).
+   * NOTE(review): the orchestrator stores this value but no visible code
+   * reads it, so the limit is not currently enforced — confirm intent.
+   */
+  maxDelegationDepth: number;
+  onToolUse?: (event: ToolUseEvent) => void;
+  /**
+   * Context compaction settings. When provided, enables automatic compaction
+   * before each `process()` call. An explicit `compact()` call works without
+   * it and falls back to DEFAULT_COMPACTION_CONFIG.
+   */
+  compaction?: CompactionConfig;
+  /**
+   * Model identifier for the primary model (used for context window lookup).
+   * When omitted, the string 'unknown' is passed to the threshold check.
+   */
+  modelName?: string;
+  /** Optional override for the context window size (in tokens). */
+  contextWindow?: number;
+}
+
+// ── AgentOrchestrator ─────────────────────────────────────────────────
+
+/**
+ * Wraps a primary NativeAgent and adds the ability to delegate
+ * single-turn sub-tasks to different model tiers via the ModelRouter.
+ *
+ * The primary agent handles the main conversation loop (with tools),
+ * while `delegate()` enables cheap, stateless calls for tasks like
+ * compaction, classification, and memory extraction.
+ *
+ * When a compaction config is supplied, `process()` also checks the
+ * conversation history against the context-window threshold and compacts
+ * it before handing the message to the primary agent.
+ */
+export class AgentOrchestrator {
+  private readonly _agent: NativeAgent;
+  private readonly _modelRouter: ModelRouter;
+  private readonly _delegation: DelegationConfig;
+  // NOTE(review): stored from config but not read anywhere in this class yet;
+  // the delegation depth limit is therefore not enforced — confirm intent.
+  private readonly _maxDelegationDepth: number;
+  private readonly _toolRegistry?: ToolRegistry;
+  private readonly _session?: Session;
+  private readonly _compactionConfig?: CompactionConfig;
+  private readonly _modelName?: string;
+  private readonly _contextWindow?: number;
+  /** Cumulative usage of delegated calls, keyed by the tier actually used. */
+  private readonly _usageByTier: Map<string, TierUsageStats> = new Map();
+
+  /**
+   * Build the orchestrator and its primary NativeAgent.
+   *
+   * @param config - Router, prompts, optional session/tools, tier mapping and
+   *   optional compaction settings. The primary agent is created with the
+   *   router as its model client and is switched to `config.primaryTier`.
+   */
+  constructor(config: OrchestratorConfig) {
+    this._modelRouter = config.modelRouter;
+    this._delegation = config.delegation;
+    this._maxDelegationDepth = config.maxDelegationDepth;
+    this._toolRegistry = config.toolRegistry;
+    this._session = config.session;
+    this._compactionConfig = config.compaction;
+    this._modelName = config.modelName;
+    this._contextWindow = config.contextWindow;
+
+    // Create the primary NativeAgent for user-facing conversation
+    this._agent = new NativeAgent({
+      modelClient: config.modelRouter,
+      systemPrompt: config.systemPrompt,
+      session: config.session,
+      toolRegistry: config.toolRegistry,
+      toolExecutor: config.toolExecutor,
+      maxIterations: config.maxIterations,
+      onToolUse: config.onToolUse,
+    });
+
+    // Set the primary tier on the agent
+    this._agent.setModelTier(config.primaryTier);
+  }
+
+  // ── Delegation ────────────────────────────────────────────────────
+
+  /**
+   * Perform a single-turn, stateless call to a model at the specified tier.
+   *
+   * This is used for internal sub-tasks (compaction, classification, etc.)
+   * that don't need the full conversation history or tool loop.
+   *
+   * If the requested tier is not available on the router, falls back to
+   * the 'default' tier with a warning.
+   *
+   * @param request - Tier, system prompt, user message and optional limits.
+   * @returns The response content, its token usage, and the tier that was
+   *   actually used (which differs from `request.tier` after a fallback).
+   * @throws Whatever the router's `chat()` call rejects with; nothing is
+   *   caught here and no usage is recorded for a failed call.
+   */
+  async delegate(request: SubAgentRequest): Promise<SubAgentResult> {
+    let tier = request.tier;
+
+    // Check if the requested tier is available; fall back to 'default' if not.
+    // When 'default' itself was requested there is nothing to fall back to,
+    // so the (otherwise self-contradictory) warning is skipped and the call
+    // proceeds exactly as before.
+    const client = this._modelRouter.getClient(tier);
+    if (!client && tier !== 'default') {
+      console.warn(
+        `[Flynn:delegate] Tier '${tier}' not available, falling back to 'default'`,
+      );
+      tier = 'default';
+    }
+
+    // Build the single-turn chat request
+    const messages: Message[] = [
+      { role: 'user', content: request.message },
+    ];
+
+    const chatRequest: ChatRequest = {
+      messages,
+      system: request.systemPrompt,
+      maxTokens: request.maxTokens,
+    };
+
+    // Optionally include tools from the registry
+    if (request.tools && this._toolRegistry) {
+      chatRequest.tools = this._toolRegistry.toAnthropicFormat();
+    }
+
+    const response = await this._modelRouter.chat(chatRequest, tier);
+
+    // Track cumulative usage for this tier
+    this._trackUsage(tier, response.usage);
+
+    console.log(
+      `[Flynn:delegate] tier=${tier} tokens=${response.usage.inputTokens}+${response.usage.outputTokens}`,
+    );
+
+    return {
+      content: response.content,
+      usage: response.usage,
+      tier,
+    };
+  }
+
+  // ── Primary agent proxies ─────────────────────────────────────────
+
+  /**
+   * Process a user message through the primary NativeAgent.
+   * This is the main entry point for user-facing conversation.
+   *
+   * When compaction is configured, checks whether the conversation history
+   * exceeds the context window threshold and compacts it before processing.
+   * A failure of that automatic compaction is logged and does not prevent
+   * the message from being processed.
+   *
+   * @param userMessage - The user's message text.
+   * @returns The primary agent's reply.
+   */
+  async process(userMessage: string): Promise<string> {
+    await this.compactIfNeeded();
+    return this._agent.process(userMessage);
+  }
+
+  /**
+   * Force-compact the current conversation history regardless of threshold.
+   *
+   * Uses the configured compaction settings, or DEFAULT_COMPACTION_CONFIG
+   * when none were supplied.
+   *
+   * @returns `null` when the history is empty. Otherwise the result of
+   *   `compactHistory()`; when its `compactedCount` is 0 the result is
+   *   returned as-is and nothing is persisted or logged.
+   * @throws Whatever `compactHistory()` or `Session.replaceHistory()` throws.
+   */
+  async compact(): Promise<CompactionResult | null> {
+    const config = this._compactionConfig ?? DEFAULT_COMPACTION_CONFIG;
+    const messages = this.getHistory();
+
+    if (messages.length === 0) {
+      return null;
+    }
+
+    const result = await compactHistory({
+      messages,
+      orchestrator: this,
+      config,
+    });
+
+    // If nothing was actually compacted, skip the replace
+    if (result.compactedCount === 0) {
+      return result;
+    }
+
+    // Persist the compacted history.
+    // NOTE(review): without a session nothing is written back here, so the
+    // compacted messages only reach the caller via the return value — confirm
+    // whether the primary agent's in-memory history should be updated too.
+    if (this._session) {
+      this._session.replaceHistory(result.messages);
+    }
+
+    console.log(
+      `[Flynn:compact] Compacted ${result.compactedCount} messages: ` +
+      `${result.tokensBefore} → ${result.tokensAfter} tokens`,
+    );
+
+    return result;
+  }
+
+  /** Reset the primary agent's conversation history. */
+  reset(): void {
+    this._agent.reset();
+  }
+
+  /** Get the primary agent's conversation history. */
+  getHistory(): Message[] {
+    return this._agent.getHistory();
+  }
+
+  /** Set the model tier on the primary agent. */
+  setModelTier(tier: ModelTier): void {
+    this._agent.setModelTier(tier);
+  }
+
+  /** Get the current model tier of the primary agent. */
+  getModelTier(): ModelTier {
+    return this._agent.getModelTier();
+  }
+
+  /** Set the tool-use callback on the primary agent. */
+  setOnToolUse(callback: ((event: ToolUseEvent) => void) | undefined): void {
+    this._agent.setOnToolUse(callback);
+  }
+
+  // ── Usage & config accessors ──────────────────────────────────────
+
+  /**
+   * Returns cumulative delegation usage stats per tier.
+   * Useful for cost tracking and visibility into sub-agent calls.
+   *
+   * Only calls made through `delegate()` are counted. Each entry is a copy,
+   * so mutating the returned object does not affect the internal counters.
+   */
+  getDelegationUsage(): Record<string, TierUsageStats> {
+    const result: Record<string, TierUsageStats> = {};
+    for (const [tier, stats] of this._usageByTier) {
+      result[tier] = { ...stats };
+    }
+    return result;
+  }
+
+  /**
+   * Look up which model tier is configured for a given delegation task.
+   * Convenience method so callers don't need to access the config directly.
+   */
+  getDelegationTier(task: keyof DelegationConfig): ModelTier {
+    return this._delegation[task];
+  }
+
+  // ── Private helpers ───────────────────────────────────────────────
+
+  /**
+   * Check whether automatic compaction should run, and if so, compact.
+   * Called before each `process()` call; a no-op unless compaction is
+   * configured and the history is non-empty.
+   *
+   * Automatic compaction is best-effort: if it throws (for example because
+   * the delegated summary call or the history replacement fails), the error
+   * is logged as a warning and swallowed so the user's turn still runs
+   * against the uncompacted history. Explicit `compact()` calls still throw.
+   */
+  private async compactIfNeeded(): Promise<void> {
+    if (!this._compactionConfig) return;
+
+    const messages = this.getHistory();
+    if (messages.length === 0) return;
+
+    const model = this._modelName ?? 'unknown';
+    const needs = shouldCompact({
+      messages,
+      model,
+      contextWindow: this._contextWindow,
+      thresholdPct: this._compactionConfig.thresholdPct,
+    });
+
+    if (!needs) return;
+
+    try {
+      await this.compact();
+    } catch (error: unknown) {
+      const reason = error instanceof Error ? error.message : String(error);
+      console.warn(
+        `[Flynn:compact] Automatic compaction failed; continuing with uncompacted history: ${reason}`,
+      );
+    }
+  }
+
+  /** Accumulate usage stats for a given tier, creating the entry on first use. */
+  private _trackUsage(tier: ModelTier, usage: TokenUsage): void {
+    const existing = this._usageByTier.get(tier);
+    if (existing) {
+      existing.inputTokens += usage.inputTokens;
+      existing.outputTokens += usage.outputTokens;
+      existing.calls += 1;
+    } else {
+      this._usageByTier.set(tier, {
+        inputTokens: usage.inputTokens,
+        outputTokens: usage.outputTokens,
+        calls: 1,
+      });
+    }
+  }
+}