feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)

Phase 0 — Multi-Model Delegation:
- AgentOrchestrator wraps NativeAgent with delegate() for stateless single-turn calls to any model tier (fast/default/complex/local)
- DelegationConfig maps task types (compaction, classification, etc.) to model tiers
- Delegation prompts for compaction, memory extraction, classification, and tool summarisation
- Per-tier usage tracking for cost visibility
- Config schema: agents.delegation and agents.primary_tier

Phase 1 — Context Compaction:
- Token estimation (char/4 heuristic) with context window lookup
- shouldCompact() threshold check against context window percentage
- compactHistory() splits old/recent messages, delegates summary to fast tier, returns CompactionResult
- Automatic compaction in AgentOrchestrator.process() when configured
- Force-compact via orchestrator.compact() with session persistence
- Session.replaceHistory() with atomic SQLite transaction
- /compact TUI command with feedback on compacted token counts
- Config schema: compaction.enabled, threshold_pct, keep_turns, summary_max_tokens

Tests: 385 passing across 50 files (22 new tests in 2 new test files)
2026-02-06 13:17:02 -08:00
parent f7cc87a4bb
commit 306e11bd2e
22 changed files with 1562 additions and 12 deletions
@@ -0,0 +1,74 @@
+import type { Message } from '../models/types.js';
+import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
+import { COMPACTION_SYSTEM_PROMPT } from '../backends/native/prompts.js';
+import { estimateMessageTokens } from './tokens.js';
+
+/**
+ * Tunable settings for history compaction.
+ */
+export interface CompactionConfig {
+  /**
+   * Share of the context window, expressed as a percentage, at which
+   * compaction is triggered (default: 80).
+   */
+  thresholdPct: number;
+  /**
+   * How many of the most recent turns are left untouched. A turn is a
+   * user+assistant pair, so compactHistory() keeps `keepTurns * 2` messages.
+   */
+  keepTurns: number;
+  /**
+   * Token cap for the generated summary; passed as `maxTokens` on the
+   * delegated summarisation call.
+   */
+  summaryMaxTokens: number;
+}
+
+/**
+ * Outcome of a compactHistory() call.
+ */
+export interface CompactionResult {
+  /**
+   * History after compaction: the summary message followed by the retained
+   * recent messages. When nothing was compacted this is the input array.
+   */
+  messages: Message[];
+  /**
+   * How many original messages were folded into the summary
+   * (0 when nothing was compacted).
+   */
+  compactedCount: number;
+  /**
+   * Token estimate for the history as it was passed in.
+   */
+  tokensBefore: number;
+  /**
+   * Token estimate for `messages` as returned.
+   */
+  tokensAfter: number;
+}
+
+/**
+ * Baseline compaction settings: an 80% threshold, the last four turns
+ * preserved, and a summary limited to 1024 tokens.
+ */
+export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
+  // Trigger level, as a percentage of the context window.
+  thresholdPct: 80,
+  // Four turns, i.e. the trailing eight messages stay verbatim.
+  keepTurns: 4,
+  // Upper bound on the size of the generated summary.
+  summaryMaxTokens: 1024,
+};
+
+/**
+ * Replaces the older part of a conversation with a single summary message.
+ *
+ * The trailing `config.keepTurns * 2` messages are kept verbatim (a turn is
+ * counted as two messages). Everything before them is rendered as
+ * `role: content` text, sent to the tier returned by
+ * `orchestrator.getDelegationTier('compaction')` together with
+ * `COMPACTION_SYSTEM_PROMPT`, and replaced by one assistant message that
+ * carries the returned summary.
+ *
+ * The input array is never mutated. When there is nothing older than the
+ * kept tail, the input array itself is returned with `compactedCount: 0`
+ * and no delegation call is made.
+ *
+ * @param opts.messages - Conversation history, oldest first.
+ * @param opts.orchestrator - Used to pick the delegation tier and to run the
+ *   summarisation call.
+ * @param opts.config - Supplies `keepTurns` and `summaryMaxTokens`;
+ *   `thresholdPct` is not read here.
+ * @returns The new history plus before/after token estimates.
+ * @throws Whatever `orchestrator.delegate()` rejects with; it is not caught.
+ */
+export async function compactHistory(opts: {
+  messages: Message[];
+  orchestrator: AgentOrchestrator;
+  config: CompactionConfig;
+}): Promise<CompactionResult> {
+  const { messages, orchestrator, config } = opts;
+  const tokensBefore = estimateMessageTokens(messages);
+
+  // Normalise to a whole, non-negative number of trailing messages to keep.
+  const keepCount = Math.max(0, Math.floor(config.keepTurns * 2));
+
+  // Split on an explicit index rather than slice(-keepCount): with
+  // keepCount === 0, slice(-0) is slice(0) and would "keep" the whole
+  // history while compacting nothing.
+  const splitIndex = messages.length - keepCount;
+
+  // Written as a negated comparison so a NaN split index (from a NaN
+  // keepTurns) also takes the "nothing to compact" path.
+  if (!(splitIndex > 0)) {
+    return {
+      messages,
+      compactedCount: 0,
+      tokensBefore,
+      tokensAfter: tokensBefore,
+    };
+  }
+
+  const toCompact = messages.slice(0, splitIndex);
+  const toKeep = messages.slice(splitIndex);
+
+  // NOTE(review): assumes msg.content stringifies meaningfully (i.e. is a
+  // string) — confirm against the Message type.
+  const formattedConversation = toCompact.map((msg) => `${msg.role}: ${msg.content}`).join('\n\n');
+
+  const tier = orchestrator.getDelegationTier('compaction');
+
+  const result = await orchestrator.delegate({
+    tier,
+    systemPrompt: COMPACTION_SYSTEM_PROMPT,
+    message: formattedConversation,
+    maxTokens: config.summaryMaxTokens,
+  });
+
+  const summaryMessage: Message = {
+    role: 'assistant',
+    content: '[Summary of earlier conversation]\n\n' + result.content,
+  };
+
+  // Build the new history once so the returned messages and the token
+  // estimate refer to the same array.
+  const compacted: Message[] = [summaryMessage, ...toKeep];
+
+  return {
+    messages: compacted,
+    compactedCount: toCompact.length,
+    tokensBefore,
+    tokensAfter: estimateMessageTokens(compacted),
+  };
+}