feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)

Phase 0 — Multi-Model Delegation:
- AgentOrchestrator wraps NativeAgent with delegate() for stateless
  single-turn calls to any model tier (fast/default/complex/local)
- DelegationConfig maps task types (compaction, classification, etc.)
  to model tiers
- Delegation prompts for compaction, memory extraction, classification,
  and tool summarisation
- Per-tier usage tracking for cost visibility
- Config schema: agents.delegation and agents.primary_tier

Phase 1 — Context Compaction:
- Token estimation (char/4 heuristic) with context window lookup
- shouldCompact() threshold check against context window percentage
- compactHistory() splits old/recent messages, delegates summary to
  fast tier, returns CompactionResult
- Automatic compaction in AgentOrchestrator.process() when configured
- Force-compact via orchestrator.compact() with session persistence
- Session.replaceHistory() with atomic SQLite transaction
- /compact TUI command with feedback on compacted token counts
- Config schema: compaction.enabled, threshold_pct, keep_turns,
  summary_max_tokens

Tests: 385 passing across 50 files (22 new tests in 2 new test files)
This commit is contained in:
William Valentin
2026-02-06 13:17:02 -08:00
parent f7cc87a4bb
commit 306e11bd2e
22 changed files with 1562 additions and 12 deletions
+74
View File
@@ -0,0 +1,74 @@
import type { Message } from '../models/types.js';
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
import { COMPACTION_SYSTEM_PROMPT } from '../backends/native/prompts.js';
import { estimateMessageTokens } from './tokens.js';
/**
 * Settings that control when and how conversation history is compacted.
 */
export interface CompactionConfig {
/**
 * Percentage of context window that triggers compaction (default: 80).
 * Not read by compactHistory(); the threshold check happens elsewhere.
 */
thresholdPct: number;
/**
 * Number of recent turns (user+assistant pairs) to always keep intact.
 * compactHistory() keeps the last `keepTurns * 2` messages verbatim.
 */
keepTurns: number;
/**
 * Maximum tokens for the compaction summary response.
 * Passed as `maxTokens` to the delegate call in compactHistory().
 */
summaryMaxTokens: number;
}
/**
 * Outcome of a compactHistory() call: the new history plus before/after
 * size figures.
 */
export interface CompactionResult {
/**
 * The compacted messages: [summary, ...recentMessages].
 * When the history is too short to compact, this is the input history
 * unchanged (no summary message is added).
 */
messages: Message[];
/**
 * Number of messages that were compacted (removed and replaced by the
 * summary). 0 when nothing was compacted.
 */
compactedCount: number;
/** Estimated tokens before compaction. */
tokensBefore: number;
/**
 * Estimated tokens after compaction. Equal to `tokensBefore` when nothing
 * was compacted.
 */
tokensAfter: number;
}
/**
 * Default compaction settings: trigger at 80% of the context window, keep the
 * last 4 turns (8 messages in compactHistory()) intact, and cap the summary
 * response at 1024 tokens.
 */
export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
thresholdPct: 80,
keepTurns: 4,
summaryMaxTokens: 1024,
};
/**
 * Compacts a conversation by replacing its older messages with one summary
 * message.
 *
 * The last `config.keepTurns * 2` messages are kept verbatim. Everything
 * before them is rendered as `role: content` paragraphs and sent, together
 * with `COMPACTION_SYSTEM_PROMPT`, to the tier returned by
 * `orchestrator.getDelegationTier('compaction')`. The reply becomes a single
 * assistant message placed ahead of the kept messages.
 *
 * @param opts.messages - Conversation history, oldest first. The array is
 *   sliced, never mutated.
 * @param opts.orchestrator - Supplies the delegation tier and performs the
 *   summary call.
 * @param opts.config - Compaction settings; `keepTurns` and
 *   `summaryMaxTokens` are read here.
 * @returns The compacted history with message and token counts. When there is
 *   nothing older than the kept tail, the input array is returned as-is with
 *   `compactedCount: 0` and no delegate call is made.
 * @throws Whatever `orchestrator.delegate()` rejects with; errors are not
 *   caught here.
 */
export async function compactHistory(opts: {
  messages: Message[];
  orchestrator: AgentOrchestrator;
  config: CompactionConfig;
}): Promise<CompactionResult> {
  const { messages, orchestrator, config } = opts;
  const tokensBefore = estimateMessageTokens(messages);

  // Two messages per turn (user + assistant). Zero, negative or NaN values
  // collapse to "keep nothing" instead of producing a nonsensical slice.
  const requestedKeep = Math.floor(config.keepTurns * 2);
  const keepCount = requestedKeep > 0 ? requestedKeep : 0;

  // Split on an explicit index rather than slice(-keepCount): with
  // keepCount === 0, slice(0, -0) is empty and slice(-0) is the whole array,
  // which used to summarise nothing and still prepend a summary message.
  const splitIndex = messages.length - keepCount;
  if (splitIndex <= 0) {
    return {
      messages,
      compactedCount: 0,
      tokensBefore,
      tokensAfter: tokensBefore,
    };
  }

  const toCompact = messages.slice(0, splitIndex);
  const toKeep = messages.slice(splitIndex);

  // NOTE(review): assumes msg.content interpolates to readable text (i.e. is a
  // string) — confirm against the Message type; structured content would
  // stringify as "[object Object]".
  const formattedConversation = toCompact
    .map((msg) => `${msg.role}: ${msg.content}`)
    .join('\n\n');

  const tier = orchestrator.getDelegationTier('compaction');
  const result = await orchestrator.delegate({
    tier,
    systemPrompt: COMPACTION_SYSTEM_PROMPT,
    message: formattedConversation,
    maxTokens: config.summaryMaxTokens,
  });

  const summaryMessage: Message = {
    role: 'assistant',
    content: '[Summary of earlier conversation]\n\n' + result.content,
  };

  // Build the compacted history once so the returned array and the token
  // estimate are guaranteed to describe the same messages.
  const compacted: Message[] = [summaryMessage, ...toKeep];
  return {
    messages: compacted,
    compactedCount: toCompact.length,
    tokensBefore,
    tokensAfter: estimateMessageTokens(compacted),
  };
}