feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)
Phase 0 — Multi-Model Delegation:
- AgentOrchestrator wraps NativeAgent with delegate() for stateless single-turn calls to any model tier (fast/default/complex/local)
- DelegationConfig maps task types (compaction, classification, etc.) to model tiers
- Delegation prompts for compaction, memory extraction, classification, and tool summarisation
- Per-tier usage tracking for cost visibility
- Config schema: agents.delegation and agents.primary_tier

Phase 1 — Context Compaction:
- Token estimation (char/4 heuristic) with context window lookup
- shouldCompact() threshold check against context window percentage
- compactHistory() splits old/recent messages, delegates summary to fast tier, returns CompactionResult
- Automatic compaction in AgentOrchestrator.process() when configured
- Force-compact via orchestrator.compact() with session persistence
- Session.replaceHistory() with atomic SQLite transaction
- /compact TUI command with feedback on compacted token counts
- Config schema: compaction.enabled, threshold_pct, keep_turns, summary_max_tokens

Tests: 385 passing across 50 files (22 new tests in 2 new test files)
This commit is contained in:
@@ -0,0 +1,74 @@
|
||||
import type { Message } from '../models/types.js';
|
||||
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
|
||||
import { COMPACTION_SYSTEM_PROMPT } from '../backends/native/prompts.js';
|
||||
import { estimateMessageTokens } from './tokens.js';
|
||||
|
||||
/**
 * Tunable settings for conversation-history compaction.
 */
export interface CompactionConfig {
  /** Percentage of the context window that triggers compaction (default: 80). */
  thresholdPct: number;

  /**
   * Number of most recent turns to keep verbatim. One turn is a
   * user + assistant pair, i.e. two messages.
   */
  keepTurns: number;

  /** Upper bound, in tokens, on the summary response requested from the model. */
  summaryMaxTokens: number;
}
|
||||
|
||||
/**
 * Outcome of a compaction pass over a conversation history.
 */
export interface CompactionResult {
  /** The history after compaction: the summary message followed by the kept recent messages. */
  messages: Message[];

  /** How many of the original messages were folded into the summary (removed). */
  compactedCount: number;

  /** Estimated token count of the history before compaction. */
  tokensBefore: number;

  /** Estimated token count of the history after compaction. */
  tokensAfter: number;
}
|
||||
|
||||
/**
 * Baseline compaction settings: trigger at 80% of the context window,
 * keep the last 4 turns verbatim, and cap the summary at 1024 tokens.
 */
export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
  thresholdPct: 80,
  keepTurns: 4,
  summaryMaxTokens: 1024,
};
|
||||
|
||||
/**
 * Compacts a conversation history by replacing its older messages with a
 * single model-written summary while keeping the most recent turns verbatim.
 *
 * The older messages are rendered as `role: content` paragraphs and sent via
 * `orchestrator.delegate()` to the tier returned by
 * `orchestrator.getDelegationTier('compaction')`. The reply is wrapped in an
 * assistant message and placed in front of the kept messages.
 *
 * @param opts.messages - Full conversation history, oldest first. Not mutated.
 * @param opts.orchestrator - Used to resolve the compaction tier and run the summary call.
 * @param opts.config - Compaction settings; `keepTurns * 2` trailing messages are kept
 *   and `summaryMaxTokens` caps the summary response.
 * @returns The compacted history plus before/after token estimates. When there is
 *   nothing older than the kept window, the input array is returned as-is with
 *   `compactedCount: 0` and no model call is made.
 * @throws Whatever `orchestrator.delegate()` rejects with; the error is not caught here.
 */
export async function compactHistory(opts: {
  messages: Message[];
  orchestrator: AgentOrchestrator;
  config: CompactionConfig;
}): Promise<CompactionResult> {
  const { messages, orchestrator, config } = opts;
  const tokensBefore = estimateMessageTokens(messages);

  // Two messages per turn. Truncate fractions and clamp negatives so the
  // value is always a usable "number of trailing messages to keep".
  const keepCount = Math.max(0, Math.trunc(config.keepTurns * 2));

  // Use an explicit split index instead of `slice(0, -keepCount)`:
  // when keepCount is 0, `-0` is treated as index 0, which would select
  // nothing to compact and keep the whole history.
  const splitIndex = messages.length - keepCount;

  // `!(x > 0)` also catches NaN (e.g. a non-numeric keepTurns), in which
  // case the history is left untouched rather than summarising nothing.
  if (!(splitIndex > 0)) {
    return {
      messages,
      compactedCount: 0,
      tokensBefore,
      tokensAfter: tokensBefore,
    };
  }

  const toCompact = messages.slice(0, splitIndex);
  const toKeep = messages.slice(splitIndex);

  // NOTE(review): assumes `content` stringifies meaningfully (i.e. is a
  // string) — confirm against the Message type.
  const formattedConversation = toCompact
    .map((msg) => `${msg.role}: ${msg.content}`)
    .join('\n\n');

  const tier = orchestrator.getDelegationTier('compaction');
  const result = await orchestrator.delegate({
    tier,
    systemPrompt: COMPACTION_SYSTEM_PROMPT,
    message: formattedConversation,
    maxTokens: config.summaryMaxTokens,
  });

  const summaryMessage: Message = {
    role: 'assistant',
    content: '[Summary of earlier conversation]\n\n' + result.content,
  };
  const compacted = [summaryMessage, ...toKeep];

  return {
    messages: compacted,
    compactedCount: toCompact.length,
    tokensBefore,
    tokensAfter: estimateMessageTokens(compacted),
  };
}
|
||||
Reference in New Issue
Block a user