feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)

Phase 0 — Multi-Model Delegation:
- AgentOrchestrator wraps NativeAgent with delegate() for stateless
  single-turn calls to any model tier (fast/default/complex/local)
- DelegationConfig maps task types (compaction, classification, etc.)
  to model tiers
- Delegation prompts for compaction, memory extraction, classification,
  and tool summarisation
- Per-tier usage tracking for cost visibility
- Config schema: agents.delegation and agents.primary_tier

Phase 1 — Context Compaction:
- Token estimation (char/4 heuristic) with context window lookup
- shouldCompact() threshold check against context window percentage
- compactHistory() splits old/recent messages, delegates summary to
  fast tier, returns CompactionResult
- Automatic compaction in AgentOrchestrator.process() when configured
- Force-compact via orchestrator.compact() with session persistence
- Session.replaceHistory() with atomic SQLite transaction
- /compact TUI command with feedback on compacted token counts
- Config schema: compaction.enabled, threshold_pct, keep_turns,
  summary_max_tokens

Tests: 385 passing across 50 files (22 new tests in 2 new test files)
This commit is contained in:
William Valentin
2026-02-06 13:17:02 -08:00
parent f7cc87a4bb
commit 306e11bd2e
22 changed files with 1562 additions and 12 deletions
+104
View File
@@ -0,0 +1,104 @@
import { describe, it, expect, vi } from 'vitest';
import { compactHistory, DEFAULT_COMPACTION_CONFIG } from './compaction.js';
import type { CompactionConfig } from './compaction.js';
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
import type { Message } from '../models/types.js';
/**
 * Build a stand-in orchestrator for the compaction tests.
 *
 * Only the two members the tests below assert on are stubbed:
 * `getDelegationTier` always reports the `'fast'` tier, and `delegate`
 * resolves to a canned reply whose `content` is `summaryText`.
 *
 * @param summaryText - Text the stubbed `delegate` call resolves with.
 */
function makeMockOrchestrator(summaryText = 'Summary of conversation'): AgentOrchestrator {
  const cannedReply = {
    content: summaryText,
    usage: { inputTokens: 100, outputTokens: 50 },
    tier: 'fast',
  };
  const getDelegationTier = vi.fn().mockReturnValue('fast');
  const delegate = vi.fn().mockResolvedValue(cannedReply);

  // Partial stub: the double cast is deliberate because only two members exist.
  return { getDelegationTier, delegate } as unknown as AgentOrchestrator;
}
/**
 * Produce `count` placeholder messages that alternate user/assistant,
 * starting with a user message. Message `i` has content `Message i`.
 */
function makeMessages(count: number): Message[] {
  const history: Message[] = [];
  let index = 0;
  while (index < count) {
    // Even positions are user turns, odd positions are assistant replies.
    const role = index % 2 === 0 ? 'user' : 'assistant';
    history.push({ role, content: `Message ${index}` });
    index += 1;
  }
  return history;
}
describe('compactHistory', () => {
  // keepTurns counts user+assistant pairs, so 2 turns preserve the trailing 4 messages.
  const config: CompactionConfig = { thresholdPct: 80, keepTurns: 2, summaryMaxTokens: 1024 };

  it('returns no-op when messages count is at or below keepTurns threshold', async () => {
    // Exactly as many messages as are kept, so nothing is left to summarise.
    const history = makeMessages(4);
    const orchestrator = makeMockOrchestrator();

    const outcome = await compactHistory({ messages: history, orchestrator, config });

    expect(outcome.compactedCount).toBe(0);
    expect(outcome.messages).toEqual(history);
    expect(orchestrator.delegate).not.toHaveBeenCalled();
  });

  it('compacts older messages and keeps recent ones', async () => {
    // 10 messages in: the first 6 are summarised, the last 4 survive untouched.
    const history = makeMessages(10);
    const orchestrator = makeMockOrchestrator('Summarized conversation');

    const { compactedCount, messages: compacted } = await compactHistory({
      messages: history,
      orchestrator,
      config,
    });

    expect(compactedCount).toBe(6);
    expect(compacted).toHaveLength(5); // 1 summary + 4 kept

    const [summary, ...recent] = compacted;
    expect(summary.role).toBe('assistant');
    expect(summary.content).toContain('[Summary of earlier conversation]');
    expect(summary.content).toContain('Summarized conversation');
    expect(recent).toEqual(history.slice(-4));
  });

  it('calls delegate with compaction tier and correct params', async () => {
    const history = makeMessages(10);
    const orchestrator = makeMockOrchestrator();

    await compactHistory({ messages: history, orchestrator, config });

    expect(orchestrator.getDelegationTier).toHaveBeenCalledWith('compaction');
    const expectedRequest = expect.objectContaining({ tier: 'fast', maxTokens: 1024 });
    expect(orchestrator.delegate).toHaveBeenCalledWith(expectedRequest);
  });

  it('populates token counts in result', async () => {
    const history = makeMessages(10);
    const orchestrator = makeMockOrchestrator();

    const { tokensBefore, tokensAfter } = await compactHistory({
      messages: history,
      orchestrator,
      config,
    });

    expect(tokensBefore).toBeGreaterThan(0);
    expect(tokensAfter).toBeGreaterThan(0);
    expect(tokensAfter).toBeLessThan(tokensBefore);
  });

  it('handles single turn above keepTurns threshold', async () => {
    // 3 turns = 6 messages; 4 are kept, so a single turn (2 messages) is compacted.
    const threeTurns = makeMessages(6);
    const orchestrator = makeMockOrchestrator();

    const outcome = await compactHistory({ messages: threeTurns, orchestrator, config });

    expect(outcome.compactedCount).toBe(2);
    expect(outcome.messages).toHaveLength(5); // 1 summary + 4 kept
  });

  it('uses DEFAULT_COMPACTION_CONFIG values correctly', () => {
    const { thresholdPct, keepTurns, summaryMaxTokens } = DEFAULT_COMPACTION_CONFIG;

    expect(thresholdPct).toBe(80);
    expect(keepTurns).toBe(4);
    expect(summaryMaxTokens).toBe(1024);
  });
});
+74
View File
@@ -0,0 +1,74 @@
import type { Message } from '../models/types.js';
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
import { COMPACTION_SYSTEM_PROMPT } from '../backends/native/prompts.js';
import { estimateMessageTokens } from './tokens.js';
/**
 * Tunable settings for conversation compaction.
 *
 * `compactHistory` in this module reads `keepTurns` and `summaryMaxTokens`;
 * it does not read `thresholdPct`.
 */
export interface CompactionConfig {
/**
 * Percentage of context window that triggers compaction (default: 80).
 * Not consulted by `compactHistory` itself.
 */
thresholdPct: number;
/**
 * Number of recent turns (user+assistant pairs) to always keep intact.
 * `compactHistory` multiplies this by 2 to get the number of trailing
 * messages it preserves.
 */
keepTurns: number;
/**
 * Maximum tokens for the compaction summary response; passed as `maxTokens`
 * to the delegated summary call.
 */
summaryMaxTokens: number;
}
/**
 * Outcome of a `compactHistory` call.
 */
export interface CompactionResult {
/**
 * The compacted messages: [summary, ...recentMessages].
 * When nothing was compacted this is the input array, returned as-is.
 */
messages: Message[];
/** Number of messages that were compacted (removed); 0 when nothing was compacted. */
compactedCount: number;
/** Estimated tokens before compaction (from `estimateMessageTokens`). */
tokensBefore: number;
/** Estimated tokens after compaction; equals `tokensBefore` when nothing was compacted. */
tokensAfter: number;
}
/**
 * Default compaction settings: an 80% threshold, the 4 most recent turns
 * kept verbatim (8 messages, since `compactHistory` counts two messages per
 * turn), and a summary capped at 1024 tokens.
 */
export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
thresholdPct: 80,
keepTurns: 4,
summaryMaxTokens: 1024,
};
/**
 * Replace the older part of a conversation with a single summary message.
 *
 * The last `config.keepTurns * 2` messages are kept verbatim. Everything
 * before them is rendered as `role: content` blocks separated by blank
 * lines and sent to the orchestrator's `'compaction'` delegation tier with
 * `COMPACTION_SYSTEM_PROMPT`. The reply becomes one assistant message
 * prefixed with `[Summary of earlier conversation]`, placed ahead of the
 * kept messages.
 *
 * When no messages precede the kept tail, the input array is returned
 * unchanged and no delegate call is made.
 *
 * The input array is never mutated.
 *
 * @param opts.messages - Full conversation history, oldest first.
 * @param opts.orchestrator - Supplies the delegation tier and performs the summary call.
 * @param opts.config - `keepTurns` and `summaryMaxTokens` are read here.
 * @returns The compacted history plus before/after token estimates.
 * @throws Whatever `orchestrator.delegate` rejects with; it is not caught here.
 */
export async function compactHistory(opts: {
  messages: Message[];
  orchestrator: AgentOrchestrator;
  config: CompactionConfig;
}): Promise<CompactionResult> {
  const { messages, orchestrator, config } = opts;
  const tokensBefore = estimateMessageTokens(messages);

  // Clamp to a non-negative whole number of messages. A negative keepTurns
  // would otherwise compact the wrong end of the history.
  const keepCount = Math.max(0, Math.trunc(config.keepTurns * 2));

  // Split on an explicit index rather than slice(-keepCount): slice(-0) is
  // slice(0), so keepTurns = 0 used to "keep" everything, summarise an empty
  // transcript, and make the history longer.
  const splitIndex = messages.length - keepCount;

  // Written as !(x > 0) so a NaN split (from a NaN keepTurns) is a no-op too.
  if (!(splitIndex > 0)) {
    return {
      messages,
      compactedCount: 0,
      tokensBefore,
      tokensAfter: tokensBefore,
    };
  }

  const toCompact = messages.slice(0, splitIndex);
  const toKeep = messages.slice(splitIndex);

  const formattedConversation = toCompact
    .map((msg) => `${msg.role}: ${msg.content}`)
    .join('\n\n');

  const tier = orchestrator.getDelegationTier('compaction');
  const result = await orchestrator.delegate({
    tier,
    systemPrompt: COMPACTION_SYSTEM_PROMPT,
    message: formattedConversation,
    maxTokens: config.summaryMaxTokens,
  });

  const summaryMessage: Message = {
    role: 'assistant',
    content: '[Summary of earlier conversation]\n\n' + result.content,
  };
  const compacted = [summaryMessage, ...toKeep];

  return {
    messages: compacted,
    compactedCount: toCompact.length,
    tokensBefore,
    tokensAfter: estimateMessageTokens(compacted),
  };
}
+15
View File
@@ -0,0 +1,15 @@
export {
estimateTokens,
estimateMessageTokens,
getContextWindow,
shouldCompact,
CONTEXT_WINDOWS,
type ShouldCompactOpts,
} from './tokens.js';
export {
compactHistory,
type CompactionConfig,
type CompactionResult,
DEFAULT_COMPACTION_CONFIG,
} from './compaction.js';
+108
View File
@@ -0,0 +1,108 @@
import { describe, it, expect } from 'vitest';
import { estimateTokens, estimateMessageTokens, getContextWindow, shouldCompact, CONTEXT_WINDOWS } from './tokens.js';
describe('estimateTokens', () => {
  it('returns 0 for empty string', () => {
    // Zero characters round up to zero tokens.
    const empty = '';
    expect(estimateTokens(empty)).toBe(0);
  });

  it('estimates ~1 token per 4 characters', () => {
    // Exactly 4 chars is one token; a 5th char spills into a second token.
    const fourChars = 'abcd';
    const fiveChars = 'abcde';
    expect(estimateTokens(fourChars)).toBe(1);
    expect(estimateTokens(fiveChars)).toBe(2);
  });

  it('handles longer text', () => {
    const hundredChars = 'a'.repeat(100);
    expect(estimateTokens(hundredChars)).toBe(25); // 100 / 4
  });
});
describe('estimateMessageTokens', () => {
  it('returns 0 for empty array', () => {
    const none: { role: 'user' | 'assistant'; content: string }[] = [];
    expect(estimateMessageTokens(none)).toBe(0);
  });

  it('includes overhead per message', () => {
    // 'abcd' is 1 content token, plus 4 overhead tokens, giving 5.
    const single = [{ role: 'user' as const, content: 'abcd' }];
    expect(estimateMessageTokens(single)).toBe(5);
  });

  it('sums multiple messages', () => {
    // Each message costs 1 content token + 4 overhead = 5, so two cost 10.
    const userMsg = { role: 'user' as const, content: 'abcd' };
    const assistantMsg = { role: 'assistant' as const, content: 'abcd' };
    expect(estimateMessageTokens([userMsg, assistantMsg])).toBe(10);
  });
});
describe('getContextWindow', () => {
  const SONNET = 'claude-sonnet-4-20250514';
  const UNLISTED = 'unknown-model';

  it('returns known window for Claude Sonnet', () => {
    const size = getContextWindow(SONNET);
    expect(size).toBe(200_000);
  });

  it('returns known window for GPT-4o', () => {
    const size = getContextWindow('gpt-4o');
    expect(size).toBe(128_000);
  });

  it('returns default 128000 for unknown model', () => {
    const size = getContextWindow(UNLISTED);
    expect(size).toBe(128_000);
  });

  it('returns override when provided', () => {
    // The override wins even though the model has a table entry.
    const size = getContextWindow(SONNET, 50_000);
    expect(size).toBe(50_000);
  });

  it('returns override even for unknown model', () => {
    const size = getContextWindow(UNLISTED, 32_000);
    expect(size).toBe(32_000);
  });
});
describe('shouldCompact', () => {
  // A single user message whose content is `chars` repetitions of 'a'.
  const filler = (chars: number) => [{ role: 'user' as const, content: 'a'.repeat(chars) }];

  it('returns false when messages are well below threshold', () => {
    const tiny = [{ role: 'user' as const, content: 'hello' }];
    // gpt-4o has a 128k window in the lookup table.
    const verdict = shouldCompact({ messages: tiny, model: 'gpt-4o' });
    expect(verdict).toBe(false);
  });

  it('returns true when messages exceed threshold', () => {
    // Window of 100 at 80% gives a limit of 80 tokens.
    // The message costs ceil(400/4) + 4 = 104 tokens, well over 80.
    const verdict = shouldCompact({
      messages: filler(400),
      model: 'unknown',
      contextWindow: 100,
      thresholdPct: 80,
    });
    expect(verdict).toBe(true);
  });

  it('respects custom thresholdPct', () => {
    // Window of 100 at 5% gives a limit of 5 tokens.
    // The message costs ceil(20/4) + 4 = 9 tokens, over 5.
    const verdict = shouldCompact({
      messages: filler(20),
      model: 'unknown',
      contextWindow: 100,
      thresholdPct: 5,
    });
    expect(verdict).toBe(true);
  });

  it('uses model lookup when no contextWindow override', () => {
    // gpt-3.5-turbo is 16385 tokens; the default 80% gives a limit of 13108.
    // The message costs ceil(60000/4) + 4 = 15004 tokens, over 13108.
    const verdict = shouldCompact({ messages: filler(60000), model: 'gpt-3.5-turbo' });
    expect(verdict).toBe(true);
  });
});
+88
View File
@@ -0,0 +1,88 @@
import type { Message } from '../models/types.js';
/**
 * Approximate overhead tokens per message (role marker, separators, etc.).
 * Added once per message by `estimateMessageTokens`, on top of the
 * content estimate.
 */
const MESSAGE_OVERHEAD_TOKENS = 4;
/**
 * Conservative default context window when a model is not in the lookup table.
 * Returned by `getContextWindow` when no override is given and the model has
 * no `CONTEXT_WINDOWS` entry.
 */
const DEFAULT_CONTEXT_WINDOW = 128_000;
// Window sizes shared by several entries in the table below.
const WINDOW_200K = 200_000;
const WINDOW_128K = 128_000;

/**
 * Context window size, in tokens, keyed by exact model identifier.
 *
 * Models missing from this table are expected to go through a fallback at
 * the lookup site rather than being added here speculatively.
 */
export const CONTEXT_WINDOWS: Record<string, number> = {
  // claude-* models
  'claude-sonnet-4-20250514': WINDOW_200K,
  'claude-3-5-haiku-20241022': WINDOW_200K,
  'claude-3-5-sonnet-20241022': WINDOW_200K,
  'claude-3-opus-20240229': WINDOW_200K,
  'claude-opus-4-20250514': WINDOW_200K,
  // gpt-* models
  'gpt-4o': WINDOW_128K,
  'gpt-4o-mini': WINDOW_128K,
  'gpt-4-turbo': WINDOW_128K,
  'gpt-3.5-turbo': 16_385,
};
/**
 * Rough token count for a piece of text, without running a tokenizer.
 *
 * Assumes about four characters per token (a common rule of thumb for
 * English text) and rounds up, so any non-empty string costs at least one
 * token and the empty string costs zero.
 *
 * @param text - Text to size.
 * @returns The estimated token count.
 */
export function estimateTokens(text: string): number {
  const CHARS_PER_TOKEN = 4;
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}
/**
 * Rough token count for a whole message list.
 *
 * Each message contributes its estimated content tokens plus a fixed
 * per-message overhead (`MESSAGE_OVERHEAD_TOKENS`) covering the role marker
 * and separators. An empty list costs zero.
 *
 * @param messages - Messages to size.
 * @returns The summed estimate across all messages.
 */
export function estimateMessageTokens(messages: Message[]): number {
  let total = 0;
  for (const msg of messages) {
    total += estimateTokens(msg.content) + MESSAGE_OVERHEAD_TOKENS;
  }
  return total;
}
/**
 * Return the context window size (in tokens) for a given model.
 *
 * Resolution order: an explicit `override`, then the model's own entry in
 * `CONTEXT_WINDOWS`, then `DEFAULT_CONTEXT_WINDOW`.
 *
 * @param model - Model identifier to look up (exact match).
 * @param override - If provided, this value is returned directly.
 * @returns The context window size in tokens.
 */
export function getContextWindow(model: string, override?: number): number {
  if (override !== undefined) {
    return override;
  }
  // Only consult the table's own keys. A bare `CONTEXT_WINDOWS[model]` also
  // resolves names inherited from Object.prototype ('constructor',
  // 'toString', '__proto__', ...), returning a function or object instead of
  // a number and skipping the default.
  const isKnownModel = Object.prototype.hasOwnProperty.call(CONTEXT_WINDOWS, model);
  const known = isKnownModel ? CONTEXT_WINDOWS[model] : undefined;
  return known ?? DEFAULT_CONTEXT_WINDOW;
}
/**
 * Options for {@link shouldCompact}.
 */
export interface ShouldCompactOpts {
/** Conversation whose estimated token count is compared to the threshold. */
messages: Message[];
/** Model identifier used to look up the context window when no override is given. */
model: string;
/** Explicit context window in tokens; when set, it is used instead of the model lookup. */
contextWindow?: number;
/** Percentage of the context window that triggers compaction (default 80). */
thresholdPct?: number;
}
/**
 * Decide whether a conversation has grown large enough to compact.
 *
 * The effective window is `opts.contextWindow` when given, otherwise the
 * value looked up for `opts.model`. The answer is `true` only when the
 * estimated size of `opts.messages` is strictly greater than
 * `thresholdPct` percent of that window (80 percent when omitted).
 *
 * @param opts - Messages, model, and optional window/threshold overrides.
 * @returns `true` when the estimate exceeds the threshold.
 */
export function shouldCompact(opts: ShouldCompactOpts): boolean {
  // Fall back to 80% only when the caller left the percentage undefined.
  const pct = opts.thresholdPct === undefined ? 80 : opts.thresholdPct;
  const windowTokens = getContextWindow(opts.model, opts.contextWindow);
  const limitTokens = (pct / 100) * windowTokens;
  return estimateMessageTokens(opts.messages) > limitTokens;
}