feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)
Phase 0 — Multi-Model Delegation:
- AgentOrchestrator wraps NativeAgent with delegate() for stateless single-turn calls to any model tier (fast/default/complex/local)
- DelegationConfig maps task types (compaction, classification, etc.) to model tiers
- Delegation prompts for compaction, memory extraction, classification, and tool summarisation
- Per-tier usage tracking for cost visibility
- Config schema: agents.delegation and agents.primary_tier

Phase 1 — Context Compaction:
- Token estimation (char/4 heuristic) with context window lookup
- shouldCompact() threshold check against context window percentage
- compactHistory() splits old/recent messages, delegates summary to fast tier, returns CompactionResult
- Automatic compaction in AgentOrchestrator.process() when configured
- Force-compact via orchestrator.compact() with session persistence
- Session.replaceHistory() with atomic SQLite transaction
- /compact TUI command with feedback on compacted token counts
- Config schema: compaction.enabled, threshold_pct, keep_turns, summary_max_tokens

Tests: 385 passing across 50 files (22 new tests in 2 new test files)
This commit is contained in:
@@ -0,0 +1,104 @@
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
import { compactHistory, DEFAULT_COMPACTION_CONFIG } from './compaction.js';
|
||||
import type { CompactionConfig } from './compaction.js';
|
||||
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
|
||||
import type { Message } from '../models/types.js';
|
||||
|
||||
/**
 * Build a minimal test double for AgentOrchestrator.
 *
 * Only the two members exercised by these tests are provided:
 * `getDelegationTier` always answers `'fast'`, and `delegate` resolves to a
 * canned result whose `content` is `summaryText`.
 */
function makeMockOrchestrator(summaryText = 'Summary of conversation'): AgentOrchestrator {
  const cannedDelegation = {
    content: summaryText,
    usage: { inputTokens: 100, outputTokens: 50 },
    tier: 'fast',
  };

  const getDelegationTier = vi.fn().mockReturnValue('fast');
  const delegate = vi.fn().mockResolvedValue(cannedDelegation);

  // Deliberately partial: the cast lets the stub stand in for the full class.
  return { getDelegationTier, delegate } as unknown as AgentOrchestrator;
}
|
||||
|
||||
/**
 * Produce `count` placeholder messages whose roles alternate, starting with
 * `'user'`, and whose content is `Message <index>`.
 */
function makeMessages(count: number): Message[] {
  const history: Message[] = [];
  let index = 0;

  while (index < count) {
    const isUserTurn = index % 2 === 0;
    history.push({
      role: isUserTurn ? 'user' : 'assistant',
      content: `Message ${index}`,
    });
    index += 1;
  }

  return history;
}
|
||||
|
||||
describe('compactHistory', () => {
  // keepTurns counts user+assistant pairs, so 2 turns means the last 4 messages survive.
  const testConfig: CompactionConfig = {
    thresholdPct: 80,
    keepTurns: 2,
    summaryMaxTokens: 1024,
  };

  it('returns no-op when messages count is at or below keepTurns threshold', async () => {
    // Exactly 4 messages: everything fits inside the kept tail.
    const history = makeMessages(4);
    const mock = makeMockOrchestrator();

    const outcome = await compactHistory({
      messages: history,
      orchestrator: mock,
      config: testConfig,
    });

    expect(outcome.compactedCount).toBe(0);
    expect(outcome.messages).toEqual(history);
    expect(mock.delegate).not.toHaveBeenCalled();
  });

  it('compacts older messages and keeps recent ones', async () => {
    // 10 messages in: the first 6 get summarised, the last 4 are kept.
    const history = makeMessages(10);
    const mock = makeMockOrchestrator('Summarized conversation');

    const outcome = await compactHistory({
      messages: history,
      orchestrator: mock,
      config: testConfig,
    });

    expect(outcome.compactedCount).toBe(6);
    // One summary message followed by the four preserved messages.
    expect(outcome.messages).toHaveLength(5);

    const summary = outcome.messages[0];
    expect(summary.role).toBe('assistant');
    expect(summary.content).toContain('[Summary of earlier conversation]');
    expect(summary.content).toContain('Summarized conversation');

    // The tail of the original history must come through unchanged.
    expect(outcome.messages.slice(1)).toEqual(history.slice(-4));
  });

  it('calls delegate with compaction tier and correct params', async () => {
    const history = makeMessages(10);
    const mock = makeMockOrchestrator();

    await compactHistory({ messages: history, orchestrator: mock, config: testConfig });

    expect(mock.getDelegationTier).toHaveBeenCalledWith('compaction');

    const expectedRequest = expect.objectContaining({
      tier: 'fast',
      maxTokens: 1024,
    });
    expect(mock.delegate).toHaveBeenCalledWith(expectedRequest);
  });

  it('populates token counts in result', async () => {
    const history = makeMessages(10);
    const mock = makeMockOrchestrator();

    const { tokensBefore, tokensAfter } = await compactHistory({
      messages: history,
      orchestrator: mock,
      config: testConfig,
    });

    expect(tokensBefore).toBeGreaterThan(0);
    expect(tokensAfter).toBeGreaterThan(0);
    expect(tokensAfter).toBeLessThan(tokensBefore);
  });

  it('handles single turn above keepTurns threshold', async () => {
    // 6 messages (3 turns): 4 are kept, leaving a single 2-message turn to compact.
    const history = makeMessages(6);
    const mock = makeMockOrchestrator();

    const outcome = await compactHistory({
      messages: history,
      orchestrator: mock,
      config: testConfig,
    });

    expect(outcome.compactedCount).toBe(2);
    // One summary message followed by the four preserved messages.
    expect(outcome.messages).toHaveLength(5);
  });

  it('uses DEFAULT_COMPACTION_CONFIG values correctly', () => {
    const { thresholdPct, keepTurns, summaryMaxTokens } = DEFAULT_COMPACTION_CONFIG;

    expect(thresholdPct).toBe(80);
    expect(keepTurns).toBe(4);
    expect(summaryMaxTokens).toBe(1024);
  });
});
|
||||
@@ -0,0 +1,74 @@
|
||||
import type { Message } from '../models/types.js';
|
||||
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
|
||||
import { COMPACTION_SYSTEM_PROMPT } from '../backends/native/prompts.js';
|
||||
import { estimateMessageTokens } from './tokens.js';
|
||||
|
||||
/**
 * Tunable settings for conversation compaction.
 */
export interface CompactionConfig {
  /**
   * Share of the context window, expressed as a percentage, at which
   * compaction is triggered (default: 80).
   */
  thresholdPct: number;

  /**
   * How many of the most recent turns are left untouched. One turn is a
   * user+assistant pair, i.e. two messages.
   */
  keepTurns: number;

  /**
   * Upper bound, in tokens, on the summary produced by the compaction call.
   */
  summaryMaxTokens: number;
}
|
||||
|
||||
/**
 * Outcome of a compaction pass.
 */
export interface CompactionResult {
  /**
   * History after compaction: the summary message first, then the recent
   * messages that were kept as-is.
   */
  messages: Message[];

  /**
   * Count of original messages that were folded into the summary.
   */
  compactedCount: number;

  /**
   * Token estimate for the history as it was before compaction.
   */
  tokensBefore: number;

  /**
   * Token estimate for the history after compaction.
   */
  tokensAfter: number;
}
|
||||
|
||||
/**
 * Baseline compaction settings: trigger at 80% of the context window, keep
 * the last 4 turns intact, and cap the summary at 1024 tokens.
 */
export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
  thresholdPct: 80, // percent of the context window
  keepTurns: 4, // user+assistant pairs
  summaryMaxTokens: 1024, // cap on the summary response
};
|
||||
|
||||
/**
 * Summarise the older part of a conversation while keeping the newest turns verbatim.
 *
 * The trailing `config.keepTurns * 2` messages are preserved untouched. Every
 * message before that tail is rendered as `role: content` paragraphs and sent
 * through `orchestrator.delegate()` on the tier returned by
 * `orchestrator.getDelegationTier('compaction')`. The reply becomes a single
 * assistant message, prefixed with `[Summary of earlier conversation]`, that is
 * placed ahead of the preserved tail.
 *
 * When nothing lies before the preserved tail, the input array is returned
 * as-is with `compactedCount: 0` and the orchestrator is never called.
 *
 * @param opts.messages - Conversation history; the end of the array is treated
 *   as the most recent part. The array is not mutated.
 * @param opts.orchestrator - Provides the delegation tier and performs the
 *   summarisation call.
 * @param opts.config - `keepTurns` decides how many trailing messages are kept
 *   (two per turn; values at or below zero keep none); `summaryMaxTokens` is
 *   forwarded to the delegate call as `maxTokens`.
 * @returns The compacted history together with the number of messages folded
 *   into the summary and token estimates from before and after.
 * @throws Propagates any rejection from `orchestrator.delegate()`.
 */
export async function compactHistory(opts: {
  messages: Message[];
  orchestrator: AgentOrchestrator;
  config: CompactionConfig;
}): Promise<CompactionResult> {
  const { messages, orchestrator, config } = opts;

  const tokensBefore = estimateMessageTokens(messages);

  // Two messages per turn. Clamp at zero so a zero or negative keepTurns means
  // "keep nothing" rather than producing a negative count.
  const keepCount = Math.max(0, Math.trunc(config.keepTurns * 2));

  // Split on an explicit index instead of slice(-keepCount): with keepCount
  // of 0, slice(-0) is slice(0) and would "keep" the whole history while
  // compacting nothing.
  const splitIndex = messages.length - keepCount;

  // Written as a negated comparison so a NaN keepTurns also lands in the
  // no-op branch instead of triggering a summarisation call.
  if (!(splitIndex > 0)) {
    return {
      messages,
      compactedCount: 0,
      tokensBefore,
      tokensAfter: tokensBefore,
    };
  }

  const toCompact = messages.slice(0, splitIndex);
  const toKeep = messages.slice(splitIndex);

  // Flatten the older messages into a plain-text transcript for the summariser.
  const formattedConversation = toCompact
    .map((msg) => `${msg.role}: ${msg.content}`)
    .join('\n\n');

  const tier = orchestrator.getDelegationTier('compaction');

  const result = await orchestrator.delegate({
    tier,
    systemPrompt: COMPACTION_SYSTEM_PROMPT,
    message: formattedConversation,
    maxTokens: config.summaryMaxTokens,
  });

  const summaryMessage: Message = {
    role: 'assistant',
    content: '[Summary of earlier conversation]\n\n' + result.content,
  };

  const compacted = [summaryMessage, ...toKeep];

  return {
    messages: compacted,
    compactedCount: toCompact.length,
    tokensBefore,
    tokensAfter: estimateMessageTokens(compacted),
  };
}
|
||||
@@ -0,0 +1,15 @@
|
||||
// Token estimation and compaction-threshold helpers.
export {
  CONTEXT_WINDOWS,
  estimateMessageTokens,
  estimateTokens,
  getContextWindow,
  shouldCompact,
} from './tokens.js';
export type { ShouldCompactOpts } from './tokens.js';

// History compaction entry point, its defaults, and its types.
export { DEFAULT_COMPACTION_CONFIG, compactHistory } from './compaction.js';
export type { CompactionConfig, CompactionResult } from './compaction.js';
|
||||
@@ -0,0 +1,108 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { estimateTokens, estimateMessageTokens, getContextWindow, shouldCompact, CONTEXT_WINDOWS } from './tokens.js';
|
||||
|
||||
describe('estimateTokens', () => {
  it('returns 0 for empty string', () => {
    // No characters at all: ceil(0 / 4) is 0.
    const tokens = estimateTokens('');

    expect(tokens).toBe(0);
  });

  it('estimates ~1 token per 4 characters', () => {
    const exactlyFourChars = 'abcd'; // ceil(4 / 4) = 1
    const fiveChars = 'abcde'; // ceil(5 / 4) = 2

    expect(estimateTokens(exactlyFourChars)).toBe(1);
    expect(estimateTokens(fiveChars)).toBe(2);
  });

  it('handles longer text', () => {
    const hundredChars = 'a'.repeat(100);

    // 100 characters / 4 = 25 tokens.
    expect(estimateTokens(hundredChars)).toBe(25);
  });
});
|
||||
|
||||
describe('estimateMessageTokens', () => {
  it('returns 0 for empty array', () => {
    const total = estimateMessageTokens([]);

    expect(total).toBe(0);
  });

  it('includes overhead per message', () => {
    // 'abcd' contributes 1 content token, plus 4 overhead tokens → 5.
    const single = [{ role: 'user' as const, content: 'abcd' }];

    expect(estimateMessageTokens(single)).toBe(5);
  });

  it('sums multiple messages', () => {
    // Each message costs 1 content token + 4 overhead tokens = 5.
    const userMessage = { role: 'user' as const, content: 'abcd' };
    const assistantMessage = { role: 'assistant' as const, content: 'abcd' };

    expect(estimateMessageTokens([userMessage, assistantMessage])).toBe(10);
  });
});
|
||||
|
||||
describe('getContextWindow', () => {
  const knownClaudeModel = 'claude-sonnet-4-20250514';
  const unlistedModel = 'unknown-model';

  it('returns known window for Claude Sonnet', () => {
    const size = getContextWindow(knownClaudeModel);

    expect(size).toBe(200_000);
  });

  it('returns known window for GPT-4o', () => {
    const size = getContextWindow('gpt-4o');

    expect(size).toBe(128_000);
  });

  it('returns default 128000 for unknown model', () => {
    const size = getContextWindow(unlistedModel);

    expect(size).toBe(128_000);
  });

  it('returns override when provided', () => {
    const size = getContextWindow(knownClaudeModel, 50_000);

    expect(size).toBe(50_000);
  });

  it('returns override even for unknown model', () => {
    const size = getContextWindow(unlistedModel, 32_000);

    expect(size).toBe(32_000);
  });
});
|
||||
|
||||
describe('shouldCompact', () => {
  it('returns false when messages are well below threshold', () => {
    const history = [{ role: 'user' as const, content: 'hello' }];

    // gpt-4o resolves to a 128k window, far above a one-word message.
    const verdict = shouldCompact({ messages: history, model: 'gpt-4o' });

    expect(verdict).toBe(false);
  });

  it('returns true when messages exceed threshold', () => {
    // Window of 100 tokens at 80% gives a limit of 80 tokens.
    // The single message costs ceil(400 / 4) + 4 = 104 tokens, which is over it.
    const history = [{ role: 'user' as const, content: 'a'.repeat(400) }];

    const verdict = shouldCompact({
      messages: history,
      model: 'unknown',
      contextWindow: 100,
      thresholdPct: 80,
    });

    expect(verdict).toBe(true);
  });

  it('respects custom thresholdPct', () => {
    // Window of 100 tokens at 5% gives a limit of 5 tokens.
    // The single message costs ceil(20 / 4) + 4 = 9 tokens, which is over it.
    const history = [{ role: 'user' as const, content: 'a'.repeat(20) }];

    const verdict = shouldCompact({
      messages: history,
      model: 'unknown',
      contextWindow: 100,
      thresholdPct: 5,
    });

    expect(verdict).toBe(true);
  });

  it('uses model lookup when no contextWindow override', () => {
    // gpt-3.5-turbo has a 16385-token window; the default 80% limit is 13108.
    // The single message costs ceil(60000 / 4) + 4 = 15004 tokens, which is over it.
    const history = [{ role: 'user' as const, content: 'a'.repeat(60000) }];

    const verdict = shouldCompact({ messages: history, model: 'gpt-3.5-turbo' });

    expect(verdict).toBe(true);
  });
});
|
||||
@@ -0,0 +1,88 @@
|
||||
import type { Message } from '../models/types.js';
|
||||
|
||||
/**
 * Flat token surcharge added for every message on top of its content
 * estimate (role marker, separators, and similar framing).
 */
const MESSAGE_OVERHEAD_TOKENS = 4;

/**
 * Context window size, in tokens, assumed for any model that has no entry in
 * the lookup table below.
 */
const DEFAULT_CONTEXT_WINDOW = 128_000;

/**
 * Lookup table from model identifier to context window size in tokens.
 * Values are hard-coded per known model.
 */
export const CONTEXT_WINDOWS: Record<string, number> = {
  // Claude models
  'claude-sonnet-4-20250514': 200_000,
  'claude-3-5-haiku-20241022': 200_000,
  'claude-3-5-sonnet-20241022': 200_000,
  'claude-3-opus-20240229': 200_000,
  'claude-opus-4-20250514': 200_000,

  // GPT models
  'gpt-4o': 128_000,
  'gpt-4o-mini': 128_000,
  'gpt-4-turbo': 128_000,
  'gpt-3.5-turbo': 16_385,
} as const;
|
||||
|
||||
/**
 * Rough token count for a piece of text, based purely on its length.
 *
 * Assumes about four characters per token (a fair average for English text)
 * and rounds up, so any non-empty string counts as at least one token.
 *
 * @param text - The text to measure.
 * @returns The estimated number of tokens; `0` for an empty string.
 */
export function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const charCount = text.length;

  return Math.ceil(charCount / charsPerToken);
}
|
||||
|
||||
/**
 * Rough token count for a whole list of messages.
 *
 * Each message contributes the estimate for its content plus a fixed
 * per-message overhead (`MESSAGE_OVERHEAD_TOKENS`) that stands in for the
 * role marker and separators.
 *
 * @param messages - Messages to measure.
 * @returns The summed estimate; `0` for an empty list.
 */
export function estimateMessageTokens(messages: Message[]): number {
  let total = 0;

  for (const message of messages) {
    total = total + estimateTokens(message.content) + MESSAGE_OVERHEAD_TOKENS;
  }

  return total;
}
|
||||
|
||||
/**
 * Resolve the context window size (in tokens) to use for a model.
 *
 * Resolution order:
 * 1. `override`, when one is supplied, is returned unchanged.
 * 2. The model's own entry in `CONTEXT_WINDOWS`.
 * 3. `DEFAULT_CONTEXT_WINDOW` for any model that has no entry.
 *
 * @param model - Model identifier to look up.
 * @param override - If provided, this value is returned directly.
 * @returns The context window size in tokens.
 */
export function getContextWindow(model: string, override?: number): number {
  if (override !== undefined) {
    return override;
  }

  // Only accept the table's own keys. A bare bracket lookup also resolves
  // names inherited from Object.prototype (e.g. 'toString', 'constructor'),
  // which would hand back a function instead of falling through to the default.
  if (Object.prototype.hasOwnProperty.call(CONTEXT_WINDOWS, model)) {
    return CONTEXT_WINDOWS[model] ?? DEFAULT_CONTEXT_WINDOW;
  }

  return DEFAULT_CONTEXT_WINDOW;
}
|
||||
|
||||
/**
 * Input bag accepted by {@link shouldCompact}.
 */
export interface ShouldCompactOpts {
  /** Conversation history whose size is being checked. */
  messages: Message[];

  /** Model identifier used to look up the context window. */
  model: string;

  /** Explicit context window size in tokens; takes the place of the model lookup when set. */
  contextWindow?: number;

  /** Percentage of the context window at which compaction kicks in (default 80). */
  thresholdPct?: number;
}
|
||||
|
||||
/**
 * Decide whether a conversation has grown large enough to need compaction.
 *
 * The limit is `thresholdPct` percent (80 when omitted) of the context window
 * resolved from `model` and the optional `contextWindow` override.
 *
 * @param opts - History to measure plus the model / window / threshold settings.
 * @returns `true` only when the estimated token count of `opts.messages` is
 *   strictly greater than the limit.
 */
export function shouldCompact(opts: ShouldCompactOpts): boolean {
  const pct = opts.thresholdPct === undefined ? 80 : opts.thresholdPct;

  const windowSize = getContextWindow(opts.model, opts.contextWindow);
  const limit = (pct / 100) * windowSize;

  return estimateMessageTokens(opts.messages) > limit;
}
|
||||
Reference in New Issue
Block a user