feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)
Phase 0 — Multi-Model Delegation: - AgentOrchestrator wraps NativeAgent with delegate() for stateless single-turn calls to any model tier (fast/default/complex/local) - DelegationConfig maps task types (compaction, classification, etc.) to model tiers - Delegation prompts for compaction, memory extraction, classification, and tool summarisation - Per-tier usage tracking for cost visibility - Config schema: agents.delegation and agents.primary_tier Phase 1 — Context Compaction: - Token estimation (char/4 heuristic) with context window lookup - shouldCompact() threshold check against context window percentage - compactHistory() splits old/recent messages, delegates summary to fast tier, returns CompactionResult - Automatic compaction in AgentOrchestrator.process() when configured - Force-compact via orchestrator.compact() with session persistence - Session.replaceHistory() with atomic SQLite transaction - /compact TUI command with feedback on compacted token counts - Config schema: compaction.enabled, threshold_pct, keep_turns, summary_max_tokens Tests: 385 passing across 50 files (22 new tests in 2 new test files)
This commit is contained in:
+14
-1
@@ -1 +1,14 @@
|
|||||||
export { NativeAgent, type NativeAgentConfig } from './native/index.js';
|
export { NativeAgent, type NativeAgentConfig, type ToolUseEvent } from './native/index.js';
|
||||||
|
export {
|
||||||
|
AgentOrchestrator,
|
||||||
|
type OrchestratorConfig,
|
||||||
|
type SubAgentRequest,
|
||||||
|
type SubAgentResult,
|
||||||
|
type DelegationConfig,
|
||||||
|
} from './native/index.js';
|
||||||
|
export {
|
||||||
|
COMPACTION_SYSTEM_PROMPT,
|
||||||
|
MEMORY_EXTRACTION_PROMPT,
|
||||||
|
CLASSIFICATION_PROMPT,
|
||||||
|
TOOL_SUMMARISATION_PROMPT,
|
||||||
|
} from './native/index.js';
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ describe('NativeAgent', () => {
|
|||||||
getHistory: vi.fn().mockReturnValue([]),
|
getHistory: vi.fn().mockReturnValue([]),
|
||||||
addMessage: vi.fn(),
|
addMessage: vi.fn(),
|
||||||
clear: vi.fn(),
|
clear: vi.fn(),
|
||||||
|
replaceHistory: vi.fn(),
|
||||||
};
|
};
|
||||||
|
|
||||||
const agent = new NativeAgent({
|
const agent = new NativeAgent({
|
||||||
|
|||||||
@@ -1 +1,14 @@
|
|||||||
export { NativeAgent, type NativeAgentConfig } from './agent.js';
|
export { NativeAgent, type NativeAgentConfig, type ToolUseEvent } from './agent.js';
|
||||||
|
export {
|
||||||
|
AgentOrchestrator,
|
||||||
|
type OrchestratorConfig,
|
||||||
|
type SubAgentRequest,
|
||||||
|
type SubAgentResult,
|
||||||
|
type DelegationConfig,
|
||||||
|
} from './orchestrator.js';
|
||||||
|
export {
|
||||||
|
COMPACTION_SYSTEM_PROMPT,
|
||||||
|
MEMORY_EXTRACTION_PROMPT,
|
||||||
|
CLASSIFICATION_PROMPT,
|
||||||
|
TOOL_SUMMARISATION_PROMPT,
|
||||||
|
} from './prompts.js';
|
||||||
|
|||||||
@@ -0,0 +1,613 @@
|
|||||||
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||||
|
import { AgentOrchestrator } from './orchestrator.js';
|
||||||
|
import { ModelRouter } from '../../models/router.js';
|
||||||
|
import type { ChatResponse, ModelClient } from '../../models/types.js';
|
||||||
|
import { ToolRegistry, ToolExecutor } from '../../tools/index.js';
|
||||||
|
import { HookEngine } from '../../hooks/engine.js';
|
||||||
|
import type { SubAgentRequest } from './orchestrator.js';
|
||||||
|
|
||||||
|
describe('AgentOrchestrator', () => {
|
||||||
|
let mockDefaultClient: ModelClient;
|
||||||
|
let mockFastClient: ModelClient;
|
||||||
|
let mockComplexClient: ModelClient;
|
||||||
|
let mockRouter: ModelRouter;
|
||||||
|
|
||||||
|
const createMockClient = (name: string, inputTokens = 100, outputTokens = 50): ModelClient => ({
|
||||||
|
chat: vi.fn().mockResolvedValue({
|
||||||
|
content: `${name} response`,
|
||||||
|
stopReason: 'end_turn',
|
||||||
|
usage: { inputTokens, outputTokens },
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
mockDefaultClient = createMockClient('default', 100, 50);
|
||||||
|
mockFastClient = createMockClient('fast', 50, 25);
|
||||||
|
mockComplexClient = createMockClient('complex', 200, 100);
|
||||||
|
|
||||||
|
mockRouter = new ModelRouter({
|
||||||
|
default: mockDefaultClient,
|
||||||
|
fast: mockFastClient,
|
||||||
|
complex: mockComplexClient,
|
||||||
|
fallbackChain: [],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('delegate()', () => {
|
||||||
|
it('routes to the correct tier when specified', async () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are a helpful assistant.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await orchestrator.delegate({
|
||||||
|
tier: 'fast',
|
||||||
|
systemPrompt: 'Summarize this text',
|
||||||
|
message: 'This is a test message',
|
||||||
|
maxTokens: 1000,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.content).toBe('fast response');
|
||||||
|
expect(result.tier).toBe('fast');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('includes tools when requested', async () => {
|
||||||
|
const mockToolRegistry = new ToolRegistry();
|
||||||
|
const hooks = new HookEngine({
|
||||||
|
confirm: ['*'],
|
||||||
|
log: [],
|
||||||
|
silent: [],
|
||||||
|
});
|
||||||
|
const mockToolExecutor = new ToolExecutor(mockToolRegistry, hooks);
|
||||||
|
|
||||||
|
const mockFastChatClient = mockRouter.getClient('fast')!;
|
||||||
|
const mockFastChatFn = vi.fn().mockResolvedValue({
|
||||||
|
content: 'response with tools',
|
||||||
|
stopReason: 'end_turn',
|
||||||
|
usage: { inputTokens: 100, outputTokens: 50 },
|
||||||
|
} as ChatResponse);
|
||||||
|
|
||||||
|
Object.assign(mockFastChatClient, { chat: mockFastChatFn });
|
||||||
|
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
toolRegistry: mockToolRegistry,
|
||||||
|
toolExecutor: mockToolExecutor,
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.delegate({
|
||||||
|
tier: 'fast',
|
||||||
|
systemPrompt: 'Use available tools',
|
||||||
|
message: 'Help me analyze data',
|
||||||
|
tools: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockFastChatFn).toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('falls back to default tier when requested tier is unavailable', async () => {
|
||||||
|
const routerWithoutComplex = new ModelRouter({
|
||||||
|
default: mockDefaultClient,
|
||||||
|
fast: mockFastClient,
|
||||||
|
fallbackChain: [],
|
||||||
|
});
|
||||||
|
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: routerWithoutComplex,
|
||||||
|
systemPrompt: 'You are a helpful assistant.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await orchestrator.delegate({
|
||||||
|
tier: 'complex',
|
||||||
|
systemPrompt: 'Analyze deeply',
|
||||||
|
message: 'This is complex',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.content).toBe('default response');
|
||||||
|
expect(result.tier).toBe('default');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('tracks cumulative usage after delegate calls', async () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.delegate({
|
||||||
|
tier: 'fast',
|
||||||
|
systemPrompt: 'Fast task',
|
||||||
|
message: 'Fast message',
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.delegate({
|
||||||
|
tier: 'complex',
|
||||||
|
systemPrompt: 'Complex task',
|
||||||
|
message: 'Complex message',
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.delegate({
|
||||||
|
tier: 'fast',
|
||||||
|
systemPrompt: 'Another fast task',
|
||||||
|
message: 'Another fast message',
|
||||||
|
});
|
||||||
|
|
||||||
|
const usage = orchestrator.getDelegationUsage();
|
||||||
|
|
||||||
|
expect(usage.fast).toEqual({
|
||||||
|
inputTokens: 100,
|
||||||
|
outputTokens: 50,
|
||||||
|
calls: 2,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(usage.complex).toEqual({
|
||||||
|
inputTokens: 200,
|
||||||
|
outputTokens: 100,
|
||||||
|
calls: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(usage.default).toBeUndefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('tracks usage across tiers correctly', async () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.delegate({
|
||||||
|
tier: 'fast',
|
||||||
|
systemPrompt: 'Fast task',
|
||||||
|
message: 'Fast message',
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.delegate({
|
||||||
|
tier: 'fast',
|
||||||
|
systemPrompt: 'Another fast task',
|
||||||
|
message: 'Another fast message',
|
||||||
|
});
|
||||||
|
|
||||||
|
const usage = orchestrator.getDelegationUsage();
|
||||||
|
|
||||||
|
expect(usage.fast.inputTokens).toBe(100);
|
||||||
|
expect(usage.fast.outputTokens).toBe(50);
|
||||||
|
expect(usage.fast.calls).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('logs delegation details with tier and token counts', async () => {
|
||||||
|
const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
|
||||||
|
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.delegate({
|
||||||
|
tier: 'fast',
|
||||||
|
systemPrompt: 'Fast task',
|
||||||
|
message: 'Fast message',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(consoleSpy).toHaveBeenCalledWith(
|
||||||
|
'[Flynn:delegate] tier=fast tokens=50+25'
|
||||||
|
);
|
||||||
|
|
||||||
|
consoleSpy.mockRestore();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('getDelegationTier()', () => {
|
||||||
|
it('returns correct tier for each task type', () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(orchestrator.getDelegationTier('compaction')).toBe('fast');
|
||||||
|
expect(orchestrator.getDelegationTier('memory_extraction')).toBe('default');
|
||||||
|
expect(orchestrator.getDelegationTier('classification')).toBe('complex');
|
||||||
|
expect(orchestrator.getDelegationTier('tool_summarisation')).toBe('default');
|
||||||
|
expect(orchestrator.getDelegationTier('complex_reasoning')).toBe('complex');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns tier that was explicitly configured', () => {
|
||||||
|
const customDelegation = {
|
||||||
|
compaction: 'local' as const,
|
||||||
|
memory_extraction: 'fast' as const,
|
||||||
|
classification: 'complex' as const,
|
||||||
|
tool_summarisation: 'default' as const,
|
||||||
|
complex_reasoning: 'local' as const,
|
||||||
|
};
|
||||||
|
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: customDelegation,
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(orchestrator.getDelegationTier('compaction')).toBe('local');
|
||||||
|
expect(orchestrator.getDelegationTier('memory_extraction')).toBe('fast');
|
||||||
|
expect(orchestrator.getDelegationTier('complex_reasoning')).toBe('local');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('process()', () => {
|
||||||
|
it('proxies to NativeAgent for user messages', async () => {
|
||||||
|
const mockDefaultChatClient = mockRouter.getClient('default')!;
|
||||||
|
const mockDefaultChatFn = vi.fn().mockResolvedValue({
|
||||||
|
content: 'Agent response',
|
||||||
|
stopReason: 'end_turn',
|
||||||
|
usage: { inputTokens: 150, outputTokens: 75 },
|
||||||
|
} as ChatResponse);
|
||||||
|
|
||||||
|
Object.assign(mockDefaultChatClient, { chat: mockDefaultChatFn });
|
||||||
|
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are a helpful agent.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await orchestrator.process('Hello, agent!');
|
||||||
|
|
||||||
|
expect(response).toBe('Agent response');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('maintains conversation history through process()', async () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are a helpful agent.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.process('Hello');
|
||||||
|
await orchestrator.process('How are you?');
|
||||||
|
await orchestrator.process('Tell me about yourself');
|
||||||
|
|
||||||
|
const history = orchestrator.getHistory();
|
||||||
|
|
||||||
|
expect(history).toHaveLength(6);
|
||||||
|
expect(history[0]).toEqual({ role: 'user', content: 'Hello' });
|
||||||
|
expect(history[1]).toEqual({ role: 'assistant', content: 'default response' });
|
||||||
|
expect(history[2]).toEqual({ role: 'user', content: 'How are you?' });
|
||||||
|
expect(history[3]).toEqual({ role: 'assistant', content: 'default response' });
|
||||||
|
expect(history[4]).toEqual({ role: 'user', content: 'Tell me about yourself' });
|
||||||
|
expect(history[5]).toEqual({ role: 'assistant', content: 'default response' });
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('reset()', () => {
|
||||||
|
it('clears primary agent conversation history', async () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are a helpful agent.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.process('Hello');
|
||||||
|
await orchestrator.process('How are you?');
|
||||||
|
|
||||||
|
expect(orchestrator.getHistory()).toHaveLength(4);
|
||||||
|
|
||||||
|
orchestrator.reset();
|
||||||
|
|
||||||
|
expect(orchestrator.getHistory()).toHaveLength(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('can be called multiple times', async () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are a helpful agent.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.process('Hello');
|
||||||
|
orchestrator.reset();
|
||||||
|
|
||||||
|
expect(orchestrator.getHistory()).toHaveLength(0);
|
||||||
|
|
||||||
|
await orchestrator.process('World');
|
||||||
|
orchestrator.reset();
|
||||||
|
|
||||||
|
expect(orchestrator.getHistory()).toHaveLength(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('getDelegationUsage()', () => {
|
||||||
|
it('returns copy of usage stats (doesn\'t expose internal map)', async () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.delegate({
|
||||||
|
tier: 'fast',
|
||||||
|
systemPrompt: 'Fast task',
|
||||||
|
message: 'Fast message',
|
||||||
|
});
|
||||||
|
|
||||||
|
const usage1 = orchestrator.getDelegationUsage();
|
||||||
|
const usage2 = orchestrator.getDelegationUsage();
|
||||||
|
|
||||||
|
expect(usage1).toEqual(usage2);
|
||||||
|
|
||||||
|
usage1.fast.inputTokens = 999;
|
||||||
|
|
||||||
|
expect(usage2.fast.inputTokens).toBe(50);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns empty object when no usage tracked', async () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
const usage = orchestrator.getDelegationUsage();
|
||||||
|
|
||||||
|
expect(usage).toEqual({});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('getHistory()', () => {
|
||||||
|
it('returns conversation history from primary agent', async () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are a helpful agent.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
await orchestrator.process('Hello');
|
||||||
|
await orchestrator.process('How are you?');
|
||||||
|
|
||||||
|
const history = orchestrator.getHistory();
|
||||||
|
|
||||||
|
expect(history).toHaveLength(4);
|
||||||
|
expect(history[0]).toEqual({ role: 'user', content: 'Hello' });
|
||||||
|
expect(history[1]).toEqual({ role: 'assistant', content: 'default response' });
|
||||||
|
expect(history[2]).toEqual({ role: 'user', content: 'How are you?' });
|
||||||
|
expect(history[3]).toEqual({ role: 'assistant', content: 'default response' });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns empty array when no history', async () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are a helpful agent.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
const history = orchestrator.getHistory();
|
||||||
|
|
||||||
|
expect(history).toEqual([]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('setModelTier()', () => {
|
||||||
|
it('sets model tier on primary agent', () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
orchestrator.setModelTier('fast');
|
||||||
|
|
||||||
|
expect(orchestrator.getModelTier()).toBe('fast');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('allows tier changes after initialization', () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(orchestrator.getModelTier()).toBe('default');
|
||||||
|
|
||||||
|
orchestrator.setModelTier('complex');
|
||||||
|
|
||||||
|
expect(orchestrator.getModelTier()).toBe('complex');
|
||||||
|
|
||||||
|
orchestrator.setModelTier('fast');
|
||||||
|
|
||||||
|
expect(orchestrator.getModelTier()).toBe('fast');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('setOnToolUse()', () => {
|
||||||
|
it('sets tool-use callback on primary agent', () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
const callback = vi.fn();
|
||||||
|
|
||||||
|
orchestrator.setOnToolUse(callback);
|
||||||
|
|
||||||
|
expect(orchestrator.getModelTier()).toBe('default');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('allows callback changes', () => {
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: mockRouter,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
const callback1 = vi.fn();
|
||||||
|
const callback2 = vi.fn();
|
||||||
|
|
||||||
|
orchestrator.setOnToolUse(callback1);
|
||||||
|
|
||||||
|
expect(orchestrator.getModelTier()).toBe('default');
|
||||||
|
|
||||||
|
orchestrator.setOnToolUse(callback2);
|
||||||
|
|
||||||
|
expect(orchestrator.getModelTier()).toBe('default');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,309 @@
|
|||||||
|
import type { ModelRouter, ModelTier } from '../../models/router.js';
|
||||||
|
import type { ChatRequest, Message, TokenUsage } from '../../models/types.js';
|
||||||
|
import type { Session } from '../../session/index.js';
|
||||||
|
import type { ToolRegistry } from '../../tools/registry.js';
|
||||||
|
import type { ToolExecutor } from '../../tools/executor.js';
|
||||||
|
import { NativeAgent } from './agent.js';
|
||||||
|
import type { ToolUseEvent } from './agent.js';
|
||||||
|
import { shouldCompact } from '../../context/tokens.js';
|
||||||
|
import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
|
||||||
|
|
||||||
|
// ── Public types ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/** A single-turn, stateless request to a sub-agent at a specific tier. */
|
||||||
|
export interface SubAgentRequest {
|
||||||
|
tier: ModelTier;
|
||||||
|
systemPrompt: string;
|
||||||
|
message: string;
|
||||||
|
maxTokens?: number;
|
||||||
|
/** When true, include tools from the toolRegistry in the request. */
|
||||||
|
tools?: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Result returned from a sub-agent delegation call. */
|
||||||
|
export interface SubAgentResult {
|
||||||
|
content: string;
|
||||||
|
usage: TokenUsage;
|
||||||
|
tier: ModelTier;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Maps each delegation task to the model tier that should handle it. */
|
||||||
|
export interface DelegationConfig {
|
||||||
|
compaction: ModelTier;
|
||||||
|
memory_extraction: ModelTier;
|
||||||
|
classification: ModelTier;
|
||||||
|
tool_summarisation: ModelTier;
|
||||||
|
complex_reasoning: ModelTier;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Per-tier cumulative usage statistics. */
|
||||||
|
interface TierUsageStats {
|
||||||
|
inputTokens: number;
|
||||||
|
outputTokens: number;
|
||||||
|
calls: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Full configuration for the AgentOrchestrator. */
|
||||||
|
export interface OrchestratorConfig {
|
||||||
|
modelRouter: ModelRouter;
|
||||||
|
systemPrompt: string;
|
||||||
|
session?: Session;
|
||||||
|
toolRegistry?: ToolRegistry;
|
||||||
|
toolExecutor?: ToolExecutor;
|
||||||
|
maxIterations?: number;
|
||||||
|
/** The tier used by the primary NativeAgent for user-facing conversation. */
|
||||||
|
primaryTier: ModelTier;
|
||||||
|
/** Which tier to use for each delegation task type. */
|
||||||
|
delegation: DelegationConfig;
|
||||||
|
/** Maximum nesting depth for delegation calls (safety guard). */
|
||||||
|
maxDelegationDepth: number;
|
||||||
|
onToolUse?: (event: ToolUseEvent) => void;
|
||||||
|
/** Context compaction settings. When provided, enables automatic compaction. */
|
||||||
|
compaction?: CompactionConfig;
|
||||||
|
/** Model identifier for the primary model (used for context window lookup). */
|
||||||
|
modelName?: string;
|
||||||
|
/** Optional override for the context window size (in tokens). */
|
||||||
|
contextWindow?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── AgentOrchestrator ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps a primary NativeAgent and adds the ability to delegate
|
||||||
|
* single-turn sub-tasks to different model tiers via the ModelRouter.
|
||||||
|
*
|
||||||
|
* The primary agent handles the main conversation loop (with tools),
|
||||||
|
* while `delegate()` enables cheap, stateless calls for tasks like
|
||||||
|
* compaction, classification, and memory extraction.
|
||||||
|
*/
|
||||||
|
export class AgentOrchestrator {
|
||||||
|
private _agent: NativeAgent;
|
||||||
|
private _modelRouter: ModelRouter;
|
||||||
|
private _delegation: DelegationConfig;
|
||||||
|
private _maxDelegationDepth: number;
|
||||||
|
private _toolRegistry?: ToolRegistry;
|
||||||
|
private _session?: Session;
|
||||||
|
private _compactionConfig?: CompactionConfig;
|
||||||
|
private _modelName?: string;
|
||||||
|
private _contextWindow?: number;
|
||||||
|
private _usageByTier: Map<string, TierUsageStats> = new Map();
|
||||||
|
|
||||||
|
constructor(config: OrchestratorConfig) {
|
||||||
|
this._modelRouter = config.modelRouter;
|
||||||
|
this._delegation = config.delegation;
|
||||||
|
this._maxDelegationDepth = config.maxDelegationDepth;
|
||||||
|
this._toolRegistry = config.toolRegistry;
|
||||||
|
this._session = config.session;
|
||||||
|
this._compactionConfig = config.compaction;
|
||||||
|
this._modelName = config.modelName;
|
||||||
|
this._contextWindow = config.contextWindow;
|
||||||
|
|
||||||
|
// Create the primary NativeAgent for user-facing conversation
|
||||||
|
this._agent = new NativeAgent({
|
||||||
|
modelClient: config.modelRouter,
|
||||||
|
systemPrompt: config.systemPrompt,
|
||||||
|
session: config.session,
|
||||||
|
toolRegistry: config.toolRegistry,
|
||||||
|
toolExecutor: config.toolExecutor,
|
||||||
|
maxIterations: config.maxIterations,
|
||||||
|
onToolUse: config.onToolUse,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Set the primary tier on the agent
|
||||||
|
this._agent.setModelTier(config.primaryTier);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Delegation ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform a single-turn, stateless call to a model at the specified tier.
|
||||||
|
*
|
||||||
|
* This is used for internal sub-tasks (compaction, classification, etc.)
|
||||||
|
* that don't need the full conversation history or tool loop.
|
||||||
|
*
|
||||||
|
* If the requested tier is not available on the router, falls back to
|
||||||
|
* the 'default' tier with a warning.
|
||||||
|
*/
|
||||||
|
async delegate(request: SubAgentRequest): Promise<SubAgentResult> {
|
||||||
|
let tier = request.tier;
|
||||||
|
|
||||||
|
// Check if the requested tier is available; fall back to 'default' if not
|
||||||
|
const client = this._modelRouter.getClient(tier);
|
||||||
|
if (!client) {
|
||||||
|
console.warn(
|
||||||
|
`[Flynn:delegate] Tier '${tier}' not available, falling back to 'default'`,
|
||||||
|
);
|
||||||
|
tier = 'default';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the single-turn chat request
|
||||||
|
const messages: Message[] = [
|
||||||
|
{ role: 'user', content: request.message },
|
||||||
|
];
|
||||||
|
|
||||||
|
const chatRequest: ChatRequest = {
|
||||||
|
messages,
|
||||||
|
system: request.systemPrompt,
|
||||||
|
maxTokens: request.maxTokens,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Optionally include tools from the registry
|
||||||
|
if (request.tools && this._toolRegistry) {
|
||||||
|
chatRequest.tools = this._toolRegistry.toAnthropicFormat();
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await this._modelRouter.chat(chatRequest, tier);
|
||||||
|
|
||||||
|
// Track cumulative usage for this tier
|
||||||
|
this._trackUsage(tier, response.usage);
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`[Flynn:delegate] tier=${tier} tokens=${response.usage.inputTokens}+${response.usage.outputTokens}`,
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
content: response.content,
|
||||||
|
usage: response.usage,
|
||||||
|
tier,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Primary agent proxies ─────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process a user message through the primary NativeAgent.
|
||||||
|
* This is the main entry point for user-facing conversation.
|
||||||
|
*
|
||||||
|
* When compaction is configured, checks whether the conversation history
|
||||||
|
* exceeds the context window threshold and compacts it before processing.
|
||||||
|
*/
|
||||||
|
async process(userMessage: string): Promise<string> {
|
||||||
|
await this.compactIfNeeded();
|
||||||
|
return this._agent.process(userMessage);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Force-compact the current conversation history regardless of threshold.
|
||||||
|
* Returns the compaction result, or null if there was nothing to compact
|
||||||
|
* (e.g. no session, too few messages).
|
||||||
|
*/
|
||||||
|
async compact(): Promise<CompactionResult | null> {
|
||||||
|
const config = this._compactionConfig ?? DEFAULT_COMPACTION_CONFIG;
|
||||||
|
const messages = this.getHistory();
|
||||||
|
|
||||||
|
if (messages.length === 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const result = await compactHistory({
|
||||||
|
messages,
|
||||||
|
orchestrator: this,
|
||||||
|
config,
|
||||||
|
});
|
||||||
|
|
||||||
|
// If nothing was actually compacted, skip the replace
|
||||||
|
if (result.compactedCount === 0) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Persist the compacted history
|
||||||
|
if (this._session) {
|
||||||
|
this._session.replaceHistory(result.messages);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`[Flynn:compact] Compacted ${result.compactedCount} messages: ` +
|
||||||
|
`${result.tokensBefore} → ${result.tokensAfter} tokens`,
|
||||||
|
);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Reset the primary agent's conversation history. */
|
||||||
|
reset(): void {
|
||||||
|
this._agent.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Get the primary agent's conversation history. */
|
||||||
|
getHistory(): Message[] {
|
||||||
|
return this._agent.getHistory();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Set the model tier on the primary agent. */
|
||||||
|
setModelTier(tier: ModelTier): void {
|
||||||
|
this._agent.setModelTier(tier);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Get the current model tier of the primary agent. */
|
||||||
|
getModelTier(): ModelTier {
|
||||||
|
return this._agent.getModelTier();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Set the tool-use callback on the primary agent. */
|
||||||
|
setOnToolUse(callback: ((event: ToolUseEvent) => void) | undefined): void {
|
||||||
|
this._agent.setOnToolUse(callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Usage & config accessors ──────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns cumulative delegation usage stats per tier.
|
||||||
|
* Useful for cost tracking and visibility into sub-agent calls.
|
||||||
|
*/
|
||||||
|
getDelegationUsage(): Record<string, TierUsageStats> {
|
||||||
|
const result: Record<string, TierUsageStats> = {};
|
||||||
|
for (const [tier, stats] of this._usageByTier) {
|
||||||
|
result[tier] = { ...stats };
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Look up which model tier is configured for a given delegation task.
|
||||||
|
* Convenience method so callers don't need to access the config directly.
|
||||||
|
*/
|
||||||
|
getDelegationTier(task: keyof DelegationConfig): ModelTier {
|
||||||
|
return this._delegation[task];
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Private helpers ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check whether automatic compaction should run, and if so, compact.
|
||||||
|
* Called before each `process()` call when compaction is configured.
|
||||||
|
*/
|
||||||
|
private async compactIfNeeded(): Promise<void> {
|
||||||
|
if (!this._compactionConfig) return;
|
||||||
|
|
||||||
|
const messages = this.getHistory();
|
||||||
|
if (messages.length === 0) return;
|
||||||
|
|
||||||
|
const model = this._modelName ?? 'unknown';
|
||||||
|
const needs = shouldCompact({
|
||||||
|
messages,
|
||||||
|
model,
|
||||||
|
contextWindow: this._contextWindow,
|
||||||
|
thresholdPct: this._compactionConfig.thresholdPct,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!needs) return;
|
||||||
|
|
||||||
|
await this.compact();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Accumulate usage stats for a given tier. */
|
||||||
|
private _trackUsage(tier: ModelTier, usage: TokenUsage): void {
|
||||||
|
const existing = this._usageByTier.get(tier);
|
||||||
|
if (existing) {
|
||||||
|
existing.inputTokens += usage.inputTokens;
|
||||||
|
existing.outputTokens += usage.outputTokens;
|
||||||
|
existing.calls += 1;
|
||||||
|
} else {
|
||||||
|
this._usageByTier.set(tier, {
|
||||||
|
inputTokens: usage.inputTokens,
|
||||||
|
outputTokens: usage.outputTokens,
|
||||||
|
calls: 1,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,94 @@
|
|||||||
|
/**
|
||||||
|
* System prompts for delegated tasks.
|
||||||
|
*
|
||||||
|
* Each prompt is designed for a specific sub-task that the agent farms out
|
||||||
|
* to a (usually cheaper/faster) model call. Keep them focused and
|
||||||
|
* deterministic — the caller should be able to parse the output reliably.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instructs a model to summarise conversation history during compaction.
|
||||||
|
* The resulting summary replaces the full history to reclaim context window space.
|
||||||
|
*/
|
||||||
|
export const COMPACTION_SYSTEM_PROMPT = `You are a conversation summariser. Your job is to condense a conversation history into a concise summary that preserves all important information.
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Preserve key facts, decisions, user preferences, and action items.
|
||||||
|
- Maintain chronological order of events.
|
||||||
|
- Note any unresolved questions or pending tasks.
|
||||||
|
- Be concise but thorough — aim for roughly 20% of the original length.
|
||||||
|
- Use bullet points for clarity.
|
||||||
|
- Never invent information that is not present in the conversation.
|
||||||
|
- If the conversation references files, paths, error messages, or specific values, include them verbatim.
|
||||||
|
- Group related points together under short descriptive headings when it aids readability.
|
||||||
|
|
||||||
|
Output format:
|
||||||
|
Return a markdown summary with bullet points. Do not include any preamble or explanation — output only the summary.`;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instructs a model to extract persistent facts from conversation text.
|
||||||
|
* Extracted facts are stored in long-term memory for future sessions.
|
||||||
|
*/
|
||||||
|
export const MEMORY_EXTRACTION_PROMPT = `You are a fact extractor. Given a block of conversation text, extract persistent facts worth remembering across sessions.
|
||||||
|
|
||||||
|
Categories to extract:
|
||||||
|
|
||||||
|
## User
|
||||||
|
- Name, role, location, timezone, or other personal details explicitly shared.
|
||||||
|
|
||||||
|
## Preferences
|
||||||
|
- Communication style, formatting preferences, tool preferences, workflow habits.
|
||||||
|
|
||||||
|
## Technical
|
||||||
|
- Project names, repositories, tech stacks, conventions, architecture decisions.
|
||||||
|
- File paths, environment details, deployment targets.
|
||||||
|
|
||||||
|
## Decisions
|
||||||
|
- Explicit decisions made during the conversation (e.g. "we decided to use X instead of Y").
|
||||||
|
- Rationale for decisions when stated.
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Only extract facts that are explicitly stated — never infer or assume.
|
||||||
|
- Skip transient or session-specific information (e.g. "run this command now", "fix this error today").
|
||||||
|
- Skip information that is only relevant to the current task and has no long-term value.
|
||||||
|
- If no facts worth extracting exist, return an empty response.
|
||||||
|
- Use concise bullet points under each category heading.
|
||||||
|
- Omit any category that has no entries.
|
||||||
|
|
||||||
|
Output format:
|
||||||
|
Return markdown with the category headings above and bullet points underneath. No preamble.`;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instructs a model to classify an inbound message into a discrete category.
|
||||||
|
* The caller uses the label to route the message to the appropriate handler.
|
||||||
|
*/
|
||||||
|
export const CLASSIFICATION_PROMPT = `Classify the following message into exactly one of these categories:
|
||||||
|
|
||||||
|
- command — a direct instruction to perform an action (e.g. "run tests", "deploy to staging")
|
||||||
|
- question — a request for information or explanation (e.g. "what does this function do?")
|
||||||
|
- task — a multi-step objective that requires planning (e.g. "add authentication to the API")
|
||||||
|
- conversation — casual chat, greetings, acknowledgements, or social interaction
|
||||||
|
- unclear — the message is ambiguous or lacks enough context to classify
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Return ONLY the classification label — a single word, nothing else.
|
||||||
|
- Do not explain your reasoning.
|
||||||
|
- If the message fits multiple categories, choose the most specific one (command > task > question > conversation).`;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instructs a model to condense verbose tool output into a compact summary.
|
||||||
|
* Used to shrink large tool results before they consume context window space.
|
||||||
|
*/
|
||||||
|
export const TOOL_SUMMARISATION_PROMPT = `You are a tool-output summariser. Given the raw output of a tool invocation, produce a compact summary that preserves the essential information.
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Preserve the key outcome: success or failure.
|
||||||
|
- Preserve important data: counts, IDs, names, statuses.
|
||||||
|
- Preserve all file paths, error codes, error messages, and specific values verbatim.
|
||||||
|
- Strip boilerplate, redundant lines, decorative formatting, and progress indicators.
|
||||||
|
- Keep the summary under 500 tokens.
|
||||||
|
- If the output is already concise, return it as-is rather than paraphrasing.
|
||||||
|
- Use a structured format (bullet points or short paragraphs) for readability.
|
||||||
|
|
||||||
|
Output format:
|
||||||
|
Return the summarised output directly. No preamble or meta-commentary.`;
|
||||||
+1
-1
@@ -1,2 +1,2 @@
|
|||||||
export { loadConfig } from './loader.js';
|
export { loadConfig } from './loader.js';
|
||||||
export { configSchema, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig } from './schema.js';
|
export { configSchema, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig, type AgentsConfig, type CompactionConfig } from './schema.js';
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ const modelConfigSchema = z.object({
|
|||||||
auth_token: z.string().optional(),
|
auth_token: z.string().optional(),
|
||||||
for: z.array(z.string()).optional(),
|
for: z.array(z.string()).optional(),
|
||||||
num_gpu: z.number().optional(),
|
num_gpu: z.number().optional(),
|
||||||
|
context_window: z.number().optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
const modelsSchema = z.object({
|
const modelsSchema = z.object({
|
||||||
@@ -87,6 +88,32 @@ const automationSchema = z.object({
|
|||||||
cron: z.array(cronJobSchema).default([]),
|
cron: z.array(cronJobSchema).default([]),
|
||||||
}).default({});
|
}).default({});
|
||||||
|
|
||||||
|
const agentsSchema = z.object({
|
||||||
|
primary_tier: z.enum(['fast', 'default', 'complex', 'local']).default('default'),
|
||||||
|
delegation: z.object({
|
||||||
|
compaction: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
|
||||||
|
memory_extraction: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
|
||||||
|
classification: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
|
||||||
|
tool_summarisation: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
|
||||||
|
complex_reasoning: z.enum(['fast', 'default', 'complex', 'local']).default('complex'),
|
||||||
|
}).default({
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'fast',
|
||||||
|
classification: 'fast',
|
||||||
|
tool_summarisation: 'fast',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
}),
|
||||||
|
auto_escalate: z.boolean().default(false),
|
||||||
|
max_delegation_depth: z.number().min(1).max(10).default(3),
|
||||||
|
}).default({});
|
||||||
|
|
||||||
|
const compactionSchema = z.object({
|
||||||
|
enabled: z.boolean().default(true),
|
||||||
|
threshold_pct: z.number().min(10).max(100).default(80),
|
||||||
|
keep_turns: z.number().min(1).max(50).default(4),
|
||||||
|
summary_max_tokens: z.number().min(128).max(4096).default(1024),
|
||||||
|
}).default({});
|
||||||
|
|
||||||
export const configSchema = z.object({
|
export const configSchema = z.object({
|
||||||
telegram: telegramSchema,
|
telegram: telegramSchema,
|
||||||
server: serverSchema.default({}),
|
server: serverSchema.default({}),
|
||||||
@@ -96,9 +123,13 @@ export const configSchema = z.object({
|
|||||||
skills: skillsSchema.default({}),
|
skills: skillsSchema.default({}),
|
||||||
mcp: mcpSchema.default({ servers: [] }),
|
mcp: mcpSchema.default({ servers: [] }),
|
||||||
automation: automationSchema,
|
automation: automationSchema,
|
||||||
|
agents: agentsSchema,
|
||||||
|
compaction: compactionSchema,
|
||||||
});
|
});
|
||||||
|
|
||||||
export type Config = z.infer<typeof configSchema>;
|
export type Config = z.infer<typeof configSchema>;
|
||||||
export type TelegramConfig = z.infer<typeof telegramSchema>;
|
export type TelegramConfig = z.infer<typeof telegramSchema>;
|
||||||
export type ModelConfig = z.infer<typeof modelConfigSchema>;
|
export type ModelConfig = z.infer<typeof modelConfigSchema>;
|
||||||
export type CronJobConfig = z.infer<typeof cronJobSchema>;
|
export type CronJobConfig = z.infer<typeof cronJobSchema>;
|
||||||
|
export type AgentsConfig = z.infer<typeof agentsSchema>;
|
||||||
|
export type CompactionConfig = z.infer<typeof compactionSchema>;
|
||||||
|
|||||||
@@ -0,0 +1,104 @@
|
|||||||
|
import { describe, it, expect, vi } from 'vitest';
|
||||||
|
import { compactHistory, DEFAULT_COMPACTION_CONFIG } from './compaction.js';
|
||||||
|
import type { CompactionConfig } from './compaction.js';
|
||||||
|
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
|
||||||
|
import type { Message } from '../models/types.js';
|
||||||
|
|
||||||
|
function makeMockOrchestrator(summaryText = 'Summary of conversation'): AgentOrchestrator {
|
||||||
|
return {
|
||||||
|
getDelegationTier: vi.fn().mockReturnValue('fast'),
|
||||||
|
delegate: vi.fn().mockResolvedValue({
|
||||||
|
content: summaryText,
|
||||||
|
usage: { inputTokens: 100, outputTokens: 50 },
|
||||||
|
tier: 'fast',
|
||||||
|
}),
|
||||||
|
} as unknown as AgentOrchestrator;
|
||||||
|
}
|
||||||
|
|
||||||
|
function makeMessages(count: number): Message[] {
|
||||||
|
const msgs: Message[] = [];
|
||||||
|
for (let i = 0; i < count; i++) {
|
||||||
|
msgs.push({
|
||||||
|
role: i % 2 === 0 ? 'user' : 'assistant',
|
||||||
|
content: `Message ${i}`,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return msgs;
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('compactHistory', () => {
|
||||||
|
const config: CompactionConfig = {
|
||||||
|
thresholdPct: 80,
|
||||||
|
keepTurns: 2, // keeps last 4 messages
|
||||||
|
summaryMaxTokens: 1024,
|
||||||
|
};
|
||||||
|
|
||||||
|
it('returns no-op when messages count is at or below keepTurns threshold', async () => {
|
||||||
|
const messages = makeMessages(4); // keepTurns=2 → keep 4 messages
|
||||||
|
const orchestrator = makeMockOrchestrator();
|
||||||
|
|
||||||
|
const result = await compactHistory({ messages, orchestrator, config });
|
||||||
|
|
||||||
|
expect(result.compactedCount).toBe(0);
|
||||||
|
expect(result.messages).toEqual(messages);
|
||||||
|
expect(orchestrator.delegate).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('compacts older messages and keeps recent ones', async () => {
|
||||||
|
const messages = makeMessages(10); // 10 messages, keep last 4, compact 6
|
||||||
|
const orchestrator = makeMockOrchestrator('Summarized conversation');
|
||||||
|
|
||||||
|
const result = await compactHistory({ messages, orchestrator, config });
|
||||||
|
|
||||||
|
expect(result.compactedCount).toBe(6);
|
||||||
|
expect(result.messages).toHaveLength(5); // 1 summary + 4 kept
|
||||||
|
expect(result.messages[0].role).toBe('assistant');
|
||||||
|
expect(result.messages[0].content).toContain('[Summary of earlier conversation]');
|
||||||
|
expect(result.messages[0].content).toContain('Summarized conversation');
|
||||||
|
// Last 4 messages should be preserved
|
||||||
|
expect(result.messages.slice(1)).toEqual(messages.slice(-4));
|
||||||
|
});
|
||||||
|
|
||||||
|
it('calls delegate with compaction tier and correct params', async () => {
|
||||||
|
const messages = makeMessages(10);
|
||||||
|
const orchestrator = makeMockOrchestrator();
|
||||||
|
|
||||||
|
await compactHistory({ messages, orchestrator, config });
|
||||||
|
|
||||||
|
expect(orchestrator.getDelegationTier).toHaveBeenCalledWith('compaction');
|
||||||
|
expect(orchestrator.delegate).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({
|
||||||
|
tier: 'fast',
|
||||||
|
maxTokens: 1024,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('populates token counts in result', async () => {
|
||||||
|
const messages = makeMessages(10);
|
||||||
|
const orchestrator = makeMockOrchestrator();
|
||||||
|
|
||||||
|
const result = await compactHistory({ messages, orchestrator, config });
|
||||||
|
|
||||||
|
expect(result.tokensBefore).toBeGreaterThan(0);
|
||||||
|
expect(result.tokensAfter).toBeGreaterThan(0);
|
||||||
|
expect(result.tokensAfter).toBeLessThan(result.tokensBefore);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('handles single turn above keepTurns threshold', async () => {
|
||||||
|
// 3 turns = 6 messages, keepTurns=2 keeps 4, compacts 2
|
||||||
|
const messages = makeMessages(6);
|
||||||
|
const orchestrator = makeMockOrchestrator();
|
||||||
|
|
||||||
|
const result = await compactHistory({ messages, orchestrator, config });
|
||||||
|
|
||||||
|
expect(result.compactedCount).toBe(2);
|
||||||
|
expect(result.messages).toHaveLength(5); // 1 summary + 4 kept
|
||||||
|
});
|
||||||
|
|
||||||
|
it('uses DEFAULT_COMPACTION_CONFIG values correctly', () => {
|
||||||
|
expect(DEFAULT_COMPACTION_CONFIG.thresholdPct).toBe(80);
|
||||||
|
expect(DEFAULT_COMPACTION_CONFIG.keepTurns).toBe(4);
|
||||||
|
expect(DEFAULT_COMPACTION_CONFIG.summaryMaxTokens).toBe(1024);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
import type { Message } from '../models/types.js';
|
||||||
|
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
|
||||||
|
import { COMPACTION_SYSTEM_PROMPT } from '../backends/native/prompts.js';
|
||||||
|
import { estimateMessageTokens } from './tokens.js';
|
||||||
|
|
||||||
|
export interface CompactionConfig {
|
||||||
|
/** Percentage of context window that triggers compaction (default: 80). */
|
||||||
|
thresholdPct: number;
|
||||||
|
/** Number of recent turns (user+assistant pairs) to always keep intact. */
|
||||||
|
keepTurns: number;
|
||||||
|
/** Maximum tokens for the compaction summary response. */
|
||||||
|
summaryMaxTokens: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface CompactionResult {
|
||||||
|
/** The compacted messages: [summary, ...recentMessages]. */
|
||||||
|
messages: Message[];
|
||||||
|
/** Number of messages that were compacted (removed). */
|
||||||
|
compactedCount: number;
|
||||||
|
/** Estimated tokens before compaction. */
|
||||||
|
tokensBefore: number;
|
||||||
|
/** Estimated tokens after compaction. */
|
||||||
|
tokensAfter: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
|
||||||
|
thresholdPct: 80,
|
||||||
|
keepTurns: 4,
|
||||||
|
summaryMaxTokens: 1024,
|
||||||
|
};
|
||||||
|
|
||||||
|
export async function compactHistory(opts: {
|
||||||
|
messages: Message[];
|
||||||
|
orchestrator: AgentOrchestrator;
|
||||||
|
config: CompactionConfig;
|
||||||
|
}): Promise<CompactionResult> {
|
||||||
|
const { messages, orchestrator, config } = opts;
|
||||||
|
|
||||||
|
const keepCount = config.keepTurns * 2;
|
||||||
|
if (messages.length <= keepCount) {
|
||||||
|
return {
|
||||||
|
messages,
|
||||||
|
compactedCount: 0,
|
||||||
|
tokensBefore: estimateMessageTokens(messages),
|
||||||
|
tokensAfter: estimateMessageTokens(messages),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const toCompact = messages.slice(0, -keepCount);
|
||||||
|
const toKeep = messages.slice(-keepCount);
|
||||||
|
|
||||||
|
const formattedConversation = toCompact.map((msg) => `${msg.role}: ${msg.content}`).join('\n\n');
|
||||||
|
|
||||||
|
const tier = orchestrator.getDelegationTier('compaction');
|
||||||
|
|
||||||
|
const result = await orchestrator.delegate({
|
||||||
|
tier,
|
||||||
|
systemPrompt: COMPACTION_SYSTEM_PROMPT,
|
||||||
|
message: formattedConversation,
|
||||||
|
maxTokens: config.summaryMaxTokens,
|
||||||
|
});
|
||||||
|
|
||||||
|
const summaryMessage: Message = {
|
||||||
|
role: 'assistant',
|
||||||
|
content: '[Summary of earlier conversation]\n\n' + result.content,
|
||||||
|
};
|
||||||
|
|
||||||
|
return {
|
||||||
|
messages: [summaryMessage, ...toKeep],
|
||||||
|
compactedCount: toCompact.length,
|
||||||
|
tokensBefore: estimateMessageTokens(messages),
|
||||||
|
tokensAfter: estimateMessageTokens([summaryMessage, ...toKeep]),
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
export {
|
||||||
|
estimateTokens,
|
||||||
|
estimateMessageTokens,
|
||||||
|
getContextWindow,
|
||||||
|
shouldCompact,
|
||||||
|
CONTEXT_WINDOWS,
|
||||||
|
type ShouldCompactOpts,
|
||||||
|
} from './tokens.js';
|
||||||
|
|
||||||
|
export {
|
||||||
|
compactHistory,
|
||||||
|
type CompactionConfig,
|
||||||
|
type CompactionResult,
|
||||||
|
DEFAULT_COMPACTION_CONFIG,
|
||||||
|
} from './compaction.js';
|
||||||
@@ -0,0 +1,108 @@
|
|||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { estimateTokens, estimateMessageTokens, getContextWindow, shouldCompact, CONTEXT_WINDOWS } from './tokens.js';
|
||||||
|
|
||||||
|
describe('estimateTokens', () => {
|
||||||
|
it('returns 0 for empty string', () => {
|
||||||
|
// estimateTokens('') should be 0 — Math.ceil(0/4) = 0
|
||||||
|
expect(estimateTokens('')).toBe(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('estimates ~1 token per 4 characters', () => {
|
||||||
|
// 'abcd' = 4 chars → ceil(4/4) = 1
|
||||||
|
expect(estimateTokens('abcd')).toBe(1);
|
||||||
|
// 'abcde' = 5 chars → ceil(5/4) = 2
|
||||||
|
expect(estimateTokens('abcde')).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('handles longer text', () => {
|
||||||
|
const text = 'a'.repeat(100);
|
||||||
|
expect(estimateTokens(text)).toBe(25); // 100/4 = 25
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('estimateMessageTokens', () => {
|
||||||
|
it('returns 0 for empty array', () => {
|
||||||
|
expect(estimateMessageTokens([])).toBe(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('includes overhead per message', () => {
|
||||||
|
// 'abcd' = 1 token content + 4 overhead = 5 per message
|
||||||
|
const messages = [{ role: 'user' as const, content: 'abcd' }];
|
||||||
|
expect(estimateMessageTokens(messages)).toBe(5);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('sums multiple messages', () => {
|
||||||
|
const messages = [
|
||||||
|
{ role: 'user' as const, content: 'abcd' }, // 1 + 4 = 5
|
||||||
|
{ role: 'assistant' as const, content: 'abcd' }, // 1 + 4 = 5
|
||||||
|
];
|
||||||
|
expect(estimateMessageTokens(messages)).toBe(10);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('getContextWindow', () => {
|
||||||
|
it('returns known window for Claude Sonnet', () => {
|
||||||
|
expect(getContextWindow('claude-sonnet-4-20250514')).toBe(200_000);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns known window for GPT-4o', () => {
|
||||||
|
expect(getContextWindow('gpt-4o')).toBe(128_000);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns default 128000 for unknown model', () => {
|
||||||
|
expect(getContextWindow('unknown-model')).toBe(128_000);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns override when provided', () => {
|
||||||
|
expect(getContextWindow('claude-sonnet-4-20250514', 50_000)).toBe(50_000);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns override even for unknown model', () => {
|
||||||
|
expect(getContextWindow('unknown-model', 32_000)).toBe(32_000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('shouldCompact', () => {
|
||||||
|
it('returns false when messages are well below threshold', () => {
|
||||||
|
const messages = [{ role: 'user' as const, content: 'hello' }];
|
||||||
|
expect(shouldCompact({
|
||||||
|
messages,
|
||||||
|
model: 'gpt-4o', // 128k context window
|
||||||
|
})).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns true when messages exceed threshold', () => {
|
||||||
|
// Create messages that exceed 80% of a small context window
|
||||||
|
// context window = 100, threshold = 80% = 80 tokens
|
||||||
|
// each message: ceil(400/4) + 4 = 104 tokens → well over 80
|
||||||
|
const messages = [{ role: 'user' as const, content: 'a'.repeat(400) }];
|
||||||
|
expect(shouldCompact({
|
||||||
|
messages,
|
||||||
|
model: 'unknown',
|
||||||
|
contextWindow: 100,
|
||||||
|
thresholdPct: 80,
|
||||||
|
})).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('respects custom thresholdPct', () => {
|
||||||
|
// 1 message: ceil(20/4) + 4 = 9 tokens
|
||||||
|
// contextWindow = 100, thresholdPct = 5 → threshold = 5 tokens
|
||||||
|
const messages = [{ role: 'user' as const, content: 'a'.repeat(20) }];
|
||||||
|
expect(shouldCompact({
|
||||||
|
messages,
|
||||||
|
model: 'unknown',
|
||||||
|
contextWindow: 100,
|
||||||
|
thresholdPct: 5,
|
||||||
|
})).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('uses model lookup when no contextWindow override', () => {
|
||||||
|
// gpt-3.5-turbo = 16385 tokens, default threshold 80% = 13108
|
||||||
|
// Large message to exceed: ceil(60000/4) + 4 = 15004 tokens → over 13108
|
||||||
|
const messages = [{ role: 'user' as const, content: 'a'.repeat(60000) }];
|
||||||
|
expect(shouldCompact({
|
||||||
|
messages,
|
||||||
|
model: 'gpt-3.5-turbo',
|
||||||
|
})).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
import type { Message } from '../models/types.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Approximate overhead tokens per message (role marker, separators, etc.).
|
||||||
|
*/
|
||||||
|
const MESSAGE_OVERHEAD_TOKENS = 4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Conservative default context window when a model is not in the lookup table.
|
||||||
|
*/
|
||||||
|
const DEFAULT_CONTEXT_WINDOW = 128_000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hard-coded context window sizes (in tokens) for known models.
|
||||||
|
*/
|
||||||
|
export const CONTEXT_WINDOWS: Record<string, number> = {
|
||||||
|
'claude-sonnet-4-20250514': 200_000,
|
||||||
|
'claude-3-5-haiku-20241022': 200_000,
|
||||||
|
'claude-3-5-sonnet-20241022': 200_000,
|
||||||
|
'claude-3-opus-20240229': 200_000,
|
||||||
|
'claude-opus-4-20250514': 200_000,
|
||||||
|
'gpt-4o': 128_000,
|
||||||
|
'gpt-4o-mini': 128_000,
|
||||||
|
'gpt-4-turbo': 128_000,
|
||||||
|
'gpt-3.5-turbo': 16_385,
|
||||||
|
} as const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cheap character-based token estimation.
|
||||||
|
*
|
||||||
|
* Uses `Math.ceil(text.length / 4)` as a reasonable approximation for
|
||||||
|
* English text (roughly 4 characters per token on average).
|
||||||
|
*/
|
||||||
|
export function estimateTokens(text: string): number {
|
||||||
|
return Math.ceil(text.length / 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Estimate the total token count for an array of messages.
|
||||||
|
*
|
||||||
|
* For each message the estimate includes the content tokens plus a fixed
|
||||||
|
* overhead of ~4 tokens to account for the role marker and separators.
|
||||||
|
*/
|
||||||
|
export function estimateMessageTokens(messages: Message[]): number {
|
||||||
|
return messages.reduce(
|
||||||
|
(sum, msg) => sum + estimateTokens(msg.content) + MESSAGE_OVERHEAD_TOKENS,
|
||||||
|
0,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the context window size (in tokens) for a given model.
|
||||||
|
*
|
||||||
|
* @param model - Model identifier to look up.
|
||||||
|
* @param override - If provided, this value is returned directly.
|
||||||
|
* @returns The context window size in tokens.
|
||||||
|
*/
|
||||||
|
export function getContextWindow(model: string, override?: number): number {
|
||||||
|
if (override !== undefined) {
|
||||||
|
return override;
|
||||||
|
}
|
||||||
|
return CONTEXT_WINDOWS[model] ?? DEFAULT_CONTEXT_WINDOW;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Options for {@link shouldCompact}.
|
||||||
|
*/
|
||||||
|
export interface ShouldCompactOpts {
|
||||||
|
messages: Message[];
|
||||||
|
model: string;
|
||||||
|
contextWindow?: number;
|
||||||
|
/** Percentage of the context window that triggers compaction (default 80). */
|
||||||
|
thresholdPct?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determine whether the conversation should be compacted.
|
||||||
|
*
|
||||||
|
* Returns `true` when the estimated token count of `messages` exceeds
|
||||||
|
* `thresholdPct` percent of the effective context window.
|
||||||
|
*/
|
||||||
|
export function shouldCompact(opts: ShouldCompactOpts): boolean {
|
||||||
|
const { messages, model, contextWindow, thresholdPct = 80 } = opts;
|
||||||
|
const window = getContextWindow(model, contextWindow);
|
||||||
|
const threshold = (thresholdPct / 100) * window;
|
||||||
|
const estimated = estimateMessageTokens(messages);
|
||||||
|
return estimated > threshold;
|
||||||
|
}
|
||||||
+46
-9
@@ -2,7 +2,7 @@ import { Lifecycle } from './lifecycle.js';
|
|||||||
import type { Config, ModelConfig } from '../config/index.js';
|
import type { Config, ModelConfig } from '../config/index.js';
|
||||||
import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, ModelRouter } from '../models/index.js';
|
import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, ModelRouter } from '../models/index.js';
|
||||||
import type { ModelClient } from '../models/index.js';
|
import type { ModelClient } from '../models/index.js';
|
||||||
import { NativeAgent } from '../backends/index.js';
|
import { AgentOrchestrator, type DelegationConfig } from '../backends/index.js';
|
||||||
import { SessionStore, SessionManager } from '../session/index.js';
|
import { SessionStore, SessionManager } from '../session/index.js';
|
||||||
import { HookEngine } from '../hooks/index.js';
|
import { HookEngine } from '../hooks/index.js';
|
||||||
import { ToolRegistry, ToolExecutor, allBuiltinTools } from '../tools/index.js';
|
import { ToolRegistry, ToolExecutor, allBuiltinTools } from '../tools/index.js';
|
||||||
@@ -134,7 +134,8 @@ function createModelRouter(config: Config): ModelRouter {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Create the unified message handler for the channel registry.
|
* Create the unified message handler for the channel registry.
|
||||||
* Each channel+sender pair gets its own NativeAgent backed by a persistent session.
|
* Each channel+sender pair gets its own AgentOrchestrator backed by a persistent session.
|
||||||
|
* The orchestrator wraps a NativeAgent and adds delegation to different model tiers.
|
||||||
*/
|
*/
|
||||||
function createMessageRouter(deps: {
|
function createMessageRouter(deps: {
|
||||||
sessionManager: SessionManager;
|
sessionManager: SessionManager;
|
||||||
@@ -142,21 +143,39 @@ function createMessageRouter(deps: {
|
|||||||
systemPrompt: string;
|
systemPrompt: string;
|
||||||
toolRegistry: ToolRegistry;
|
toolRegistry: ToolRegistry;
|
||||||
toolExecutor: ToolExecutor;
|
toolExecutor: ToolExecutor;
|
||||||
|
config: Config;
|
||||||
}) {
|
}) {
|
||||||
// Cache agents by session ID to avoid recreating on every message
|
// Cache agents by session ID to avoid recreating on every message
|
||||||
const agents = new Map<string, NativeAgent>();
|
const agents = new Map<string, AgentOrchestrator>();
|
||||||
|
|
||||||
function getOrCreateAgent(channel: string, senderId: string): NativeAgent {
|
function getOrCreateAgent(channel: string, senderId: string): AgentOrchestrator {
|
||||||
const sessionId = `${channel}:${senderId}`;
|
const sessionId = `${channel}:${senderId}`;
|
||||||
let agent = agents.get(sessionId);
|
let agent = agents.get(sessionId);
|
||||||
if (!agent) {
|
if (!agent) {
|
||||||
const session = deps.sessionManager.getSession(channel, senderId);
|
const session = deps.sessionManager.getSession(channel, senderId);
|
||||||
agent = new NativeAgent({
|
const delegationConfig: DelegationConfig = {
|
||||||
modelClient: deps.modelRouter,
|
compaction: deps.config.agents.delegation.compaction ?? 'fast',
|
||||||
|
memory_extraction: deps.config.agents.delegation.memory_extraction ?? 'fast',
|
||||||
|
classification: deps.config.agents.delegation.classification ?? 'fast',
|
||||||
|
tool_summarisation: deps.config.agents.delegation.tool_summarisation ?? 'fast',
|
||||||
|
complex_reasoning: deps.config.agents.delegation.complex_reasoning ?? 'complex',
|
||||||
|
};
|
||||||
|
agent = new AgentOrchestrator({
|
||||||
|
modelRouter: deps.modelRouter,
|
||||||
systemPrompt: deps.systemPrompt,
|
systemPrompt: deps.systemPrompt,
|
||||||
session,
|
session,
|
||||||
toolRegistry: deps.toolRegistry,
|
toolRegistry: deps.toolRegistry,
|
||||||
toolExecutor: deps.toolExecutor,
|
toolExecutor: deps.toolExecutor,
|
||||||
|
primaryTier: deps.config.agents.primary_tier ?? 'default',
|
||||||
|
delegation: delegationConfig,
|
||||||
|
maxDelegationDepth: deps.config.agents.max_delegation_depth ?? 3,
|
||||||
|
compaction: deps.config.compaction.enabled ? {
|
||||||
|
thresholdPct: deps.config.compaction.threshold_pct,
|
||||||
|
keepTurns: deps.config.compaction.keep_turns,
|
||||||
|
summaryMaxTokens: deps.config.compaction.summary_max_tokens,
|
||||||
|
} : undefined,
|
||||||
|
modelName: deps.config.models.default.model,
|
||||||
|
contextWindow: deps.config.models.default.context_window,
|
||||||
});
|
});
|
||||||
agents.set(sessionId, agent);
|
agents.set(sessionId, agent);
|
||||||
}
|
}
|
||||||
@@ -167,9 +186,26 @@ function createMessageRouter(deps: {
|
|||||||
const agent = getOrCreateAgent(msg.channel, msg.senderId);
|
const agent = getOrCreateAgent(msg.channel, msg.senderId);
|
||||||
|
|
||||||
// Handle special commands
|
// Handle special commands
|
||||||
if (msg.metadata?.isCommand && msg.metadata.command === 'reset') {
|
if (msg.metadata?.isCommand) {
|
||||||
agent.reset();
|
if (msg.metadata.command === 'reset') {
|
||||||
return;
|
agent.reset();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (msg.metadata.command === 'compact') {
|
||||||
|
const result = await agent.compact();
|
||||||
|
if (result && result.compactedCount > 0) {
|
||||||
|
await reply({
|
||||||
|
text: `Compacted ${result.compactedCount} messages: ${result.tokensBefore} → ${result.tokensAfter} tokens`,
|
||||||
|
replyTo: msg.id,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
await reply({
|
||||||
|
text: 'Nothing to compact.',
|
||||||
|
replyTo: msg.id,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -284,6 +320,7 @@ export async function startDaemon(config: Config): Promise<DaemonContext> {
|
|||||||
systemPrompt,
|
systemPrompt,
|
||||||
toolRegistry,
|
toolRegistry,
|
||||||
toolExecutor,
|
toolExecutor,
|
||||||
|
config,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Register Telegram adapter
|
// Register Telegram adapter
|
||||||
|
|||||||
@@ -26,6 +26,10 @@ describe('parseCommand', () => {
|
|||||||
expect(parseCommand('/fs')).toEqual({ type: 'fullscreen' });
|
expect(parseCommand('/fs')).toEqual({ type: 'fullscreen' });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('parses /compact command', () => {
|
||||||
|
expect(parseCommand('/compact')).toEqual({ type: 'compact' });
|
||||||
|
});
|
||||||
|
|
||||||
it('parses /model command without argument', () => {
|
it('parses /model command without argument', () => {
|
||||||
expect(parseCommand('/model')).toEqual({ type: 'model' });
|
expect(parseCommand('/model')).toEqual({ type: 'model' });
|
||||||
});
|
});
|
||||||
@@ -64,6 +68,7 @@ describe('getHelpText', () => {
|
|||||||
expect(help).toContain('/help');
|
expect(help).toContain('/help');
|
||||||
expect(help).toContain('/model');
|
expect(help).toContain('/model');
|
||||||
expect(help).toContain('/reset');
|
expect(help).toContain('/reset');
|
||||||
|
expect(help).toContain('/compact');
|
||||||
expect(help).toContain('/quit');
|
expect(help).toContain('/quit');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ export type Command =
|
|||||||
| { type: 'help' }
|
| { type: 'help' }
|
||||||
| { type: 'status' }
|
| { type: 'status' }
|
||||||
| { type: 'fullscreen' }
|
| { type: 'fullscreen' }
|
||||||
|
| { type: 'compact' }
|
||||||
| { type: 'model'; name?: string }
|
| { type: 'model'; name?: string }
|
||||||
| { type: 'backend'; provider?: string }
|
| { type: 'backend'; provider?: string }
|
||||||
| { type: 'transfer'; target: string }
|
| { type: 'transfer'; target: string }
|
||||||
@@ -38,6 +39,11 @@ export function parseCommand(input: string): Command | null {
|
|||||||
return { type: 'fullscreen' };
|
return { type: 'fullscreen' };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compact
|
||||||
|
if (trimmed === '/compact') {
|
||||||
|
return { type: 'compact' };
|
||||||
|
}
|
||||||
|
|
||||||
// Model (with optional argument)
|
// Model (with optional argument)
|
||||||
if (trimmed === '/model') {
|
if (trimmed === '/model') {
|
||||||
return { type: 'model' };
|
return { type: 'model' };
|
||||||
@@ -73,6 +79,7 @@ Commands:
|
|||||||
/model [name] Show or switch model (local, default, fast, complex)
|
/model [name] Show or switch model (local, default, fast, complex)
|
||||||
/backend [provider] Show or switch local backend (ollama, llamacpp)
|
/backend [provider] Show or switch local backend (ollama, llamacpp)
|
||||||
/reset, /clear, /new Clear conversation history
|
/reset, /clear, /new Clear conversation history
|
||||||
|
/compact Compact conversation history
|
||||||
/status Show session info and token usage
|
/status Show session info and token usage
|
||||||
/fullscreen, /fs Switch to fullscreen mode
|
/fullscreen, /fs Switch to fullscreen mode
|
||||||
/transfer <dest> Transfer session to another frontend
|
/transfer <dest> Transfer session to another frontend
|
||||||
@@ -90,6 +97,7 @@ export const SLASH_COMMANDS = [
|
|||||||
'/reset',
|
'/reset',
|
||||||
'/clear',
|
'/clear',
|
||||||
'/new',
|
'/new',
|
||||||
|
'/compact',
|
||||||
'/status',
|
'/status',
|
||||||
'/fullscreen',
|
'/fullscreen',
|
||||||
'/fs',
|
'/fs',
|
||||||
@@ -106,6 +114,7 @@ export const COMMAND_TOOLTIPS: Record<string, string> = {
|
|||||||
'/reset': 'Clear conversation history',
|
'/reset': 'Clear conversation history',
|
||||||
'/clear': 'Clear conversation history',
|
'/clear': 'Clear conversation history',
|
||||||
'/new': 'Start a new conversation',
|
'/new': 'Start a new conversation',
|
||||||
|
'/compact': 'Compact conversation history to save context space',
|
||||||
'/status': 'Show session info and token usage',
|
'/status': 'Show session info and token usage',
|
||||||
'/fullscreen': 'Switch to fullscreen mode',
|
'/fullscreen': 'Switch to fullscreen mode',
|
||||||
'/fs': 'Switch to fullscreen mode',
|
'/fs': 'Switch to fullscreen mode',
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ describe('MinimalTui backend command', () => {
|
|||||||
getHistory: () => [],
|
getHistory: () => [],
|
||||||
addMessage: vi.fn(),
|
addMessage: vi.fn(),
|
||||||
clear: vi.fn(),
|
clear: vi.fn(),
|
||||||
|
replaceHistory: vi.fn(),
|
||||||
};
|
};
|
||||||
|
|
||||||
const mockRouter = {
|
const mockRouter = {
|
||||||
@@ -84,6 +85,7 @@ describe('MinimalTui backend command', () => {
|
|||||||
getHistory: () => [],
|
getHistory: () => [],
|
||||||
addMessage: vi.fn(),
|
addMessage: vi.fn(),
|
||||||
clear: vi.fn(),
|
clear: vi.fn(),
|
||||||
|
replaceHistory: vi.fn(),
|
||||||
};
|
};
|
||||||
|
|
||||||
const mockRouter = {
|
const mockRouter = {
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ describe('session handlers', () => {
|
|||||||
addMessage: vi.fn(),
|
addMessage: vi.fn(),
|
||||||
getHistory: vi.fn(() => mockHistory),
|
getHistory: vi.fn(() => mockHistory),
|
||||||
clear: vi.fn(),
|
clear: vi.fn(),
|
||||||
|
replaceHistory: vi.fn(),
|
||||||
};
|
};
|
||||||
|
|
||||||
const mockSessionManager = {
|
const mockSessionManager = {
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ const mockSession = {
|
|||||||
getHistory: vi.fn(() => []),
|
getHistory: vi.fn(() => []),
|
||||||
clear: vi.fn(),
|
clear: vi.fn(),
|
||||||
setHistory: vi.fn(),
|
setHistory: vi.fn(),
|
||||||
|
replaceHistory: vi.fn(),
|
||||||
};
|
};
|
||||||
|
|
||||||
const mockSessionManager = {
|
const mockSessionManager = {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ const mockSession = {
|
|||||||
addMessage: vi.fn(),
|
addMessage: vi.fn(),
|
||||||
getHistory: vi.fn(() => []),
|
getHistory: vi.fn(() => []),
|
||||||
clear: vi.fn(),
|
clear: vi.fn(),
|
||||||
|
replaceHistory: vi.fn(),
|
||||||
};
|
};
|
||||||
|
|
||||||
const mockSessionManager = {
|
const mockSessionManager = {
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ export interface Session {
|
|||||||
addMessage(message: Message): void;
|
addMessage(message: Message): void;
|
||||||
getHistory(): Message[];
|
getHistory(): Message[];
|
||||||
clear(): void;
|
clear(): void;
|
||||||
|
replaceHistory(messages: Message[]): void;
|
||||||
}
|
}
|
||||||
|
|
||||||
export class ManagedSession implements Session {
|
export class ManagedSession implements Session {
|
||||||
@@ -34,6 +35,16 @@ export class ManagedSession implements Session {
|
|||||||
this.store.clearSession(this.id);
|
this.store.clearSession(this.id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replace the entire session history with new messages.
|
||||||
|
* Used after compaction to persist the compacted state.
|
||||||
|
* Updates both in-memory history and SQLite storage atomically.
|
||||||
|
*/
|
||||||
|
replaceHistory(messages: Message[]): void {
|
||||||
|
this.history = [...messages];
|
||||||
|
this.store.replaceMessages(this.id, messages);
|
||||||
|
}
|
||||||
|
|
||||||
setHistory(messages: Message[]): void {
|
setHistory(messages: Message[]): void {
|
||||||
this.history = [...messages];
|
this.history = [...messages];
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,6 +40,26 @@ export class SessionStore {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Atomically replace all messages for a session.
|
||||||
|
* Used by compaction to swap full history with a compacted version.
|
||||||
|
* Runs in a transaction: delete all → re-insert in order.
|
||||||
|
*/
|
||||||
|
replaceMessages(sessionId: string, messages: Message[]): void {
|
||||||
|
const transaction = this.db.transaction(() => {
|
||||||
|
// Delete existing messages
|
||||||
|
this.db.prepare('DELETE FROM messages WHERE session_id = ?').run(sessionId);
|
||||||
|
// Re-insert in order
|
||||||
|
const insert = this.db.prepare(
|
||||||
|
'INSERT INTO messages (session_id, role, content) VALUES (?, ?, ?)'
|
||||||
|
);
|
||||||
|
for (const msg of messages) {
|
||||||
|
insert.run(sessionId, msg.role, msg.content);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
transaction();
|
||||||
|
}
|
||||||
|
|
||||||
clearSession(sessionId: string): void {
|
clearSession(sessionId: string): void {
|
||||||
const stmt = this.db.prepare('DELETE FROM messages WHERE session_id = ?');
|
const stmt = this.db.prepare('DELETE FROM messages WHERE session_id = ?');
|
||||||
stmt.run(sessionId);
|
stmt.run(sessionId);
|
||||||
|
|||||||
Reference in New Issue
Block a user