feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)
Phase 0 — Multi-Model Delegation:
- AgentOrchestrator wraps NativeAgent with delegate() for stateless single-turn calls to any model tier (fast/default/complex/local)
- DelegationConfig maps task types (compaction, classification, etc.) to model tiers
- Delegation prompts for compaction, memory extraction, classification, and tool summarisation
- Per-tier usage tracking for cost visibility
- Config schema: agents.delegation and agents.primary_tier

Phase 1 — Context Compaction:
- Token estimation (char/4 heuristic) with context window lookup
- shouldCompact() threshold check against context window percentage
- compactHistory() splits old/recent messages, delegates summary to fast tier, returns CompactionResult
- Automatic compaction in AgentOrchestrator.process() when configured
- Force-compact via orchestrator.compact() with session persistence
- Session.replaceHistory() with atomic SQLite transaction
- /compact TUI command with feedback on compacted token counts
- Config schema: compaction.enabled, threshold_pct, keep_turns, summary_max_tokens

Tests: 385 passing across 50 files (22 new tests in 2 new test files)
This commit is contained in:
+14
-1
@@ -1 +1,14 @@
|
||||
export { NativeAgent, type NativeAgentConfig } from './native/index.js';
|
||||
export { NativeAgent, type NativeAgentConfig, type ToolUseEvent } from './native/index.js';
|
||||
export {
|
||||
AgentOrchestrator,
|
||||
type OrchestratorConfig,
|
||||
type SubAgentRequest,
|
||||
type SubAgentResult,
|
||||
type DelegationConfig,
|
||||
} from './native/index.js';
|
||||
export {
|
||||
COMPACTION_SYSTEM_PROMPT,
|
||||
MEMORY_EXTRACTION_PROMPT,
|
||||
CLASSIFICATION_PROMPT,
|
||||
TOOL_SUMMARISATION_PROMPT,
|
||||
} from './native/index.js';
|
||||
|
||||
@@ -55,6 +55,7 @@ describe('NativeAgent', () => {
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
addMessage: vi.fn(),
|
||||
clear: vi.fn(),
|
||||
replaceHistory: vi.fn(),
|
||||
};
|
||||
|
||||
const agent = new NativeAgent({
|
||||
|
||||
@@ -1 +1,14 @@
|
||||
export { NativeAgent, type NativeAgentConfig } from './agent.js';
|
||||
export { NativeAgent, type NativeAgentConfig, type ToolUseEvent } from './agent.js';
|
||||
export {
|
||||
AgentOrchestrator,
|
||||
type OrchestratorConfig,
|
||||
type SubAgentRequest,
|
||||
type SubAgentResult,
|
||||
type DelegationConfig,
|
||||
} from './orchestrator.js';
|
||||
export {
|
||||
COMPACTION_SYSTEM_PROMPT,
|
||||
MEMORY_EXTRACTION_PROMPT,
|
||||
CLASSIFICATION_PROMPT,
|
||||
TOOL_SUMMARISATION_PROMPT,
|
||||
} from './prompts.js';
|
||||
|
||||
@@ -0,0 +1,613 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { AgentOrchestrator } from './orchestrator.js';
|
||||
import { ModelRouter } from '../../models/router.js';
|
||||
import type { ChatResponse, ModelClient } from '../../models/types.js';
|
||||
import { ToolRegistry, ToolExecutor } from '../../tools/index.js';
|
||||
import { HookEngine } from '../../hooks/engine.js';
|
||||
import type { SubAgentRequest } from './orchestrator.js';
|
||||
|
||||
describe('AgentOrchestrator', () => {
  /** Constructor options of the orchestrator, derived so no extra import is needed. */
  type OrchestratorOptions = ConstructorParameters<typeof AgentOrchestrator>[0];

  let mockDefaultClient: ModelClient;
  let mockFastClient: ModelClient;
  let mockComplexClient: ModelClient;
  let mockRouter: ModelRouter;

  /**
   * Build a ModelClient whose `chat()` always resolves with
   * `"<name> response"` and the given per-call token usage.
   */
  const createMockClient = (name: string, inputTokens = 100, outputTokens = 50): ModelClient => ({
    chat: vi.fn().mockResolvedValue({
      content: `${name} response`,
      stopReason: 'end_turn',
      usage: { inputTokens, outputTokens },
    }),
  });

  /** Build a standalone mocked `chat` function resolving with a fixed response. */
  const createMockChatFn = (content: string, inputTokens: number, outputTokens: number) =>
    vi.fn().mockResolvedValue({
      content,
      stopReason: 'end_turn',
      usage: { inputTokens, outputTokens },
    } as ChatResponse);

  /**
   * Create an orchestrator with the standard test configuration.
   * `mockRouter` is read at call time, so each test gets the router built by
   * the current `beforeEach`. Any option can be replaced through `overrides`.
   */
  const createOrchestrator = (overrides: Partial<OrchestratorOptions> = {}): AgentOrchestrator =>
    new AgentOrchestrator({
      modelRouter: mockRouter,
      systemPrompt: 'You are helpful.',
      primaryTier: 'default',
      delegation: {
        compaction: 'fast',
        memory_extraction: 'default',
        classification: 'complex',
        tool_summarisation: 'default',
        complex_reasoning: 'complex',
      },
      maxDelegationDepth: 10,
      ...overrides,
    });

  /** Minimal delegation request aimed at the fast tier, shared by several tests. */
  const fastRequest: SubAgentRequest = {
    tier: 'fast',
    systemPrompt: 'Fast task',
    message: 'Fast message',
  };

  beforeEach(() => {
    mockDefaultClient = createMockClient('default', 100, 50);
    mockFastClient = createMockClient('fast', 50, 25);
    mockComplexClient = createMockClient('complex', 200, 100);

    mockRouter = new ModelRouter({
      default: mockDefaultClient,
      fast: mockFastClient,
      complex: mockComplexClient,
      fallbackChain: [],
    });
  });

  describe('delegate()', () => {
    it('routes to the correct tier when specified', async () => {
      const orchestrator = createOrchestrator();

      const result = await orchestrator.delegate({
        tier: 'fast',
        systemPrompt: 'Summarize this text',
        message: 'This is a test message',
        maxTokens: 1000,
      });

      expect(result.content).toBe('fast response');
      expect(result.tier).toBe('fast');
    });

    it('includes tools when requested', async () => {
      const mockToolRegistry = new ToolRegistry();
      const hooks = new HookEngine({
        confirm: ['*'],
        log: [],
        silent: [],
      });
      const mockToolExecutor = new ToolExecutor(mockToolRegistry, hooks);

      // Swap the fast client's chat() for a dedicated mock we can inspect.
      const mockFastChatClient = mockRouter.getClient('fast')!;
      const mockFastChatFn = createMockChatFn('response with tools', 100, 50);
      Object.assign(mockFastChatClient, { chat: mockFastChatFn });

      const orchestrator = createOrchestrator({
        toolRegistry: mockToolRegistry,
        toolExecutor: mockToolExecutor,
      });

      await orchestrator.delegate({
        tier: 'fast',
        systemPrompt: 'Use available tools',
        message: 'Help me analyze data',
        tools: true,
      });

      // NOTE(review): this only proves the fast client was reached; it does not
      // inspect the forwarded request for a `tools` field.
      expect(mockFastChatFn).toHaveBeenCalled();
    });

    it('falls back to default tier when requested tier is unavailable', async () => {
      const routerWithoutComplex = new ModelRouter({
        default: mockDefaultClient,
        fast: mockFastClient,
        fallbackChain: [],
      });

      const orchestrator = createOrchestrator({ modelRouter: routerWithoutComplex });

      const result = await orchestrator.delegate({
        tier: 'complex',
        systemPrompt: 'Analyze deeply',
        message: 'This is complex',
      });

      expect(result.content).toBe('default response');
      expect(result.tier).toBe('default');
    });

    it('tracks cumulative usage after delegate calls', async () => {
      const orchestrator = createOrchestrator();

      await orchestrator.delegate(fastRequest);
      await orchestrator.delegate({
        tier: 'complex',
        systemPrompt: 'Complex task',
        message: 'Complex message',
      });
      await orchestrator.delegate({
        ...fastRequest,
        systemPrompt: 'Another fast task',
        message: 'Another fast message',
      });

      const usage = orchestrator.getDelegationUsage();

      // Two fast calls at 50/25 tokens each.
      expect(usage.fast).toEqual({
        inputTokens: 100,
        outputTokens: 50,
        calls: 2,
      });

      expect(usage.complex).toEqual({
        inputTokens: 200,
        outputTokens: 100,
        calls: 1,
      });

      // The default tier was never delegated to, so it has no entry.
      expect(usage.default).toBeUndefined();
    });

    it('tracks usage across tiers correctly', async () => {
      const orchestrator = createOrchestrator();

      await orchestrator.delegate(fastRequest);
      await orchestrator.delegate({
        ...fastRequest,
        systemPrompt: 'Another fast task',
        message: 'Another fast message',
      });

      const usage = orchestrator.getDelegationUsage();

      expect(usage.fast.inputTokens).toBe(100);
      expect(usage.fast.outputTokens).toBe(50);
      expect(usage.fast.calls).toBe(2);
    });

    it('logs delegation details with tier and token counts', async () => {
      const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});

      try {
        const orchestrator = createOrchestrator();

        await orchestrator.delegate(fastRequest);

        expect(consoleSpy).toHaveBeenCalledWith(
          '[Flynn:delegate] tier=fast tokens=50+25'
        );
      } finally {
        // Restore even when the assertion fails so the spy cannot leak
        // into later tests.
        consoleSpy.mockRestore();
      }
    });
  });

  describe('getDelegationTier()', () => {
    it('returns correct tier for each task type', () => {
      const orchestrator = createOrchestrator();

      expect(orchestrator.getDelegationTier('compaction')).toBe('fast');
      expect(orchestrator.getDelegationTier('memory_extraction')).toBe('default');
      expect(orchestrator.getDelegationTier('classification')).toBe('complex');
      expect(orchestrator.getDelegationTier('tool_summarisation')).toBe('default');
      expect(orchestrator.getDelegationTier('complex_reasoning')).toBe('complex');
    });

    it('returns tier that was explicitly configured', () => {
      const orchestrator = createOrchestrator({
        delegation: {
          compaction: 'local',
          memory_extraction: 'fast',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'local',
        },
      });

      expect(orchestrator.getDelegationTier('compaction')).toBe('local');
      expect(orchestrator.getDelegationTier('memory_extraction')).toBe('fast');
      expect(orchestrator.getDelegationTier('complex_reasoning')).toBe('local');
    });
  });

  describe('process()', () => {
    it('proxies to NativeAgent for user messages', async () => {
      // Swap the default client's chat() for a mock with a distinctive reply.
      const mockDefaultChatClient = mockRouter.getClient('default')!;
      const mockDefaultChatFn = createMockChatFn('Agent response', 150, 75);
      Object.assign(mockDefaultChatClient, { chat: mockDefaultChatFn });

      const orchestrator = createOrchestrator();

      const response = await orchestrator.process('Hello, agent!');

      expect(response).toBe('Agent response');
    });

    it('maintains conversation history through process()', async () => {
      const orchestrator = createOrchestrator();

      await orchestrator.process('Hello');
      await orchestrator.process('How are you?');
      await orchestrator.process('Tell me about yourself');

      const history = orchestrator.getHistory();

      expect(history).toHaveLength(6);
      expect(history[0]).toEqual({ role: 'user', content: 'Hello' });
      expect(history[1]).toEqual({ role: 'assistant', content: 'default response' });
      expect(history[2]).toEqual({ role: 'user', content: 'How are you?' });
      expect(history[3]).toEqual({ role: 'assistant', content: 'default response' });
      expect(history[4]).toEqual({ role: 'user', content: 'Tell me about yourself' });
      expect(history[5]).toEqual({ role: 'assistant', content: 'default response' });
    });
  });

  describe('reset()', () => {
    it('clears primary agent conversation history', async () => {
      const orchestrator = createOrchestrator();

      await orchestrator.process('Hello');
      await orchestrator.process('How are you?');

      expect(orchestrator.getHistory()).toHaveLength(4);

      orchestrator.reset();

      expect(orchestrator.getHistory()).toHaveLength(0);
    });

    it('can be called multiple times', async () => {
      const orchestrator = createOrchestrator();

      await orchestrator.process('Hello');
      orchestrator.reset();

      expect(orchestrator.getHistory()).toHaveLength(0);

      await orchestrator.process('World');
      orchestrator.reset();

      expect(orchestrator.getHistory()).toHaveLength(0);
    });
  });

  describe('getDelegationUsage()', () => {
    it('returns copy of usage stats (doesn\'t expose internal map)', async () => {
      const orchestrator = createOrchestrator();

      await orchestrator.delegate(fastRequest);

      const usage1 = orchestrator.getDelegationUsage();
      const usage2 = orchestrator.getDelegationUsage();

      expect(usage1).toEqual(usage2);

      // Mutating one snapshot must not affect another.
      usage1.fast.inputTokens = 999;

      expect(usage2.fast.inputTokens).toBe(50);
    });

    it('returns empty object when no usage tracked', async () => {
      const orchestrator = createOrchestrator();

      const usage = orchestrator.getDelegationUsage();

      expect(usage).toEqual({});
    });
  });

  describe('getHistory()', () => {
    it('returns conversation history from primary agent', async () => {
      const orchestrator = createOrchestrator();

      await orchestrator.process('Hello');
      await orchestrator.process('How are you?');

      const history = orchestrator.getHistory();

      expect(history).toHaveLength(4);
      expect(history[0]).toEqual({ role: 'user', content: 'Hello' });
      expect(history[1]).toEqual({ role: 'assistant', content: 'default response' });
      expect(history[2]).toEqual({ role: 'user', content: 'How are you?' });
      expect(history[3]).toEqual({ role: 'assistant', content: 'default response' });
    });

    it('returns empty array when no history', async () => {
      const orchestrator = createOrchestrator();

      const history = orchestrator.getHistory();

      expect(history).toEqual([]);
    });
  });

  describe('setModelTier()', () => {
    it('sets model tier on primary agent', () => {
      const orchestrator = createOrchestrator();

      orchestrator.setModelTier('fast');

      expect(orchestrator.getModelTier()).toBe('fast');
    });

    it('allows tier changes after initialization', () => {
      const orchestrator = createOrchestrator();

      expect(orchestrator.getModelTier()).toBe('default');

      orchestrator.setModelTier('complex');

      expect(orchestrator.getModelTier()).toBe('complex');

      orchestrator.setModelTier('fast');

      expect(orchestrator.getModelTier()).toBe('fast');
    });
  });

  describe('setOnToolUse()', () => {
    it('sets tool-use callback on primary agent', () => {
      const orchestrator = createOrchestrator();
      const callback = vi.fn();

      orchestrator.setOnToolUse(callback);

      // Registering a callback must neither invoke it nor disturb the tier.
      expect(callback).not.toHaveBeenCalled();
      expect(orchestrator.getModelTier()).toBe('default');
    });

    it('allows callback changes', () => {
      const orchestrator = createOrchestrator();
      const callback1 = vi.fn();
      const callback2 = vi.fn();

      orchestrator.setOnToolUse(callback1);

      expect(orchestrator.getModelTier()).toBe('default');

      orchestrator.setOnToolUse(callback2);

      expect(callback1).not.toHaveBeenCalled();
      expect(callback2).not.toHaveBeenCalled();
      expect(orchestrator.getModelTier()).toBe('default');
    });
  });
});
|
||||
@@ -0,0 +1,309 @@
|
||||
import type { ModelRouter, ModelTier } from '../../models/router.js';
|
||||
import type { ChatRequest, Message, TokenUsage } from '../../models/types.js';
|
||||
import type { Session } from '../../session/index.js';
|
||||
import type { ToolRegistry } from '../../tools/registry.js';
|
||||
import type { ToolExecutor } from '../../tools/executor.js';
|
||||
import { NativeAgent } from './agent.js';
|
||||
import type { ToolUseEvent } from './agent.js';
|
||||
import { shouldCompact } from '../../context/tokens.js';
|
||||
import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
|
||||
|
||||
// ── Public types ──────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Describes one stateless, single-turn call to a sub-agent running at a
 * particular model tier.
 */
export interface SubAgentRequest {
  /** Model tier the call is addressed to. */
  tier: ModelTier;

  /** System prompt sent along with the call. */
  systemPrompt: string;

  /** Text of the message to send. */
  message: string;

  /** Optional cap on the number of tokens. */
  maxTokens?: number;

  /** Set to true to attach the tools from the toolRegistry to the request. */
  tools?: boolean;
}
|
||||
|
||||
/**
 * Outcome of a sub-agent delegation call.
 */
export interface SubAgentResult {
  /** Text content of the response. */
  content: string;

  /** Token usage for the call. */
  usage: TokenUsage;

  /** Model tier associated with the result. */
  tier: ModelTier;
}
|
||||
|
||||
/**
 * Assigns a model tier to every kind of delegated task.
 * Each property names a task; its value is the tier that should handle it.
 */
export interface DelegationConfig {
  /** Tier for the compaction task. */
  compaction: ModelTier;

  /** Tier for the memory-extraction task. */
  memory_extraction: ModelTier;

  /** Tier for the classification task. */
  classification: ModelTier;

  /** Tier for the tool-summarisation task. */
  tool_summarisation: ModelTier;

  /** Tier for the complex-reasoning task. */
  complex_reasoning: ModelTier;
}
|
||||
|
||||
/**
 * Running usage totals kept for a single model tier.
 */
interface TierUsageStats {
  /** Cumulative input tokens. */
  inputTokens: number;

  /** Cumulative output tokens. */
  outputTokens: number;

  /** Number of calls counted. */
  calls: number;
}
|
||||
|
||||
/**
 * Everything needed to construct an AgentOrchestrator.
 */
export interface OrchestratorConfig {
  modelRouter: ModelRouter;

  systemPrompt: string;

  session?: Session;

  toolRegistry?: ToolRegistry;

  toolExecutor?: ToolExecutor;

  maxIterations?: number;

  /** Tier the primary NativeAgent uses for the user-facing conversation. */
  primaryTier: ModelTier;

  /** Tier to use for each type of delegation task. */
  delegation: DelegationConfig;

  /** Safety guard: the maximum nesting depth allowed for delegation calls. */
  maxDelegationDepth: number;

  onToolUse?: (event: ToolUseEvent) => void;

  /** Settings for context compaction; supplying them turns on automatic compaction. */
  compaction?: CompactionConfig;

  /** Identifier of the primary model, used to look up its context window. */
  modelName?: string;

  /** Optional explicit context window size, in tokens. */
  contextWindow?: number;
}
|
||||
|
||||
// ── AgentOrchestrator ─────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Wraps a primary NativeAgent and adds the ability to delegate
|
||||
* single-turn sub-tasks to different model tiers via the ModelRouter.
|
||||
*
|
||||
* The primary agent handles the main conversation loop (with tools),
|
||||
* while `delegate()` enables cheap, stateless calls for tasks like
|
||||
* compaction, classification, and memory extraction.
|
||||
*/
|
||||
export class AgentOrchestrator {
|
||||
private _agent: NativeAgent;
|
||||
private _modelRouter: ModelRouter;
|
||||
private _delegation: DelegationConfig;
|
||||
private _maxDelegationDepth: number;
|
||||
private _toolRegistry?: ToolRegistry;
|
||||
private _session?: Session;
|
||||
private _compactionConfig?: CompactionConfig;
|
||||
private _modelName?: string;
|
||||
private _contextWindow?: number;
|
||||
private _usageByTier: Map<string, TierUsageStats> = new Map();
|
||||
|
||||
constructor(config: OrchestratorConfig) {
|
||||
this._modelRouter = config.modelRouter;
|
||||
this._delegation = config.delegation;
|
||||
this._maxDelegationDepth = config.maxDelegationDepth;
|
||||
this._toolRegistry = config.toolRegistry;
|
||||
this._session = config.session;
|
||||
this._compactionConfig = config.compaction;
|
||||
this._modelName = config.modelName;
|
||||
this._contextWindow = config.contextWindow;
|
||||
|
||||
// Create the primary NativeAgent for user-facing conversation
|
||||
this._agent = new NativeAgent({
|
||||
modelClient: config.modelRouter,
|
||||
systemPrompt: config.systemPrompt,
|
||||
session: config.session,
|
||||
toolRegistry: config.toolRegistry,
|
||||
toolExecutor: config.toolExecutor,
|
||||
maxIterations: config.maxIterations,
|
||||
onToolUse: config.onToolUse,
|
||||
});
|
||||
|
||||
// Set the primary tier on the agent
|
||||
this._agent.setModelTier(config.primaryTier);
|
||||
}
|
||||
|
||||
// ── Delegation ────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Perform a single-turn, stateless call to a model at the specified tier.
|
||||
*
|
||||
* This is used for internal sub-tasks (compaction, classification, etc.)
|
||||
* that don't need the full conversation history or tool loop.
|
||||
*
|
||||
* If the requested tier is not available on the router, falls back to
|
||||
* the 'default' tier with a warning.
|
||||
*/
|
||||
async delegate(request: SubAgentRequest): Promise<SubAgentResult> {
|
||||
let tier = request.tier;
|
||||
|
||||
// Check if the requested tier is available; fall back to 'default' if not
|
||||
const client = this._modelRouter.getClient(tier);
|
||||
if (!client) {
|
||||
console.warn(
|
||||
`[Flynn:delegate] Tier '${tier}' not available, falling back to 'default'`,
|
||||
);
|
||||
tier = 'default';
|
||||
}
|
||||
|
||||
// Build the single-turn chat request
|
||||
const messages: Message[] = [
|
||||
{ role: 'user', content: request.message },
|
||||
];
|
||||
|
||||
const chatRequest: ChatRequest = {
|
||||
messages,
|
||||
system: request.systemPrompt,
|
||||
maxTokens: request.maxTokens,
|
||||
};
|
||||
|
||||
// Optionally include tools from the registry
|
||||
if (request.tools && this._toolRegistry) {
|
||||
chatRequest.tools = this._toolRegistry.toAnthropicFormat();
|
||||
}
|
||||
|
||||
const response = await this._modelRouter.chat(chatRequest, tier);
|
||||
|
||||
// Track cumulative usage for this tier
|
||||
this._trackUsage(tier, response.usage);
|
||||
|
||||
console.log(
|
||||
`[Flynn:delegate] tier=${tier} tokens=${response.usage.inputTokens}+${response.usage.outputTokens}`,
|
||||
);
|
||||
|
||||
return {
|
||||
content: response.content,
|
||||
usage: response.usage,
|
||||
tier,
|
||||
};
|
||||
}
|
||||
|
||||
// ── Primary agent proxies ─────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Process a user message through the primary NativeAgent.
|
||||
* This is the main entry point for user-facing conversation.
|
||||
*
|
||||
* When compaction is configured, checks whether the conversation history
|
||||
* exceeds the context window threshold and compacts it before processing.
|
||||
*/
|
||||
async process(userMessage: string): Promise<string> {
|
||||
await this.compactIfNeeded();
|
||||
return this._agent.process(userMessage);
|
||||
}
|
||||
|
||||
/**
|
||||
* Force-compact the current conversation history regardless of threshold.
|
||||
* Returns the compaction result, or null if there was nothing to compact
|
||||
* (e.g. no session, too few messages).
|
||||
*/
|
||||
async compact(): Promise<CompactionResult | null> {
|
||||
const config = this._compactionConfig ?? DEFAULT_COMPACTION_CONFIG;
|
||||
const messages = this.getHistory();
|
||||
|
||||
if (messages.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = await compactHistory({
|
||||
messages,
|
||||
orchestrator: this,
|
||||
config,
|
||||
});
|
||||
|
||||
// If nothing was actually compacted, skip the replace
|
||||
if (result.compactedCount === 0) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Persist the compacted history
|
||||
if (this._session) {
|
||||
this._session.replaceHistory(result.messages);
|
||||
}
|
||||
|
||||
console.log(
|
||||
`[Flynn:compact] Compacted ${result.compactedCount} messages: ` +
|
||||
`${result.tokensBefore} → ${result.tokensAfter} tokens`,
|
||||
);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Reset the primary agent's conversation history. */
|
||||
reset(): void {
|
||||
this._agent.reset();
|
||||
}
|
||||
|
||||
/** Get the primary agent's conversation history. */
|
||||
getHistory(): Message[] {
|
||||
return this._agent.getHistory();
|
||||
}
|
||||
|
||||
/** Set the model tier on the primary agent. */
|
||||
setModelTier(tier: ModelTier): void {
|
||||
this._agent.setModelTier(tier);
|
||||
}
|
||||
|
||||
/** Get the current model tier of the primary agent. */
|
||||
getModelTier(): ModelTier {
|
||||
return this._agent.getModelTier();
|
||||
}
|
||||
|
||||
/** Set the tool-use callback on the primary agent. */
|
||||
setOnToolUse(callback: ((event: ToolUseEvent) => void) | undefined): void {
|
||||
this._agent.setOnToolUse(callback);
|
||||
}
|
||||
|
||||
// ── Usage & config accessors ──────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Returns cumulative delegation usage stats per tier.
|
||||
* Useful for cost tracking and visibility into sub-agent calls.
|
||||
*/
|
||||
getDelegationUsage(): Record<string, TierUsageStats> {
|
||||
const result: Record<string, TierUsageStats> = {};
|
||||
for (const [tier, stats] of this._usageByTier) {
|
||||
result[tier] = { ...stats };
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Look up which model tier is configured for a given delegation task.
|
||||
* Convenience method so callers don't need to access the config directly.
|
||||
*/
|
||||
getDelegationTier(task: keyof DelegationConfig): ModelTier {
|
||||
return this._delegation[task];
|
||||
}
|
||||
|
||||
// ── Private helpers ───────────────────────────────────────────────
|
||||
|
||||
/**
 * Run automatic compaction when the history has crossed the configured
 * threshold.
 *
 * Called before each `process()` call when compaction is configured.
 * Does nothing when no compaction config is set, when the history is
 * empty, or when `shouldCompact` reports that compaction is not needed.
 */
private async compactIfNeeded(): Promise<void> {
  const config = this._compactionConfig;
  if (!config) {
    return;
  }

  const history = this.getHistory();
  if (history.length === 0) {
    return;
  }

  const overThreshold = shouldCompact({
    messages: history,
    // Fall back to a placeholder name when no model name is known.
    model: this._modelName ?? 'unknown',
    contextWindow: this._contextWindow,
    thresholdPct: config.thresholdPct,
  });

  if (overThreshold) {
    await this.compact();
  }
}
|
||||
|
||||
/**
 * Accumulate usage stats for a given tier.
 *
 * @param tier - Tier the call was made against; used as the map key.
 * @param usage - Token counts of the call to add to the running totals.
 */
private _trackUsage(tier: ModelTier, usage: TokenUsage): void {
  const { inputTokens, outputTokens } = usage;
  const stats = this._usageByTier.get(tier);

  // First call for this tier: seed the entry with this call's counts.
  if (!stats) {
    this._usageByTier.set(tier, { inputTokens, outputTokens, calls: 1 });
    return;
  }

  // Subsequent calls mutate the stored entry in place.
  stats.inputTokens += inputTokens;
  stats.outputTokens += outputTokens;
  stats.calls += 1;
}
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
/**
|
||||
* System prompts for delegated tasks.
|
||||
*
|
||||
* Each prompt is designed for a specific sub-task that the agent farms out
|
||||
* to a (usually cheaper/faster) model call. Keep them focused and
|
||||
* deterministic — the caller should be able to parse the output reliably.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Instructs a model to summarise conversation history during compaction.
|
||||
* The resulting summary replaces the full history to reclaim context window space.
|
||||
*/
|
||||
export const COMPACTION_SYSTEM_PROMPT = [
  'You are a conversation summariser. Your job is to condense a conversation history into a concise summary that preserves all important information.',
  '', // blank line separates the prompt's sections
  'Rules:',
  '- Preserve key facts, decisions, user preferences, and action items.',
  '- Maintain chronological order of events.',
  '- Note any unresolved questions or pending tasks.',
  '- Be concise but thorough — aim for roughly 20% of the original length.',
  '- Use bullet points for clarity.',
  '- Never invent information that is not present in the conversation.',
  '- If the conversation references files, paths, error messages, or specific values, include them verbatim.',
  '- Group related points together under short descriptive headings when it aids readability.',
  '',
  'Output format:',
  'Return a markdown summary with bullet points. Do not include any preamble or explanation — output only the summary.',
].join('\n');
|
||||
|
||||
/**
|
||||
* Instructs a model to extract persistent facts from conversation text.
|
||||
* Extracted facts are stored in long-term memory for future sessions.
|
||||
*/
|
||||
export const MEMORY_EXTRACTION_PROMPT = [
  'You are a fact extractor. Given a block of conversation text, extract persistent facts worth remembering across sessions.',
  '', // blank line separates the prompt's sections
  'Categories to extract:',
  '',
  '## User',
  '- Name, role, location, timezone, or other personal details explicitly shared.',
  '',
  '## Preferences',
  '- Communication style, formatting preferences, tool preferences, workflow habits.',
  '',
  '## Technical',
  '- Project names, repositories, tech stacks, conventions, architecture decisions.',
  '- File paths, environment details, deployment targets.',
  '',
  '## Decisions',
  '- Explicit decisions made during the conversation (e.g. "we decided to use X instead of Y").',
  '- Rationale for decisions when stated.',
  '',
  'Rules:',
  '- Only extract facts that are explicitly stated — never infer or assume.',
  '- Skip transient or session-specific information (e.g. "run this command now", "fix this error today").',
  '- Skip information that is only relevant to the current task and has no long-term value.',
  '- If no facts worth extracting exist, return an empty response.',
  '- Use concise bullet points under each category heading.',
  '- Omit any category that has no entries.',
  '',
  'Output format:',
  'Return markdown with the category headings above and bullet points underneath. No preamble.',
].join('\n');
|
||||
|
||||
/**
|
||||
* Instructs a model to classify an inbound message into a discrete category.
|
||||
* The caller uses the label to route the message to the appropriate handler.
|
||||
*/
|
||||
export const CLASSIFICATION_PROMPT = [
  'Classify the following message into exactly one of these categories:',
  '', // blank line separates the prompt's sections
  '- command — a direct instruction to perform an action (e.g. "run tests", "deploy to staging")',
  '- question — a request for information or explanation (e.g. "what does this function do?")',
  '- task — a multi-step objective that requires planning (e.g. "add authentication to the API")',
  '- conversation — casual chat, greetings, acknowledgements, or social interaction',
  '- unclear — the message is ambiguous or lacks enough context to classify',
  '',
  'Rules:',
  '- Return ONLY the classification label — a single word, nothing else.',
  '- Do not explain your reasoning.',
  '- If the message fits multiple categories, choose the most specific one (command > task > question > conversation).',
].join('\n');
|
||||
|
||||
/**
|
||||
* Instructs a model to condense verbose tool output into a compact summary.
|
||||
* Used to shrink large tool results before they consume context window space.
|
||||
*/
|
||||
export const TOOL_SUMMARISATION_PROMPT = [
  'You are a tool-output summariser. Given the raw output of a tool invocation, produce a compact summary that preserves the essential information.',
  '', // blank line separates the prompt's sections
  'Rules:',
  '- Preserve the key outcome: success or failure.',
  '- Preserve important data: counts, IDs, names, statuses.',
  '- Preserve all file paths, error codes, error messages, and specific values verbatim.',
  '- Strip boilerplate, redundant lines, decorative formatting, and progress indicators.',
  '- Keep the summary under 500 tokens.',
  '- If the output is already concise, return it as-is rather than paraphrasing.',
  '- Use a structured format (bullet points or short paragraphs) for readability.',
  '',
  'Output format:',
  'Return the summarised output directly. No preamble or meta-commentary.',
].join('\n');
|
||||
Reference in New Issue
Block a user