diff --git a/src/backends/index.ts b/src/backends/index.ts index 580066d..cf41f7e 100644 --- a/src/backends/index.ts +++ b/src/backends/index.ts @@ -1 +1,14 @@ -export { NativeAgent, type NativeAgentConfig } from './native/index.js'; +export { NativeAgent, type NativeAgentConfig, type ToolUseEvent } from './native/index.js'; +export { + AgentOrchestrator, + type OrchestratorConfig, + type SubAgentRequest, + type SubAgentResult, + type DelegationConfig, +} from './native/index.js'; +export { + COMPACTION_SYSTEM_PROMPT, + MEMORY_EXTRACTION_PROMPT, + CLASSIFICATION_PROMPT, + TOOL_SUMMARISATION_PROMPT, +} from './native/index.js'; diff --git a/src/backends/native/agent.test.ts b/src/backends/native/agent.test.ts index 42d5b78..a4b4041 100644 --- a/src/backends/native/agent.test.ts +++ b/src/backends/native/agent.test.ts @@ -55,6 +55,7 @@ describe('NativeAgent', () => { getHistory: vi.fn().mockReturnValue([]), addMessage: vi.fn(), clear: vi.fn(), + replaceHistory: vi.fn(), }; const agent = new NativeAgent({ diff --git a/src/backends/native/index.ts b/src/backends/native/index.ts index 651b762..a6ec3c4 100644 --- a/src/backends/native/index.ts +++ b/src/backends/native/index.ts @@ -1 +1,14 @@ -export { NativeAgent, type NativeAgentConfig } from './agent.js'; +export { NativeAgent, type NativeAgentConfig, type ToolUseEvent } from './agent.js'; +export { + AgentOrchestrator, + type OrchestratorConfig, + type SubAgentRequest, + type SubAgentResult, + type DelegationConfig, +} from './orchestrator.js'; +export { + COMPACTION_SYSTEM_PROMPT, + MEMORY_EXTRACTION_PROMPT, + CLASSIFICATION_PROMPT, + TOOL_SUMMARISATION_PROMPT, +} from './prompts.js'; diff --git a/src/backends/native/orchestrator.test.ts b/src/backends/native/orchestrator.test.ts new file mode 100644 index 0000000..c1966f8 --- /dev/null +++ b/src/backends/native/orchestrator.test.ts @@ -0,0 +1,613 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { AgentOrchestrator } from './orchestrator.js'; +import { ModelRouter } from '../../models/router.js'; +import type { ChatResponse, ModelClient } from '../../models/types.js'; +import { ToolRegistry, ToolExecutor } from '../../tools/index.js'; +import { HookEngine } from '../../hooks/engine.js'; +import type { SubAgentRequest } from './orchestrator.js'; + +describe('AgentOrchestrator', () => { + let mockDefaultClient: ModelClient; + let mockFastClient: ModelClient; + let mockComplexClient: ModelClient; + let mockRouter: ModelRouter; + + const createMockClient = (name: string, inputTokens = 100, outputTokens = 50): ModelClient => ({ + chat: vi.fn().mockResolvedValue({ + content: `${name} response`, + stopReason: 'end_turn', + usage: { inputTokens, outputTokens }, + }), + }); + + beforeEach(() => { + mockDefaultClient = createMockClient('default', 100, 50); + mockFastClient = createMockClient('fast', 50, 25); + mockComplexClient = createMockClient('complex', 200, 100); + + mockRouter = new ModelRouter({ + default: mockDefaultClient, + fast: mockFastClient, + complex: mockComplexClient, + fallbackChain: [], + }); + }); + + describe('delegate()', () => { + it('routes to the correct tier when specified', async () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are a helpful assistant.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + const result = await orchestrator.delegate({ + tier: 'fast', + systemPrompt: 'Summarize this text', + message: 'This is a test message', + maxTokens: 1000, + }); + + expect(result.content).toBe('fast response'); + expect(result.tier).toBe('fast'); + }); + + it('includes tools when requested', async () => { + const mockToolRegistry = new ToolRegistry(); + const hooks = new HookEngine({ + confirm: ['*'], + log: [], + silent: [], + }); + const mockToolExecutor = new ToolExecutor(mockToolRegistry, hooks); + + const mockFastChatClient = mockRouter.getClient('fast')!; + const mockFastChatFn = vi.fn().mockResolvedValue({ + content: 'response with tools', + stopReason: 'end_turn', + usage: { inputTokens: 100, outputTokens: 50 }, + } as ChatResponse); + + Object.assign(mockFastChatClient, { chat: mockFastChatFn }); + + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + toolRegistry: mockToolRegistry, + toolExecutor: mockToolExecutor, + }); + + await orchestrator.delegate({ + tier: 'fast', + systemPrompt: 'Use available tools', + message: 'Help me analyze data', + tools: true, + }); + + expect(mockFastChatFn).toHaveBeenCalled(); + }); + + it('falls back to default tier when requested tier is unavailable', async () => { + const routerWithoutComplex = new ModelRouter({ + default: mockDefaultClient, + fast: mockFastClient, + fallbackChain: [], + }); + + const orchestrator = new AgentOrchestrator({ + modelRouter: routerWithoutComplex, + systemPrompt: 'You are a helpful assistant.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + const result = await orchestrator.delegate({ + tier: 'complex', + systemPrompt: 'Analyze deeply', + message: 'This is complex', + }); + + expect(result.content).toBe('default response'); + expect(result.tier).toBe('default'); + }); + + it('tracks cumulative usage after delegate calls', async () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + await orchestrator.delegate({ + tier: 'fast', + systemPrompt: 'Fast task', + message: 'Fast message', + }); + + await orchestrator.delegate({ + tier: 'complex', + systemPrompt: 'Complex task', + message: 'Complex message', + }); + + await orchestrator.delegate({ + tier: 'fast', + systemPrompt: 'Another fast task', + message: 'Another fast message', + }); + + const usage = orchestrator.getDelegationUsage(); + + expect(usage.fast).toEqual({ + inputTokens: 100, + outputTokens: 50, + calls: 2, + }); + + expect(usage.complex).toEqual({ + inputTokens: 200, + outputTokens: 100, + calls: 1, + }); + + expect(usage.default).toBeUndefined(); + }); + + it('tracks usage across tiers correctly', async () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + await orchestrator.delegate({ + tier: 'fast', + systemPrompt: 'Fast task', + message: 'Fast message', + }); + + await orchestrator.delegate({ + tier: 'fast', + systemPrompt: 'Another fast task', + message: 'Another fast message', + }); + + const usage = orchestrator.getDelegationUsage(); + + expect(usage.fast.inputTokens).toBe(100); + expect(usage.fast.outputTokens).toBe(50); + expect(usage.fast.calls).toBe(2); + }); + + it('logs delegation details with tier and token counts', async () => { + const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + await orchestrator.delegate({ + tier: 'fast', + systemPrompt: 'Fast task', + message: 'Fast message', + }); + + expect(consoleSpy).toHaveBeenCalledWith( + '[Flynn:delegate] tier=fast tokens=50+25' + ); + + consoleSpy.mockRestore(); + }); + }); + + describe('getDelegationTier()', () => { + it('returns correct tier for each task type', () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + expect(orchestrator.getDelegationTier('compaction')).toBe('fast'); + expect(orchestrator.getDelegationTier('memory_extraction')).toBe('default'); + expect(orchestrator.getDelegationTier('classification')).toBe('complex'); + expect(orchestrator.getDelegationTier('tool_summarisation')).toBe('default'); + expect(orchestrator.getDelegationTier('complex_reasoning')).toBe('complex'); + }); + + it('returns tier that was explicitly configured', () => { + const customDelegation = { + compaction: 'local' as const, + memory_extraction: 'fast' as const, + classification: 'complex' as const, + tool_summarisation: 'default' as const, + complex_reasoning: 'local' as const, + }; + + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: customDelegation, + maxDelegationDepth: 10, + }); + + expect(orchestrator.getDelegationTier('compaction')).toBe('local'); + expect(orchestrator.getDelegationTier('memory_extraction')).toBe('fast'); + expect(orchestrator.getDelegationTier('complex_reasoning')).toBe('local'); + }); + }); + + describe('process()', () => { + it('proxies to NativeAgent for user messages', async () => { + const mockDefaultChatClient = mockRouter.getClient('default')!; + const mockDefaultChatFn = vi.fn().mockResolvedValue({ + content: 'Agent response', + stopReason: 'end_turn', + usage: { inputTokens: 150, outputTokens: 75 }, + } as ChatResponse); + + Object.assign(mockDefaultChatClient, { chat: mockDefaultChatFn }); + + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are a helpful agent.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + const response = await orchestrator.process('Hello, agent!'); + + expect(response).toBe('Agent response'); + }); + + it('maintains conversation history through process()', async () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are a helpful agent.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + await orchestrator.process('Hello'); + await orchestrator.process('How are you?'); + await orchestrator.process('Tell me about yourself'); + + const history = orchestrator.getHistory(); + + expect(history).toHaveLength(6); + expect(history[0]).toEqual({ role: 'user', content: 'Hello' }); + expect(history[1]).toEqual({ role: 'assistant', content: 'default response' }); + expect(history[2]).toEqual({ role: 'user', content: 'How are you?' }); + expect(history[3]).toEqual({ role: 'assistant', content: 'default response' }); + expect(history[4]).toEqual({ role: 'user', content: 'Tell me about yourself' }); + expect(history[5]).toEqual({ role: 'assistant', content: 'default response' }); + }); + }); + + describe('reset()', () => { + it('clears primary agent conversation history', async () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are a helpful agent.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + await orchestrator.process('Hello'); + await orchestrator.process('How are you?'); + + expect(orchestrator.getHistory()).toHaveLength(4); + + orchestrator.reset(); + + expect(orchestrator.getHistory()).toHaveLength(0); + }); + + it('can be called multiple times', async () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are a helpful agent.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + await orchestrator.process('Hello'); + orchestrator.reset(); + + expect(orchestrator.getHistory()).toHaveLength(0); + + await orchestrator.process('World'); + orchestrator.reset(); + + expect(orchestrator.getHistory()).toHaveLength(0); + }); + }); + + describe('getDelegationUsage()', () => { + it('returns copy of usage stats (doesn\'t expose internal map)', async () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + await orchestrator.delegate({ + tier: 'fast', + systemPrompt: 'Fast task', + message: 'Fast message', + }); + + const usage1 = orchestrator.getDelegationUsage(); + const usage2 = orchestrator.getDelegationUsage(); + + expect(usage1).toEqual(usage2); + + usage1.fast.inputTokens = 999; + + expect(usage2.fast.inputTokens).toBe(50); + }); + + it('returns empty object when no usage tracked', async () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + const usage = orchestrator.getDelegationUsage(); + + expect(usage).toEqual({}); + }); + }); + + describe('getHistory()', () => { + it('returns conversation history from primary agent', async () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are a helpful agent.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + await orchestrator.process('Hello'); + await orchestrator.process('How are you?'); + + const history = orchestrator.getHistory(); + + expect(history).toHaveLength(4); + expect(history[0]).toEqual({ role: 'user', content: 'Hello' }); + expect(history[1]).toEqual({ role: 'assistant', content: 'default response' }); + expect(history[2]).toEqual({ role: 'user', content: 'How are you?' }); + expect(history[3]).toEqual({ role: 'assistant', content: 'default response' }); + }); + + it('returns empty array when no history', async () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are a helpful agent.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + const history = orchestrator.getHistory(); + + expect(history).toEqual([]); + }); + }); + + describe('setModelTier()', () => { + it('sets model tier on primary agent', () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + orchestrator.setModelTier('fast'); + + expect(orchestrator.getModelTier()).toBe('fast'); + }); + + it('allows tier changes after initialization', () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + expect(orchestrator.getModelTier()).toBe('default'); + + orchestrator.setModelTier('complex'); + + expect(orchestrator.getModelTier()).toBe('complex'); + + orchestrator.setModelTier('fast'); + + expect(orchestrator.getModelTier()).toBe('fast'); + }); + }); + + describe('setOnToolUse()', () => { + it('sets tool-use callback on primary agent', () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + const callback = vi.fn(); + + orchestrator.setOnToolUse(callback); + + expect(orchestrator.getModelTier()).toBe('default'); + }); + + it('allows callback changes', () => { + const orchestrator = new AgentOrchestrator({ + modelRouter: mockRouter, + systemPrompt: 'You are helpful.', + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 10, + }); + + const callback1 = vi.fn(); + const callback2 = vi.fn(); + + orchestrator.setOnToolUse(callback1); + + expect(orchestrator.getModelTier()).toBe('default'); + + orchestrator.setOnToolUse(callback2); + + expect(orchestrator.getModelTier()).toBe('default'); + }); + }); +}); diff --git a/src/backends/native/orchestrator.ts b/src/backends/native/orchestrator.ts new file mode 100644 index 0000000..2049674 --- /dev/null +++ b/src/backends/native/orchestrator.ts @@ -0,0 +1,309 @@ +import type { ModelRouter, ModelTier } from '../../models/router.js'; +import type { ChatRequest, Message, TokenUsage } from '../../models/types.js'; +import type { Session } from '../../session/index.js'; +import type { ToolRegistry } from '../../tools/registry.js'; +import type { ToolExecutor } from '../../tools/executor.js'; +import { NativeAgent } from './agent.js'; +import type { ToolUseEvent } from './agent.js'; +import { shouldCompact } from '../../context/tokens.js'; +import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js'; + +// ── Public types ────────────────────────────────────────────────────── + +/** A single-turn, stateless request to a sub-agent at a specific tier. */ +export interface SubAgentRequest { + tier: ModelTier; + systemPrompt: string; + message: string; + maxTokens?: number; + /** When true, include tools from the toolRegistry in the request. */ + tools?: boolean; +} + +/** Result returned from a sub-agent delegation call. */ +export interface SubAgentResult { + content: string; + usage: TokenUsage; + tier: ModelTier; +} + +/** Maps each delegation task to the model tier that should handle it. */ +export interface DelegationConfig { + compaction: ModelTier; + memory_extraction: ModelTier; + classification: ModelTier; + tool_summarisation: ModelTier; + complex_reasoning: ModelTier; +} + +/** Per-tier cumulative usage statistics. */ +interface TierUsageStats { + inputTokens: number; + outputTokens: number; + calls: number; +} + +/** Full configuration for the AgentOrchestrator. */ +export interface OrchestratorConfig { + modelRouter: ModelRouter; + systemPrompt: string; + session?: Session; + toolRegistry?: ToolRegistry; + toolExecutor?: ToolExecutor; + maxIterations?: number; + /** The tier used by the primary NativeAgent for user-facing conversation. */ + primaryTier: ModelTier; + /** Which tier to use for each delegation task type. */ + delegation: DelegationConfig; + /** Maximum nesting depth for delegation calls (safety guard). */ + maxDelegationDepth: number; + onToolUse?: (event: ToolUseEvent) => void; + /** Context compaction settings. When provided, enables automatic compaction. */ + compaction?: CompactionConfig; + /** Model identifier for the primary model (used for context window lookup). */ + modelName?: string; + /** Optional override for the context window size (in tokens). */ + contextWindow?: number; +} + +// ── AgentOrchestrator ───────────────────────────────────────────────── + +/** + * Wraps a primary NativeAgent and adds the ability to delegate + * single-turn sub-tasks to different model tiers via the ModelRouter. + * + * The primary agent handles the main conversation loop (with tools), + * while `delegate()` enables cheap, stateless calls for tasks like + * compaction, classification, and memory extraction. + */ +export class AgentOrchestrator { + private _agent: NativeAgent; + private _modelRouter: ModelRouter; + private _delegation: DelegationConfig; + private _maxDelegationDepth: number; + private _toolRegistry?: ToolRegistry; + private _session?: Session; + private _compactionConfig?: CompactionConfig; + private _modelName?: string; + private _contextWindow?: number; + private _usageByTier: Map = new Map(); + + constructor(config: OrchestratorConfig) { + this._modelRouter = config.modelRouter; + this._delegation = config.delegation; + this._maxDelegationDepth = config.maxDelegationDepth; + this._toolRegistry = config.toolRegistry; + this._session = config.session; + this._compactionConfig = config.compaction; + this._modelName = config.modelName; + this._contextWindow = config.contextWindow; + + // Create the primary NativeAgent for user-facing conversation + this._agent = new NativeAgent({ + modelClient: config.modelRouter, + systemPrompt: config.systemPrompt, + session: config.session, + toolRegistry: config.toolRegistry, + toolExecutor: config.toolExecutor, + maxIterations: config.maxIterations, + onToolUse: config.onToolUse, + }); + + // Set the primary tier on the agent + this._agent.setModelTier(config.primaryTier); + } + + // ── Delegation ──────────────────────────────────────────────────── + + /** + * Perform a single-turn, stateless call to a model at the specified tier. + * + * This is used for internal sub-tasks (compaction, classification, etc.) + * that don't need the full conversation history or tool loop. + * + * If the requested tier is not available on the router, falls back to + * the 'default' tier with a warning. + */ + async delegate(request: SubAgentRequest): Promise { + let tier = request.tier; + + // Check if the requested tier is available; fall back to 'default' if not + const client = this._modelRouter.getClient(tier); + if (!client) { + console.warn( + `[Flynn:delegate] Tier '${tier}' not available, falling back to 'default'`, + ); + tier = 'default'; + } + + // Build the single-turn chat request + const messages: Message[] = [ + { role: 'user', content: request.message }, + ]; + + const chatRequest: ChatRequest = { + messages, + system: request.systemPrompt, + maxTokens: request.maxTokens, + }; + + // Optionally include tools from the registry + if (request.tools && this._toolRegistry) { + chatRequest.tools = this._toolRegistry.toAnthropicFormat(); + } + + const response = await this._modelRouter.chat(chatRequest, tier); + + // Track cumulative usage for this tier + this._trackUsage(tier, response.usage); + + console.log( + `[Flynn:delegate] tier=${tier} tokens=${response.usage.inputTokens}+${response.usage.outputTokens}`, + ); + + return { + content: response.content, + usage: response.usage, + tier, + }; + } + + // ── Primary agent proxies ───────────────────────────────────────── + + /** + * Process a user message through the primary NativeAgent. + * This is the main entry point for user-facing conversation. + * + * When compaction is configured, checks whether the conversation history + * exceeds the context window threshold and compacts it before processing. + */ + async process(userMessage: string): Promise { + await this.compactIfNeeded(); + return this._agent.process(userMessage); + } + + /** + * Force-compact the current conversation history regardless of threshold. + * Returns the compaction result, or null if there was nothing to compact + * (e.g. no session, too few messages). + */ + async compact(): Promise { + const config = this._compactionConfig ?? DEFAULT_COMPACTION_CONFIG; + const messages = this.getHistory(); + + if (messages.length === 0) { + return null; + } + + const result = await compactHistory({ + messages, + orchestrator: this, + config, + }); + + // If nothing was actually compacted, skip the replace + if (result.compactedCount === 0) { + return result; + } + + // Persist the compacted history + if (this._session) { + this._session.replaceHistory(result.messages); + } + + console.log( + `[Flynn:compact] Compacted ${result.compactedCount} messages: ` + + `${result.tokensBefore} → ${result.tokensAfter} tokens`, + ); + + return result; + } + + /** Reset the primary agent's conversation history. */ + reset(): void { + this._agent.reset(); + } + + /** Get the primary agent's conversation history. */ + getHistory(): Message[] { + return this._agent.getHistory(); + } + + /** Set the model tier on the primary agent. */ + setModelTier(tier: ModelTier): void { + this._agent.setModelTier(tier); + } + + /** Get the current model tier of the primary agent. */ + getModelTier(): ModelTier { + return this._agent.getModelTier(); + } + + /** Set the tool-use callback on the primary agent. */ + setOnToolUse(callback: ((event: ToolUseEvent) => void) | undefined): void { + this._agent.setOnToolUse(callback); + } + + // ── Usage & config accessors ────────────────────────────────────── + + /** + * Returns cumulative delegation usage stats per tier. + * Useful for cost tracking and visibility into sub-agent calls. + */ + getDelegationUsage(): Record { + const result: Record = {}; + for (const [tier, stats] of this._usageByTier) { + result[tier] = { ...stats }; + } + return result; + } + + /** + * Look up which model tier is configured for a given delegation task. + * Convenience method so callers don't need to access the config directly. + */ + getDelegationTier(task: keyof DelegationConfig): ModelTier { + return this._delegation[task]; + } + + // ── Private helpers ─────────────────────────────────────────────── + + /** + * Check whether automatic compaction should run, and if so, compact. + * Called before each `process()` call when compaction is configured. + */ + private async compactIfNeeded(): Promise { + if (!this._compactionConfig) return; + + const messages = this.getHistory(); + if (messages.length === 0) return; + + const model = this._modelName ?? 'unknown'; + const needs = shouldCompact({ + messages, + model, + contextWindow: this._contextWindow, + thresholdPct: this._compactionConfig.thresholdPct, + }); + + if (!needs) return; + + await this.compact(); + } + + /** Accumulate usage stats for a given tier. */ + private _trackUsage(tier: ModelTier, usage: TokenUsage): void { + const existing = this._usageByTier.get(tier); + if (existing) { + existing.inputTokens += usage.inputTokens; + existing.outputTokens += usage.outputTokens; + existing.calls += 1; + } else { + this._usageByTier.set(tier, { + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens, + calls: 1, + }); + } + } +} diff --git a/src/backends/native/prompts.ts b/src/backends/native/prompts.ts new file mode 100644 index 0000000..f13fec1 --- /dev/null +++ b/src/backends/native/prompts.ts @@ -0,0 +1,94 @@ +/** + * System prompts for delegated tasks. + * + * Each prompt is designed for a specific sub-task that the agent farms out + * to a (usually cheaper/faster) model call. Keep them focused and + * deterministic — the caller should be able to parse the output reliably. + */ + +/** + * Instructs a model to summarise conversation history during compaction. + * The resulting summary replaces the full history to reclaim context window space. + */ +export const COMPACTION_SYSTEM_PROMPT = `You are a conversation summariser. Your job is to condense a conversation history into a concise summary that preserves all important information. + +Rules: +- Preserve key facts, decisions, user preferences, and action items. +- Maintain chronological order of events. +- Note any unresolved questions or pending tasks. +- Be concise but thorough — aim for roughly 20% of the original length. +- Use bullet points for clarity. +- Never invent information that is not present in the conversation. +- If the conversation references files, paths, error messages, or specific values, include them verbatim. +- Group related points together under short descriptive headings when it aids readability. + +Output format: +Return a markdown summary with bullet points. Do not include any preamble or explanation — output only the summary.`; + +/** + * Instructs a model to extract persistent facts from conversation text. + * Extracted facts are stored in long-term memory for future sessions. + */ +export const MEMORY_EXTRACTION_PROMPT = `You are a fact extractor. Given a block of conversation text, extract persistent facts worth remembering across sessions. + +Categories to extract: + +## User +- Name, role, location, timezone, or other personal details explicitly shared. + +## Preferences +- Communication style, formatting preferences, tool preferences, workflow habits. + +## Technical +- Project names, repositories, tech stacks, conventions, architecture decisions. +- File paths, environment details, deployment targets. + +## Decisions +- Explicit decisions made during the conversation (e.g. "we decided to use X instead of Y"). +- Rationale for decisions when stated. + +Rules: +- Only extract facts that are explicitly stated — never infer or assume. +- Skip transient or session-specific information (e.g. "run this command now", "fix this error today"). +- Skip information that is only relevant to the current task and has no long-term value. +- If no facts worth extracting exist, return an empty response. +- Use concise bullet points under each category heading. +- Omit any category that has no entries. + +Output format: +Return markdown with the category headings above and bullet points underneath. No preamble.`; + +/** + * Instructs a model to classify an inbound message into a discrete category. + * The caller uses the label to route the message to the appropriate handler. + */ +export const CLASSIFICATION_PROMPT = `Classify the following message into exactly one of these categories: + +- command — a direct instruction to perform an action (e.g. "run tests", "deploy to staging") +- question — a request for information or explanation (e.g. "what does this function do?") +- task — a multi-step objective that requires planning (e.g. "add authentication to the API") +- conversation — casual chat, greetings, acknowledgements, or social interaction +- unclear — the message is ambiguous or lacks enough context to classify + +Rules: +- Return ONLY the classification label — a single word, nothing else. +- Do not explain your reasoning. +- If the message fits multiple categories, choose the most specific one (command > task > question > conversation).`; + +/** + * Instructs a model to condense verbose tool output into a compact summary. + * Used to shrink large tool results before they consume context window space. + */ +export const TOOL_SUMMARISATION_PROMPT = `You are a tool-output summariser. Given the raw output of a tool invocation, produce a compact summary that preserves the essential information. + +Rules: +- Preserve the key outcome: success or failure. +- Preserve important data: counts, IDs, names, statuses. +- Preserve all file paths, error codes, error messages, and specific values verbatim. +- Strip boilerplate, redundant lines, decorative formatting, and progress indicators. +- Keep the summary under 500 tokens. +- If the output is already concise, return it as-is rather than paraphrasing. +- Use a structured format (bullet points or short paragraphs) for readability. + +Output format: +Return the summarised output directly. No preamble or meta-commentary.`; diff --git a/src/config/index.ts b/src/config/index.ts index 5d70ff7..fe7493e 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -1,2 +1,2 @@ export { loadConfig } from './loader.js'; -export { configSchema, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig } from './schema.js'; +export { configSchema, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig, type AgentsConfig, type CompactionConfig } from './schema.js'; diff --git a/src/config/schema.ts b/src/config/schema.ts index 4cb37e8..77dfc53 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -19,6 +19,7 @@ const modelConfigSchema = z.object({ auth_token: z.string().optional(), for: z.array(z.string()).optional(), num_gpu: z.number().optional(), + context_window: z.number().optional(), }); const modelsSchema = z.object({ @@ -87,6 +88,32 @@ const automationSchema = z.object({ cron: z.array(cronJobSchema).default([]), }).default({}); +const agentsSchema = z.object({ + primary_tier: z.enum(['fast', 'default', 'complex', 'local']).default('default'), + delegation: z.object({ + compaction: z.enum(['fast', 'default', 'complex', 'local']).default('fast'), + memory_extraction: z.enum(['fast', 'default', 'complex', 'local']).default('fast'), + classification: z.enum(['fast', 'default', 'complex', 'local']).default('fast'), + tool_summarisation: z.enum(['fast', 'default', 'complex', 'local']).default('fast'), + complex_reasoning: z.enum(['fast', 'default', 'complex', 'local']).default('complex'), + }).default({ + compaction: 'fast', + memory_extraction: 'fast', + classification: 'fast', + tool_summarisation: 'fast', + complex_reasoning: 'complex', + }), + auto_escalate: z.boolean().default(false), + max_delegation_depth: z.number().min(1).max(10).default(3), +}).default({}); + +const compactionSchema = z.object({ + enabled: z.boolean().default(true), + threshold_pct: z.number().min(10).max(100).default(80), + keep_turns: z.number().min(1).max(50).default(4), + summary_max_tokens: z.number().min(128).max(4096).default(1024), +}).default({}); + export const configSchema = z.object({ telegram: telegramSchema, server: serverSchema.default({}), @@ -96,9 +123,13 @@ export const configSchema = z.object({ skills: skillsSchema.default({}), mcp: mcpSchema.default({ servers: [] }), automation: automationSchema, + agents: agentsSchema, + compaction: compactionSchema, }); export type Config = z.infer; export type TelegramConfig = z.infer; export type ModelConfig = z.infer; export type CronJobConfig = z.infer; +export type AgentsConfig = z.infer; +export type CompactionConfig = z.infer; diff --git a/src/context/compaction.test.ts b/src/context/compaction.test.ts new file mode 100644 index 0000000..1de56ca --- /dev/null +++ b/src/context/compaction.test.ts @@ -0,0 +1,104 @@ +import { describe, it, expect, vi } from 'vitest'; +import { compactHistory, DEFAULT_COMPACTION_CONFIG } from './compaction.js'; +import type { CompactionConfig } from './compaction.js'; +import type { AgentOrchestrator } from '../backends/native/orchestrator.js'; +import type { Message } from '../models/types.js'; + +function makeMockOrchestrator(summaryText = 'Summary of conversation'): AgentOrchestrator { + return { + getDelegationTier: vi.fn().mockReturnValue('fast'), + delegate: vi.fn().mockResolvedValue({ + content: summaryText, + usage: { inputTokens: 100, outputTokens: 50 }, + tier: 'fast', + }), + } as unknown as AgentOrchestrator; +} + +function makeMessages(count: number): Message[] { + const msgs: Message[] = []; + for (let i = 0; i < count; i++) { + msgs.push({ + role: i % 2 === 0 ? 'user' : 'assistant', + content: `Message ${i}`, + }); + } + return msgs; +} + +describe('compactHistory', () => { + const config: CompactionConfig = { + thresholdPct: 80, + keepTurns: 2, // keeps last 4 messages + summaryMaxTokens: 1024, + }; + + it('returns no-op when messages count is at or below keepTurns threshold', async () => { + const messages = makeMessages(4); // keepTurns=2 → keep 4 messages + const orchestrator = makeMockOrchestrator(); + + const result = await compactHistory({ messages, orchestrator, config }); + + expect(result.compactedCount).toBe(0); + expect(result.messages).toEqual(messages); + expect(orchestrator.delegate).not.toHaveBeenCalled(); + }); + + it('compacts older messages and keeps recent ones', async () => { + const messages = makeMessages(10); // 10 messages, keep last 4, compact 6 + const orchestrator = makeMockOrchestrator('Summarized conversation'); + + const result = await compactHistory({ messages, orchestrator, config }); + + expect(result.compactedCount).toBe(6); + expect(result.messages).toHaveLength(5); // 1 summary + 4 kept + expect(result.messages[0].role).toBe('assistant'); + expect(result.messages[0].content).toContain('[Summary of earlier conversation]'); + expect(result.messages[0].content).toContain('Summarized conversation'); + // Last 4 messages should be preserved + expect(result.messages.slice(1)).toEqual(messages.slice(-4)); + }); + + it('calls delegate with compaction tier and correct params', async () => { + const messages = makeMessages(10); + const orchestrator = makeMockOrchestrator(); + + await compactHistory({ messages, orchestrator, config }); + + expect(orchestrator.getDelegationTier).toHaveBeenCalledWith('compaction'); + expect(orchestrator.delegate).toHaveBeenCalledWith( + expect.objectContaining({ + tier: 'fast', + maxTokens: 1024, + }), + ); + }); + + it('populates token counts in result', async () => { + const messages = makeMessages(10); + const orchestrator = makeMockOrchestrator(); + + const result = await compactHistory({ messages, orchestrator, config }); + + expect(result.tokensBefore).toBeGreaterThan(0); + expect(result.tokensAfter).toBeGreaterThan(0); + expect(result.tokensAfter).toBeLessThan(result.tokensBefore); + }); + + it('handles single turn above keepTurns threshold', async () => { + // 3 turns = 6 messages, keepTurns=2 keeps 4, compacts 2 + const messages = makeMessages(6); + const orchestrator = makeMockOrchestrator(); + + const result = await compactHistory({ messages, orchestrator, config }); + + expect(result.compactedCount).toBe(2); + expect(result.messages).toHaveLength(5); // 1 summary + 4 kept + }); + + it('uses DEFAULT_COMPACTION_CONFIG values correctly', () => { + expect(DEFAULT_COMPACTION_CONFIG.thresholdPct).toBe(80); + expect(DEFAULT_COMPACTION_CONFIG.keepTurns).toBe(4); + expect(DEFAULT_COMPACTION_CONFIG.summaryMaxTokens).toBe(1024); + }); +}); diff --git a/src/context/compaction.ts b/src/context/compaction.ts new file mode 100644 index 0000000..1f2f968 --- /dev/null +++ b/src/context/compaction.ts @@ -0,0 +1,74 @@ +import type { Message } from '../models/types.js'; +import type { AgentOrchestrator } from '../backends/native/orchestrator.js'; +import { COMPACTION_SYSTEM_PROMPT } from '../backends/native/prompts.js'; +import { estimateMessageTokens } from './tokens.js'; + +export interface CompactionConfig { + /** Percentage of context window that triggers compaction (default: 80). */ + thresholdPct: number; + /** Number of recent turns (user+assistant pairs) to always keep intact. */ + keepTurns: number; + /** Maximum tokens for the compaction summary response. */ + summaryMaxTokens: number; +} + +export interface CompactionResult { + /** The compacted messages: [summary, ...recentMessages]. */ + messages: Message[]; + /** Number of messages that were compacted (removed). */ + compactedCount: number; + /** Estimated tokens before compaction. */ + tokensBefore: number; + /** Estimated tokens after compaction. */ + tokensAfter: number; +} + +export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = { + thresholdPct: 80, + keepTurns: 4, + summaryMaxTokens: 1024, +}; + +export async function compactHistory(opts: { + messages: Message[]; + orchestrator: AgentOrchestrator; + config: CompactionConfig; +}): Promise { + const { messages, orchestrator, config } = opts; + + const keepCount = config.keepTurns * 2; + if (messages.length <= keepCount) { + return { + messages, + compactedCount: 0, + tokensBefore: estimateMessageTokens(messages), + tokensAfter: estimateMessageTokens(messages), + }; + } + + const toCompact = messages.slice(0, -keepCount); + const toKeep = messages.slice(-keepCount); + + const formattedConversation = toCompact.map((msg) => `${msg.role}: ${msg.content}`).join('\n\n'); + + const tier = orchestrator.getDelegationTier('compaction'); + + const result = await orchestrator.delegate({ + tier, + systemPrompt: COMPACTION_SYSTEM_PROMPT, + message: formattedConversation, + maxTokens: config.summaryMaxTokens, + }); + + const summaryMessage: Message = { + role: 'assistant', + content: '[Summary of earlier conversation]\n\n' + result.content, + }; + + return { + messages: [summaryMessage, ...toKeep], + compactedCount: toCompact.length, + tokensBefore: estimateMessageTokens(messages), + tokensAfter: estimateMessageTokens([summaryMessage, ...toKeep]), + }; +} diff --git a/src/context/index.ts b/src/context/index.ts new file mode 100644 index 0000000..b90a041 --- /dev/null +++ b/src/context/index.ts @@ -0,0 +1,15 @@ +export { + estimateTokens, + estimateMessageTokens, + getContextWindow, + shouldCompact, + CONTEXT_WINDOWS, + type ShouldCompactOpts, +} from './tokens.js'; + +export { + compactHistory, + type CompactionConfig, + type CompactionResult, + DEFAULT_COMPACTION_CONFIG, +} from './compaction.js'; diff --git a/src/context/tokens.test.ts b/src/context/tokens.test.ts new file mode 100644 index 0000000..0f1ae6e --- /dev/null +++ b/src/context/tokens.test.ts @@ -0,0 +1,108 @@ +import { describe, it, expect } from 'vitest'; +import { estimateTokens, estimateMessageTokens, getContextWindow, shouldCompact, CONTEXT_WINDOWS } from './tokens.js'; + +describe('estimateTokens', () => { + it('returns 0 for empty string', () => { + // estimateTokens('') should be 0 — Math.ceil(0/4) = 0 + expect(estimateTokens('')).toBe(0); + }); + + it('estimates ~1 token per 4 characters', () => { + // 'abcd' = 4 chars → ceil(4/4) = 1 + expect(estimateTokens('abcd')).toBe(1); + // 'abcde' = 5 chars → ceil(5/4) = 2 + expect(estimateTokens('abcde')).toBe(2); + }); + + it('handles longer text', () => { + const text = 'a'.repeat(100); + expect(estimateTokens(text)).toBe(25); // 100/4 = 25 + }); +}); + +describe('estimateMessageTokens', () => { + it('returns 0 for empty array', () => { + expect(estimateMessageTokens([])).toBe(0); + }); + + it('includes overhead per message', () => { + // 'abcd' = 1 token content + 4 overhead = 5 per message + const messages = [{ role: 'user' as const, content: 'abcd' }]; + expect(estimateMessageTokens(messages)).toBe(5); + }); + + it('sums multiple messages', () => { + const messages = [ + { role: 'user' as const, content: 'abcd' }, // 1 + 4 = 5 + { role: 'assistant' as const, content: 'abcd' }, // 1 + 4 = 5 + ]; + expect(estimateMessageTokens(messages)).toBe(10); + }); +}); + +describe('getContextWindow', () => { + it('returns known window for Claude Sonnet', () => { + expect(getContextWindow('claude-sonnet-4-20250514')).toBe(200_000); + }); + + it('returns known window for GPT-4o', () => { + expect(getContextWindow('gpt-4o')).toBe(128_000); + }); + + it('returns default 128000 for unknown model', () => { + expect(getContextWindow('unknown-model')).toBe(128_000); + }); + + it('returns override when provided', () => { + expect(getContextWindow('claude-sonnet-4-20250514', 50_000)).toBe(50_000); + }); + + it('returns override even for unknown model', () => { + expect(getContextWindow('unknown-model', 32_000)).toBe(32_000); + }); +}); + +describe('shouldCompact', () => { + it('returns false when messages are well below threshold', () => { + const messages = [{ role: 'user' as const, content: 'hello' }]; + expect(shouldCompact({ + messages, + model: 'gpt-4o', // 128k context window + })).toBe(false); + }); + + it('returns true when messages exceed threshold', () => { + // Create messages that exceed 80% of a small context window + // context window = 100, threshold = 80% = 80 tokens + // each message: ceil(400/4) + 4 = 104 tokens → well over 80 + const messages = [{ role: 'user' as const, content: 'a'.repeat(400) }]; + expect(shouldCompact({ + messages, + model: 'unknown', + contextWindow: 100, + thresholdPct: 80, + })).toBe(true); + }); + + it('respects custom thresholdPct', () => { + // 1 message: ceil(20/4) + 4 = 9 tokens + // contextWindow = 100, thresholdPct = 5 → threshold = 5 tokens + const messages = [{ role: 'user' as const, content: 'a'.repeat(20) }]; + expect(shouldCompact({ + messages, + model: 'unknown', + contextWindow: 100, + thresholdPct: 5, + })).toBe(true); + }); + + it('uses model lookup when no contextWindow override', () => { + // gpt-3.5-turbo = 16385 tokens, default threshold 80% = 13108 + // Large message to exceed: ceil(60000/4) + 4 = 15004 tokens → over 13108 + const messages = [{ role: 'user' as const, content: 'a'.repeat(60000) }]; + expect(shouldCompact({ + messages, + model: 'gpt-3.5-turbo', + })).toBe(true); + }); +}); diff --git a/src/context/tokens.ts b/src/context/tokens.ts new file mode 100644 index 0000000..9bf278d --- /dev/null +++ b/src/context/tokens.ts @@ -0,0 +1,88 @@ +import type { Message } from '../models/types.js'; + +/** + * Approximate overhead tokens per message (role marker, separators, etc.). + */ +const MESSAGE_OVERHEAD_TOKENS = 4; + +/** + * Conservative default context window when a model is not in the lookup table. + */ +const DEFAULT_CONTEXT_WINDOW = 128_000; + +/** + * Hard-coded context window sizes (in tokens) for known models. + */ +export const CONTEXT_WINDOWS: Record = { + 'claude-sonnet-4-20250514': 200_000, + 'claude-3-5-haiku-20241022': 200_000, + 'claude-3-5-sonnet-20241022': 200_000, + 'claude-3-opus-20240229': 200_000, + 'claude-opus-4-20250514': 200_000, + 'gpt-4o': 128_000, + 'gpt-4o-mini': 128_000, + 'gpt-4-turbo': 128_000, + 'gpt-3.5-turbo': 16_385, +} as const; + +/** + * Cheap character-based token estimation. + * + * Uses `Math.ceil(text.length / 4)` as a reasonable approximation for + * English text (roughly 4 characters per token on average). + */ +export function estimateTokens(text: string): number { + return Math.ceil(text.length / 4); +} + +/** + * Estimate the total token count for an array of messages. + * + * For each message the estimate includes the content tokens plus a fixed + * overhead of ~4 tokens to account for the role marker and separators. + */ +export function estimateMessageTokens(messages: Message[]): number { + return messages.reduce( + (sum, msg) => sum + estimateTokens(msg.content) + MESSAGE_OVERHEAD_TOKENS, + 0, + ); +} + +/** + * Return the context window size (in tokens) for a given model. + * + * @param model - Model identifier to look up. + * @param override - If provided, this value is returned directly. + * @returns The context window size in tokens. + */ +export function getContextWindow(model: string, override?: number): number { + if (override !== undefined) { + return override; + } + return CONTEXT_WINDOWS[model] ?? DEFAULT_CONTEXT_WINDOW; +} + +/** + * Options for {@link shouldCompact}. + */ +export interface ShouldCompactOpts { + messages: Message[]; + model: string; + contextWindow?: number; + /** Percentage of the context window that triggers compaction (default 80). */ + thresholdPct?: number; +} + +/** + * Determine whether the conversation should be compacted. + * + * Returns `true` when the estimated token count of `messages` exceeds + * `thresholdPct` percent of the effective context window. + */ +export function shouldCompact(opts: ShouldCompactOpts): boolean { + const { messages, model, contextWindow, thresholdPct = 80 } = opts; + const window = getContextWindow(model, contextWindow); + const threshold = (thresholdPct / 100) * window; + const estimated = estimateMessageTokens(messages); + return estimated > threshold; +} diff --git a/src/daemon/index.ts b/src/daemon/index.ts index 2b88141..ecc0c22 100644 --- a/src/daemon/index.ts +++ b/src/daemon/index.ts @@ -2,7 +2,7 @@ import { Lifecycle } from './lifecycle.js'; import type { Config, ModelConfig } from '../config/index.js'; import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, ModelRouter } from '../models/index.js'; import type { ModelClient } from '../models/index.js'; -import { NativeAgent } from '../backends/index.js'; +import { AgentOrchestrator, type DelegationConfig } from '../backends/index.js'; import { SessionStore, SessionManager } from '../session/index.js'; import { HookEngine } from '../hooks/index.js'; import { ToolRegistry, ToolExecutor, allBuiltinTools } from '../tools/index.js'; @@ -134,7 +134,8 @@ function createModelRouter(config: Config): ModelRouter { /** * Create the unified message handler for the channel registry. - * Each channel+sender pair gets its own NativeAgent backed by a persistent session. + * Each channel+sender pair gets its own AgentOrchestrator backed by a persistent session. + * The orchestrator wraps a NativeAgent and adds delegation to different model tiers. */ function createMessageRouter(deps: { sessionManager: SessionManager; @@ -142,21 +143,39 @@ function createMessageRouter(deps: { systemPrompt: string; toolRegistry: ToolRegistry; toolExecutor: ToolExecutor; + config: Config; }) { // Cache agents by session ID to avoid recreating on every message - const agents = new Map(); + const agents = new Map(); - function getOrCreateAgent(channel: string, senderId: string): NativeAgent { + function getOrCreateAgent(channel: string, senderId: string): AgentOrchestrator { const sessionId = `${channel}:${senderId}`; let agent = agents.get(sessionId); if (!agent) { const session = deps.sessionManager.getSession(channel, senderId); - agent = new NativeAgent({ - modelClient: deps.modelRouter, + const delegationConfig: DelegationConfig = { + compaction: deps.config.agents.delegation.compaction ?? 'fast', + memory_extraction: deps.config.agents.delegation.memory_extraction ?? 'fast', + classification: deps.config.agents.delegation.classification ?? 'fast', + tool_summarisation: deps.config.agents.delegation.tool_summarisation ?? 'fast', + complex_reasoning: deps.config.agents.delegation.complex_reasoning ?? 'complex', + }; + agent = new AgentOrchestrator({ + modelRouter: deps.modelRouter, systemPrompt: deps.systemPrompt, session, toolRegistry: deps.toolRegistry, toolExecutor: deps.toolExecutor, + primaryTier: deps.config.agents.primary_tier ?? 'default', + delegation: delegationConfig, + maxDelegationDepth: deps.config.agents.max_delegation_depth ?? 3, + compaction: deps.config.compaction.enabled ? { + thresholdPct: deps.config.compaction.threshold_pct, + keepTurns: deps.config.compaction.keep_turns, + summaryMaxTokens: deps.config.compaction.summary_max_tokens, + } : undefined, + modelName: deps.config.models.default.model, + contextWindow: deps.config.models.default.context_window, }); agents.set(sessionId, agent); } @@ -167,9 +186,26 @@ function createMessageRouter(deps: { const agent = getOrCreateAgent(msg.channel, msg.senderId); // Handle special commands - if (msg.metadata?.isCommand && msg.metadata.command === 'reset') { - agent.reset(); - return; + if (msg.metadata?.isCommand) { + if (msg.metadata.command === 'reset') { + agent.reset(); + return; + } + if (msg.metadata.command === 'compact') { + const result = await agent.compact(); + if (result && result.compactedCount > 0) { + await reply({ + text: `Compacted ${result.compactedCount} messages: ${result.tokensBefore} → ${result.tokensAfter} tokens`, + replyTo: msg.id, + }); + } else { + await reply({ + text: 'Nothing to compact.', + replyTo: msg.id, + }); + } + return; + } } try { @@ -284,6 +320,7 @@ export async function startDaemon(config: Config): Promise { systemPrompt, toolRegistry, toolExecutor, + config, })); // Register Telegram adapter diff --git a/src/frontends/tui/commands.test.ts b/src/frontends/tui/commands.test.ts index dea4267..730e6ed 100644 --- a/src/frontends/tui/commands.test.ts +++ b/src/frontends/tui/commands.test.ts @@ -26,6 +26,10 @@ describe('parseCommand', () => { expect(parseCommand('/fs')).toEqual({ type: 'fullscreen' }); }); + it('parses /compact command', () => { + expect(parseCommand('/compact')).toEqual({ type: 'compact' }); + }); + it('parses /model command without argument', () => { expect(parseCommand('/model')).toEqual({ type: 'model' }); }); @@ -64,6 +68,7 @@ describe('getHelpText', () => { expect(help).toContain('/help'); expect(help).toContain('/model'); expect(help).toContain('/reset'); + expect(help).toContain('/compact'); expect(help).toContain('/quit'); }); }); diff --git a/src/frontends/tui/commands.ts b/src/frontends/tui/commands.ts index 61b9adf..b7054e7 100644 --- a/src/frontends/tui/commands.ts +++ b/src/frontends/tui/commands.ts @@ -4,6 +4,7 @@ export type Command = | { type: 'help' } | { type: 'status' } | { type: 'fullscreen' } + | { type: 'compact' } | { type: 'model'; name?: string } | { type: 'backend'; provider?: string } | { type: 'transfer'; target: string } @@ -38,6 +39,11 @@ export function parseCommand(input: string): Command | null { return { type: 'fullscreen' }; } + // Compact + if (trimmed === '/compact') { + return { type: 'compact' }; + } + // Model (with optional argument) if (trimmed === '/model') { return { type: 'model' }; @@ -73,6 +79,7 @@ Commands: /model [name] Show or switch model (local, default, fast, complex) /backend [provider] Show or switch local backend (ollama, llamacpp) /reset, /clear, /new Clear conversation history + /compact Compact conversation history /status Show session info and token usage /fullscreen, /fs Switch to fullscreen mode /transfer Transfer session to another frontend @@ -90,6 +97,7 @@ export const SLASH_COMMANDS = [ '/reset', '/clear', '/new', + '/compact', '/status', '/fullscreen', '/fs', @@ -106,6 +114,7 @@ export const COMMAND_TOOLTIPS: Record = { '/reset': 'Clear conversation history', '/clear': 'Clear conversation history', '/new': 'Start a new conversation', + '/compact': 'Compact conversation history to save context space', '/status': 'Show session info and token usage', '/fullscreen': 'Switch to fullscreen mode', '/fs': 'Switch to fullscreen mode', diff --git a/src/frontends/tui/minimal.test.ts b/src/frontends/tui/minimal.test.ts index 579e908..21e9bcb 100644 --- a/src/frontends/tui/minimal.test.ts +++ b/src/frontends/tui/minimal.test.ts @@ -44,6 +44,7 @@ describe('MinimalTui backend command', () => { getHistory: () => [], addMessage: vi.fn(), clear: vi.fn(), + replaceHistory: vi.fn(), }; const mockRouter = { @@ -84,6 +85,7 @@ describe('MinimalTui backend command', () => { getHistory: () => [], addMessage: vi.fn(), clear: vi.fn(), + replaceHistory: vi.fn(), }; const mockRouter = { diff --git a/src/gateway/handlers/handlers.test.ts b/src/gateway/handlers/handlers.test.ts index 782e866..4d4fd2c 100644 --- a/src/gateway/handlers/handlers.test.ts +++ b/src/gateway/handlers/handlers.test.ts @@ -44,6 +44,7 @@ describe('session handlers', () => { addMessage: vi.fn(), getHistory: vi.fn(() => mockHistory), clear: vi.fn(), + replaceHistory: vi.fn(), }; const mockSessionManager = { diff --git a/src/gateway/server.test.ts b/src/gateway/server.test.ts index 4af4e0d..ef408c7 100644 --- a/src/gateway/server.test.ts +++ b/src/gateway/server.test.ts @@ -13,6 +13,7 @@ const mockSession = { getHistory: vi.fn(() => []), clear: vi.fn(), setHistory: vi.fn(), + replaceHistory: vi.fn(), }; const mockSessionManager = { diff --git a/src/gateway/session-bridge.test.ts b/src/gateway/session-bridge.test.ts index 2602925..8dfbb02 100644 --- a/src/gateway/session-bridge.test.ts +++ b/src/gateway/session-bridge.test.ts @@ -8,6 +8,7 @@ const mockSession = { addMessage: vi.fn(), getHistory: vi.fn(() => []), clear: vi.fn(), + replaceHistory: vi.fn(), }; const mockSessionManager = { diff --git a/src/session/manager.ts b/src/session/manager.ts index f54e935..bbd4236 100644 --- a/src/session/manager.ts +++ b/src/session/manager.ts @@ -6,6 +6,7 @@ export interface Session { addMessage(message: Message): void; getHistory(): Message[]; clear(): void; + replaceHistory(messages: Message[]): void; } export class ManagedSession implements Session { @@ -34,6 +35,16 @@ export class ManagedSession implements Session { this.store.clearSession(this.id); } + /** + * Replace the entire session history with new messages. + * Used after compaction to persist the compacted state. + * Updates both in-memory history and SQLite storage atomically. + */ + replaceHistory(messages: Message[]): void { + this.history = [...messages]; + this.store.replaceMessages(this.id, messages); + } + setHistory(messages: Message[]): void { this.history = [...messages]; } diff --git a/src/session/store.ts b/src/session/store.ts index edeb21f..cab1218 100644 --- a/src/session/store.ts +++ b/src/session/store.ts @@ -40,6 +40,26 @@ export class SessionStore { })); } + /** + * Atomically replace all messages for a session. + * Used by compaction to swap full history with a compacted version. + * Runs in a transaction: delete all → re-insert in order. + */ + replaceMessages(sessionId: string, messages: Message[]): void { + const transaction = this.db.transaction(() => { + // Delete existing messages + this.db.prepare('DELETE FROM messages WHERE session_id = ?').run(sessionId); + // Re-insert in order + const insert = this.db.prepare( + 'INSERT INTO messages (session_id, role, content) VALUES (?, ?, ?)' + ); + for (const msg of messages) { + insert.run(sessionId, msg.role, msg.content); + } + }); + transaction(); + } + clearSession(sessionId: string): void { const stmt = this.db.prepare('DELETE FROM messages WHERE session_id = ?'); stmt.run(sessionId);