feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)

Phase 0 — Multi-Model Delegation:
- AgentOrchestrator wraps NativeAgent with delegate() for stateless
  single-turn calls to any model tier (fast/default/complex/local)
- DelegationConfig maps task types (compaction, classification, etc.)
  to model tiers
- Delegation prompts for compaction, memory extraction, classification,
  and tool summarisation
- Per-tier usage tracking for cost visibility
- Config schema: agents.delegation and agents.primary_tier

Phase 1 — Context Compaction:
- Token estimation (char/4 heuristic) with context window lookup
- shouldCompact() threshold check against context window percentage
- compactHistory() splits old/recent messages, delegates summary to
  fast tier, returns CompactionResult
- Automatic compaction in AgentOrchestrator.process() when configured
- Force-compact via orchestrator.compact() with session persistence
- Session.replaceHistory() with atomic SQLite transaction
- /compact TUI command with feedback on compacted token counts
- Config schema: compaction.enabled, threshold_pct, keep_turns,
  summary_max_tokens

Tests: 385 passing across 50 files (22 new tests in 2 new test files)
This commit is contained in:
William Valentin
2026-02-06 13:17:02 -08:00
parent f7cc87a4bb
commit 306e11bd2e
22 changed files with 1562 additions and 12 deletions
+309
View File
@@ -0,0 +1,309 @@
import type { ModelRouter, ModelTier } from '../../models/router.js';
import type { ChatRequest, Message, TokenUsage } from '../../models/types.js';
import type { Session } from '../../session/index.js';
import type { ToolRegistry } from '../../tools/registry.js';
import type { ToolExecutor } from '../../tools/executor.js';
import { NativeAgent } from './agent.js';
import type { ToolUseEvent } from './agent.js';
import { shouldCompact } from '../../context/tokens.js';
import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
// ── Public types ──────────────────────────────────────────────────────
/**
 * A single-turn, stateless request to a sub-agent at a specific tier.
 * This is the argument type of AgentOrchestrator.delegate().
 */
export interface SubAgentRequest {
/**
 * Requested model tier. delegate() substitutes 'default' (with a warning)
 * when the router returns no client for this tier.
 */
tier: ModelTier;
/** Sent as the `system` field of the chat request built by delegate(). */
systemPrompt: string;
/** Sent as the content of the single `user` message in the chat request. */
message: string;
/** Forwarded unchanged as the chat request's `maxTokens`; may be omitted. */
maxTokens?: number;
/**
 * When true, include tools from the toolRegistry in the request.
 * Has no effect if the orchestrator was constructed without a toolRegistry.
 */
tools?: boolean;
}
/** Result returned from a sub-agent delegation call (see AgentOrchestrator.delegate()). */
export interface SubAgentResult {
/** The `content` of the model router's chat response, passed through as-is. */
content: string;
/** The `usage` of the chat response; the same values are added to the per-tier totals. */
usage: TokenUsage;
/**
 * The tier that actually served the call. This differs from the requested
 * tier when delegate() fell back to 'default'.
 */
tier: ModelTier;
}
/**
 * Maps each delegation task to the model tier that should handle it.
 * Each key is a valid argument to AgentOrchestrator.getDelegationTier(),
 * which returns the tier stored under that key.
 */
export interface DelegationConfig {
// NOTE(review): presumably the tier used when summarising old history
// during context compaction — the caller lives outside this file; confirm.
compaction: ModelTier;
// NOTE(review): the consumers of the four task types below are not visible
// in this file; their exact semantics should be confirmed against callers.
memory_extraction: ModelTier;
classification: ModelTier;
tool_summarisation: ModelTier;
complex_reasoning: ModelTier;
}
/**
 * Per-tier cumulative usage statistics.
 * One record is kept per tier and updated after every delegate() call.
 */
interface TierUsageStats {
/** Running sum of `usage.inputTokens` across delegate() calls served by the tier. */
inputTokens: number;
/** Running sum of `usage.outputTokens` across delegate() calls served by the tier. */
outputTokens: number;
/** Number of delegate() calls served by the tier. */
calls: number;
}
/** Full configuration for the AgentOrchestrator. */
export interface OrchestratorConfig {
/**
 * Router used for both roles: it is handed to the primary NativeAgent as
 * its model client, and it serves delegate() chat calls directly.
 */
modelRouter: ModelRouter;
/** System prompt passed to the primary NativeAgent. */
systemPrompt: string;
/**
 * Optional session. Passed to the primary NativeAgent, and also used by
 * compact() to persist the compacted history via replaceHistory().
 */
session?: Session;
/**
 * Optional tool registry. Passed to the primary NativeAgent, and also used
 * by delegate() to attach tool definitions when a request sets `tools`.
 */
toolRegistry?: ToolRegistry;
/** Optional tool executor, passed through to the primary NativeAgent. */
toolExecutor?: ToolExecutor;
/**
 * Passed through to the primary NativeAgent.
 * NOTE(review): presumably caps the agent's tool-loop iterations — confirm in NativeAgent.
 */
maxIterations?: number;
/** The tier used by the primary NativeAgent for user-facing conversation. */
primaryTier: ModelTier;
/** Which tier to use for each delegation task type. */
delegation: DelegationConfig;
/**
 * Maximum nesting depth for delegation calls (safety guard).
 * NOTE(review): the orchestrator stores this value but nothing in the class
 * reads it, so no depth limit is currently enforced there.
 */
maxDelegationDepth: number;
/** Optional tool-use callback, passed through to the primary NativeAgent. */
onToolUse?: (event: ToolUseEvent) => void;
/**
 * Context compaction settings. When provided, enables automatic compaction.
 * A forced compact() call still works without it and falls back to
 * DEFAULT_COMPACTION_CONFIG.
 */
compaction?: CompactionConfig;
/**
 * Model identifier for the primary model (used for context window lookup).
 * The literal 'unknown' is used for the lookup when this is omitted.
 */
modelName?: string;
/** Optional override for the context window size (in tokens). */
contextWindow?: number;
}
// ── AgentOrchestrator ─────────────────────────────────────────────────
/**
 * Wraps a primary NativeAgent and adds the ability to delegate
 * single-turn sub-tasks to different model tiers via the ModelRouter.
 *
 * The primary agent handles the main conversation loop (with tools),
 * while `delegate()` enables cheap, stateless calls for tasks like
 * compaction, classification, and memory extraction.
 *
 * When a compaction config is supplied, `process()` checks the history
 * against the context-window threshold before every user message and
 * compacts it first if needed.
 */
export class AgentOrchestrator {
  private readonly _agent: NativeAgent;
  private readonly _modelRouter: ModelRouter;
  private readonly _delegation: DelegationConfig;
  // NOTE(review): stored from config but not read anywhere in this class, so
  // no delegation depth limit is enforced here yet.
  private readonly _maxDelegationDepth: number;
  private readonly _toolRegistry?: ToolRegistry;
  private readonly _session?: Session;
  private readonly _compactionConfig?: CompactionConfig;
  private readonly _modelName?: string;
  private readonly _contextWindow?: number;
  /** Cumulative delegate() usage, keyed by the tier that served the call. */
  private readonly _usageByTier: Map<string, TierUsageStats> = new Map();

  /**
   * @param config Orchestrator settings. The router, session and tool
   *   registry are shared between the primary agent and the orchestrator.
   */
  constructor(config: OrchestratorConfig) {
    this._modelRouter = config.modelRouter;
    this._delegation = config.delegation;
    this._maxDelegationDepth = config.maxDelegationDepth;
    this._toolRegistry = config.toolRegistry;
    this._session = config.session;
    this._compactionConfig = config.compaction;
    this._modelName = config.modelName;
    this._contextWindow = config.contextWindow;

    // Create the primary NativeAgent for user-facing conversation.
    this._agent = new NativeAgent({
      modelClient: config.modelRouter,
      systemPrompt: config.systemPrompt,
      session: config.session,
      toolRegistry: config.toolRegistry,
      toolExecutor: config.toolExecutor,
      maxIterations: config.maxIterations,
      onToolUse: config.onToolUse,
    });

    // Set the primary tier on the agent.
    this._agent.setModelTier(config.primaryTier);
  }

  // ── Delegation ────────────────────────────────────────────────────

  /**
   * Perform a single-turn, stateless call to a model at the specified tier.
   *
   * This is used for internal sub-tasks (compaction, classification, etc.)
   * that don't need the full conversation history or tool loop.
   *
   * If the requested tier is not available on the router, falls back to
   * the 'default' tier with a warning. Usage is recorded against the tier
   * that actually served the call.
   *
   * @param request The prompt, tier and options for the one-shot call.
   * @returns The response content, its token usage, and the serving tier.
   */
  async delegate(request: SubAgentRequest): Promise<SubAgentResult> {
    let tier = request.tier;

    // Check if the requested tier is available; fall back to 'default' if not.
    if (!this._modelRouter.getClient(tier)) {
      console.warn(
        `[Flynn:delegate] Tier '${tier}' not available, falling back to 'default'`,
      );
      tier = 'default';
    }

    // Build the single-turn chat request: one user message, no history.
    const messages: Message[] = [{ role: 'user', content: request.message }];
    const chatRequest: ChatRequest = {
      messages,
      system: request.systemPrompt,
      maxTokens: request.maxTokens,
    };

    // Optionally include tools from the registry.
    if (request.tools && this._toolRegistry) {
      chatRequest.tools = this._toolRegistry.toAnthropicFormat();
    }

    const response = await this._modelRouter.chat(chatRequest, tier);

    // Track cumulative usage for this tier.
    this._trackUsage(tier, response.usage);
    console.log(
      `[Flynn:delegate] tier=${tier} tokens=${response.usage.inputTokens}+${response.usage.outputTokens}`,
    );

    return {
      content: response.content,
      usage: response.usage,
      tier,
    };
  }

  // ── Primary agent proxies ─────────────────────────────────────────

  /**
   * Process a user message through the primary NativeAgent.
   * This is the main entry point for user-facing conversation.
   *
   * When compaction is configured, checks whether the conversation history
   * exceeds the context window threshold and compacts it before processing.
   *
   * @param userMessage The user's message text.
   * @returns The primary agent's reply.
   */
  async process(userMessage: string): Promise<string> {
    await this.compactIfNeeded();
    return this._agent.process(userMessage);
  }

  /**
   * Force-compact the current conversation history regardless of threshold.
   *
   * Uses the configured compaction settings, or DEFAULT_COMPACTION_CONFIG
   * when none were supplied.
   *
   * @returns `null` when the history is empty; otherwise the result from
   *   compactHistory(). A result whose `compactedCount` is 0 is returned
   *   as-is without touching the session.
   */
  async compact(): Promise<CompactionResult | null> {
    const config = this._compactionConfig ?? DEFAULT_COMPACTION_CONFIG;
    const messages = this.getHistory();
    if (messages.length === 0) {
      return null;
    }

    const result = await compactHistory({
      messages,
      orchestrator: this,
      config,
    });

    // If nothing was actually compacted, skip the replace.
    if (result.compactedCount === 0) {
      return result;
    }

    // Persist the compacted history.
    // NOTE(review): the compacted messages are only written to the session.
    // Without a session the result is returned but not applied anywhere;
    // whether NativeAgent re-reads history from the session should be confirmed.
    if (this._session) {
      this._session.replaceHistory(result.messages);
    }

    // Separate the before/after counts so they cannot be read as one number.
    console.log(
      `[Flynn:compact] Compacted ${result.compactedCount} messages: ` +
        `${result.tokensBefore} → ${result.tokensAfter} tokens`,
    );
    return result;
  }

  /** Reset the primary agent's conversation history. */
  reset(): void {
    this._agent.reset();
  }

  /** Get the primary agent's conversation history. */
  getHistory(): Message[] {
    return this._agent.getHistory();
  }

  /** Set the model tier on the primary agent. */
  setModelTier(tier: ModelTier): void {
    this._agent.setModelTier(tier);
  }

  /** Get the current model tier of the primary agent. */
  getModelTier(): ModelTier {
    return this._agent.getModelTier();
  }

  /** Set (or clear, with `undefined`) the tool-use callback on the primary agent. */
  setOnToolUse(callback: ((event: ToolUseEvent) => void) | undefined): void {
    this._agent.setOnToolUse(callback);
  }

  // ── Usage & config accessors ──────────────────────────────────────

  /**
   * Returns cumulative delegation usage stats per tier.
   * Useful for cost tracking and visibility into sub-agent calls.
   *
   * @returns A snapshot: each entry is a copy, so mutating it does not
   *   affect the orchestrator's running totals.
   */
  getDelegationUsage(): Record<string, TierUsageStats> {
    const snapshot: Record<string, TierUsageStats> = {};
    for (const [tier, stats] of this._usageByTier) {
      snapshot[tier] = { ...stats };
    }
    return snapshot;
  }

  /**
   * Look up which model tier is configured for a given delegation task.
   * Convenience method so callers don't need to access the config directly.
   */
  getDelegationTier(task: keyof DelegationConfig): ModelTier {
    return this._delegation[task];
  }

  // ── Private helpers ───────────────────────────────────────────────

  /**
   * Check whether automatic compaction should run, and if so, compact.
   * Called before each `process()` call; a no-op when compaction is not
   * configured or the history is empty.
   */
  private async compactIfNeeded(): Promise<void> {
    if (!this._compactionConfig) return;

    const messages = this.getHistory();
    if (messages.length === 0) return;

    const needsCompaction = shouldCompact({
      messages,
      // The lookup needs some model name; 'unknown' is used when none was configured.
      model: this._modelName ?? 'unknown',
      contextWindow: this._contextWindow,
      thresholdPct: this._compactionConfig.thresholdPct,
    });
    if (!needsCompaction) return;

    await this.compact();
  }

  /** Accumulate usage stats for a given tier, creating its record on first use. */
  private _trackUsage(tier: ModelTier, usage: TokenUsage): void {
    const stats: TierUsageStats = this._usageByTier.get(tier) ?? {
      inputTokens: 0,
      outputTokens: 0,
      calls: 0,
    };
    stats.inputTokens += usage.inputTokens;
    stats.outputTokens += usage.outputTokens;
    stats.calls += 1;
    this._usageByTier.set(tier, stats);
  }
}