feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)

Phase 0 — Multi-Model Delegation:
- AgentOrchestrator wraps NativeAgent with delegate() for stateless
  single-turn calls to any model tier (fast/default/complex/local)
- DelegationConfig maps task types (compaction, classification, etc.)
  to model tiers
- Delegation prompts for compaction, memory extraction, classification,
  and tool summarisation
- Per-tier usage tracking for cost visibility
- Config schema: agents.delegation and agents.primary_tier

Phase 1 — Context Compaction:
- Token estimation (char/4 heuristic) with context window lookup
- shouldCompact() threshold check against context window percentage
- compactHistory() splits old/recent messages, delegates summary to
  fast tier, returns CompactionResult
- Automatic compaction in AgentOrchestrator.process() when configured
- Force-compact via orchestrator.compact() with session persistence
- Session.replaceHistory() with atomic SQLite transaction
- /compact TUI command with feedback on compacted token counts
- Config schema: compaction.enabled, threshold_pct, keep_turns,
  summary_max_tokens

Tests: 385 passing across 50 files (22 new tests in 2 new test files)
This commit is contained in:
William Valentin
2026-02-06 13:17:02 -08:00
parent f7cc87a4bb
commit 306e11bd2e
22 changed files with 1562 additions and 12 deletions
+1 -1
View File
@@ -1,2 +1,2 @@
export { loadConfig } from './loader.js';
export { configSchema, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig } from './schema.js';
export { configSchema, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig, type AgentsConfig, type CompactionConfig } from './schema.js';
+31
View File
@@ -19,6 +19,7 @@ const modelConfigSchema = z.object({
auth_token: z.string().optional(),
for: z.array(z.string()).optional(),
num_gpu: z.number().optional(),
context_window: z.number().optional(),
});
const modelsSchema = z.object({
@@ -87,6 +88,32 @@ const automationSchema = z.object({
cron: z.array(cronJobSchema).default([]),
}).default({});
/**
 * Model tiers a task can be routed to. Declared once so that every
 * tier-valued field below accepts exactly the same set of names.
 */
const modelTierSchema = z.enum(['fast', 'default', 'complex', 'local']);

/**
 * Schema for the `agents` config section (multi-model delegation).
 *
 * - `primary_tier`: tier name, defaults to 'default'.
 *   NOTE(review): presumably the tier the main agent runs on — confirm against the orchestrator.
 * - `delegation`: maps each delegated task type to the model tier that handles it.
 * - `auto_escalate`: boolean flag, off by default.
 * - `max_delegation_depth`: whole number in [1, 10], default 3.
 *
 * The whole section is optional; omitting it yields the defaults declared here.
 */
const agentsSchema = z.object({
  primary_tier: modelTierSchema.default('default'),
  delegation: z.object({
    compaction: modelTierSchema.default('fast'),
    memory_extraction: modelTierSchema.default('fast'),
    classification: modelTierSchema.default('fast'),
    tool_summarisation: modelTierSchema.default('fast'),
    complex_reasoning: modelTierSchema.default('complex'),
  }).default({
    // Spelled out in full (rather than `{}`) so the fallback object is
    // complete on its own, independent of the per-field defaults above.
    compaction: 'fast',
    memory_extraction: 'fast',
    classification: 'fast',
    tool_summarisation: 'fast',
    complex_reasoning: 'complex',
  }),
  auto_escalate: z.boolean().default(false),
  // A depth is a count: reject fractional values such as 2.5.
  max_delegation_depth: z.number().int().min(1).max(10).default(3),
}).default({});
/**
 * Schema for the `compaction` config section (context compaction).
 *
 * - `enabled`: turns compaction on or off; on by default.
 * - `threshold_pct`: percentage of the context window, in [10, 100], default 80.
 *   Fractions are allowed because it is a ratio, not a count.
 * - `keep_turns`: whole number in [1, 50], default 4.
 *   NOTE(review): presumably the number of most recent turns left unsummarised — confirm.
 * - `summary_max_tokens`: whole number in [128, 4096], default 1024.
 *
 * The whole section is optional; omitting it yields the defaults declared here.
 */
const compactionSchema = z.object({
  enabled: z.boolean().default(true),
  threshold_pct: z.number().min(10).max(100).default(80),
  // Turn and token counts are discrete: reject fractional values such as 4.5.
  keep_turns: z.number().int().min(1).max(50).default(4),
  summary_max_tokens: z.number().int().min(128).max(4096).default(1024),
}).default({});
export const configSchema = z.object({
telegram: telegramSchema,
server: serverSchema.default({}),
@@ -96,9 +123,13 @@ export const configSchema = z.object({
skills: skillsSchema.default({}),
mcp: mcpSchema.default({ servers: [] }),
automation: automationSchema,
agents: agentsSchema,
compaction: compactionSchema,
});
// Static types derived from the zod schemas via z.infer, so each type
// follows its schema automatically instead of being maintained by hand.

/** Type of the full configuration object, inferred from configSchema. */
export type Config = z.infer<typeof configSchema>;
/** Type of the `telegram` section, inferred from telegramSchema. */
export type TelegramConfig = z.infer<typeof telegramSchema>;
/** Type of a single model entry, inferred from modelConfigSchema. */
export type ModelConfig = z.infer<typeof modelConfigSchema>;
/** Type of a single cron job entry, inferred from cronJobSchema. */
export type CronJobConfig = z.infer<typeof cronJobSchema>;
/** Type of the `agents` section, inferred from agentsSchema. */
export type AgentsConfig = z.infer<typeof agentsSchema>;
/** Type of the `compaction` section, inferred from compactionSchema. */
export type CompactionConfig = z.infer<typeof compactionSchema>;