feat: add multi-model delegation (Phase 0) and context compaction (Phase 1)

Phase 0 — Multi-Model Delegation:
- AgentOrchestrator wraps NativeAgent with delegate() for stateless single-turn calls to any model tier (fast/default/complex/local)
- DelegationConfig maps task types (compaction, classification, etc.) to model tiers
- Delegation prompts for compaction, memory extraction, classification, and tool summarisation
- Per-tier usage tracking for cost visibility
- Config schema: agents.delegation and agents.primary_tier

Phase 1 — Context Compaction:
- Token estimation (char/4 heuristic) with context window lookup
- shouldCompact() threshold check against context window percentage
- compactHistory() splits old/recent messages, delegates summary to fast tier, returns CompactionResult
- Automatic compaction in AgentOrchestrator.process() when configured
- Force-compact via orchestrator.compact() with session persistence
- Session.replaceHistory() with atomic SQLite transaction
- /compact TUI command with feedback on compacted token counts
- Config schema: compaction.enabled, threshold_pct, keep_turns, summary_max_tokens

Tests: 385 passing across 50 files (22 new tests in 2 new test files)
2026-02-06 13:17:02 -08:00
parent f7cc87a4bb
commit 306e11bd2e
22 changed files with 1562 additions and 12 deletions
@@ -1,2 +1,2 @@
 export { loadConfig } from './loader.js';
-export { configSchema, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig } from './schema.js';
+export { configSchema, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig, type AgentsConfig, type CompactionConfig } from './schema.js';
@@ -19,6 +19,7 @@ const modelConfigSchema = z.object({
  auth_token: z.string().optional(),
  for: z.array(z.string()).optional(),
  num_gpu: z.number().optional(),
+  context_window: z.number().optional(),
 });

 const modelsSchema = z.object({
@@ -87,6 +88,32 @@ const automationSchema = z.object({
  cron: z.array(cronJobSchema).default([]),
 }).default({});

+/**
+ * Agent configuration: which model tier is primary and which tier each
+ * delegated task type is routed to. Every field has a default, so the whole
+ * section may be omitted from the config file.
+ */
+const agentsSchema = z.object({
+  primary_tier: z.enum(['fast', 'default', 'complex', 'local']).default('default'),
+  // Task type -> model tier. The object-level default below mirrors the
+  // per-field defaults so an omitted `delegation` key yields the same values.
+  delegation: z.object({
+    compaction: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
+    memory_extraction: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
+    classification: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
+    tool_summarisation: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
+    complex_reasoning: z.enum(['fast', 'default', 'complex', 'local']).default('complex'),
+  }).default({
+    compaction: 'fast',
+    memory_extraction: 'fast',
+    classification: 'fast',
+    tool_summarisation: 'fast',
+    complex_reasoning: 'complex',
+  }),
+  auto_escalate: z.boolean().default(false),
+  // A depth is a whole number of nested delegations; reject fractional values
+  // instead of letting them through the 1..10 range check.
+  max_delegation_depth: z.number().int().min(1).max(10).default(3),
+}).default({});
+
+/**
+ * Context-compaction settings. Every field has a default, so the whole
+ * section may be omitted from the config file.
+ */
+const compactionSchema = z.object({
+  enabled: z.boolean().default(true),
+  // Percentage (10..100) used for the compaction threshold check.
+  threshold_pct: z.number().min(10).max(100).default(80),
+  // Counts must be whole numbers; `.int()` rejects values such as 2.5 that the
+  // range checks alone would accept.
+  keep_turns: z.number().int().min(1).max(50).default(4),
+  summary_max_tokens: z.number().int().min(128).max(4096).default(1024),
+}).default({});
+
 export const configSchema = z.object({
  telegram: telegramSchema,
  server: serverSchema.default({}),
@@ -96,9 +123,13 @@ export const configSchema = z.object({
  skills: skillsSchema.default({}),
  mcp: mcpSchema.default({ servers: [] }),
  automation: automationSchema,
+  agents: agentsSchema,
+  compaction: compactionSchema,
 });

 export type Config = z.infer<typeof configSchema>;
 export type TelegramConfig = z.infer<typeof telegramSchema>;
 export type ModelConfig = z.infer<typeof modelConfigSchema>;
 export type CronJobConfig = z.infer<typeof cronJobSchema>;
+export type AgentsConfig = z.infer<typeof agentsSchema>;
+export type CompactionConfig = z.infer<typeof compactionSchema>;