feat(session): persist model tier overrides per session

Store per-session config in SQLite and route /model and /reset through command fast-paths so channel sessions keep independent model selection across reconnects and restarts.
2026-02-13 01:04:26 -08:00
parent 3472a0b926
commit 9f81c01603
35 changed files with 1438 additions and 144 deletions
@@ -4,7 +4,10 @@ import { ModelRouter } from '../../models/router.js';
 import type { ChatResponse, ModelClient } from '../../models/types.js';
 import { ToolRegistry, ToolExecutor } from '../../tools/index.js';
 import { HookEngine } from '../../hooks/engine.js';
-import type { SubAgentRequest } from './orchestrator.js';
+import { MemoryStore } from '../../memory/store.js';
+import { mkdtempSync, rmSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';

 describe('AgentOrchestrator', () => {
  let mockDefaultClient: ModelClient;
@@ -33,6 +36,14 @@ describe('AgentOrchestrator', () => {
    });
  });

+  /**
+   * Looks up the mock router's client for `tier` and returns it, throwing a
+   * descriptive error when the router has none so callers never receive a
+   * missing client.
+   */
+  function requireClient(tier: 'default' | 'fast' | 'complex'): ModelClient {
+    const resolved = mockRouter.getClient(tier);
+    if (resolved) {
+      return resolved;
+    }
+    throw new Error(`Expected ${tier} model client to exist in test router`);
+  }
+
  describe('delegate()', () => {
    it('routes to the correct tier when specified', async () => {
      const orchestrator = new AgentOrchestrator({
@@ -69,7 +80,7 @@ describe('AgentOrchestrator', () => {
      });
      const mockToolExecutor = new ToolExecutor(mockToolRegistry, hooks);

-      const mockFastChatClient = mockRouter.getClient('fast')!;
+      const mockFastChatClient = requireClient('fast');
      const mockFastChatFn = vi.fn().mockResolvedValue({
        content: 'response with tools',
        stopReason: 'end_turn',
@@ -298,7 +309,7 @@ describe('AgentOrchestrator', () => {

  describe('process()', () => {
    it('proxies to NativeAgent for user messages', async () => {
-      const mockDefaultChatClient = mockRouter.getClient('default')!;
+      const mockDefaultChatClient = requireClient('default');
      const mockDefaultChatFn = vi.fn().mockResolvedValue({
        content: 'Agent response',
        stopReason: 'end_turn',
@@ -355,6 +366,88 @@ describe('AgentOrchestrator', () => {
      expect(history[4]).toEqual({ role: 'user', content: 'Tell me about yourself' });
      expect(history[5]).toEqual({ role: 'assistant', content: 'default response' });
    });
+
+    it('uses adaptive memory injection strategy when configured', async () => {
+      const tempDir = mkdtempSync(join(tmpdir(), 'flynn-orchestrator-memory-'));
+
+      // Everything that can throw runs inside try/finally so a failed
+      // expectation or a rejected process() call cannot leak the temp directory.
+      try {
+        const memoryStore = new MemoryStore({ dir: tempDir, maxContextTokens: 2000 });
+        memoryStore.writeCategory('user', 'preferences', 'User prefers concise output.', 'replace');
+
+        // Replace the default tier's chat() so the system prompt it receives can be inspected.
+        const mockDefaultChatClient = requireClient('default');
+        const mockDefaultChatFn = vi.fn().mockResolvedValue({
+          content: 'Agent response',
+          stopReason: 'end_turn',
+          usage: { inputTokens: 50, outputTokens: 25 },
+        } as ChatResponse);
+        Object.assign(mockDefaultChatClient, { chat: mockDefaultChatFn });
+
+        const orchestrator = new AgentOrchestrator({
+          modelRouter: mockRouter,
+          systemPrompt: 'You are a helpful agent.',
+          primaryTier: 'default',
+          delegation: {
+            compaction: 'fast',
+            memory_extraction: 'default',
+            classification: 'complex',
+            tool_summarisation: 'default',
+            complex_reasoning: 'complex',
+          },
+          maxDelegationDepth: 10,
+          memoryStore,
+          memoryInjectionStrategy: 'adaptive',
+          memoryMaxInjectionTokens: 100,
+        });
+
+        await orchestrator.process('Keep this concise please');
+
+        expect(mockDefaultChatFn).toHaveBeenCalled();
+        const callArgs = mockDefaultChatFn.mock.calls[0][0];
+        expect(callArgs.system).toContain('# Memory Context');
+        expect(callArgs.system).toContain('concise');
+      } finally {
+        rmSync(tempDir, { recursive: true, force: true });
+      }
+    });
+
+    it('falls back to default memory context when adaptive injection errors', async () => {
+      const tempDir = mkdtempSync(join(tmpdir(), 'flynn-orchestrator-memory-fallback-'));
+
+      // Outer try/finally: the temp directory is removed even when the test fails.
+      try {
+        const memoryStore = new MemoryStore({ dir: tempDir, maxContextTokens: 2000 });
+        memoryStore.write('user', 'Fallback memory content', 'replace');
+        // Only the first getPromptSections() call throws; later calls use the real method.
+        const getPromptSectionsSpy = vi.spyOn(memoryStore, 'getPromptSections').mockImplementationOnce(() => {
+          throw new Error('boom');
+        });
+
+        // Inner try/finally: the spy is restored before the directory is removed,
+        // matching the original cleanup order, and also on failure.
+        try {
+          const mockDefaultChatClient = requireClient('default');
+          const mockDefaultChatFn = vi.fn().mockResolvedValue({
+            content: 'Agent response',
+            stopReason: 'end_turn',
+            usage: { inputTokens: 50, outputTokens: 25 },
+          } as ChatResponse);
+          Object.assign(mockDefaultChatClient, { chat: mockDefaultChatFn });
+
+          const orchestrator = new AgentOrchestrator({
+            modelRouter: mockRouter,
+            systemPrompt: 'You are a helpful agent.',
+            primaryTier: 'default',
+            delegation: {
+              compaction: 'fast',
+              memory_extraction: 'default',
+              classification: 'complex',
+              tool_summarisation: 'default',
+              complex_reasoning: 'complex',
+            },
+            maxDelegationDepth: 10,
+            memoryStore,
+            memoryInjectionStrategy: 'adaptive',
+            memoryMaxInjectionTokens: 100,
+          });
+
+          await orchestrator.process('test message');
+
+          const callArgs = mockDefaultChatFn.mock.calls[0][0];
+          expect(callArgs.system).toContain('Fallback memory content');
+        } finally {
+          getPromptSectionsSpy.mockRestore();
+        }
+      } finally {
+        rmSync(tempDir, { recursive: true, force: true });
+      }
+    });
  });

  describe('reset()', () => {
@@ -13,6 +13,7 @@ import { shouldCompact } from '../../context/tokens.js';
 import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
 import { estimateCost } from '../../models/costs.js';
 import { auditLogger } from '../../audit/index.js';
+import { buildAdaptiveMemoryContext, buildRecentMemoryContext } from '../../memory/adaptive.js';

 // ── Public types ──────────────────────────────────────────────────────

@@ -91,6 +92,10 @@ export interface OrchestratorConfig {
  contextWindow?: number;
  /** Optional memory store for injecting persistent memory into the system prompt. */
  memoryStore?: MemoryStore;
+  /** Strategy for memory prompt injection. */
+  memoryInjectionStrategy?: 'all' | 'recent' | 'adaptive';
+  /** Maximum tokens allowed for injected memory context. */
+  memoryMaxInjectionTokens?: number;
  /** Policy context for tool filtering (agent tier, provider). */
  toolPolicyContext?: ToolPolicyContext;
  /** Collector for outbound attachments queued by tools (e.g. media.send). */
@@ -118,6 +123,8 @@ export class AgentOrchestrator {
  private _modelName?: string;
  private _contextWindow?: number;
  private _memoryStore?: MemoryStore;
+  private _memoryInjectionStrategy: 'all' | 'recent' | 'adaptive';
+  private _memoryMaxInjectionTokens: number;
  private _systemPromptBase: string;
  private _usageByTier: Map<string, TierUsageStats> = new Map();

@@ -131,6 +138,8 @@ export class AgentOrchestrator {
    this._modelName = config.modelName;
    this._contextWindow = config.contextWindow;
    this._memoryStore = config.memoryStore;
+    this._memoryInjectionStrategy = config.memoryInjectionStrategy ?? 'all';
+    this._memoryMaxInjectionTokens = config.memoryMaxInjectionTokens ?? 2000;
    this._systemPromptBase = config.systemPrompt;

    // Create the primary NativeAgent for user-facing conversation
@@ -216,7 +225,7 @@ export class AgentOrchestrator {
   * exceeds the context window threshold and compacts it before processing.
   */
  async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
-    this._injectMemoryContext();
+    this._injectMemoryContext(userMessage);
    await this.compactIfNeeded();
    return this._agent.process(userMessage, attachments);
  }
@@ -355,12 +364,34 @@ export class AgentOrchestrator {
   * system prompt. If no memory store is configured or no memory content
   * exists, restores the original base prompt.
   */
-  private _injectMemoryContext(): void {
+  private _injectMemoryContext(userMessage: string): void {
    if (!this._memoryStore) {
      return;
    }

-    const memoryContext = this._memoryStore.getContextForPrompt();
+    let memoryContext = '';
+    try {
+      if (this._memoryInjectionStrategy === 'recent') {
+        memoryContext = buildRecentMemoryContext(this._memoryStore, this._memoryMaxInjectionTokens);
+      } else if (this._memoryInjectionStrategy === 'adaptive') {
+        memoryContext = buildAdaptiveMemoryContext({
+          store: this._memoryStore,
+          userMessage,
+          recentMessages: this.getHistory(),
+          config: {
+            maxTokens: this._memoryMaxInjectionTokens,
+          },
+        });
+      } else {
+        memoryContext = this._memoryStore.getContextForPrompt();
+      }
+    } catch (error) {
+      console.warn('[Flynn:memory] Adaptive memory injection failed, falling back to default context:', error);
+      memoryContext = this._memoryStore.getContextForPrompt();
+    }
+
+    memoryContext = this._clipMemoryContext(memoryContext);
+
    if (!memoryContext) {
      this._agent.setSystemPrompt(this._systemPromptBase);
      return;
@@ -370,6 +401,17 @@ export class AgentOrchestrator {
    this._agent.setSystemPrompt(enrichedPrompt);
  }

+  /**
+   * Caps the memory context at the configured injection budget.
+   *
+   * The budget is `_memoryMaxInjectionTokens` multiplied by four, i.e. every
+   * token is counted as four characters. Empty input and input already within
+   * the budget are returned unchanged.
+   *
+   * @param context - Memory context text about to be injected into the system prompt.
+   * @returns `context`, truncated to at most the character budget.
+   */
+  private _clipMemoryContext(context: string): string {
+    if (!context) {
+      return context;
+    }
+
+    // Normalise the budget before slicing: a negative end index passed to
+    // `slice` counts back from the end of the string, so a negative budget
+    // would keep almost the whole context instead of clipping it away.
+    const rawBudget = this._memoryMaxInjectionTokens * 4;
+    const maxChars = Number.isNaN(rawBudget) ? 0 : Math.max(0, Math.floor(rawBudget));
+    if (context.length <= maxChars) {
+      return context;
+    }
+
+    // Avoid ending on a high surrogate: cutting between the two halves of a
+    // surrogate pair would leave a lone surrogate in the prompt.
+    let end = maxChars;
+    if (end > 0) {
+      const lastUnit = context.charCodeAt(end - 1);
+      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+        end -= 1;
+      }
+    }
+    return context.slice(0, end);
+  }
+
  /**
   * Check whether automatic compaction should run, and if so, compact.
   * Called before each `process()` call when compaction is configured.