fix(memory): wire auto_extract through orchestrator compaction

2026-02-15 22:18:55 -08:00
parent 2eccd3e8eb
commit a525ec7b2d
8 changed files with 79 additions and 0 deletions
@@ -27,6 +27,7 @@ Make Flynn easier to reason about, configure, and monitor — so that adding fea
 - ✓ Skills system (bundled/managed/workspace) — existing
 - ✓ Media pipeline (image analysis, audio transcription, outbound attachments) — existing
 - ✓ Context compaction with memory extraction — existing
+- ✓ Memory persistence is hybrid: manual (`memory.write`) plus optional auto-extraction during compaction (`memory.auto_extract`) — existing
 - ✓ Tool policy profiles with allow/deny lists — existing
 - ✓ 1077 tests passing — existing

@@ -76,6 +76,7 @@
 - 8 extracted modules: models.ts (251), memory.ts (99), tools.ts (89), routing.ts (239), agents.ts (48), channels.ts (102), services.ts (269), lifecycle.ts (34)
 - Factory pattern established for all modules: initX(deps) → result
 - Tier 1 agent tools (session, agent list, message send, cron) remain in index.ts — they need deps from multiple init functions
+- Memory persistence is hybrid: explicit memory tools + auto-extraction during compaction when `memory.auto_extract` is enabled
 - Web dashboard is vanilla JS SPA at src/gateway/ui/
 - Config loader at src/config/loader.ts, schema at src/config/schema.ts (409 lines)
 - deepMerge + overlay-aware loadConfig in loader.ts; resolveOverlayPath + overlay-aware loadConfigSafe in cli/shared.ts
@@ -612,6 +612,10 @@ automation:

 The memory system supports hybrid search combining keyword matching with semantic vector similarity. When embeddings are enabled, `memory.search` uses both approaches and merges results with configurable weighting.

+Memory persistence is hybrid:
+- Manual writes via `memory.write`
+- Automatic fact extraction during context compaction when `memory.auto_extract: true`
+
 ```yaml
 memory:
  enabled: true
@@ -652,6 +656,8 @@ Search backend selection:

 When the selected backend is unavailable (for example embedding provider errors), search falls back gracefully to keyword matching.

+`memory.auto_extract` controls whether compaction appends extracted durable facts to `global` memory. Set it to `false` to keep compaction running without the extraction step.
+
 ### Embedding Config Fields

 | Field | Required | Description |
@@ -41,6 +41,22 @@
      ],
      "test_status": "pnpm test:run + pnpm typecheck passing"
    },
+    "memory-auto-extract-config-wiring": {
+      "status": "completed",
+      "date": "2026-02-16",
+      "updated": "2026-02-16",
+      "summary": "Wired `memory.auto_extract` through orchestrator construction so automatic fact extraction during compaction can be explicitly enabled/disabled (instead of always-on when compaction runs). Added compaction tests for default auto-extraction and disabled mode, and clarified docs/planning language to describe memory as hybrid manual + automatic.",
+      "files_modified": [
+        "src/backends/native/orchestrator.ts",
+        "src/daemon/routing.ts",
+        "src/gateway/session-bridge.ts",
+        "src/context/compaction.test.ts",
+        "README.md",
+        ".planning/PROJECT.md",
+        ".planning/STATE.md"
+      ],
+      "test_status": "pnpm test:run src/context/compaction.test.ts src/backends/native/orchestrator.test.ts src/gateway/session-bridge.test.ts src/daemon/routing.test.ts + pnpm typecheck passing"
+    },
    "zai-auth-resolution-and-401-hints": {
      "status": "completed",
      "date": "2026-02-16",
@@ -93,6 +93,8 @@ export interface OrchestratorConfig {
  contextWindow?: number;
  /** Optional memory store for injecting persistent memory into the system prompt. */
  memoryStore?: MemoryStore;
+  /** Whether compaction also extracts facts into the memory store; treated as enabled when omitted. */
+  memoryAutoExtract?: boolean;
  /** Strategy for memory prompt injection. */
  memoryInjectionStrategy?: 'all' | 'recent' | 'adaptive';
  /** Maximum tokens allowed for injected memory context. */
@@ -124,6 +126,7 @@ export class AgentOrchestrator {
  private _modelName?: string;
  private _contextWindow?: number;
  private _memoryStore?: MemoryStore;
+  private _memoryAutoExtract: boolean;
  private _memoryInjectionStrategy: 'all' | 'recent' | 'adaptive';
  private _memoryMaxInjectionTokens: number;
  private _systemPromptBase: string;
@@ -139,6 +142,7 @@ export class AgentOrchestrator {
    this._modelName = config.modelName;
    this._contextWindow = config.contextWindow;
    this._memoryStore = config.memoryStore;
+    this._memoryAutoExtract = config.memoryAutoExtract ?? true;
    this._memoryInjectionStrategy = config.memoryInjectionStrategy ?? 'all';
    this._memoryMaxInjectionTokens = config.memoryMaxInjectionTokens ?? 2000;
    this._systemPromptBase = config.systemPrompt;
@@ -290,6 +294,7 @@ export class AgentOrchestrator {
      orchestrator: this,
      config,
      memoryStore: this._memoryStore,
+      autoExtract: this._memoryAutoExtract,
    });

    // If nothing was actually compacted, skip the replace
@@ -3,6 +3,7 @@ import { compactHistory, DEFAULT_COMPACTION_CONFIG } from './compaction.js';
 import type { CompactionConfig } from './compaction.js';
 import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
 import type { Message } from '../models/types.js';
+import type { MemoryStore } from '../memory/store.js';

 function makeMockOrchestrator(summaryText = 'Summary of conversation'): AgentOrchestrator {
  return {
@@ -146,4 +147,51 @@ describe('compactHistory', () => {
    expect(result.messages.some(msg => typeof msg.content === 'string' && msg.content.includes('[Summary of earlier conversation]'))).toBe(true);
    expect(result.messages.length).toBeGreaterThan(5);
  });
+
+  it('auto-extracts memory facts by default when memoryStore is provided', async () => {
+    const messages = makeMessages(10);
+    const memoryStore = {
+      write: vi.fn(),
+    } as unknown as MemoryStore;
+    const orchestrator = {
+      getDelegationTier: vi.fn().mockReturnValue('fast'),
+      delegate: vi
+        .fn()
+        .mockResolvedValueOnce({
+          content: 'Compacted summary',
+          usage: { inputTokens: 100, outputTokens: 50 },
+          tier: 'fast',
+        })
+        .mockResolvedValueOnce({
+          content: '- User prefers concise answers.',
+          usage: { inputTokens: 40, outputTokens: 20 },
+          tier: 'fast',
+        }),
+    } as unknown as AgentOrchestrator;
+
+    await compactHistory({ messages, orchestrator, config, memoryStore });
+
+    expect(orchestrator.getDelegationTier).toHaveBeenCalledWith('memory_extraction');
+    expect(memoryStore.write).toHaveBeenCalledWith('global', '- User prefers concise answers.', 'append');
+  });
+
+  it('skips auto-extraction when autoExtract is false', async () => {
+    const messages = makeMessages(10);
+    const memoryStore = {
+      write: vi.fn(),
+    } as unknown as MemoryStore;
+    const orchestrator = {
+      getDelegationTier: vi.fn().mockReturnValue('fast'),
+      delegate: vi.fn().mockResolvedValue({
+        content: 'Compacted summary',
+        usage: { inputTokens: 100, outputTokens: 50 },
+        tier: 'fast',
+      }),
+    } as unknown as AgentOrchestrator;
+
+    await compactHistory({ messages, orchestrator, config, memoryStore, autoExtract: false });
+
+    expect(orchestrator.getDelegationTier).not.toHaveBeenCalledWith('memory_extraction');
+    expect(memoryStore.write).not.toHaveBeenCalled();
+  });
 });
@@ -218,6 +218,7 @@ export function createMessageRouter(deps: {
        modelName: effectiveModelName,
        contextWindow: effectiveContextWindow,
        memoryStore: deps.memoryStore,
+        memoryAutoExtract: deps.config.memory?.auto_extract,
        memoryInjectionStrategy: deps.config.memory?.injection_strategy,
        memoryMaxInjectionTokens: deps.config.memory?.max_injection_tokens,
          toolPolicyContext: {
@@ -213,6 +213,7 @@ export class SessionBridge {
        modelName: config?.models.default.model,
        contextWindow: config?.models.default.context_window,
        memoryStore: this.config.memoryStore,
+        memoryAutoExtract: config?.memory?.auto_extract,
        memoryInjectionStrategy: config?.memory?.injection_strategy,
        memoryMaxInjectionTokens: config?.memory?.max_injection_tokens,
        toolPolicyContext: {