docs(memory): document session-start memory and config defaults

2026-02-25 13:02:30 -08:00
parent a76bc94b4d
commit 0a354a597a
5 changed files with 41 additions and 8 deletions
@@ -303,6 +303,10 @@ agents:
 memory:
  enabled: true
  auto_extract: true
  # user_namespace: "user"
  # working_memory_ttl_days: 14
  # working_memory_max_tokens: 1000
  # proactive_session_greeting: false
  embedding:
    enabled: true
    provider: ollama
@@ -40,6 +40,7 @@ The gateway serialises agent work **per session**, not per WebSocket connection:
 - Runtime backend mode overrides are available via `agent.send` command fast-path: `/runtime status`, `/runtime activate pi`, `/runtime deactivate pi`, `/runtime use config` (`/backend ...` remains a compatibility alias).
 - The gateway `agent.send` command path and channel-router path use the same runtime backend-mode command service; `flynn tui` forwards `/runtime ...` through this gateway path for parity.
 - Backend routing and fallback outcomes are emitted to audit logs (`backend.route`, `backend.success`, `backend.fallback`) for rollout evaluation; this telemetry is outside JSON-RPC response payloads.
 - Session-start memory injection (`user/profile` + `user/working`) happens server-side and is enabled when `memory.user_namespace` is set; it does not change protocol payloads.
 Per-session serialisation is implemented via a per-lane queue (`LaneQueue`) in the gateway server and is used by `agent.send` and `agent.cancel`.
@@ -31,13 +31,13 @@ flowchart LR
    RT[Routing\ncreateMessageRouter()]
    PF[Preferences\n~/.local/share/flynn/preferences.json\nmodelTier + backendMode]
    SM[SessionManager\nSQLite]
-    OR[AgentOrchestrator]
+  OR[AgentOrchestrator]
-    NA[NativeAgent\n(tool loop)]
+  NA[NativeAgent\n(tool loop)]
-    EB[Optional External Backends\nclaude_code/opencode/codex/gemini/pi_embedded]
+  EB[Optional External Backends\nclaude_code/opencode/codex/gemini/pi_embedded]
-    MR[ModelRouter]
+  MR[ModelRouter]
-    TP[ToolPolicy + ToolRegistry]
+  TP[ToolPolicy + ToolRegistry]
-    TE[ToolExecutor\nhooks + enforcement + audit]
+  TE[ToolExecutor\nhooks + enforcement + audit]
-    MEM[Memory Store\nfiles + vector/keyword]
+  MEM[Memory Store\nfiles + vector/keyword\nuser/profile + user/working]
    AU[Audit Logger\nredacted]
    HS[Hooks/Autonomy\nconfirm/log/silent]
    GA[Google OAuth Runtime\nsrc/google/oauth.ts]
@@ -67,6 +67,7 @@ flowchart LR
  RT --> OR
  RT --> EB
  OR --> NA
  OR -->|session-start memory| MEM
  EB --> MP
  NA --> MR
  MR --> MP
@@ -135,6 +136,10 @@ Tool Calls (inside NativeAgent loop)
                 |                                 v
                 +---------------------------> AuditLogger (redacted)
 Session start (when `memory.user_namespace` is set)
  AgentOrchestrator -> MemoryStore (user/profile + user/working)
  AgentOrchestrator -> System prompt (session context injection)
 Outbound Reply
  -> ChannelAdapter.send()  (text + optional attachments)
 ```
@@ -10,6 +10,7 @@ If you only want the protocol surface, see `docs/api/PROTOCOL.md`.
 - Each connection is attached to a `sessionId`.
 - Agent work is queued per `sessionId` (FIFO), not per connection.
 - Sessions persist in SQLite via `SessionManager` even if clients disconnect.
 - On the first message of a session, the orchestrator can inject session-start memory (`user/profile` + `user/working`) when `memory.user_namespace` is configured.
 - Once dequeued, message routing may execute the native orchestrator path or an optional external backend path (`claude_code`, `opencode`, `codex`, `gemini`, `pi_embedded`) depending on agent/backend config.
 - Runtime backend mode can be overridden manually via `/runtime` command fast-path (`status`, `activate pi`, `deactivate pi`, `use config`) and is persisted in preferences (`/backend` remains a compatibility alias).
 - `flynn tui` now attaches to this same gateway command path for `/runtime ...` and auto-starts/attaches daemon+gateway when needed.
@@ -104,6 +104,28 @@
      ],
      "test_status": "planning/docs update only; no runtime code changes"
    },
    "pi_personal_assistant_memory": {
      "status": "completed",
      "date": "2026-02-25",
      "updated": "2026-02-25",
      "commit": "a76bc94b4dc246440d7ee99c87cf4012c451d418",
      "summary": "Two-tier personal assistant memory: working memory (user/working, TTL-based) written on compaction, injected at session start; unified user/* namespace across channels; parameterized compaction prompt; memory extraction routed to user/facts; proactive session greeting option.",
      "files_modified": [
        "src/config/schema.ts",
        "src/memory/workingMemory.ts",
        "src/memory/workingMemory.test.ts",
        "src/backends/native/prompts.ts",
        "src/context/compaction.ts",
        "src/backends/native/orchestrator.ts",
        "src/backends/native/orchestrator.test.ts",
        "src/daemon/routing.ts",
        "docs/architecture/AGENT_DIAGRAM.md",
        "docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md",
        "docs/api/PROTOCOL.md",
        "docs/plans/state.json"
      ],
      "test_status": "pnpm test:run + pnpm typecheck passing"
    },
    "pi-embedded-manual-runtime-mode-control": {
      "status": "completed",
      "date": "2026-02-24",
@@ -6765,7 +6787,7 @@
    }
  },
  "overall_progress": {
-    "total_test_count": 2022,
+    "total_test_count": 2524,
    "all_tests_passing": true,
    "p0_completion": "3/3 (100%)",
    "p1_completion": "4/4 (100%)",