feat(memory): add experimental qmd search backend
This commit is contained in:
@@ -626,6 +626,10 @@ memory:
|
|||||||
chunk_overlap: 50 # Overlap between chunks
|
chunk_overlap: 50 # Overlap between chunks
|
||||||
top_k: 5 # Top results from vector search
|
top_k: 5 # Top results from vector search
|
||||||
hybrid_weight: 0.7 # 0.0 = keyword only, 1.0 = vector only
|
hybrid_weight: 0.7 # 0.0 = keyword only, 1.0 = vector only
|
||||||
|
qmd:
|
||||||
|
enabled: false # Experimental markdown-native search backend
|
||||||
|
top_k: 8 # Max QMD results
|
||||||
|
min_score: 0.15 # Minimum match score (0-1)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Embedding Providers
|
### Embedding Providers
|
||||||
@@ -640,7 +644,13 @@ memory:
|
|||||||
|
|
||||||
Embeddings are indexed in the background — when memory is written, the namespace is marked dirty and re-indexed within 30 seconds. The vector index is stored in `vectors.db` alongside the session database.
|
Embeddings are indexed in the background — when memory is written, the namespace is marked dirty and re-indexed within 30 seconds. The vector index is stored in `vectors.db` alongside the session database.
|
||||||
|
|
||||||
When embeddings are disabled or the provider is unreachable, search falls back gracefully to keyword matching.
|
Search backend selection:
|
||||||
|
|
||||||
|
- `memory.embedding.enabled: true` -> hybrid keyword+vector backend
|
||||||
|
- `memory.embedding.enabled: false` and `memory.qmd.enabled: true` -> QMD markdown backend
|
||||||
|
- otherwise -> keyword-only fallback
|
||||||
|
|
||||||
|
When the selected backend is unavailable (for example, when the embedding provider returns errors), search falls back gracefully to keyword matching.
|
||||||
|
|
||||||
### Embedding Config Fields
|
### Embedding Config Fields
|
||||||
|
|
||||||
@@ -657,6 +667,14 @@ When embeddings are disabled or the provider is unreachable, search falls back g
|
|||||||
| `top_k` | no | Number of vector results to return (default: `5`) |
|
| `top_k` | no | Number of vector results to return (default: `5`) |
|
||||||
| `hybrid_weight` | no | Vector vs keyword weight, 0.0-1.0 (default: `0.7`) |
|
| `hybrid_weight` | no | Vector vs keyword weight, 0.0-1.0 (default: `0.7`) |
|
||||||
|
|
||||||
|
### QMD Config Fields
|
||||||
|
|
||||||
|
| Field | Required | Description |
|
||||||
|
|-------|----------|-------------|
|
||||||
|
| `enabled` | no | Enable experimental markdown-native QMD backend (default: `false`) |
|
||||||
|
| `top_k` | no | Max QMD results returned by `memory.search` (default: `8`) |
|
||||||
|
| `min_score` | no | Minimum relevance score (0.0-1.0) for QMD matches (default: `0.15`) |
|
||||||
|
|
||||||
## Gateway Lock
|
## Gateway Lock
|
||||||
|
|
||||||
Single-client mode for the WebSocket gateway. When enabled, only one WebSocket connection is allowed at a time. Additional connections are rejected with close code `4003`.
|
Single-client mode for the WebSocket gateway. When enabled, only one WebSocket connection is allowed at a time. Additional connections are rejected with close code `4003`.
|
||||||
|
|||||||
@@ -123,7 +123,7 @@ Flynn has **6 of ~15 channels** (Telegram, WhatsApp, Discord, Slack, WebChat, TU
|
|||||||
| `memory.write` tool | Write memory files | Full (write/append to namespace) | **MATCH** |
|
| `memory.write` tool | Write memory files | Full (write/append to namespace) | **MATCH** |
|
||||||
| Vector embeddings | OpenAI/Gemini/local | Full (OpenAI, Gemini, Ollama, LlamaCpp providers) | **MATCH** |
|
| Vector embeddings | OpenAI/Gemini/local | Full (OpenAI, Gemini, Ollama, LlamaCpp providers) | **MATCH** |
|
||||||
| Hybrid search (BM25 + vector) | Full | Full (keyword + vector with configurable hybrid weight) | **MATCH** |
|
| Hybrid search (BM25 + vector) | Full | Full (keyword + vector with configurable hybrid weight) | **MATCH** |
|
||||||
| QMD backend | Experimental | -- | **MISSING** |
|
| QMD backend | Experimental | Full (experimental markdown-native backend configurable via `memory.qmd`) | **MATCH** |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -313,7 +313,6 @@ All five Tier 3 items implemented: Lane Queue (per-session FIFO in gateway), cre
|
|||||||
- Elevated mode — sandbox escape hatch
|
- Elevated mode — sandbox escape hatch
|
||||||
- ~~Onboard wizard — guided setup~~ (DONE — `flynn setup` + first-run auto-trigger, 2026-02-10)
|
- ~~Onboard wizard — guided setup~~ (DONE — `flynn setup` + first-run auto-trigger, 2026-02-10)
|
||||||
- ClawHub/skill registry — community marketplace
|
- ClawHub/skill registry — community marketplace
|
||||||
- QMD backend — experimental memory search
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -39,9 +39,9 @@ A gap item is considered implemented when:
|
|||||||
|
|
||||||
- Canvas / A2UI (agent-driven visual workspace)
|
- Canvas / A2UI (agent-driven visual workspace)
|
||||||
|
|
||||||
### Memory (MISSING)
|
### Memory
|
||||||
|
|
||||||
- QMD backend (experimental)
|
- QMD backend (experimental) — completed on 2026-02-16
|
||||||
|
|
||||||
### Security (MISSING)
|
### Security (MISSING)
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
# QMD Backend Checklist
|
||||||
|
|
||||||
|
Date: 2026-02-16
|
||||||
|
Status: completed
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Add an experimental QMD (query markdown database) backend for `memory.search`.
|
||||||
|
- Enable config-driven backend selection between hybrid embeddings, QMD, and keyword fallback.
|
||||||
|
- Update docs and plan state.
|
||||||
|
|
||||||
|
## Completed
|
||||||
|
|
||||||
|
- Added `memory.qmd` config schema in `src/config/schema.ts`:
|
||||||
|
- `enabled` (default `false`)
|
||||||
|
- `top_k` (default `8`)
|
||||||
|
- `min_score` (default `0.15`)
|
||||||
|
- Implemented `QmdSearch` backend in `src/memory/qmd-search.ts`:
|
||||||
|
- heading-aware scoring
|
||||||
|
- token overlap + phrase bonus ranking
|
||||||
|
- session namespace recency boost
|
||||||
|
- Wired backend selection in `src/daemon/memory.ts`:
|
||||||
|
- embedding enabled -> hybrid backend
|
||||||
|
- else if qmd enabled -> QMD backend
|
||||||
|
- else keyword-only search
|
||||||
|
- Generalized memory search tool wiring:
|
||||||
|
- introduced shared backend interface for `memory.search`
|
||||||
|
- updated memory tool factory to accept any backend implementing `search(query, topK?)`
|
||||||
|
- Updated docs:
|
||||||
|
- README memory section now documents QMD config and backend precedence.
|
||||||
|
- OpenClaw gap docs updated to mark QMD backend as implemented.
|
||||||
|
- Added tests:
|
||||||
|
- `src/memory/qmd-search.test.ts`
|
||||||
|
- `src/config/schema.test.ts` coverage for `memory.qmd`
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
- `pnpm test:run src/config/schema.test.ts src/memory/qmd-search.test.ts`
|
||||||
|
- `pnpm typecheck`
|
||||||
+27
-2
@@ -223,6 +223,31 @@
|
|||||||
],
|
],
|
||||||
"test_status": "pnpm test:run src/channels/registry.test.ts src/gateway/handlers/handlers.test.ts + pnpm typecheck passing"
|
"test_status": "pnpm test:run src/channels/registry.test.ts src/gateway/handlers/handlers.test.ts + pnpm typecheck passing"
|
||||||
},
|
},
|
||||||
|
"qmd-backend": {
|
||||||
|
"file": "2026-02-16-qmd-backend-checklist.md",
|
||||||
|
"status": "completed",
|
||||||
|
"date": "2026-02-16",
|
||||||
|
"updated": "2026-02-16",
|
||||||
|
"summary": "Added an experimental markdown-native QMD backend for memory.search with config-driven backend selection (hybrid embeddings -> QMD -> keyword fallback), tests, and docs updates.",
|
||||||
|
"files_created": [
|
||||||
|
"docs/plans/2026-02-16-qmd-backend-checklist.md",
|
||||||
|
"src/memory/qmd-search.ts",
|
||||||
|
"src/memory/qmd-search.test.ts"
|
||||||
|
],
|
||||||
|
"files_modified": [
|
||||||
|
"src/config/schema.ts",
|
||||||
|
"src/config/schema.test.ts",
|
||||||
|
"src/daemon/memory.ts",
|
||||||
|
"src/memory/hybrid-search.ts",
|
||||||
|
"src/memory/index.ts",
|
||||||
|
"src/tools/builtin/index.ts",
|
||||||
|
"src/tools/builtin/memory-search.ts",
|
||||||
|
"README.md",
|
||||||
|
"docs/plans/2026-02-06-openclaw-feature-gap-analysis.md",
|
||||||
|
"docs/plans/2026-02-15-openclaw-gap-roadmap.md"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm test:run src/config/schema.test.ts src/memory/qmd-search.test.ts + pnpm typecheck passing"
|
||||||
|
},
|
||||||
"skill-safety-scanner": {
|
"skill-safety-scanner": {
|
||||||
"file": "2026-02-15-skill-safety-scanner-checklist.md",
|
"file": "2026-02-15-skill-safety-scanner-checklist.md",
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
@@ -2267,12 +2292,12 @@
|
|||||||
"tier2_completion": "4/4 (100%) — inbound webhooks, vector memory search, Dockerfile, heartbeat monitor",
|
"tier2_completion": "4/4 (100%) — inbound webhooks, vector memory search, Dockerfile, heartbeat monitor",
|
||||||
"tier3_completion": "5/5 (100%) — lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings",
|
"tier3_completion": "5/5 (100%) — lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings",
|
||||||
"tier4_completion": "4/4 (100%) — gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes",
|
"tier4_completion": "4/4 (100%) — gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes",
|
||||||
"feature_gap_scorecard": "107/128 match (84%), 0 partial (0%), 21 missing (16%)",
|
"feature_gap_scorecard": "108/128 match (84%), 0 partial (0%), 20 missing (16%)",
|
||||||
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete — milestone done",
|
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete — milestone done",
|
||||||
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
||||||
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
|
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
|
||||||
"remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 3/3 (100%) — component registry, confidence routing, history index. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
|
"remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 3/3 (100%) — component registry, confidence routing, history index. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
|
||||||
"next_up": "Pick the next OpenClaw gap milestone and create a scoped checklist (candidates: QMD backend, ClawHub registry, Bonjour/mDNS discovery)"
|
"next_up": "Pick the next OpenClaw gap milestone and create a scoped checklist (candidates: ClawHub registry, Bonjour/mDNS discovery, synthetic provider)"
|
||||||
},
|
},
|
||||||
"soul_md_and_cron_create": {
|
"soul_md_and_cron_create": {
|
||||||
"date": "2026-02-11",
|
"date": "2026-02-11",
|
||||||
|
|||||||
@@ -459,6 +459,9 @@ describe('configSchema — memory injection strategy', () => {
|
|||||||
const result = configSchema.parse(minimalConfig);
|
const result = configSchema.parse(minimalConfig);
|
||||||
expect(result.memory.injection_strategy).toBe('all');
|
expect(result.memory.injection_strategy).toBe('all');
|
||||||
expect(result.memory.max_injection_tokens).toBe(2000);
|
expect(result.memory.max_injection_tokens).toBe(2000);
|
||||||
|
expect(result.memory.qmd.enabled).toBe(false);
|
||||||
|
expect(result.memory.qmd.top_k).toBe(8);
|
||||||
|
expect(result.memory.qmd.min_score).toBe(0.15);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('accepts adaptive memory injection settings', () => {
|
it('accepts adaptive memory injection settings', () => {
|
||||||
@@ -472,6 +475,22 @@ describe('configSchema — memory injection strategy', () => {
|
|||||||
expect(result.memory.injection_strategy).toBe('adaptive');
|
expect(result.memory.injection_strategy).toBe('adaptive');
|
||||||
expect(result.memory.max_injection_tokens).toBe(1200);
|
expect(result.memory.max_injection_tokens).toBe(1200);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('accepts qmd backend settings', () => {
  // Every qmd field is overridden with a non-default value, so a passing
  // assertion cannot be explained by the schema defaults kicking in.
  const qmdOverrides = {
    enabled: true,
    top_k: 12,
    min_score: 0.2,
  };

  const { qmd } = configSchema.parse({
    ...minimalConfig,
    memory: { qmd: qmdOverrides },
  }).memory;

  expect(qmd.enabled).toBe(true);
  expect(qmd.top_k).toBe(12);
  expect(qmd.min_score).toBe(0.2);
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('configSchema — compaction importance threshold', () => {
|
describe('configSchema — compaction importance threshold', () => {
|
||||||
|
|||||||
@@ -313,6 +313,15 @@ const embeddingSchema = z.object({
|
|||||||
hybrid_weight: z.number().min(0).max(1).default(0.7),
|
hybrid_weight: z.number().min(0).max(1).default(0.7),
|
||||||
}).default({});
|
}).default({});
|
||||||
|
|
||||||
|
/**
 * Settings for the experimental QMD (query markdown database) memory search backend.
 * Every field carries a default, so the whole `qmd` section may be omitted.
 */
const qmdSchema = z.object({
  /** Enable experimental QMD (query markdown database) memory search backend. */
  enabled: z.boolean().default(false),
  /**
   * Maximum number of QMD results returned by memory.search.
   * This is a result count, so only whole numbers from 1 to 50 are accepted.
   */
  top_k: z.number().int().min(1).max(50).default(8),
  /** Minimum relevance score (0-1) for QMD matches. */
  min_score: z.number().min(0).max(1).default(0.15),
}).default({});
|
||||||
|
|
||||||
const memorySchema = z.object({
|
const memorySchema = z.object({
|
||||||
enabled: z.boolean().default(true),
|
enabled: z.boolean().default(true),
|
||||||
dir: z.string().optional(), // Default: ~/.local/share/flynn/memory
|
dir: z.string().optional(), // Default: ~/.local/share/flynn/memory
|
||||||
@@ -321,6 +330,7 @@ const memorySchema = z.object({
|
|||||||
max_injection_tokens: z.number().min(100).max(10000).default(2000),
|
max_injection_tokens: z.number().min(100).max(10000).default(2000),
|
||||||
max_context_tokens: z.number().min(100).max(10000).default(2000),
|
max_context_tokens: z.number().min(100).max(10000).default(2000),
|
||||||
embedding: embeddingSchema,
|
embedding: embeddingSchema,
|
||||||
|
qmd: qmdSchema,
|
||||||
}).default({});
|
}).default({});
|
||||||
|
|
||||||
const compactionSchema = z.object({
|
const compactionSchema = z.object({
|
||||||
@@ -593,6 +603,7 @@ export type HeartbeatConfig = z.infer<typeof heartbeatSchema>;
|
|||||||
export type HeartbeatCheck = z.infer<typeof heartbeatCheckSchema>;
|
export type HeartbeatCheck = z.infer<typeof heartbeatCheckSchema>;
|
||||||
export type EmbeddingConfig = z.infer<typeof embeddingSchema>;
|
export type EmbeddingConfig = z.infer<typeof embeddingSchema>;
|
||||||
export type EmbeddingProvider = z.infer<typeof embeddingProviderSchema>;
|
export type EmbeddingProvider = z.infer<typeof embeddingProviderSchema>;
|
||||||
|
export type QmdConfig = z.infer<typeof qmdSchema>;
|
||||||
export type GcalConfig = z.infer<typeof gcalSchema>;
|
export type GcalConfig = z.infer<typeof gcalSchema>;
|
||||||
export type GdocsConfig = z.infer<typeof gdocsSchema>;
|
export type GdocsConfig = z.infer<typeof gdocsSchema>;
|
||||||
export type GdriveConfig = z.infer<typeof gdriveSchema>;
|
export type GdriveConfig = z.infer<typeof gdriveSchema>;
|
||||||
|
|||||||
+16
-6
@@ -1,8 +1,9 @@
|
|||||||
import type { Config } from '../config/index.js';
|
import type { Config } from '../config/index.js';
|
||||||
import type { Lifecycle } from './lifecycle.js';
|
import type { Lifecycle } from './lifecycle.js';
|
||||||
import { MemoryStore } from '../memory/index.js';
|
import { MemoryStore } from '../memory/index.js';
|
||||||
import { VectorStore, HybridSearch, createEmbeddingProvider, chunkText, contentHash } from '../memory/index.js';
|
import { VectorStore, HybridSearch, QmdSearch, createEmbeddingProvider, chunkText, contentHash } from '../memory/index.js';
|
||||||
import type { EmbeddingProvider as EmbeddingProviderInterface } from '../memory/index.js';
|
import type { EmbeddingProvider as EmbeddingProviderInterface } from '../memory/index.js';
|
||||||
|
import type { MemorySearchBackend } from '../tools/builtin/memory-search.js';
|
||||||
import { createMemoryTools } from '../tools/builtin/index.js';
|
import { createMemoryTools } from '../tools/builtin/index.js';
|
||||||
import type { ToolRegistry } from '../tools/index.js';
|
import type { ToolRegistry } from '../tools/index.js';
|
||||||
import { resolve } from 'path';
|
import { resolve } from 'path';
|
||||||
@@ -17,7 +18,7 @@ export interface MemoryDeps {
|
|||||||
|
|
||||||
export interface MemoryResult {
|
export interface MemoryResult {
|
||||||
memoryStore?: MemoryStore;
|
memoryStore?: MemoryStore;
|
||||||
hybridSearch?: HybridSearch;
|
searchBackend?: MemorySearchBackend;
|
||||||
memoryDir: string;
|
memoryDir: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -32,18 +33,19 @@ export async function initMemory(deps: MemoryDeps): Promise<MemoryResult> {
|
|||||||
: undefined;
|
: undefined;
|
||||||
|
|
||||||
// Register memory tools if memory is enabled
|
// Register memory tools if memory is enabled
|
||||||
let hybridSearch: HybridSearch | undefined;
|
let searchBackend: MemorySearchBackend | undefined;
|
||||||
|
|
||||||
if (memoryStore && config.memory.embedding.enabled) {
|
if (memoryStore && config.memory.embedding.enabled) {
|
||||||
try {
|
try {
|
||||||
const embeddingProvider: EmbeddingProviderInterface = createEmbeddingProvider(config.memory.embedding);
|
const embeddingProvider: EmbeddingProviderInterface = createEmbeddingProvider(config.memory.embedding);
|
||||||
const vectorStore = new VectorStore(resolve(dataDir, 'vectors.db'));
|
const vectorStore = new VectorStore(resolve(dataDir, 'vectors.db'));
|
||||||
hybridSearch = new HybridSearch(
|
const hybridSearch = new HybridSearch(
|
||||||
memoryStore,
|
memoryStore,
|
||||||
vectorStore,
|
vectorStore,
|
||||||
embeddingProvider,
|
embeddingProvider,
|
||||||
config.memory.embedding.hybrid_weight,
|
config.memory.embedding.hybrid_weight,
|
||||||
);
|
);
|
||||||
|
searchBackend = hybridSearch;
|
||||||
|
|
||||||
// Background indexer: re-embed dirty namespaces every 30 seconds
|
// Background indexer: re-embed dirty namespaces every 30 seconds
|
||||||
const indexerInterval = setInterval(async () => {
|
const indexerInterval = setInterval(async () => {
|
||||||
@@ -89,11 +91,19 @@ export async function initMemory(deps: MemoryDeps): Promise<MemoryResult> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!searchBackend && memoryStore && config.memory.qmd.enabled) {
|
||||||
|
searchBackend = new QmdSearch(memoryStore, {
|
||||||
|
topK: config.memory.qmd.top_k,
|
||||||
|
minScore: config.memory.qmd.min_score,
|
||||||
|
});
|
||||||
|
console.log(`QMD memory search enabled (top_k=${config.memory.qmd.top_k}, min_score=${config.memory.qmd.min_score})`);
|
||||||
|
}
|
||||||
|
|
||||||
if (memoryStore) {
|
if (memoryStore) {
|
||||||
for (const tool of createMemoryTools(memoryStore, hybridSearch)) {
|
for (const tool of createMemoryTools(memoryStore, searchBackend)) {
|
||||||
toolRegistry.register(tool);
|
toolRegistry.register(tool);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return { memoryStore, hybridSearch, memoryDir };
|
return { memoryStore, searchBackend, memoryDir };
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,8 +20,8 @@ export interface HybridSearchResult {
|
|||||||
line: number;
|
line: number;
|
||||||
/** Combined relevance score (0-1). */
|
/** Combined relevance score (0-1). */
|
||||||
score: number;
|
score: number;
|
||||||
/** Source of the match: keyword, vector, or both. */
|
/** Source of the match: keyword, vector, qmd, or both (keyword and vector). */
|
||||||
source: 'keyword' | 'vector' | 'both';
|
source: 'keyword' | 'vector' | 'qmd' | 'both';
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ export { VectorStore, cosineSimilarity, contentHash } from './vector-store.js';
|
|||||||
export type { VectorSearchResult, EmbeddingRow } from './vector-store.js';
|
export type { VectorSearchResult, EmbeddingRow } from './vector-store.js';
|
||||||
export { HybridSearch } from './hybrid-search.js';
|
export { HybridSearch } from './hybrid-search.js';
|
||||||
export type { HybridSearchResult } from './hybrid-search.js';
|
export type { HybridSearchResult } from './hybrid-search.js';
|
||||||
|
export { QmdSearch } from './qmd-search.js';
|
||||||
|
export type { QmdSearchOptions } from './qmd-search.js';
|
||||||
export * from './categories.js';
|
export * from './categories.js';
|
||||||
export { buildAdaptiveMemoryContext, buildRecentMemoryContext } from './adaptive.js';
|
export { buildAdaptiveMemoryContext, buildRecentMemoryContext } from './adaptive.js';
|
||||||
export type { AdaptiveMemoryConfig } from './adaptive.js';
|
export type { AdaptiveMemoryConfig } from './adaptive.js';
|
||||||
|
|||||||
@@ -0,0 +1,51 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { mkdtempSync, rmSync } from 'fs';
|
||||||
|
import { join } from 'path';
|
||||||
|
import { tmpdir } from 'os';
|
||||||
|
import { MemoryStore } from './store.js';
|
||||||
|
import { QmdSearch } from './qmd-search.js';
|
||||||
|
|
||||||
|
describe('QmdSearch', () => {
  /**
   * Runs `body` against a MemoryStore rooted in a fresh temp directory and
   * removes that directory afterwards, whether or not `body` throws.
   */
  const withTempStore = async (body: (store: MemoryStore) => Promise<void>): Promise<void> => {
    const tempDir = mkdtempSync(join(tmpdir(), 'flynn-qmd-search-'));
    try {
      await body(new MemoryStore({ dir: tempDir, maxContextTokens: 2000 }));
    } finally {
      rmSync(tempDir, { recursive: true, force: true });
    }
  };

  it('finds relevant markdown lines with heading-aware scoring', () =>
    withTempStore(async (store) => {
      const userMemory = [
        '# Preferences',
        '- Favorite editor is Neovim',
        '- Uses TypeScript daily',
        '',
        '# Projects',
        '- QMD backend prototype for memory search',
      ].join('\n');
      store.write('user', userMemory, 'replace');
      store.write('sessions/abc123', '- Discussed QMD ranking for markdown memory.', 'replace');

      const qmd = new QmdSearch(store, { topK: 5, minScore: 0.1 });
      const results = await qmd.search('qmd memory search');

      expect(results.length).toBeGreaterThan(0);
      expect(results[0].source).toBe('qmd');

      // Both the long-lived and the session-scoped namespace must contribute a hit.
      const matchedNamespaces = new Set(results.map((result) => result.namespace));
      expect(matchedNamespaces.has('user')).toBe(true);
      expect(matchedNamespaces.has('sessions/abc123')).toBe(true);
    }));

  it('returns empty results for empty query', () =>
    withTempStore(async (store) => {
      store.write('user', 'hello world', 'replace');

      // A whitespace-only query carries no searchable text.
      const results = await new QmdSearch(store).search(' ');

      expect(results).toEqual([]);
    }));
});
|
||||||
@@ -0,0 +1,132 @@
|
|||||||
|
import type { MemoryStore } from './store.js';
|
||||||
|
import type { HybridSearchResult } from './hybrid-search.js';
|
||||||
|
|
||||||
|
/**
 * Tuning options accepted by the `QmdSearch` constructor.
 * Both fields are optional; `QmdSearch` substitutes its own default for any field left out.
 */
export interface QmdSearchOptions {
|
||||||
|
/** Result limit used by `search()` when the caller passes no `topK` of its own (QmdSearch falls back to 8). */
topK?: number;
|
||||||
|
/** Lines scoring below this value are left out of the results (QmdSearch falls back to 0.15). */
minScore?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Experimental QMD (query markdown database) search backend.
 *
 * QMD treats markdown memory as structured text:
 * - heading lines contribute topical boosts
 * - line-level query token overlap is scored
 * - exact phrase match receives an additional boost
 *
 * Every non-blank, non-heading line of every namespace in the store is scored
 * on each call; nothing is indexed or cached between searches.
 */
export class QmdSearch {
  private readonly _store: MemoryStore;
  private readonly _topK: number;
  private readonly _minScore: number;

  /**
   * @param store - Memory store whose namespaces are scanned on every search.
   * @param options - Optional tuning; `topK` defaults to 8 and `minScore` to 0.15.
   */
  constructor(store: MemoryStore, options?: QmdSearchOptions) {
    this._store = store;
    this._topK = options?.topK ?? 8;
    this._minScore = options?.minScore ?? 0.15;
  }

  /**
   * Scores every memory line against `query` and returns the best matches.
   *
   * @param query - Free-text query; matching is case-insensitive.
   * @param topK - Maximum number of results; defaults to the constructor's `topK`.
   *   Fractional values are rounded down; negative or NaN values yield no results.
   * @returns Matches with `source: 'qmd'`, sorted by descending score. Each carries
   *   the matched line, its 1-based line number, and up to one non-blank neighbour
   *   line on either side as context.
   */
  async search(query: string, topK?: number): Promise<HybridSearchResult[]> {
    const queryText = query.trim().toLowerCase();
    if (queryText.length === 0) {
      return [];
    }

    // Deduplicate once up front: a word repeated in the query must not count
    // twice towards the overlap ratio or the heading bonus.
    const queryTokens = [...new Set(tokenize(queryText))];
    if (queryTokens.length === 0) {
      return [];
    }

    // Array.prototype.slice treats a negative end as an offset from the end of
    // the array, so an unchecked negative limit would return "all but the last
    // N" instead of nothing. Normalise to a non-negative whole number first.
    const requested = topK ?? this._topK;
    const limit = Number.isNaN(requested) ? 0 : Math.max(0, Math.floor(requested));
    if (limit === 0) {
      return [];
    }

    const results: HybridSearchResult[] = [];

    for (const namespace of this._store.listNamespaces()) {
      const content = this._store.read(namespace);
      if (content.length === 0) {
        continue;
      }

      // Accept CRLF as well as LF so a stray '\r' never leaks into results.
      const lines = content.split(/\r?\n/);
      // Text of the most recent heading above the current line, lowercased.
      let currentHeading = '';

      for (let i = 0; i < lines.length; i++) {
        const raw = lines[i];
        const line = raw.trim();
        if (line.length === 0) {
          continue;
        }

        // Headings are never results themselves; they only set the topical
        // context for the lines that follow.
        const heading = line.match(/^#{1,6}\s+(.+)$/);
        if (heading) {
          currentHeading = heading[1].toLowerCase();
          continue;
        }

        const score = scoreLine(line, queryText, queryTokens, currentHeading, namespace);
        if (score < this._minScore) {
          continue;
        }

        // Context = previous line + match + next line, skipping blank neighbours.
        const contextParts: string[] = [];
        if (i > 0 && lines[i - 1].trim().length > 0) {
          contextParts.push(lines[i - 1]);
        }
        contextParts.push(raw);
        if (i < lines.length - 1 && lines[i + 1].trim().length > 0) {
          contextParts.push(lines[i + 1]);
        }

        results.push({
          namespace,
          content: raw,
          context: contextParts.join('\n'),
          line: i + 1,
          score,
          source: 'qmd',
        });
      }
    }

    results.sort((a, b) => b.score - a.score);
    return results.slice(0, limit);
  }
}
|
||||||
|
|
||||||
|
/**
 * Splits text into lowercase word tokens for overlap scoring.
 *
 * Any run of characters that are not letters, combining marks, or digits acts
 * as a separator, so whitespace, punctuation and underscores never end up
 * inside a token. Letters and digits are matched with Unicode property escapes
 * rather than an ASCII range, which keeps accented and non-Latin words intact
 * instead of cutting them at every non-ASCII character. ASCII-only input
 * tokenizes exactly as it would with an `a-z0-9` character class.
 *
 * @param text - Raw text (a query, a heading, or a memory line).
 * @returns Lowercased tokens of at least two characters, in order of
 *   appearance; duplicates are kept.
 */
function tokenize(text: string): string[] {
  return text
    .split(/[^\p{L}\p{M}\p{N}]+/u)
    .map((token) => token.toLowerCase())
    .filter((token) => token.length >= 2);
}
|
||||||
|
|
||||||
|
/**
 * Scores one markdown line against a query, yielding a relevance value in [0, 1].
 *
 * The score is the sum of four parts, capped at 1:
 * - token overlap: share of query tokens that also occur in the line (0..1)
 * - phrase bonus: +0.25 when the line contains the whole query text
 * - heading bonus: +0.08 per query token found in the enclosing heading, capped at 0.25
 * - recency bonus: +0.05 for namespaces under `sessions/`
 *
 * @param line - Line text to score; lowercased internally.
 * @param queryText - Full query text. It is compared as-is against the
 *   lowercased line, so callers should pass it already lowercased.
 * @param queryTokens - Tokens of the query; each entry counts once towards the
 *   overlap ratio and the heading bonus.
 * @param currentHeading - Text of the heading the line sits under, or '' if none.
 * @param namespace - Namespace the line belongs to.
 * @returns Relevance score in [0, 1]; 0 when `queryTokens` is empty.
 */
function scoreLine(
  line: string,
  queryText: string,
  queryTokens: string[],
  currentHeading: string,
  namespace: string,
): number {
  // With no query tokens the overlap ratio would be 0/0 = NaN, and NaN slips
  // past any `score < threshold` filter. Report "no match" instead.
  if (queryTokens.length === 0) {
    return 0;
  }

  const lineText = line.toLowerCase();
  const lineTokens = new Set(tokenize(lineText));
  const headingTokens = new Set(tokenize(currentHeading));

  let overlap = 0;
  let headingBonus = 0;
  for (const token of queryTokens) {
    if (lineTokens.has(token)) {
      overlap += 1;
    }
    if (headingTokens.has(token)) {
      headingBonus += 0.08;
    }
  }
  headingBonus = Math.min(0.25, headingBonus);

  const overlapScore = overlap / queryTokens.length; // 0..1
  const phraseBonus = lineText.includes(queryText) ? 0.25 : 0;

  // Session-scoped memories often represent recent conversational facts.
  const recencyBonus = namespace.startsWith('sessions/') ? 0.05 : 0;

  return Math.min(1, overlapScore + phraseBonus + headingBonus + recencyBonus);
}
|
||||||
@@ -30,8 +30,8 @@ export { createGtasksTools } from './gtasks.js';
|
|||||||
|
|
||||||
import type { Tool } from '../types.js';
|
import type { Tool } from '../types.js';
|
||||||
import type { MemoryStore } from '../../memory/store.js';
|
import type { MemoryStore } from '../../memory/store.js';
|
||||||
import type { HybridSearch } from '../../memory/hybrid-search.js';
|
|
||||||
import type { WebSearchConfig } from './web-search.js';
|
import type { WebSearchConfig } from './web-search.js';
|
||||||
|
import type { MemorySearchBackend } from './memory-search.js';
|
||||||
import { shellExecTool } from './shell.js';
|
import { shellExecTool } from './shell.js';
|
||||||
import { fileReadTool } from './file-read.js';
|
import { fileReadTool } from './file-read.js';
|
||||||
import { fileWriteTool } from './file-write.js';
|
import { fileWriteTool } from './file-write.js';
|
||||||
@@ -60,11 +60,11 @@ export const allBuiltinTools: Tool[] = [
|
|||||||
];
|
];
|
||||||
|
|
||||||
/** Create memory tools that require a MemoryStore instance. */
|
/** Create memory tools that require a MemoryStore instance. */
|
||||||
export function createMemoryTools(store: MemoryStore, hybridSearch?: HybridSearch): Tool[] {
|
export function createMemoryTools(store: MemoryStore, searchBackend?: MemorySearchBackend): Tool[] {
|
||||||
return [
|
return [
|
||||||
createMemoryReadTool(store),
|
createMemoryReadTool(store),
|
||||||
createMemoryWriteTool(store),
|
createMemoryWriteTool(store),
|
||||||
createMemorySearchTool(store, hybridSearch),
|
createMemorySearchTool(store, searchBackend),
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,22 +1,26 @@
|
|||||||
import type { Tool, ToolResult } from '../types.js';
|
import type { Tool, ToolResult } from '../types.js';
|
||||||
import type { MemoryStore } from '../../memory/store.js';
|
import type { MemoryStore } from '../../memory/store.js';
|
||||||
import type { HybridSearch } from '../../memory/hybrid-search.js';
|
import type { HybridSearchResult } from '../../memory/hybrid-search.js';
|
||||||
|
|
||||||
interface MemorySearchArgs {
|
interface MemorySearchArgs {
|
||||||
query: string;
|
query: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Minimal contract a search backend must satisfy to be plugged into the
 * `memory.search` tool. Results use the `HybridSearchResult` shape whichever
 * backend produced them.
 */
export interface MemorySearchBackend {
|
||||||
|
/**
 * Runs `query` against the backend and resolves with the matches.
 * `topK` is optional; presumably it caps the number of results, with the
 * behavior when omitted left to the implementation — confirm per backend.
 */
search(query: string, topK?: number): Promise<HybridSearchResult[]>;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a memory.search tool bound to the given MemoryStore instance.
|
* Creates a memory.search tool bound to the given MemoryStore instance.
|
||||||
* When a HybridSearch instance is provided, uses vector + keyword search;
|
* When a search backend is provided, uses backend-assisted search;
|
||||||
* otherwise falls back to keyword-only search.
|
* otherwise falls back to keyword-only search.
|
||||||
*/
|
*/
|
||||||
export function createMemorySearchTool(store: MemoryStore, hybridSearch?: HybridSearch): Tool {
|
export function createMemorySearchTool(store: MemoryStore, searchBackend?: MemorySearchBackend): Tool {
|
||||||
return {
|
return {
|
||||||
name: 'memory.search',
|
name: 'memory.search',
|
||||||
description:
|
description:
|
||||||
'Search across all memory files for a keyword or phrase. Returns matching lines with surrounding context from every namespace.' +
|
'Search across all memory files for a keyword or phrase. Returns matching lines with surrounding context from every namespace.' +
|
||||||
(hybridSearch ? ' Uses semantic vector search combined with keyword matching for better results.' : '') +
|
(searchBackend ? ' Uses an enhanced search backend (hybrid vector/keyword or QMD) when configured.' : '') +
|
||||||
' Category namespaces (facts/preferences/decisions/projects) are searchable through the namespace path.',
|
' Category namespaces (facts/preferences/decisions/projects) are searchable through the namespace path.',
|
||||||
inputSchema: {
|
inputSchema: {
|
||||||
type: 'object',
|
type: 'object',
|
||||||
@@ -32,10 +36,10 @@ export function createMemorySearchTool(store: MemoryStore, hybridSearch?: Hybrid
|
|||||||
const args = rawArgs as MemorySearchArgs;
|
const args = rawArgs as MemorySearchArgs;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Try hybrid search first if available
|
// Try enhanced search backend first if available
|
||||||
if (hybridSearch) {
|
if (searchBackend) {
|
||||||
try {
|
try {
|
||||||
const results = await hybridSearch.search(args.query);
|
const results = await searchBackend.search(args.query);
|
||||||
|
|
||||||
if (results.length === 0) {
|
if (results.length === 0) {
|
||||||
return { success: true, output: `No matches found for "${args.query}".` };
|
return { success: true, output: `No matches found for "${args.query}".` };
|
||||||
@@ -44,6 +48,7 @@ export function createMemorySearchTool(store: MemoryStore, hybridSearch?: Hybrid
|
|||||||
const formatted = results.map((result) => {
|
const formatted = results.map((result) => {
|
||||||
const sourceLabel = result.source === 'both' ? 'keyword+vector'
|
const sourceLabel = result.source === 'both' ? 'keyword+vector'
|
||||||
: result.source === 'vector' ? 'vector'
|
: result.source === 'vector' ? 'vector'
|
||||||
|
: result.source === 'qmd' ? 'qmd'
|
||||||
: 'keyword';
|
: 'keyword';
|
||||||
return `[${result.namespace}:${result.line}] (${sourceLabel}, score: ${result.score.toFixed(3)}) ${result.content}\n context: ${result.context}`;
|
return `[${result.namespace}:${result.line}] (${sourceLabel}, score: ${result.score.toFixed(3)}) ${result.content}\n context: ${result.context}`;
|
||||||
}).join('\n\n');
|
}).join('\n\n');
|
||||||
@@ -52,9 +57,9 @@ export function createMemorySearchTool(store: MemoryStore, hybridSearch?: Hybrid
|
|||||||
success: true,
|
success: true,
|
||||||
output: `Found ${results.length} match${results.length === 1 ? '' : 'es'} for "${args.query}":\n\n${formatted}`,
|
output: `Found ${results.length} match${results.length === 1 ? '' : 'es'} for "${args.query}":\n\n${formatted}`,
|
||||||
};
|
};
|
||||||
} catch (hybridError) {
|
} catch (backendError) {
|
||||||
// Fall back to keyword search on hybrid failure
|
// Fall back to keyword search on backend failure
|
||||||
console.error('Hybrid search failed, falling back to keyword search:', hybridError);
|
console.error('Enhanced memory search backend failed, falling back to keyword search:', backendError);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user