feat(memory): add experimental qmd search backend

This commit is contained in:
William Valentin
2026-02-15 19:33:43 -08:00
parent c6e3d09ecc
commit 81c97a9df1
14 changed files with 340 additions and 29 deletions
+2 -2
View File
@@ -20,8 +20,8 @@ export interface HybridSearchResult {
line: number;
/** Combined relevance score (0-1). */
score: number;
/** Source of the match: keyword, vector, or both. */
source: 'keyword' | 'vector' | 'both';
/** Source of the match: keyword, vector, qmd, or both. */
source: 'keyword' | 'vector' | 'qmd' | 'both';
}
/**
+2
View File
@@ -8,6 +8,8 @@ export { VectorStore, cosineSimilarity, contentHash } from './vector-store.js';
export type { VectorSearchResult, EmbeddingRow } from './vector-store.js';
export { HybridSearch } from './hybrid-search.js';
export type { HybridSearchResult } from './hybrid-search.js';
export { QmdSearch } from './qmd-search.js';
export type { QmdSearchOptions } from './qmd-search.js';
export * from './categories.js';
export { buildAdaptiveMemoryContext, buildRecentMemoryContext } from './adaptive.js';
export type { AdaptiveMemoryConfig } from './adaptive.js';
+51
View File
@@ -0,0 +1,51 @@
import { describe, expect, it } from 'vitest';
import { mkdtempSync, rmSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { MemoryStore } from './store.js';
import { QmdSearch } from './qmd-search.js';
describe('QmdSearch', () => {
  /**
   * Runs `body` against a MemoryStore rooted in a freshly created temp
   * directory, and removes that directory afterwards even when `body` throws.
   */
  async function withTempStore(body: (store: MemoryStore) => Promise<void>): Promise<void> {
    const dir = mkdtempSync(join(tmpdir(), 'flynn-qmd-search-'));
    try {
      await body(new MemoryStore({ dir, maxContextTokens: 2000 }));
    } finally {
      rmSync(dir, { recursive: true, force: true });
    }
  }

  it('finds relevant markdown lines with heading-aware scoring', () =>
    withTempStore(async (store) => {
      // One long-lived namespace with headings plus one session namespace.
      const userNotes = [
        '# Preferences',
        '- Favorite editor is Neovim',
        '- Uses TypeScript daily',
        '',
        '# Projects',
        '- QMD backend prototype for memory search',
      ].join('\n');
      store.write('user', userNotes, 'replace');
      store.write('sessions/abc123', '- Discussed QMD ranking for markdown memory.', 'replace');

      const search = new QmdSearch(store, { topK: 5, minScore: 0.1 });
      const hits = await search.search('qmd memory search');

      expect(hits.length).toBeGreaterThan(0);
      expect(hits[0].source).toBe('qmd');

      // Both namespaces must be represented among the returned hits.
      const namespaces = hits.map((hit) => hit.namespace);
      expect(namespaces.includes('user')).toBe(true);
      expect(namespaces.includes('sessions/abc123')).toBe(true);
    }));

  it('returns empty results for empty query', () =>
    withTempStore(async (store) => {
      store.write('user', 'hello world', 'replace');

      const search = new QmdSearch(store);
      const hits = await search.search(' ');

      expect(hits).toEqual([]);
    }));
});
+132
View File
@@ -0,0 +1,132 @@
import type { MemoryStore } from './store.js';
import type { HybridSearchResult } from './hybrid-search.js';
/** Tuning options accepted by the `QmdSearch` constructor. */
export interface QmdSearchOptions {
/** Default cap on the number of results per search; `QmdSearch` uses 8 when omitted. */
topK?: number;
/** Lowest score a line must reach to be returned; `QmdSearch` uses 0.15 when omitted. */
minScore?: number;
}
/**
 * Experimental QMD (query markdown database) search backend.
 *
 * QMD treats markdown memory as structured text:
 * - heading lines are not returned as hits; the most recent heading above a
 *   line contributes a topical boost to that line's score
 * - line-level query token overlap is scored
 * - exact phrase match receives an additional boost
 */
export class QmdSearch {
  private readonly _store: MemoryStore;
  private readonly _topK: number;
  private readonly _minScore: number;

  /**
   * @param store Memory store whose namespaces are scanned on every search.
   * @param options Optional tuning; `topK` defaults to 8 and `minScore` to 0.15.
   */
  constructor(store: MemoryStore, options?: QmdSearchOptions) {
    this._store = store;
    this._topK = options?.topK ?? 8;
    this._minScore = options?.minScore ?? 0.15;
  }

  /**
   * Scans every namespace line by line and returns the best-scoring lines.
   *
   * @param query Free-text query; it is trimmed and lowercased before matching.
   * @param topK Per-call result cap; falls back to the constructor's `topK`.
   * @returns Hits sorted by descending score, each tagged with source `'qmd'`.
   *   Empty when the query yields no tokens or the cap is not a positive number.
   */
  async search(query: string, topK?: number): Promise<HybridSearchResult[]> {
    // A negative cap handed to `slice(0, cap)` would drop the *last* |cap| hits
    // instead of returning none, so reject non-positive (and NaN) caps up front.
    const limit = topK ?? this._topK;
    if (!(limit > 0)) {
      return [];
    }

    const queryText = query.trim().toLowerCase();
    const queryTokens = tokenize(queryText);
    // Covers both a blank query and one made only of punctuation / 1-char words.
    if (queryTokens.length === 0) {
      return [];
    }

    const results: HybridSearchResult[] = [];
    for (const namespace of this._store.listNamespaces()) {
      const content = this._store.read(namespace);
      if (content.length === 0) {
        continue;
      }

      const lines = content.split('\n');
      // Heading tracking restarts for every namespace.
      let currentHeading = '';

      for (let i = 0; i < lines.length; i++) {
        const raw = lines[i];
        const line = raw.trim();
        if (line.length === 0) {
          continue;
        }

        // ATX headings (`#` .. `######`) only update the topical context.
        const heading = line.match(/^#{1,6}\s+(.+)$/);
        if (heading) {
          currentHeading = heading[1].toLowerCase();
          continue;
        }

        const score = scoreLine(line, queryText, queryTokens, currentHeading, namespace);
        if (score < this._minScore) {
          continue;
        }

        // Context is the hit plus its non-blank immediate neighbours, untrimmed.
        const contextParts: string[] = [];
        if (i > 0 && lines[i - 1].trim().length > 0) {
          contextParts.push(lines[i - 1]);
        }
        contextParts.push(raw);
        if (i < lines.length - 1 && lines[i + 1].trim().length > 0) {
          contextParts.push(lines[i + 1]);
        }

        results.push({
          namespace,
          content: raw,
          context: contextParts.join('\n'),
          line: i + 1, // 1-based line number within the namespace content
          score,
          source: 'qmd',
        });
      }
    }

    results.sort((a, b) => b.score - a.score);
    return results.slice(0, limit);
  }
}
/**
 * Breaks text into lowercase search tokens.
 *
 * A token is a maximal run of ASCII letters and digits that is at least two
 * characters long; every other character acts as a separator.
 *
 * @param text Text to tokenize, in any letter case.
 * @returns Tokens in order of appearance, duplicates included.
 */
function tokenize(text: string): string[] {
  // A global match returns each maximal alphanumeric run; `{2,}` skips
  // single-character runs. `match` yields null when nothing qualifies.
  const runs: string[] = text.match(/[a-z0-9]{2,}/gi) ?? [];
  return runs.map((run) => run.toLowerCase());
}
/**
 * Scores a single markdown line against a query, yielding a value in 0..1.
 *
 * The score is the sum of, capped at 1:
 * - the fraction of query tokens that occur as tokens of the line (0..1)
 * - 0.25 when the line contains the whole query text as a substring
 * - 0.08 per query token found in the current heading, capped at 0.25
 * - 0.05 when the namespace starts with `sessions/`
 *
 * Repeated query tokens are counted once per occurrence.
 *
 * @param line Line to score, in any letter case.
 * @param queryText Whole query, expected to be already lowercased by the caller.
 * @param queryTokens Tokens of the query, expected to be already lowercased.
 * @param currentHeading Text of the heading the line sits under, or `''`.
 * @param namespace Namespace the line was read from.
 * @returns The capped score; 0 when `queryTokens` is empty.
 */
function scoreLine(
  line: string,
  queryText: string,
  queryTokens: string[],
  currentHeading: string,
  namespace: string,
): number {
  // Nothing to match against. Returning early also avoids 0 / 0 = NaN, which
  // would pass through Math.min and through any `score < threshold` filter.
  if (queryTokens.length === 0) {
    return 0;
  }

  const lineText = line.toLowerCase();
  const lineTokens = new Set(tokenize(lineText));
  const headingTokens = new Set(tokenize(currentHeading));

  let overlap = 0;
  let headingBonus = 0;
  for (const token of queryTokens) {
    if (lineTokens.has(token)) {
      overlap += 1;
    }
    if (headingTokens.has(token)) {
      headingBonus += 0.08;
    }
  }
  headingBonus = Math.min(0.25, headingBonus);

  const overlapScore = overlap / queryTokens.length; // 0..1

  // `includes('')` is always true, so an empty phrase must not earn the bonus.
  const phraseBonus = queryText.length > 0 && lineText.includes(queryText) ? 0.25 : 0;

  // Session-scoped memories often represent recent conversational facts.
  const recencyBonus = namespace.startsWith('sessions/') ? 0.05 : 0;

  return Math.min(1, overlapScore + phraseBonus + headingBonus + recencyBonus);
}