feat(memory): add experimental qmd search backend
This commit is contained in:
@@ -20,8 +20,8 @@ export interface HybridSearchResult {
|
||||
line: number;
|
||||
/** Combined relevance score (0-1). */
|
||||
score: number;
|
||||
/** Source of the match: keyword, vector, or both. */
|
||||
source: 'keyword' | 'vector' | 'both';
|
||||
/** Source of the match: keyword, vector, qmd, or both. */
|
||||
source: 'keyword' | 'vector' | 'qmd' | 'both';
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -8,6 +8,8 @@ export { VectorStore, cosineSimilarity, contentHash } from './vector-store.js';
|
||||
export type { VectorSearchResult, EmbeddingRow } from './vector-store.js';
|
||||
export { HybridSearch } from './hybrid-search.js';
|
||||
export type { HybridSearchResult } from './hybrid-search.js';
|
||||
export { QmdSearch } from './qmd-search.js';
|
||||
export type { QmdSearchOptions } from './qmd-search.js';
|
||||
export * from './categories.js';
|
||||
export { buildAdaptiveMemoryContext, buildRecentMemoryContext } from './adaptive.js';
|
||||
export type { AdaptiveMemoryConfig } from './adaptive.js';
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { mkdtempSync, rmSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { MemoryStore } from './store.js';
|
||||
import { QmdSearch } from './qmd-search.js';
|
||||
|
||||
describe('QmdSearch', () => {
  /**
   * Runs `body` against a MemoryStore rooted in a fresh temporary directory.
   * The directory is removed afterwards, whether or not `body` throws.
   */
  const withTempStore = async (body: (store: MemoryStore) => Promise<void>): Promise<void> => {
    const tempDir = mkdtempSync(join(tmpdir(), 'flynn-qmd-search-'));
    try {
      await body(new MemoryStore({ dir: tempDir, maxContextTokens: 2000 }));
    } finally {
      rmSync(tempDir, { recursive: true, force: true });
    }
  };

  it('finds relevant markdown lines with heading-aware scoring', () =>
    withTempStore(async (store) => {
      // One long-lived namespace with two headed sections...
      const userMemory = [
        '# Preferences',
        '- Favorite editor is Neovim',
        '- Uses TypeScript daily',
        '',
        '# Projects',
        '- QMD backend prototype for memory search',
      ].join('\n');
      store.write('user', userMemory, 'replace');
      // ...and one session-scoped namespace that also mentions the query terms.
      store.write('sessions/abc123', '- Discussed QMD ranking for markdown memory.', 'replace');

      const qmd = new QmdSearch(store, { topK: 5, minScore: 0.1 });
      const results = await qmd.search('qmd memory search');

      expect(results.length).toBeGreaterThan(0);
      expect(results[0].source).toBe('qmd');

      // Both namespaces must contribute at least one hit.
      const hitNamespaces = results.map((hit) => hit.namespace);
      expect(hitNamespaces.includes('user')).toBe(true);
      expect(hitNamespaces.includes('sessions/abc123')).toBe(true);
    }));

  it('returns empty results for empty query', () =>
    withTempStore(async (store) => {
      store.write('user', 'hello world', 'replace');

      const qmd = new QmdSearch(store);
      const results = await qmd.search(' ');

      expect(results).toEqual([]);
    }));
});
|
||||
@@ -0,0 +1,132 @@
|
||||
import type { MemoryStore } from './store.js';
|
||||
import type { HybridSearchResult } from './hybrid-search.js';
|
||||
|
||||
/** Tuning options accepted by the `QmdSearch` constructor. */
export interface QmdSearchOptions {
  /** Maximum number of results returned by a search (`QmdSearch` falls back to 8). */
  topK?: number;

  /** Lines scoring below this threshold are discarded (`QmdSearch` falls back to 0.15). */
  minScore?: number;
}
|
||||
|
||||
/**
 * Experimental QMD (query markdown database) search backend.
 *
 * QMD treats markdown memory as structured text:
 * - heading lines contribute topical boosts
 * - line-level query token overlap is scored
 * - exact phrase match receives an additional boost
 *
 * Heading lines are never returned as results themselves; they only set the
 * heading context used when scoring the lines that follow them.
 */
export class QmdSearch {
  private readonly _store: MemoryStore;
  private readonly _topK: number;
  private readonly _minScore: number;

  /**
   * @param store - Memory store whose namespaces are scanned on every search.
   * @param options - Optional tuning; `topK` defaults to 8 and `minScore` to 0.15.
   */
  constructor(store: MemoryStore, options?: QmdSearchOptions) {
    this._store = store;
    this._topK = options?.topK ?? 8;
    this._minScore = options?.minScore ?? 0.15;
  }

  /**
   * Scans every namespace line by line and returns the best-scoring lines.
   *
   * @param query - Free-text query; it is trimmed and lowercased before matching.
   * @param topK - Per-call result limit; falls back to the constructor's `topK`.
   *   Zero or negative values yield no results.
   * @returns Matches sorted by descending score, each tagged `source: 'qmd'`.
   *   `line` is 1-based and `context` holds the matched line plus its
   *   non-blank direct neighbours. Empty when the query has no usable tokens.
   */
  async search(query: string, topK?: number): Promise<HybridSearchResult[]> {
    const queryText = query.trim().toLowerCase();
    if (queryText.length === 0) {
      return [];
    }

    const queryTokens = tokenize(queryText);
    if (queryTokens.length === 0) {
      return [];
    }

    const headingPattern = /^#{1,6}\s+(.+)$/;
    const results: HybridSearchResult[] = [];

    for (const namespace of this._store.listNamespaces()) {
      const content = this._store.read(namespace);
      if (content.length === 0) {
        continue;
      }

      // Accept CRLF as well as LF so a trailing '\r' never leaks into the
      // returned content/context strings.
      const lines = content.split(/\r?\n/);
      let currentHeading = '';

      for (const [index, raw] of lines.entries()) {
        const line = raw.trim();
        if (line.length === 0) {
          continue;
        }

        const heading = headingPattern.exec(line);
        if (heading) {
          // Headings only update the scoring context for subsequent lines.
          currentHeading = (heading[1] ?? '').toLowerCase();
          continue;
        }

        const score = scoreLine(line, queryText, queryTokens, currentHeading, namespace);
        // Written as a negated >= so a NaN score is rejected rather than kept.
        if (!(score >= this._minScore)) {
          continue;
        }

        // Out-of-range neighbours are simply undefined and are skipped.
        const previous = lines[index - 1];
        const next = lines[index + 1];
        const contextParts: string[] = [];
        if (previous !== undefined && previous.trim().length > 0) {
          contextParts.push(previous);
        }
        contextParts.push(raw);
        if (next !== undefined && next.trim().length > 0) {
          contextParts.push(next);
        }

        results.push({
          namespace,
          content: raw,
          context: contextParts.join('\n'),
          line: index + 1,
          score,
          source: 'qmd',
        });
      }
    }

    results.sort((a, b) => b.score - a.score);

    // A negative end index would make slice() count from the end of the array
    // and return almost everything; clamp so "no results wanted" means none.
    const limit = topK ?? this._topK;
    return results.slice(0, Math.max(0, limit));
  }
}
|
||||
|
||||
/**
 * Splits text into lowercase search tokens.
 *
 * Any run of characters that are not letters, combining marks, or digits acts
 * as a separator. The separator class is Unicode-aware so accented and
 * non-Latin words stay intact (an ASCII-only class would cut "café" down to
 * "caf"). Tokens shorter than two characters are dropped.
 *
 * @param text - Raw text in any case.
 * @returns Lowercased tokens of length >= 2, in order of appearance;
 *   duplicates are preserved.
 */
function tokenize(text: string): string[] {
  return text
    .split(/[^\p{L}\p{M}\p{N}]+/u)
    .map((token) => token.toLowerCase())
    .filter((token) => token.length >= 2);
}
|
||||
|
||||
/**
 * Scores a single markdown line against a query; the result is clamped to at most 1.
 *
 * The score is the sum of:
 * - token overlap: fraction of distinct query tokens present in the line (0..1)
 * - phrase bonus: +0.25 when the line contains the whole query text
 * - heading bonus: +0.08 per distinct query token found in the current
 *   heading, capped at 0.25
 * - recency bonus: +0.05 for namespaces under `sessions/`
 *
 * @param line - The line to score (any case).
 * @param queryText - Full query text; expected to be already lowercased.
 * @param queryTokens - Tokens of the query; repeated tokens count once.
 * @param currentHeading - Text of the nearest preceding heading, or ''.
 * @param namespace - Namespace the line belongs to.
 * @returns A finite score no greater than 1; 0 when nothing matches.
 */
function scoreLine(
  line: string,
  queryText: string,
  queryTokens: string[],
  currentHeading: string,
  namespace: string,
): number {
  const lineText = line.toLowerCase();
  const lineTokens = new Set(tokenize(lineText));
  const headingTokens = new Set(tokenize(currentHeading));

  // Repeating a word in the query must not inflate its weight.
  const distinctQueryTokens = [...new Set(queryTokens)];

  let overlap = 0;
  let headingBonus = 0;
  for (const token of distinctQueryTokens) {
    if (lineTokens.has(token)) {
      overlap += 1;
    }
    if (headingTokens.has(token)) {
      headingBonus += 0.08;
    }
  }
  headingBonus = Math.min(0.25, headingBonus);

  // Guard the division: with no tokens 0/0 would be NaN, and NaN slips past
  // any `score < threshold` filter.
  const overlapScore =
    distinctQueryTokens.length > 0 ? overlap / distinctQueryTokens.length : 0;

  // includes('') is always true, so an empty query must not earn the bonus.
  const phraseBonus = queryText.length > 0 && lineText.includes(queryText) ? 0.25 : 0;

  // Session-scoped memories often represent recent conversational facts.
  const recencyBonus = namespace.startsWith('sessions/') ? 0.05 : 0;

  return Math.min(1, overlapScore + phraseBonus + headingBonus + recencyBonus);
}
|
||||
Reference in New Issue
Block a user