fix(memory): improve hybrid search ranking across namespaces

This commit is contained in:
William Valentin
2026-02-20 13:31:10 -08:00
parent 614e5e499e
commit 36c6b75e98
2 changed files with 79 additions and 3 deletions
+26
View File
@@ -200,6 +200,32 @@ describe('HybridSearch', () => {
expect(results.length).toBe(3);
});
// Regression guard: one namespace contributing most of the keyword hits must
// not crowd the lone 'user' hit out of a topK smaller than the hit count.
it('keeps user namespace visible when daily namespace is noisy', async () => {
  // Four hits from the same daily namespace, listed ahead of the user hit.
  const dailyHits: SearchResult[] = [
    { namespace: 'daily/2026-02-20', line: 10, content: 'Xfinity update', context: 'ctx' },
    { namespace: 'daily/2026-02-20', line: 20, content: 'Xfinity follow-up', context: 'ctx' },
    { namespace: 'daily/2026-02-20', line: 30, content: 'Xfinity notes', context: 'ctx' },
    { namespace: 'daily/2026-02-20', line: 40, content: 'Xfinity details', context: 'ctx' },
  ];
  const userHit: SearchResult = {
    namespace: 'user',
    line: 5,
    content: 'Internet: Xfinity/Comcast',
    context: 'ctx',
  };

  // The embedding call always rejects and the vector store is empty, so the
  // only candidates available to the search are the keyword hits above.
  const rejectingProvider: EmbeddingProvider = {
    dimensions: 4,
    embed: vi.fn(() => Promise.reject(new Error('API error'))),
  };

  const search = new HybridSearch(
    mockMemoryStore([...dailyHits, userHit]),
    mockVectorStore([]),
    rejectingProvider,
    0.7,
  );

  // Ask for fewer results (3) than there are hits (5).
  const top = await search.search('xfinity', 3);
  const namespaces = top.map((hit) => hit.namespace);

  expect(top.length).toBe(3);
  expect(namespaces.includes('user')).toBe(true);
});
it('returns empty array when both searches find nothing', async () => {
const hybrid = new HybridSearch(
mockMemoryStore([]),
+53 -3
View File
@@ -174,9 +174,59 @@ export class HybridSearch {
}
}
// Sort by score descending, return top K
// Apply a light namespace prior so durable memory (user/global) is less
// likely to be drowned out by verbose daily/session logs on keyword-heavy queries.
const merged = Array.from(resultMap.values());
merged.sort((a, b) => b.score - a.score);
return merged.slice(0, topK);
const adjusted = merged.map((result) => ({
...result,
score: this._clampScore(result.score + this._namespaceScoreAdjustment(result.namespace)),
}));
adjusted.sort((a, b) => b.score - a.score);
// Diversity pass: first take top unique namespaces, then fill remainder
// by score. This prevents a single noisy namespace from monopolizing topK.
const selected: HybridSearchResult[] = [];
const seenNamespaces = new Set<string>();
const seenKeys = new Set<string>();
for (const result of adjusted) {
if (selected.length >= topK) {break;}
const key = `${result.namespace}:${result.line}:${result.content}`;
if (seenNamespaces.has(result.namespace) || seenKeys.has(key)) {continue;}
selected.push(result);
seenNamespaces.add(result.namespace);
seenKeys.add(key);
}
for (const result of adjusted) {
if (selected.length >= topK) {break;}
const key = `${result.namespace}:${result.line}:${result.content}`;
if (seenKeys.has(key)) {continue;}
selected.push(result);
seenKeys.add(key);
}
return selected;
}
/**
 * Returns the additive score offset applied to a result from `namespace`.
 *
 * - `user` or anything under `user/`: +0.2
 * - `global` or anything under `global/`: +0.1
 * - anything under `daily/`: -0.05
 * - every other namespace: 0
 *
 * A bare `daily` (no trailing slash) is not penalized; only the `daily/`
 * prefix is matched.
 *
 * @param namespace Namespace of the result being scored.
 * @returns The offset to add to the result's score.
 */
private _namespaceScoreAdjustment(namespace: string): number {
  // True when `namespace` is exactly `root` or sits below it (`root/...`).
  const isOrUnder = (root: string): boolean =>
    namespace === root || namespace.startsWith(`${root}/`);

  if (isOrUnder('user')) {
    return 0.2;
  }
  if (isOrUnder('global')) {
    return 0.1;
  }
  return namespace.startsWith('daily/') ? -0.05 : 0;
}
/**
 * Restricts a score to the closed interval [0, 1].
 *
 * @param score Raw score; may be NaN or fall outside [0, 1].
 * @returns 0 for NaN or negative input, 1 for input above 1, otherwise
 *   `score` unchanged.
 */
private _clampScore(score: number): number {
  // NaN fails every ordering comparison, so it needs its own check here.
  if (Number.isNaN(score) || score < 0) {
    return 0;
  }
  return score > 1 ? 1 : score;
}
}