fix(memory): improve hybrid search ranking across namespaces
This commit is contained in:
@@ -200,6 +200,32 @@ describe('HybridSearch', () => {
|
||||
expect(results.length).toBe(3);
|
||||
});
|
||||
|
||||
it('keeps user namespace visible when daily namespace is noisy', async () => {
  // Fixture: four keyword hits that all share one daily namespace, followed
  // by a single hit in the `user` namespace.
  const dailyHit = (line: number, content: string): SearchResult => ({
    namespace: 'daily/2026-02-20',
    line,
    content,
    context: 'ctx',
  });
  const keywordHits: SearchResult[] = [
    dailyHit(10, 'Xfinity update'),
    dailyHit(20, 'Xfinity follow-up'),
    dailyHit(30, 'Xfinity notes'),
    dailyHit(40, 'Xfinity details'),
    { namespace: 'user', line: 5, content: 'Internet: Xfinity/Comcast', context: 'ctx' },
  ];

  // Embedding provider whose `embed` always rejects.
  const brokenEmbedder: EmbeddingProvider = {
    dimensions: 4,
    embed: vi.fn(async () => { throw new Error('API error'); }),
  };

  const searcher = new HybridSearch(
    mockMemoryStore(keywordHits),
    mockVectorStore([]),
    brokenEmbedder,
    0.7,
  );

  const results = await searcher.search('xfinity', 3);

  // Three results come back, and the lone `user` entry must be among them
  // even though the daily namespace alone could fill every slot.
  expect(results.length).toBe(3);
  const includesUserNamespace = results.some(({ namespace }) => namespace === 'user');
  expect(includesUserNamespace).toBe(true);
});
|
||||
|
||||
it('returns empty array when both searches find nothing', async () => {
|
||||
const hybrid = new HybridSearch(
|
||||
mockMemoryStore([]),
|
||||
|
||||
@@ -174,9 +174,59 @@ export class HybridSearch {
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by score descending, return top K
|
||||
// Apply a light namespace prior so durable memory (user/global) is less
|
||||
// likely to be drowned out by verbose daily/session logs on keyword-heavy queries.
|
||||
const merged = Array.from(resultMap.values());
|
||||
merged.sort((a, b) => b.score - a.score);
|
||||
return merged.slice(0, topK);
|
||||
const adjusted = merged.map((result) => ({
|
||||
...result,
|
||||
score: this._clampScore(result.score + this._namespaceScoreAdjustment(result.namespace)),
|
||||
}));
|
||||
|
||||
adjusted.sort((a, b) => b.score - a.score);
|
||||
|
||||
// Diversity pass: first take top unique namespaces, then fill remainder
|
||||
// by score. This prevents a single noisy namespace from monopolizing topK.
|
||||
const selected: HybridSearchResult[] = [];
|
||||
const seenNamespaces = new Set<string>();
|
||||
const seenKeys = new Set<string>();
|
||||
|
||||
for (const result of adjusted) {
|
||||
if (selected.length >= topK) {break;}
|
||||
const key = `${result.namespace}:${result.line}:${result.content}`;
|
||||
if (seenNamespaces.has(result.namespace) || seenKeys.has(key)) {continue;}
|
||||
selected.push(result);
|
||||
seenNamespaces.add(result.namespace);
|
||||
seenKeys.add(key);
|
||||
}
|
||||
|
||||
for (const result of adjusted) {
|
||||
if (selected.length >= topK) {break;}
|
||||
const key = `${result.namespace}:${result.line}:${result.content}`;
|
||||
if (seenKeys.has(key)) {continue;}
|
||||
selected.push(result);
|
||||
seenKeys.add(key);
|
||||
}
|
||||
|
||||
return selected;
|
||||
}
|
||||
|
||||
/**
 * Returns the additive score offset for a result, chosen by its namespace.
 *
 * - `user` or anything under `user/`: +0.2
 * - `global` or anything under `global/`: +0.1
 * - anything under `daily/`: -0.05
 * - everything else: 0
 *
 * @param namespace - Namespace string of the search result.
 * @returns The offset to add to the result's score.
 */
private _namespaceScoreAdjustment(namespace: string): number {
  // True when `namespace` is exactly `root` or nested beneath `root/`.
  const isWithin = (root: string): boolean =>
    namespace === root || namespace.startsWith(`${root}/`);

  if (isWithin('user')) { return 0.2; }
  if (isWithin('global')) { return 0.1; }

  // Only the `daily/` prefix is penalised; a bare `daily` namespace gets 0.
  return namespace.startsWith('daily/') ? -0.05 : 0;
}
|
||||
|
||||
/**
 * Confines a score to the closed range [0, 1].
 *
 * @param score - Raw score, possibly out of range or NaN.
 * @returns 0 for NaN or negative input, 1 for input above 1, otherwise the
 *   input unchanged.
 */
private _clampScore(score: number): number {
  // NaN fails every ordered comparison, so it is checked explicitly.
  if (Number.isNaN(score) || score < 0) { return 0; }
  return score > 1 ? 1 : score;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user