import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { HybridSearch } from './hybrid-search.js';
import type { HybridSearchResult } from './hybrid-search.js';
import type { MemoryStore, SearchResult } from './store.js';
import type { VectorStore, VectorSearchResult } from './vector-store.js';
import type { EmbeddingProvider } from './embeddings.js';

/**
 * Build a MemoryStore test double whose `search` always yields `results`.
 * The remaining members are stubs that return empty values or nothing.
 */
function mockMemoryStore(results: SearchResult[]): MemoryStore {
  const stub = {
    search: vi.fn(() => results),
    read: vi.fn(() => ''),
    write: vi.fn(),
    listNamespaces: vi.fn(() => []),
    getContextForPrompt: vi.fn(() => ''),
    getDirtyNamespaces: vi.fn(() => []),
    markAllDirty: vi.fn(),
  };
  // Only a subset of the real interface is stubbed, hence the double cast.
  return stub as unknown as MemoryStore;
}

/**
 * Build a VectorStore test double whose `search` always yields `results`.
 * `hasContentHash` reports false and `count` reports 0; the rest are no-ops.
 */
function mockVectorStore(results: VectorSearchResult[]): VectorStore {
  const stub = {
    search: vi.fn(() => results),
    upsertChunks: vi.fn(),
    deleteNamespace: vi.fn(),
    hasContentHash: vi.fn(() => false),
    count: vi.fn(() => 0),
    close: vi.fn(),
  };
  // Only a subset of the real interface is stubbed, hence the double cast.
  return stub as unknown as VectorStore;
}

/**
 * Build an embedding provider test double.
 *
 * @param dims - Length of every embedding vector produced (defaults to 4).
 * @returns A provider whose `embed` resolves to one vector per input text,
 *          each filled with the constant 0.1.
 */
function mockEmbeddingProvider(dims: number = 4): EmbeddingProvider {
  const embed = vi.fn(async (texts: string[]) =>
    texts.map(() => new Array(dims).fill(0.1)),
  );
  return { dimensions: dims, embed };
}

/**
 * Wire a HybridSearch instance from mocked collaborators.
 *
 * @param keywordHits - Results the mocked keyword store will return.
 * @param vectorHits - Results the mocked vector store will return.
 * @param weight - Value forwarded as the fourth constructor argument
 *                 (the tests below treat it as the hybrid/vector weight).
 * @param provider - Embedding provider; a fresh fixed-output mock by default.
 */
function buildHybrid(
  keywordHits: SearchResult[],
  vectorHits: VectorSearchResult[],
  weight: number,
  provider: EmbeddingProvider = mockEmbeddingProvider(),
): HybridSearch {
  return new HybridSearch(
    mockMemoryStore(keywordHits),
    mockVectorStore(vectorHits),
    provider,
    weight,
  );
}

describe('HybridSearch', () => {
  describe('search', () => {
    it('returns keyword results when no vector results exist', async () => {
      const keywordHits: SearchResult[] = [
        { namespace: 'notes', line: 5, content: 'fox jumped', context: 'the fox jumped over' },
      ];
      const searcher = buildHybrid(keywordHits, [], 0.7);

      const found = await searcher.search('fox');

      expect(found).toHaveLength(1);
      expect(found[0].namespace).toBe('notes');
      expect(found[0].source).toBe('keyword');
    });

    it('returns vector results when no keyword results exist', async () => {
      const vectorHits: VectorSearchResult[] = [
        { namespace: 'journal', chunkText: 'semantic match', startLine: 10, endLine: 20, score: 0.9 },
      ];
      const searcher = buildHybrid([], vectorHits, 0.7);

      const found = await searcher.search('meaning');

      expect(found).toHaveLength(1);
      expect(found[0].namespace).toBe('journal');
      expect(found[0].source).toBe('vector');
    });

    it('merges keyword and vector results', async () => {
      const keywordHits: SearchResult[] = [
        { namespace: 'notes', line: 5, content: 'fox keyword', context: 'the fox keyword hit' },
      ];
      const vectorHits: VectorSearchResult[] = [
        { namespace: 'journal', chunkText: 'fox semantic', startLine: 10, endLine: 20, score: 0.85 },
      ];
      const searcher = buildHybrid(keywordHits, vectorHits, 0.7);

      const found = await searcher.search('fox');

      expect(found).toHaveLength(2);
      const seenNamespaces = found.map((entry) => entry.namespace);
      expect(seenNamespaces).toContain('notes');
      expect(seenNamespaces).toContain('journal');
    });

    it('deduplicates results from same namespace and nearby lines', async () => {
      // Keyword line 5 falls inside the vector chunk's 4-8 range in the same namespace.
      const keywordHits: SearchResult[] = [
        { namespace: 'notes', line: 5, content: 'fox hit', context: 'context' },
      ];
      const vectorHits: VectorSearchResult[] = [
        { namespace: 'notes', chunkText: 'fox hit too', startLine: 4, endLine: 8, score: 0.9 },
      ];
      const searcher = buildHybrid(keywordHits, vectorHits, 0.7);

      const found = await searcher.search('fox');

      // The overlapping hits are expected to collapse into one "both" entry.
      expect(found).toHaveLength(1);
      expect(found[0].source).toBe('both');
    });

    it('applies hybrid weight to scoring', async () => {
      const keywordHits: SearchResult[] = [
        { namespace: 'notes', line: 100, content: 'keyword only', context: 'ctx' },
      ];
      const vectorHits: VectorSearchResult[] = [
        { namespace: 'journal', chunkText: 'vector only', startLine: 200, endLine: 210, score: 0.95 },
      ];
      // A weight of 0.9 leans heavily toward the vector side.
      const searcher = buildHybrid(keywordHits, vectorHits, 0.9);

      const found = await searcher.search('query');

      expect(found).toHaveLength(2);
      // With that weight, the vector-only hit must outscore the keyword-only hit.
      const fromVector = found.find((entry) => entry.source === 'vector');
      const fromKeyword = found.find((entry) => entry.source === 'keyword');
      expect(fromVector).toBeDefined();
      expect(fromKeyword).toBeDefined();
      expect(fromVector!.score).toBeGreaterThan(fromKeyword!.score);
    });

    it('falls back to keyword search when vector search fails', async () => {
      const keywordHits: SearchResult[] = [
        { namespace: 'notes', line: 1, content: 'fallback', context: 'ctx' },
      ];
      // Provider whose embed call always rejects, simulating an upstream failure.
      const failingProvider: EmbeddingProvider = {
        dimensions: 4,
        embed: vi.fn(async () => {
          throw new Error('API error');
        }),
      };
      const searcher = buildHybrid(keywordHits, [], 0.7, failingProvider);

      // The rejection must not propagate; keyword hits are returned instead.
      const found = await searcher.search('test');

      expect(found).toHaveLength(1);
      expect(found[0].source).toBe('keyword');
    });

    it('respects topK limit', async () => {
      // Ten keyword hits, each in its own namespace.
      const keywordHits: SearchResult[] = Array.from({ length: 10 }, (_, idx) => ({
        namespace: `ns${idx}`,
        line: idx + 1,
        content: `result ${idx}`,
        context: `ctx ${idx}`,
      }));
      const searcher = buildHybrid(keywordHits, [], 0.5);

      const found = await searcher.search('query', 3);

      expect(found).toHaveLength(3);
    });

    it('returns empty array when both searches find nothing', async () => {
      const searcher = buildHybrid([], [], 0.7);

      const found = await searcher.search('nonexistent');

      expect(found).toEqual([]);
    });
  });
});