/**
 * Hybrid search combining vector similarity with keyword matching.
 */

import type { MemoryStore, SearchResult } from './store.js';
import type { VectorStore } from './vector-store.js';
import type { EmbeddingProvider } from './embeddings.js';

/**
 * A result from hybrid search combining vector and keyword sources.
 */
export interface HybridSearchResult {
  /** The memory namespace the result came from. */
  namespace: string;
  /** The matched content text. */
  content: string;
  /** Surrounding context lines. */
  context: string;
  /** 1-based line number of the match. */
  line: number;
  /** Combined relevance score (0-1). */
  score: number;
  /** Source of the match: keyword, vector, or both. */
  source: 'keyword' | 'vector' | 'both';
}

/**
 * Combines keyword search from MemoryStore with vector similarity
 * search from VectorStore, deduplicating and merging results with
 * configurable weighting.
 */
export class HybridSearch {
  private readonly _memoryStore: MemoryStore;
  private readonly _vectorStore: VectorStore;
  private readonly _embeddingProvider: EmbeddingProvider;
  private readonly _hybridWeight: number;

  /**
   * @param memoryStore - The keyword-based memory store.
   * @param vectorStore - The vector embedding store.
   * @param embeddingProvider - Provider for generating query embeddings.
   * @param hybridWeight - Weight for vector results (0-1). Keyword weight = 1 - hybridWeight.
   *   Values outside 0-1 are clamped into that range; a non-finite value
   *   falls back to the default of 0.7 so scores never become NaN.
   */
  constructor(
    memoryStore: MemoryStore,
    vectorStore: VectorStore,
    embeddingProvider: EmbeddingProvider,
    hybridWeight: number = 0.7,
  ) {
    this._memoryStore = memoryStore;
    this._vectorStore = vectorStore;
    this._embeddingProvider = embeddingProvider;
    this._hybridWeight = Number.isFinite(hybridWeight)
      ? Math.min(1, Math.max(0, hybridWeight))
      : 0.7;
  }

  /**
   * Run hybrid search combining keyword and vector results.
   *
   * @param query - The search query string.
   * @param topK - Maximum number of results to return. A value that is not
   *   greater than zero yields an empty result without running any search.
   * @returns Merged and deduplicated results sorted by combined score.
   */
  async search(query: string, topK: number = 5): Promise<HybridSearchResult[]> {
    if (!(topK > 0)) {
      return [];
    }

    // Run keyword and vector search in parallel
    const [keywordResults, vectorResults] = await Promise.all([
      this._keywordSearch(query),
      this._vectorSearch(query, topK * 2), // fetch more for better merging
    ]);

    // Merge and deduplicate
    return this._mergeResults(keywordResults, vectorResults, topK);
  }

  // ---------------------------------------------------------------------------
  // Private
  // ---------------------------------------------------------------------------

  /**
   * Keyword search via the memory store, exposed as a promise so it can be
   * combined with the vector search in `Promise.all`.
   */
  private async _keywordSearch(query: string): Promise<SearchResult[]> {
    return this._memoryStore.search(query);
  }

  /**
   * Embed the query and look up the nearest chunks in the vector store.
   *
   * Any failure is logged and reported as "no vector results" so that the
   * overall search degrades to keyword-only instead of failing.
   */
  private async _vectorSearch(
    query: string,
    topK: number,
  ): Promise<HybridSearchResult[]> {
    try {
      const [queryEmbedding] = await this._embeddingProvider.embed([query]);
      if (queryEmbedding === undefined) {
        // Provider produced no embedding for the query; nothing to search with.
        return [];
      }

      const results = this._vectorStore.search(queryEmbedding, topK);
      return results.map((r) => ({
        namespace: r.namespace,
        content: r.chunkText,
        context: r.chunkText,
        line: r.startLine,
        score: r.score,
        source: 'vector' as const,
      }));
    } catch (error: unknown) {
      // Vector search failure should not break search entirely
      console.error('Vector search failed, falling back to keyword only:', error);
      return [];
    }
  }

  /**
   * Merge keyword and vector results with deduplication.
   *
   * Deduplication: two results are considered duplicates if they share the
   * same namespace and their line numbers are within 3 lines of each other.
   *
   * Scoring: keyword hits get a rank-based score (best match = 1.0). The
   * final score of an entry is
   * `hybridWeight * vectorScore + (1 - hybridWeight) * keywordScore`, where a
   * missing component counts as 0. For duplicates, the best-ranked keyword
   * hit and the highest-scoring vector hit are the ones that count.
   *
   * @param keywordResults - Keyword hits, best match first.
   * @param vectorResults - Vector hits with their similarity scores.
   * @param topK - Maximum number of merged results to return.
   * @returns Up to `topK` merged results sorted by score, highest first.
   */
  private _mergeResults(
    keywordResults: SearchResult[],
    vectorResults: HybridSearchResult[],
    topK: number,
  ): HybridSearchResult[] {
    // Maximum line distance at which two hits count as the same match.
    const LINE_PROXIMITY = 3;
    const vectorWeight = this._hybridWeight;
    const keywordWeight = 1 - vectorWeight;

    // Keeps the two score components apart until the end so that repeated
    // hits on the same location can never overwrite each other's share.
    interface MergeEntry {
      result: HybridSearchResult;
      keywordScore: number;
      vectorScore: number | undefined;
    }

    const entries: MergeEntry[] = [];
    const entriesByNamespace = new Map<string, MergeEntry[]>();

    // Closest existing entry in the namespace within LINE_PROXIMITY lines;
    // on equal distance the earlier (better-ranked) entry wins.
    const findNearby = (namespace: string, line: number): MergeEntry | undefined => {
      let nearest: MergeEntry | undefined;
      let nearestDistance = Infinity;
      for (const candidate of entriesByNamespace.get(namespace) ?? []) {
        const distance = Math.abs(candidate.result.line - line);
        if (distance <= LINE_PROXIMITY && distance < nearestDistance) {
          nearest = candidate;
          nearestDistance = distance;
        }
      }
      return nearest;
    };

    const insert = (entry: MergeEntry): void => {
      entries.push(entry);
      const siblings = entriesByNamespace.get(entry.result.namespace);
      if (siblings === undefined) {
        entriesByNamespace.set(entry.result.namespace, [entry]);
      } else {
        siblings.push(entry);
      }
    };

    // Keyword results first. They arrive best-first, so when a later hit
    // lands next to an earlier one the earlier (higher-ranked) hit is kept.
    const keywordCount = keywordResults.length;
    keywordResults.forEach((kr, idx) => {
      if (findNearby(kr.namespace, kr.line) !== undefined) {
        return;
      }
      insert({
        result: {
          namespace: kr.namespace,
          content: kr.content,
          context: kr.context,
          line: kr.line,
          score: 0, // filled in once both components are known
          source: 'keyword',
        },
        // Rank-based normalisation: best match = 1.0
        keywordScore: 1 - idx / (keywordCount + 1),
        vectorScore: undefined,
      });
    });

    // Add/merge vector results
    for (const vr of vectorResults) {
      const existing = findNearby(vr.namespace, vr.line);
      if (existing === undefined) {
        insert({ result: { ...vr }, keywordScore: 0, vectorScore: vr.score });
        continue;
      }

      // Only the best vector hit for a location contributes to its score.
      if (existing.vectorScore !== undefined && vr.score <= existing.vectorScore) {
        continue;
      }
      existing.vectorScore = vr.score;

      if (existing.result.source === 'keyword') {
        // Prefer the more specific keyword content; just record the overlap.
        existing.result.source = 'both';
      } else if (existing.result.source === 'vector') {
        // Vector-only entry: show the text of the higher-scoring chunk.
        existing.result = { ...vr };
      }
    }

    // Sort by score descending, return top K
    const merged = entries.map(
      (entry): HybridSearchResult => ({
        ...entry.result,
        score:
          vectorWeight * (entry.vectorScore ?? 0) +
          keywordWeight * entry.keywordScore,
      }),
    );
    merged.sort((a, b) => b.score - a.score);
    return merged.slice(0, Math.max(0, topK));
  }
}