88731a50e3
Heartbeat: - HeartbeatMonitor with 5 checks: gateway, model, channels, memory, disk - Configurable interval, failure threshold, notification channel - Recovery notifications when health restores - 25 new tests Vector Memory Search: - EmbeddingProvider interface with OpenAI, Gemini, Ollama, LlamaCpp backends - SQLite-backed VectorStore with cosine similarity search - Text chunker with paragraph-aware splitting and overlap - HybridSearch merging keyword + vector results with configurable weight - Background indexer with dirty-namespace tracking - Graceful fallback to keyword search when embeddings unavailable - 51 new tests Config: automation.heartbeat + memory.embedding schema sections Total: 950 tests passing, all types clean
183 lines
6.0 KiB
TypeScript
183 lines
6.0 KiB
TypeScript
/**
 * Hybrid search combining vector similarity with keyword matching.
 */
|
|
|
|
import type { MemoryStore, SearchResult } from './store.js';
|
|
import type { VectorStore } from './vector-store.js';
|
|
import type { EmbeddingProvider } from './embeddings.js';
|
|
|
|
/**
 * A result from hybrid search combining vector and keyword sources.
 *
 * Keyword-backed results carry the content, context and line reported by the
 * keyword store. Vector-only results use the matched chunk's text for both
 * `content` and `context`, and the chunk's start line for `line`.
 */
export interface HybridSearchResult {
  /** The memory namespace the result came from. */
  namespace: string;

  /** The matched content text. */
  content: string;

  /** Surrounding context lines. */
  context: string;

  /** 1-based line number of the match. */
  line: number;

  /**
   * Combined relevance score: the weighted sum of the keyword and vector
   * contributions (0-1 when the hybrid weight and the vector score are both
   * within 0-1).
   */
  score: number;

  /** Source of the match: keyword, vector, or both. */
  source: 'keyword' | 'vector' | 'both';
}
|
|
|
|
/**
 * Combines keyword search from MemoryStore with vector similarity
 * search from VectorStore, deduplicating and merging results with
 * configurable weighting.
 *
 * Vector search is best-effort: if embedding or vector lookup fails, the
 * error is logged and the search degrades to keyword-only results.
 */
export class HybridSearch {
  /** Width, in lines, of the buckets used to treat nearby matches as duplicates. */
  private static readonly _LINE_PROXIMITY = 3;

  private readonly _memoryStore: MemoryStore;
  private readonly _vectorStore: VectorStore;
  private readonly _embeddingProvider: EmbeddingProvider;
  private readonly _hybridWeight: number;

  /**
   * @param memoryStore - The keyword-based memory store.
   * @param vectorStore - The vector embedding store.
   * @param embeddingProvider - Provider for generating query embeddings.
   * @param hybridWeight - Weight for vector results (0-1). Keyword weight = 1 - hybridWeight.
   *   The value is used as given; it is not validated or clamped.
   */
  constructor(
    memoryStore: MemoryStore,
    vectorStore: VectorStore,
    embeddingProvider: EmbeddingProvider,
    hybridWeight: number = 0.7,
  ) {
    this._memoryStore = memoryStore;
    this._vectorStore = vectorStore;
    this._embeddingProvider = embeddingProvider;
    this._hybridWeight = hybridWeight;
  }

  /**
   * Run hybrid search combining keyword and vector results.
   *
   * @param query - The search query string.
   * @param topK - Maximum number of results to return.
   * @returns Merged and deduplicated results sorted by combined score
   *   (descending). Empty when `topK` is not a positive number.
   */
  async search(query: string, topK: number = 5): Promise<HybridSearchResult[]> {
    // Nothing can be returned, so skip the keyword scan and the embedding call.
    if (!(topK > 0)) {
      return [];
    }

    // Run keyword and vector search in parallel
    const [keywordResults, vectorResults] = await Promise.all([
      this._keywordSearch(query),
      this._vectorSearch(query, topK * 2), // fetch more for better merging
    ]);

    // Merge and deduplicate
    return this._mergeResults(keywordResults, vectorResults, topK);
  }

  // ---------------------------------------------------------------------------
  // Private
  // ---------------------------------------------------------------------------

  /**
   * Run the keyword search, exposed as a promise so it can be combined with
   * the vector search in `Promise.all`.
   */
  private async _keywordSearch(query: string): Promise<SearchResult[]> {
    return this._memoryStore.search(query);
  }

  /**
   * Embed the query and look up the nearest chunks in the vector store.
   *
   * @param query - The search query string.
   * @param topK - Maximum number of vector matches to request.
   * @returns Vector matches mapped to the hybrid result shape, or an empty
   *   array when embedding or lookup fails (the failure is logged).
   */
  private async _vectorSearch(
    query: string,
    topK: number,
  ): Promise<HybridSearchResult[]> {
    try {
      const [queryEmbedding] = await this._embeddingProvider.embed([query]);
      if (queryEmbedding === undefined) {
        // Route an empty provider response through the same fallback path
        // instead of handing `undefined` to the vector store.
        throw new Error('Embedding provider returned no embedding for the query');
      }
      const results = this._vectorStore.search(queryEmbedding, topK);

      return results.map((r) => ({
        namespace: r.namespace,
        content: r.chunkText,
        context: r.chunkText,
        line: r.startLine,
        score: r.score,
        source: 'vector' as const,
      }));
    } catch (error: unknown) {
      // Vector search failure should not break search entirely
      console.error('Vector search failed, falling back to keyword only:', error);
      return [];
    }
  }

  /**
   * Merge keyword and vector results with deduplication.
   *
   * Deduplication: results are grouped by namespace and by fixed buckets of
   * `_LINE_PROXIMITY` lines (`floor(line / 3)`), so lines 3-5 share a bucket
   * while lines 2 and 3 do not. Within a bucket only the best keyword hit and
   * the best vector hit are kept.
   *
   * Scoring: keyword hits get a rank-based score (first hit = 1.0, decreasing
   * with position). The final score is
   * `hybridWeight * vectorScore + (1 - hybridWeight) * keywordScore`, where a
   * missing side contributes 0. When both sides hit a bucket, the keyword
   * hit's content, context and line are reported and `source` is `'both'`.
   *
   * @param keywordResults - Keyword hits, best match first.
   * @param vectorResults - Vector hits carrying their similarity score.
   * @param topK - Maximum number of results to return; non-positive yields none.
   * @returns Up to `topK` merged results sorted by score, highest first.
   */
  private _mergeResults(
    keywordResults: SearchResult[],
    vectorResults: HybridSearchResult[],
    topK: number,
  ): HybridSearchResult[] {
    const vectorWeight = this._hybridWeight;
    const keywordWeight = 1 - vectorWeight;

    // Best keyword hit and best vector hit per bucket, tracked separately so a
    // duplicate from one side can never masquerade as a match from the other.
    const buckets = new Map<
      string,
      { keyword?: HybridSearchResult; vector?: HybridSearchResult }
    >();

    const bucketFor = (namespace: string, line: number) => {
      const key = `${namespace}:${Math.floor(line / HybridSearch._LINE_PROXIMITY)}`;
      let bucket = buckets.get(key);
      if (bucket === undefined) {
        bucket = {};
        buckets.set(key, bucket);
      }
      return bucket;
    };

    // Normalise keyword scores: assign rank-based scores (best match = 1.0)
    const rankDenominator = keywordResults.length + 1;
    keywordResults.forEach((r, idx) => {
      const score = 1 - idx / rankDenominator;
      const bucket = bucketFor(r.namespace, r.line);
      if (bucket.keyword === undefined || score > bucket.keyword.score) {
        bucket.keyword = {
          namespace: r.namespace,
          content: r.content,
          context: r.context,
          line: r.line,
          score,
          source: 'keyword',
        };
      }
    });

    // Keep the higher-scoring vector hit per bucket
    for (const vr of vectorResults) {
      const bucket = bucketFor(vr.namespace, vr.line);
      if (bucket.vector === undefined || vr.score > bucket.vector.score) {
        bucket.vector = vr;
      }
    }

    // Combine the two sides of each bucket into a single weighted result.
    const merged: HybridSearchResult[] = [];
    for (const { keyword, vector } of buckets.values()) {
      if (keyword !== undefined && vector !== undefined) {
        merged.push({
          // Prefer the more specific keyword content
          ...keyword,
          score: vectorWeight * vector.score + keywordWeight * keyword.score,
          source: 'both',
        });
      } else if (keyword !== undefined) {
        merged.push({ ...keyword, score: keywordWeight * keyword.score });
      } else if (vector !== undefined) {
        merged.push({ ...vector, score: vectorWeight * vector.score });
      }
    }

    // Sort by score descending, return top K (never a negative slice end,
    // which would drop items from the tail instead of returning nothing).
    merged.sort((a, b) => b.score - a.score);
    return merged.slice(0, Math.max(0, topK));
  }
}
|