Files
flynn/src/memory/hybrid-search.ts
T
William Valentin 88731a50e3 feat: add heartbeat monitor and vector memory search (Tier 2)
Heartbeat:
- HeartbeatMonitor with 5 checks: gateway, model, channels, memory, disk
- Configurable interval, failure threshold, notification channel
- Recovery notifications when health restores
- 25 new tests

Vector Memory Search:
- EmbeddingProvider interface with OpenAI, Gemini, Ollama, LlamaCpp backends
- SQLite-backed VectorStore with cosine similarity search
- Text chunker with paragraph-aware splitting and overlap
- HybridSearch merging keyword + vector results with configurable weight
- Background indexer with dirty-namespace tracking
- Graceful fallback to keyword search when embeddings unavailable
- 51 new tests

Config: automation.heartbeat + memory.embedding schema sections
Total: 950 tests passing, all types clean
2026-02-07 14:45:11 -08:00

183 lines
6.0 KiB
TypeScript

/**
* Hybrid search combining vector similarity with keyword matching.
*/
import type { MemoryStore, SearchResult } from './store.js';
import type { VectorStore } from './vector-store.js';
import type { EmbeddingProvider } from './embeddings.js';
/**
* A single hit returned by hybrid search.
*
* Results originate from keyword search, vector search, or a merge of the
* two; `source` records which, and `score` reflects the weighting applied
* during merging.
*/
export interface HybridSearchResult {
/** The memory namespace the result came from. */
namespace: string;
/** The matched content text (for vector hits, the matched chunk's text). */
content: string;
/** Surrounding context lines (for vector hits this is the same chunk text as `content`). */
context: string;
/** 1-based line number of the match (for vector hits, the chunk's start line). */
line: number;
/**
* Combined relevance score, nominally 0-1: the vector score and the
* rank-based keyword score, each scaled by its share of the hybrid weight.
*/
score: number;
/** Source of the match: keyword, vector, or both. */
source: 'keyword' | 'vector' | 'both';
}
/**
 * Combines keyword search from MemoryStore with vector similarity
 * search from VectorStore, deduplicating and merging results with
 * configurable weighting.
 *
 * Keyword hits are scored by rank (best hit = 1.0); vector hits use the
 * score reported by the vector store. The final score of a result is
 * `hybridWeight * vectorScore + (1 - hybridWeight) * keywordScore`, where a
 * missing component counts as 0.
 */
export class HybridSearch {
  /** Weight used when the caller supplies none or a non-finite value. */
  private static readonly _DEFAULT_HYBRID_WEIGHT = 0.7;

  /** Two hits in one namespace at most this many lines apart are merged. */
  private static readonly _LINE_PROXIMITY = 3;

  private readonly _memoryStore: MemoryStore;
  private readonly _vectorStore: VectorStore;
  private readonly _embeddingProvider: EmbeddingProvider;
  private readonly _hybridWeight: number;

  /**
   * @param memoryStore - The keyword-based memory store.
   * @param vectorStore - The vector embedding store.
   * @param embeddingProvider - Provider for generating query embeddings.
   * @param hybridWeight - Weight for vector results (0-1). Keyword weight = 1 - hybridWeight.
   *   Values outside 0-1 are clamped into that range; a non-finite value
   *   falls back to the default.
   */
  constructor(
    memoryStore: MemoryStore,
    vectorStore: VectorStore,
    embeddingProvider: EmbeddingProvider,
    hybridWeight: number = HybridSearch._DEFAULT_HYBRID_WEIGHT,
  ) {
    this._memoryStore = memoryStore;
    this._vectorStore = vectorStore;
    this._embeddingProvider = embeddingProvider;
    // An out-of-range weight would give one component a negative weight and
    // produce scores outside the documented 0-1 range.
    this._hybridWeight = Number.isFinite(hybridWeight)
      ? Math.min(1, Math.max(0, hybridWeight))
      : HybridSearch._DEFAULT_HYBRID_WEIGHT;
  }

  /**
   * Run hybrid search combining keyword and vector results.
   *
   * @param query - The search query string.
   * @param topK - Maximum number of results to return. Fractions are rounded
   *   down; zero, negative, or NaN values yield an empty result.
   * @returns Merged and deduplicated results sorted by combined score.
   */
  async search(query: string, topK: number = 5): Promise<HybridSearchResult[]> {
    // A negative count passed to Array.prototype.slice means "drop from the
    // end", which is never what a caller asking for "top K" wants.
    const limit = Number.isNaN(topK) ? 0 : Math.max(0, Math.floor(topK));
    if (limit === 0) {
      return [];
    }

    // Run keyword and vector search in parallel
    const [keywordResults, vectorResults] = await Promise.all([
      this._keywordSearch(query),
      this._vectorSearch(query, limit * 2), // fetch more for better merging
    ]);

    // Merge and deduplicate
    return this._mergeResults(keywordResults, vectorResults, limit);
  }

  // ---------------------------------------------------------------------------
  // Private
  // ---------------------------------------------------------------------------

  /** Run the keyword search, wrapped in a promise so it can join `Promise.all`. */
  private _keywordSearch(query: string): Promise<SearchResult[]> {
    // MemoryStore.search is synchronous but we wrap in promise for parallel use
    return Promise.resolve(this._memoryStore.search(query));
  }

  /**
   * Embed the query and look up the nearest chunks in the vector store.
   *
   * Never rejects: any failure is logged and reported as "no vector
   * results" so the caller can fall back to keyword-only search.
   */
  private async _vectorSearch(
    query: string,
    topK: number,
  ): Promise<HybridSearchResult[]> {
    try {
      const [queryEmbedding] = await this._embeddingProvider.embed([query]);
      if (queryEmbedding === undefined) {
        // Routed through the catch below so the fallback is logged.
        throw new Error('Embedding provider returned no embedding for the query');
      }
      const results = this._vectorStore.search(queryEmbedding, topK);
      return results.map((r) => ({
        namespace: r.namespace,
        content: r.chunkText,
        context: r.chunkText,
        line: r.startLine,
        score: r.score,
        source: 'vector' as const,
      }));
    } catch (error) {
      // Vector search failure should not break search entirely
      console.error('Vector search failed, falling back to keyword only:', error);
      return [];
    }
  }

  /**
   * Merge keyword and vector results with deduplication.
   *
   * Deduplication: two results are considered duplicates if they share the
   * same namespace and their line numbers are within 3 lines of each other.
   * Matching is greedy: a result joins the first existing cluster it is
   * close enough to, otherwise it starts a new cluster.
   *
   * Each cluster keeps its best keyword score and best vector score
   * separately, so the combined score is computed exactly once and repeated
   * hits from the same source can never lower it or be mistaken for a
   * cross-source match.
   *
   * @param keywordResults - Keyword hits, best match first.
   * @param vectorResults - Vector hits carrying their similarity score.
   * @param topK - Maximum number of merged results to return.
   * @returns Merged results sorted by combined score, highest first.
   */
  private _mergeResults(
    keywordResults: SearchResult[],
    vectorResults: HybridSearchResult[],
    topK: number,
  ): HybridSearchResult[] {
    type Cluster = {
      result: HybridSearchResult;
      keywordScore: number;
      vectorScore: number;
    };

    const proximity = HybridSearch._LINE_PROXIMITY;
    const vectorWeight = this._hybridWeight;
    const keywordWeight = 1 - vectorWeight;

    // `clusters` preserves insertion order (used as the tie-break by the
    // stable sort below); `byNamespace` narrows the proximity lookup.
    const clusters: Cluster[] = [];
    const byNamespace = new Map<string, Cluster[]>();

    const findCluster = (namespace: string, line: number): Cluster | undefined =>
      byNamespace
        .get(namespace)
        ?.find((c) => Math.abs(c.result.line - line) <= proximity);

    const addCluster = (cluster: Cluster): void => {
      clusters.push(cluster);
      const siblings = byNamespace.get(cluster.result.namespace);
      if (siblings) {
        siblings.push(cluster);
      } else {
        byNamespace.set(cluster.result.namespace, [cluster]);
      }
    };

    // Keyword pass. Scores are rank-based (best match = 1.0) and strictly
    // decreasing, so when a hit lands in an existing cluster that cluster
    // already holds the stronger keyword hit and nothing needs updating.
    const keywordCount = keywordResults.length;
    keywordResults.forEach((kr, idx) => {
      if (findCluster(kr.namespace, kr.line)) {
        return;
      }
      addCluster({
        result: {
          namespace: kr.namespace,
          content: kr.content,
          context: kr.context,
          line: kr.line,
          score: 0, // filled in once both passes are done
          source: 'keyword',
        },
        keywordScore: 1 - idx / (keywordCount + 1),
        vectorScore: 0,
      });
    });

    // Vector pass: attach to a nearby cluster or start a vector-only one.
    for (const vr of vectorResults) {
      const existing = findCluster(vr.namespace, vr.line);
      if (!existing) {
        addCluster({
          result: { ...vr, source: 'vector' },
          keywordScore: 0,
          vectorScore: vr.score,
        });
        continue;
      }

      if (existing.result.source === 'keyword') {
        // First vector match for a keyword hit. The keyword content stays
        // because it points at the specific matching line.
        existing.result.source = 'both';
        existing.vectorScore = vr.score;
      } else if (vr.score > existing.vectorScore) {
        // Keep the higher-scoring vector match for this cluster.
        existing.vectorScore = vr.score;
        if (existing.result.source === 'vector') {
          // Vector-only cluster: surface the stronger chunk.
          existing.result.content = vr.content;
          existing.result.context = vr.context;
          existing.result.line = vr.line;
        }
      }
    }

    // Sort by score descending, return top K
    const merged = clusters.map((c) => ({
      ...c.result,
      score: vectorWeight * c.vectorScore + keywordWeight * c.keywordScore,
    }));
    merged.sort((a, b) => b.score - a.score);
    return merged.slice(0, Math.max(0, topK));
  }
}