feat: add heartbeat monitor and vector memory search (Tier 2)
Heartbeat:
- HeartbeatMonitor with 5 checks: gateway, model, channels, memory, disk
- Configurable interval, failure threshold, notification channel
- Recovery notifications when health restores
- 25 new tests

Vector Memory Search:
- EmbeddingProvider interface with OpenAI, Gemini, Ollama, LlamaCpp backends
- SQLite-backed VectorStore with cosine similarity search
- Text chunker with paragraph-aware splitting and overlap
- HybridSearch merging keyword + vector results with configurable weight
- Background indexer with dirty-namespace tracking
- Graceful fallback to keyword search when embeddings unavailable
- 51 new tests

Config: automation.heartbeat + memory.embedding schema sections

Total: 950 tests passing, all types clean
This commit is contained in:
@@ -0,0 +1,182 @@
|
||||
/**
 * Hybrid search combining vector similarity with keyword matching.
 */
|
||||
|
||||
import type { MemoryStore, SearchResult } from './store.js';
|
||||
import type { VectorStore } from './vector-store.js';
|
||||
import type { EmbeddingProvider } from './embeddings.js';
|
||||
|
||||
/**
 * A result from hybrid search combining vector and keyword sources.
 */
export interface HybridSearchResult {
  /** The memory namespace the result came from. */
  namespace: string;

  /** The matched content text. For vector-only matches this is the chunk text. */
  content: string;

  /**
   * Surrounding context lines. Vector-only matches carry the chunk text here
   * as well, because no separate context is available for them.
   */
  context: string;

  /**
   * 1-based line number of the match. For vector-only matches this is the
   * start line of the matched chunk.
   */
  line: number;

  /**
   * Combined, weighted relevance score. It stays within 0-1 as long as the
   * hybrid weight and the vector similarity scores are themselves within 0-1.
   */
  score: number;

  /** Source of the match: keyword, vector, or both. */
  source: 'keyword' | 'vector' | 'both';
}
|
||||
|
||||
/**
 * Combines keyword search from MemoryStore with vector similarity
 * search from VectorStore, deduplicating and merging results with
 * configurable weighting.
 */
export class HybridSearch {
  /**
   * Two hits in the same namespace whose line numbers differ by at most this
   * many lines are treated as the same match while merging.
   */
  private static readonly LINE_PROXIMITY = 3;

  private readonly _memoryStore: MemoryStore;
  private readonly _vectorStore: VectorStore;
  private readonly _embeddingProvider: EmbeddingProvider;
  private readonly _hybridWeight: number;

  /**
   * @param memoryStore - The keyword-based memory store.
   * @param vectorStore - The vector embedding store.
   * @param embeddingProvider - Provider for generating query embeddings.
   * @param hybridWeight - Weight for vector results (0-1). Keyword weight = 1 - hybridWeight.
   *   The value is used as given; it is not validated or clamped.
   */
  constructor(
    memoryStore: MemoryStore,
    vectorStore: VectorStore,
    embeddingProvider: EmbeddingProvider,
    hybridWeight: number = 0.7,
  ) {
    this._memoryStore = memoryStore;
    this._vectorStore = vectorStore;
    this._embeddingProvider = embeddingProvider;
    this._hybridWeight = hybridWeight;
  }

  /**
   * Run hybrid search combining keyword and vector results.
   *
   * @param query - The search query string.
   * @param topK - Maximum number of results to return. A non-positive value
   *   yields an empty list.
   * @returns Merged and deduplicated results sorted by combined score.
   */
  async search(query: string, topK: number = 5): Promise<HybridSearchResult[]> {
    // A non-positive (or NaN) limit can never produce results, so skip the lookups.
    if (!(topK > 0)) {
      return [];
    }

    // Run keyword and vector search in parallel. The vector side over-fetches
    // so that merging still has enough candidates after deduplication.
    const [keywordResults, vectorResults] = await Promise.all([
      this._keywordSearch(query),
      this._vectorSearch(query, topK * 2),
    ]);

    return this._mergeResults(keywordResults, vectorResults, topK);
  }

  // ---------------------------------------------------------------------------
  // Private
  // ---------------------------------------------------------------------------

  /**
   * Keyword lookup against the memory store.
   *
   * Declared `async` so that an exception thrown by the store surfaces as a
   * rejected promise instead of escaping while the parallel call list is built.
   */
  private async _keywordSearch(query: string): Promise<SearchResult[]> {
    return this._memoryStore.search(query);
  }

  /**
   * Embed the query and look up the nearest chunks in the vector store.
   *
   * Any failure (embedding or store) is logged and reported as "no vector
   * results" so that keyword search can still answer the query.
   *
   * @param query - The search query string.
   * @param topK - Maximum number of vector hits to request from the store.
   * @returns Vector hits mapped to the hybrid result shape, each tagged 'vector'.
   */
  private async _vectorSearch(
    query: string,
    topK: number,
  ): Promise<HybridSearchResult[]> {
    try {
      const [queryEmbedding] = await this._embeddingProvider.embed([query]);
      if (queryEmbedding === undefined) {
        // Route through the catch block so the fallback is logged like any other failure.
        throw new Error('Embedding provider returned no embedding for the query');
      }

      const hits = this._vectorStore.search(queryEmbedding, topK);

      return hits.map((hit) => ({
        namespace: hit.namespace,
        content: hit.chunkText,
        // No separate context exists for a chunk, so the chunk text is reused.
        context: hit.chunkText,
        line: hit.startLine,
        score: hit.score,
        source: 'vector' as const,
      }));
    } catch (error: unknown) {
      // Vector search failure should not break search entirely
      console.error('Vector search failed, falling back to keyword only:', error);
      return [];
    }
  }

  /**
   * Merge keyword and vector results with deduplication.
   *
   * Deduplication: two results are considered duplicates if they share the
   * same namespace and their line numbers are within LINE_PROXIMITY (3) lines
   * of each other.
   *
   * Scoring: keyword hits get a rank-based score (best match = 1.0). The final
   * score is `hybridWeight * vectorScore + (1 - hybridWeight) * keywordScore`,
   * where a side that did not match contributes nothing.
   *
   * @param keywordResults - Keyword hits in rank order (best first).
   * @param vectorResults - Vector hits with their similarity scores.
   * @param topK - Maximum number of results to return.
   * @returns Up to `topK` merged results, highest combined score first.
   */
  private _mergeResults(
    keywordResults: SearchResult[],
    vectorResults: HybridSearchResult[],
    topK: number,
  ): HybridSearchResult[] {
    // One candidate per distinct location. The two component scores are kept
    // apart so the source label is derived from which sides actually matched.
    type Candidate = {
      namespace: string;
      content: string;
      context: string;
      line: number;
      keywordScore?: number;
      vectorScore?: number;
    };

    const candidates: Candidate[] = [];
    const byNamespace = new Map<string, Candidate[]>();

    // Closest already-registered candidate in the namespace that lies within
    // LINE_PROXIMITY lines; ties go to the one registered first.
    const findNearby = (namespace: string, line: number): Candidate | undefined => {
      const sameNamespace = byNamespace.get(namespace);
      if (sameNamespace === undefined) {
        return undefined;
      }
      let nearest: Candidate | undefined;
      let nearestDistance = Infinity;
      for (const candidate of sameNamespace) {
        const distance = Math.abs(candidate.line - line);
        if (distance <= HybridSearch.LINE_PROXIMITY && distance < nearestDistance) {
          nearest = candidate;
          nearestDistance = distance;
        }
      }
      return nearest;
    };

    const register = (candidate: Candidate): void => {
      candidates.push(candidate);
      const sameNamespace = byNamespace.get(candidate.namespace);
      if (sameNamespace === undefined) {
        byNamespace.set(candidate.namespace, [candidate]);
      } else {
        sameNamespace.push(candidate);
      }
    };

    // Keyword pass. Results arrive best-first, so when two keyword hits land on
    // the same location the earlier (better-ranked) one is kept untouched.
    const keywordCount = keywordResults.length;
    keywordResults.forEach((kr, rank) => {
      if (findNearby(kr.namespace, kr.line) !== undefined) {
        return;
      }
      register({
        namespace: kr.namespace,
        content: kr.content,
        context: kr.context,
        line: kr.line,
        // Normalise keyword scores: rank-based, best match = 1.0
        keywordScore: 1 - rank / (keywordCount + 1),
      });
    });

    // Vector pass: attach to a nearby candidate or register a new one.
    for (const vr of vectorResults) {
      const match = findNearby(vr.namespace, vr.line);
      if (match === undefined) {
        register({
          namespace: vr.namespace,
          content: vr.content,
          context: vr.context,
          line: vr.line,
          vectorScore: vr.score,
        });
      } else if (match.vectorScore === undefined) {
        // Keyword hit confirmed by a vector hit; keep the more specific keyword text.
        match.vectorScore = vr.score;
      } else if (vr.score > match.vectorScore) {
        // Duplicate vector hit: keep the higher-scoring version.
        match.vectorScore = vr.score;
        if (match.keywordScore === undefined) {
          match.content = vr.content;
          match.context = vr.context;
          match.line = vr.line;
        }
      }
    }

    const vectorWeight = this._hybridWeight;
    const merged = candidates.map((candidate): HybridSearchResult => {
      const matchedKeyword = candidate.keywordScore !== undefined;
      const matchedVector = candidate.vectorScore !== undefined;

      let score = 0;
      if (candidate.vectorScore !== undefined) {
        score += vectorWeight * candidate.vectorScore;
      }
      if (candidate.keywordScore !== undefined) {
        score += (1 - vectorWeight) * candidate.keywordScore;
      }

      let source: HybridSearchResult['source'] = 'keyword';
      if (matchedVector) {
        source = matchedKeyword ? 'both' : 'vector';
      }

      return {
        namespace: candidate.namespace,
        content: candidate.content,
        context: candidate.context,
        line: candidate.line,
        score,
        source,
      };
    });

    // Sort by score descending (stable, so ties keep keyword-first order), return top K.
    merged.sort((a, b) => b.score - a.score);
    // Clamp so a negative limit yields nothing instead of "all but the last N".
    return merged.slice(0, Math.max(0, topK));
  }
}
|
||||
Reference in New Issue
Block a user