feat: add heartbeat monitor and vector memory search (Tier 2)

Heartbeat:
- HeartbeatMonitor with 5 checks: gateway, model, channels, memory, disk
- Configurable interval, failure threshold, notification channel
- Recovery notifications when health restores
- 25 new tests

Vector Memory Search:
- EmbeddingProvider interface with OpenAI, Gemini, Ollama, LlamaCpp backends
- SQLite-backed VectorStore with cosine similarity search
- Text chunker with paragraph-aware splitting and overlap
- HybridSearch merging keyword + vector results with configurable weight
- Background indexer with dirty-namespace tracking
- Graceful fallback to keyword search when embeddings unavailable
- 51 new tests

Config: automation.heartbeat + memory.embedding schema sections

Total: 950 tests passing, all types clean
2026-02-07 14:45:11 -08:00
parent b50c140d25
commit 88731a50e3
17 changed files with 2354 additions and 7 deletions
@@ -0,0 +1,182 @@
+/**
+ * Hybrid search combining vector similarity with keyword matching.
+ */
+
+import type { MemoryStore, SearchResult } from './store.js';
+import type { VectorStore } from './vector-store.js';
+import type { EmbeddingProvider } from './embeddings.js';
+
+/**
+ * A result from hybrid search combining vector and keyword sources.
+ *
+ * Returned by HybridSearch.search(). Each entry stands for one location
+ * (a namespace plus a small range of lines) that matched the query.
+ */
+export interface HybridSearchResult {
+  /** The memory namespace the result came from. */
+  namespace: string;
+  /**
+   * The matched content text. For matches that come from the vector store
+   * this is the text of the matching chunk.
+   */
+  content: string;
+  /**
+   * Surrounding context lines. Vector matches carry no separate context,
+   * so for them this repeats the chunk text.
+   */
+  context: string;
+  /**
+   * 1-based line number of the match. For vector matches this is the
+   * start line reported for the chunk.
+   */
+  line: number;
+  /**
+   * Combined relevance score (0-1). The range only holds when the hybrid
+   * weight and the vector store's similarity scores are themselves in 0-1;
+   * nothing in this module clamps the value.
+   */
+  score: number;
+  /**
+   * Source of the match: keyword, vector, or both. 'both' is intended for
+   * locations that the keyword and the vector search each reported.
+   */
+  source: 'keyword' | 'vector' | 'both';
+}
+
+/**
+ * Combines keyword search from MemoryStore with vector similarity
+ * search from VectorStore, deduplicating and merging results with
+ * configurable weighting.
+ *
+ * A location reported by both searches is returned once, with
+ * `source: 'both'` and a score of
+ * `hybridWeight * vectorScore + (1 - hybridWeight) * keywordScore`.
+ */
+export class HybridSearch {
+  /** Width, in lines, of the buckets used to treat nearby matches as duplicates. */
+  private static readonly _LINE_PROXIMITY = 3;
+
+  private readonly _memoryStore: MemoryStore;
+  private readonly _vectorStore: VectorStore;
+  private readonly _embeddingProvider: EmbeddingProvider;
+  private readonly _hybridWeight: number;
+
+  /**
+   * @param memoryStore - The keyword-based memory store.
+   * @param vectorStore - The vector embedding store.
+   * @param embeddingProvider - Provider for generating query embeddings.
+   * @param hybridWeight - Weight for vector results (0-1). Keyword weight = 1 - hybridWeight.
+   *   The value is used as given; it is not validated or clamped.
+   */
+  constructor(
+    memoryStore: MemoryStore,
+    vectorStore: VectorStore,
+    embeddingProvider: EmbeddingProvider,
+    hybridWeight: number = 0.7,
+  ) {
+    this._memoryStore = memoryStore;
+    this._vectorStore = vectorStore;
+    this._embeddingProvider = embeddingProvider;
+    this._hybridWeight = hybridWeight;
+  }
+
+  /**
+   * Run hybrid search combining keyword and vector results.
+   *
+   * @param query - The search query string.
+   * @param topK - Maximum number of results to return. A value that is not
+   *   greater than zero yields an empty array without running either search.
+   * @returns Merged and deduplicated results sorted by combined score.
+   */
+  async search(query: string, topK: number = 5): Promise<HybridSearchResult[]> {
+    // A non-positive (or NaN) limit can never produce results. Returning early
+    // also avoids `slice(0, negative)` handing back "all but the last N" and
+    // skips a pointless embedding request.
+    if (!(topK > 0)) {
+      return [];
+    }
+
+    // Run keyword and vector search in parallel. The vector side over-fetches
+    // so that merging still has enough candidates after deduplication.
+    const [keywordResults, vectorResults] = await Promise.all([
+      this._keywordSearch(query),
+      this._vectorSearch(query, topK * 2),
+    ]);
+
+    return this._mergeResults(keywordResults, vectorResults, topK);
+  }
+
+  // ---------------------------------------------------------------------------
+  // Private
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Keyword lookup wrapped in a promise so it can be awaited alongside the
+   * vector search.
+   */
+  private async _keywordSearch(query: string): Promise<SearchResult[]> {
+    return this._memoryStore.search(query);
+  }
+
+  /**
+   * Embed the query and look up the nearest chunks in the vector store.
+   *
+   * Any failure (including a provider that returns no embedding) degrades to
+   * an empty list so the caller falls back to keyword-only results.
+   */
+  private async _vectorSearch(
+    query: string,
+    topK: number,
+  ): Promise<HybridSearchResult[]> {
+    try {
+      const [queryEmbedding] = await this._embeddingProvider.embed([query]);
+      if (queryEmbedding == null) {
+        // The provider answered with an empty batch; there is nothing to search with.
+        return [];
+      }
+
+      return this._vectorStore.search(queryEmbedding, topK).map((r) => ({
+        namespace: r.namespace,
+        content: r.chunkText,
+        // Chunks have no separate context, so the chunk text doubles as context.
+        context: r.chunkText,
+        line: r.startLine,
+        score: r.score,
+        source: 'vector' as const,
+      }));
+    } catch (error: unknown) {
+      // Vector search failure should not break search entirely
+      console.error('Vector search failed, falling back to keyword only:', error);
+      return [];
+    }
+  }
+
+  /**
+   * Build the deduplication key for a match.
+   *
+   * Lines are grouped into fixed buckets of `_LINE_PROXIMITY` lines, so two
+   * matches in the same namespace collapse when `floor(line / 3)` is equal.
+   * This approximates "within 3 lines": neighbours that straddle a bucket
+   * boundary (for example lines 2 and 3) are kept apart.
+   */
+  private static _bucketKey(namespace: string, line: number): string {
+    return `${namespace}:${Math.floor(line / HybridSearch._LINE_PROXIMITY)}`;
+  }
+
+  /**
+   * Merge keyword and vector results with deduplication.
+   *
+   * Keyword results get a rank-based score (first result = 1.0), which
+   * assumes the keyword store returns its best match first. Within one
+   * bucket only the best keyword hit and the best vector hit count; when
+   * both exist the keyword hit supplies content, context and line.
+   *
+   * @param keywordResults - Keyword matches, in the order the store returned them.
+   * @param vectorResults - Vector matches carrying raw similarity scores.
+   * @param topK - Maximum number of merged results to return.
+   * @returns Merged results sorted by combined score, highest first.
+   */
+  private _mergeResults(
+    keywordResults: SearchResult[],
+    vectorResults: HybridSearchResult[],
+    topK: number,
+  ): HybridSearchResult[] {
+    const vectorWeight = this._hybridWeight;
+    const keywordWeight = 1 - vectorWeight;
+
+    const merged = new Map<string, HybridSearchResult>();
+    // Weighted keyword contribution per bucket, kept separately so the
+    // combined score can be recomputed exactly when a vector hit arrives.
+    const keywordPart = new Map<string, number>();
+    // Best raw vector score seen so far per bucket.
+    const vectorBest = new Map<string, number>();
+
+    const keywordCount = keywordResults.length;
+    keywordResults.forEach((kr, rank) => {
+      const key = HybridSearch._bucketKey(kr.namespace, kr.line);
+      if (merged.has(key)) {
+        // A better-ranked keyword hit already owns this bucket. A second
+        // keyword hit is a plain duplicate: it must not lower the score or
+        // make the entry look like a keyword+vector match.
+        return;
+      }
+      const weighted = keywordWeight * (1 - rank / (keywordCount + 1));
+      keywordPart.set(key, weighted);
+      merged.set(key, {
+        namespace: kr.namespace,
+        content: kr.content,
+        context: kr.context,
+        line: kr.line,
+        score: weighted,
+        source: 'keyword',
+      });
+    });
+
+    for (const vr of vectorResults) {
+      const key = HybridSearch._bucketKey(vr.namespace, vr.line);
+      const previousBest = vectorBest.get(key);
+      if (previousBest !== undefined && vr.score <= previousBest) {
+        // Keep the higher-scoring vector hit for this bucket.
+        continue;
+      }
+      vectorBest.set(key, vr.score);
+
+      const existing = merged.get(key);
+      const keywordScore = keywordPart.get(key);
+      if (existing !== undefined && keywordScore !== undefined) {
+        // Both searches found this location: combine the two weighted
+        // scores and keep the more specific keyword content and line.
+        existing.score = keywordScore + vectorWeight * vr.score;
+        existing.source = 'both';
+      } else {
+        merged.set(key, {
+          ...vr,
+          score: vectorWeight * vr.score,
+        });
+      }
+    }
+
+    // Sort by score descending, return top K
+    const ranked = Array.from(merged.values());
+    ranked.sort((a, b) => b.score - a.score);
+    return ranked.slice(0, topK);
+  }
+}