// flynn/src/memory/hybrid-search.test.ts

import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { HybridSearch } from './hybrid-search.js';
import type { HybridSearchResult } from './hybrid-search.js';
import type { MemoryStore, SearchResult } from './store.js';
import type { VectorStore, VectorSearchResult } from './vector-store.js';
import type { EmbeddingProvider } from './embeddings.js';

/**
 * Build a stand-in MemoryStore for tests.
 *
 * `search` always hands back the supplied `results`; `read` and
 * `getContextForPrompt` yield an empty string; `listNamespaces` and
 * `getDirtyNamespaces` yield an empty array; `write` and `markAllDirty`
 * are bare spies with no implementation.
 *
 * @param results - Keyword hits that the stubbed `search` should return.
 * @returns The stub, cast to MemoryStore (only the members above exist).
 */
function mockMemoryStore(results: SearchResult[]): MemoryStore {
  const emptyText = () => '';
  const emptyList = () => [];

  const stub = {
    search: vi.fn(() => results),
    read: vi.fn(emptyText),
    write: vi.fn(),
    listNamespaces: vi.fn(emptyList),
    getContextForPrompt: vi.fn(emptyText),
    getDirtyNamespaces: vi.fn(emptyList),
    markAllDirty: vi.fn(),
  };

  // Partial mock: the double cast is intentional, the stub is not a full MemoryStore.
  return stub as unknown as MemoryStore;
}

/**
 * Build a stand-in VectorStore for tests.
 *
 * `search` always hands back the supplied `results`; `hasContentHash`
 * reports `false`; `count` reports `0`; `upsertChunks`, `deleteNamespace`
 * and `close` are bare spies with no implementation.
 *
 * @param results - Vector hits that the stubbed `search` should return.
 * @returns The stub, cast to VectorStore (only the members above exist).
 */
function mockVectorStore(results: VectorSearchResult[]): VectorStore {
  const stub = {
    search: vi.fn(() => results),
    upsertChunks: vi.fn(),
    deleteNamespace: vi.fn(),
    hasContentHash: vi.fn(() => false),
    count: vi.fn(() => 0),
    close: vi.fn(),
  };

  // Partial mock: the double cast is intentional, the stub is not a full VectorStore.
  return stub as unknown as VectorStore;
}

/**
 * Build a deterministic EmbeddingProvider for tests.
 *
 * The stubbed `embed` resolves to one vector per input text; every vector
 * has `dims` entries, each equal to 0.1, regardless of the text content.
 *
 * @param dims - Length of each produced vector, also exposed as `dimensions`.
 * @returns A provider whose `embed` is a spy returning the fixed vectors.
 */
function mockEmbeddingProvider(dims: number = 4): EmbeddingProvider {
  // A fresh array per text so no two embeddings share the same instance.
  const fixedVector = () => new Array(dims).fill(0.1);

  return {
    dimensions: dims,
    embed: vi.fn(async (texts: string[]) => texts.map(fixedVector)),
  };
}

/**
 * Tests for HybridSearch.search, run entirely against mocked keyword,
 * vector, and embedding collaborators.
 */
describe('HybridSearch', () => {
  /** Inputs for one scenario; omitted result lists default to empty. */
  interface Scenario {
    keyword?: SearchResult[];
    vector?: VectorSearchResult[];
    provider?: EmbeddingProvider;
    weight: number;
  }

  /** Wire a HybridSearch to mocks that serve the scenario's canned results. */
  const buildSearch = ({ keyword = [], vector = [], provider, weight }: Scenario): HybridSearch =>
    new HybridSearch(
      mockMemoryStore(keyword),
      mockVectorStore(vector),
      provider ?? mockEmbeddingProvider(),
      weight,
    );

  describe('search', () => {
    it('returns keyword results when no vector results exist', async () => {
      const hybrid = buildSearch({
        keyword: [
          { namespace: 'notes', line: 5, content: 'fox jumped', context: 'the fox jumped over' },
        ],
        weight: 0.7,
      });

      const results = await hybrid.search('fox');

      expect(results).toHaveLength(1);
      const [hit] = results;
      expect(hit.namespace).toBe('notes');
      expect(hit.source).toBe('keyword');
    });

    it('returns vector results when no keyword results exist', async () => {
      const hybrid = buildSearch({
        vector: [
          { namespace: 'journal', chunkText: 'semantic match', startLine: 10, endLine: 20, score: 0.9 },
        ],
        weight: 0.7,
      });

      const results = await hybrid.search('meaning');

      expect(results).toHaveLength(1);
      const [hit] = results;
      expect(hit.namespace).toBe('journal');
      expect(hit.source).toBe('vector');
    });

    it('merges keyword and vector results', async () => {
      const hybrid = buildSearch({
        keyword: [
          { namespace: 'notes', line: 5, content: 'fox keyword', context: 'the fox keyword hit' },
        ],
        vector: [
          { namespace: 'journal', chunkText: 'fox semantic', startLine: 10, endLine: 20, score: 0.85 },
        ],
        weight: 0.7,
      });

      const results = await hybrid.search('fox');

      expect(results).toHaveLength(2);
      // One hit from each backend must survive the merge.
      const foundIn = results.map((entry) => entry.namespace);
      for (const expected of ['notes', 'journal']) {
        expect(foundIn).toContain(expected);
      }
    });

    it('deduplicates results from same namespace and nearby lines', async () => {
      // The keyword hit (line 5) falls inside the vector chunk (lines 4-8)
      // of the same namespace, so both backends point at one location.
      const hybrid = buildSearch({
        keyword: [{ namespace: 'notes', line: 5, content: 'fox hit', context: 'context' }],
        vector: [
          { namespace: 'notes', chunkText: 'fox hit too', startLine: 4, endLine: 8, score: 0.9 },
        ],
        weight: 0.7,
      });

      const results = await hybrid.search('fox');

      // Expect a single collapsed entry attributed to both backends.
      expect(results).toHaveLength(1);
      expect(results[0].source).toBe('both');
    });

    it('applies hybrid weight to scoring', async () => {
      // Weight 0.9 leans heavily towards the vector side.
      const hybrid = buildSearch({
        keyword: [{ namespace: 'notes', line: 100, content: 'keyword only', context: 'ctx' }],
        vector: [
          { namespace: 'journal', chunkText: 'vector only', startLine: 200, endLine: 210, score: 0.95 },
        ],
        weight: 0.9,
      });

      const results = await hybrid.search('query');
      expect(results).toHaveLength(2);

      const fromVector = results.find((entry) => entry.source === 'vector');
      const fromKeyword = results.find((entry) => entry.source === 'keyword');
      expect(fromVector).toBeDefined();
      expect(fromKeyword).toBeDefined();

      // With a high vector weight the vector hit must outscore the keyword hit.
      expect(fromVector!.score).toBeGreaterThan(fromKeyword!.score);
    });

    it('falls back to keyword search when vector search fails', async () => {
      // Embedding always rejects, which makes the vector path unusable.
      const failingProvider: EmbeddingProvider = {
        dimensions: 4,
        embed: vi.fn(async () => { throw new Error('API error'); }),
      };

      const hybrid = buildSearch({
        keyword: [{ namespace: 'notes', line: 1, content: 'fallback', context: 'ctx' }],
        provider: failingProvider,
        weight: 0.7,
      });

      // The rejection must be absorbed: search resolves with keyword hits only.
      const results = await hybrid.search('test');

      expect(results).toHaveLength(1);
      expect(results[0].source).toBe('keyword');
    });

    it('respects topK limit', async () => {
      // Ten keyword hits, each in its own namespace.
      const keyword: SearchResult[] = [];
      for (let i = 0; i < 10; i++) {
        keyword.push({
          namespace: `ns${i}`,
          line: i + 1,
          content: `result ${i}`,
          context: `ctx ${i}`,
        });
      }

      const hybrid = buildSearch({ keyword, weight: 0.5 });

      const results = await hybrid.search('query', 3);

      expect(results).toHaveLength(3);
    });

    it('returns empty array when both searches find nothing', async () => {
      const hybrid = buildSearch({ weight: 0.7 });

      const results = await hybrid.search('nonexistent');

      expect(results).toEqual([]);
    });
  });
});