Files
flynn/src/memory/chunker.test.ts
T
William Valentin 88731a50e3 feat: add heartbeat monitor and vector memory search (Tier 2)
Heartbeat:
- HeartbeatMonitor with 5 checks: gateway, model, channels, memory, disk
- Configurable interval, failure threshold, notification channel
- Recovery notifications when health restores
- 25 new tests

Vector Memory Search:
- EmbeddingProvider interface with OpenAI, Gemini, Ollama, LlamaCpp backends
- SQLite-backed VectorStore with cosine similarity search
- Text chunker with paragraph-aware splitting and overlap
- HybridSearch merging keyword + vector results with configurable weight
- Background indexer with dirty-namespace tracking
- Graceful fallback to keyword search when embeddings unavailable
- 51 new tests

Config: automation.heartbeat + memory.embedding schema sections
Total: 950 tests passing, all types clean
2026-02-07 14:45:11 -08:00

107 lines
4.2 KiB
TypeScript

import { describe, it, expect } from 'vitest';
import { chunkText } from './chunker.js';
import type { Chunk } from './chunker.js';
describe('chunkText', () => {
// Blank input — the empty string or whitespace/newlines only — must produce no chunks.
it('returns empty array for empty content', () => {
  for (const blank of ['', ' \n\n ']) {
    expect(chunkText(blank, 'test')).toEqual([]);
  }
});
// Content well under the chunk size stays in one chunk that carries the
// namespace and a 1-based, inclusive line range.
it('returns single chunk for small content', () => {
  const content = 'Hello world\nSecond line';
  const result = chunkText(content, 'notes', { chunkSize: 1000, chunkOverlap: 0 });

  expect(result).toHaveLength(1);

  const [only] = result;
  expect(only.text).toBe('Hello world\nSecond line');
  expect(only.namespace).toBe('notes');
  expect(only.startLine).toBe(1);
  expect(only.endLine).toBe(2);
});
// Two paragraphs separated by a blank line, with a chunk size too small to
// hold both: the split is expected to land on the paragraph boundary.
it('splits on paragraph boundaries (double newline)', () => {
  const content = 'Paragraph one line one\nParagraph one line two\n\nParagraph two line one\nParagraph two line two';
  const pieces = chunkText(content, 'test', { chunkSize: 30, chunkOverlap: 0 });

  expect(pieces.length).toBeGreaterThanOrEqual(2);

  const [first, second] = pieces;
  expect(first.text).toContain('Paragraph one');
  expect(second.text).toContain('Paragraph two');
});
// Four one-character paragraphs fit comfortably in a single chunk, so they
// should be merged rather than emitted separately.
it('merges small paragraphs to reach target chunk size', () => {
  const content = 'A\n\nB\n\nC\n\nD';
  const merged = chunkText(content, 'test', { chunkSize: 100, chunkOverlap: 0 });

  expect(merged).toHaveLength(1);

  // The first and last paragraphs must both be present in the merged text.
  const [only] = merged;
  for (const paragraph of ['A', 'D']) {
    expect(only.text).toContain(paragraph);
  }
});
// Line numbers refer to positions in the original content (1-based), so the
// blank separator lines count: paragraphs sit on lines 1, 3 and 5.
it('tracks line numbers accurately', () => {
  const content = 'Line one\n\nLine three\n\nLine five';
  const chunks = chunkText(content, 'test', { chunkSize: 10, chunkOverlap: 0 });

  // The first chunk covers only the first line.
  const [head] = chunks;
  expect(head.startLine).toBe(1);
  expect(head.endLine).toBe(1);

  // Each later paragraph must be found in a chunk starting on its real line.
  const expectedStarts = [
    ['Line three', 3],
    ['Line five', 5],
  ] as const;
  for (const [needle, startLine] of expectedStarts) {
    const match = chunks.find((c) => c.text.includes(needle));
    expect(match).toBeDefined();
    expect(match?.startLine).toBe(startLine);
  }
});
// Three paragraphs chunked with a non-zero overlap: the content must be split,
// and the final chunk must begin no later than the line right after the blank
// separator that follows the previous chunk (earlier if overlap was carried over).
it('includes overlap between consecutive chunks', () => {
  // Create content with clear paragraphs that force splitting
  const para1 = 'First paragraph with enough text to matter';
  const para2 = 'Second paragraph with some more text';
  const para3 = 'Third paragraph and final content here';
  const content = `${para1}\n\n${para2}\n\n${para3}`;

  // Use a chunk size that forces splitting, with overlap
  const chunks = chunkText(content, 'test', { chunkSize: 50, chunkOverlap: 40 });

  // Assert the split unconditionally: guarding the expectations behind
  // `if (chunks.length >= 2)` would let the test pass without checking anything.
  expect(chunks.length).toBeGreaterThanOrEqual(2);

  const lastChunk = chunks[chunks.length - 1];
  const prevChunk = chunks[chunks.length - 2];

  // Paragraphs are separated by exactly one blank line, so a contiguous split
  // starts the next chunk at most 2 lines after the previous chunk ends; any
  // overlap pulls the start earlier. (A bound of `+ 5` could never fail on
  // this 5-line input.)
  expect(lastChunk.startLine).toBeLessThanOrEqual(prevChunk.endLine + 2);
});
// Every chunk produced from one call must carry the namespace passed in.
it('preserves namespace in all chunks', () => {
  const content = 'Para one\n\nPara two\n\nPara three';
  const chunks = chunkText(content, 'sessions/abc123', { chunkSize: 10, chunkOverlap: 0 });

  // Guard against a vacuous pass: with no chunks the loop below asserts nothing.
  expect(chunks.length).toBeGreaterThan(0);

  for (const chunk of chunks) {
    expect(chunk.namespace).toBe('sessions/abc123');
  }
});
// Runs of blank lines between paragraphs must not drop either paragraph.
it('handles content with multiple consecutive blank lines', () => {
  const content = 'First\n\n\n\nSecond';
  const chunks = chunkText(content, 'test', { chunkSize: 1000, chunkOverlap: 0 });

  // True when at least one chunk's text contains the given fragment.
  const appearsInSomeChunk = (fragment: string): boolean =>
    chunks.some((c) => c.text.includes(fragment));

  expect(chunks.length).toBeGreaterThanOrEqual(1);
  expect(appearsInSomeChunk('First')).toBe(true);
  expect(appearsInSomeChunk('Second')).toBe(true);
});
// A single line without any newline yields one chunk spanning line 1 only.
it('handles single-line content', () => {
  const result = chunkText('single line', 'test', { chunkSize: 100, chunkOverlap: 0 });

  expect(result).toHaveLength(1);

  const [only] = result;
  expect(only.text).toBe('single line');
  expect(only.startLine).toBe(1);
  expect(only.endLine).toBe(1);
});
});