88731a50e3
Heartbeat: - HeartbeatMonitor with 5 checks: gateway, model, channels, memory, disk - Configurable interval, failure threshold, notification channel - Recovery notifications when health restores - 25 new tests Vector Memory Search: - EmbeddingProvider interface with OpenAI, Gemini, Ollama, LlamaCpp backends - SQLite-backed VectorStore with cosine similarity search - Text chunker with paragraph-aware splitting and overlap - HybridSearch merging keyword + vector results with configurable weight - Background indexer with dirty-namespace tracking - Graceful fallback to keyword search when embeddings unavailable - 51 new tests Config: automation.heartbeat + memory.embedding schema sections Total: 950 tests passing, all types clean
107 lines
4.2 KiB
TypeScript
107 lines
4.2 KiB
TypeScript
import { describe, it, expect } from 'vitest';
|
|
import { chunkText } from './chunker.js';
|
|
import type { Chunk } from './chunker.js';
|
|
|
|
describe('chunkText', () => {
|
|
it('returns empty array for empty content', () => {
  // Cover both a zero-length string and a whitespace-only string; each is
  // expected to produce no chunks at all.
  const blankInputs = ['', ' \n\n '];

  for (const blank of blankInputs) {
    expect(chunkText(blank, 'test')).toEqual([]);
  }
});
|
|
|
|
it('returns single chunk for small content', () => {
  // Two short lines with a chunkSize of 1000: the expectation is one chunk
  // holding the whole input.
  const input = 'Hello world\nSecond line';
  const result = chunkText(input, 'notes', { chunkSize: 1000, chunkOverlap: 0 });

  expect(result).toHaveLength(1);

  // The sole chunk carries the full text, the namespace it was given, and
  // 1-based line bounds spanning both input lines.
  const [only] = result;
  expect(only.text).toBe('Hello world\nSecond line');
  expect(only.namespace).toBe('notes');
  expect(only.startLine).toBe(1);
  expect(only.endLine).toBe(2);
});
|
|
|
|
it('splits on paragraph boundaries (double newline)', () => {
  // Two two-line paragraphs separated by a blank line, chunked with a
  // chunkSize of 30 and no overlap.
  const twoParagraphs = 'Paragraph one line one\nParagraph one line two\n\nParagraph two line one\nParagraph two line two';
  const pieces = chunkText(twoParagraphs, 'test', { chunkSize: 30, chunkOverlap: 0 });

  // At least two chunks are expected, the first drawn from paragraph one
  // and the second from paragraph two.
  expect(pieces.length).toBeGreaterThanOrEqual(2);

  const [first, second] = pieces;
  expect(first.text).toContain('Paragraph one');
  expect(second.text).toContain('Paragraph two');
});
|
|
|
|
it('merges small paragraphs to reach target chunk size', () => {
  // Four one-character paragraphs with a chunkSize of 100: the expectation
  // is that they are combined rather than emitted one chunk per paragraph.
  const tinyParagraphs = 'A\n\nB\n\nC\n\nD';
  const result = chunkText(tinyParagraphs, 'test', { chunkSize: 100, chunkOverlap: 0 });

  expect(result).toHaveLength(1);

  // The first and last paragraphs must both appear in the merged chunk.
  const [merged] = result;
  expect(merged.text).toContain('A');
  expect(merged.text).toContain('D');
});
|
|
|
|
it('tracks line numbers accurately', () => {
  // Text sits on physical lines 1, 3 and 5; lines 2 and 4 are blank.
  const source = 'Line one\n\nLine three\n\nLine five';
  const result = chunkText(source, 'test', { chunkSize: 10, chunkOverlap: 0 });

  // The opening chunk is expected to start and end on line 1.
  const [opening] = result;
  expect(opening.startLine).toBe(1);
  expect(opening.endLine).toBe(1);

  // Whichever chunk holds 'Line three' must report line 3 as its start.
  const third = result.find(({ text }) => text.includes('Line three'));
  expect(third).toBeDefined();
  expect(third!.startLine).toBe(3);

  // Whichever chunk holds 'Line five' must report line 5 as its start.
  const fifth = result.find(({ text }) => text.includes('Line five'));
  expect(fifth).toBeDefined();
  expect(fifth!.startLine).toBe(5);
});
|
|
|
|
it('includes overlap between consecutive chunks', () => {
  // Three clearly separated paragraphs; the combined text is much longer
  // than the chunkSize used below, so a split is the intended outcome.
  const para1 = 'First paragraph with enough text to matter';
  const para2 = 'Second paragraph with some more text';
  const para3 = 'Third paragraph and final content here';
  const content = `${para1}\n\n${para2}\n\n${para3}`;

  // Use a chunk size that forces splitting, with overlap.
  const chunks = chunkText(content, 'test', { chunkSize: 50, chunkOverlap: 40 });

  // Assert the split unconditionally. Previously the only expectation was
  // wrapped in `if (chunks.length >= 2)`, so the test passed without running
  // any assertion whenever the chunker returned fewer than two chunks.
  expect(chunks.length).toBeGreaterThanOrEqual(2);

  const lastChunk = chunks[chunks.length - 1];
  const prevChunk = chunks[chunks.length - 2];

  // The final chunk must begin no more than 5 lines past the end of the one
  // before it, i.e. the chunks either share lines or follow in sequence.
  // NOTE(review): this bound is loose and does not by itself prove that text
  // is shared between the chunks; tighten it once the chunker's overlap
  // contract (line-based vs. character-based) is confirmed.
  expect(lastChunk.startLine).toBeLessThanOrEqual(prevChunk.endLine + 5);
});
|
|
|
|
it('preserves namespace in all chunks', () => {
  // Three short paragraphs chunked with a small chunkSize and a namespace
  // that contains a path separator.
  const content = 'Para one\n\nPara two\n\nPara three';
  const chunks = chunkText(content, 'sessions/abc123', { chunkSize: 10, chunkOverlap: 0 });

  // Guard against a vacuous pass: with zero chunks the loop below would not
  // execute a single assertion and the test would succeed regardless.
  expect(chunks.length).toBeGreaterThan(0);

  // Every chunk must carry the namespace exactly as it was passed in.
  for (const chunk of chunks) {
    expect(chunk.namespace).toBe('sessions/abc123');
  }
});
|
|
|
|
it('handles content with multiple consecutive blank lines', () => {
  // Two words separated by a run of four newlines (three blank lines).
  const gappedText = 'First\n\n\n\nSecond';
  const result = chunkText(gappedText, 'test', { chunkSize: 1000, chunkOverlap: 0 });

  // True when at least one returned chunk contains the given substring.
  const appearsInSomeChunk = (needle: string) =>
    result.some((piece) => piece.text.includes(needle));

  // The number of chunks is not pinned down; only that something is
  // returned and neither word is lost.
  expect(result.length).toBeGreaterThanOrEqual(1);
  expect(appearsInSomeChunk('First')).toBe(true);
  expect(appearsInSomeChunk('Second')).toBe(true);
});
|
|
|
|
it('handles single-line content', () => {
  // Input without any newline at all.
  const result = chunkText('single line', 'test', { chunkSize: 100, chunkOverlap: 0 });

  expect(result).toHaveLength(1);

  // The one chunk holds the text unchanged and both line bounds are 1.
  const [only] = result;
  expect(only.text).toBe('single line');
  expect(only.startLine).toBe(1);
  expect(only.endLine).toBe(1);
});
|
|
});
|