/** Keyword metadata extracted from a piece of text by {@link SessionIndexer}. */
export interface HistoryMetadata {
  /** Distinct tokens, most frequent first; equal counts are ordered by ascending token. */
  keywords: string[];
  /** The leading (at most three) entries of `keywords`. */
  topics: string[];
}

/** Options accepted by the {@link SessionIndexer} constructor. */
export interface HistoryIndexerConfig {
  /** Upper bound on the number of keywords returned per indexed text. */
  maxKeywords: number;
}

/** Lower-case words that are never emitted as tokens. */
const STOPWORDS: ReadonlySet<string> = new Set([
  'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', 'has', 'he',
  'in', 'is', 'it', 'its', 'of', 'on', 'or', 'that', 'the', 'to', 'was', 'were',
  'will', 'with', 'you', 'your', 'we', 'this', 'they', 'their', 'our', 'but',
  'not', 'can', 'just', 'into', 'about', 'after', 'before', 'than', 'then',
]);

/**
 * Splits text into lower-cased tokens.
 *
 * Only ASCII letters and digits are kept; every other character (including
 * accented letters) acts as a separator. Tokens shorter than three characters
 * and tokens listed in `STOPWORDS` are dropped. Duplicates are preserved, in
 * input order.
 *
 * @param text - Raw text to tokenize.
 * @returns The surviving tokens in the order they appear in `text`.
 */
export function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter(token => token.length >= 3 && !STOPWORDS.has(token));
}

/** Derives frequency-ranked keywords and topics from free text. */
export class SessionIndexer {
  /** Number of leading keywords that are also reported as topics. */
  private static readonly MAX_TOPICS = 3;

  private readonly maxKeywords: number;

  /**
   * @param config - Indexer options. A negative `maxKeywords` is treated as 0.
   */
  constructor(config: HistoryIndexerConfig) {
    // `slice(0, n)` with a negative n counts from the END of the array, so an
    // unclamped -1 would return "all keywords but the last" rather than none.
    this.maxKeywords = config.maxKeywords < 0 ? 0 : config.maxKeywords;
  }

  /**
   * Counts token occurrences in `text` and returns the most frequent ones.
   *
   * @param text - Text to index; it is tokenized with {@link tokenize}.
   * @returns Up to `maxKeywords` keywords ordered by descending frequency
   *   (ties by ascending token), plus the first three of them as topics.
   */
  indexText(text: string): HistoryMetadata {
    const frequencies = new Map<string, number>();
    for (const token of tokenize(text)) {
      frequencies.set(token, (frequencies.get(token) ?? 0) + 1);
    }

    const keywords = Array.from(frequencies.entries())
      .sort(([tokenA, countA], [tokenB, countB]) => {
        if (countA !== countB) {
          return countB - countA;
        }
        // Plain code-unit comparison keeps the tie-break independent of the
        // host locale, so the same text always yields the same ordering.
        if (tokenA < tokenB) {
          return -1;
        }
        return tokenA > tokenB ? 1 : 0;
      })
      .slice(0, this.maxKeywords)
      .map(([token]) => token);

    return {
      keywords,
      // slice already stops at the array's length, so no extra bound is needed.
      topics: keywords.slice(0, SessionIndexer.MAX_TOPICS),
    };
  }
}