import type { Message } from '../models/types.js';
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
import type { MemoryStore } from '../memory/store.js';
import {
  COMPACTION_SYSTEM_PROMPT,
  MEMORY_EXTRACTION_PROMPT,
  buildCompactionPrompt,
} from '../backends/native/prompts.js';
import { estimateMessageTokens } from './tokens.js';
import { getMessageText } from '../models/media.js';
import { selectImportantMessages } from './weighting.js';

/** Tunables controlling when and how conversation history is compacted. */
export interface CompactionConfig {
  /** Percentage of context window that triggers compaction (default: 80). */
  thresholdPct: number;
  /** Number of recent turns (user+assistant pairs) to always keep intact. */
  keepTurns: number;
  /** Maximum tokens for the compaction summary response. */
  summaryMaxTokens: number;
  /** Preserve messages at or above this importance score from compaction. */
  importanceThreshold: number;
  /** Optional proactive context usage thresholds and actions. */
  proactive?: ProactiveCompactionConfig;
}

/** Settings for proactive warnings/checkpoints ahead of a hard compaction. */
export interface ProactiveCompactionConfig {
  /** Enable proactive context warnings/checkpoints before hard compaction cliffs. */
  enabled: boolean;
  /** Emit warning signals when usage crosses this percentage. */
  warnPct: number;
  /** Save a checkpoint summary to memory when usage crosses this percentage. */
  checkpointPct: number;
  /** Auto-run compaction when usage crosses this percentage. */
  autoCompactPct: number;
  /** Cooldown window between checkpoint writes. */
  checkpointCooldownMs: number;
  /** Memory namespace base for proactive checkpoints. */
  memoryNamespace: string;
}

/** Outcome of a {@link compactHistory} call. */
export interface CompactionResult {
  /**
   * The compacted messages: [...preservedImportant, summary, ...recentMessages].
   * The summary entry is absent when nothing needed summarizing, and the input
   * array is returned unchanged when the history is short enough to keep whole.
   */
  messages: Message[];
  /**
   * Number of messages that were compacted (removed). When a summary is
   * produced this counts the summarized messages; the inserted summary message
   * is not subtracted.
   */
  compactedCount: number;
  /** Estimated tokens before compaction. */
  tokensBefore: number;
  /** Estimated tokens after compaction. */
  tokensAfter: number;
  /** The raw summary text produced by the compaction model (populated when compaction ran). */
  summary?: string;
}

/** Default compaction settings; proactive behaviour is disabled by default. */
export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
  thresholdPct: 80,
  keepTurns: 4,
  summaryMaxTokens: 1024,
  importanceThreshold: 1,
  proactive: {
    enabled: false,
    warnPct: 75,
    checkpointPct: 85,
    autoCompactPct: 95,
    checkpointCooldownMs: 300_000,
    memoryNamespace: 'session/checkpoints',
  },
};

/**
 * Best-effort extraction of persistent facts from the summarized conversation
 * into the memory store. Any failure is logged and swallowed so that it can
 * never fail the surrounding compaction.
 */
async function extractFactsToMemory(
  orchestrator: AgentOrchestrator,
  memoryStore: MemoryStore,
  formattedConversation: string,
  namespace: string,
): Promise<void> {
  try {
    const extractionTier = orchestrator.getDelegationTier('memory_extraction');
    const extraction = await orchestrator.delegate({
      task: 'memory_extraction',
      tier: extractionTier,
      systemPrompt: MEMORY_EXTRACTION_PROMPT,
      message: `Extract persistent facts from this conversation:\n\n${formattedConversation}`,
      maxTokens: 512,
    });

    // Only write if the extraction produced meaningful content. The
    // "no facts" substring check is a heuristic for an empty-handed reply.
    const extractedContent = extraction.content.trim();
    if (extractedContent.length > 0 && !extractedContent.toLowerCase().includes('no facts')) {
      // NOTE(review): the result of write() is not awaited; if MemoryStore.write
      // is asynchronous, a rejection would escape this try/catch — confirm.
      memoryStore.write(namespace, extractedContent, 'append');
      console.log(`[Flynn:memory] Extracted ${extractedContent.length} chars of facts to ${namespace} memory`);
    }
  } catch (error) {
    // Memory extraction is best-effort — don't fail compaction if it errors
    console.warn('[Flynn:memory] Failed to extract facts during compaction:', error);
  }
}

/**
 * Compacts a conversation by summarizing older messages while keeping the most
 * recent turns and any messages selected as important.
 *
 * Steps:
 * 1. Split the history into an older part and the last `keepTurns * 2` messages.
 * 2. Move leading assistant messages from the kept tail into the older part so
 *    the tail does not start with an assistant message right after the
 *    assistant-role summary.
 * 3. Ask `selectImportantMessages` which older messages to preserve verbatim
 *    (at most `max(1, keepTurns)`); the rest are summarized via the
 *    orchestrator's `'compaction'` delegation.
 * 4. Optionally run a second `'memory_extraction'` delegation over the same
 *    text and append the result to the memory store.
 *
 * @param opts.messages Full conversation history; not mutated.
 * @param opts.orchestrator Used to pick delegation tiers and run delegations.
 * @param opts.config Compaction settings (`keepTurns`, `summaryMaxTokens`,
 *   `importanceThreshold` are read here). `keepTurns` of 0 or less keeps no
 *   recent messages.
 * @param opts.memoryStore When provided, enables fact extraction.
 * @param opts.autoExtract Fact extraction runs unless this is explicitly `false`.
 * @param opts.usePersonalAssistantPrompt Use the personal-assistant variant of
 *   the compaction system prompt.
 * @param opts.memoryExtractionNamespace Namespace for extracted facts
 *   (defaults to `'global'`).
 * @returns The compacted messages with before/after token estimates.
 * @throws Rejects if the compaction delegation rejects. Memory-extraction
 *   errors are logged and do not reject.
 */
export async function compactHistory(opts: {
  messages: Message[];
  orchestrator: AgentOrchestrator;
  config: CompactionConfig;
  memoryStore?: MemoryStore;
  autoExtract?: boolean;
  usePersonalAssistantPrompt?: boolean;
  memoryExtractionNamespace?: string;
}): Promise<CompactionResult> {
  const { messages, orchestrator, config } = opts;
  const tokensBefore = estimateMessageTokens(messages);

  // Clamp at zero and truncate so the split below is a plain index. The
  // previous `slice(0, -keepCount)` / `slice(-keepCount)` form degenerated for
  // keepCount === 0 (since -0 === 0, nothing was ever compacted).
  const keepCount = Math.max(0, Math.trunc(config.keepTurns * 2));

  if (messages.length <= keepCount) {
    return {
      messages,
      compactedCount: 0,
      tokensBefore,
      tokensAfter: tokensBefore,
    };
  }

  const splitIndex = messages.length - keepCount;
  const toCompact = messages.slice(0, splitIndex);
  const toKeep = messages.slice(splitIndex);

  // Ensure toKeep starts with a non-assistant message to avoid
  // assistant→assistant after the compaction summary (which has role 'assistant').
  while (toKeep[0]?.role === 'assistant') {
    const shifted = toKeep.shift();
    if (shifted !== undefined) {
      toCompact.push(shifted);
    }
  }

  const preservedImportant = selectImportantMessages(toCompact, {
    threshold: config.importanceThreshold,
    maxMessages: Math.max(1, config.keepTurns),
  });
  const preservedSet = new Set(preservedImportant.map((item) => item.index));
  const preservedMessages = preservedImportant.map((item) => item.message);
  const toSummarize = toCompact.filter((_, index) => !preservedSet.has(index));
  const formattedConversation = toSummarize
    .map((msg) => `${msg.role}: ${getMessageText(msg)}`)
    .join('\n\n');

  // Nothing with text left to summarize: skip the model calls entirely.
  if (formattedConversation.trim().length === 0) {
    const compactedMessages = [...preservedMessages, ...toKeep];
    return {
      messages: compactedMessages,
      compactedCount: messages.length - compactedMessages.length,
      tokensBefore,
      tokensAfter: estimateMessageTokens(compactedMessages),
    };
  }

  // Phase 1: Summarize the older, non-preserved messages.
  const tier = orchestrator.getDelegationTier('compaction');
  const systemPrompt = opts.usePersonalAssistantPrompt
    ? buildCompactionPrompt({ personalAssistant: true })
    : COMPACTION_SYSTEM_PROMPT;

  const result = await orchestrator.delegate({
    task: 'compaction',
    tier,
    systemPrompt,
    message: formattedConversation,
    maxTokens: config.summaryMaxTokens,
  });

  const summaryMessage: Message = {
    role: 'assistant',
    content: '[Summary of earlier conversation]\n\n' + result.content,
  };

  // Phase 2: Extract persistent facts and append to memory (if enabled)
  if (opts.memoryStore && opts.autoExtract !== false) {
    await extractFactsToMemory(
      orchestrator,
      opts.memoryStore,
      formattedConversation,
      opts.memoryExtractionNamespace ?? 'global',
    );
  }

  const compactedMessages = [...preservedMessages, summaryMessage, ...toKeep];
  return {
    messages: compactedMessages,
    compactedCount: toSummarize.length,
    tokensBefore,
    tokensAfter: estimateMessageTokens(compactedMessages),
    summary: result.content,
  };
}