173 lines
6.2 KiB
TypeScript
173 lines
6.2 KiB
TypeScript
import type { Message } from '../models/types.js';
|
|
import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
|
|
import type { MemoryStore } from '../memory/store.js';
|
|
import { COMPACTION_SYSTEM_PROMPT, MEMORY_EXTRACTION_PROMPT, buildCompactionPrompt } from '../backends/native/prompts.js';
|
|
import { estimateMessageTokens } from './tokens.js';
|
|
import { getMessageText } from '../models/media.js';
|
|
import { selectImportantMessages } from './weighting.js';
|
|
|
|
/**
 * Settings that control how conversation history is compacted.
 *
 * `compactHistory` in this file reads `keepTurns`, `summaryMaxTokens` and
 * `importanceThreshold`. `thresholdPct` and `proactive` are not read in this
 * file; presumably the caller that decides *when* to compact consumes them
 * (TODO confirm against the call sites).
 */
export interface CompactionConfig {
  /** Percentage of context window that triggers compaction (default: 80). */
  thresholdPct: number;

  /**
   * Number of recent turns (user+assistant pairs) to always keep intact.
   * `compactHistory` treats one turn as two messages.
   */
  keepTurns: number;

  /** Maximum tokens for the compaction summary response. */
  summaryMaxTokens: number;

  /**
   * Preserve messages at or above this importance score from compaction.
   * Passed to `selectImportantMessages` as its `threshold` option.
   */
  importanceThreshold: number;

  /** Optional proactive context usage thresholds and actions. */
  proactive?: ProactiveCompactionConfig;
}
|
|
|
|
/**
 * Thresholds and actions for reacting to context-window usage before the
 * hard compaction threshold is reached.
 *
 * Nothing in this file reads these fields; the percentages are presumably
 * expressed on the same 0-100 scale as `CompactionConfig.thresholdPct`
 * (TODO confirm against the consumer).
 */
export interface ProactiveCompactionConfig {
  /** Enable proactive context warnings/checkpoints before hard compaction cliffs. */
  enabled: boolean;

  /** Emit warning signals when usage crosses this percentage. */
  warnPct: number;

  /** Save a checkpoint summary to memory when usage crosses this percentage. */
  checkpointPct: number;

  /** Auto-run compaction when usage crosses this percentage. */
  autoCompactPct: number;

  /** Cooldown window between checkpoint writes, in milliseconds. */
  checkpointCooldownMs: number;

  /** Memory namespace base for proactive checkpoints. */
  memoryNamespace: string;
}
|
|
|
|
/** Outcome of a `compactHistory` call. */
export interface CompactionResult {
  /**
   * The resulting history. When a summary was produced this is
   * `[...preservedImportantMessages, summary, ...recentMessages]`; when there
   * was nothing to summarize it is the preserved and recent messages only, or
   * the caller's original array if the history was already short enough.
   */
  messages: Message[];

  /** Number of messages that were compacted (removed). */
  compactedCount: number;

  /** Estimated tokens before compaction. */
  tokensBefore: number;

  /** Estimated tokens after compaction. */
  tokensAfter: number;

  /** The raw summary text produced by the compaction model (populated when compaction ran). */
  summary?: string;
}
|
|
|
|
/**
 * Baseline compaction settings.
 *
 * Proactive handling ships disabled; its thresholds only take effect once a
 * caller sets `proactive.enabled`.
 */
export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
  thresholdPct: 80,
  keepTurns: 4,
  summaryMaxTokens: 1024,
  importanceThreshold: 1,
  proactive: {
    enabled: false,
    warnPct: 75,
    checkpointPct: 85,
    autoCompactPct: 95,
    checkpointCooldownMs: 300_000, // 5 minutes
    memoryNamespace: 'session/checkpoints',
  },
};
|
|
|
|
/**
 * Compact a conversation history by summarizing its older messages.
 *
 * The trailing `config.keepTurns * 2` messages are kept verbatim. Older
 * messages are either preserved as-is (when `selectImportantMessages` picks
 * them) or replaced by one assistant-role summary obtained through
 * `orchestrator.delegate`. When a `memoryStore` is supplied and `autoExtract`
 * is not `false`, a second, best-effort delegation extracts persistent facts
 * from the summarized text and appends them to memory.
 *
 * @param opts.messages - Conversation history; the array itself is not mutated.
 * @param opts.orchestrator - Used to pick delegation tiers and run the
 *   `compaction` and `memory_extraction` tasks.
 * @param opts.config - Supplies `keepTurns`, `summaryMaxTokens` and
 *   `importanceThreshold`. A `keepTurns` that is zero, negative or NaN keeps
 *   no recent messages.
 * @param opts.memoryStore - Destination for extracted facts; omit to skip
 *   extraction.
 * @param opts.autoExtract - Set to `false` to skip extraction even when a
 *   store is supplied.
 * @param opts.usePersonalAssistantPrompt - Use
 *   `buildCompactionPrompt({ personalAssistant: true })` instead of
 *   `COMPACTION_SYSTEM_PROMPT` for the summary.
 * @param opts.memoryExtractionNamespace - Namespace for extracted facts
 *   (defaults to `'global'`).
 * @returns The new history plus before/after token estimates. `summary` is
 *   set only when the summary delegation ran.
 * @throws Whatever the summary delegation rejects with. Failures in the
 *   memory-extraction phase are logged and swallowed.
 */
export async function compactHistory(opts: {
  messages: Message[];
  orchestrator: AgentOrchestrator;
  config: CompactionConfig;
  memoryStore?: MemoryStore;
  autoExtract?: boolean;
  usePersonalAssistantPrompt?: boolean;
  memoryExtractionNamespace?: string;
}): Promise<CompactionResult> {
  const { messages, orchestrator, config } = opts;
  const tokensBefore = estimateMessageTokens(messages);

  // One turn is a user+assistant pair, hence two messages per turn. Clamp to a
  // non-negative integer so a zero, negative or NaN setting means "keep none".
  const requestedKeep = config.keepTurns * 2;
  const keepCount = requestedKeep > 0 ? Math.floor(requestedKeep) : 0;

  if (messages.length <= keepCount) {
    return { messages, compactedCount: 0, tokensBefore, tokensAfter: tokensBefore };
  }

  // Split on an explicit index. slice(0, -keepCount) / slice(-keepCount) would
  // invert the split for keepCount === 0, because -0 is treated as index 0.
  const splitIndex = messages.length - keepCount;
  const toCompact = messages.slice(0, splitIndex);
  const toKeep = messages.slice(splitIndex);

  // The summary is emitted with role 'assistant', so the kept tail must not
  // start with assistant messages; move any leading ones into the older part.
  const firstNonAssistant = toKeep.findIndex((msg) => msg.role !== 'assistant');
  const leadingAssistantCount = firstNonAssistant === -1 ? toKeep.length : firstNonAssistant;
  toCompact.push(...toKeep.splice(0, leadingAssistantCount));

  const preservedImportant = selectImportantMessages(toCompact, {
    threshold: config.importanceThreshold,
    maxMessages: Math.max(1, config.keepTurns),
  });
  const preservedIndexes = new Set(preservedImportant.map((item) => item.index));
  const preservedMessages = preservedImportant.map((item) => item.message);

  const toSummarize = toCompact.filter((_, index) => !preservedIndexes.has(index));
  const formattedConversation = toSummarize
    .map((msg) => `${msg.role}: ${getMessageText(msg)}`)
    .join('\n\n');

  // Nothing with text to summarize: return preserved + recent without calling the model.
  if (formattedConversation.trim().length === 0) {
    const compactedMessages = [...preservedMessages, ...toKeep];
    return {
      messages: compactedMessages,
      compactedCount: messages.length - compactedMessages.length,
      tokensBefore,
      tokensAfter: estimateMessageTokens(compactedMessages),
    };
  }

  // Phase 1: summarize the older messages.
  const systemPrompt = opts.usePersonalAssistantPrompt
    ? buildCompactionPrompt({ personalAssistant: true })
    : COMPACTION_SYSTEM_PROMPT;

  const result = await orchestrator.delegate({
    task: 'compaction',
    tier: orchestrator.getDelegationTier('compaction'),
    systemPrompt,
    message: formattedConversation,
    maxTokens: config.summaryMaxTokens,
  });

  const summaryMessage: Message = {
    role: 'assistant',
    content: '[Summary of earlier conversation]\n\n' + result.content,
  };

  // Phase 2: Extract persistent facts and append to memory (if enabled)
  if (opts.memoryStore && opts.autoExtract !== false) {
    await extractFactsToMemory(
      orchestrator,
      opts.memoryStore,
      formattedConversation,
      opts.memoryExtractionNamespace ?? 'global',
    );
  }

  const compactedMessages = [...preservedMessages, summaryMessage, ...toKeep];
  return {
    messages: compactedMessages,
    compactedCount: toSummarize.length,
    tokensBefore,
    tokensAfter: estimateMessageTokens(compactedMessages),
    summary: result.content,
  };
}

/**
 * Best-effort: ask the model for persistent facts in `conversation` and append
 * them to `memoryStore` under `namespace`. Never throws; failures are logged.
 */
async function extractFactsToMemory(
  orchestrator: AgentOrchestrator,
  memoryStore: MemoryStore,
  conversation: string,
  namespace: string,
): Promise<void> {
  try {
    const extraction = await orchestrator.delegate({
      task: 'memory_extraction',
      tier: orchestrator.getDelegationTier('memory_extraction'),
      systemPrompt: MEMORY_EXTRACTION_PROMPT,
      message: `Extract persistent facts from this conversation:\n\n${conversation}`,
      maxTokens: 512,
    });

    // Only write meaningful content. Replies containing "no facts" are skipped;
    // presumably that is the prompt's nothing-to-store reply (confirm against
    // MEMORY_EXTRACTION_PROMPT). NOTE(review): this substring match would also
    // drop a genuine extraction that happens to contain "no facts".
    const extractedContent = extraction.content.trim();
    if (extractedContent.length > 0 && !extractedContent.toLowerCase().includes('no facts')) {
      memoryStore.write(namespace, extractedContent, 'append');
      console.log(`[Flynn:memory] Extracted ${extractedContent.length} chars of facts to ${namespace} memory`);
    }
  } catch (error: unknown) {
    // Memory extraction is best-effort — don't fail compaction if it errors
    console.warn('[Flynn:memory] Failed to extract facts during compaction:', error);
  }
}
|