diff --git a/README.md b/README.md index 7eff0e2..2704a29 100644 --- a/README.md +++ b/README.md @@ -1024,6 +1024,11 @@ sessions: Flynn writes structured audit events to `audit.path`, including tool execution, session lifecycle, and user actions (`user.action`) from both channel and gateway requests. +Session lifecycle now includes proactive context maintenance events: +- `session.compact` for normal compaction passes +- `session.checkpoint` when proactive checkpoint summaries are written to memory +- `session.auto_compact` when proactive critical-threshold auto-compaction runs + ## Gateway Lock Single-client mode for the WebSocket gateway. When enabled, only one WebSocket connection is allowed at a time. Additional connections are rejected with close code `4003`. diff --git a/docs/operations/OPERATOR_PACK.md b/docs/operations/OPERATOR_PACK.md index 3461542..ff50477 100644 --- a/docs/operations/OPERATOR_PACK.md +++ b/docs/operations/OPERATOR_PACK.md @@ -70,3 +70,4 @@ automation: - Heartbeat notification noise is controlled by `automation.heartbeat.notify_cooldown` (default `30m`). - If `notify_cooldown` is invalid, Flynn falls back to `30m` and logs a warning. - Re-running setup Automation detects an existing Operator Pack and asks whether to reconfigure. +- For context-pressure observability, monitor audit events `session.checkpoint` and `session.auto_compact`. diff --git a/docs/plans/state.json b/docs/plans/state.json index be6f909..e89ee21 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -3505,7 +3505,7 @@ "status": "completed", "date": "2026-02-16", "updated": "2026-02-17", - "summary": "Implemented proactive context-window management end-to-end: orchestrator now exposes estimated context budget, emits staged context alerts, writes checkpoint summaries to memory near threshold, and can auto-compact proactively. Gateway now emits `context_warning` stream events during `agent.send`, serves `system.contextUsage` snapshots, and dashboard usage UI includes context budget visibility. Added config schema support under `compaction.proactive`, mapped runtime wiring in both WS SessionBridge and channel routing paths, and updated protocol/docs/default config examples with focused tests. Follow-up added `/context` command fast-path visibility and dedicated audit events for proactive checkpoint writes and proactive auto-compaction.", + "summary": "Implemented proactive context-window management end-to-end: orchestrator now exposes estimated context budget, emits staged context alerts, writes checkpoint summaries to memory near threshold, and can auto-compact proactively. Gateway now emits `context_warning` stream events during `agent.send`, serves `system.contextUsage` snapshots, and dashboard usage UI includes context budget visibility. Added config schema support under `compaction.proactive`, mapped runtime wiring in both WS SessionBridge and channel routing paths, and updated protocol/docs/default config examples with focused tests. Follow-up added `/context` command fast-path visibility, TUI parser/help/autocomplete + handler parity for `/context`, dedicated audit events for proactive checkpoint writes and proactive auto-compaction, and operator/docs references for those events.", "files_modified": [ "src/context/compaction.ts", "src/backends/native/prompts.ts", @@ -3525,6 +3525,11 @@ "src/daemon/services.ts", "src/gateway/ui/pages/chat.js", "src/gateway/ui/pages/usage.js", + "src/gateway/ui/pages/dashboard.js", + "src/frontends/tui/commands.ts", + "src/frontends/tui/commands.test.ts", + "src/frontends/tui/minimal.ts", + "src/frontends/tui/components/App.tsx", "src/commands/builtin/index.ts", "src/commands/types.ts", "src/commands/index.ts", @@ -3533,11 +3538,12 @@ "src/audit/logger.ts", "docs/api/PROTOCOL.md", "README.md", + "docs/operations/OPERATOR_PACK.md", "docs/performance/TUNING.md", "config/default.yaml", "docs/plans/state.json" ], - "test_status": "pnpm test:run src/backends/native/orchestrator.test.ts src/config/schema.test.ts src/gateway/handlers/agent.test.ts src/gateway/handlers/handlers.test.ts src/gateway/protocol.test.ts src/commands/builtin/index.test.ts + pnpm typecheck passing" + "test_status": "pnpm test:run src/backends/native/orchestrator.test.ts src/config/schema.test.ts src/gateway/handlers/agent.test.ts src/gateway/handlers/handlers.test.ts src/gateway/protocol.test.ts src/commands/builtin/index.test.ts src/frontends/tui/commands.test.ts + pnpm typecheck passing" } }, "overall_progress": { diff --git a/src/frontends/tui/commands.test.ts b/src/frontends/tui/commands.test.ts index 44039c3..81314a0 100644 --- a/src/frontends/tui/commands.test.ts +++ b/src/frontends/tui/commands.test.ts @@ -35,6 +35,10 @@ describe('parseCommand', () => { expect(parseCommand('/usage')).toEqual({ type: 'usage' }); }); + it('parses /context command', () => { + expect(parseCommand('/context')).toEqual({ type: 'context' }); + }); + it('parses /verbose command', () => { expect(parseCommand('/verbose')).toEqual({ type: 'verbose' }); }); @@ -117,6 +121,7 @@ describe('getHelpText', () => { expect(help).toContain('/reset'); expect(help).toContain('/compact'); expect(help).toContain('/usage'); + expect(help).toContain('/context'); expect(help).toContain('/verbose'); expect(help).toContain('/queue'); expect(help).toContain('/elevate'); diff --git a/src/frontends/tui/commands.ts b/src/frontends/tui/commands.ts index 597feff..09e6ade 100644 --- a/src/frontends/tui/commands.ts +++ b/src/frontends/tui/commands.ts @@ -6,6 +6,7 @@ export type Command = | { type: 'fullscreen' } | { type: 'compact' } | { type: 'usage' } + | { type: 'context' } | { type: 'verbose' } | { type: 'model'; name?: string; providerModel?: string } | { type: 'backend'; provider?: string } @@ -55,6 +56,11 @@ export function parseCommand(input: string): Command | null { return { type: 'usage' }; } + // Context + if (trimmed === '/context') { + return { type: 'context' }; + } + // Verbose if (trimmed === '/verbose') { return { type: 'verbose' }; @@ -162,6 +168,7 @@ Commands: /reset, /clear, /new Clear conversation history /compact Compact conversation history /usage Show token usage and estimated cost + /context Show estimated context-window usage /verbose Toggle verbose mode (show raw streaming and tool output) /status Show session info and token usage /fullscreen, /fs Switch to fullscreen mode @@ -184,6 +191,7 @@ export const SLASH_COMMANDS = [ '/new', '/compact', '/usage', + '/context', '/verbose', '/status', '/fullscreen', @@ -207,6 +215,7 @@ export const COMMAND_TOOLTIPS: Record = { '/new': 'Start a new conversation', '/compact': 'Compact conversation history to save context space', '/usage': 'Show token usage and estimated cost', + '/context': 'Show estimated context-window usage', '/verbose': 'Toggle verbose mode (show raw streaming and tool output)', '/status': 'Show session info and token usage', '/fullscreen': 'Switch to fullscreen mode', diff --git a/src/frontends/tui/components/App.tsx b/src/frontends/tui/components/App.tsx index a64e7f9..e61cbdd 100644 --- a/src/frontends/tui/components/App.tsx +++ b/src/frontends/tui/components/App.tsx @@ -12,6 +12,7 @@ import type { HookEngine, HookResult } from '../../../hooks/index.js'; import type { ModelConfig, ModelProvider } from '../../../config/schema.js'; import { MODEL_PROVIDERS } from '../../../config/schema.js'; import { createClientFromConfig } from '../../../daemon/index.js'; +import { estimateMessageTokens, getContextWindow } from '../../../context/tokens.js'; /** Format a tool name like "gmail.list" -> "Gmail: List" */ function formatToolName(name: string): string { @@ -239,6 +240,29 @@ export function App({ return; } + case 'context': { + const history = session.getHistory(); + const estimated = estimateMessageTokens(history); + const tier = modelRouter?.getTier() ?? 'default'; + const modelName = modelRouter?.getLabel(tier) ?? model; + const window = getContextWindow(modelName); + const usagePct = window > 0 ? (estimated / window) * 100 : 0; + const thresholdPct = 80; + const thresholdTokens = Math.floor((thresholdPct / 100) * window); + const remaining = Math.max(0, window - estimated); + const text = [ + 'Context Usage (estimated)', + '', + `Model: ${modelName}`, + `Used: ${estimated.toLocaleString()} / ${window.toLocaleString()} tokens (${usagePct.toFixed(1)}%)`, + `Remaining: ${remaining.toLocaleString()} tokens`, + `Compaction threshold: ${thresholdPct}% (${thresholdTokens.toLocaleString()} tokens)`, + `Should compact: ${estimated > thresholdTokens ? 'yes' : 'no'}`, + ].join('\n'); + setMessages(prev => [...prev, session.addMessage({ role: 'assistant', content: text })]); + return; + } + case 'verbose': { const next = !verbose; setVerbose(next); diff --git a/src/frontends/tui/minimal.ts b/src/frontends/tui/minimal.ts index b6c14f8..8afafbe 100644 --- a/src/frontends/tui/minimal.ts +++ b/src/frontends/tui/minimal.ts @@ -25,6 +25,7 @@ import { import type { PairingManager } from '../../channels/pairing.js'; import { getColoredBanner } from './banner.js'; import type { HookEngine } from '../../hooks/index.js'; +import { estimateMessageTokens, getContextWindow } from '../../context/tokens.js'; export { parseCommand, type Command }; @@ -331,6 +332,10 @@ export class MinimalTui { this.handleUsageCommand(); break; + case 'context': + this.handleContextCommand(); + break; + case 'verbose': this.handleVerboseCommand(); break; @@ -382,6 +387,25 @@ export class MinimalTui { this.printStatus(); } + private handleContextCommand(): void { + const history = this.config.session.getHistory(); + const estimated = estimateMessageTokens(history); + const tier = this.config.modelRouter?.getTier() ?? 'default'; + const modelName = this.config.modelRouter?.getLabel(tier) ?? 'unknown'; + const window = getContextWindow(modelName); + const usagePct = window > 0 ? (estimated / window) * 100 : 0; + const thresholdPct = 80; + const thresholdTokens = Math.floor((thresholdPct / 100) * window); + const remaining = Math.max(0, window - estimated); + + console.log(`${colors.gray}Context usage (estimated):${colors.reset}`); + console.log(` model: ${modelName}`); + console.log(` used: ${estimated.toLocaleString()} / ${window.toLocaleString()} tokens (${usagePct.toFixed(1)}%)`); + console.log(` remaining: ${remaining.toLocaleString()} tokens`); + console.log(` compaction threshold: ${thresholdPct}% (${thresholdTokens.toLocaleString()} tokens)`); + console.log(` should compact: ${estimated > thresholdTokens ? 'yes' : 'no'}\n`); + } + private handleVerboseCommand(): void { this.verbose = !this.verbose; console.log(`${colors.gray}Verbose mode:${colors.reset} ${this.verbose ? 'on' : 'off'}\n`); diff --git a/src/gateway/ui/pages/dashboard.js b/src/gateway/ui/pages/dashboard.js index 9d827d9..9991e25 100644 --- a/src/gateway/ui/pages/dashboard.js +++ b/src/gateway/ui/pages/dashboard.js @@ -75,6 +75,11 @@ function renderSkeleton(el) {
Loading...
+

Context Health

+
+
Loading...
+
+

Event Stream

Loading events...
@@ -346,6 +351,66 @@ function updateSessionAnalytics(analyticsData) { `; } +function updateContextHealth(contextData) { + const el = document.getElementById('ops-context-health'); + if (!el) {return;} + + const sessions = contextData?.sessions ?? []; + if (sessions.length === 0) { + el.innerHTML = '
No active context usage snapshots
'; + return; + } + + const sorted = [...sessions].sort((a, b) => (b.budget?.usagePct ?? 0) - (a.budget?.usagePct ?? 0)); + const top = sorted.slice(0, 8); + const highest = top[0]?.budget?.usagePct ?? 0; + const overThreshold = sessions.filter(s => (s.budget?.shouldCompact ?? false)).length; + + const summary = ` +
+
+
Highest Usage
+
${highest.toFixed(1)}%
+
+
+
Sessions Near Limit
+
${overThreshold}
+
+
+
Active Snapshots
+
${sessions.length}
+
+
+ `; + + const rows = top.map((entry) => { + const budget = entry.budget ?? {}; + const usage = budget.usagePct ?? 0; + const cls = usage >= 95 ? 'text-error' : usage >= 85 ? 'status-warning' : ''; + return ` + ${escapeHtml(entry.sessionId)} + ${usage.toFixed(1)}% + ${formatNumber(budget.estimatedTokens ?? 0)} / ${formatNumber(budget.contextWindow ?? 0)} + ${budget.shouldCompact ? 'yes' : 'no'} + `; + }).join(''); + + el.innerHTML = ` + ${summary} + + + + + + + + + + ${rows} +
SessionUsageEstimated TokensShould Compact
+ `; +} + function _updateChannels(channelsData) { const el = document.getElementById('ops-channels'); if (!el) {return;} @@ -414,12 +479,13 @@ async function fetchFast(client) { async function fetchSlow(client) { try { - const [health, services, sessionAnalytics] = await Promise.all([ + const [health, services, sessionAnalytics, contextUsage] = await Promise.all([ client.call('system.health'), client.call('system.services'), client.call('system.sessionAnalytics', { days: 14, topLimit: 5 }), + client.call('system.contextUsage'), ]); - return { health, services, sessionAnalytics }; + return { health, services, sessionAnalytics, contextUsage }; } catch { return null; } @@ -451,6 +517,7 @@ async function loadDashboard(el, client) { if (slow) { updateServices(slow.services); updateSessionAnalytics(slow.sessionAnalytics); + updateContextHealth(slow.contextUsage); } // Fast refresh: 3 seconds for metrics, events, requests @@ -473,6 +540,7 @@ async function loadDashboard(el, client) { updateCounters(_lastMetrics, data.health); updateServices(data.services); updateSessionAnalytics(data.sessionAnalytics); + updateContextHealth(data.contextUsage); } }, 10000); }