feat(gateway): expose context usage and warning events

This commit is contained in:
William Valentin
2026-02-16 15:44:09 -08:00
parent 8758ea8f1c
commit fee8be1de0
11 changed files with 645 additions and 334 deletions
+31
View File
@@ -411,6 +411,37 @@ export function createGateway(deps: GatewayDeps): GatewayServer {
} }
} }
return results;
},
getContextUsage: () => {
const results: Array<{
sessionId: string;
budget: {
estimatedTokens: number;
contextWindow: number;
remainingTokens: number;
usagePct: number;
thresholdPct: number;
thresholdTokens: number;
shouldCompact: boolean;
};
}> = [];
const sessionBridge = gateway.getSessionBridge();
for (const entry of sessionBridge.getAllContextUsage()) {
results.push(entry);
}
const channelAgents = getChannelAgents();
if (channelAgents) {
for (const [sessionId, { orchestrator }] of channelAgents) {
results.push({
sessionId,
budget: orchestrator.getContextBudget(),
});
}
}
return results; return results;
}, },
}); });
+64
View File
@@ -9,6 +9,16 @@ import { initAuditLogger } from '../../audit/index.js';
describe('createAgentHandlers command fast-path', () => { describe('createAgentHandlers command fast-path', () => {
const mockAgent = { const mockAgent = {
process: vi.fn(async () => 'agent response'), process: vi.fn(async () => 'agent response'),
consumeContextAlert: vi.fn(() => undefined as unknown),
getContextBudget: vi.fn(() => ({
estimatedTokens: 100,
contextWindow: 200000,
remainingTokens: 199900,
usagePct: 0.05,
thresholdPct: 80,
thresholdTokens: 160000,
shouldCompact: false,
})),
getUsage: vi.fn(() => ({ getUsage: vi.fn(() => ({
primary: { inputTokens: 10, outputTokens: 5, calls: 1 }, primary: { inputTokens: 10, outputTokens: 5, calls: 1 },
delegation: {}, delegation: {},
@@ -169,12 +179,56 @@ describe('createAgentHandlers command fast-path', () => {
expect(mockAgent.process).not.toHaveBeenCalled(); expect(mockAgent.process).not.toHaveBeenCalled();
expect(((sent[0] as GatewayEvent).data as { content: string }).content).toContain('Set queue.mode=followup'); expect(((sent[0] as GatewayEvent).data as { content: string }).content).toContain('Set queue.mode=followup');
}); });
// Ordering contract for agent.send: when the agent reports a pending context alert,
// the handler must send a 'context_warning' event first and the 'done' event second.
it('emits context_warning event before done when orchestrator reports an alert', async () => {
// One-shot stub: only the next consumeContextAlert() call returns this alert,
// so the alert does not leak into other test cases sharing mockAgent.
mockAgent.consumeContextAlert.mockReturnValueOnce({
level: 'checkpoint',
message: 'Context usage is 86.0% (172000/200000 estimated tokens).',
budget: {
estimatedTokens: 172000,
contextWindow: 200000,
remainingTokens: 28000,
usagePct: 86,
thresholdPct: 80,
thresholdTokens: 160000,
shouldCompact: true,
},
actions: {
checkpointSaved: true,
autoCompacted: false,
checkpointNamespace: 'session/checkpoints/ws/conn-1',
},
});
// Capture every outbound message in send order so the sequence can be asserted.
const sent: OutboundMessage[] = [];
const send = vi.fn((msg: OutboundMessage) => sent.push(msg));
await handlers['agent.send']({
id: 6,
method: 'agent.send',
params: { message: 'hello', connectionId: 'conn-1' },
}, send);
// Exactly two messages are expected: the warning, then the final response.
expect(sent).toHaveLength(2);
expect((sent[0] as GatewayEvent).event).toBe('context_warning');
expect((sent[1] as GatewayEvent).event).toBe('done');
});
}); });
describe('createAgentHandlers queue policy resolution', () => { describe('createAgentHandlers queue policy resolution', () => {
it('passes resolved per-request queue policy into lane enqueue', async () => { it('passes resolved per-request queue policy into lane enqueue', async () => {
const mockAgent = { const mockAgent = {
process: vi.fn(async () => 'ok'), process: vi.fn(async () => 'ok'),
consumeContextAlert: vi.fn(() => undefined),
getContextBudget: vi.fn(() => ({
estimatedTokens: 0,
contextWindow: 128000,
remainingTokens: 128000,
usagePct: 0,
thresholdPct: 80,
thresholdTokens: 102400,
shouldCompact: false,
})),
getUsage: vi.fn(() => ({ getUsage: vi.fn(() => ({
primary: { inputTokens: 0, outputTokens: 0, calls: 0 }, primary: { inputTokens: 0, outputTokens: 0, calls: 0 },
delegation: {}, delegation: {},
@@ -234,6 +288,16 @@ describe('createAgentHandlers queue policy resolution', () => {
const sessionBridge = { const sessionBridge = {
getAgent: vi.fn(() => ({ getAgent: vi.fn(() => ({
process: vi.fn(async () => 'ok'), process: vi.fn(async () => 'ok'),
consumeContextAlert: vi.fn(() => undefined),
getContextBudget: vi.fn(() => ({
estimatedTokens: 0,
contextWindow: 128000,
remainingTokens: 128000,
usagePct: 0,
thresholdPct: 80,
thresholdTokens: 102400,
shouldCompact: false,
})),
getUsage: vi.fn(() => ({ getUsage: vi.fn(() => ({
primary: { inputTokens: 0, outputTokens: 0, calls: 0 }, primary: { inputTokens: 0, outputTokens: 0, calls: 0 },
delegation: {}, delegation: {},
+16
View File
@@ -97,6 +97,7 @@ export function createAgentHandlers(deps: AgentHandlerDeps) {
getStatus: () => `Gateway session active. Current model tier: ${agent.getModelTier()}`, getStatus: () => `Gateway session active. Current model tier: ${agent.getModelTier()}`,
getUsage: () => { getUsage: () => {
const usage = agent.getUsage(); const usage = agent.getUsage();
const budget = agent.getContextBudget();
const lines = [ const lines = [
'**Token Usage**', '**Token Usage**',
'', '',
@@ -114,6 +115,10 @@ export function createAgentHandlers(deps: AgentHandlerDeps) {
lines.push(''); lines.push('');
lines.push(`**Total:** ${usage.total.inputTokens.toLocaleString()} in / ${usage.total.outputTokens.toLocaleString()} out (${usage.total.calls} calls)`); lines.push(`**Total:** ${usage.total.inputTokens.toLocaleString()} in / ${usage.total.outputTokens.toLocaleString()} out (${usage.total.calls} calls)`);
lines.push('');
lines.push('**Context usage (estimated):** '
+ `${budget.estimatedTokens.toLocaleString()}/${budget.contextWindow.toLocaleString()} `
+ `(${budget.usagePct.toFixed(1)}%)`);
if (usage.total.estimatedCost > 0) { if (usage.total.estimatedCost > 0) {
lines.push(`**Estimated cost:** $${usage.total.estimatedCost.toFixed(4)}`); lines.push(`**Estimated cost:** $${usage.total.estimatedCost.toFixed(4)}`);
@@ -403,6 +408,17 @@ export function createAgentHandlers(deps: AgentHandlerDeps) {
const response = await agent.process(safeParams.message ?? '', attachments); const response = await agent.process(safeParams.message ?? '', attachments);
deps.metrics?.incrementMessages(); deps.metrics?.incrementMessages();
const contextAlert = agent.consumeContextAlert();
if (contextAlert) {
send(makeEvent(request.id, 'context_warning', contextAlert));
deps.metrics?.recordEvent({
timestamp: Date.now(),
level: 'warn',
source: 'context',
message: contextAlert.message,
context: { sessionId: laneId, level: contextAlert.level },
});
}
send(makeEvent(request.id, 'done', { content: response })); send(makeEvent(request.id, 'done', { content: response }));
} catch (err) { } catch (err) {
const message = err instanceof Error ? err.message : 'Unknown error'; const message = err instanceof Error ? err.message : 'Unknown error';
+64
View File
@@ -327,6 +327,60 @@ describe('system.tokenUsage handler', () => {
}); });
}); });
// Tests for the system.contextUsage RPC: the fallback when no callback is supplied,
// and pass-through of whatever the getContextUsage callback returns.
describe('system.contextUsage handler', () => {
// getContextUsage is deliberately omitted from the deps here; the handler is
// expected to answer with an empty sessions list rather than fail.
it('returns empty sessions when no getContextUsage provided', async () => {
const handlers = createSystemHandlers({
startTime: Date.now(),
version: '0.1.0',
getSessionCount: () => 0,
getToolCount: () => 0,
getConnectionCount: () => 0,
});
const req: GatewayRequest = { id: 21, method: 'system.contextUsage' };
const result = await handlers['system.contextUsage'](req) as GatewayResponse;
// The response must echo the request id.
expect(result.id).toBe(21);
const r = result.result as { sessions: unknown[] };
expect(r.sessions).toEqual([]);
});
// With a callback supplied, its entries should appear under result.sessions.
it('returns session context budget data from getContextUsage callback', async () => {
const mockUsage = [
{
sessionId: 'telegram:user1',
budget: {
estimatedTokens: 120000,
contextWindow: 200000,
remainingTokens: 80000,
usagePct: 60,
thresholdPct: 80,
thresholdTokens: 160000,
shouldCompact: false,
},
},
];
const handlers = createSystemHandlers({
startTime: Date.now(),
version: '0.1.0',
getSessionCount: () => 1,
getToolCount: () => 0,
getConnectionCount: () => 1,
getContextUsage: () => mockUsage,
});
const req: GatewayRequest = { id: 22, method: 'system.contextUsage' };
const result = await handlers['system.contextUsage'](req) as GatewayResponse;
const r = result.result as { sessions: typeof mockUsage };
expect(r.sessions).toHaveLength(1);
expect(r.sessions[0].sessionId).toBe('telegram:user1');
expect(r.sessions[0].budget.usagePct).toBe(60);
expect(r.sessions[0].budget.shouldCompact).toBe(false);
});
});
describe('system.sessionAnalytics handler', () => { describe('system.sessionAnalytics handler', () => {
it('returns empty analytics when callback is not provided', async () => { it('returns empty analytics when callback is not provided', async () => {
const handlers = createSystemHandlers({ const handlers = createSystemHandlers({
@@ -614,6 +668,16 @@ describe('tool handlers', () => {
describe('agent handlers', () => { describe('agent handlers', () => {
const mockAgent = { const mockAgent = {
process: vi.fn(async () => 'response text'), process: vi.fn(async () => 'response text'),
consumeContextAlert: vi.fn(() => undefined),
getContextBudget: vi.fn(() => ({
estimatedTokens: 0,
contextWindow: 128000,
remainingTokens: 128000,
usagePct: 0,
thresholdPct: 80,
thresholdTokens: 102400,
shouldCompact: false,
})),
setOnToolUse: vi.fn(), setOnToolUse: vi.fn(),
}; };
+21
View File
@@ -13,6 +13,20 @@ export interface TokenUsageEntry {
total: { inputTokens: number; outputTokens: number; calls: number; estimatedCost: number }; total: { inputTokens: number; outputTokens: number; calls: number; estimatedCost: number };
} }
/**
 * Per-session context budget report returned by system.contextUsage.
 *
 * One entry describes a single session; the handler returns a list of these
 * under `sessions`.
 */
export interface ContextUsageEntry {
/** Identifier of the session this budget belongs to. */
sessionId: string;
/** Estimated context-window budget for the session. */
budget: {
/** Estimated number of tokens currently occupying the context. */
estimatedTokens: number;
/** Size of the context window, in tokens. */
contextWindow: number;
/** NOTE(review): presumably contextWindow - estimatedTokens — confirm against the producer. */
remainingTokens: number;
/** NOTE(review): appears to be a 0–100 percentage (consumers render it with a '%' suffix) — confirm. */
usagePct: number;
/** NOTE(review): presumably the usage percentage at which compaction is triggered — confirm. */
thresholdPct: number;
/** NOTE(review): presumably thresholdPct expressed in tokens of contextWindow — confirm. */
thresholdTokens: number;
/** Whether the producer considers the session due for compaction. */
shouldCompact: boolean;
};
}
export interface PresenceEntry { export interface PresenceEntry {
channel: string; channel: string;
senderId: string; senderId: string;
@@ -63,6 +77,8 @@ export interface SystemHandlerDeps {
getUsage?: () => { totalSessions: number; activeConnections: number }; getUsage?: () => { totalSessions: number; activeConnections: number };
/** Optional callback to retrieve per-session token usage data. */ /** Optional callback to retrieve per-session token usage data. */
getTokenUsage?: () => TokenUsageEntry[]; getTokenUsage?: () => TokenUsageEntry[];
/** Optional callback to retrieve per-session context budget data. */
getContextUsage?: () => ContextUsageEntry[];
/** Optional callback to retrieve aggregated metrics snapshot. */ /** Optional callback to retrieve aggregated metrics snapshot. */
getMetrics?: () => MetricsSnapshot; getMetrics?: () => MetricsSnapshot;
/** Optional callback to retrieve session analytics. */ /** Optional callback to retrieve session analytics. */
@@ -202,6 +218,11 @@ export function createSystemHandlers(deps: SystemHandlerDeps) {
return makeResponse(request.id, { sessions }); return makeResponse(request.id, { sessions });
}, },
// Responds with `{ sessions }`, the per-session context budgets from the optional
// deps.getContextUsage callback. Falls back to an empty list when the callback
// is not configured (or yields null/undefined), so the call never fails for that reason.
'system.contextUsage': async (request: GatewayRequest): Promise<OutboundMessage> => {
const sessions = deps.getContextUsage?.() ?? [];
return makeResponse(request.id, { sessions });
},
'system.metrics': async (request: GatewayRequest): Promise<OutboundMessage> => { 'system.metrics': async (request: GatewayRequest): Promise<OutboundMessage> => {
if (!deps.getMetrics) { if (!deps.getMetrics) {
return makeResponse(request.id, {}); return makeResponse(request.id, {});
+25
View File
@@ -308,5 +308,30 @@ describe('protocol', () => {
data, data,
}); });
}); });
// makeEvent should wrap a context-warning payload into { id, event, data }
// without altering the payload.
it('creates a context warning event message', () => {
const data = {
level: 'warning',
message: 'Context usage is 76.0%',
budget: {
estimatedTokens: 76000,
contextWindow: 100000,
remainingTokens: 24000,
usagePct: 76,
thresholdPct: 80,
thresholdTokens: 80000,
shouldCompact: false,
},
// checkpointNamespace is intentionally absent in this payload.
actions: {
checkpointSaved: false,
autoCompacted: false,
},
};
expect(makeEvent(3, 'context_warning', data)).toEqual({
id: 3,
event: 'context_warning',
data,
});
});
}); });
}); });
+20
View File
@@ -93,6 +93,7 @@ export type EventType =
| 'content' | 'content'
| 'tool_start' | 'tool_start'
| 'tool_end' | 'tool_end'
| 'context_warning'
| 'attachment' | 'attachment'
| 'done' | 'done'
| 'error'; | 'error';
@@ -115,6 +116,25 @@ export interface ToolEndEventData {
}; };
} }
/**
 * Data payload describing a context-usage alert.
 * NOTE(review): presumably the `data` of the 'context_warning' event — confirm at the emit site.
 */
export interface ContextWarningEventData {
/** Alert level; one of the three literals below. NOTE(review): presumably ordered by increasing severity — confirm. */
level: 'warning' | 'checkpoint' | 'critical';
/** Text describing the alert. NOTE(review): presumably intended for display to the user — confirm. */
message: string;
/** Context budget snapshot attached to the alert. */
budget: {
/** Estimated number of tokens currently occupying the context. */
estimatedTokens: number;
/** Size of the context window, in tokens. */
contextWindow: number;
/** NOTE(review): presumably contextWindow - estimatedTokens — confirm against the producer. */
remainingTokens: number;
/** NOTE(review): appears to be a 0–100 percentage — confirm. */
usagePct: number;
/** NOTE(review): presumably the usage percentage at which compaction is triggered — confirm. */
thresholdPct: number;
/** NOTE(review): presumably thresholdPct expressed in tokens — confirm. */
thresholdTokens: number;
/** Whether the producer considers the session due for compaction. */
shouldCompact: boolean;
};
/** What the producer reports having done in response to the alert. */
actions: {
/** True when a checkpoint was saved. */
checkpointSaved: boolean;
/** True when the context was compacted automatically. */
autoCompacted: boolean;
/** NOTE(review): presumably where the checkpoint was stored; optional — confirm when it is set. */
checkpointNamespace?: string;
};
}
export interface AttachmentEventData { export interface AttachmentEventData {
mimeType: string; mimeType: string;
data?: string; data?: string;
+4 -1
View File
@@ -33,7 +33,7 @@ import {
createNodeHandlers, createNodeHandlers,
} from './handlers/index.js'; } from './handlers/index.js';
import { discoverServices } from './handlers/services.js'; import { discoverServices } from './handlers/services.js';
import type { TokenUsageEntry } from './handlers/system.js'; import type { TokenUsageEntry, ContextUsageEntry } from './handlers/system.js';
import type { NodeConnectionState } from './handlers/node.js'; import type { NodeConnectionState } from './handlers/node.js';
import type { SessionManager } from '../session/manager.js'; import type { SessionManager } from '../session/manager.js';
import type { Config } from '../config/index.js'; import type { Config } from '../config/index.js';
@@ -82,6 +82,8 @@ export interface GatewayServerConfig {
gmailHandler?: GmailWatcher; gmailHandler?: GmailWatcher;
/** Optional callback to retrieve per-session token usage data for the dashboard. */ /** Optional callback to retrieve per-session token usage data for the dashboard. */
getTokenUsage?: () => TokenUsageEntry[]; getTokenUsage?: () => TokenUsageEntry[];
/** Optional callback to retrieve per-session context usage data for the dashboard. */
getContextUsage?: () => ContextUsageEntry[];
/** Maximum allowed request body size for inbound HTTP POST bodies. */ /** Maximum allowed request body size for inbound HTTP POST bodies. */
maxRequestBodyBytes?: number; maxRequestBodyBytes?: number;
/** Per-connection WebSocket ingress rate limiting. */ /** Per-connection WebSocket ingress rate limiting. */
@@ -294,6 +296,7 @@ export class GatewayServer {
activeConnections: this.sessionBridge.connectionCount, activeConnections: this.sessionBridge.connectionCount,
}), }),
getTokenUsage: this.config.getTokenUsage, getTokenUsage: this.config.getTokenUsage,
getContextUsage: this.config.getContextUsage,
getMetrics: () => this.metrics.getSnapshot(), getMetrics: () => this.metrics.getSnapshot(),
getEvents: (opts) => this.metrics.getEvents(opts), getEvents: (opts) => this.metrics.getEvents(opts),
getActiveRequests: () => this.metrics.getActiveRequests(), getActiveRequests: () => this.metrics.getActiveRequests(),
+49
View File
@@ -200,6 +200,47 @@ export class SessionBridge {
return results; return results;
} }
/**
 * Report the estimated context budget of every session that has a connected client.
 *
 * Several clients may share one session; each sessionId is reported once,
 * using the agent of the first client encountered for that session.
 *
 * @returns One `{ sessionId, budget }` entry per distinct session, in the order
 *          the sessions were first seen while walking the client list.
 */
getAllContextUsage(): Array<{
  sessionId: string;
  budget: {
    estimatedTokens: number;
    contextWindow: number;
    remainingTokens: number;
    usagePct: number;
    thresholdPct: number;
    thresholdTokens: number;
    shouldCompact: boolean;
  };
}> {
  // Reuse the declared return type instead of spelling the shape out a second time.
  const usage: ReturnType<SessionBridge['getAllContextUsage']> = [];
  const reported = new Set<string>();

  for (const client of this.clients.values()) {
    const { sessionId } = client;
    if (!reported.has(sessionId)) {
      reported.add(sessionId);
      usage.push({ sessionId, budget: client.agent.getContextBudget() });
    }
  }

  return usage;
}
private getOrCreateAgent(sessionId: string): AgentOrchestrator { private getOrCreateAgent(sessionId: string): AgentOrchestrator {
let agent = this.agents.get(sessionId); let agent = this.agents.get(sessionId);
if (!agent) { if (!agent) {
@@ -233,6 +274,14 @@ export class SessionBridge {
keepTurns: config.compaction.keep_turns, keepTurns: config.compaction.keep_turns,
summaryMaxTokens: config.compaction.summary_max_tokens, summaryMaxTokens: config.compaction.summary_max_tokens,
importanceThreshold: config.compaction.importance_threshold, importanceThreshold: config.compaction.importance_threshold,
proactive: {
enabled: config.compaction.proactive.enabled,
warnPct: config.compaction.proactive.warn_pct,
checkpointPct: config.compaction.proactive.checkpoint_pct,
autoCompactPct: config.compaction.proactive.auto_compact_pct,
checkpointCooldownMs: config.compaction.proactive.checkpoint_cooldown_ms,
memoryNamespace: config.compaction.proactive.memory_namespace,
},
} : undefined, } : undefined,
modelName: config?.models.default.model, modelName: config?.models.default.model,
contextWindow: config?.models.default.context_window, contextWindow: config?.models.default.context_window,
+9
View File
@@ -593,6 +593,15 @@ async function sendMessage(client, overrideText) {
scrollToBottom(); scrollToBottom();
}); });
// Show context-usage warnings inline in the conversation: each warning becomes
// an assistant-styled note inserted just above the streaming placeholder.
stream.on('context_warning', (data) => {
  // Fall back to a generic notice when the event carries no (or an empty) message.
  const warningText = data?.message || 'Context usage is getting high.';

  const warningEl = document.createElement('div');
  warningEl.className = 'message assistant';
  // Rendered as a markdown blockquote via the sanitizing renderer.
  warningEl.innerHTML = renderSafeMarkdown(`> ${warningText}`);

  _elements.messages.insertBefore(warningEl, placeholder);
  scrollToBottom();
});
const done = await stream.result; const done = await stream.result;
// Replace placeholder with actual response // Replace placeholder with actual response
placeholder.classList.remove('streaming-cursor'); placeholder.classList.remove('streaming-cursor');
+9
View File
@@ -26,15 +26,18 @@ function truncateId(id) {
async function loadUsage(el, client) { async function loadUsage(el, client) {
let data; let data;
let contextData;
try { try {
data = await client.call('system.tokenUsage'); data = await client.call('system.tokenUsage');
contextData = await client.call('system.contextUsage');
} catch (err) { } catch (err) {
el.innerHTML = `<div class="empty-state">Failed to load usage: ${err.message}</div>`; el.innerHTML = `<div class="empty-state">Failed to load usage: ${err.message}</div>`;
return; return;
} }
const sessions = data?.sessions ?? []; const sessions = data?.sessions ?? [];
const contextBySession = new Map((contextData?.sessions ?? []).map(s => [s.sessionId, s.budget]));
// Compute totals across all sessions // Compute totals across all sessions
let totalInput = 0; let totalInput = 0;
@@ -89,6 +92,10 @@ async function loadUsage(el, client) {
const outTok = s.total?.outputTokens ?? 0; const outTok = s.total?.outputTokens ?? 0;
const calls = s.total?.calls ?? 0; const calls = s.total?.calls ?? 0;
const cost = s.total?.estimatedCost ?? 0; const cost = s.total?.estimatedCost ?? 0;
const budget = contextBySession.get(s.sessionId);
const contextCell = budget
? `${budget.usagePct.toFixed(1)}% (${formatNumber(budget.estimatedTokens)}/${formatNumber(budget.contextWindow)})`
: '<span class="text-muted">-</span>';
// Build delegation breakdown if present // Build delegation breakdown if present
const delegationEntries = Object.entries(s.delegation ?? {}); const delegationEntries = Object.entries(s.delegation ?? {});
@@ -107,6 +114,7 @@ async function loadUsage(el, client) {
<td>${formatNumber(inTok + outTok)}</td> <td>${formatNumber(inTok + outTok)}</td>
<td>${formatNumber(calls)}</td> <td>${formatNumber(calls)}</td>
<td>${formatCost(cost)}</td> <td>${formatCost(cost)}</td>
<td>${contextCell}</td>
<td>${delegationCell}</td> <td>${delegationCell}</td>
</tr> </tr>
`; `;
@@ -122,6 +130,7 @@ async function loadUsage(el, client) {
<th>Total</th> <th>Total</th>
<th>Calls</th> <th>Calls</th>
<th>Cost</th> <th>Cost</th>
<th>Context</th>
<th>Delegation</th> <th>Delegation</th>
</tr> </tr>
</thead> </thead>