bd1880a44c
- Add MetricsCollector class with counters, model call ring buffer, event ring buffer, and active request tracking - Add system.metrics, system.events, system.activeRequests RPC handlers - Add GET /health unauthenticated HTTP endpoint for Docker HEALTHCHECK - Add totalPending() to LaneQueue for queue depth metrics - Add 20 tests for MetricsCollector
110 lines
4.3 KiB
TypeScript
110 lines
4.3 KiB
TypeScript
import type { GatewayRequest, OutboundMessage } from '../protocol.js';
|
|
import { makeResponse, makeError, ErrorCode } from '../protocol.js';
|
|
import type { MetricsSnapshot, EventEntry, ActiveRequestInfo } from '../metrics.js';
|
|
|
|
/**
 * Per-session token usage report returned by `system.tokenUsage`.
 *
 * Every bucket carries the same three counters (`inputTokens`, `outputTokens`,
 * `calls`); `total` additionally carries an estimated cost.
 */
export interface TokenUsageEntry {
  /** Identifier of the session this report describes. */
  sessionId: string;
  /** Counters for the primary bucket (presumably the session's main model; confirm against the producer). */
  primary: { inputTokens: number; outputTokens: number; calls: number };
  /**
   * Counters for delegated work, keyed by a string.
   * NOTE(review): the key looks like a delegate/model name; confirm against the producer.
   */
  delegation: Record<string, { inputTokens: number; outputTokens: number; calls: number }>;
  /** Totals for the session, plus `estimatedCost` (unit not shown here; confirm). */
  total: { inputTokens: number; outputTokens: number; calls: number; estimatedCost: number };
}
|
|
|
|
/**
 * Dependencies injected into `createSystemHandlers`.
 *
 * The required members are read by `system.health` (and `getToolCount` by
 * `system.usage`). Every optional member enables one `system.*` handler; when
 * it is omitted that handler answers with an empty or zeroed payload, except
 * `system.restart`, which answers with an error.
 */
export interface SystemHandlerDeps {
  /** Start timestamp in epoch milliseconds; uptime is derived as `Date.now() - startTime`. */
  startTime: number;
  /** Version string reported by `system.health`. */
  version: string;
  /** Returns the session count reported by `system.health`. */
  getSessionCount: () => number;
  /** Returns the tool count reported by `system.health` and `system.usage`. */
  getToolCount: () => number;
  /** Returns the connection count reported by `system.health`. */
  getConnectionCount: () => number;
  /** Optional callback to trigger a graceful restart. If not provided, system.restart returns an error. */
  restart?: () => Promise<void>;
  /** Optional callback listing channels and their status for `system.channels`. */
  getChannels?: () => Array<{ name: string; status: string }>;
  /** Optional callback supplying the counters that `system.usage` spreads into its response. */
  getUsage?: () => { totalSessions: number; activeConnections: number };
  /** Optional callback to retrieve per-session token usage data. */
  getTokenUsage?: () => TokenUsageEntry[];
  /** Optional callback to retrieve aggregated metrics snapshot. */
  getMetrics?: () => MetricsSnapshot;
  /** Optional callback to retrieve recent events, optionally filtered by `level` and capped by `limit`. */
  getEvents?: (opts?: { level?: string; limit?: number }) => EventEntry[];
  /** Optional callback to retrieve active requests. */
  getActiveRequests?: () => ActiveRequestInfo[];
}
|
|
|
|
/**
 * Builds the `system.*` RPC handler table.
 *
 * Each handler takes the incoming request and resolves to the message produced
 * by `makeResponse` / `makeError` for that request id. Handlers backed by an
 * optional dependency fall back to an empty (or zeroed) payload when that
 * dependency is not supplied; `system.restart` returns an error instead.
 *
 * @param deps - Callbacks and static values the handlers read from.
 * @returns An object mapping method names to async handlers.
 */
export function createSystemHandlers(deps: SystemHandlerDeps) {
  /** Whole seconds elapsed since `deps.startTime`. */
  const uptimeSeconds = (): number => Math.floor((Date.now() - deps.startTime) / 1000);

  return {
    /** Liveness summary: status, uptime, version and basic counters. */
    'system.health': async (request: GatewayRequest): Promise<OutboundMessage> => {
      return makeResponse(request.id, {
        status: 'ok',
        uptime: uptimeSeconds(),
        version: deps.version,
        sessions: deps.getSessionCount(),
        tools: deps.getToolCount(),
        connections: deps.getConnectionCount(),
      });
    },

    /** Acknowledges the request, then triggers `deps.restart` asynchronously. */
    'system.restart': async (request: GatewayRequest): Promise<OutboundMessage> => {
      if (!deps.restart) {
        return makeError(request.id, ErrorCode.InternalError, 'Restart not available in this environment');
      }

      // Build the confirmation first so the client can be told a restart is coming.
      const response = makeResponse(request.id, { restarting: true });

      // Defer to a macrotask rather than a microtask: a microtask queued here
      // runs before the awaiting caller resumes, i.e. before the response can
      // be written. A zero-delay timer lets the caller's continuation run first.
      setTimeout(() => {
        // Start inside a promise chain so a synchronous throw from restart()
        // is logged like a rejection instead of escaping as an uncaught error.
        void Promise.resolve()
          .then(() => deps.restart?.())
          .catch((err: unknown) => {
            console.error('Restart failed:', err);
          });
      }, 0);

      return response;
    },

    /** Lists channels, or an empty list when no channel source is configured. */
    'system.channels': async (request: GatewayRequest): Promise<OutboundMessage> => {
      return makeResponse(request.id, { channels: deps.getChannels?.() ?? [] });
    },

    /** Uptime, usage counters (zeroed when unavailable) and tool count. */
    'system.usage': async (request: GatewayRequest): Promise<OutboundMessage> => {
      const uptime = uptimeSeconds();
      const usage = deps.getUsage?.() ?? { totalSessions: 0, activeConnections: 0 };
      return makeResponse(request.id, {
        uptime,
        ...usage,
        tools: deps.getToolCount(),
      });
    },

    /** Per-session token usage, or an empty list when unavailable. */
    'system.tokenUsage': async (request: GatewayRequest): Promise<OutboundMessage> => {
      return makeResponse(request.id, { sessions: deps.getTokenUsage?.() ?? [] });
    },

    /** Aggregated metrics snapshot, or an empty object when unavailable. */
    'system.metrics': async (request: GatewayRequest): Promise<OutboundMessage> => {
      return makeResponse(request.id, deps.getMetrics?.() ?? {});
    },

    /** Recent events filtered by the optional `level` / `limit` request params. */
    'system.events': async (request: GatewayRequest): Promise<OutboundMessage> => {
      // The query is only parsed when an event source exists (optional call short-circuits).
      const events = deps.getEvents?.(parseEventQuery(request.params)) ?? [];
      return makeResponse(request.id, { events });
    },

    /** Currently active requests, or an empty list when unavailable. */
    'system.activeRequests': async (request: GatewayRequest): Promise<OutboundMessage> => {
      return makeResponse(request.id, { requests: deps.getActiveRequests?.() ?? [] });
    },
  };
}

/**
 * Extracts the `system.events` filters from client-supplied request params.
 *
 * Params are untrusted input, so each field is checked at runtime: `level` is
 * kept only if it is a string and `limit` only if it is a finite number.
 * Anything else becomes `undefined`, which the event source treats as "not set".
 * Both keys are always present on the result.
 */
function parseEventQuery(params: unknown): { level?: string; limit?: number } {
  const candidate =
    typeof params === 'object' && params !== null
      ? (params as { level?: unknown; limit?: unknown })
      : {};
  const { level, limit } = candidate;
  return {
    level: typeof level === 'string' ? level : undefined,
    limit: typeof limit === 'number' && Number.isFinite(limit) ? limit : undefined,
  };
}
|