From 478c7529a7b8118be51e9a65103e8c2144e31140 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Tue, 23 Jun 2026 11:16:23 -0700 Subject: [PATCH] feat(hooks): emit per-run token usage and duration on run.end The stats layer reads usage/duration only from run.end, but neither framework populated them, so tokens/cost/avg-duration were always 0. - hermes: accumulate token usage across each run's api-result calls in session state and attach the summed usage plus a computed duration_ms (from a stored runStartedAt) onto run.end. metric.snapshot emission is unchanged, so there is no double counting. - claude-code: store runStartedAt and use it as a duration_ms fallback at all run.end sites. Usage is unavailable from CC hook inputs. Live verification: a real hermes run now reports duration_ms and total_tokens on run.end; dashboard tokens_today/avg_duration_ms, both previously 0, now populate. cost_today stays 0 (no provider emits cost through the hooks). Co-Authored-By: Claude Opus 4.8 --- hooks/claude-code/handler.js | 10 ++--- hooks/claude-code/handler.ts | 11 ++--- hooks/hermes/handler.js | 86 +++++++++++++++++++++++++++++------- hooks/hermes/handler.ts | 52 +++++++++++++++++++++- 4 files changed, 130 insertions(+), 29 deletions(-) diff --git a/hooks/claude-code/handler.js b/hooks/claude-code/handler.js index 6ed5399..0674555 100755 --- a/hooks/claude-code/handler.js +++ b/hooks/claude-code/handler.js @@ -335,7 +335,7 @@ async function handleSessionStart(input) { } const runId = randomUUID2(); activeRuns.set(sessionKey, runId); - saveState(sessionKey, { runId, spans: {} }); + saveState(sessionKey, { runId, runStartedAt: Date.now(), spans: {} }); const contextWindow = getContextWindow(input); enqueue(buildEnvelope(FRAMEWORK, HOST, "session.start", sessionKey, { attributes: { @@ -362,7 +362,7 @@ async function handleSessionEnd(input) { const runId = sessionKey ? activeRuns.get(sessionKey) || state.runId : void 0; const usage = getUsage(input); const contextWindow = getContextWindow(input); - const duration = pickNumber(input.duration_ms, input.elapsed_ms); + const duration = pickNumber(input.duration_ms, input.elapsed_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : void 0); if (runId) { enqueue(buildEnvelope(FRAMEWORK, HOST, "run.end", sessionKey, { runId, @@ -398,7 +398,7 @@ async function handlePromptSubmit(input) { runId, payload: { status: "success", - duration_ms: pickNumber(input.elapsed_ms, input.duration_ms) + duration_ms: pickNumber(input.elapsed_ms, input.duration_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : void 0) } })); } @@ -414,7 +414,7 @@ async function handlePromptSubmit(input) { const newRunId = randomUUID2(); if (sessionKey) { activeRuns.set(sessionKey, newRunId); - saveState(sessionKey, { runId: newRunId, spans: {} }); + saveState(sessionKey, { runId: newRunId, runStartedAt: Date.now(), spans: {} }); } enqueue(buildEnvelope(FRAMEWORK, HOST, "run.start", sessionKey, { runId: newRunId, @@ -613,10 +613,10 @@ async function handleNotification(input) { const notificationType = pickString(input.notification_type, input.type); const usage = getUsage(input); const contextWindow = getContextWindow(input); - const duration = pickNumber(input.duration_ms, input.elapsed_ms); if (notificationType === "Done" || notificationType === "success") { const state = sessionKey ? loadState(sessionKey) : { spans: {} }; const runId = sessionKey ? activeRuns.get(sessionKey) || state.runId : void 0; + const duration = pickNumber(input.duration_ms, input.elapsed_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : void 0); if (runId) { enqueue(buildEnvelope(FRAMEWORK, HOST, "run.end", sessionKey, { runId, diff --git a/hooks/claude-code/handler.ts b/hooks/claude-code/handler.ts index 377eb4a..6384321 100644 --- a/hooks/claude-code/handler.ts +++ b/hooks/claude-code/handler.ts @@ -24,6 +24,7 @@ const { enqueue, flush } = createTransport(INGEST_URL); // ── Persisted state (survives between hook subprocess invocations) ────────── interface SessionState { runId?: string; + runStartedAt?: number; // epoch ms when the current run began spans: { [key: string]: string }; // key = sessionKey:toolName, value = spanId spanStartTimes?: { [spanId: string]: number }; // spanId -> epoch ms subagent?: { name: string; spanId: string }; @@ -198,7 +199,7 @@ async function handleSessionStart(input: Dict) { const runId = randomUUID(); activeRuns.set(sessionKey, runId); - saveState(sessionKey, { runId, spans: {} }); + saveState(sessionKey, { runId, runStartedAt: Date.now(), spans: {} }); const contextWindow = getContextWindow(input); @@ -230,7 +231,7 @@ async function handleSessionEnd(input: Dict) { const runId = sessionKey ? (activeRuns.get(sessionKey) || state.runId) : undefined; const usage = getUsage(input); const contextWindow = getContextWindow(input); - const duration = pickNumber(input.duration_ms, input.elapsed_ms); + const duration = pickNumber(input.duration_ms, input.elapsed_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : undefined); if (runId) { enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.end', sessionKey, { @@ -270,7 +271,7 @@ async function handlePromptSubmit(input: Dict) { runId, payload: { status: 'success', - duration_ms: pickNumber(input.elapsed_ms, input.duration_ms), + duration_ms: pickNumber(input.elapsed_ms, input.duration_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : undefined), }, })); } @@ -288,7 +289,7 @@ async function handlePromptSubmit(input: Dict) { const newRunId = randomUUID(); if (sessionKey) { activeRuns.set(sessionKey, newRunId); - saveState(sessionKey, { runId: newRunId, spans: {} }); + saveState(sessionKey, { runId: newRunId, runStartedAt: Date.now(), spans: {} }); } enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.start', sessionKey, { @@ -508,11 +509,11 @@ async function handleNotification(input: Dict) { const notificationType = pickString(input.notification_type, input.type); const usage = getUsage(input); const contextWindow = getContextWindow(input); - const duration = pickNumber(input.duration_ms, input.elapsed_ms); if (notificationType === 'Done' || notificationType === 'success') { const state = sessionKey ? loadState(sessionKey) : { spans: {} }; const runId = sessionKey ? (activeRuns.get(sessionKey) || state.runId) : undefined; + const duration = pickNumber(input.duration_ms, input.elapsed_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : undefined); if (runId) { enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.end', sessionKey, { diff --git a/hooks/hermes/handler.js b/hooks/hermes/handler.js index 592b4a5..b6065ca 100755 --- a/hooks/hermes/handler.js +++ b/hooks/hermes/handler.js @@ -154,7 +154,8 @@ async function readStdin() { let done = false; const timer = setTimeout(() => finish(data), 100); const finish = (value) => { - if (done) return; + if (done) + return; done = true; clearTimeout(timer); resolve(value); @@ -172,6 +173,30 @@ var INGEST_URL = process.env.AGENTMON_INGEST_URL || "http://localhost:8080"; var FRAMEWORK = process.env.AGENTMON_FRAMEWORK || "hermes"; var HOST = process.env.AGENTMON_HOST || hostname(); var { enqueue, flush } = createTransport(INGEST_URL); +var USAGE_TOKEN_FIELDS = [ + "input_tokens", + "output_tokens", + "total_tokens", + "cache_read_tokens", + "cache_write_tokens", + "reasoning_tokens", + "total_cost" +]; +function accumulateUsage(into, usage) { + if (!usage) { + return into; + } + for (const key of USAGE_TOKEN_FIELDS) { + const v = pickNumber(usage[key]); + if (v !== void 0) { + into[key] = (pickNumber(into[key]) ?? 0) + v; + } + } + return into; +} +function runUsagePayload(state) { + return state.runUsage && Object.keys(state.runUsage).length > 0 ? state.runUsage : void 0; +} var STATE_DIR = join(homedir(), ".agentmon-state", "hermes"); function ensureStateDir() { try { @@ -235,19 +260,31 @@ function getToolName(input) { function getUsage(input) { const extra = getExtra(input); const usage = isRecord(input.usage) ? input.usage : isRecord(extra.usage) ? extra.usage : void 0; - if (!usage) return void 0; + if (!usage) + return void 0; const result = {}; - if (usage.input_tokens !== void 0) result.input_tokens = usage.input_tokens; - if (usage.prompt_tokens !== void 0) result.input_tokens = usage.prompt_tokens; - if (usage.output_tokens !== void 0) result.output_tokens = usage.output_tokens; - if (usage.completion_tokens !== void 0) result.output_tokens = usage.completion_tokens; - if (usage.cache_read_tokens !== void 0) result.cache_read_tokens = usage.cache_read_tokens; - if (usage.cache_write_tokens !== void 0) result.cache_write_tokens = usage.cache_write_tokens; - if (usage.cache_creation_tokens !== void 0) result.cache_write_tokens = usage.cache_creation_tokens; - if (usage.reasoning_tokens !== void 0) result.reasoning_tokens = usage.reasoning_tokens; - if (usage.total_tokens !== void 0) result.total_tokens = usage.total_tokens; - if (usage.total_cost !== void 0) result.total_cost = usage.total_cost; - if (usage.cost !== void 0) result.total_cost = usage.cost; + if (usage.input_tokens !== void 0) + result.input_tokens = usage.input_tokens; + if (usage.prompt_tokens !== void 0) + result.input_tokens = usage.prompt_tokens; + if (usage.output_tokens !== void 0) + result.output_tokens = usage.output_tokens; + if (usage.completion_tokens !== void 0) + result.output_tokens = usage.completion_tokens; + if (usage.cache_read_tokens !== void 0) + result.cache_read_tokens = usage.cache_read_tokens; + if (usage.cache_write_tokens !== void 0) + result.cache_write_tokens = usage.cache_write_tokens; + if (usage.cache_creation_tokens !== void 0) + result.cache_write_tokens = usage.cache_creation_tokens; + if (usage.reasoning_tokens !== void 0) + result.reasoning_tokens = usage.reasoning_tokens; + if (usage.total_tokens !== void 0) + result.total_tokens = usage.total_tokens; + if (usage.total_cost !== void 0) + result.total_cost = usage.total_cost; + if (usage.cost !== void 0) + result.total_cost = usage.cost; return Object.keys(result).length > 0 ? result : void 0; } function getModel(input) { @@ -301,11 +338,17 @@ async function handleRunStart(input) { if (state.runId) { enqueue(buildEnvelope(FRAMEWORK, HOST, "run.end", sessionKey, { runId: state.runId, - payload: { status: "success" } + payload: { + status: "success", + duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : void 0, + ...runUsagePayload(state) && { usage: runUsagePayload(state) } + } })); } const runId = randomUUID2(); state.runId = runId; + state.runStartedAt = Date.now(); + state.runUsage = {}; saveState(sessionKey, state); const extra = getExtra(input); enqueue(buildEnvelope(FRAMEWORK, HOST, "run.start", sessionKey, { @@ -334,12 +377,15 @@ async function handleRunEnd(input) { runId: state.runId, payload: { status: "success", - duration_ms: getDuration(input), + duration_ms: getDuration(input) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : void 0), model: getModel(input), - response_preview: truncate(getExtra(input).assistant_response, 500) + response_preview: truncate(getExtra(input).assistant_response, 500), + ...runUsagePayload(state) && { usage: runUsagePayload(state) } } })); state.runId = void 0; + state.runStartedAt = void 0; + state.runUsage = {}; saveState(sessionKey, state); } await flush(); @@ -405,6 +451,10 @@ async function handleAPIResult(input) { await flush(); return; } + if (sessionKey) { + state.runUsage = accumulateUsage(state.runUsage || {}, usage); + saveState(sessionKey, state); + } enqueue(buildEnvelope(FRAMEWORK, HOST, "metric.snapshot", sessionKey, { runId: state.runId, payload: { @@ -426,7 +476,9 @@ async function handleSessionEnd(input) { runId: state.runId, payload: { status: input.interrupted || getExtra(input).interrupted ? "interrupted" : "success", - model: getModel(input) + duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : void 0, + model: getModel(input), + ...runUsagePayload(state) && { usage: runUsagePayload(state) } } })); } diff --git a/hooks/hermes/handler.ts b/hooks/hermes/handler.ts index 9e915f8..49ab3cd 100644 --- a/hooks/hermes/handler.ts +++ b/hooks/hermes/handler.ts @@ -23,9 +23,41 @@ const { enqueue, flush } = createTransport(INGEST_URL); interface SessionState { sessionStarted?: boolean; runId?: string; + runStartedAt?: number; + runUsage?: Dict; spans: { [key: string]: string }; } +// Token fields are reported per LLM call (api-result). A single run (user turn) +// can span several calls, so we sum them into a per-run total that rides along +// on run.end — the location the stats layer reads usage from. +const USAGE_TOKEN_FIELDS = [ + 'input_tokens', + 'output_tokens', + 'total_tokens', + 'cache_read_tokens', + 'cache_write_tokens', + 'reasoning_tokens', + 'total_cost', +]; + +function accumulateUsage(into: Dict, usage: Dict | undefined): Dict { + if (!usage) { + return into; + } + for (const key of USAGE_TOKEN_FIELDS) { + const v = pickNumber(usage[key]); + if (v !== undefined) { + into[key] = (pickNumber(into[key]) ?? 0) + v; + } + } + return into; +} + +function runUsagePayload(state: SessionState): Dict | undefined { + return state.runUsage && Object.keys(state.runUsage).length > 0 ? state.runUsage : undefined; +} + const STATE_DIR = join(homedir(), '.agentmon-state', 'hermes'); function ensureStateDir() { @@ -179,12 +211,18 @@ async function handleRunStart(input: Dict) { if (state.runId) { enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.end', sessionKey, { runId: state.runId, - payload: { status: 'success' }, + payload: { + status: 'success', + duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : undefined, + ...(runUsagePayload(state) && { usage: runUsagePayload(state) }), + }, })); } const runId = randomUUID(); state.runId = runId; + state.runStartedAt = Date.now(); + state.runUsage = {}; saveState(sessionKey, state); const extra = getExtra(input); @@ -217,12 +255,15 @@ async function handleRunEnd(input: Dict) { runId: state.runId, payload: { status: 'success', - duration_ms: getDuration(input), + duration_ms: getDuration(input) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : undefined), model: getModel(input), response_preview: truncate(getExtra(input).assistant_response, 500), + ...(runUsagePayload(state) && { usage: runUsagePayload(state) }), }, })); state.runId = undefined; + state.runStartedAt = undefined; + state.runUsage = {}; saveState(sessionKey, state); } await flush(); @@ -296,6 +337,11 @@ async function handleAPIResult(input: Dict) { return; } + if (sessionKey) { + state.runUsage = accumulateUsage(state.runUsage || {}, usage); + saveState(sessionKey, state); + } + enqueue(buildEnvelope(FRAMEWORK, HOST, 'metric.snapshot', sessionKey, { runId: state.runId, payload: { @@ -318,7 +364,9 @@ async function handleSessionEnd(input: Dict) { runId: state.runId, payload: { status: input.interrupted || getExtra(input).interrupted ? 'interrupted' : 'success', + duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : undefined, model: getModel(input), + ...(runUsagePayload(state) && { usage: runUsagePayload(state) }), }, })); }