feat(hooks): emit per-run token usage and duration on run.end

The stats layer reads usage/duration only from run.end, but neither
framework populated them, so tokens/cost/avg-duration were always 0.

- hermes: accumulate token usage across each run's api-result calls in
  session state and attach the summed usage plus a computed duration_ms
  (from a stored runStartedAt) onto run.end. metric.snapshot emission is
  unchanged, so there is no double counting.
- claude-code: store runStartedAt and use it as a duration_ms fallback at
  all run.end sites. Usage is unavailable from Claude Code hook inputs.

Live verification: a real hermes run now reports duration_ms and
total_tokens on run.end; dashboard tokens_today/avg_duration_ms, both
previously 0, now populate. cost_today stays 0 (no provider emits cost
through the hooks).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
William Valentin
2026-06-23 11:16:23 -07:00
parent 5014d89258
commit 478c7529a7
4 changed files with 130 additions and 29 deletions
+5 -5
View File
@@ -335,7 +335,7 @@ async function handleSessionStart(input) {
} }
const runId = randomUUID2(); const runId = randomUUID2();
activeRuns.set(sessionKey, runId); activeRuns.set(sessionKey, runId);
saveState(sessionKey, { runId, spans: {} }); saveState(sessionKey, { runId, runStartedAt: Date.now(), spans: {} });
const contextWindow = getContextWindow(input); const contextWindow = getContextWindow(input);
enqueue(buildEnvelope(FRAMEWORK, HOST, "session.start", sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, "session.start", sessionKey, {
attributes: { attributes: {
@@ -362,7 +362,7 @@ async function handleSessionEnd(input) {
const runId = sessionKey ? activeRuns.get(sessionKey) || state.runId : void 0; const runId = sessionKey ? activeRuns.get(sessionKey) || state.runId : void 0;
const usage = getUsage(input); const usage = getUsage(input);
const contextWindow = getContextWindow(input); const contextWindow = getContextWindow(input);
const duration = pickNumber(input.duration_ms, input.elapsed_ms); const duration = pickNumber(input.duration_ms, input.elapsed_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : void 0);
if (runId) { if (runId) {
enqueue(buildEnvelope(FRAMEWORK, HOST, "run.end", sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, "run.end", sessionKey, {
runId, runId,
@@ -398,7 +398,7 @@ async function handlePromptSubmit(input) {
runId, runId,
payload: { payload: {
status: "success", status: "success",
duration_ms: pickNumber(input.elapsed_ms, input.duration_ms) duration_ms: pickNumber(input.elapsed_ms, input.duration_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : void 0)
} }
})); }));
} }
@@ -414,7 +414,7 @@ async function handlePromptSubmit(input) {
const newRunId = randomUUID2(); const newRunId = randomUUID2();
if (sessionKey) { if (sessionKey) {
activeRuns.set(sessionKey, newRunId); activeRuns.set(sessionKey, newRunId);
saveState(sessionKey, { runId: newRunId, spans: {} }); saveState(sessionKey, { runId: newRunId, runStartedAt: Date.now(), spans: {} });
} }
enqueue(buildEnvelope(FRAMEWORK, HOST, "run.start", sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, "run.start", sessionKey, {
runId: newRunId, runId: newRunId,
@@ -613,10 +613,10 @@ async function handleNotification(input) {
const notificationType = pickString(input.notification_type, input.type); const notificationType = pickString(input.notification_type, input.type);
const usage = getUsage(input); const usage = getUsage(input);
const contextWindow = getContextWindow(input); const contextWindow = getContextWindow(input);
const duration = pickNumber(input.duration_ms, input.elapsed_ms);
if (notificationType === "Done" || notificationType === "success") { if (notificationType === "Done" || notificationType === "success") {
const state = sessionKey ? loadState(sessionKey) : { spans: {} }; const state = sessionKey ? loadState(sessionKey) : { spans: {} };
const runId = sessionKey ? activeRuns.get(sessionKey) || state.runId : void 0; const runId = sessionKey ? activeRuns.get(sessionKey) || state.runId : void 0;
const duration = pickNumber(input.duration_ms, input.elapsed_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : void 0);
if (runId) { if (runId) {
enqueue(buildEnvelope(FRAMEWORK, HOST, "run.end", sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, "run.end", sessionKey, {
runId, runId,
+6 -5
View File
@@ -24,6 +24,7 @@ const { enqueue, flush } = createTransport(INGEST_URL);
// ── Persisted state (survives between hook subprocess invocations) ────────── // ── Persisted state (survives between hook subprocess invocations) ──────────
interface SessionState { interface SessionState {
runId?: string; runId?: string;
runStartedAt?: number; // epoch ms when the current run began
spans: { [key: string]: string }; // key = sessionKey:toolName, value = spanId spans: { [key: string]: string }; // key = sessionKey:toolName, value = spanId
spanStartTimes?: { [spanId: string]: number }; // spanId -> epoch ms spanStartTimes?: { [spanId: string]: number }; // spanId -> epoch ms
subagent?: { name: string; spanId: string }; subagent?: { name: string; spanId: string };
@@ -198,7 +199,7 @@ async function handleSessionStart(input: Dict) {
const runId = randomUUID(); const runId = randomUUID();
activeRuns.set(sessionKey, runId); activeRuns.set(sessionKey, runId);
saveState(sessionKey, { runId, spans: {} }); saveState(sessionKey, { runId, runStartedAt: Date.now(), spans: {} });
const contextWindow = getContextWindow(input); const contextWindow = getContextWindow(input);
@@ -230,7 +231,7 @@ async function handleSessionEnd(input: Dict) {
const runId = sessionKey ? (activeRuns.get(sessionKey) || state.runId) : undefined; const runId = sessionKey ? (activeRuns.get(sessionKey) || state.runId) : undefined;
const usage = getUsage(input); const usage = getUsage(input);
const contextWindow = getContextWindow(input); const contextWindow = getContextWindow(input);
const duration = pickNumber(input.duration_ms, input.elapsed_ms); const duration = pickNumber(input.duration_ms, input.elapsed_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : undefined);
if (runId) { if (runId) {
enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.end', sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.end', sessionKey, {
@@ -270,7 +271,7 @@ async function handlePromptSubmit(input: Dict) {
runId, runId,
payload: { payload: {
status: 'success', status: 'success',
duration_ms: pickNumber(input.elapsed_ms, input.duration_ms), duration_ms: pickNumber(input.elapsed_ms, input.duration_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : undefined),
}, },
})); }));
} }
@@ -288,7 +289,7 @@ async function handlePromptSubmit(input: Dict) {
const newRunId = randomUUID(); const newRunId = randomUUID();
if (sessionKey) { if (sessionKey) {
activeRuns.set(sessionKey, newRunId); activeRuns.set(sessionKey, newRunId);
saveState(sessionKey, { runId: newRunId, spans: {} }); saveState(sessionKey, { runId: newRunId, runStartedAt: Date.now(), spans: {} });
} }
enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.start', sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.start', sessionKey, {
@@ -508,11 +509,11 @@ async function handleNotification(input: Dict) {
const notificationType = pickString(input.notification_type, input.type); const notificationType = pickString(input.notification_type, input.type);
const usage = getUsage(input); const usage = getUsage(input);
const contextWindow = getContextWindow(input); const contextWindow = getContextWindow(input);
const duration = pickNumber(input.duration_ms, input.elapsed_ms);
if (notificationType === 'Done' || notificationType === 'success') { if (notificationType === 'Done' || notificationType === 'success') {
const state = sessionKey ? loadState(sessionKey) : { spans: {} }; const state = sessionKey ? loadState(sessionKey) : { spans: {} };
const runId = sessionKey ? (activeRuns.get(sessionKey) || state.runId) : undefined; const runId = sessionKey ? (activeRuns.get(sessionKey) || state.runId) : undefined;
const duration = pickNumber(input.duration_ms, input.elapsed_ms) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : undefined);
if (runId) { if (runId) {
enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.end', sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.end', sessionKey, {
+69 -17
View File
@@ -154,7 +154,8 @@ async function readStdin() {
let done = false; let done = false;
const timer = setTimeout(() => finish(data), 100); const timer = setTimeout(() => finish(data), 100);
const finish = (value) => { const finish = (value) => {
if (done) return; if (done)
return;
done = true; done = true;
clearTimeout(timer); clearTimeout(timer);
resolve(value); resolve(value);
@@ -172,6 +173,30 @@ var INGEST_URL = process.env.AGENTMON_INGEST_URL || "http://localhost:8080";
var FRAMEWORK = process.env.AGENTMON_FRAMEWORK || "hermes"; var FRAMEWORK = process.env.AGENTMON_FRAMEWORK || "hermes";
var HOST = process.env.AGENTMON_HOST || hostname(); var HOST = process.env.AGENTMON_HOST || hostname();
var { enqueue, flush } = createTransport(INGEST_URL); var { enqueue, flush } = createTransport(INGEST_URL);
// Keys of a usage record that accumulateUsage sums into the per-run total.
// Despite the name, the list is not tokens-only: "total_cost" is summed the
// same way as the token counters.
var USAGE_TOKEN_FIELDS = [
"input_tokens",
"output_tokens",
"total_tokens",
"cache_read_tokens",
"cache_write_tokens",
"reasoning_tokens",
"total_cost"
];
/**
 * Folds one usage record into a running per-run total.
 *
 * Only the keys listed in USAGE_TOKEN_FIELDS are considered. A key is added
 * when pickNumber yields a value for it in `usage`; any other key in `usage`
 * is ignored. A key missing from the running total starts from 0.
 *
 * @param into  Running total; mutated in place.
 * @param usage Usage record to add; when falsy, `into` is returned untouched.
 * @returns The same `into` object that was passed in.
 */
function accumulateUsage(into, usage) {
  if (usage) {
    USAGE_TOKEN_FIELDS.forEach((field) => {
      const increment = pickNumber(usage[field]);
      if (increment === void 0) {
        return;
      }
      const previous = pickNumber(into[field]) ?? 0;
      into[field] = previous + increment;
    });
  }
  return into;
}
/**
 * Returns the accumulated per-run usage for attaching to a run.end payload.
 *
 * @param state Session state whose `runUsage` holds the running totals.
 * @returns `state.runUsage` when it is set and has at least one key;
 *          otherwise undefined, so callers can omit the `usage` field.
 */
function runUsagePayload(state) {
  const totals = state.runUsage;
  if (!totals) {
    return void 0;
  }
  if (Object.keys(totals).length === 0) {
    return void 0;
  }
  return totals;
}
var STATE_DIR = join(homedir(), ".agentmon-state", "hermes"); var STATE_DIR = join(homedir(), ".agentmon-state", "hermes");
function ensureStateDir() { function ensureStateDir() {
try { try {
@@ -235,19 +260,31 @@ function getToolName(input) {
function getUsage(input) { function getUsage(input) {
const extra = getExtra(input); const extra = getExtra(input);
const usage = isRecord(input.usage) ? input.usage : isRecord(extra.usage) ? extra.usage : void 0; const usage = isRecord(input.usage) ? input.usage : isRecord(extra.usage) ? extra.usage : void 0;
if (!usage) return void 0; if (!usage)
return void 0;
const result = {}; const result = {};
if (usage.input_tokens !== void 0) result.input_tokens = usage.input_tokens; if (usage.input_tokens !== void 0)
if (usage.prompt_tokens !== void 0) result.input_tokens = usage.prompt_tokens; result.input_tokens = usage.input_tokens;
if (usage.output_tokens !== void 0) result.output_tokens = usage.output_tokens; if (usage.prompt_tokens !== void 0)
if (usage.completion_tokens !== void 0) result.output_tokens = usage.completion_tokens; result.input_tokens = usage.prompt_tokens;
if (usage.cache_read_tokens !== void 0) result.cache_read_tokens = usage.cache_read_tokens; if (usage.output_tokens !== void 0)
if (usage.cache_write_tokens !== void 0) result.cache_write_tokens = usage.cache_write_tokens; result.output_tokens = usage.output_tokens;
if (usage.cache_creation_tokens !== void 0) result.cache_write_tokens = usage.cache_creation_tokens; if (usage.completion_tokens !== void 0)
if (usage.reasoning_tokens !== void 0) result.reasoning_tokens = usage.reasoning_tokens; result.output_tokens = usage.completion_tokens;
if (usage.total_tokens !== void 0) result.total_tokens = usage.total_tokens; if (usage.cache_read_tokens !== void 0)
if (usage.total_cost !== void 0) result.total_cost = usage.total_cost; result.cache_read_tokens = usage.cache_read_tokens;
if (usage.cost !== void 0) result.total_cost = usage.cost; if (usage.cache_write_tokens !== void 0)
result.cache_write_tokens = usage.cache_write_tokens;
if (usage.cache_creation_tokens !== void 0)
result.cache_write_tokens = usage.cache_creation_tokens;
if (usage.reasoning_tokens !== void 0)
result.reasoning_tokens = usage.reasoning_tokens;
if (usage.total_tokens !== void 0)
result.total_tokens = usage.total_tokens;
if (usage.total_cost !== void 0)
result.total_cost = usage.total_cost;
if (usage.cost !== void 0)
result.total_cost = usage.cost;
return Object.keys(result).length > 0 ? result : void 0; return Object.keys(result).length > 0 ? result : void 0;
} }
function getModel(input) { function getModel(input) {
@@ -301,11 +338,17 @@ async function handleRunStart(input) {
if (state.runId) { if (state.runId) {
enqueue(buildEnvelope(FRAMEWORK, HOST, "run.end", sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, "run.end", sessionKey, {
runId: state.runId, runId: state.runId,
payload: { status: "success" } payload: {
status: "success",
duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : void 0,
...runUsagePayload(state) && { usage: runUsagePayload(state) }
}
})); }));
} }
const runId = randomUUID2(); const runId = randomUUID2();
state.runId = runId; state.runId = runId;
state.runStartedAt = Date.now();
state.runUsage = {};
saveState(sessionKey, state); saveState(sessionKey, state);
const extra = getExtra(input); const extra = getExtra(input);
enqueue(buildEnvelope(FRAMEWORK, HOST, "run.start", sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, "run.start", sessionKey, {
@@ -334,12 +377,15 @@ async function handleRunEnd(input) {
runId: state.runId, runId: state.runId,
payload: { payload: {
status: "success", status: "success",
duration_ms: getDuration(input), duration_ms: getDuration(input) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : void 0),
model: getModel(input), model: getModel(input),
response_preview: truncate(getExtra(input).assistant_response, 500) response_preview: truncate(getExtra(input).assistant_response, 500),
...runUsagePayload(state) && { usage: runUsagePayload(state) }
} }
})); }));
state.runId = void 0; state.runId = void 0;
state.runStartedAt = void 0;
state.runUsage = {};
saveState(sessionKey, state); saveState(sessionKey, state);
} }
await flush(); await flush();
@@ -405,6 +451,10 @@ async function handleAPIResult(input) {
await flush(); await flush();
return; return;
} }
if (sessionKey) {
state.runUsage = accumulateUsage(state.runUsage || {}, usage);
saveState(sessionKey, state);
}
enqueue(buildEnvelope(FRAMEWORK, HOST, "metric.snapshot", sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, "metric.snapshot", sessionKey, {
runId: state.runId, runId: state.runId,
payload: { payload: {
@@ -426,7 +476,9 @@ async function handleSessionEnd(input) {
runId: state.runId, runId: state.runId,
payload: { payload: {
status: input.interrupted || getExtra(input).interrupted ? "interrupted" : "success", status: input.interrupted || getExtra(input).interrupted ? "interrupted" : "success",
model: getModel(input) duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : void 0,
model: getModel(input),
...runUsagePayload(state) && { usage: runUsagePayload(state) }
} }
})); }));
} }
+50 -2
View File
@@ -23,9 +23,41 @@ const { enqueue, flush } = createTransport(INGEST_URL);
interface SessionState { interface SessionState {
sessionStarted?: boolean; sessionStarted?: boolean;
runId?: string; runId?: string;
runStartedAt?: number;
runUsage?: Dict;
spans: { [key: string]: string }; spans: { [key: string]: string };
} }
// Usage is reported per LLM call (api-result). A single run (user turn) can
// span several calls, so the fields listed here are summed into a per-run
// total that rides along on run.end — the location the stats layer reads
// usage from. Despite the name, the list is not tokens-only: 'total_cost' is
// summed the same way as the token counters.
const USAGE_TOKEN_FIELDS = [
'input_tokens',
'output_tokens',
'total_tokens',
'cache_read_tokens',
'cache_write_tokens',
'reasoning_tokens',
'total_cost',
];
/**
 * Folds one usage record into a running per-run total.
 *
 * Only the keys listed in USAGE_TOKEN_FIELDS are considered. A key is added
 * when pickNumber yields a value for it in `usage`; any other key in `usage`
 * is ignored. A key missing from the running total starts from 0.
 *
 * @param into  Running total; mutated in place.
 * @param usage Usage record to add; when undefined, `into` is returned untouched.
 * @returns The same `into` object that was passed in.
 */
function accumulateUsage(into: Dict, usage: Dict | undefined): Dict {
  if (usage) {
    USAGE_TOKEN_FIELDS.forEach((field) => {
      const increment = pickNumber(usage[field]);
      if (increment === undefined) {
        return;
      }
      const previous = pickNumber(into[field]) ?? 0;
      into[field] = previous + increment;
    });
  }
  return into;
}
/**
 * Returns the accumulated per-run usage for attaching to a run.end payload.
 *
 * @param state Session state whose `runUsage` holds the running totals.
 * @returns `state.runUsage` when it is set and has at least one key;
 *          otherwise undefined, so callers can omit the `usage` field.
 */
function runUsagePayload(state: SessionState): Dict | undefined {
  const totals = state.runUsage;
  if (!totals) {
    return undefined;
  }
  if (Object.keys(totals).length === 0) {
    return undefined;
  }
  return totals;
}
const STATE_DIR = join(homedir(), '.agentmon-state', 'hermes'); const STATE_DIR = join(homedir(), '.agentmon-state', 'hermes');
function ensureStateDir() { function ensureStateDir() {
@@ -179,12 +211,18 @@ async function handleRunStart(input: Dict) {
if (state.runId) { if (state.runId) {
enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.end', sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.end', sessionKey, {
runId: state.runId, runId: state.runId,
payload: { status: 'success' }, payload: {
status: 'success',
duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : undefined,
...(runUsagePayload(state) && { usage: runUsagePayload(state) }),
},
})); }));
} }
const runId = randomUUID(); const runId = randomUUID();
state.runId = runId; state.runId = runId;
state.runStartedAt = Date.now();
state.runUsage = {};
saveState(sessionKey, state); saveState(sessionKey, state);
const extra = getExtra(input); const extra = getExtra(input);
@@ -217,12 +255,15 @@ async function handleRunEnd(input: Dict) {
runId: state.runId, runId: state.runId,
payload: { payload: {
status: 'success', status: 'success',
duration_ms: getDuration(input), duration_ms: getDuration(input) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : undefined),
model: getModel(input), model: getModel(input),
response_preview: truncate(getExtra(input).assistant_response, 500), response_preview: truncate(getExtra(input).assistant_response, 500),
...(runUsagePayload(state) && { usage: runUsagePayload(state) }),
}, },
})); }));
state.runId = undefined; state.runId = undefined;
state.runStartedAt = undefined;
state.runUsage = {};
saveState(sessionKey, state); saveState(sessionKey, state);
} }
await flush(); await flush();
@@ -296,6 +337,11 @@ async function handleAPIResult(input: Dict) {
return; return;
} }
if (sessionKey) {
state.runUsage = accumulateUsage(state.runUsage || {}, usage);
saveState(sessionKey, state);
}
enqueue(buildEnvelope(FRAMEWORK, HOST, 'metric.snapshot', sessionKey, { enqueue(buildEnvelope(FRAMEWORK, HOST, 'metric.snapshot', sessionKey, {
runId: state.runId, runId: state.runId,
payload: { payload: {
@@ -318,7 +364,9 @@ async function handleSessionEnd(input: Dict) {
runId: state.runId, runId: state.runId,
payload: { payload: {
status: input.interrupted || getExtra(input).interrupted ? 'interrupted' : 'success', status: input.interrupted || getExtra(input).interrupted ? 'interrupted' : 'success',
duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : undefined,
model: getModel(input), model: getModel(input),
...(runUsagePayload(state) && { usage: runUsagePayload(state) }),
}, },
})); }));
} }