feat(hooks): emit per-run token usage and duration on run.end
The stats layer reads usage/duration only from run.end, but neither framework populated them, so tokens/cost/avg-duration were always 0.

- hermes: accumulate token usage across each run's api-result calls in session state and attach the summed usage plus a computed duration_ms (from a stored runStartedAt) onto run.end. metric.snapshot emission is unchanged, so there is no double counting.
- claude-code: store runStartedAt and use it as a duration_ms fallback at all run.end sites. Usage is unavailable from CC hook inputs.

Live verification: a real hermes run now reports duration_ms and total_tokens on run.end; dashboard tokens_today/avg_duration_ms, both previously 0, now populate. cost_today stays 0 (no provider emits cost through the hooks).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+69
-17
@@ -154,7 +154,8 @@ async function readStdin() {
|
||||
let done = false;
|
||||
const timer = setTimeout(() => finish(data), 100);
|
||||
const finish = (value) => {
|
||||
if (done) return;
|
||||
if (done)
|
||||
return;
|
||||
done = true;
|
||||
clearTimeout(timer);
|
||||
resolve(value);
|
||||
@@ -172,6 +173,30 @@ var INGEST_URL = process.env.AGENTMON_INGEST_URL || "http://localhost:8080";
|
||||
var FRAMEWORK = process.env.AGENTMON_FRAMEWORK || "hermes";
|
||||
var HOST = process.env.AGENTMON_HOST || hostname();
|
||||
var { enqueue, flush } = createTransport(INGEST_URL);
|
||||
// Usage fields that accumulateUsage() sums into the per-run total.
// Despite the name, the list also carries "total_cost" next to the token counters.
var USAGE_TOKEN_FIELDS = [
  "input_tokens",
  "output_tokens",
  "total_tokens",
  "cache_read_tokens",
  "cache_write_tokens",
  "reasoning_tokens",
  "total_cost"
];
|
||||
/**
 * Adds the usage fields of a single API result onto a running total.
 *
 * Only the keys listed in USAGE_TOKEN_FIELDS are considered, and a field is
 * skipped whenever pickNumber() returns undefined for the incoming value.
 * The accumulator is mutated in place and also returned.
 *
 * @param into  Accumulator object; updated in place.
 * @param usage Usage record for one call; a falsy value leaves `into` untouched.
 * @returns The same `into` object that was passed in.
 */
function accumulateUsage(into, usage) {
  if (!usage) {
    return into;
  }
  for (const field of USAGE_TOKEN_FIELDS) {
    const delta = pickNumber(usage[field]);
    if (delta === void 0) {
      continue;
    }
    // When pickNumber() yields undefined for the running total, start from 0.
    into[field] = (pickNumber(into[field]) ?? 0) + delta;
  }
  return into;
}
|
||||
/**
 * Returns the accumulated usage for the current run, or undefined when there
 * is nothing to report (no runUsage object, or one without any keys).
 *
 * The returned value is the state's own runUsage object, not a copy.
 *
 * @param state Session state that may carry a `runUsage` object.
 * @returns `state.runUsage` when it has at least one key, otherwise undefined.
 */
function runUsagePayload(state) {
  const totals = state.runUsage;
  if (!totals || Object.keys(totals).length === 0) {
    return void 0;
  }
  return totals;
}
|
||||
var STATE_DIR = join(homedir(), ".agentmon-state", "hermes");
|
||||
function ensureStateDir() {
|
||||
try {
|
||||
@@ -235,19 +260,31 @@ function getToolName(input) {
|
||||
/**
 * Extracts a normalized usage record from a hook input.
 *
 * The usage object is taken from `input.usage` when isRecord() accepts it,
 * otherwise from `getExtra(input).usage`. Known fields are copied under their
 * canonical names; where an alias and the canonical name are both present,
 * the alias wins because it is applied later (prompt_tokens over
 * input_tokens, completion_tokens over output_tokens, cache_creation_tokens
 * over cache_write_tokens, cost over total_cost).
 *
 * @param input Hook input object.
 * @returns The normalized usage record, or undefined when no usage object is
 *          found or none of the known fields are set.
 */
function getUsage(input) {
  const extra = getExtra(input);
  const usage = isRecord(input.usage) ? input.usage : isRecord(extra.usage) ? extra.usage : void 0;
  if (!usage) {
    return void 0;
  }
  // [source key, canonical key]. Order matters: later rows override earlier
  // ones that map to the same canonical key.
  const fieldMap = [
    ["input_tokens", "input_tokens"],
    ["prompt_tokens", "input_tokens"],
    ["output_tokens", "output_tokens"],
    ["completion_tokens", "output_tokens"],
    ["cache_read_tokens", "cache_read_tokens"],
    ["cache_write_tokens", "cache_write_tokens"],
    ["cache_creation_tokens", "cache_write_tokens"],
    ["reasoning_tokens", "reasoning_tokens"],
    ["total_tokens", "total_tokens"],
    ["total_cost", "total_cost"],
    ["cost", "total_cost"]
  ];
  const result = {};
  for (const [source, target] of fieldMap) {
    if (usage[source] !== void 0) {
      result[target] = usage[source];
    }
  }
  return Object.keys(result).length > 0 ? result : void 0;
}
|
||||
function getModel(input) {
|
||||
@@ -301,11 +338,17 @@ async function handleRunStart(input) {
|
||||
if (state.runId) {
|
||||
enqueue(buildEnvelope(FRAMEWORK, HOST, "run.end", sessionKey, {
|
||||
runId: state.runId,
|
||||
payload: { status: "success" }
|
||||
payload: {
|
||||
status: "success",
|
||||
duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : void 0,
|
||||
...runUsagePayload(state) && { usage: runUsagePayload(state) }
|
||||
}
|
||||
}));
|
||||
}
|
||||
const runId = randomUUID2();
|
||||
state.runId = runId;
|
||||
state.runStartedAt = Date.now();
|
||||
state.runUsage = {};
|
||||
saveState(sessionKey, state);
|
||||
const extra = getExtra(input);
|
||||
enqueue(buildEnvelope(FRAMEWORK, HOST, "run.start", sessionKey, {
|
||||
@@ -334,12 +377,15 @@ async function handleRunEnd(input) {
|
||||
runId: state.runId,
|
||||
payload: {
|
||||
status: "success",
|
||||
duration_ms: getDuration(input),
|
||||
duration_ms: getDuration(input) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : void 0),
|
||||
model: getModel(input),
|
||||
response_preview: truncate(getExtra(input).assistant_response, 500)
|
||||
response_preview: truncate(getExtra(input).assistant_response, 500),
|
||||
...runUsagePayload(state) && { usage: runUsagePayload(state) }
|
||||
}
|
||||
}));
|
||||
state.runId = void 0;
|
||||
state.runStartedAt = void 0;
|
||||
state.runUsage = {};
|
||||
saveState(sessionKey, state);
|
||||
}
|
||||
await flush();
|
||||
@@ -405,6 +451,10 @@ async function handleAPIResult(input) {
|
||||
await flush();
|
||||
return;
|
||||
}
|
||||
if (sessionKey) {
|
||||
state.runUsage = accumulateUsage(state.runUsage || {}, usage);
|
||||
saveState(sessionKey, state);
|
||||
}
|
||||
enqueue(buildEnvelope(FRAMEWORK, HOST, "metric.snapshot", sessionKey, {
|
||||
runId: state.runId,
|
||||
payload: {
|
||||
@@ -426,7 +476,9 @@ async function handleSessionEnd(input) {
|
||||
runId: state.runId,
|
||||
payload: {
|
||||
status: input.interrupted || getExtra(input).interrupted ? "interrupted" : "success",
|
||||
model: getModel(input)
|
||||
duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : void 0,
|
||||
model: getModel(input),
|
||||
...runUsagePayload(state) && { usage: runUsagePayload(state) }
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
+50
-2
@@ -23,9 +23,41 @@ const { enqueue, flush } = createTransport(INGEST_URL);
|
||||
/** Per-session hook state; handlers pass it to saveState() after updating run bookkeeping. */
interface SessionState {
  // NOTE(review): presumably records that the session-start event was already sent — confirm against the handler.
  sessionStarted?: boolean;
  /** Id of the run in progress; assigned in handleRunStart and cleared once handleRunEnd has emitted run.end. */
  runId?: string;
  /** Date.now() value (milliseconds) captured when the run started; used to compute duration_ms on run.end. */
  runStartedAt?: number;
  /** Usage totals summed over the current run's API results; reset to {} when a run starts and when it ends. */
  runUsage?: Dict;
  // NOTE(review): string-to-string map; looks like span bookkeeping, but its keys/values are not visible here.
  spans: { [key: string]: string };
}
|
||||
|
||||
// Usage is reported per LLM call (api-result). A single run (user turn) can
// span several calls, so these fields are summed into a per-run total that
// rides along on run.end — the location the stats layer reads usage from.
// Despite the name, the list also carries 'total_cost' next to the token counters.
const USAGE_TOKEN_FIELDS = [
  'input_tokens',
  'output_tokens',
  'total_tokens',
  'cache_read_tokens',
  'cache_write_tokens',
  'reasoning_tokens',
  'total_cost',
];
|
||||
|
||||
/**
 * Adds the usage fields of a single API result onto a running total.
 *
 * Only the keys listed in USAGE_TOKEN_FIELDS are considered, and a field is
 * skipped whenever pickNumber() returns undefined for the incoming value.
 * The accumulator is mutated in place and also returned.
 *
 * @param into - Accumulator object; updated in place.
 * @param usage - Usage record for one call; `undefined` leaves `into` untouched.
 * @returns The same `into` object that was passed in.
 */
function accumulateUsage(into: Dict, usage: Dict | undefined): Dict {
  if (!usage) {
    return into;
  }
  for (const field of USAGE_TOKEN_FIELDS) {
    const delta = pickNumber(usage[field]);
    if (delta === undefined) {
      continue;
    }
    // When pickNumber() yields undefined for the running total, start from 0.
    into[field] = (pickNumber(into[field]) ?? 0) + delta;
  }
  return into;
}
|
||||
|
||||
/**
 * Returns the accumulated usage for the current run, or `undefined` when there
 * is nothing to report (no `runUsage` object, or one without any keys).
 *
 * The returned value is the state's own `runUsage` object, not a copy.
 *
 * @param state - Session state that may carry a `runUsage` object.
 * @returns `state.runUsage` when it has at least one key, otherwise `undefined`.
 */
function runUsagePayload(state: SessionState): Dict | undefined {
  const totals = state.runUsage;
  if (!totals || Object.keys(totals).length === 0) {
    return undefined;
  }
  return totals;
}
|
||||
|
||||
const STATE_DIR = join(homedir(), '.agentmon-state', 'hermes');
|
||||
|
||||
function ensureStateDir() {
|
||||
@@ -179,12 +211,18 @@ async function handleRunStart(input: Dict) {
|
||||
if (state.runId) {
|
||||
enqueue(buildEnvelope(FRAMEWORK, HOST, 'run.end', sessionKey, {
|
||||
runId: state.runId,
|
||||
payload: { status: 'success' },
|
||||
payload: {
|
||||
status: 'success',
|
||||
duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : undefined,
|
||||
...(runUsagePayload(state) && { usage: runUsagePayload(state) }),
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
const runId = randomUUID();
|
||||
state.runId = runId;
|
||||
state.runStartedAt = Date.now();
|
||||
state.runUsage = {};
|
||||
saveState(sessionKey, state);
|
||||
|
||||
const extra = getExtra(input);
|
||||
@@ -217,12 +255,15 @@ async function handleRunEnd(input: Dict) {
|
||||
runId: state.runId,
|
||||
payload: {
|
||||
status: 'success',
|
||||
duration_ms: getDuration(input),
|
||||
duration_ms: getDuration(input) ?? (state.runStartedAt ? Date.now() - state.runStartedAt : undefined),
|
||||
model: getModel(input),
|
||||
response_preview: truncate(getExtra(input).assistant_response, 500),
|
||||
...(runUsagePayload(state) && { usage: runUsagePayload(state) }),
|
||||
},
|
||||
}));
|
||||
state.runId = undefined;
|
||||
state.runStartedAt = undefined;
|
||||
state.runUsage = {};
|
||||
saveState(sessionKey, state);
|
||||
}
|
||||
await flush();
|
||||
@@ -296,6 +337,11 @@ async function handleAPIResult(input: Dict) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (sessionKey) {
|
||||
state.runUsage = accumulateUsage(state.runUsage || {}, usage);
|
||||
saveState(sessionKey, state);
|
||||
}
|
||||
|
||||
enqueue(buildEnvelope(FRAMEWORK, HOST, 'metric.snapshot', sessionKey, {
|
||||
runId: state.runId,
|
||||
payload: {
|
||||
@@ -318,7 +364,9 @@ async function handleSessionEnd(input: Dict) {
|
||||
runId: state.runId,
|
||||
payload: {
|
||||
status: input.interrupted || getExtra(input).interrupted ? 'interrupted' : 'success',
|
||||
duration_ms: state.runStartedAt ? Date.now() - state.runStartedAt : undefined,
|
||||
model: getModel(input),
|
||||
...(runUsagePayload(state) && { usage: runUsagePayload(state) }),
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user