feat(metrics): surface tool-span latency in stats and dashboard
Tool spans already carry duration_ms and status, but the metrics layer only counted them. Expose that data:

- GetTopTools now returns avg/p95 duration and error count per tool.
- Timeseries buckets gain tool_avg_ms / tool_p95_ms (filtered percentile_cont over tool spans).
- Dashboard Top Tools shows avg latency per tool; the Latency panel, previously always empty (it read run-level duration that is never emitted), now plots real tool-span latency (min/avg/p95).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -33,6 +33,8 @@ type TimeseriesBucket struct {
|
||||
OutputTokens int64 `json:"output_tokens"`
|
||||
Cost float64 `json:"cost"`
|
||||
AvgDurationMS float64 `json:"avg_duration_ms"`
|
||||
ToolAvgMS float64 `json:"tool_avg_ms"`
|
||||
ToolP95MS float64 `json:"tool_p95_ms"`
|
||||
}
|
||||
|
||||
type TimeseriesResult struct {
|
||||
@@ -157,8 +159,11 @@ func (d *DB) GetSummary(ctx context.Context) (*Summary, error) {
|
||||
}
|
||||
|
||||
type TopTool struct {
|
||||
Name string `json:"name"`
|
||||
Count int `json:"count"`
|
||||
Name string `json:"name"`
|
||||
Count int `json:"count"`
|
||||
AvgMS float64 `json:"avg_ms"`
|
||||
P95MS float64 `json:"p95_ms"`
|
||||
Errors int `json:"errors"`
|
||||
}
|
||||
|
||||
type TopModel struct {
|
||||
@@ -176,7 +181,11 @@ func (d *DB) GetTopTools(ctx context.Context, limit int) ([]TopTool, error) {
|
||||
q := `
|
||||
SELECT
|
||||
payload->'attributes'->>'name' AS tool_name,
|
||||
COUNT(*) AS cnt
|
||||
COUNT(*) AS cnt,
|
||||
COALESCE(AVG((payload->'payload'->>'duration_ms')::float8), 0) AS avg_ms,
|
||||
COALESCE(percentile_cont(0.95) WITHIN GROUP (
|
||||
ORDER BY (payload->'payload'->>'duration_ms')::float8), 0) AS p95_ms,
|
||||
COUNT(*) FILTER (WHERE payload->'payload'->>'status' = 'error') AS errors
|
||||
FROM events
|
||||
WHERE type = 'span.end'
|
||||
AND payload->'attributes'->>'span_kind' = 'tool'
|
||||
@@ -195,7 +204,7 @@ func (d *DB) GetTopTools(ctx context.Context, limit int) ([]TopTool, error) {
|
||||
var out []TopTool
|
||||
for rows.Next() {
|
||||
var t TopTool
|
||||
if err := rows.Scan(&t.Name, &t.Count); err != nil {
|
||||
if err := rows.Scan(&t.Name, &t.Count, &t.AvgMS, &t.P95MS, &t.Errors); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, t)
|
||||
@@ -300,7 +309,14 @@ func (d *DB) GetTimeseries(ctx context.Context, window string) (*TimeseriesResul
|
||||
COALESCE(SUM((payload->'payload'->'usage'->>'total_cost')::float8)
|
||||
FILTER (WHERE type = 'run.end'), 0) AS cost,
|
||||
COALESCE(AVG((payload->'payload'->>'duration_ms')::float8)
|
||||
FILTER (WHERE type = 'run.end'), 0) AS avg_duration_ms
|
||||
FILTER (WHERE type = 'run.end'), 0) AS avg_duration_ms,
|
||||
COALESCE(AVG((payload->'payload'->>'duration_ms')::float8)
|
||||
FILTER (WHERE type = 'span.end'
|
||||
AND payload->'attributes'->>'span_kind' = 'tool'), 0) AS tool_avg_ms,
|
||||
COALESCE(percentile_cont(0.95) WITHIN GROUP (
|
||||
ORDER BY (payload->'payload'->>'duration_ms')::float8)
|
||||
FILTER (WHERE type = 'span.end'
|
||||
AND payload->'attributes'->>'span_kind' = 'tool'), 0) AS tool_p95_ms
|
||||
FROM events
|
||||
WHERE ts >= $2
|
||||
AND type IN ('run.start', 'run.end', 'span.end', 'error')
|
||||
@@ -318,7 +334,8 @@ func (d *DB) GetTimeseries(ctx context.Context, window string) (*TimeseriesResul
|
||||
for rows.Next() {
|
||||
var b TimeseriesBucket
|
||||
if err := rows.Scan(&b.TS, &b.Runs, &b.Tools, &b.Errors,
|
||||
&b.Tokens, &b.InputTokens, &b.OutputTokens, &b.Cost, &b.AvgDurationMS); err != nil {
|
||||
&b.Tokens, &b.InputTokens, &b.OutputTokens, &b.Cost, &b.AvgDurationMS,
|
||||
&b.ToolAvgMS, &b.ToolP95MS); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
series = append(series, b)
|
||||
|
||||
Reference in New Issue
Block a user