fix(store): backfill spans in run detail

This commit is contained in:
William Valentin
2026-04-21 13:07:09 -07:00
parent 43113f6241
commit 41b7165800
2 changed files with 188 additions and 33 deletions
+133 -33
View File
@@ -144,10 +144,75 @@ func (d *DB) GetRunWithSpans(ctx context.Context, runID string) (*RunDetail, []S
return nil, nil, err
}
spans, err := d.listSpansForRun(ctx, runID)
// Get spans directly linked to this run.
rs := &runSpans{}
directSpans, err := d.listSpansForRun(ctx, runID)
if err != nil {
return nil, nil, err
}
for _, s := range directSpans {
addSpanToRunSpans(rs, s)
}
// Backfill orphaned spans (have session_id but no run_id) that fall
// within this run's time window, matching the same logic used by
// attachSpansToRuns in the session detail view.
if run.SessionID != "" {
// Find the upper bound: the earlier of the next run's start time and
// this run's ended_at (either may be absent). This prevents capturing
// spans that belong to subsequent runs.
var upperBound *time.Time
if run.EndedAt != nil {
upperBound = run.EndedAt
}
var nextRunStart *time.Time
_ = d.sql.QueryRowContext(ctx, `
SELECT MIN(ts)
FROM events
WHERE session_id = $1 AND run_id != $2 AND type = 'run.start' AND ts > $3
`, run.SessionID, runID, run.StartedAt).Scan(&nextRunStart)
if nextRunStart != nil && (upperBound == nil || nextRunStart.Before(*upperBound)) {
upperBound = nextRunStart
}
rows, err := d.sql.QueryContext(ctx, `
SELECT
run_id,
span_id,
COALESCE(payload->'attributes'->>'name', payload->'event'->>'type', type) as name,
COALESCE(payload->'attributes'->>'span_kind', 'unknown') as kind,
ts as started_at,
(payload->'payload'->>'duration_ms')::bigint as duration_ms,
CASE WHEN type = 'error' OR payload->'payload'->>'status' = 'error' THEN 'error' ELSE 'success' END as status,
payload
FROM events
WHERE session_id = $1 AND span_id IS NOT NULL AND (run_id IS NULL OR run_id = '')
AND ts >= $2 AND ($3::timestamptz IS NULL OR ts < $3)
ORDER BY ts ASC
`, run.SessionID, run.StartedAt, upperBound)
if err != nil {
return nil, nil, err
}
defer rows.Close()
for rows.Next() {
var s SpanRow
var dbRunID *string
if err := rows.Scan(&dbRunID, &s.SpanID, &s.Name, &s.Kind, &s.StartedAt, &s.Duration, &s.Status, &s.Payload); err != nil {
return nil, nil, err
}
s.RunID = runID
addSpanToRunSpans(rs, s)
}
if err := rows.Err(); err != nil {
return nil, nil, err
}
}
spans := make([]SpanRow, 0, len(rs.order))
for _, spanID := range rs.order {
spans = append(spans, *rs.byID[spanID])
}
return &run, spans, nil
}
@@ -213,6 +278,27 @@ func mergeJSONObjects(dst, src map[string]any) {
}
}
// runSpans accumulates the spans collected for a single run, merging
// multiple events that share a span ID into one SpanRow while remembering
// the order in which span IDs were first seen.
type runSpans struct {
// byID maps a span ID to its (possibly merged) row. It is created lazily
// by addSpanToRunSpans, so the zero value of runSpans is usable.
byID map[string]*SpanRow
// order lists span IDs in first-insertion order; callers walk it to
// produce the final span slice.
order []string
}
// addSpanToRunSpans records s in rs, keyed by s.SpanID.
//
// The first event seen for a span ID is stored as a private copy and its ID
// is appended to rs.order. Any later event with the same span ID is folded
// into the stored row via mergeSpanEvent and does not change the ordering.
// The byID map is allocated on first use, so a zero-value runSpans is valid.
func addSpanToRunSpans(rs *runSpans, s SpanRow) {
	if rs.byID == nil {
		rs.byID = make(map[string]*SpanRow)
	}

	// Already tracked: merge this event into the stored row and stop.
	if prev := rs.byID[s.SpanID]; prev != nil {
		mergeSpanEvent(prev, s)
		return
	}

	// First sighting: keep our own copy so the stored pointer does not
	// alias the caller's variable.
	row := s
	rs.byID[s.SpanID] = &row
	rs.order = append(rs.order, s.SpanID)
}
func findRunIndexForSpan(runs []RunRow, spanStartedAt time.Time) int {
for i := len(runs) - 1; i >= 0; i-- {
run := runs[i]
@@ -284,18 +370,44 @@ func (d *DB) attachSpansToRuns(ctx context.Context, sessionID string, runs []Run
return runs, nil
}
spansByRun := make(map[string]*runSpans)
// First attach spans directly from each run_id so the session view still
// works even when some span events are missing session_id.
for i := range runs {
spans, err := d.listSpansForRun(ctx, runs[i].RunID)
if err != nil {
return nil, err
}
if len(spans) == 0 {
continue
}
rs := spansByRun[runs[i].RunID]
if rs == nil {
rs = &runSpans{}
spansByRun[runs[i].RunID] = rs
}
for _, span := range spans {
addSpanToRunSpans(rs, span)
}
}
// Then backfill spans that only have session_id. These are assigned to the
// most likely run by timestamp.
rows, err := d.sql.QueryContext(ctx, `
SELECT
run_id,
span_id,
COALESCE(payload->'attributes'->>'name', payload->'event'->>'type', type) as name,
COALESCE(payload->'attributes'->>'span_kind', 'unknown') as kind,
ts as started_at,
(payload->'payload'->>'duration_ms')::bigint as duration_ms,
CASE WHEN type = 'error' OR payload->'payload'->>'status' = 'error' THEN 'error' ELSE 'success' END as status,
payload
span_id,
COALESCE(payload->'attributes'->>'name', payload->'event'->>'type', type) as name,
COALESCE(payload->'attributes'->>'span_kind', 'unknown') as kind,
ts as started_at,
(payload->'payload'->>'duration_ms')::bigint as duration_ms,
CASE WHEN type = 'error' OR payload->'payload'->>'status' = 'error' THEN 'error' ELSE 'success' END as status,
payload
FROM events
WHERE session_id = $1 AND span_id IS NOT NULL
WHERE session_id = $1 AND span_id IS NOT NULL AND (run_id IS NULL OR run_id = '')
ORDER BY ts ASC
`, sessionID)
if err != nil {
@@ -303,45 +415,30 @@ func (d *DB) attachSpansToRuns(ctx context.Context, sessionID string, runs []Run
}
defer rows.Close()
// Map of run_id -> (map of span_id -> *SpanRow) for merging
type runSpans struct {
byID map[string]*SpanRow
order []string
}
spansByRun := make(map[string]*runSpans)
for rows.Next() {
var s SpanRow
var runID *string
if err := rows.Scan(&runID, &s.SpanID, &s.Name, &s.Kind, &s.StartedAt, &s.Duration, &s.Status, &s.Payload); err != nil {
return nil, err
}
if runID != nil {
s.RunID = *runID
}
if s.RunID == "" {
runIndex := findRunIndexForSpan(runs, s.StartedAt)
if runIndex == -1 {
continue
if runIndex != -1 {
s.RunID = runs[runIndex].RunID
} else if runID != nil {
s.RunID = *runID
}
s.RunID = runs[runIndex].RunID
}
if s.RunID == "" {
continue
}
rs := spansByRun[s.RunID]
if rs == nil {
rs = &runSpans{byID: make(map[string]*SpanRow)}
rs = &runSpans{}
spansByRun[s.RunID] = rs
}
existing := rs.byID[s.SpanID]
if existing == nil {
copy := s
rs.byID[s.SpanID] = &copy
rs.order = append(rs.order, s.SpanID)
continue
}
mergeSpanEvent(existing, s)
addSpanToRunSpans(rs, s)
}
if err := rows.Err(); err != nil {
return nil, err
@@ -357,6 +454,9 @@ func (d *DB) attachSpansToRuns(ctx context.Context, sessionID string, runs []Run
spans = append(spans, *rs.byID[spanID])
}
runs[i].Spans = spans
// Update span_count to reflect backfilled spans, not just
// the SQL aggregate which misses orphaned (no run_id) spans.
runs[i].SpanCount = len(spans)
}
return runs, nil
}