feat: add agentmon services section to infrastructure page

Label all agentmon docker-compose services with agentmon.monitor=true
and agentmon.group=agentmon so the swarm-monitor picks them up.
Adds Group field to ServiceSnapshot, probes /healthz for api/web roles,
and renders a separate "Agentmon" section below Swarm Services on the
Infrastructure page with new api and worker card renderers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
William Valentin
2026-03-18 13:41:26 -07:00
parent d2d044a3d8
commit f8ddea3698
7 changed files with 147 additions and 6 deletions
+6 -2
View File
@@ -13,6 +13,8 @@ type RunRow struct {
EndedAt *time.Time `json:"ended_at,omitempty"`
Status string `json:"status"`
SpanCount int `json:"span_count"`
ToolCount int `json:"tool_count"`
Model string `json:"model,omitempty"`
}
type SessionDetail struct {
@@ -59,7 +61,9 @@ func (d *DB) GetSessionWithRuns(ctx context.Context, sessionID string) (*Session
WHEN bool_or(type = 'error' OR payload->'payload'->>'status' = 'error') THEN 'error'
ELSE 'success'
END as status,
COUNT(DISTINCT span_id) as span_count
COUNT(DISTINCT span_id) as span_count,
COUNT(DISTINCT CASE WHEN payload->'attributes'->>'span_kind' = 'tool' THEN span_id END) as tool_count,
COALESCE(MAX(CASE WHEN type = 'run.end' THEN payload->'payload'->>'model' END), '') as model
FROM events
WHERE session_id = $1 AND run_id IS NOT NULL
GROUP BY run_id, session_id
@@ -74,7 +78,7 @@ func (d *DB) GetSessionWithRuns(ctx context.Context, sessionID string) (*Session
var runs []RunRow
for rows.Next() {
var r RunRow
if err := rows.Scan(&r.RunID, &r.SessionID, &r.StartedAt, &r.EndedAt, &r.Status, &r.SpanCount); err != nil {
if err := rows.Scan(&r.RunID, &r.SessionID, &r.StartedAt, &r.EndedAt, &r.Status, &r.SpanCount, &r.ToolCount, &r.Model); err != nil {
return nil, nil, err
}
runs = append(runs, r)
+42
View File
@@ -99,6 +99,48 @@ func (d *DB) GetSummary(ctx context.Context) (*Summary, error) {
}, nil
}
type TopTool struct {
Name string `json:"name"`
Count int `json:"count"`
}
func (d *DB) GetTopTools(ctx context.Context, limit int) ([]TopTool, error) {
if limit <= 0 {
limit = 10
}
now := time.Now()
midnight := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())
q := `
SELECT
payload->'attributes'->>'name' AS tool_name,
COUNT(*) AS cnt
FROM events
WHERE type = 'span.end'
AND payload->'attributes'->>'span_kind' = 'tool'
AND payload->'attributes'->>'name' IS NOT NULL
AND ts >= $1
GROUP BY tool_name
ORDER BY cnt DESC
LIMIT $2
`
rows, err := d.sql.QueryContext(ctx, q, midnight, limit)
if err != nil {
return nil, err
}
defer rows.Close()
var out []TopTool
for rows.Next() {
var t TopTool
if err := rows.Scan(&t.Name, &t.Count); err != nil {
return nil, err
}
out = append(out, t)
}
return out, rows.Err()
}
func bucketForWindow(window string) string {
switch window {
case "1h":