feat: add agentmon services section to infrastructure page
Label all agentmon docker-compose services with agentmon.monitor=true and agentmon.group=agentmon so that the swarm-monitor picks them up. Add a Group field to ServiceSnapshot, probe /healthz for the api and web roles, and render a separate "Agentmon" section below Swarm Services on the Infrastructure page, with new api and worker card renderers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -87,6 +87,7 @@ func collectOne(ctx context.Context, c dockerContainer, dockerClient, httpClient
|
||||
name := containerName(c)
|
||||
snap := ServiceSnapshot{
|
||||
Name: name,
|
||||
Group: c.Labels["agentmon.group"],
|
||||
Role: c.Labels["agentmon.role"],
|
||||
ContainerState: c.State,
|
||||
HealthState: "none",
|
||||
@@ -121,6 +122,10 @@ func collectOne(ctx context.Context, c dockerContainer, dockerClient, httpClient
|
||||
collectHTTPProbe(ctx, &snap, httpClient, "http://localhost:"+port+"/")
|
||||
case "mcp":
|
||||
collectPortProbe(&snap, port)
|
||||
case "api", "web":
|
||||
if port != "" {
|
||||
collectHTTPProbe(ctx, &snap, httpClient, "http://localhost:"+port+"/healthz")
|
||||
}
|
||||
}
|
||||
|
||||
snap.Status = deriveStatus(snap)
|
||||
|
||||
@@ -5,6 +5,7 @@ import "time"
|
||||
// ServiceSnapshot holds the collected state for one docker-compose service.
|
||||
type ServiceSnapshot struct {
|
||||
Name string `json:"name"`
|
||||
Group string `json:"group,omitempty"`
|
||||
Role string `json:"role"`
|
||||
ContainerState string `json:"container_state"` // running/stopped/exited/missing
|
||||
HealthState string `json:"health_state"` // healthy/unhealthy/starting/none
|
||||
|
||||
@@ -13,6 +13,8 @@ type RunRow struct {
|
||||
EndedAt *time.Time `json:"ended_at,omitempty"`
|
||||
Status string `json:"status"`
|
||||
SpanCount int `json:"span_count"`
|
||||
ToolCount int `json:"tool_count"`
|
||||
Model string `json:"model,omitempty"`
|
||||
}
|
||||
|
||||
type SessionDetail struct {
|
||||
@@ -59,7 +61,9 @@ func (d *DB) GetSessionWithRuns(ctx context.Context, sessionID string) (*Session
|
||||
WHEN bool_or(type = 'error' OR payload->'payload'->>'status' = 'error') THEN 'error'
|
||||
ELSE 'success'
|
||||
END as status,
|
||||
COUNT(DISTINCT span_id) as span_count
|
||||
COUNT(DISTINCT span_id) as span_count,
|
||||
COUNT(DISTINCT CASE WHEN payload->'attributes'->>'span_kind' = 'tool' THEN span_id END) as tool_count,
|
||||
COALESCE(MAX(CASE WHEN type = 'run.end' THEN payload->'payload'->>'model' END), '') as model
|
||||
FROM events
|
||||
WHERE session_id = $1 AND run_id IS NOT NULL
|
||||
GROUP BY run_id, session_id
|
||||
@@ -74,7 +78,7 @@ func (d *DB) GetSessionWithRuns(ctx context.Context, sessionID string) (*Session
|
||||
var runs []RunRow
|
||||
for rows.Next() {
|
||||
var r RunRow
|
||||
if err := rows.Scan(&r.RunID, &r.SessionID, &r.StartedAt, &r.EndedAt, &r.Status, &r.SpanCount); err != nil {
|
||||
if err := rows.Scan(&r.RunID, &r.SessionID, &r.StartedAt, &r.EndedAt, &r.Status, &r.SpanCount, &r.ToolCount, &r.Model); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
runs = append(runs, r)
|
||||
|
||||
@@ -99,6 +99,48 @@ func (d *DB) GetSummary(ctx context.Context) (*Summary, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
// TopTool is one row of the tool-usage ranking: a tool name paired with the
// number of times it was counted.
type TopTool struct {
	// Name is the tool's name, serialized as "name".
	Name string `json:"name"`
	// Count is the number of matching events for the tool, serialized as "count".
	Count int `json:"count"`
}
|
||||
|
||||
func (d *DB) GetTopTools(ctx context.Context, limit int) ([]TopTool, error) {
|
||||
if limit <= 0 {
|
||||
limit = 10
|
||||
}
|
||||
now := time.Now()
|
||||
midnight := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())
|
||||
|
||||
q := `
|
||||
SELECT
|
||||
payload->'attributes'->>'name' AS tool_name,
|
||||
COUNT(*) AS cnt
|
||||
FROM events
|
||||
WHERE type = 'span.end'
|
||||
AND payload->'attributes'->>'span_kind' = 'tool'
|
||||
AND payload->'attributes'->>'name' IS NOT NULL
|
||||
AND ts >= $1
|
||||
GROUP BY tool_name
|
||||
ORDER BY cnt DESC
|
||||
LIMIT $2
|
||||
`
|
||||
rows, err := d.sql.QueryContext(ctx, q, midnight, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var out []TopTool
|
||||
for rows.Next() {
|
||||
var t TopTool
|
||||
if err := rows.Scan(&t.Name, &t.Count); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, t)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func bucketForWindow(window string) string {
|
||||
switch window {
|
||||
case "1h":
|
||||
|
||||
Reference in New Issue
Block a user