diff --git a/cmd/query-api/main.go b/cmd/query-api/main.go
index 0b58176..c4a3afd 100644
--- a/cmd/query-api/main.go
+++ b/cmd/query-api/main.go
@@ -218,6 +218,19 @@ func main() {
httpx.WriteJSON(w, http.StatusOK, summary)
})
+	// GET /v1/stats/top-tools?limit=N responds with {"tools": [...]}.
+	r.Get("/v1/stats/top-tools", func(w http.ResponseWriter, r *http.Request) {
+		// Upper bound on the number of rows a single request may ask for.
+		const maxLimit = 100
+
+		// An absent limit stays 0, which GetTopTools replaces with its own
+		// default. A malformed value is rejected rather than silently
+		// treated as absent.
+		limit := 0
+		if raw := r.URL.Query().Get("limit"); raw != "" {
+			n, err := strconv.Atoi(raw)
+			if err != nil {
+				httpx.WriteJSON(w, http.StatusBadRequest, map[string]any{"error": "invalid_limit"})
+				return
+			}
+			limit = n
+		}
+		if limit > maxLimit {
+			limit = maxLimit
+		}
+
+		tools, err := db.GetTopTools(r.Context(), limit)
+		if err != nil {
+			httpx.WriteJSON(w, http.StatusInternalServerError, map[string]any{"error": "db_error"})
+			return
+		}
+		// A nil slice would encode as JSON null; always send an array.
+		if tools == nil {
+			tools = []postgres.TopTool{}
+		}
+		httpx.WriteJSON(w, http.StatusOK, map[string]any{"tools": tools})
+	})
+
r.Get("/v1/stats/timeseries", func(w http.ResponseWriter, r *http.Request) {
window := r.URL.Query().Get("window")
switch window {
diff --git a/cmd/web-ui/static/app.js b/cmd/web-ui/static/app.js
index c62c918..9d5b6c8 100644
--- a/cmd/web-ui/static/app.js
+++ b/cmd/web-ui/static/app.js
@@ -751,7 +751,9 @@
function renderInfraGrid() {
const vmNames = Object.keys(openclawState.instances).sort();
- const services = Object.values(swarmState.services);
+ const allServices = Object.values(swarmState.services);
+ const agentmonServices = allServices.filter(s => s.group === 'agentmon');
+ const swarmServices = allServices.filter(s => s.group !== 'agentmon');
app.innerHTML = `
-
Services
- ${services.length === 0
+
Swarm Services
+ ${swarmServices.length === 0
? '
No swarm service data
'
- : `
${services.map(svc => renderServiceCard(svc)).join('')}
`
+ : `
${swarmServices.map(svc => renderServiceCard(svc)).join('')}
`
+ }
+
+
+
+
Agentmon
+ ${agentmonServices.length === 0
+ ? '
No agentmon service data
'
+ : `
${agentmonServices.map(svc => renderServiceCard(svc)).join('')}
`
}
`;
@@ -835,6 +845,10 @@
case 'mcp': return renderMCPCard(svc);
case 'voice': return renderVoiceCard(svc);
case 'automation':return renderAutomationCard(svc);
+ case 'api':
+ case 'web': return renderAPICard(svc);
+ case 'worker':
+ case 'queue': return renderWorkerCard(svc);
default: return renderGenericServiceCard(svc);
}
}
@@ -966,6 +980,33 @@
`;
}
+ // Builds the card markup for a service that exposes an HTTP endpoint; the
+ // role switch dispatches the 'api' and 'web' roles here.
+ // Reads svc.http_status, svc.uptime_sec and svc.container_state and returns
+ // the card as a string.
+ function renderAPICard(svc) {
+ const httpStatus = svc.http_status;
+ // Only an exact 200 is styled 'ok'; any other truthy status is 'bad', and a
+ // missing status gets no class at all.
+ const httpClass = httpStatus === 200 ? 'ok' : httpStatus ? 'bad' : '';
+ return `
+
+ ${serviceCardHeader(svc)}
+
+ ${serviceStatRow('HTTP', httpStatus ? String(httpStatus) : '-', httpClass)}
+ ${serviceStatRow('Uptime', formatUptime(svc.uptime_sec), '')}
+ ${serviceStatRow('Container', escapeHTML(svc.container_state || '-'), svc.container_state === 'running' ? 'ok' : 'bad')}
+
+
+ `;
+ }
+
+ // Builds the card markup for a background service; the role switch
+ // dispatches the 'worker' and 'queue' roles here.
+ // Shows the container state (styled 'ok' only when it is 'running', 'bad'
+ // otherwise, including when it is missing) and the formatted uptime.
+ function renderWorkerCard(svc) {
+ return `
+
+ ${serviceCardHeader(svc)}
+
+ ${serviceStatRow('Container', escapeHTML(svc.container_state || '-'), svc.container_state === 'running' ? 'ok' : 'bad')}
+ ${serviceStatRow('Uptime', formatUptime(svc.uptime_sec), '')}
+
+
+ `;
+ }
+
function renderGenericServiceCard(svc) {
return `
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 2cde8e8..47217ef 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -15,6 +15,10 @@ services:
interval: 5s
timeout: 5s
retries: 5
+ labels:
+ agentmon.monitor: "true"
+ agentmon.group: agentmon
+ agentmon.role: db
nats:
image: nats:latest
@@ -24,6 +28,10 @@ services:
command: "--jetstream"
volumes:
- nats-data:/data
+ labels:
+ agentmon.monitor: "true"
+ agentmon.group: agentmon
+ agentmon.role: queue
ingest-gateway:
build: .
@@ -38,6 +46,11 @@ services:
AGENTMON_ADDR: :8080
NATS_URL: nats://nats:4222
NATS_TOPIC: agentmon.events.v1
+ labels:
+ agentmon.monitor: "true"
+ agentmon.group: agentmon
+ agentmon.role: api
+ agentmon.port: "8080"
query-api:
build: .
@@ -56,6 +69,11 @@ services:
AGENTMON_QUERY_BASE: http://localhost:8081
NATS_URL: nats://nats:4222
NATS_TOPIC: agentmon.events.v1
+ labels:
+ agentmon.monitor: "true"
+ agentmon.group: agentmon
+ agentmon.role: api
+ agentmon.port: "8081"
web-ui:
build: .
@@ -69,6 +87,11 @@ services:
environment:
AGENTMON_UI_ADDR: :8082
AGENTMON_QUERY_BASE: http://query-api:8081
+ labels:
+ agentmon.monitor: "true"
+ agentmon.group: agentmon
+ agentmon.role: api
+ agentmon.port: "8082"
event-processor:
build: .
@@ -83,6 +106,10 @@ services:
DATABASE_URL: postgres://postgres:pass@postgres:5432/agentmon?sslmode=disable
NATS_URL: nats://nats:4222
NATS_TOPIC: agentmon.events.v1
+ labels:
+ agentmon.monitor: "true"
+ agentmon.group: agentmon
+ agentmon.role: worker
swarm-monitor:
build: .
@@ -100,6 +127,10 @@ services:
LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-}
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
+ labels:
+ agentmon.monitor: "true"
+ agentmon.group: agentmon
+ agentmon.role: worker
openclaw-monitor:
build: .
@@ -121,6 +152,10 @@ services:
- /home/will/.ssh/id_rsa.pub:/root/.ssh/id_rsa.pub:ro
- /home/will/.ssh/authorized_keys:/root/.ssh/authorized_keys:ro
- /var/lib/libvirt:/var/lib/libvirt:ro
+ labels:
+ agentmon.monitor: "true"
+ agentmon.group: agentmon
+ agentmon.role: worker
volumes:
postgres-data:
diff --git a/internal/monitor/swarm/collector.go b/internal/monitor/swarm/collector.go
index 41b435d..6425e3c 100644
--- a/internal/monitor/swarm/collector.go
+++ b/internal/monitor/swarm/collector.go
@@ -87,6 +87,7 @@ func collectOne(ctx context.Context, c dockerContainer, dockerClient, httpClient
name := containerName(c)
snap := ServiceSnapshot{
Name: name,
+ Group: c.Labels["agentmon.group"],
Role: c.Labels["agentmon.role"],
ContainerState: c.State,
HealthState: "none",
@@ -121,6 +122,10 @@ func collectOne(ctx context.Context, c dockerContainer, dockerClient, httpClient
collectHTTPProbe(ctx, &snap, httpClient, "http://localhost:"+port+"/")
case "mcp":
collectPortProbe(&snap, port)
+ case "api", "web":
+ if port != "" {
+ collectHTTPProbe(ctx, &snap, httpClient, "http://localhost:"+port+"/healthz")
+ }
}
snap.Status = deriveStatus(snap)
diff --git a/internal/monitor/swarm/types.go b/internal/monitor/swarm/types.go
index 1ef8f1f..2af8395 100644
--- a/internal/monitor/swarm/types.go
+++ b/internal/monitor/swarm/types.go
@@ -5,6 +5,7 @@ import "time"
// ServiceSnapshot holds the collected state for one docker-compose service.
type ServiceSnapshot struct {
Name string `json:"name"`
+ Group string `json:"group,omitempty"`
Role string `json:"role"`
ContainerState string `json:"container_state"` // running/stopped/exited/missing
HealthState string `json:"health_state"` // healthy/unhealthy/starting/none
diff --git a/internal/store/postgres/runs.go b/internal/store/postgres/runs.go
index dcffddb..81e8b81 100644
--- a/internal/store/postgres/runs.go
+++ b/internal/store/postgres/runs.go
@@ -13,6 +13,8 @@ type RunRow struct {
EndedAt *time.Time `json:"ended_at,omitempty"`
Status string `json:"status"`
SpanCount int `json:"span_count"`
+ ToolCount int `json:"tool_count"`
+ Model string `json:"model,omitempty"`
}
type SessionDetail struct {
@@ -59,7 +61,9 @@ func (d *DB) GetSessionWithRuns(ctx context.Context, sessionID string) (*Session
WHEN bool_or(type = 'error' OR payload->'payload'->>'status' = 'error') THEN 'error'
ELSE 'success'
END as status,
- COUNT(DISTINCT span_id) as span_count
+ COUNT(DISTINCT span_id) as span_count,
+ COUNT(DISTINCT CASE WHEN payload->'attributes'->>'span_kind' = 'tool' THEN span_id END) as tool_count,
+ COALESCE(MAX(CASE WHEN type = 'run.end' THEN payload->'payload'->>'model' END), '') as model
FROM events
WHERE session_id = $1 AND run_id IS NOT NULL
GROUP BY run_id, session_id
@@ -74,7 +78,7 @@ func (d *DB) GetSessionWithRuns(ctx context.Context, sessionID string) (*Session
var runs []RunRow
for rows.Next() {
var r RunRow
- if err := rows.Scan(&r.RunID, &r.SessionID, &r.StartedAt, &r.EndedAt, &r.Status, &r.SpanCount); err != nil {
+ if err := rows.Scan(&r.RunID, &r.SessionID, &r.StartedAt, &r.EndedAt, &r.Status, &r.SpanCount, &r.ToolCount, &r.Model); err != nil {
return nil, nil, err
}
runs = append(runs, r)
diff --git a/internal/store/postgres/stats.go b/internal/store/postgres/stats.go
index 77943f3..375e78c 100644
--- a/internal/store/postgres/stats.go
+++ b/internal/store/postgres/stats.go
@@ -99,6 +99,48 @@ func (d *DB) GetSummary(ctx context.Context) (*Summary, error) {
}, nil
}
+// TopTool is one row of the top-tools ranking: a tool name and the number of
+// matching span.end events counted for it.
+type TopTool struct {
+	Name  string `json:"name"`
+	Count int    `json:"count"`
+}
+
+// GetTopTools returns the tools with the most span.end events of span_kind
+// "tool" since the start of the current day, most used first. Tools with
+// equal counts are ordered by name so the result (and the LIMIT cut-off) is
+// deterministic.
+//
+// A limit <= 0 selects the default of 10. The returned slice is nil when no
+// rows match, and always nil when an error is returned.
+func (d *DB) GetTopTools(ctx context.Context, limit int) ([]TopTool, error) {
+	if limit <= 0 {
+		limit = 10
+	}
+
+	// "Today" starts at midnight in the process's local time zone.
+	now := time.Now()
+	midnight := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())
+
+	q := `
+		SELECT
+			payload->'attributes'->>'name' AS tool_name,
+			COUNT(*) AS cnt
+		FROM events
+		WHERE type = 'span.end'
+			AND payload->'attributes'->>'span_kind' = 'tool'
+			AND payload->'attributes'->>'name' IS NOT NULL
+			AND ts >= $1
+		GROUP BY tool_name
+		ORDER BY cnt DESC, tool_name ASC
+		LIMIT $2
+	`
+	rows, err := d.sql.QueryContext(ctx, q, midnight, limit)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var out []TopTool
+	for rows.Next() {
+		var t TopTool
+		if err := rows.Scan(&t.Name, &t.Count); err != nil {
+			return nil, err
+		}
+		out = append(out, t)
+	}
+	// Do not hand back a partially filled slice if iteration stopped early.
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
func bucketForWindow(window string) string {
switch window {
case "1h":