fix: use Docker socket HTTP API in swarm collector, no CLI dependency

Replace exec.CommandContext calls (docker ps, docker inspect, nc -z) with
direct HTTP calls over the Unix socket using Go's net/http + custom transport.
Also removes netcat-openbsd from Dockerfile since nc is no longer used.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
William Valentin
2026-03-18 10:36:32 -07:00
parent f48953781b
commit d2d044a3d8
2 changed files with 86 additions and 83 deletions
-1
View File
@@ -20,7 +20,6 @@ RUN apt-get update && apt-get install -y \
ca-certificates \ ca-certificates \
libvirt-clients \ libvirt-clients \
openssh-client \ openssh-client \
netcat-openbsd \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
WORKDIR /app WORKDIR /app
+86 -82
View File
@@ -4,8 +4,9 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"net"
"net/http" "net/http"
"os/exec" "net/url"
"strings" "strings"
"time" "time"
) )
@@ -15,19 +16,19 @@ type Config struct {
LiteLLMBaseURL string LiteLLMBaseURL string
LiteLLMAPIKey string LiteLLMAPIKey string
HTTPTimeout time.Duration HTTPTimeout time.Duration
DockerSocket string // defaults to /var/run/docker.sock
} }
// dockerPsEntry is the JSON shape from `docker ps --format '{{json .}}'`. // dockerContainer is the shape returned by GET /containers/json.
type dockerPsEntry struct { type dockerContainer struct {
ID string `json:"ID"` ID string `json:"Id"`
Names string `json:"Names"` Names []string `json:"Names"`
Status string `json:"Status"` State string `json:"State"`
State string `json:"State"` Labels map[string]string `json:"Labels"`
} }
// dockerInspectEntry is the minimal shape we need from `docker inspect`. // dockerContainerDetail is the shape returned by GET /containers/{id}/json.
type dockerInspectEntry struct { type dockerContainerDetail struct {
Name string `json:"Name"`
State struct { State struct {
Status string `json:"Status"` Status string `json:"Status"`
Running bool `json:"Running"` Running bool `json:"Running"`
@@ -36,119 +37,122 @@ type dockerInspectEntry struct {
Status string `json:"Status"` Status string `json:"Status"`
} `json:"Health"` } `json:"Health"`
} `json:"State"` } `json:"State"`
Config struct { }
Labels map[string]string `json:"Labels"`
} `json:"Config"` func newDockerClient(socketPath string) *http.Client {
if socketPath == "" {
socketPath = "/var/run/docker.sock"
}
return &http.Client{
Transport: &http.Transport{
DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
return (&net.Dialer{}).DialContext(ctx, "unix", socketPath)
},
},
}
} }
// CollectAll lists all containers labeled agentmon.monitor=true and collects // CollectAll lists all containers labeled agentmon.monitor=true and collects
// a ServiceSnapshot for each. // a ServiceSnapshot for each.
func CollectAll(ctx context.Context, cfg Config) ([]ServiceSnapshot, error) { func CollectAll(ctx context.Context, cfg Config) ([]ServiceSnapshot, error) {
// List labeled containers (running + stopped). dockerClient := newDockerClient(cfg.DockerSocket)
out, err := exec.CommandContext(ctx, "docker", "ps", "-a", httpClient := &http.Client{Timeout: cfg.HTTPTimeout}
"--filter", "label=agentmon.monitor=true",
"--format", "{{json .}}", filters := url.QueryEscape(`{"label":["agentmon.monitor=true"]}`)
).Output() req, err := http.NewRequestWithContext(ctx, http.MethodGet,
"http://localhost/v1.41/containers/json?all=1&filters="+filters, nil)
if err != nil { if err != nil {
return nil, fmt.Errorf("docker ps failed: %w", err) return nil, err
} }
var entries []dockerPsEntry resp, err := dockerClient.Do(req)
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { if err != nil {
if line == "" { return nil, fmt.Errorf("docker API unavailable: %w", err)
continue }
} defer resp.Body.Close()
var e dockerPsEntry
if err := json.Unmarshal([]byte(line), &e); err != nil { var containers []dockerContainer
continue if err := json.NewDecoder(resp.Body).Decode(&containers); err != nil {
} return nil, fmt.Errorf("docker API parse error: %w", err)
entries = append(entries, e)
} }
client := &http.Client{Timeout: cfg.HTTPTimeout}
var snapshots []ServiceSnapshot var snapshots []ServiceSnapshot
for _, e := range entries { for _, c := range containers {
snap := collectOne(ctx, e.Names, client, cfg) snapshots = append(snapshots, collectOne(ctx, c, dockerClient, httpClient, cfg))
snapshots = append(snapshots, snap)
} }
return snapshots, nil return snapshots, nil
} }
func collectOne(ctx context.Context, name string, client *http.Client, cfg Config) ServiceSnapshot { func collectOne(ctx context.Context, c dockerContainer, dockerClient, httpClient *http.Client, cfg Config) ServiceSnapshot {
name := containerName(c)
snap := ServiceSnapshot{ snap := ServiceSnapshot{
Name: name, Name: name,
ContainerState: "missing", Role: c.Labels["agentmon.role"],
ContainerState: c.State,
HealthState: "none", HealthState: "none",
Status: "down", Status: "down",
} }
// Inspect for detailed state. // Inspect for health state and uptime (not in list response).
out, err := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{json .}}", name).Output() req, err := http.NewRequestWithContext(ctx, http.MethodGet,
if err != nil { "http://localhost/v1.41/containers/"+c.ID+"/json", nil)
return snap if err == nil {
} if resp, err := dockerClient.Do(req); err == nil {
var detail dockerContainerDetail
var detail dockerInspectEntry if json.NewDecoder(resp.Body).Decode(&detail) == nil {
if err := json.Unmarshal(out, &detail); err != nil { if detail.State.Health != nil {
return snap snap.HealthState = detail.State.Health.Status
} }
if detail.State.Running && detail.State.StartedAt != "" {
snap.Role = detail.Config.Labels["agentmon.role"] if t, err := time.Parse(time.RFC3339Nano, detail.State.StartedAt); err == nil {
snap.ContainerState = detail.State.Status snap.UptimeSec = int64(time.Since(t).Seconds())
}
if detail.State.Health != nil { }
snap.HealthState = detail.State.Health.Status }
} resp.Body.Close()
// Calculate uptime if running.
if detail.State.Running && detail.State.StartedAt != "" {
if t, err := time.Parse(time.RFC3339Nano, detail.State.StartedAt); err == nil {
snap.UptimeSec = int64(time.Since(t).Seconds())
} }
} }
// Role-specific probes. port := c.Labels["agentmon.port"]
switch snap.Role { switch snap.Role {
case "llm-proxy": case "llm-proxy":
collectLLMProxy(ctx, &snap, client, cfg) collectLLMProxy(ctx, &snap, httpClient, cfg)
case "search": case "search":
collectHTTPProbe(ctx, &snap, client, "http://localhost:"+detail.Config.Labels["agentmon.port"]+"/") collectHTTPProbe(ctx, &snap, httpClient, "http://localhost:"+port+"/")
case "mcp": case "mcp":
collectPortProbe(ctx, &snap, detail.Config.Labels["agentmon.port"]) collectPortProbe(&snap, port)
case "db", "voice", "automation":
// Docker healthcheck state is sufficient; no HTTP probe.
} }
snap.Status = deriveStatus(snap) snap.Status = deriveStatus(snap)
return snap return snap
} }
// containerName returns the display name used for a container in a snapshot.
//
// When the list entry carries at least one name, the first one is returned
// with its leading "/" removed. Otherwise the container ID is used as a
// fallback, shortened to at most 12 characters.
func containerName(c dockerContainer) string {
	if len(c.Names) > 0 {
		return strings.TrimPrefix(c.Names[0], "/")
	}
	// Clamp rather than slicing c.ID[:12] unconditionally: an empty or
	// short ID would otherwise panic with a slice-bounds error.
	if len(c.ID) > 12 {
		return c.ID[:12]
	}
	return c.ID
}
func collectLLMProxy(ctx context.Context, snap *ServiceSnapshot, client *http.Client, cfg Config) { func collectLLMProxy(ctx context.Context, snap *ServiceSnapshot, client *http.Client, cfg Config) {
if snap.Extra == nil { if snap.Extra == nil {
snap.Extra = make(map[string]any) snap.Extra = make(map[string]any)
} }
// Health probe.
req, _ := http.NewRequestWithContext(ctx, http.MethodGet, cfg.LiteLLMBaseURL+"/health/liveliness", nil) req, _ := http.NewRequestWithContext(ctx, http.MethodGet, cfg.LiteLLMBaseURL+"/health/liveliness", nil)
resp, err := client.Do(req) if resp, err := client.Do(req); err == nil {
if err == nil {
code := resp.StatusCode code := resp.StatusCode
snap.HTTPStatus = &code snap.HTTPStatus = &code
resp.Body.Close() resp.Body.Close()
} }
// Model count.
if cfg.LiteLLMAPIKey != "" { if cfg.LiteLLMAPIKey != "" {
req, _ := http.NewRequestWithContext(ctx, http.MethodGet, cfg.LiteLLMBaseURL+"/v2/model/info", nil) req, _ := http.NewRequestWithContext(ctx, http.MethodGet, cfg.LiteLLMBaseURL+"/v2/model/info", nil)
req.Header.Set("Authorization", "Bearer "+cfg.LiteLLMAPIKey) req.Header.Set("Authorization", "Bearer "+cfg.LiteLLMAPIKey)
resp, err := client.Do(req) if resp, err := client.Do(req); err == nil {
if err == nil {
defer resp.Body.Close() defer resp.Body.Close()
var result struct { var result struct {
Data []struct { Data []struct{} `json:"data"`
ModelName string `json:"model_name"`
} `json:"data"`
} }
if json.NewDecoder(resp.Body).Decode(&result) == nil { if json.NewDecoder(resp.Body).Decode(&result) == nil {
snap.Extra["model_count"] = len(result.Data) snap.Extra["model_count"] = len(result.Data)
@@ -157,29 +161,29 @@ func collectLLMProxy(ctx context.Context, snap *ServiceSnapshot, client *http.Cl
} }
} }
func collectHTTPProbe(ctx context.Context, snap *ServiceSnapshot, client *http.Client, url string) { func collectHTTPProbe(ctx context.Context, snap *ServiceSnapshot, client *http.Client, target string) {
start := time.Now() start := time.Now()
req, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) req, _ := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
resp, err := client.Do(req) if resp, err := client.Do(req); err == nil {
if err == nil {
code := resp.StatusCode code := resp.StatusCode
snap.HTTPStatus = &code snap.HTTPStatus = &code
resp.Body.Close() resp.Body.Close()
ms := time.Since(start).Milliseconds()
if snap.Extra == nil { if snap.Extra == nil {
snap.Extra = make(map[string]any) snap.Extra = make(map[string]any)
} }
snap.Extra["response_ms"] = ms snap.Extra["response_ms"] = time.Since(start).Milliseconds()
} }
} }
func collectPortProbe(ctx context.Context, snap *ServiceSnapshot, port string) { func collectPortProbe(snap *ServiceSnapshot, port string) {
if port == "" { if port == "" {
return return
} }
// Use nc to check TCP reachability. conn, err := net.DialTimeout("tcp", "localhost:"+port, 2*time.Second)
err := exec.CommandContext(ctx, "nc", "-z", "-w1", "localhost", port).Run()
reachable := err == nil reachable := err == nil
if conn != nil {
conn.Close()
}
if snap.Extra == nil { if snap.Extra == nil {
snap.Extra = make(map[string]any) snap.Extra = make(map[string]any)
} }