diff --git a/Dockerfile b/Dockerfile
index 92554b9..13d9281 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,17 +7,19 @@ RUN go mod download
 
 COPY . .
 
-RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/ingest-gateway ./cmd/ingest-gateway
-RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/query-api ./cmd/query-api
-RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/web-ui ./cmd/web-ui
-RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/event-processor ./cmd/event-processor
-RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/openclaw-monitor ./cmd/openclaw-monitor
-RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/swarm-monitor ./cmd/swarm-monitor
+RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/ingest-gateway ./cmd/ingest-gateway && \
+    CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/query-api ./cmd/query-api && \
+    CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/web-ui ./cmd/web-ui && \
+    CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/event-processor ./cmd/event-processor && \
+    CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/openclaw-monitor ./cmd/openclaw-monitor && \
+    CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/swarm-monitor ./cmd/swarm-monitor
 
 FROM debian:bookworm-slim
 
 RUN apt-get update && apt-get install -y \
+    awscli \
     ca-certificates \
+    curl \
     libvirt-clients \
     openssh-client \
     && rm -rf /var/lib/apt/lists/*
diff --git a/cmd/openclaw-monitor/main.go b/cmd/openclaw-monitor/main.go
index 2325fe9..d8d1313 100644
--- a/cmd/openclaw-monitor/main.go
+++ b/cmd/openclaw-monitor/main.go
@@ -87,6 +87,13 @@ func pollInstances(ctx context.Context, pub *qnats.Publisher, registryPath strin
 			metrics.Backup = backupStatus
 		}
 
+		minioMetrics, err := openclaw.CollectMinIOMetrics(instance.Name)
+		if err != nil {
+			log.Printf("minio collection failed for %s: %v", instance.Name, err)
+		} else {
+			metrics.MinIO = minioMetrics
+		}
+
 		issues := openclaw.DetectIssues(metrics)
 		if anyIssues(issues) {
 			log.Printf("issues detected for %s: %+v", instance.Name, issues)
@@ -121,6 +128,9 @@ func emitEvent(ctx context.Context, pub *qnats.Publisher, instanceName string, m
 	if metrics.Backup != nil {
 		event.Payload["backup"] = metrics.Backup
 	}
+	if metrics.MinIO != nil {
+		event.Payload["minio"] = metrics.MinIO
+	}
 	if metrics.Error != "" {
 		event.Payload["error"] = metrics.Error
 	}
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 47217ef..b1edc7b 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -145,8 +145,13 @@ services:
       NATS_TOPIC: agentmon.events.v1
       OPENCLAW_REGISTRY: /openclaw-registry/openclaw-instances.json
       POLL_INTERVAL: 30s
+      OPENCLAW_MINIO_INSTANCE: zap
+      OPENCLAW_MINIO_ENDPOINT: http://192.168.153.253:9000
+      OPENCLAW_MINIO_BUCKET: zap
+      OPENCLAW_MINIO_PREFIX: backups
     volumes:
       - /home/will/.claude/state/openclaw-instances.json:/openclaw-registry/openclaw-instances.json:ro
+      - /home/will/.aws:/root/.aws:ro
       - /var/run/libvirt/libvirt-sock:/var/run/libvirt/libvirt-sock
       - /home/will/.ssh/id_rsa:/root/.ssh/id_rsa:ro
       - /home/will/.ssh/id_rsa.pub:/root/.ssh/id_rsa.pub:ro
diff --git a/internal/monitor/openclaw/collector.go b/internal/monitor/openclaw/collector.go
index 9464c17..a8931c9 100644
--- a/internal/monitor/openclaw/collector.go
+++ b/internal/monitor/openclaw/collector.go
@@ -3,7 +3,10 @@ package openclaw
 import (
+	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
+	"os"
 	"os/exec"
 	"regexp"
 	"strconv"
 	"strings"
@@ -196,6 +199,111 @@ func CollectBackupStatus(instanceName string) (*BackupStatus, error) {
 	}, nil
 }
 
+// minioListTimeout bounds the aws listing so an unresponsive endpoint cannot
+// stall the poll loop.
+const minioListTimeout = 30 * time.Second
+
+// CollectMinIOMetrics probes the MinIO endpoint that holds backups for
+// instanceName and summarises the objects stored under the configured prefix.
+//
+// Only the instance named by OPENCLAW_MINIO_INSTANCE is monitored; any other
+// instance yields (nil, nil). Endpoint, bucket and prefix are read from
+// OPENCLAW_MINIO_ENDPOINT, OPENCLAW_MINIO_BUCKET and OPENCLAW_MINIO_PREFIX.
+//
+// Collection failures are reported through MinIOMetrics.Error so the caller
+// still receives whatever was gathered before the failure; the returned error
+// is always nil.
+func CollectMinIOMetrics(instanceName string) (*MinIOMetrics, error) {
+	targetInstance := envDefault("OPENCLAW_MINIO_INSTANCE", "zap")
+	if instanceName != targetInstance {
+		return nil, nil
+	}
+
+	endpoint := envDefault("OPENCLAW_MINIO_ENDPOINT", "http://192.168.153.253:9000")
+	bucket := envDefault("OPENCLAW_MINIO_BUCKET", "zap")
+	prefix := envDefault("OPENCLAW_MINIO_PREFIX", "backups")
+
+	metrics := &MinIOMetrics{
+		Endpoint: endpoint,
+		Bucket:   bucket,
+		Prefix:   prefix,
+	}
+
+	healthURL := strings.TrimRight(endpoint, "/") + "/minio/health/live"
+	// --max-time caps the whole request; --connect-timeout alone would let a
+	// server that accepts the connection but never answers block forever.
+	statusOutput, err := exec.Command("curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", "--connect-timeout", "5", "--max-time", "10", healthURL).CombinedOutput()
+	if err != nil {
+		metrics.Error = fmt.Sprintf("health check failed: %v", err)
+		return metrics, nil
+	}
+	if code, err := strconv.Atoi(strings.TrimSpace(string(statusOutput))); err == nil {
+		metrics.HTTPStatus = code
+		metrics.Reachable = code == 200
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), minioListTimeout)
+	defer cancel()
+
+	// Output (rather than CombinedOutput) keeps stderr out of the JSON parsed
+	// below; on failure stderr is still available through exec.ExitError.
+	listOutput, err := exec.CommandContext(
+		ctx,
+		"aws",
+		"--endpoint-url", endpoint,
+		"s3api", "list-objects-v2",
+		"--bucket", bucket,
+		"--prefix", strings.Trim(prefix, "/")+"/",
+		"--output", "json",
+	).Output()
+	if err != nil {
+		msg := err.Error()
+		var exitErr *exec.ExitError
+		if errors.As(err, &exitErr) {
+			if stderr := strings.TrimSpace(string(exitErr.Stderr)); stderr != "" {
+				msg = stderr
+			}
+		}
+		if errors.Is(ctx.Err(), context.DeadlineExceeded) {
+			msg = fmt.Sprintf("listing objects timed out after %s", minioListTimeout)
+		}
+		metrics.Error = msg
+		return metrics, nil
+	}
+
+	var resp struct {
+		Contents []struct {
+			Key          string    `json:"Key"`
+			Size         int64     `json:"Size"`
+			LastModified time.Time `json:"LastModified"`
+		} `json:"Contents"`
+	}
+	// The CLI may print nothing at all for an empty listing; treat that as
+	// zero objects rather than a JSON error.
+	if strings.TrimSpace(string(listOutput)) != "" {
+		if err := json.Unmarshal(listOutput, &resp); err != nil {
+			metrics.Error = fmt.Sprintf("invalid list response: %v", err)
+			return metrics, nil
+		}
+	}
+
+	metrics.ObjectCount = len(resp.Contents)
+	var latest time.Time
+	for i, obj := range resp.Contents {
+		metrics.TotalBytes += obj.Size
+		// Track the newest object in the same pass instead of sorting.
+		if i == 0 || obj.LastModified.After(latest) {
+			latest = obj.LastModified
+			metrics.LatestKey = obj.Key
+		}
+	}
+	if metrics.ObjectCount > 0 {
+		metrics.LatestBackup = latest.UTC().Format(time.RFC3339)
+	}
+
+	return metrics, nil
+}
+
 func DetectIssues(metrics Metrics) Issues {
 	issues := Issues{}
 
@@ -270,6 +378,15 @@ func LoadInstances(registryPath string) ([]Instance, error) {
 	return instances, nil
 }
 
+// envDefault returns the value of environment variable key, or def when the
+// variable is unset or empty.
+func envDefault(key, def string) string {
+	if v := os.Getenv(key); v != "" {
+		return v
+	}
+	return def
+}
+
 func virshCmd(args ...string) (string, error) {
 	cmd := exec.Command("virsh", append([]string{"-c", "qemu:///system"}, args...)...)
 	output, err := cmd.CombinedOutput()
diff --git a/internal/monitor/openclaw/types.go b/internal/monitor/openclaw/types.go
index ce306e5..474dbe2 100644
--- a/internal/monitor/openclaw/types.go
+++ b/internal/monitor/openclaw/types.go
@@ -43,11 +43,27 @@ type BackupStatus struct {
 	AgeHours float64 `json:"age_hours"`
 }
 
+// MinIOMetrics summarises the reachability and backup contents of the MinIO
+// bucket monitored for an instance.
+type MinIOMetrics struct {
+	Endpoint     string `json:"endpoint"`
+	Bucket       string `json:"bucket"`
+	Prefix       string `json:"prefix"`
+	HTTPStatus   int    `json:"http_status"`
+	Reachable    bool   `json:"reachable"`
+	ObjectCount  int    `json:"object_count"`
+	TotalBytes   int64  `json:"total_bytes"`
+	LatestKey    string `json:"latest_key,omitempty"`
+	LatestBackup string `json:"latest_backup,omitempty"`
+	Error        string `json:"error,omitempty"`
+}
+
 type Metrics struct {
 	Instance  Instance      `json:"instance"`
 	Host      HostMetrics   `json:"host"`
 	Guest     *GuestMetrics `json:"guest,omitempty"`
 	Backup    *BackupStatus `json:"backup,omitempty"`
+	MinIO     *MinIOMetrics `json:"minio,omitempty"`
 	Timestamp time.Time     `json:"timestamp"`
 	Error     string        `json:"error,omitempty"`
 }