feat(openclaw-monitor): add MinIO telemetry

This commit is contained in:
William Valentin
2026-03-26 11:22:45 -07:00
parent 6605780b58
commit 5ff4794d98
5 changed files with 118 additions and 6 deletions
+8 -6
View File
@@ -7,17 +7,19 @@ RUN go mod download
COPY . .
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/ingest-gateway ./cmd/ingest-gateway
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/query-api ./cmd/query-api
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/web-ui ./cmd/web-ui
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/event-processor ./cmd/event-processor
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/openclaw-monitor ./cmd/openclaw-monitor
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/swarm-monitor ./cmd/swarm-monitor
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/ingest-gateway ./cmd/ingest-gateway && \
CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/query-api ./cmd/query-api && \
CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/web-ui ./cmd/web-ui && \
CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/event-processor ./cmd/event-processor && \
CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/openclaw-monitor ./cmd/openclaw-monitor && \
CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/swarm-monitor ./cmd/swarm-monitor
FROM debian:bookworm-slim
RUN apt-get update && apt-get install -y \
awscli \
ca-certificates \
curl \
libvirt-clients \
openssh-client \
&& rm -rf /var/lib/apt/lists/*
+10
View File
@@ -87,6 +87,13 @@ func pollInstances(ctx context.Context, pub *qnats.Publisher, registryPath strin
metrics.Backup = backupStatus
}
minioMetrics, err := openclaw.CollectMinIOMetrics(instance.Name)
if err != nil {
log.Printf("minio collection failed for %s: %v", instance.Name, err)
} else {
metrics.MinIO = minioMetrics
}
issues := openclaw.DetectIssues(metrics)
if anyIssues(issues) {
log.Printf("issues detected for %s: %+v", instance.Name, issues)
@@ -121,6 +128,9 @@ func emitEvent(ctx context.Context, pub *qnats.Publisher, instanceName string, m
if metrics.Backup != nil {
event.Payload["backup"] = metrics.Backup
}
if metrics.MinIO != nil {
event.Payload["minio"] = metrics.MinIO
}
if metrics.Error != "" {
event.Payload["error"] = metrics.Error
}
+5
View File
@@ -145,8 +145,13 @@ services:
NATS_TOPIC: agentmon.events.v1
OPENCLAW_REGISTRY: /openclaw-registry/openclaw-instances.json
POLL_INTERVAL: 30s
OPENCLAW_MINIO_INSTANCE: zap
OPENCLAW_MINIO_ENDPOINT: http://192.168.153.253:9000
OPENCLAW_MINIO_BUCKET: zap
OPENCLAW_MINIO_PREFIX: backups
volumes:
- /home/will/.claude/state/openclaw-instances.json:/openclaw-registry/openclaw-instances.json:ro
- /home/will/.aws:/root/.aws:ro
- /var/run/libvirt/libvirt-sock:/var/run/libvirt/libvirt-sock
- /home/will/.ssh/id_rsa:/root/.ssh/id_rsa:ro
- /home/will/.ssh/id_rsa.pub:/root/.ssh/id_rsa.pub:ro
+81
View File
@@ -3,7 +3,9 @@ package openclaw
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"sort"
"regexp"
"strconv"
"strings"
@@ -196,6 +198,78 @@ func CollectBackupStatus(instanceName string) (*BackupStatus, error) {
}, nil
}
// CollectMinIOMetrics probes the MinIO server that stores backups and
// summarizes the objects found under the configured bucket and prefix.
//
// Only the instance named by OPENCLAW_MINIO_INSTANCE (default "zap") is
// probed; for any other instanceName the result is (nil, nil). The target is
// configured through OPENCLAW_MINIO_ENDPOINT, OPENCLAW_MINIO_BUCKET and
// OPENCLAW_MINIO_PREFIX.
//
// The returned error is always nil. Probe failures are reported through the
// Error field of the returned metrics so that whatever was learned before the
// failure (endpoint, bucket, health status) still reaches the caller.
func CollectMinIOMetrics(instanceName string) (*MinIOMetrics, error) {
	if instanceName != envDefault("OPENCLAW_MINIO_INSTANCE", "zap") {
		return nil, nil
	}

	endpoint := envDefault("OPENCLAW_MINIO_ENDPOINT", "http://192.168.153.253:9000")
	bucket := envDefault("OPENCLAW_MINIO_BUCKET", "zap")
	prefix := envDefault("OPENCLAW_MINIO_PREFIX", "backups")

	metrics := &MinIOMetrics{
		Endpoint: endpoint,
		Bucket:   bucket,
		Prefix:   prefix,
	}

	// Liveness probe. --connect-timeout only bounds the TCP connect, so
	// --max-time is added to keep a server that accepts the connection but
	// never answers from stalling the caller indefinitely.
	healthURL := strings.TrimRight(endpoint, "/") + "/minio/health/live"
	statusOutput, err := exec.Command(
		"curl", "-s", "-o", "/dev/null", "-w", "%{http_code}",
		"--connect-timeout", "5", "--max-time", "15",
		healthURL,
	).Output()
	if err != nil {
		metrics.Error = fmt.Sprintf("health check failed: %v", err)
		return metrics, nil
	}
	// An unparsable status leaves HTTPStatus/Reachable at their zero values;
	// the listing below is still attempted.
	if code, err := strconv.Atoi(strings.TrimSpace(string(statusOutput))); err == nil {
		metrics.HTTPStatus = code
		metrics.Reachable = code == 200
	}

	listArgs := []string{
		"--endpoint-url", endpoint,
		"s3api", "list-objects-v2",
		"--bucket", bucket,
		"--output", "json",
	}
	// A prefix made only of slashes means "the whole bucket". Sending "/"
	// as the prefix would match nothing, so the flag is omitted instead.
	if trimmed := strings.Trim(prefix, "/"); trimmed != "" {
		listArgs = append(listArgs, "--prefix", trimmed+"/")
	}

	// Output (rather than CombinedOutput) keeps anything the CLI writes to
	// stderr out of the JSON document parsed below.
	listOutput, err := exec.Command("aws", listArgs...).Output()
	if err != nil {
		msg := ""
		// Output stores the captured stderr on the ExitError.
		if exitErr, ok := err.(*exec.ExitError); ok {
			msg = strings.TrimSpace(string(exitErr.Stderr))
		}
		if msg == "" {
			msg = strings.TrimSpace(string(listOutput))
		}
		if msg == "" {
			msg = err.Error()
		}
		metrics.Error = msg
		return metrics, nil
	}

	var resp struct {
		Contents []struct {
			Key          string    `json:"Key"`
			Size         int64     `json:"Size"`
			LastModified time.Time `json:"LastModified"`
		} `json:"Contents"`
	}
	// The CLI may print nothing at all when no keys match; treat that as an
	// empty listing rather than as a malformed response.
	if strings.TrimSpace(string(listOutput)) != "" {
		if err := json.Unmarshal(listOutput, &resp); err != nil {
			metrics.Error = fmt.Sprintf("invalid usage response: %v", err)
			return metrics, nil
		}
	}

	metrics.ObjectCount = len(resp.Contents)
	for _, obj := range resp.Contents {
		metrics.TotalBytes += obj.Size
	}

	if len(resp.Contents) > 0 {
		// Newest first, so index 0 is the most recently modified object.
		sort.Slice(resp.Contents, func(i, j int) bool {
			return resp.Contents[i].LastModified.After(resp.Contents[j].LastModified)
		})
		metrics.LatestKey = resp.Contents[0].Key
		metrics.LatestBackup = resp.Contents[0].LastModified.UTC().Format(time.RFC3339)
	}

	return metrics, nil
}
func DetectIssues(metrics Metrics) Issues {
issues := Issues{}
@@ -270,6 +344,13 @@ func LoadInstances(registryPath string) ([]Instance, error) {
return instances, nil
}
// envDefault looks up the environment variable key and returns its value.
// When the variable is unset or set to the empty string, def is returned
// instead.
func envDefault(key, def string) string {
	value := os.Getenv(key)
	if value == "" {
		return def
	}
	return value
}
func virshCmd(args ...string) (string, error) {
cmd := exec.Command("virsh", append([]string{"-c", "qemu:///system"}, args...)...)
output, err := cmd.CombinedOutput()
+14
View File
@@ -43,11 +43,25 @@ type BackupStatus struct {
AgeHours float64 `json:"age_hours"`
}
// MinIOMetrics is the JSON-serializable result of probing a MinIO endpoint
// and listing the objects under a bucket prefix, as filled in by
// CollectMinIOMetrics.
type MinIOMetrics struct {
// Endpoint, Bucket and Prefix record which target was probed.
Endpoint string `json:"endpoint"`
Bucket string `json:"bucket"`
Prefix string `json:"prefix"`
// HTTPStatus is the status code returned by the health-check request;
// Reachable is true only when that code is 200.
HTTPStatus int `json:"http_status"`
Reachable bool `json:"reachable"`
// ObjectCount and TotalBytes are the number of listed objects and the sum
// of their sizes.
ObjectCount int `json:"object_count"`
TotalBytes int64 `json:"total_bytes"`
// LatestKey is the key of the most recently modified listed object and
// LatestBackup its modification time as an RFC 3339 UTC string. Both are
// omitted from the JSON when empty.
LatestKey string `json:"latest_key,omitempty"`
LatestBackup string `json:"latest_backup,omitempty"`
// Error holds a description of a failed health check or listing; it is
// omitted from the JSON when empty.
Error string `json:"error,omitempty"`
}
// Metrics bundles everything collected for one instance during a poll.
// The pointer-typed sections are optional: they are left out of the JSON
// encoding when nil.
type Metrics struct {
Instance Instance `json:"instance"`
Host HostMetrics `json:"host"`
Guest *GuestMetrics `json:"guest,omitempty"`
Backup *BackupStatus `json:"backup,omitempty"`
// MinIO stays nil when MinIO collection returned nothing for this instance.
MinIO *MinIOMetrics `json:"minio,omitempty"`
Timestamp time.Time `json:"timestamp"`
// Error is omitted from the JSON when empty.
Error string `json:"error,omitempty"`
}