feat(openclaw-monitor): add MinIO telemetry
This commit is contained in:
+8
-6
@@ -7,17 +7,19 @@ RUN go mod download
|
|||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/ingest-gateway ./cmd/ingest-gateway
|
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/ingest-gateway ./cmd/ingest-gateway && \
|
||||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/query-api ./cmd/query-api
|
CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/query-api ./cmd/query-api && \
|
||||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/web-ui ./cmd/web-ui
|
CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/web-ui ./cmd/web-ui && \
|
||||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/event-processor ./cmd/event-processor
|
CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/event-processor ./cmd/event-processor && \
|
||||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/openclaw-monitor ./cmd/openclaw-monitor
|
CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/openclaw-monitor ./cmd/openclaw-monitor && \
|
||||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/swarm-monitor ./cmd/swarm-monitor
|
CGO_ENABLED=0 GOOS=linux go build -o /usr/local/bin/swarm-monitor ./cmd/swarm-monitor
|
||||||
|
|
||||||
FROM debian:bookworm-slim
|
FROM debian:bookworm-slim
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
|
awscli \
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
|
curl \
|
||||||
libvirt-clients \
|
libvirt-clients \
|
||||||
openssh-client \
|
openssh-client \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|||||||
@@ -87,6 +87,13 @@ func pollInstances(ctx context.Context, pub *qnats.Publisher, registryPath strin
|
|||||||
metrics.Backup = backupStatus
|
metrics.Backup = backupStatus
|
||||||
}
|
}
|
||||||
|
|
||||||
|
minioMetrics, err := openclaw.CollectMinIOMetrics(instance.Name)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("minio collection failed for %s: %v", instance.Name, err)
|
||||||
|
} else {
|
||||||
|
metrics.MinIO = minioMetrics
|
||||||
|
}
|
||||||
|
|
||||||
issues := openclaw.DetectIssues(metrics)
|
issues := openclaw.DetectIssues(metrics)
|
||||||
if anyIssues(issues) {
|
if anyIssues(issues) {
|
||||||
log.Printf("issues detected for %s: %+v", instance.Name, issues)
|
log.Printf("issues detected for %s: %+v", instance.Name, issues)
|
||||||
@@ -121,6 +128,9 @@ func emitEvent(ctx context.Context, pub *qnats.Publisher, instanceName string, m
|
|||||||
if metrics.Backup != nil {
|
if metrics.Backup != nil {
|
||||||
event.Payload["backup"] = metrics.Backup
|
event.Payload["backup"] = metrics.Backup
|
||||||
}
|
}
|
||||||
|
if metrics.MinIO != nil {
|
||||||
|
event.Payload["minio"] = metrics.MinIO
|
||||||
|
}
|
||||||
if metrics.Error != "" {
|
if metrics.Error != "" {
|
||||||
event.Payload["error"] = metrics.Error
|
event.Payload["error"] = metrics.Error
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -145,8 +145,13 @@ services:
|
|||||||
NATS_TOPIC: agentmon.events.v1
|
NATS_TOPIC: agentmon.events.v1
|
||||||
OPENCLAW_REGISTRY: /openclaw-registry/openclaw-instances.json
|
OPENCLAW_REGISTRY: /openclaw-registry/openclaw-instances.json
|
||||||
POLL_INTERVAL: 30s
|
POLL_INTERVAL: 30s
|
||||||
|
OPENCLAW_MINIO_INSTANCE: zap
|
||||||
|
OPENCLAW_MINIO_ENDPOINT: http://192.168.153.253:9000
|
||||||
|
OPENCLAW_MINIO_BUCKET: zap
|
||||||
|
OPENCLAW_MINIO_PREFIX: backups
|
||||||
volumes:
|
volumes:
|
||||||
- /home/will/.claude/state/openclaw-instances.json:/openclaw-registry/openclaw-instances.json:ro
|
- /home/will/.claude/state/openclaw-instances.json:/openclaw-registry/openclaw-instances.json:ro
|
||||||
|
- /home/will/.aws:/root/.aws:ro
|
||||||
- /var/run/libvirt/libvirt-sock:/var/run/libvirt/libvirt-sock
|
- /var/run/libvirt/libvirt-sock:/var/run/libvirt/libvirt-sock
|
||||||
- /home/will/.ssh/id_rsa:/root/.ssh/id_rsa:ro
|
- /home/will/.ssh/id_rsa:/root/.ssh/id_rsa:ro
|
||||||
- /home/will/.ssh/id_rsa.pub:/root/.ssh/id_rsa.pub:ro
|
- /home/will/.ssh/id_rsa.pub:/root/.ssh/id_rsa.pub:ro
|
||||||
|
|||||||
@@ -3,7 +3,9 @@ package openclaw
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"sort"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -196,6 +198,78 @@ func CollectBackupStatus(instanceName string) (*BackupStatus, error) {
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CollectMinIOMetrics(instanceName string) (*MinIOMetrics, error) {
|
||||||
|
targetInstance := envDefault("OPENCLAW_MINIO_INSTANCE", "zap")
|
||||||
|
if instanceName != targetInstance {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := envDefault("OPENCLAW_MINIO_ENDPOINT", "http://192.168.153.253:9000")
|
||||||
|
bucket := envDefault("OPENCLAW_MINIO_BUCKET", "zap")
|
||||||
|
prefix := envDefault("OPENCLAW_MINIO_PREFIX", "backups")
|
||||||
|
|
||||||
|
metrics := &MinIOMetrics{
|
||||||
|
Endpoint: endpoint,
|
||||||
|
Bucket: bucket,
|
||||||
|
Prefix: prefix,
|
||||||
|
}
|
||||||
|
|
||||||
|
healthURL := strings.TrimRight(endpoint, "/") + "/minio/health/live"
|
||||||
|
statusOutput, err := exec.Command("curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", "--connect-timeout", "5", healthURL).CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
metrics.Error = fmt.Sprintf("health check failed: %v", err)
|
||||||
|
return metrics, nil
|
||||||
|
}
|
||||||
|
if code, err := strconv.Atoi(strings.TrimSpace(string(statusOutput))); err == nil {
|
||||||
|
metrics.HTTPStatus = code
|
||||||
|
metrics.Reachable = code == 200
|
||||||
|
}
|
||||||
|
|
||||||
|
listOutput, err := exec.Command(
|
||||||
|
"aws",
|
||||||
|
"--endpoint-url", endpoint,
|
||||||
|
"s3api", "list-objects-v2",
|
||||||
|
"--bucket", bucket,
|
||||||
|
"--prefix", strings.Trim(prefix, "/")+"/",
|
||||||
|
"--output", "json",
|
||||||
|
).CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
msg := strings.TrimSpace(string(listOutput))
|
||||||
|
if msg == "" {
|
||||||
|
msg = err.Error()
|
||||||
|
}
|
||||||
|
metrics.Error = msg
|
||||||
|
return metrics, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var resp struct {
|
||||||
|
Contents []struct {
|
||||||
|
Key string `json:"Key"`
|
||||||
|
Size int64 `json:"Size"`
|
||||||
|
LastModified time.Time `json:"LastModified"`
|
||||||
|
} `json:"Contents"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(listOutput, &resp); err != nil {
|
||||||
|
metrics.Error = fmt.Sprintf("invalid usage response: %v", err)
|
||||||
|
return metrics, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics.ObjectCount = len(resp.Contents)
|
||||||
|
for _, obj := range resp.Contents {
|
||||||
|
metrics.TotalBytes += obj.Size
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(resp.Contents) > 0 {
|
||||||
|
sort.Slice(resp.Contents, func(i, j int) bool {
|
||||||
|
return resp.Contents[i].LastModified.After(resp.Contents[j].LastModified)
|
||||||
|
})
|
||||||
|
metrics.LatestKey = resp.Contents[0].Key
|
||||||
|
metrics.LatestBackup = resp.Contents[0].LastModified.UTC().Format(time.RFC3339)
|
||||||
|
}
|
||||||
|
|
||||||
|
return metrics, nil
|
||||||
|
}
|
||||||
|
|
||||||
func DetectIssues(metrics Metrics) Issues {
|
func DetectIssues(metrics Metrics) Issues {
|
||||||
issues := Issues{}
|
issues := Issues{}
|
||||||
|
|
||||||
@@ -270,6 +344,13 @@ func LoadInstances(registryPath string) ([]Instance, error) {
|
|||||||
return instances, nil
|
return instances, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// envDefault returns the value of the environment variable named key, or def
// when that variable is unset or set to the empty string.
func envDefault(key, def string) string {
	value := os.Getenv(key)
	if value == "" {
		return def
	}
	return value
}
|
||||||
|
|
||||||
func virshCmd(args ...string) (string, error) {
|
func virshCmd(args ...string) (string, error) {
|
||||||
cmd := exec.Command("virsh", append([]string{"-c", "qemu:///system"}, args...)...)
|
cmd := exec.Command("virsh", append([]string{"-c", "qemu:///system"}, args...)...)
|
||||||
output, err := cmd.CombinedOutput()
|
output, err := cmd.CombinedOutput()
|
||||||
|
|||||||
@@ -43,11 +43,25 @@ type BackupStatus struct {
|
|||||||
AgeHours float64 `json:"age_hours"`
|
AgeHours float64 `json:"age_hours"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MinIOMetrics is the JSON-serialisable result of one MinIO probe: the
// settings that were probed, the health-check outcome and a summary of the
// objects that were listed.
type MinIOMetrics struct {
	// Target that was probed.
	Endpoint string `json:"endpoint"`
	Bucket   string `json:"bucket"`
	Prefix   string `json:"prefix"`

	// Health-check outcome.
	HTTPStatus int  `json:"http_status"`
	Reachable  bool `json:"reachable"`

	// Summary of the listed objects.
	ObjectCount int   `json:"object_count"`
	TotalBytes  int64 `json:"total_bytes"`

	// Optional details; each is left out of the JSON when empty.
	LatestKey    string `json:"latest_key,omitempty"`
	LatestBackup string `json:"latest_backup,omitempty"`
	Error        string `json:"error,omitempty"`
}
|
||||||
|
|
||||||
type Metrics struct {
|
type Metrics struct {
|
||||||
Instance Instance `json:"instance"`
|
Instance Instance `json:"instance"`
|
||||||
Host HostMetrics `json:"host"`
|
Host HostMetrics `json:"host"`
|
||||||
Guest *GuestMetrics `json:"guest,omitempty"`
|
Guest *GuestMetrics `json:"guest,omitempty"`
|
||||||
Backup *BackupStatus `json:"backup,omitempty"`
|
Backup *BackupStatus `json:"backup,omitempty"`
|
||||||
|
MinIO *MinIOMetrics `json:"minio,omitempty"`
|
||||||
Timestamp time.Time `json:"timestamp"`
|
Timestamp time.Time `json:"timestamp"`
|
||||||
Error string `json:"error,omitempty"`
|
Error string `json:"error,omitempty"`
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user