#!/usr/bin/env bash
#
# Lightweight operational snapshot for OpenClaw homelab.
# Output frame: Now / Soon / Watch / Next actions
#
# Checks, in order:
#   1) `openclaw health --json` and `openclaw security audit --json`
#   2) age of the newest "Backup complete:" entry in the backup log
#   3) HTTP probes of the SearXNG, Whisper and MCP endpoints
#   4) root filesystem usage (df) and memory usage (/proc/meminfo)
#
# Environment overrides (defaults are in the assignments below):
#   OPENCLAW_BIN, BACKUP_LOG, BACKUP_MAX_AGE_HOURS, SEARX_URL, WHISPER_URL,
#   MCP_URL, WARN_DISK_PCT, WARN_MEM_PCT, HEALTHCHECK_OUTPUT_DIR
#
# Uses: curl, jq, df, awk, sed, grep, tail, tee, and a `date` that accepts
# `-d <string>` (e.g. GNU date).
#
# Exit status of a completed run:
#   0 = OK, 1 = MONITOR (at least one P2), 2 = NEEDS_ATTENTION (at least one P1)

set -euo pipefail

OPENCLAW_BIN="${OPENCLAW_BIN:-openclaw}"
BACKUP_LOG="${BACKUP_LOG:-memory/minio-backup.log}"
BACKUP_MAX_AGE_HOURS="${BACKUP_MAX_AGE_HOURS:-8}"
SEARX_URL="${SEARX_URL:-http://192.168.153.113:18803}"
WHISPER_URL="${WHISPER_URL:-http://192.168.153.113:18801}"
MCP_URL="${MCP_URL:-http://192.168.153.113:18802/mcp}"
WARN_DISK_PCT="${WARN_DISK_PCT:-85}"
WARN_MEM_PCT="${WARN_MEM_PCT:-85}"

#######################################
# Force a threshold variable to a plain base-10 integer so the later (( ))
# comparisons never see a malformed override.
# Arguments: $1 - name of the variable to check
#            $2 - value to use when the current one is not all digits
# Outputs:   a warning on stderr when the fallback is used
#######################################
use_uint_or_default() {
  local var_name="$1" fallback="$2"
  local current="${!var_name}"
  if [[ "$current" =~ ^[0-9]+$ ]]; then
    # 10# drops leading zeros; bash arithmetic would otherwise read "08" as
    # an (invalid) octal constant.
    printf -v "$var_name" '%d' "$((10#$current))"
  else
    printf 'warn: %s=%s is not a non-negative integer; using %s\n' \
      "$var_name" "$current" "$fallback" >&2
    printf -v "$var_name" '%s' "$fallback"
  fi
}

use_uint_or_default BACKUP_MAX_AGE_HOURS 8
use_uint_or_default WARN_DISK_PCT 85
use_uint_or_default WARN_MEM_PCT 85

# Take day and time from a single `date` call so a run that straddles UTC
# midnight cannot pair the old day with a new-day time stamp.
RUN_STAMP="$(date -u '+%F %H%M%S')"
TS_DAY="${RUN_STAMP% *}"
TS_STAMP="${RUN_STAMP#* }"
ARTIFACT_DIR="${HEALTHCHECK_OUTPUT_DIR:-/tmp/openclaw-healthcheck}/${TS_DAY}/${TS_STAMP}"
mkdir -p "$ARTIFACT_DIR"

# Report buckets and severity counters.
NOW=()
SOON=()
WATCH=()
NEXT=()
P1=0
P2=0

add_now() { NOW+=("$1"); }
add_soon() { SOON+=("$1"); }
add_watch() { WATCH+=("$1"); }
add_next() { NEXT+=("$1"); }
mark_p1() { P1=$((P1 + 1)); }
mark_p2() { P2=$((P2 + 1)); }

#######################################
# Probe one HTTP endpoint and file the result into the report buckets.
# Globals:   ARTIFACT_DIR (read); NOW, WATCH, NEXT, P1, P2 (written via helpers)
# Arguments: $1 - short service name (used in messages and artifact names)
#            $2 - URL to request
#            $3 - regex the HTTP status code must match to count as OK
# Outputs:   response body in http-<name>.txt, curl stderr in http-<name>.err
# Returns:   0 in every case; failures are recorded, not propagated
#######################################
http_probe() {
  local name="$1" url="$2" expected_regex="$3"
  local out_file="$ARTIFACT_DIR/http-${name}.txt"
  local result code ttotal

  # -m 6 caps the whole request at six seconds; -w prints "<code> <seconds>".
  if ! result="$(curl -sS -m 6 -o "$out_file" -w '%{http_code} %{time_total}' "$url" 2>"$ARTIFACT_DIR/http-${name}.err")"; then
    add_now "P1 ${name} unreachable (${url})"
    mark_p1
    add_next "Check ${name} service/container and LAN route"
    return
  fi

  code="${result%% *}"
  ttotal="${result##* }"
  if [[ "$code" =~ $expected_regex ]]; then
    add_watch "P4 ${name} OK (HTTP ${code}, ${ttotal}s)"
  else
    add_watch "P2 ${name} unexpected response (HTTP ${code}, ${ttotal}s)"
    mark_p2
    add_next "Validate ${name} endpoint/health semantics"
  fi
}

# 1) OpenClaw health + security
if "$OPENCLAW_BIN" health --json >"$ARTIFACT_DIR/openclaw-health.json" 2>"$ARTIFACT_DIR/openclaw-health.err"; then
  if jq -e '.ok == true' "$ARTIFACT_DIR/openclaw-health.json" >/dev/null 2>&1; then
    add_watch "P4 OpenClaw gateway health OK"
  else
    add_now "P1 OpenClaw health reported not-ok"
    mark_p1
    add_next "Run: openclaw health --json"
  fi
else
  add_now "P1 Failed to run openclaw health"
  mark_p1
  add_next "Run: openclaw status && openclaw logs --follow"
fi

if "$OPENCLAW_BIN" security audit --json >"$ARTIFACT_DIR/openclaw-security-audit.json" 2>"$ARTIFACT_DIR/openclaw-security-audit.err"; then
  # A missing field or unreadable JSON is treated as a count of 0.
  sec_critical="$(jq -r '.summary.critical // 0' "$ARTIFACT_DIR/openclaw-security-audit.json" 2>/dev/null || echo 0)"
  sec_warn="$(jq -r '.summary.warn // 0' "$ARTIFACT_DIR/openclaw-security-audit.json" 2>/dev/null || echo 0)"
  if [[ "$sec_critical" =~ ^[0-9]+$ ]] && (( sec_critical > 0 )); then
    add_now "P1 Security audit has ${sec_critical} critical finding(s)"
    mark_p1
    add_next "Run: openclaw security audit --deep"
  fi
  if [[ "$sec_warn" =~ ^[0-9]+$ ]] && (( sec_warn > 0 )); then
    add_watch "P2 Security audit has ${sec_warn} warning(s)"
    mark_p2
    add_next "Review plugin/tool policy allowlists"
  fi
else
  add_watch "P3 Security audit command failed"
  add_next "Run: openclaw security audit --json"
fi

# 2) Backup freshness from minio backup log
if [[ -f "$BACKUP_LOG" ]]; then
  # `|| true`: grep exits 1 when nothing matches and pipefail is on.
  last_line="$(grep 'Backup complete:' "$BACKUP_LOG" | tail -n1 || true)"
  if [[ -z "$last_line" ]]; then
    add_now "P1 No 'Backup complete' entry found in ${BACKUP_LOG}"
    mark_p1
    add_next "Run backup and confirm completion line is written"
  else
    # -n ... p prints only on a successful substitution, so a completion line
    # without a recognisable key yields an empty string instead of being
    # sliced as if it were a timestamp.
    last_key="$(sed -nE 's#.*workspace-backups/([0-9]{8}T[0-9]{6}Z).*#\1#p' <<<"$last_line")"
    backup_epoch=0
    if [[ -n "$last_key" ]]; then
      # Key layout is YYYYMMDDTHHMMSSZ; rebuild it as "YYYY-MM-DD HH:MM:SS UTC".
      backup_iso="${last_key:0:4}-${last_key:4:2}-${last_key:6:2} ${last_key:9:2}:${last_key:11:2}:${last_key:13:2} UTC"
      backup_epoch="$(date -u -d "$backup_iso" +%s 2>/dev/null || echo 0)"
    fi

    if (( backup_epoch > 0 )); then
      now_epoch="$(date -u +%s)"
      age_hours=$(( (now_epoch - backup_epoch) / 3600 ))
      if (( age_hours > BACKUP_MAX_AGE_HOURS )); then
        add_now "P1 Backup stale: last success ${age_hours}h ago (${last_key})"
        mark_p1
        add_next "Run backup job now and verify new 'Backup complete' entry"
      elif (( age_hours >= BACKUP_MAX_AGE_HOURS - 1 )); then
        add_soon "P2 Backup nearing threshold: ${age_hours}h old (${last_key})"
        mark_p2
      else
        add_watch "P4 Backup fresh (${age_hours}h old, ${last_key})"
      fi
    else
      add_now "P1 Could not parse backup timestamp from ${BACKUP_LOG}"
      mark_p1
      add_next "Inspect backup log format or backup script output"
    fi
  fi
else
  add_now "P1 Backup log missing: ${BACKUP_LOG}"
  mark_p1
  add_next "Create backup log or fix backup script path"
fi

# 3) Key LAN service probes
http_probe "searxng" "$SEARX_URL" '^200$'
http_probe "whisper" "$WHISPER_URL" '^200$'
http_probe "brave-mcp" "$MCP_URL" '^(200|406)$'

# 4) Host pressure: disk + memory
# On failure leave the value empty rather than substituting 0, which would be
# reported as a healthy "0% used".
root_disk_pct="$(df -P / | awk 'NR==2 {gsub(/%/,"",$5); print $5}' 2>/dev/null || true)"
if [[ "$root_disk_pct" =~ ^[0-9]+$ ]]; then
  if (( root_disk_pct >= 95 )); then
    add_now "P1 Root disk critical: ${root_disk_pct}% used"
    mark_p1
    add_next "Free disk space urgently"
  elif (( root_disk_pct >= WARN_DISK_PCT )); then
    add_soon "P2 Root disk high: ${root_disk_pct}% used"
    mark_p2
    add_next "Prune logs/artifacts and monitor growth"
  else
    add_watch "P4 Root disk normal: ${root_disk_pct}% used"
  fi
else
  add_watch "P3 Could not read root disk usage"
  add_next "Run: df -P /"
fi

# Memory is only checked when /proc/meminfo is readable; otherwise it is skipped.
if [[ -r /proc/meminfo ]]; then
  mem_total_kb="$(awk '/MemTotal:/ {print $2}' /proc/meminfo)"
  mem_avail_kb="$(awk '/MemAvailable:/ {print $2}' /proc/meminfo)"
  if [[ "$mem_total_kb" =~ ^[0-9]+$ ]] && [[ "$mem_avail_kb" =~ ^[0-9]+$ ]] && (( mem_total_kb > 0 )); then
    mem_used_pct=$(( (100 * (mem_total_kb - mem_avail_kb)) / mem_total_kb ))
    if (( mem_used_pct >= 95 )); then
      add_now "P1 Memory pressure critical: ${mem_used_pct}% used"
      mark_p1
      add_next "Inspect heavy processes / reduce workload"
    elif (( mem_used_pct >= WARN_MEM_PCT )); then
      add_soon "P2 Memory pressure high: ${mem_used_pct}% used"
      mark_p2
      add_next "Check workload spikes and tune limits"
    else
      add_watch "P4 Memory normal: ${mem_used_pct}% used"
    fi
  fi
fi

# Any P1 outranks any number of P2s.
VERDICT="OK"
EXIT_CODE=0
if (( P1 > 0 )); then
  VERDICT="NEEDS_ATTENTION"
  EXIT_CODE=2
elif (( P2 > 0 )); then
  VERDICT="MONITOR"
  EXIT_CODE=1
fi

# Print the report and keep a copy next to the other artifacts.
{
  echo "Verdict: ${VERDICT}"
  echo "Counts: p1=${P1} p2=${P2}"
  echo "Artifact path: ${ARTIFACT_DIR}"
  echo
  echo "Now:"
  if (( ${#NOW[@]} == 0 )); then
    echo "- P4 Nothing urgent"
  else
    for x in "${NOW[@]}"; do echo "- ${x}"; done
  fi
  echo
  echo "Soon:"
  if (( ${#SOON[@]} == 0 )); then
    echo "- P4 No near-term risks"
  else
    for x in "${SOON[@]}"; do echo "- ${x}"; done
  fi
  echo
  echo "Watch:"
  if (( ${#WATCH[@]} == 0 )); then
    echo "- P4 No watch items"
  else
    for x in "${WATCH[@]}"; do echo "- ${x}"; done
  fi
  echo
  echo "Next actions:"
  if (( ${#NEXT[@]} == 0 )); then
    echo "- Keep current cadence"
  else
    # awk drops repeated actions while keeping first-seen order.
    printf '%s\n' "${NEXT[@]}" | awk '!seen[$0]++' | sed 's/^/- /'
  fi
} | tee "$ARTIFACT_DIR/summary.txt"

exit "$EXIT_CODE"