feat(scripts): add ops and mcp sentinel automation scripts

This commit is contained in:
zap
2026-03-05 02:17:32 +00:00
parent d31bb80f04
commit ca65f245a3
2 changed files with 439 additions and 0 deletions

242
scripts/mcp-smoke.sh Executable file
View File

@@ -0,0 +1,242 @@
#!/usr/bin/env bash
set -euo pipefail

# mcp-smoke: lightweight smoke test for MCP servers reachable over HTTP.
# Every setting keeps a non-empty value inherited from the environment and
# otherwise falls back to the default shown here (local Brave MCP server).
: "${MCP_URL:=http://192.168.153.113:18802/mcp}"
: "${TIMEOUT_SEC:=10}"
: "${BASELINE_FILE:=memory/mcp-smoke-tools-baseline.txt}"
: "${PROBE_QUERY:=openclaw}"

# Behaviour switches; both start disabled.
UPDATE_BASELINE=0
SKIP_TOOL_CALL=0
#######################################
# Parse command-line flags into the script's configuration globals.
# Globals written:
#   MCP_URL, TIMEOUT_SEC, BASELINE_FILE, PROBE_QUERY,
#   UPDATE_BASELINE, SKIP_TOOL_CALL
# Arguments:
#   The script's own argument list ("$@").
# Exits:
#   0 after printing the help text; 2 on an unknown flag or when a flag that
#   takes a value is given as the last argument.
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --url | --timeout | --baseline | --query)
        # Under `set -u`, touching a missing "$2" kills the script with a bare
        # "unbound variable" message; report a proper usage error instead.
        if (( $# < 2 )); then
          echo "Missing value for $1" >&2
          exit 2
        fi
        case "$1" in
          --url) MCP_URL="$2" ;;
          --timeout) TIMEOUT_SEC="$2" ;;
          --baseline) BASELINE_FILE="$2" ;;
          --query) PROBE_QUERY="$2" ;;
        esac
        shift 2
        ;;
      --update-baseline)
        UPDATE_BASELINE=1
        shift
        ;;
      --skip-tool-call)
        SKIP_TOOL_CALL=1
        shift
        ;;
      -h | --help)
        # Defaults shown are the values in effect at this point, i.e. they
        # include the environment and any flags parsed before --help.
        cat <<EOF
Usage: $(basename "$0") [options]
--url <mcp_url> MCP endpoint (default: ${MCP_URL})
--timeout <seconds> Curl timeout (default: ${TIMEOUT_SEC})
--baseline <path> Baseline tool-name file (default: ${BASELINE_FILE})
--query <text> Query used for brave_web_search probe (default: ${PROBE_QUERY})
--skip-tool-call Skip tools/call probe
--update-baseline Save current tool names as baseline
EOF
        exit 0
        ;;
      *)
        echo "Unknown arg: $1" >&2
        exit 2
        ;;
    esac
  done
}
parse_args "$@"
# Per-run artifact directory: <base>/<UTC date>/<UTC HHMMSS>.
# Both parts are taken from ONE `date` call; with two calls a run that starts
# right at midnight UTC could pair the previous day with the new time-of-day.
run_stamp="$(date -u '+%F %H%M%S')"
TS_DAY="${run_stamp% *}"
TS_STAMP="${run_stamp#* }"
ARTIFACT_DIR="${MCP_SMOKE_OUTPUT_DIR:-/tmp/openclaw-mcp-smoke}/${TS_DAY}/${TS_STAMP}"
mkdir -p "$ARTIFACT_DIR"
# Report buckets (one message per element) and priority counters.
declare -a NOW=() WATCH=() NEXT=()
P1=0
P2=0

# Append "$1" to the matching bucket.
add_now() { NOW[${#NOW[@]}]="$1"; }
add_watch() { WATCH[${#WATCH[@]}]="$1"; }
add_next() { NEXT[${#NEXT[@]}]="$1"; }

# Bump the P1 / P2 finding counters by one.
mark_p1() { (( P1 += 1 )); }
mark_p2() { (( P2 += 1 )); }

# Print the current time using date's "%s%3N" format (epoch seconds followed
# by a 3-digit sub-second field; relies on a `date` that supports %N).
ms_now() {
  date '+%s%3N'
}
# 1) initialize
# POST the JSON-RPC "initialize" request, keeping response headers, body and
# curl's stderr as artifacts. The session id is read from the response headers;
# a JSON-RPC error in the body is reported as P1.
init_headers="$ARTIFACT_DIR/init.headers"
init_body="$ARTIFACT_DIR/init.body"
init_ok=1
init_start="$(ms_now)"
if ! curl -sS -m "$TIMEOUT_SEC" -D "$init_headers" -o "$init_body" \
  -H 'Accept: text/event-stream, application/json' \
  -H 'Content-Type: application/json' \
  -X POST "$MCP_URL" \
  --data '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{},"clientInfo":{"name":"mcp-smoke","version":"1.0"}}}' \
  2>"$ARTIFACT_DIR/init.err"; then
  init_ok=0
  add_now "P1 initialize request failed (${MCP_URL})"
  mark_p1
  add_next "Check MCP endpoint reachability and auth requirements"
fi
init_ms=$(( $(ms_now) - init_start ))

# curl may not have written either file when the transfer failed early, so
# only parse what actually exists (avoids awk/grep "No such file" noise).
session_id=""
if [[ -f "$init_headers" ]]; then
  # Last matching header wins; the header name is compared case-insensitively
  # and the trailing CR of the HTTP line is stripped.
  session_id="$(awk -F': ' 'tolower($1)=="mcp-session-id" {gsub(/\r/,"",$2); print $2}' "$init_headers" | tail -n1 || true)"
fi

init_data_line=""
if [[ -f "$init_body" ]]; then
  # Server-sent-events framing: take the payload of the last "data: " line.
  init_data_line="$(grep '^data: ' "$init_body" | tail -n1 | sed 's/^data: //' || true)"
  # The Accept header also allows a plain application/json reply; in that case
  # there is no "data: " line and the whole body is the JSON-RPC message.
  if [[ -z "$init_data_line" ]]; then
    init_data_line="$(<"$init_body")"
  fi
fi

# Only judge the session header when the request itself went through;
# otherwise the failure above would be double-counted with a message that
# wrongly says "succeeded".
if (( init_ok == 1 )); then
  if [[ -z "$session_id" ]]; then
    add_now "P1 initialize succeeded without mcp-session-id header"
    mark_p1
    add_next "Confirm endpoint is MCP over HTTP (streamable)"
  else
    add_watch "P4 initialize OK (${init_ms}ms)"
  fi
fi

if [[ -n "$init_data_line" ]] && jq -e '.error' >/dev/null 2>&1 <<<"$init_data_line"; then
  init_err_msg="$(jq -r '.error.message? // "unknown initialize error"' <<<"$init_data_line")"
  add_now "P1 initialize error: ${init_err_msg}"
  mark_p1
  add_next "Verify MCP auth/API key configuration"
fi
# 2) notifications/initialized (best effort)
# Sent only when a session id was obtained; any curl failure is ignored.
if [[ -n "$session_id" ]]; then
  initialized_request=(
    -sS -m "$TIMEOUT_SEC"
    -D "$ARTIFACT_DIR/initialized.headers"
    -o "$ARTIFACT_DIR/initialized.body"
    -H "mcp-session-id: ${session_id}"
    -H 'Accept: text/event-stream, application/json'
    -H 'Content-Type: application/json'
    -X POST "$MCP_URL"
    --data '{"jsonrpc":"2.0","method":"notifications/initialized","params":{}}'
  )
  curl "${initialized_request[@]}" >/dev/null 2>"$ARTIFACT_DIR/initialized.err" || true
fi
# 3) tools/list
# Requests the tool catalogue for the current session and writes the sorted,
# de-duplicated tool names to tools.current.txt. Skipped without a session id.
tools_names_file="$ARTIFACT_DIR/tools.current.txt"
tools_ms=0
if [[ -n "$session_id" ]]; then
  tools_start="$(ms_now)"
  if curl -sS -m "$TIMEOUT_SEC" -o "$ARTIFACT_DIR/tools.body" \
    -H "mcp-session-id: ${session_id}" \
    -H 'Accept: text/event-stream, application/json' \
    -H 'Content-Type: application/json' \
    -X POST "$MCP_URL" \
    --data '{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}' \
    2>"$ARTIFACT_DIR/tools.err"; then
    tools_ms=$(( $(ms_now) - tools_start ))

    # SSE reply: payload of the last "data: " line. If there is no such line
    # (plain application/json reply, which the Accept header permits), fall
    # back to the raw body. The redirect always leaves tools.json in place.
    if ! grep '^data: ' "$ARTIFACT_DIR/tools.body" 2>/dev/null \
      | sed 's/^data: //' | tail -n1 > "$ARTIFACT_DIR/tools.json"; then
      if [[ -f "$ARTIFACT_DIR/tools.body" ]]; then
        cp "$ARTIFACT_DIR/tools.body" "$ARTIFACT_DIR/tools.json"
      fi
    fi

    if jq -e '.error' "$ARTIFACT_DIR/tools.json" >/dev/null 2>&1; then
      msg="$(jq -r '.error.message? // "tools/list failed"' "$ARTIFACT_DIR/tools.json")"
      add_now "P1 tools/list error: ${msg}"
      mark_p1
      add_next "Check MCP upstream provider credentials"
    elif jq -e '(.result.tools | type) == "array"' "$ARTIFACT_DIR/tools.json" >/dev/null 2>&1; then
      # Shape verified above, so this jq call cannot fail and trip `set -e`;
      # entries without a string name are dropped instead of printed as "null".
      jq -r '.result.tools[] | .name? | strings' "$ARTIFACT_DIR/tools.json" | sort -u > "$tools_names_file"
      tool_count="$(wc -l < "$tools_names_file" | tr -d ' ')"
      add_watch "P4 tools/list OK (${tools_ms}ms, ${tool_count} tools)"
    else
      # Empty, non-JSON or unexpected reply: previously this was reported as
      # "OK (0 tools)" or aborted the script before the summary was printed.
      add_now "P1 tools/list returned no usable result"
      mark_p1
      add_next "Inspect raw response in ${ARTIFACT_DIR}/tools.body"
    fi
  else
    add_now "P1 tools/list request failed"
    mark_p1
    add_next "Inspect MCP server logs and network path"
  fi
fi
# 4) optional tool probe (auth + runtime)
# Runs one brave_web_search call when probing is enabled, a session exists and
# the tool list is non-empty. If the tool is not listed, the probe is skipped
# with a P3 note.
if (( SKIP_TOOL_CALL == 0 )) && [[ -n "$session_id" ]] && [[ -s "$tools_names_file" ]]; then
  if grep -qx 'brave_web_search' "$tools_names_file"; then
    # Build the request with jq so PROBE_QUERY is JSON-escaped; splicing it
    # into a string literal breaks on quotes, backslashes or newlines.
    call_payload="$(jq -cn --arg query "$PROBE_QUERY" \
      '{jsonrpc:"2.0",id:3,method:"tools/call",params:{name:"brave_web_search",arguments:{query:$query,count:1}}}')"
    call_start="$(ms_now)"
    if curl -sS -m "$TIMEOUT_SEC" -o "$ARTIFACT_DIR/tool-call.body" \
      -H "mcp-session-id: ${session_id}" \
      -H 'Accept: text/event-stream, application/json' \
      -H 'Content-Type: application/json' \
      -X POST "$MCP_URL" \
      --data "$call_payload" \
      2>"$ARTIFACT_DIR/tool-call.err"; then
      call_ms=$(( $(ms_now) - call_start ))

      # SSE reply: payload of the last "data: " line; otherwise treat the raw
      # body as the JSON-RPC message (plain application/json reply).
      if ! grep '^data: ' "$ARTIFACT_DIR/tool-call.body" 2>/dev/null \
        | sed 's/^data: //' | tail -n1 > "$ARTIFACT_DIR/tool-call.json"; then
        if [[ -f "$ARTIFACT_DIR/tool-call.body" ]]; then
          cp "$ARTIFACT_DIR/tool-call.body" "$ARTIFACT_DIR/tool-call.json"
        fi
      fi

      if jq -e '.error' "$ARTIFACT_DIR/tool-call.json" >/dev/null 2>&1; then
        msg="$(jq -r '.error.message? // "tools/call failed"' "$ARTIFACT_DIR/tool-call.json")"
        add_now "P1 tools/call error: ${msg}"
        mark_p1
        add_next "Verify Brave API key/plan and outbound internet access"
      elif jq -e '.result.isError == true' "$ARTIFACT_DIR/tool-call.json" >/dev/null 2>&1; then
        # MCP reports tool execution failures inside a normal result with
        # isError=true, not as a JSON-RPC error; surface the first text item.
        msg="$(jq -r '(try .result.content[0].text catch null) // "tool reported an error"' \
          "$ARTIFACT_DIR/tool-call.json" 2>/dev/null || true)"
        msg="${msg%%$'\n'*}"
        msg="${msg:0:200}"
        add_now "P1 tools/call error: ${msg:-tool reported an error}"
        mark_p1
        add_next "Verify Brave API key/plan and outbound internet access"
      elif jq -e '.result' "$ARTIFACT_DIR/tool-call.json" >/dev/null 2>&1; then
        add_watch "P4 tools/call brave_web_search OK (${call_ms}ms)"
      else
        # Empty or non-JSON reply must not be counted as a successful probe.
        add_now "P1 tools/call returned no usable result"
        mark_p1
        add_next "Inspect raw response in ${ARTIFACT_DIR}/tool-call.body"
      fi
    else
      add_now "P1 tools/call request failed"
      mark_p1
      add_next "Check MCP service health and external API reachability"
    fi
  else
    add_watch "P3 brave_web_search not present; skipped tools/call probe"
  fi
fi
# 5) tool-list drift
# Compares the current tool names with the baseline file. Only runs when the
# current list is non-empty.
if [[ -s "$tools_names_file" ]]; then
  if [[ ! -f "$BASELINE_FILE" ]]; then
    # No baseline on disk yet.
    if (( UPDATE_BASELINE == 1 )); then
      add_watch "P4 Baseline bootstrap mode (creating ${BASELINE_FILE})"
    else
      add_watch "P3 No baseline file yet (${BASELINE_FILE})"
      add_next "Run with --update-baseline after confirming current tool list"
    fi
  else
    baseline_sorted="$ARTIFACT_DIR/tools.baseline.sorted.txt"
    added_file="$ARTIFACT_DIR/tools.added.txt"
    removed_file="$ARTIFACT_DIR/tools.removed.txt"

    sort -u "$BASELINE_FILE" > "$baseline_sorted"
    # comm -13: lines only in the current list; comm -23: only in the baseline.
    comm -13 "$baseline_sorted" "$tools_names_file" > "$added_file" || true
    comm -23 "$baseline_sorted" "$tools_names_file" > "$removed_file" || true

    # Arithmetic expansion drops any padding wc puts around the count.
    added_n=$(( $(wc -l < "$added_file") ))
    removed_n=$(( $(wc -l < "$removed_file") ))

    if (( added_n == 0 && removed_n == 0 )); then
      add_watch "P4 Tool list matches baseline"
    else
      add_watch "P2 Tool-list drift detected (+${added_n}/-${removed_n})"
      mark_p2
      add_next "Review drift and update baseline if expected"
    fi
  fi
fi
# Persist the current tool names as the new baseline when requested and there
# is a non-empty list to save; the baseline's parent directory is created.
if [[ -s "$tools_names_file" ]] && (( UPDATE_BASELINE == 1 )); then
  baseline_dir="$(dirname "$BASELINE_FILE")"
  mkdir -p "$baseline_dir"
  cp "$tools_names_file" "$BASELINE_FILE"
  add_watch "P4 Baseline updated: ${BASELINE_FILE}"
fi
# 6) mcporter quick config signal (optional)
# Only when mcporter is on PATH and `mcporter list --json` succeeds: report
# the length of its .servers array (0 when jq cannot read the output).
if command -v mcporter >/dev/null 2>&1 \
  && mcporter list --json >"$ARTIFACT_DIR/mcporter-list.json" 2>"$ARTIFACT_DIR/mcporter-list.err"; then
  configured="$(jq -r '(.servers // []) | length' "$ARTIFACT_DIR/mcporter-list.json" 2>/dev/null || echo 0)"
  add_watch "P4 mcporter configured servers: ${configured}"
fi
# Overall verdict and exit code: any P1 finding outranks any P2 finding.
if (( P1 > 0 )); then
  VERDICT="NEEDS_ATTENTION"
  EXIT_CODE=2
elif (( P2 > 0 )); then
  VERDICT="MONITOR"
  EXIT_CODE=1
else
  VERDICT="OK"
  EXIT_CODE=0
fi
# Print the report to stdout and keep a copy in the artifact directory, then
# exit with the verdict's code. Array lengths are checked before expansion so
# an empty bucket never gets expanded under `set -u`.
{
  printf '%s\n' \
    "Verdict: ${VERDICT}" \
    "Counts: p1=${P1} p2=${P2}" \
    "Endpoint: ${MCP_URL}" \
    "Session: ${session_id:-none}" \
    "Artifact path: ${ARTIFACT_DIR}" \
    "" \
    "Now:"
  if (( ${#NOW[@]} > 0 )); then
    printf -- '- %s\n' "${NOW[@]}"
  else
    echo "- P4 Nothing urgent"
  fi

  printf '\n%s\n' "Watch:"
  if (( ${#WATCH[@]} > 0 )); then
    printf -- '- %s\n' "${WATCH[@]}"
  else
    echo "- P4 No watch items"
  fi

  printf '\n%s\n' "Next actions:"
  if (( ${#NEXT[@]} > 0 )); then
    # Drop repeated actions (first occurrence wins) and bullet each line.
    printf '%s\n' "${NEXT[@]}" | awk '!seen[$0]++ { print "- " $0 }'
  else
    echo "- Keep current baseline and run periodically"
  fi
} | tee "$ARTIFACT_DIR/summary.txt"
exit "$EXIT_CODE"

197
scripts/ops-sentinel.sh Executable file
View File

@@ -0,0 +1,197 @@
#!/usr/bin/env bash
set -euo pipefail

# ops-sentinel: lightweight operational snapshot for the OpenClaw homelab.
# Report layout: Now / Soon / Watch / Next actions.
#
# Every setting keeps a non-empty value inherited from the environment and
# otherwise falls back to the default shown here.
: "${OPENCLAW_BIN:=openclaw}"
: "${BACKUP_LOG:=memory/minio-backup.log}"
: "${BACKUP_MAX_AGE_HOURS:=8}"
: "${SEARX_URL:=http://192.168.153.113:18803}"
: "${WHISPER_URL:=http://192.168.153.113:18801}"
: "${MCP_URL:=http://192.168.153.113:18802/mcp}"
: "${WARN_DISK_PCT:=85}"
: "${WARN_MEM_PCT:=85}"
# Per-run artifact directory: <base>/<UTC date>/<UTC HHMMSS>.
# Both parts are taken from ONE `date` call; with two calls a run that starts
# right at midnight UTC could pair the previous day with the new time-of-day.
run_stamp="$(date -u '+%F %H%M%S')"
TS_DAY="${run_stamp% *}"
TS_STAMP="${run_stamp#* }"
ARTIFACT_DIR="${HEALTHCHECK_OUTPUT_DIR:-/tmp/openclaw-healthcheck}/${TS_DAY}/${TS_STAMP}"
mkdir -p "$ARTIFACT_DIR"
# Report buckets (one message per element) and priority counters.
declare -a NOW=() SOON=() WATCH=() NEXT=()
P1=0
P2=0

# Append "$1" to the matching bucket.
add_now() { NOW[${#NOW[@]}]="$1"; }
add_soon() { SOON[${#SOON[@]}]="$1"; }
add_watch() { WATCH[${#WATCH[@]}]="$1"; }
add_next() { NEXT[${#NEXT[@]}]="$1"; }

# Bump the P1 / P2 finding counters by one.
mark_p1() { (( P1 += 1 )); }
mark_p2() { (( P2 += 1 )); }
#######################################
# Probe one HTTP endpoint with curl and file the outcome in the report.
# Globals:
#   ARTIFACT_DIR (read) - the response body goes to http-<name>.txt and
#                         curl's stderr to http-<name>.err in this directory
# Arguments:
#   $1 - short service name (used in messages and artifact file names)
#   $2 - URL to request
#   $3 - extended regex the HTTP status code must match to count as OK
#   $4 - optional curl max time in seconds (default: 6)
# Side effects:
#   curl failure      -> add_now (P1), mark_p1, add_next
#   status matches    -> add_watch (P4)
#   status mismatches -> add_watch (P2), mark_p2, add_next
#######################################
http_probe() {
  local name="$1" url="$2" expected_regex="$3" timeout_sec="${4:-6}"
  local out_file="$ARTIFACT_DIR/http-${name}.txt"
  local result code ttotal

  # Declared separately above so the curl exit status is not masked by `local`.
  if ! result="$(curl -sS -m "$timeout_sec" -o "$out_file" -w '%{http_code} %{time_total}' "$url" 2>"$ARTIFACT_DIR/http-${name}.err")"; then
    add_now "P1 ${name} unreachable (${url})"
    mark_p1
    add_next "Check ${name} service/container and LAN route"
    return 0
  fi

  # curl's -w output is "<status> <seconds>".
  code="${result%% *}"
  ttotal="${result##* }"

  # RHS is intentionally unquoted so it is treated as a regex.
  if [[ "$code" =~ $expected_regex ]]; then
    add_watch "P4 ${name} OK (HTTP ${code}, ${ttotal}s)"
  else
    add_watch "P2 ${name} unexpected response (HTTP ${code}, ${ttotal}s)"
    mark_p2
    add_next "Validate ${name} endpoint/health semantics"
  fi
}
# 1) OpenClaw health + security
# Health: the command must exit 0 AND its JSON must have .ok == true.
health_json="$ARTIFACT_DIR/openclaw-health.json"
if ! "$OPENCLAW_BIN" health --json >"$health_json" 2>"$ARTIFACT_DIR/openclaw-health.err"; then
  add_now "P1 Failed to run openclaw health"
  mark_p1
  add_next "Run: openclaw status && openclaw logs --follow"
elif ! jq -e '.ok == true' "$health_json" >/dev/null 2>&1; then
  add_now "P1 OpenClaw health reported not-ok"
  mark_p1
  add_next "Run: openclaw health --json"
else
  add_watch "P4 OpenClaw gateway health OK"
fi
# Security audit: read .summary.critical and .summary.warn from the JSON
# output; a failing audit command is only a P3 note.
audit_json="$ARTIFACT_DIR/openclaw-security-audit.json"
if ! "$OPENCLAW_BIN" security audit --json >"$audit_json" 2>"$ARTIFACT_DIR/openclaw-security-audit.err"; then
  add_watch "P3 Security audit command failed"
  add_next "Run: openclaw security audit --json"
else
  # Missing fields and jq failures both collapse to 0.
  sec_critical="$(jq -r '.summary.critical // 0' "$audit_json" 2>/dev/null || echo 0)"
  sec_warn="$(jq -r '.summary.warn // 0' "$audit_json" 2>/dev/null || echo 0)"

  # Values are only compared once they are known to be plain digits.
  if [[ "$sec_critical" =~ ^[0-9]+$ ]] && (( sec_critical > 0 )); then
    add_now "P1 Security audit has ${sec_critical} critical finding(s)"
    mark_p1
    add_next "Run: openclaw security audit --deep"
  fi

  if [[ "$sec_warn" =~ ^[0-9]+$ ]] && (( sec_warn > 0 )); then
    add_watch "P2 Security audit has ${sec_warn} warning(s)"
    mark_p2
    add_next "Review plugin/tool policy allowlists"
  fi
fi
# 2) Backup freshness from minio backup log
# Takes the last "Backup complete:" line, extracts the
# workspace-backups/<YYYYMMDD>T<HHMMSS>Z key and compares its age (whole
# hours) with BACKUP_MAX_AGE_HOURS.
if [[ ! -f "$BACKUP_LOG" ]]; then
  add_now "P1 Backup log missing: ${BACKUP_LOG}"
  mark_p1
  add_next "Create backup log or fix backup script path"
else
  last_key="$(grep 'Backup complete:' "$BACKUP_LOG" | tail -n1 | sed -E 's#.*workspace-backups/([0-9]{8}T[0-9]{6}Z).*#\1#' || true)"
  if [[ -z "$last_key" ]]; then
    add_now "P1 No 'Backup complete' entry found in ${BACKUP_LOG}"
    mark_p1
    add_next "Run backup and confirm completion line is written"
  else
    # Re-shape YYYYMMDDTHHMMSSZ into "YYYY-MM-DD HH:MM:SS UTC" for `date -d`.
    printf -v backup_iso '%s-%s-%s %s:%s:%s UTC' \
      "${last_key:0:4}" "${last_key:4:2}" "${last_key:6:2}" \
      "${last_key:9:2}" "${last_key:11:2}" "${last_key:13:2}"
    # 0 doubles as the "could not parse" marker.
    backup_epoch="$(date -u -d "$backup_iso" +%s 2>/dev/null || echo 0)"
    now_epoch="$(date -u +%s)"

    if (( backup_epoch > 0 )); then
      age_hours=$(( (now_epoch - backup_epoch) / 3600 ))
      if (( age_hours > BACKUP_MAX_AGE_HOURS )); then
        backup_state="stale"
      elif (( age_hours >= BACKUP_MAX_AGE_HOURS - 1 )); then
        backup_state="nearing"
      else
        backup_state="fresh"
      fi
    else
      backup_state="unparsed"
    fi

    case "$backup_state" in
      stale)
        add_now "P1 Backup stale: last success ${age_hours}h ago (${last_key})"
        mark_p1
        add_next "Run backup job now and verify new 'Backup complete' entry"
        ;;
      nearing)
        add_soon "P2 Backup nearing threshold: ${age_hours}h old (${last_key})"
        mark_p2
        ;;
      fresh)
        add_watch "P4 Backup fresh (${age_hours}h old, ${last_key})"
        ;;
      unparsed)
        add_now "P1 Could not parse backup timestamp from ${BACKUP_LOG}"
        mark_p1
        add_next "Inspect backup log format or backup script output"
        ;;
    esac
  fi
fi
# 3) Key LAN service probes
# Parallel arrays: service name, URL, and the status-code regex that counts as
# healthy. Probes run in the listed order.
probe_names=("searxng" "whisper" "brave-mcp")
probe_urls=("$SEARX_URL" "$WHISPER_URL" "$MCP_URL")
probe_expected=('^200$' '^200$' '^(200|406)$')
for probe_idx in "${!probe_names[@]}"; do
  http_probe "${probe_names[probe_idx]}" "${probe_urls[probe_idx]}" "${probe_expected[probe_idx]}"
done
# 4) Host pressure: disk + memory
# Root filesystem usage: 5th column of the second line of `df -P /`, with the
# percent sign removed. Non-numeric output is skipped without a report entry.
root_disk_pct="$(df -P / | awk 'NR==2 {gsub(/%/,"",$5); print $5}' 2>/dev/null || echo 0)"
if [[ "$root_disk_pct" =~ ^[0-9]+$ ]]; then
  if (( root_disk_pct >= 95 )); then
    disk_level="critical"
  elif (( root_disk_pct >= WARN_DISK_PCT )); then
    disk_level="high"
  else
    disk_level="normal"
  fi

  case "$disk_level" in
    critical)
      add_now "P1 Root disk critical: ${root_disk_pct}% used"
      mark_p1
      add_next "Free disk space urgently"
      ;;
    high)
      add_soon "P2 Root disk high: ${root_disk_pct}% used"
      mark_p2
      add_next "Prune logs/artifacts and monitor growth"
      ;;
    normal)
      add_watch "P4 Root disk normal: ${root_disk_pct}% used"
      ;;
  esac
fi
# Memory usage from /proc/meminfo: used% = 100 * (MemTotal - MemAvailable) /
# MemTotal, integer arithmetic. Skipped when the file is unreadable or either
# field is not a plain number.
if [[ -r /proc/meminfo ]]; then
  mem_total_kb="$(awk '/MemTotal:/ {print $2}' /proc/meminfo)"
  mem_avail_kb="$(awk '/MemAvailable:/ {print $2}' /proc/meminfo)"
  if [[ "$mem_total_kb" =~ ^[0-9]+$ && "$mem_avail_kb" =~ ^[0-9]+$ ]] && (( mem_total_kb > 0 )); then
    mem_used_kb=$(( mem_total_kb - mem_avail_kb ))
    mem_used_pct=$(( 100 * mem_used_kb / mem_total_kb ))

    if (( mem_used_pct >= 95 )); then
      mem_level="critical"
    elif (( mem_used_pct >= WARN_MEM_PCT )); then
      mem_level="high"
    else
      mem_level="normal"
    fi

    case "$mem_level" in
      critical)
        add_now "P1 Memory pressure critical: ${mem_used_pct}% used"
        mark_p1
        add_next "Inspect heavy processes / reduce workload"
        ;;
      high)
        add_soon "P2 Memory pressure high: ${mem_used_pct}% used"
        mark_p2
        add_next "Check workload spikes and tune limits"
        ;;
      normal)
        add_watch "P4 Memory normal: ${mem_used_pct}% used"
        ;;
    esac
  fi
fi
# Overall verdict and exit code: any P1 finding outranks any P2 finding.
if (( P1 > 0 )); then
  VERDICT="NEEDS_ATTENTION"
  EXIT_CODE=2
elif (( P2 > 0 )); then
  VERDICT="MONITOR"
  EXIT_CODE=1
else
  VERDICT="OK"
  EXIT_CODE=0
fi
# Print the report to stdout and keep a copy in the artifact directory, then
# exit with the verdict's code. Array lengths are checked before expansion so
# an empty bucket never gets expanded under `set -u`.
{
  printf '%s\n' \
    "Verdict: ${VERDICT}" \
    "Counts: p1=${P1} p2=${P2}" \
    "Artifact path: ${ARTIFACT_DIR}" \
    "" \
    "Now:"
  if (( ${#NOW[@]} > 0 )); then
    printf -- '- %s\n' "${NOW[@]}"
  else
    echo "- P4 Nothing urgent"
  fi

  printf '\n%s\n' "Soon:"
  if (( ${#SOON[@]} > 0 )); then
    printf -- '- %s\n' "${SOON[@]}"
  else
    echo "- P4 No near-term risks"
  fi

  printf '\n%s\n' "Watch:"
  if (( ${#WATCH[@]} > 0 )); then
    printf -- '- %s\n' "${WATCH[@]}"
  else
    echo "- P4 No watch items"
  fi

  printf '\n%s\n' "Next actions:"
  if (( ${#NEXT[@]} > 0 )); then
    # Drop repeated actions (first occurrence wins) and bullet each line.
    printf '%s\n' "${NEXT[@]}" | awk '!seen[$0]++ { print "- " $0 }'
  else
    echo "- Keep current cadence"
  fi
} | tee "$ARTIFACT_DIR/summary.txt"
exit "$EXIT_CODE"