Files
swarm-master/scripts/npu-service-health.sh
T
2026-06-04 16:03:52 -07:00

116 lines
3.9 KiB
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail
# Read-only health probe for Will's local OpenVINO/NPU services.
# This script intentionally does not start, stop, restart, enable, reindex, or route anything.
# Tunables. Each one keeps a non-empty value inherited from the environment
# and otherwise falls back to the default shown here.
#   BUSY_PATH     sysfs file read as the NPU busy-time counter
#   CURL_TIMEOUT  value passed to curl --max-time for every HTTP request
#   EMBED_MODEL   model name sent in the embeddings probe request
#   EMBED_URL     endpoint that receives the embeddings probe request
: "${BUSY_PATH:=/sys/class/accel/accel0/device/npu_busy_time_us}"
: "${CURL_TIMEOUT:=8}"
: "${EMBED_MODEL:=bge-base-en-v1.5-int8-ov}"
: "${EMBED_URL:=http://127.0.0.1:18817/v1/embeddings}"
# have NAME
# Succeeds (status 0) when `command -v` can resolve NAME, fails otherwise.
# Produces no output on either stream.
have() {
  local tool=$1
  command -v "$tool" &>/dev/null
}
# json_pretty
# Pretty-prints the JSON document read from stdin to stdout.
# Formatter preference: jq, then python3, then python. Many current systems
# ship only `python3`, so the bare `python` name is tried last.
# Tools are looked up with `command -v` directly so this function does not
# depend on any other helper being defined.
# Returns: the formatter's exit status (non-zero on invalid JSON), or 127
#          with a message on stderr when no formatter is installed.
json_pretty() {
  if command -v jq >/dev/null 2>&1; then
    jq .
  elif command -v python3 >/dev/null 2>&1; then
    python3 -m json.tool
  elif command -v python >/dev/null 2>&1; then
    python -m json.tool
  else
    printf 'json_pretty: no JSON formatter found (need jq, python3, or python)\n' >&2
    return 127
  fi
}
# section TITLE
# Writes a blank line followed by "== TITLE ==" to stdout, marking the start
# of a new part of the report.
section() {
  local title=$1
  printf '\n'
  printf '== %s ==\n' "$title"
}
# http_json LABEL URL
# Prints a "[LABEL] URL" header, fetches URL with curl (bounded by
# CURL_TIMEOUT) and pipes the body through json_pretty.
# Returns 0 when the fetch-and-format pipeline succeeds. Otherwise prints
# "status=unavailable_or_non_json" and returns 1. Detecting a curl failure
# here depends on the script-level `set -o pipefail`.
http_json() {
  local label=$1 endpoint=$2
  printf '\n[%s] %s\n' "$label" "$endpoint"
  if curl -fsS --max-time "$CURL_TIMEOUT" "$endpoint" | json_pretty; then
    return 0
  fi
  printf 'status=unavailable_or_non_json\n'
  return 1
}
# busy_value
# Prints the contents of BUSY_PATH with every newline removed, or the literal
# word "missing" (no trailing newline) when BUSY_PATH is not readable.
busy_value() {
  if [[ ! -r "$BUSY_PATH" ]]; then
    printf 'missing'
    return
  fi
  tr -d '\n' <"$BUSY_PATH"
}
# Show which counter file is being sampled and its current reading
# (busy_value prints "missing" when the file is not readable).
section "NPU counter"
printf 'busy_path=%s\nbusy_time_us=%s\n' "$BUSY_PATH" "$(busy_value)"
section "Listeners"
# Ports of interest for the OpenVINO/NPU program:
#   18810, 18816, 18817   live baseline
#   18818                 reranker
#   18819, 18820, 18829   local-only specialists
#   18830                 advisory gateway
#   18814                 existing RAG/embedding health wrapper
#   18828                 review-only alternate, used to avoid collisions
#                         during prior smoke tests
listener_ports=(18810 18814 18816 18817 18818 18819 18820 18828 18829 18830)
# Join the ports with "|" to form the alternation inside the grep pattern.
port_alternation=$(IFS='|'; printf '%s' "${listener_ports[*]}")
# No matching listener is a valid finding for a read-only probe, so a
# non-zero status from the pipeline is deliberately ignored.
ss -ltnp | grep -E ":(${port_alternation})\\b" || true
section "User service states"
# systemd user units to report on, printed in this order.
user_units=(
  openvino-embeddings.service
  rag-embedding-health.service
  openvino-reranker.service
  openvino-router-classifier.service
  openvino-genai-npu-worker.service
)
for svc in "${user_units[@]}"; do
  # A non-zero status from systemctl is tolerated so the loop continues under
  # set -e; whatever it printed is still captured. Empty output is shown as
  # "unknown".
  svc_active=$(systemctl --user is-active "$svc" 2>/dev/null || true)
  svc_enabled=$(systemctl --user is-enabled "$svc" 2>/dev/null || true)
  printf '%-38s active=%-10s enabled=%s\n' \
    "$svc" "${svc_active:-unknown}" "${svc_enabled:-unknown}"
done
section "Docker service states"
# Directory that `docker compose` is run from. Defaults to the original
# hard-coded location; set SWARM_COMPOSE_DIR to probe a checkout elsewhere.
SWARM_COMPOSE_DIR=${SWARM_COMPOSE_DIR:-/home/will/lab/swarm}
if [[ -d "$SWARM_COMPOSE_DIR" ]]; then
  # Subshell keeps the cd from leaking into the rest of the script. docker's
  # stderr is discarded and any failure ignored: this probe only reports.
  (cd "$SWARM_COMPOSE_DIR" && docker compose ps whisper-server-npu 2>/dev/null) || true
else
  # Say why the section is empty instead of printing nothing at all.
  printf 'status=skipped reason=compose_dir_missing path=%s\n' "$SWARM_COMPOSE_DIR"
fi
section "HTTP health"
# One "label|url" entry per endpoint, probed in this order. The four
# "prototype" entries are expected to be unavailable until explicitly
# started/approved.
health_targets=(
  'RAG endpoint|http://127.0.0.1:18810/healthz'
  'RAG/embedding health wrapper|http://127.0.0.1:18814/healthz'
  'Whisper NPU|http://127.0.0.1:18816/health'
  'OpenVINO embeddings|http://127.0.0.1:18817/healthz'
  'NPU reranker prototype|http://127.0.0.1:18818/readyz'
  'NPU router classifier prototype|http://127.0.0.1:18819/healthz'
  'NPU GenAI worker prototype|http://127.0.0.1:18820/healthz'
  'NPU doc/image triage prototype|http://127.0.0.1:18829/healthz'
)
for target in "${health_targets[@]}"; do
  # Split at the first "|": label before it, URL after it. A failed probe
  # must not stop the remaining ones, hence `|| true`.
  http_json "${target%%|*}" "${target#*|}" || true
done
section "Embeddings NPU busy-time proof"
# Samples the busy-time counter, sends one embeddings request, samples the
# counter again, and requires the counter to have increased.
# Exit codes: 2 = counter unreadable or not a number, 3 = embeddings request
# failed or returned nothing, 4 = counter did not increase.
if [[ ! -r "$BUSY_PATH" ]]; then
  printf 'result=failed reason=missing_busy_counter\n'
  exit 2
fi

before=$(busy_value)
# `|| true` keeps set -e from aborting on a curl error; the failure is
# detected through the empty response and reported in key=value form below.
response=$(curl -fsS --max-time "$CURL_TIMEOUT" \
  "$EMBED_URL" \
  -H 'Content-Type: application/json' \
  -d "{\"input\":\"non-private npu health probe\",\"model\":\"$EMBED_MODEL\"}" || true)
after=$(busy_value)

if [[ -z "$response" ]]; then
  printf 'result=failed reason=embedding_request_failed before_us=%s after_us=%s\n' "$before" "$after"
  exit 3
fi

# busy_value can yield "missing" (file became unreadable between samples) or
# an empty string (empty file). Either would abort the arithmetic below under
# `set -u`, so reject anything that is not a plain decimal number first.
if [[ ! "$before" =~ ^[0-9]+$ || ! "$after" =~ ^[0-9]+$ ]]; then
  printf 'result=failed reason=non_numeric_busy_counter before_us=%s after_us=%s\n' "$before" "$after"
  exit 2
fi
# 10# forces base 10 so a zero-padded reading is not parsed as octal.
delta=$((10#$after - 10#$before))
printf 'sysfs_before_us=%s\nsysfs_after_us=%s\nsysfs_delta_us=%s\n' "$before" "$after" "$delta"

# The summary below is informational only. Prefer python3, since the bare
# `python` command is absent on many systems.
summary_python=
for candidate in python3 python; do
  if have "$candidate"; then
    summary_python=$candidate
    break
  fi
done

if [[ -n "$summary_python" ]]; then
  # The response travels through the environment because the interpreter's
  # stdin is occupied by the program text. Summary failures never change the
  # overall verdict, hence `|| true`.
  RESPONSE_JSON="$response" "$summary_python" - <<'PY' || true
import json, os
try:
    data = json.loads(os.environ.get('RESPONSE_JSON', ''))
except Exception as exc:
    print(f'response_parse_error={type(exc).__name__}: {exc}')
    raise SystemExit(0)
if not isinstance(data, dict):
    # Valid JSON that is not an object has none of the fields read below.
    print(f'response_parse_error=TypeError: expected JSON object, got {type(data).__name__}')
    raise SystemExit(0)
print(f"response_object={data.get('object')}")
print(f"response_model={data.get('model')}")
print(f"response_npu_busy_delta_us={data.get('npu_busy_delta_us')}")
print(f"embedding_count={len(data.get('data') or [])}")
PY
else
  printf 'response_summary=skipped reason=no_python_interpreter\n'
fi

if (( delta <= 0 )); then
  printf 'result=failed reason=no_positive_sysfs_npu_delta\n'
  exit 4
fi
printf 'result=ok\n'