#!/usr/bin/env bash
set -euo pipefail

# Read-only health probe for Will's local OpenVINO/NPU services.
# This script intentionally does not start, stop, restart, enable, reindex, or route anything.
#
# Environment overrides (each falls back to the default assigned below):
#   BUSY_PATH     sysfs file read as the NPU busy-time counter
#   CURL_TIMEOUT  value handed to curl --max-time for every HTTP request
#   EMBED_MODEL   model name sent in the embeddings probe request body
#   EMBED_URL     endpoint that receives the embeddings probe request

BUSY_PATH=${BUSY_PATH:-/sys/class/accel/accel0/device/npu_busy_time_us}
CURL_TIMEOUT=${CURL_TIMEOUT:-8}
EMBED_MODEL=${EMBED_MODEL:-bge-base-en-v1.5-int8-ov}
EMBED_URL=${EMBED_URL:-http://127.0.0.1:18817/v1/embeddings}

#######################################
# Tell whether a command name can be resolved by the shell.
# Arguments: $1 - command name to look up
# Returns:   0 when `command -v` resolves the name, non-zero otherwise
#######################################
have() {
  command -v "$1" >/dev/null 2>&1
}

#######################################
# Pretty-print the JSON document arriving on stdin.
# Prefers jq; otherwise falls back to Python's json.tool, trying python3
# before python because many systems ship no unversioned `python` binary.
# Outputs:   formatted JSON on stdout
# Returns:   the formatter's exit status (non-zero for invalid JSON),
#            or 127 when no formatter is installed
#######################################
json_pretty() {
  if have jq; then
    jq .
  elif have python3; then
    python3 -m json.tool
  elif have python; then
    python -m json.tool
  else
    printf 'json_pretty: need jq, python3, or python on PATH\n' >&2
    return 127
  fi
}

#######################################
# Print a section banner preceded by a blank line.
# Arguments: $1 - section title (treated as empty when omitted, so the
#                 call does not abort under `set -u`)
# Outputs:   "\n== <title> ==\n" on stdout
#######################################
section() {
  printf '\n== %s ==\n' "${1-}"
}

#######################################
# Fetch a URL and pretty-print its JSON body under a "[name] url" heading.
# The body is captured first so curl's own exit status decides success;
# the result therefore does not depend on `set -o pipefail` being active,
# and nothing is pretty-printed for a failed request.
# Globals:   CURL_TIMEOUT (read)
# Arguments: $1 - label printed in the heading
#            $2 - URL to request
# Outputs:   heading plus formatted JSON, or a status line on failure
# Returns:   0 on success; 1 when the request fails or json_pretty rejects
#            the body
#######################################
http_json() {
  local name=$1 url=$2 body
  printf '\n[%s] %s\n' "$name" "$url"
  if body=$(curl -fsS --max-time "$CURL_TIMEOUT" "$url") &&
    printf '%s' "$body" | json_pretty; then
    return 0
  fi
  printf 'status=unavailable_or_non_json\n'
  return 1
}

#######################################
# Emit the current content of the busy-time counter file.
# Globals:   BUSY_PATH (read)
# Outputs:   the file content with newlines removed, or the literal word
#            "missing" when the file is not readable (no trailing newline
#            in either case)
#######################################
busy_value() {
  if [[ ! -r "$BUSY_PATH" ]]; then
    printf 'missing'
    return 0
  fi
  tr -d '\n' <"$BUSY_PATH"
}

# Report which counter file is probed and the value it currently holds
# ("missing" when the file is unreadable).
section "NPU counter"
printf 'busy_path=%s\n' "$BUSY_PATH"
printf 'busy_time_us=%s\n' "$(busy_value)"

section "Listeners"
# Required OpenVINO/NPU program ports: live baseline 18810/18816/18817,
# approved prototypes 18818/18819/18820, and optional doc/image triage 18829.
# 18814 is the existing RAG/embedding health wrapper; 18828 is a review-only
# alternate used to avoid collisions during prior smoke tests.
listener_port_re=':(18810|18814|18816|18817|18818|18819|18820|18828|18829)\b'
# Best effort: no matching listener (or a failing ss) must not abort the probe.
ss -ltnp | grep -E "$listener_port_re" || true

section "User service states"
user_units=(
  openvino-embeddings.service
  rag-embedding-health.service
  openvino-reranker.service
  openvino-router-classifier.service
  openvino-genai-npu-worker.service
)
for unit in "${user_units[@]}"; do
  # systemctl exits non-zero for inactive/disabled/unknown units; keep the
  # text it printed (if any) and never let that status abort the probe.
  active=$(systemctl --user is-active "$unit" 2>/dev/null || true)
  enabled=$(systemctl --user is-enabled "$unit" 2>/dev/null || true)
  printf '%-38s active=%-10s enabled=%s\n' "$unit" "${active:-unknown}" "${enabled:-unknown}"
done

section "Docker service states"
# Compose project directory; override with SWARM_DIR when the checkout lives
# somewhere other than the default path.
swarm_dir=${SWARM_DIR:-/home/will/lab/swarm}
if [[ -d "$swarm_dir" ]]; then
  # Subshell keeps the cd local. Docker's stderr stays suppressed, but a
  # failure is reported as a status line instead of an empty section.
  (cd -- "$swarm_dir" && docker compose ps whisper-server-npu 2>/dev/null) ||
    printf 'status=unavailable\n'
else
  printf 'status=skipped reason=missing_compose_dir path=%s\n' "$swarm_dir"
fi

section "HTTP health"
# Each probe is best effort: http_json prints its own status line on failure,
# and `|| true` keeps one unavailable endpoint from ending the run.
http_json "RAG endpoint" "http://127.0.0.1:18810/healthz" || true
http_json "RAG/embedding health wrapper" "http://127.0.0.1:18814/healthz" || true
http_json "Whisper NPU" "http://127.0.0.1:18816/health" || true
http_json "OpenVINO embeddings" "http://127.0.0.1:18817/healthz" || true
# Prototypes are expected to be unavailable until explicitly started/approved.
http_json "NPU reranker prototype" "http://127.0.0.1:18818/readyz" || true
http_json "NPU router classifier prototype" "http://127.0.0.1:18819/healthz" || true
http_json "NPU GenAI worker prototype" "http://127.0.0.1:18820/healthz" || true
http_json "NPU doc/image triage prototype" "http://127.0.0.1:18829/healthz" || true

# Send one embeddings request and compare the busy-time counter before and
# after it. Exit codes:
#   2 - busy counter missing or not a plain non-negative integer
#   3 - embeddings request failed or returned an empty body
#   4 - counter did not increase across the request
section "Embeddings NPU busy-time proof"
if [[ ! -r "$BUSY_PATH" ]]; then
  printf 'result=failed reason=missing_busy_counter\n'
  exit 2
fi

# Escape backslashes and double quotes so an unusual EMBED_MODEL value cannot
# break out of the JSON string it is embedded in.
model_json=${EMBED_MODEL//\\/\\\\}
model_json=${model_json//\"/\\\"}

before=$(busy_value)
# `|| true` keeps set -e from aborting; failure is detected via the empty body.
response=$(curl -fsS --max-time "$CURL_TIMEOUT" \
  "$EMBED_URL" \
  -H 'Content-Type: application/json' \
  -d "{\"input\":\"non-private npu health probe\",\"model\":\"$model_json\"}" || true)
after=$(busy_value)
if [[ -z "$response" ]]; then
  printf 'result=failed reason=embedding_request_failed before_us=%s after_us=%s\n' "$before" "$after"
  exit 3
fi

# busy_value yields "missing" if the counter became unreadable mid-run, and
# arithmetic on a non-numeric word would abort under set -u; fail cleanly.
if [[ ! "$before" =~ ^[0-9]+$ || ! "$after" =~ ^[0-9]+$ ]]; then
  printf 'result=failed reason=non_numeric_busy_counter before_us=%s after_us=%s\n' "$before" "$after"
  exit 2
fi
# 10# forces base 10 so a value with leading zeros is not parsed as octal.
delta=$((10#$after - 10#$before))
printf 'sysfs_before_us=%s\nsysfs_after_us=%s\nsysfs_delta_us=%s\n' "$before" "$after" "$delta"

# The response summary is informational only; it never changes the result.
py_bin=$(command -v python3 || command -v python || true)
if [[ -n "$py_bin" ]]; then
  RESPONSE_JSON="$response" "$py_bin" - <<'PY' || true
import json, os
try:
    data = json.loads(os.environ.get('RESPONSE_JSON', ''))
except Exception as exc:
    print(f'response_parse_error={type(exc).__name__}: {exc}')
    raise SystemExit(0)
if not isinstance(data, dict):
    print(f'response_parse_error=TypeError: top-level JSON is {type(data).__name__}, not an object')
    raise SystemExit(0)
print(f"response_object={data.get('object')}")
print(f"response_model={data.get('model')}")
print(f"response_npu_busy_delta_us={data.get('npu_busy_delta_us')}")
print(f"embedding_count={len(data.get('data', []))}")
PY
else
  printf 'response_summary=skipped reason=no_python_interpreter\n'
fi

if ((delta <= 0)); then
  printf 'result=failed reason=no_positive_sysfs_npu_delta\n'
  exit 4
fi
printf 'result=ok\n'