docs: add OpenVINO NPU services runbook
This commit is contained in:
committed by
William Valentin
parent
4003198ba9
commit
d67c259187
Executable
+110
@@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Read-only health probe for Will's local OpenVINO/NPU services.
# This script intentionally does not start, stop, restart, enable, reindex, or route anything.
#
# Environment overrides (each keeps its default when unset or empty):
#   BUSY_PATH     sysfs file read as the NPU busy-time counter
#   CURL_TIMEOUT  value handed to curl --max-time for every HTTP request
#   EMBED_MODEL   "model" field of the embeddings probe request
#   EMBED_URL     endpoint that receives the embeddings probe request

BUSY_PATH=${BUSY_PATH:-/sys/class/accel/accel0/device/npu_busy_time_us}
CURL_TIMEOUT=${CURL_TIMEOUT:-8}
EMBED_MODEL=${EMBED_MODEL:-bge-base-en-v1.5-int8-ov}
EMBED_URL=${EMBED_URL:-http://127.0.0.1:18817/v1/embeddings}

# Succeed when "$1" resolves to a command according to `command -v`.
# Produces no output on stdout or stderr either way.
# Arguments: $1 - command name to look up
have() {
  command -v "$1" >/dev/null 2>&1
}

# Pretty-print the JSON document arriving on stdin to stdout.
# Prefers jq; otherwise falls back to Python's json.tool, trying python3
# before python because many hosts no longer install a bare `python`.
# Returns: the formatter's exit status, or 127 when no formatter exists.
json_pretty() {
  if have jq; then
    jq .
  elif have python3; then
    python3 -m json.tool
  elif have python; then
    python -m json.tool
  else
    # Consume stdin so the process feeding the pipe can finish writing.
    cat >/dev/null
    printf 'json_pretty: need jq, python3, or python\n' >&2
    return 127
  fi
}

# Print a section banner: an empty line followed by "== <title> ==".
# Arguments: $1 - section title
section() {
  local title=$1
  printf '\n== %s ==\n' "$title"
}

# GET a URL and pretty-print its JSON body under a "[name] url" heading.
# Arguments: $1 - label shown in the heading
#            $2 - URL to fetch
# Globals:   CURL_TIMEOUT (read) - passed to curl --max-time
# Outputs:   heading plus formatted JSON on stdout, or the line
#            "status=unavailable_or_non_json" when the fetch or the
#            formatting fails
# Returns:   0 on success, 1 on failure
http_json() {
  local name=$1 url=$2
  local body

  printf '\n[%s] %s\n' "$name" "$url"

  # Fetch first, format second: the formatter is never run on the empty
  # output of a failed request, and failure detection does not depend on
  # the caller having `pipefail` enabled.
  if body=$(curl -fsS --max-time "$CURL_TIMEOUT" "$url") \
    && printf '%s' "$body" | json_pretty; then
    return 0
  fi

  printf 'status=unavailable_or_non_json\n'
  return 1
}

# Print the contents of the counter file at $BUSY_PATH with newlines
# removed, or the literal word "missing" when the file is not readable.
# Globals: BUSY_PATH (read)
# Outputs: the value (no trailing newline) on stdout
busy_value() {
  if [[ ! -r "$BUSY_PATH" ]]; then
    printf 'missing'
    return 0
  fi
  tr -d '\n' < "$BUSY_PATH"
}

# Show which counter file is sampled and its current reading
# ("missing" when the file is unreadable).
section "NPU counter"
printf 'busy_path=%s\n' "$BUSY_PATH"
printf 'busy_time_us=%s\n' "$(busy_value)"

# List listening TCP sockets whose address ends in one of the probed ports.
# `|| true` is deliberate: grep exits 1 when nothing matches (and ss may be
# unavailable), and neither case should abort the probe under `set -e`.
section "Listeners"
ss -ltnp | grep -E ':(18810|18814|18816|18817|18818|18819|18820|18828|18829)\b' || true

# Report the is-active / is-enabled state of each systemd user unit.
# systemctl exits non-zero for inactive or unknown units, so each query is
# guarded with `|| true`; an empty answer is displayed as "unknown".
section "User service states"
user_units=(
  openvino-embeddings.service
  rag-embedding-health.service
  openvino-reranker.service
  openvino-router-classifier.service
  openvino-genai-npu-worker.service
)
for unit in "${user_units[@]}"; do
  active=$(systemctl --user is-active "$unit" 2>/dev/null || true)
  enabled=$(systemctl --user is-enabled "$unit" 2>/dev/null || true)
  printf '%-38s active=%-10s enabled=%s\n' "$unit" "${active:-unknown}" "${enabled:-unknown}"
done

# Show the compose status of the whisper-server-npu service.
# The compose project directory defaults to /home/will/lab/swarm and can be
# overridden with SWARM_DIR; the step is skipped when the directory is absent.
# Failures (docker missing, service not defined) are intentionally ignored.
section "Docker service states"
swarm_dir=${SWARM_DIR:-/home/will/lab/swarm}
if [[ -d "$swarm_dir" ]]; then
  (cd -- "$swarm_dir" && docker compose ps whisper-server-npu 2>/dev/null) || true
fi

# Query each service's health endpoint. Every call is followed by `|| true`
# so one unavailable service does not stop the remaining checks.
section "HTTP health"
http_json "RAG endpoint" "http://127.0.0.1:18810/healthz" || true
http_json "RAG/embedding health wrapper" "http://127.0.0.1:18814/healthz" || true
http_json "Whisper NPU" "http://127.0.0.1:18816/health" || true
http_json "OpenVINO embeddings" "http://127.0.0.1:18817/health" || true
# Prototypes are expected to be unavailable until explicitly started/approved.
http_json "NPU reranker prototype" "http://127.0.0.1:18818/readyz" || true
http_json "NPU router classifier prototype" "http://127.0.0.1:18819/healthz" || true
http_json "NPU GenAI worker prototype" "http://127.0.0.1:18820/healthz" || true

# Embeddings busy-time proof: sample the sysfs counter, send one embeddings
# request, sample again, and require the counter to have increased.
# Exit codes: 2 - counter file unreadable or its value is not an integer
#             3 - embeddings request failed or returned an empty body
#             4 - counter did not increase across the request
section "Embeddings NPU busy-time proof"
if [[ ! -r "$BUSY_PATH" ]]; then
  printf 'result=failed reason=missing_busy_counter\n'
  exit 2
fi

before=$(busy_value)
# `|| true` keeps a failed request from aborting here; the empty response
# is reported explicitly below.
response=$(curl -fsS --max-time "$CURL_TIMEOUT" \
  "$EMBED_URL" \
  -H 'Content-Type: application/json' \
  -d "{\"input\":\"non-private npu health probe\",\"model\":\"$EMBED_MODEL\"}" || true)
after=$(busy_value)

if [[ -z "$response" ]]; then
  printf 'result=failed reason=embedding_request_failed before_us=%s after_us=%s\n' "$before" "$after"
  exit 3
fi

# Guard the arithmetic below: an empty or non-numeric read would otherwise
# abort the script with a shell arithmetic error instead of a result line.
if [[ ! "$before" =~ ^[0-9]+$ || ! "$after" =~ ^[0-9]+$ ]]; then
  printf 'result=failed reason=non_numeric_busy_counter before_us=%s after_us=%s\n' "$before" "$after"
  exit 2
fi

# 10# forces base 10 so a value with leading zeros is not parsed as octal.
delta=$((10#$after - 10#$before))
printf 'sysfs_before_us=%s\nsysfs_after_us=%s\nsysfs_delta_us=%s\n' "$before" "$after" "$delta"

# The summary program is passed with -c so that stdin stays connected to the
# pipe carrying the response. Feeding the program through a here-document
# (`python - <<'PY'`) would replace the pipe as stdin and the interpreter
# would never see the response.
summary_py=$(cat <<'PY'
import json, sys
try:
    data = json.load(sys.stdin)
except Exception as exc:
    print(f'response_parse_error={type(exc).__name__}: {exc}')
    raise SystemExit(0)
print(f"response_object={data.get('object')}")
print(f"response_model={data.get('model')}")
print(f"response_npu_busy_delta_us={data.get('npu_busy_delta_us')}")
print(f"embedding_count={len(data.get('data', []))}")
PY
)

# Prefer python3; fall back to a bare `python` only when that is all the
# host provides. The summary is informational, so its failure is ignored.
py_bin=$(command -v python3 || command -v python || true)
if [[ -n "$py_bin" ]]; then
  printf '%s' "$response" | "$py_bin" -c "$summary_py" || true
else
  printf 'response_parse_error=no_python_interpreter\n'
fi

if (( delta <= 0 )); then
  printf 'result=failed reason=no_positive_sysfs_npu_delta\n'
  exit 4
fi
printf 'result=ok\n'
Reference in New Issue
Block a user