#!/usr/bin/env bash
set -euo pipefail

# Read-only health probe for Will's local OpenVINO/NPU services.
# This script intentionally does not start, stop, restart, enable, reindex, or route anything.
#
# Environment overrides (defaults are in the assignments below):
#   BUSY_PATH     file read by busy_value() as the NPU busy-time counter
#   CURL_TIMEOUT  value passed to curl --max-time for every request
#   EMBED_MODEL   model name sent in the embeddings request later in the script
#   EMBED_URL     embeddings endpoint used by the busy-time proof later in the script

BUSY_PATH=${BUSY_PATH:-/sys/class/accel/accel0/device/npu_busy_time_us}
CURL_TIMEOUT=${CURL_TIMEOUT:-8}
EMBED_MODEL=${EMBED_MODEL:-bge-base-en-v1.5-int8-ov}
EMBED_URL=${EMBED_URL:-http://127.0.0.1:18817/v1/embeddings}

# Succeeds (exit 0) when command $1 can be resolved by the shell; prints nothing.
have() {
  command -v "$1" >/dev/null 2>&1
}

# Pretty-prints the JSON document on stdin to stdout.
# Formatter preference: jq, then python3, then python. Many current systems
# ship only `python3`, so `python` alone is not a safe assumption.
# Returns the formatter's exit status, or 127 when no formatter is installed.
json_pretty() {
  if have jq; then
    jq .
  elif have python3; then
    python3 -m json.tool
  elif have python; then
    python -m json.tool
  else
    # Drain stdin so the upstream writer is not killed by a closed pipe,
    # then explain the real problem instead of a bare "command not found".
    cat >/dev/null
    printf 'json_pretty: need jq, python3, or python on PATH\n' >&2
    return 127
  fi
}

# Prints a section banner: a blank line followed by "== <title> ==".
# Arguments: $1 - section title
section() {
  printf '\n== %s ==\n' "$1"
}

# Fetches a URL with curl and pretty-prints the JSON body.
# Arguments: $1 - human-readable label, $2 - URL to GET
# Outputs:   "[label] url" header, then the formatted JSON, or
#            "status=unavailable_or_non_json" when the pipeline fails.
# Returns:   0 on success, 1 when curl or the formatter failed
#            (pipefail makes a curl failure fail the whole pipeline).
http_json() {
  local name=$1 url=$2
  printf '\n[%s] %s\n' "$name" "$url"
  if ! curl -fsS --max-time "$CURL_TIMEOUT" "$url" | json_pretty; then
    printf 'status=unavailable_or_non_json\n'
    return 1
  fi
}

# Prints the contents of $BUSY_PATH with newlines removed, or the literal
# word "missing" when the file is not readable. Always exits 0 in the
# "missing" case so callers can embed the result in report lines.
busy_value() {
  if [[ -r "$BUSY_PATH" ]]; then
    tr -d '\n' < "$BUSY_PATH"
  else
    printf 'missing'
  fi
}

section "NPU counter"
printf 'busy_path=%s\n' "$BUSY_PATH"
printf 'busy_time_us=%s\n' "$(busy_value)"

section "Listeners"
# Required OpenVINO/NPU program ports: live baseline 18810/18816/18817,
# reranker 18818, local-only specialists 18819/18820/18829, and advisory gateway 18830.
# 18814 is the existing RAG/embedding health wrapper; 18828 is a review-only
# alternate used to avoid collisions during prior smoke tests.
# List listening TCP sockets on the program's ports. `|| true` keeps a
# no-match grep (non-zero exit) from aborting the script under set -e/pipefail.
ss -ltnp | grep -E ':(18810|18814|18816|18817|18818|18819|18820|18828|18829|18830)\b' || true

section "User service states"
for unit in \
  openvino-embeddings.service \
  rag-embedding-health.service \
  openvino-reranker.service \
  openvino-router-classifier.service \
  openvino-genai-npu-worker.service; do
  # A non-zero exit from systemctl must not abort the probe; whatever state
  # text it printed is still captured, and an empty result shows as "unknown".
  active=$(systemctl --user is-active "$unit" 2>/dev/null || true)
  enabled=$(systemctl --user is-enabled "$unit" 2>/dev/null || true)
  printf '%-38s active=%-10s enabled=%s\n' "$unit" "${active:-unknown}" "${enabled:-unknown}"
done

section "Docker service states"
# Compose project directory; overridable, defaults to the original location.
SWARM_DIR=${SWARM_DIR:-/home/will/lab/swarm}
if [[ -d "$SWARM_DIR" ]]; then
  # Subshell keeps the cd local; failures are tolerated (best-effort report).
  (cd "$SWARM_DIR" && docker compose ps whisper-server-npu 2>/dev/null) || true
fi

section "HTTP health"
# Each probe is best-effort: http_json returns 1 on failure, `|| true`
# lets the remaining probes run.
http_json "RAG endpoint" "http://127.0.0.1:18810/healthz" || true
http_json "RAG/embedding health wrapper" "http://127.0.0.1:18814/healthz" || true
http_json "Whisper NPU" "http://127.0.0.1:18816/health" || true
http_json "OpenVINO embeddings" "http://127.0.0.1:18817/healthz" || true
# Prototypes are expected to be unavailable until explicitly started/approved.
http_json "NPU reranker prototype" "http://127.0.0.1:18818/readyz" || true
http_json "NPU router classifier prototype" "http://127.0.0.1:18819/healthz" || true
http_json "NPU GenAI worker prototype" "http://127.0.0.1:18820/healthz" || true
http_json "NPU doc/image triage prototype" "http://127.0.0.1:18829/healthz" || true

# Busy-time proof: read the counter, send one embeddings request, read the
# counter again, and require a positive difference.
# Exit codes: 2 counter unreadable, 3 request failed, 4 no positive delta,
#             5 counter content is not a non-negative integer.
section "Embeddings NPU busy-time proof"
if [[ ! -r "$BUSY_PATH" ]]; then
  printf 'result=failed reason=missing_busy_counter\n'
  exit 2
fi

before=$(busy_value)
# Validate before any arithmetic: raw file text inside $(( )) would be
# evaluated as an expression (and abort under set -u on non-numeric text).
if [[ ! "$before" =~ ^[0-9]+$ ]]; then
  printf 'result=failed reason=non_numeric_busy_counter before_us=%s\n' "$before"
  exit 5
fi

# Escape backslashes and double quotes so an overridden EMBED_MODEL cannot
# break out of the JSON string in the request body.
model_json=${EMBED_MODEL//\\/\\\\}
model_json=${model_json//\"/\\\"}

# `|| true` keeps a curl failure from aborting here; an empty response is
# reported explicitly below.
response=$(curl -fsS --max-time "$CURL_TIMEOUT" \
  "$EMBED_URL" \
  -H 'Content-Type: application/json' \
  -d "{\"input\":\"non-private npu health probe\",\"model\":\"$model_json\"}" || true)
after=$(busy_value)

if [[ -z "$response" ]]; then
  printf 'result=failed reason=embedding_request_failed before_us=%s after_us=%s\n' "$before" "$after"
  exit 3
fi

if [[ ! "$after" =~ ^[0-9]+$ ]]; then
  printf 'result=failed reason=non_numeric_busy_counter before_us=%s after_us=%s\n' "$before" "$after"
  exit 5
fi

# 10# forces base 10 so a value with leading zeros is not parsed as octal.
delta=$((10#$after - 10#$before))
printf 'sysfs_before_us=%s\nsysfs_after_us=%s\nsysfs_delta_us=%s\n' "$before" "$after" "$delta"

# Summarize the response. The summary is informational only, so interpreter
# problems never change the probe's result (hence `|| true`).
py_bin=$(command -v python3 || command -v python || true)
if [[ -n "$py_bin" ]]; then
  RESPONSE_JSON="$response" "$py_bin" - <<'PY' || true
import json, os
try:
    data = json.loads(os.environ.get('RESPONSE_JSON', ''))
except Exception as exc:
    print(f'response_parse_error={type(exc).__name__}: {exc}')
    raise SystemExit(0)
if not isinstance(data, dict):
    # .get() below only exists on JSON objects; report instead of crashing.
    print(f'response_parse_error=unexpected_top_level_type: {type(data).__name__}')
    raise SystemExit(0)
print(f"response_object={data.get('object')}")
print(f"response_model={data.get('model')}")
print(f"response_npu_busy_delta_us={data.get('npu_busy_delta_us')}")
print(f"embedding_count={len(data.get('data') or [])}")
PY
else
  printf 'response_parse_error=no_python_interpreter\n'
fi

if (( delta <= 0 )); then
  printf 'result=failed reason=no_positive_sysfs_npu_delta\n'
  exit 4
fi

printf 'result=ok\n'