docs: add OpenVINO NPU services runbook
This commit is contained in:
committed by
William Valentin
parent
4003198ba9
commit
d67c259187
Executable
+110
@@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Read-only health probe for Will's local OpenVINO/NPU services.
|
||||
# This script intentionally does not start, stop, restart, enable, reindex, or route anything.
|
||||
|
||||
# Tunables. Each one keeps a non-empty value already present in the
# environment and otherwise falls back to the default shown here.
: "${BUSY_PATH:=/sys/class/accel/accel0/device/npu_busy_time_us}"  # sysfs NPU busy-time counter file
: "${CURL_TIMEOUT:=8}"                                             # handed to curl --max-time
: "${EMBED_MODEL:=bge-base-en-v1.5-int8-ov}"                       # "model" field of the probe request
: "${EMBED_URL:=http://127.0.0.1:18817/v1/embeddings}"             # endpoint used for the busy-time proof
|
||||
|
||||
# Succeed (status 0) when "$1" resolves to something the shell can run;
# prints nothing either way.
have() {
  command -v "$1" &>/dev/null
}
|
||||
|
||||
# Pretty-print the JSON document arriving on stdin.
#
# Formatter preference: jq, then python3, then python. The original fallback
# called bare `python`, which does not exist on python3-only hosts; there the
# fallback failed for every response and callers reported healthy endpoints
# as unavailable.
#
# Returns the formatter's exit status (non-zero for non-JSON input or when no
# formatter can be run).
json_pretty() {
  if have jq; then
    jq .
  elif have python3; then
    python3 -m json.tool
  else
    # Last resort, kept from the original behaviour.
    python -m json.tool
  fi
}
|
||||
|
||||
# Emit a section banner: a blank line followed by "== <title> ==".
section() {
  local title=$1
  printf '\n== %s ==\n' "$title"
}
|
||||
|
||||
# Fetch a JSON health endpoint and pretty-print the body.
#
# Arguments: $1 - label printed in the header line
#            $2 - URL to GET
# Globals:   CURL_TIMEOUT (read) - passed to curl --max-time
# Outputs:   "[label] url" header, then the formatted body, or
#            "status=unavailable_or_non_json" on any failure
# Returns:   0 on success, 1 when the request fails or the body is not JSON
#
# The body is captured before formatting so that a curl failure is detected
# without relying on the caller's `pipefail` setting, and so the formatter is
# never run on the empty output of a failed request.
http_json() {
  local name=$1 url=$2
  local body

  printf '\n[%s] %s\n' "$name" "$url"

  if ! body=$(curl -fsS --max-time "$CURL_TIMEOUT" "$url"); then
    printf 'status=unavailable_or_non_json\n'
    return 1
  fi

  if ! printf '%s\n' "$body" | json_pretty; then
    printf 'status=unavailable_or_non_json\n'
    return 1
  fi
}
|
||||
|
||||
# Print the contents of $BUSY_PATH with every newline removed, or the literal
# word "missing" (no trailing newline) when the file is not readable.
busy_value() {
  [[ -r "$BUSY_PATH" ]] || {
    printf 'missing'
    return
  }
  tr -d '\n' < "$BUSY_PATH"
}
|
||||
|
||||
# Show which counter file is in use and its current raw value.
section "NPU counter"
printf 'busy_path=%s\nbusy_time_us=%s\n' "$BUSY_PATH" "$(busy_value)"

# List listening TCP sockets on the service and prototype ports. The trailing
# `|| true` keeps a non-zero pipeline status (for example, no matching line)
# from aborting the script under `set -e`.
section "Listeners"
listener_ports='18810|18814|18816|18817|18818|18819|18820|18828|18829'
ss -ltnp | grep -E ":(${listener_ports})\\b" || true
|
||||
|
||||
section "User service states"

# Units to report on, in display order.
units=(
  openvino-embeddings.service
  rag-embedding-health.service
  openvino-reranker.service
  openvino-router-classifier.service
  openvino-genai-npu-worker.service
)

# A non-zero status from systemctl is tolerated (`|| true`) so the loop always
# completes; an empty answer is rendered as "unknown".
for unit in "${units[@]}"; do
  active=$(systemctl --user is-active "$unit" 2>/dev/null || true)
  enabled=$(systemctl --user is-enabled "$unit" 2>/dev/null || true)
  printf '%-38s active=%-10s enabled=%s\n' \
    "$unit" "${active:-unknown}" "${enabled:-unknown}"
done
|
||||
|
||||
section "Docker service states"

# Compose project directory. Overridable from the environment like the other
# tunables, so the probe is not tied to one absolute checkout path.
SWARM_DIR=${SWARM_DIR:-/home/will/lab/swarm}

# Read-only: only `docker compose ps` is run. Every way this check can be
# skipped or fail prints an explicit status line, so an absent check is
# distinguishable from an empty result; none of them aborts the script.
if [[ ! -d "$SWARM_DIR" ]]; then
  printf 'status=skipped reason=missing_compose_dir dir=%s\n' "$SWARM_DIR"
elif ! have docker; then
  printf 'status=skipped reason=docker_not_installed\n'
elif ! (cd "$SWARM_DIR" && docker compose ps whisper-server-npu 2>/dev/null); then
  printf 'status=unavailable reason=docker_compose_ps_failed\n'
fi
|
||||
|
||||
section "HTTP health"

# "label|url" pairs, probed in order. A failing endpoint is reported by
# http_json itself and must not stop the remaining checks.
health_checks=(
  "RAG endpoint|http://127.0.0.1:18810/healthz"
  "RAG/embedding health wrapper|http://127.0.0.1:18814/healthz"
  "Whisper NPU|http://127.0.0.1:18816/health"
  "OpenVINO embeddings|http://127.0.0.1:18817/health"
  # Prototypes are expected to be unavailable until explicitly started/approved.
  "NPU reranker prototype|http://127.0.0.1:18818/readyz"
  "NPU router classifier prototype|http://127.0.0.1:18819/healthz"
  "NPU GenAI worker prototype|http://127.0.0.1:18820/healthz"
)

for health_check in "${health_checks[@]}"; do
  # Split on the first "|": label before it, URL after it.
  http_json "${health_check%%|*}" "${health_check#*|}" || true
done
|
||||
|
||||
section "Embeddings NPU busy-time proof"

# Without a readable counter there is nothing to compare: exit code 2.
[[ -r "$BUSY_PATH" ]] || {
  printf 'result=failed reason=missing_busy_counter\n'
  exit 2
}

# Fixed, non-private probe text; only the model name comes from configuration.
printf -v embed_payload \
  '{"input":"non-private npu health probe","model":"%s"}' "$EMBED_MODEL"
embed_curl_args=(
  -fsS --max-time "$CURL_TIMEOUT"
  "$EMBED_URL"
  -H 'Content-Type: application/json'
  -d "$embed_payload"
)

# Sample the counter immediately before and after the request. A curl failure
# is swallowed here and shows up below as an empty response.
before=$(busy_value)
response=$(curl "${embed_curl_args[@]}" || true)
after=$(busy_value)

# No body at all means the request failed: exit code 3.
if [[ -z "$response" ]]; then
  printf 'result=failed reason=embedding_request_failed before_us=%s after_us=%s\n' \
    "$before" "$after"
  exit 3
fi

delta=$((after - before))
printf 'sysfs_before_us=%s\n' "$before"
printf 'sysfs_after_us=%s\n' "$after"
printf 'sysfs_delta_us=%s\n' "$delta"
||||
# Print a key=value summary of an embeddings response read as JSON on stdin.
#
# The program text is handed to the interpreter with -c. Supplying it as a
# heredoc to `python -` rebinds the interpreter's stdin to the heredoc, so the
# piped response never reaches json.load() and every run prints
# response_parse_error instead of the summary.
#
# Prefers python3 and falls back to python. When neither is installed it
# drains stdin and prints a response_parse_error line.
# Outputs: response_object, response_model, response_npu_busy_delta_us and
#          embedding_count lines, or a single response_parse_error line.
summarize_embedding_response() {
  local interpreter program

  if command -v python3 >/dev/null 2>&1; then
    interpreter=python3
  elif command -v python >/dev/null 2>&1; then
    interpreter=python
  else
    cat >/dev/null
    printf 'response_parse_error=no_python_interpreter\n'
    return 0
  fi

  program=$(cat <<'PY'
import json, sys
try:
    data = json.load(sys.stdin)
except Exception as exc:
    print(f'response_parse_error={type(exc).__name__}: {exc}')
    raise SystemExit(0)
print(f"response_object={data.get('object')}")
print(f"response_model={data.get('model')}")
print(f"response_npu_busy_delta_us={data.get('npu_busy_delta_us')}")
print(f"embedding_count={len(data.get('data', []))}")
PY
  )

  "$interpreter" -c "$program"
}

# The summary is informational only; a failure here must not abort the probe.
printf '%s' "$response" | summarize_embedding_response || true
|
||||
# Final verdict: the proof passes only when the sysfs counter advanced during
# the request; otherwise exit code 4.
if (( delta > 0 )); then
  printf 'result=ok\n'
else
  printf 'result=failed reason=no_positive_sysfs_npu_delta\n'
  exit 4
fi
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
type: service-catalog
|
||||
created: 2026-05-14T14:50:46-07:00
|
||||
updated: 2026-06-03T21:31:01-07:00
|
||||
updated: 2026-06-04T11:35:00-07:00
|
||||
tags:
|
||||
- service-catalog
|
||||
- swarm
|
||||
@@ -54,7 +54,12 @@ Canonical index of local services, automation tools, Hermes capabilities, and wh
|
||||
| URL extractor | 18812 | OK 200 | URL/PDF/YouTube content extractor | `http://127.0.0.1:18812/healthz` |
|
||||
| Voice memo processor | 18813 | OK 200 | Voice memo processor | `http://127.0.0.1:18813/healthz` |
|
||||
| RAG/embedding health | 18814 | OK 200 | RAG/OpenVINO/Obsidian health wrapper | `http://127.0.0.1:18814/healthz` |
|
||||
| Whisper OpenVINO NPU | 18816 | OK 200 / Docker healthy on 2026-06-04 | Intel NPU Whisper transcription service | `http://127.0.0.1:18816/health` |
|
||||
| OpenVINO embeddings | 18817 | OK 200 | Intel NPU embeddings service for live Obsidian RAG | `http://127.0.0.1:18817/health` |
|
||||
| OpenVINO NPU reranker prototype | 18818 | approved prototype; not enabled live | Optional second-stage RAG reranker | `http://127.0.0.1:18818/readyz` |
|
||||
| OpenVINO router/classifier prototype | 18819 | approved prototype; not enabled live | Dry-run Atlas/Hermes message classifier/router | `http://127.0.0.1:18819/healthz` |
|
||||
| OpenVINO GenAI NPU worker prototype | 18820 | approved prototype; not enabled live | Bounded local background generation worker | `http://127.0.0.1:18820/healthz` |
|
||||
| OpenVINO document/image triage prototype | 18828/18829 | approved foreground prototype; not enabled live | Local document/image triage with NPU embeddings stage via `:18817` | `http://127.0.0.1:<port>/healthz` |
|
||||
| Obsidian REST HTTP | 27123 | OK 200 | Obsidian Local REST API HTTP | `http://127.0.0.1:27123/` |
|
||||
|
||||
## Docker services
|
||||
@@ -77,6 +82,7 @@ make status
|
||||
make local-ai-health
|
||||
make api-health
|
||||
make timers
|
||||
./scripts/npu-service-health.sh
|
||||
```
|
||||
|
||||
## Host-side systemd/user services
|
||||
@@ -93,6 +99,9 @@ Important known services:
|
||||
| `voice-memo-processor.service` | Voice memo processing on 18813 |
|
||||
| `rag-embedding-health.service` | RAG/OpenVINO/Obsidian health check wrapper on 18814 |
|
||||
| `openvino-embeddings.service` | Intel NPU BGE embedding service on 18817 |
|
||||
| `openvino-reranker.service` | Optional NPU reranker prototype on 18818; not installed/enabled without approval |
|
||||
| `openvino-router-classifier.service` | Optional dry-run router/classifier prototype on 18819; not installed/enabled without approval |
|
||||
| `openvino-genai-npu-worker.service` | Optional bounded GenAI worker prototype on 18820; not installed/enabled without approval |
|
||||
|
||||
Useful checks:
|
||||
|
||||
@@ -275,6 +284,7 @@ Profile Model Gateway Alias Distribu
|
||||
| Hermes CLI/toolsets/gateway/profiles | Hermes skill `hermes-agent`; `hermes --help`; `hermes tools list` |
|
||||
| Obsidian automation workflows | `~/lab/swarm/swarm-common/n8n-workflows/obsidian-*.json` |
|
||||
| Runbooks | [[Runbooks Home]] |
|
||||
| OpenVINO NPU service operations | [[OpenVINO NPU Services Runbook]]; `~/lab/swarm/scripts/npu-service-health.sh` |
|
||||
|
||||
## Safety notes
|
||||
|
||||
|
||||
+268
@@ -0,0 +1,268 @@
|
||||
---
|
||||
type: runbook
|
||||
system: openvino-npu-services
|
||||
status: draft
|
||||
created: 2026-06-04
|
||||
updated: 2026-06-04
|
||||
tags:
|
||||
- runbook
|
||||
- openvino
|
||||
- npu
|
||||
- swarm
|
||||
- atlas
|
||||
related:
|
||||
- [[Service Catalog]]
|
||||
- [[Swarm Operating Manual]]
|
||||
- [[Atlas Capability Upgrade Program]]
|
||||
---
|
||||
|
||||
# OpenVINO NPU Services Runbook
|
||||
|
||||
This runbook is the integrated operations view for Will's local Intel NPU/OpenVINO services from the `npu-capability-expansion` board.
|
||||
|
||||
Safety posture:
|
||||
- Do not restart the live Atlas/Hermes gateway from this runbook.
|
||||
- Do not change primary Atlas/Hermes routing without explicit Will approval.
|
||||
- Do not delete, overwrite, or in-place reindex existing Chroma/vector collections.
|
||||
- Treat HTTP 200 as necessary but not sufficient for NPU-backed services; verify `/sys/class/accel/accel0/device/npu_busy_time_us` before/after an inference.
|
||||
- Keep endpoints local-only unless Will explicitly approves broader exposure.
|
||||
- Keep raw prompts, private documents, OCR text, and secrets out of logs and durable handoffs.
|
||||
|
||||
## Current service map
|
||||
|
||||
| Capability | Port | Runtime / service | Path | State | Health endpoint | NPU proof |
|
||||
| --- | ---: | --- | --- | --- | --- | --- |
|
||||
| Obsidian/RAG endpoint | 18810 | `obsidian-reindex-endpoint.service` / local Python endpoint | `~/lab/swarm/scripts/` | live baseline; uses collection `obsidian_bge_npu` | `http://127.0.0.1:18810/healthz` | indirect via embeddings `:18817`; do not mutate existing collection |
|
||||
| RAG/embedding health wrapper | 18814 | `rag-embedding-health.service` | `~/lab/swarm/swarm-common/rag-embedding-health.service` | live baseline | `http://127.0.0.1:18814/healthz` | should exercise embeddings path when configured |
|
||||
| Whisper transcription, OpenVINO NPU | 18816 | Docker Compose service/container `whisper-server-npu` | `~/lab/swarm/whisper-openvino-npu/` | live baseline | `http://127.0.0.1:18816/health` | transcription response includes `npu_busy_delta_us`; sysfs delta must increase |
|
||||
| OpenVINO embeddings | 18817 | user systemd `openvino-embeddings.service` | `~/lab/swarm/scripts/openvino-embeddings-server.py`; unit in `~/lab/swarm/swarm-common/openvino-embeddings.service` | live baseline, enabled | `http://127.0.0.1:18817/health` | embedding response and sysfs delta must be positive |
|
||||
| NPU reranker prototype | 18818 | optional user systemd `openvino-reranker.service` | `~/lab/swarm/openvino-reranker-npu/` | approved prototype; not installed/enabled | `http://127.0.0.1:18818/readyz` | `/readyz` reports `device=NPU`; `/v1/rerank` response and sysfs delta must be positive |
|
||||
| NPU router/classifier prototype | 18819 | optional user systemd `openvino-router-classifier.service` | `~/lab/swarm/openvino-classifier-npu/` | approved prototype; not installed/enabled | `http://127.0.0.1:18819/healthz` | `/v1/classify` response has positive `npu_busy_delta_us` and `sysfs_npu_busy_delta_us` |
|
||||
| Small OpenVINO GenAI NPU worker | 18820 | optional user systemd `openvino-genai-npu-worker.service` | `~/lab/swarm/openvino-genai-npu-worker/` | approved prototype; not installed/enabled | `http://127.0.0.1:18820/healthz`; `GET /models` | generation response includes positive `npu_busy_delta_us` |
|
||||
| Document/image triage prototype | 18828 or 18829 for review only | foreground local-only server; no persistent unit yet | `~/lab/swarm/openvino-doc-image-triage-npu/` | approved prototype; not installed/enabled | `http://127.0.0.1:<port>/healthz`; `GET /models` | v1 NPU stage is semantic embedding through `:18817`; image classification/OCR remain CPU/local |
|
||||
|
||||
Port notes:
|
||||
- `18818`, `18819`, and `18820` are reserved prototype ports from the program plan; check listeners before binding.
|
||||
- `18820` was used by the GenAI worker prototype. The document/image triage prototype README still contains a `18820` example, but review used `18828`/`18829` to avoid collision. Prefer `18828`/`18829` for triage foreground review until Will approves a final persistent port.
|
||||
- Existing `:18817` is currently bound on `0.0.0.0` by the user service; prototype services should still default to `127.0.0.1`.
|
||||
|
||||
## Read-only unified health check
|
||||
|
||||
From the swarm repo:
|
||||
|
||||
```bash
|
||||
cd ~/lab/swarm
|
||||
./scripts/npu-service-health.sh
|
||||
```
|
||||
|
||||
The script is read-only. It checks listeners, user service state, Docker Compose state for `whisper-server-npu`, JSON health endpoints, and performs a non-private embeddings request while measuring `/sys/class/accel/accel0/device/npu_busy_time_us` before and after. A positive sysfs delta is required for the embeddings proof.
|
||||
|
||||
Manual minimal checks:
|
||||
|
||||
```bash
|
||||
BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
|
||||
cat "$BUSY"
|
||||
ss -ltnp | grep -E ':(18810|18814|18816|18817|18818|18819|18820|18828|18829)\b' || true
|
||||
systemctl --user is-active openvino-embeddings.service rag-embedding-health.service
|
||||
cd ~/lab/swarm && docker compose ps whisper-server-npu
|
||||
curl -fsS http://127.0.0.1:18817/health | jq .
|
||||
```
|
||||
|
||||
Embedding NPU proof:
|
||||
|
||||
```bash
|
||||
BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
|
||||
before=$(cat "$BUSY")
|
||||
curl -fsS http://127.0.0.1:18817/v1/embeddings \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"input":"non-private npu health probe","model":"bge-base-en-v1.5-int8-ov"}' | jq '{model, object, npu_busy_delta_us, embedding_count:(.data|length)}'
|
||||
after=$(cat "$BUSY")
|
||||
echo "sysfs_npu_busy_delta_us=$((after-before))"
|
||||
```
|
||||
|
||||
A healthy NPU path has:
|
||||
- HTTP success from the endpoint.
|
||||
- Response-level `npu_busy_delta_us > 0` when the service reports it.
|
||||
- Sysfs `after - before > 0`.
|
||||
|
||||
## Service-specific smoke checks
|
||||
|
||||
### Whisper NPU (`:18816`)
|
||||
|
||||
```bash
|
||||
curl -fsS http://127.0.0.1:18816/health | jq .
|
||||
# For a real transcription smoke, use a small non-private WAV fixture only.
|
||||
# Verify both response npu_busy_delta_us and sysfs busy-time delta.
|
||||
```
|
||||
|
||||
Operational notes:
|
||||
- Managed as Docker Compose service/container `whisper-server-npu` in `~/lab/swarm`.
|
||||
- This matches the existing swarm service pattern: it is a containerized service with a Compose health check.
|
||||
- Do not restart it from this runbook unless Will asked for remediation.
|
||||
|
||||
### OpenVINO embeddings (`:18817`)
|
||||
|
||||
```bash
|
||||
systemctl --user status openvino-embeddings.service --no-pager
|
||||
curl -fsS http://127.0.0.1:18817/health | jq .
|
||||
```
|
||||
|
||||
Operational notes:
|
||||
- User systemd unit: `openvino-embeddings.service`.
|
||||
- Model: `bge-base-en-v1.5-int8-ov`.
|
||||
- Model directory: `/home/will/.cache/openvino-models/bge-base-en-v1.5-int8-ov`.
|
||||
- Live RAG `:18810` uses Chroma collection `obsidian_bge_npu` through this service. Do not reindex or replace this collection in place.
|
||||
|
||||
### Reranker prototype (`:18818`)
|
||||
|
||||
Foreground review start only, after confirming port is free:
|
||||
|
||||
```bash
|
||||
ss -ltnp | grep ':18818\b' || true
|
||||
cd ~/lab/swarm/openvino-reranker-npu
|
||||
source /home/will/.venvs/openvino-reranker/bin/activate
|
||||
OPENVINO_RERANKER_HOST=127.0.0.1 \
|
||||
OPENVINO_RERANKER_PORT=18818 \
|
||||
OPENVINO_RERANKER_DEVICE=NPU \
|
||||
OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov \
|
||||
python server.py
|
||||
```
|
||||
|
||||
From another shell:
|
||||
|
||||
```bash
|
||||
curl -fsS http://127.0.0.1:18818/readyz | jq .
|
||||
python ~/lab/swarm/openvino-reranker-npu/smoke.py --url http://127.0.0.1:18818
|
||||
```
|
||||
|
||||
Approval gate:
|
||||
- May be installed as `openvino-reranker.service` only after foreground smoke and Will approval.
|
||||
- May be integrated into RAG only behind disabled-by-default knobs such as `RAG_RERANK_ENABLED=false`; request-time reranking must not mutate Chroma.
|
||||
|
||||
### Router/classifier prototype (`:18819`)
|
||||
|
||||
Foreground review start only, after confirming port is free:
|
||||
|
||||
```bash
|
||||
ss -ltnp | grep ':18819\b' || true
|
||||
cd ~/lab/swarm/openvino-classifier-npu
|
||||
/home/will/.venvs/npu/bin/python router_classifier.py --host 127.0.0.1 --port 18819
|
||||
```
|
||||
|
||||
Smoke:
|
||||
|
||||
```bash
|
||||
curl -fsS http://127.0.0.1:18819/healthz | jq .
|
||||
curl -fsS http://127.0.0.1:18819/v1/classify \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"id":"smoke","text":"Urgent: check whether port 18817 is listening and inspect systemd logs.","options":{"include_evidence":true,"dry_run":true}}' | jq .
|
||||
```
|
||||
|
||||
Approval gate:
|
||||
- May be installed as `openvino-router-classifier.service` only after Will approves live service enablement.
|
||||
- Must remain dry-run and must not alter Hermes/Atlas routing, memory writes, safety confirmation flow, or outbound messages without a separate explicit approval.
|
||||
|
||||
### Small GenAI NPU worker (`:18820`)
|
||||
|
||||
Foreground review start only, after confirming port is free:
|
||||
|
||||
```bash
|
||||
ss -ltnp | grep ':18820\b' || true
|
||||
cd ~/lab/swarm/openvino-genai-npu-worker
|
||||
/home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820
|
||||
```
|
||||
|
||||
Smoke:
|
||||
|
||||
```bash
|
||||
curl -fsS http://127.0.0.1:18820/healthz | jq .
|
||||
curl -fsS http://127.0.0.1:18820/models | jq .
|
||||
curl -fsS http://127.0.0.1:18820/v1/worker/condense-notification \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"input":"Non-private smoke notification for local NPU worker.","max_new_tokens":64}' | jq .
|
||||
```
|
||||
|
||||
Approval gate:
|
||||
- May be installed as `openvino-genai-npu-worker.service` only after Will approves persistent service enablement.
|
||||
- Must not become primary Atlas/Hermes model routing. Use only for bounded background jobs such as title, summary, notification condensation, and memory-candidate drafting.
|
||||
|
||||
### Document/image triage prototype (`:18828`/`:18829` review ports)
|
||||
|
||||
Foreground review start only, after confirming port is free:
|
||||
|
||||
```bash
|
||||
ss -ltnp | grep -E ':(18828|18829)\b' || true
|
||||
cd ~/lab/swarm/openvino-doc-image-triage-npu
|
||||
/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18828 --allowed-root "$PWD"
|
||||
```
|
||||
|
||||
Smoke:
|
||||
|
||||
```bash
|
||||
curl -fsS http://127.0.0.1:18828/healthz | jq .
|
||||
curl -fsS http://127.0.0.1:18828/models | jq .
|
||||
/home/will/.venvs/npu/bin/python tests/smoke_test.py
|
||||
```
|
||||
|
||||
Approval gate:
|
||||
- Do not point it at arbitrary directories; any `--allowed-root` passed at startup must be equal to, or a subdirectory of, one of the configured roots.
|
||||
- Do not include raw OCR text or full source paths unless Will explicitly asks for a one-off response.
|
||||
- v1 only uses the NPU through `:18817` embeddings for needs-attention; image category classification and OCR are CPU/local fallbacks.
|
||||
|
||||
## Systemd and Compose recommendations
|
||||
|
||||
Recommended management split:
|
||||
- Keep containerized services in Docker Compose when they already have Docker build/runtime shape and Compose health (`whisper-server-npu`).
|
||||
- Keep host-side OpenVINO Python prototypes as user systemd services when they depend on local venvs, sysfs NPU access, model caches, and localhost-only APIs (`openvino-embeddings`, optional reranker/classifier/GenAI worker).
|
||||
- Do not add the prototypes to the live gateway or primary routing during installation. Installation and routing are separate approval gates.
|
||||
|
||||
User-systemd unit expectations for optional prototypes:
|
||||
- `WorkingDirectory` points at the service directory under `~/lab/swarm/`.
|
||||
- `ExecStart` uses the existing venv path documented by the prototype.
|
||||
- `Environment` pins host to `127.0.0.1`, port, model path, device `NPU`, and any upstream endpoint.
|
||||
- `Restart=on-failure`, not aggressive restart loops.
|
||||
- Logs go to user journal; do not log raw request bodies.
|
||||
- Start manually for smoke; enable on boot only after Will approval.
|
||||
|
||||
Compose expectations for existing swarm services:
|
||||
- Prefer `cd ~/lab/swarm && make ps`, `make status`, and targeted `docker compose ps <service>` for read-only checks.
|
||||
- Do not run `docker compose up -d`, restart containers, pull images, or prune volumes from this runbook without approval.
|
||||
|
||||
## Monitoring and logging notes
|
||||
|
||||
Minimum recurring monitoring should include:
|
||||
- Listener presence for `18816`, `18817`, and any approved optional prototype ports.
|
||||
- User service state for `openvino-embeddings.service` and any approved optional prototype unit.
|
||||
- Docker Compose health for `whisper-server-npu`.
|
||||
- HTTP health endpoint success.
|
||||
- Positive sysfs NPU busy-time delta on at least one non-private inference probe, preferably embeddings `:18817` because it is already live and central.
|
||||
- Journal/container logs only at summary level. Avoid raw prompts, raw OCR text, private document names, credentials, and API keys.
|
||||
|
||||
Useful log commands:
|
||||
|
||||
```bash
|
||||
journalctl --user -u openvino-embeddings.service -n 100 --no-pager
|
||||
journalctl --user -u rag-embedding-health.service -n 100 --no-pager
|
||||
journalctl --user -u openvino-reranker.service -n 100 --no-pager
|
||||
journalctl --user -u openvino-router-classifier.service -n 100 --no-pager
|
||||
journalctl --user -u openvino-genai-npu-worker.service -n 100 --no-pager
|
||||
cd ~/lab/swarm && docker compose logs --tail 100 whisper-server-npu
|
||||
```
|
||||
|
||||
## Approval gates
|
||||
|
||||
Requires explicit Will approval before proceeding:
|
||||
- Installing, enabling, or autostarting `openvino-reranker.service`, `openvino-router-classifier.service`, or `openvino-genai-npu-worker.service`.
|
||||
- Assigning a final persistent port to document/image triage or enabling it as a persistent service.
|
||||
- Enabling live RAG reranking or any request path that changes Atlas/RAG answers.
|
||||
- Changing primary Atlas/Hermes routing or connecting router/classifier outputs to live decisions.
|
||||
- Connecting the GenAI worker to primary Atlas chat, gateway routing, memory writes, or outbound notifications.
|
||||
- Restarting the live Atlas/Hermes gateway.
|
||||
- Deleting, overwriting, or in-place reindexing existing vector collections.
|
||||
- Broadening bind addresses or exposure beyond local-only defaults.
|
||||
|
||||
Approved/parked outcomes:
|
||||
- Built/approved prototypes: reranker (`:18818`), router/classifier (`:18819`), small GenAI worker (`:18820`), document/image triage (review ports `:18828`/`:18829`).
|
||||
- Live baseline retained: Whisper NPU (`:18816`), OpenVINO embeddings (`:18817`), RAG endpoint (`:18810`) using `obsidian_bge_npu`.
|
||||
- Parked: always-on wake-word/audio and conventional vision detection until Will wants a concrete use case.
|
||||
- Rejected for this NPU program: diffusion/image generation.
|
||||
Reference in New Issue
Block a user