docs(npu): document advisory observability gates

Add operator runbook and link integrated health docs for advisory-only observability, dry-run metrics, and future promotion criteria.
feat(npu): add advisory metrics to utilization digest
2026-06-06 15:30:31 -07:00 · 2026-06-06 15:30:31 -07:00 · 2026-06-06 15:30:31 -07:00 · 2026-06-05 15:52:51 -07:00 · 2026-06-05 15:52:43 -07:00 · 2026-06-05 15:52:43 -07:00
238 changed files with 85266 additions and 235 deletions
@@ -0,0 +1,16 @@
+# Telegram Bot Token from @BotFather
+FLYNN_TELEGRAM_TOKEN=your-bot-token-here
+
+# API Keys — placeholders only. Never commit real credentials; rotate any key that was ever committed.
+ANTHROPIC_API_KEY=your-anthropic-api-key-here
+OPENAI_API_KEY=your-openai-api-key-here
+GEMINI_API_KEY=your-gemini-api-key-here
+ZHIPUAI_API_KEY=your-zhipuai-api-key-here
+ZAI_API_KEY=your-zai-api-key-here
+
+BRAVE_API_KEY=your-brave-api-key-here
+GITHUB_TOKEN=your-github-token-here
+
+# LiteLLM Configuration
+LITELLM_MASTER_KEY=your-litellm-master-key-here
+LITELLM_SALT_KEY=your-litellm-salt-key-here
@@ -3,9 +3,6 @@
 *.swp
 *.swo
 *~
-.Trash-*/
-__pycache__/
-*.py[cod]

 # ── OpenClaw ephemeral / binary / noisy data ──────────────────────────────
 openclaw/workspace/
@@ -17,21 +14,7 @@ openclaw/media/
 openclaw/memory/*.sqlite
 openclaw/memory/*.tmp*
 openclaw/agents/*/sessions/
-openclaw/agents/*/agent/auth-*.json
-openclaw/agents/*/agent/harness-auth/
 openclaw/cron/runs/
-openclaw/cron/jobs-state.json
-openclaw/devices/paired.json
-openclaw/discord/model-picker-preferences.json
-openclaw/flows/*.sqlite*
-openclaw/identity/device-auth.json
-openclaw/memory/
-openclaw/openclaw.json.backup-before-*
-openclaw/openclaw.json.failed
-openclaw/plugin-runtime-deps/
-openclaw/tasks/*.sqlite*
-openclaw/telegram/update-offset-*.json
-openclaw/update-check.json

 # Temp files
 *.tmp
@@ -39,24 +22,12 @@ openclaw/update-check.json

 # Runtime logs
 *.log
-
-# Local n8n SQLite recovery backups
-.n8n-db-backups/
-backups/

 # Local secrets
 .env
 .env.*
 *.pem
 *.key
 id_rsa
 id_ed25519
 credentials.json
-
-# Obsidian local UI/runtime/plugin artifacts
-swarm-common/obsidian-vault/**/.obsidian/workspace.json
-swarm-common/obsidian-vault/**/.obsidian/graph.json
-swarm-common/obsidian-vault/**/.obsidian/bookmarks.json
-swarm-common/obsidian-vault/**/.obsidian/types.json
-swarm-common/obsidian-vault/**/.obsidian/plugins/*/
-swarm-common/obsidian-vault/**/.obsidian/themes/
@@ -16,7 +16,8 @@ OPENCLAW_PORT ?= 18789
 QEMU_URI ?= qemu:///system
 LLAMA_CPP_URL ?= http://127.0.0.1:18806
 OLLAMA_URL ?= http://127.0.0.1:18807
-OLLAMA_EMBED_MODEL ?= nomic-embed-text
+OPENVINO_EMBED_URL ?= http://127.0.0.1:18817
+OPENVINO_EMBED_MODEL ?= bge-base-en-v1.5-int8-ov

 DC := $(COMPOSE) -f $(COMPOSE_FILE)
 COMMON_DC := $(COMPOSE) -f $(COMMON_COMPOSE_FILE)
@@ -28,7 +29,7 @@ REQUIRE_CONFIRM = test "$(CONFIRM)" = "yes" || { echo "This target changes VM/ga
 REQUIRE_INSTANCE = test -n "$(OPENCLAW_HOST)" -a -n "$(OPENCLAW_DOMAIN)" || { echo "Unknown OpenClaw HOST=$(HOST) in $(OPENCLAW_REGISTRY)"; exit 2; }

 .DEFAULT_GOAL := help
-.PHONY: help config ps status local-ai-health ollama-embed-health up down restart pull build logs shell clean \
+.PHONY: help config ps status local-ai-health openvino-embed-health up down restart pull build logs shell clean \
 	api-up api-down api-restart api-init api-init-force api-health api-dedup api-logs \
 	voice-up voice-gpu voice-cpu voice-down voice-build voice-logs \
 	search-up search-down automation-up automation-down n8n-logs \
@@ -53,7 +54,7 @@ ps: ## Show root Docker Compose service status.

 status: ps local-ai-health ## Show Docker service status plus host-side local AI endpoints.

-local-ai-health: ## Check host-side llama.cpp LLM and Ollama embeddings endpoints.
+local-ai-health: ## Check host-side llama.cpp LLM, Ollama fallback, and OpenVINO NPU embeddings endpoints.
 	@printf "\nHost-side local AI endpoints:\n"
 	@printf "llama.cpp (%s): " "$(LLAMA_CPP_URL)"; \
 		if curl -fsS --max-time 3 "$(LLAMA_CPP_URL)/v1/models" >/tmp/swarm-llama-models.json 2>/dev/null; then \
@@ -62,14 +63,17 @@ local-ai-health: ## Check host-side llama.cpp LLM and Ollama embeddings endpoint
 			printf "FAILED\n"; \
 		fi
 	@printf "ollama.service: "; systemctl --user is-active ollama.service 2>/dev/null || true
-	@printf "Ollama API (%s): " "$(OLLAMA_URL)"; \
+	@printf "Ollama fallback API (%s): " "$(OLLAMA_URL)"; \
 		curl -fsS --max-time 3 "$(OLLAMA_URL)/api/version" 2>/dev/null | jq -r '"OK version=" + .version' || printf "FAILED\n"
+	@printf "openvino-embeddings.service: "; systemctl --user is-active openvino-embeddings.service 2>/dev/null || true
+	@printf "OpenVINO NPU embeddings (%s): " "$(OPENVINO_EMBED_URL)"; \
+		{ health=$$(curl -fsS --max-time 3 "$(OPENVINO_EMBED_URL)/healthz" 2>/dev/null) && printf '%s\n' "$$health" | jq -r '"OK model=" + .model + " device=" + .device'; } || printf "FAILED\n"

-ollama-embed-health: ## Smoke-test Ollama embeddings using OLLAMA_EMBED_MODEL=nomic-embed-text.
-	@curl -fsS --max-time 20 "$(OLLAMA_URL)/api/embed" \
+openvino-embed-health: ## Smoke-test OpenVINO NPU embeddings using OPENVINO_EMBED_MODEL=bge-base-en-v1.5-int8-ov.
+	@resp=$$(curl -fsS --max-time 20 "$(OPENVINO_EMBED_URL)/v1/embeddings" \
 		-H 'Content-Type: application/json' \
-		-d '{"model":"$(OLLAMA_EMBED_MODEL)","input":"socket check"}' \
-		| jq -r '"embeddings=" + ((.embeddings // []) | length | tostring) + " dim=" + (((.embeddings // [[]])[0] // []) | length | tostring)'
+		-d '{"model":"$(OPENVINO_EMBED_MODEL)","input":"socket check"}') && \
+		printf '%s\n' "$$resp" | jq -r '"embeddings=" + ((.data // []) | length | tostring) + " dim=" + (((.data // [{embedding: []}])[0].embedding // []) | length | tostring) + " npu_busy_delta_us=" + ((.npu_busy_delta_us // 0) | tostring)'

 up: ## Start root compose services. Use PROFILE=api,voice,search,automation or SERVICE=name.
 	@if [ -n "$(PROFILE)" ]; then \
@@ -137,23 +141,23 @@ api-dedup: ## Remove duplicate LiteLLM model DB entries.
 api-logs: ## Follow LiteLLM logs.
 	$(DC) logs -f --tail="$(LOGS_TAIL)" litellm litellm-db litellm-init

-voice-up: ## Start all voice services.
+voice-up: ## Start default voice services: NPU Whisper and Kokoro TTS.
 	$(DC) --profile voice up -d

-voice-gpu: ## Start GPU whisper server and Kokoro TTS.
-	$(DC) --profile voice up -d whisper-server-gpu kokoro-tts
+voice-gpu: ## Start manual GPU whisper fallback and Kokoro TTS.
+	$(DC) --profile voice-gpu --profile voice up -d whisper-server-gpu kokoro-tts

 voice-cpu: ## Start CPU whisper server and Kokoro TTS.
-	$(DC) --profile voice up -d whisper-server kokoro-tts
+	$(DC) --profile voice-cpu-backup --profile voice up -d whisper-server kokoro-tts

 voice-down: ## Stop voice profile services.
-	$(DC) --profile voice down
+	$(DC) --profile voice --profile voice-gpu --profile voice-cpu-backup down

 voice-build: ## Build the custom Blackwell CUDA whisper image.
-	$(DC) --profile voice build whisper-server-gpu
+	$(DC) --profile voice-gpu build whisper-server-gpu

-voice-logs: ## Follow voice service logs.
-	$(DC) logs -f --tail="$(LOGS_TAIL)" whisper-server-gpu whisper-server kokoro-tts
+voice-logs: ## Follow default voice service logs.
+	$(DC) logs -f --tail="$(LOGS_TAIL)" whisper-server-npu kokoro-tts

 search-up: ## Start Brave Search MCP and SearXNG.
 	$(DC) --profile search up -d
@@ -37,6 +37,9 @@ For the current host-side AI/search/voice automation stack, n8n watchdogs, and a
 - [`docs/swarm-infrastructure.md`](docs/swarm-infrastructure.md) — operational overview and quick checks
 - [`docs/swarm-infrastructure.html`](docs/swarm-infrastructure.html) — dark SVG architecture diagram
 - [`docs/diagram-maintenance.md`](docs/diagram-maintenance.md) — diagram upkeep conventions
+- [`docs/npu-utilization-digest.md`](docs/npu-utilization-digest.md) — compact on-demand NPU proof/utilization digest runbook
+- [`docs/npu-integrated-health-ops.md`](docs/npu-integrated-health-ops.md) — integrated operator health-check workflow combining `npu-service-health.sh` and the utilization digest
+- OpenVINO NPU services and prototypes are documented in `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md` and the component READMEs under `openvino-*-npu*/`. Live baseline ports are RAG `:18810`, Whisper NPU `:18816`, and embeddings `:18817`; sidecar ports `:18818`, `:18819`, `:18820`, and optional doc/image triage `:18829` are approved prototypes only, not live Atlas/Hermes routing.

 ## VM: zap

@@ -4,8 +4,8 @@
 # ── VM provisioning ────────────────────────────────────────────────────────
 vm_domain: "zap [claw]"
 vm_hostname: zap
-vm_memory_mib: 6144
-vm_vcpus: 4
+vm_memory_mib: 3072
+vm_vcpus: 2
 vm_disk_path: /var/lib/libvirt/images/claw.qcow2
 vm_disk_size: "60G"
 vm_mac: "52:54:00:01:00:71"
@@ -12,9 +12,6 @@
 - name: OpenClaw VM customizations
  hosts: openclaw_servers
  become: true
-  vars:
-    openclaw_user: openclaw
-    openclaw_home: /home/openclaw

  tasks:

@@ -0,0 +1,52 @@
+version: 1
+policy:
+  default_mode: dry_run
+  require_explicit_root: true
+  allow_external_uploads: false
+  allow_mutations: false
+  log_raw_text: false
+  include_full_paths_default: false
+  npu_proof_path: /sys/class/accel/accel0/device/npu_busy_time_us
+
+# Copy to config/triage-roots.local.yaml and approve exactly one narrow,
+# lane-specific staging root. The committed template is intentionally
+# unapproved/fail-closed; do not point any lane at broad home, Downloads,
+# vault, screenshot, photo-library, or historical audio roots without explicit
+# approval for that exact lane/root.
+roots:
+  screenshots:
+    approved: false
+    root: null
+    allowed_extensions: [.png, .jpg, .jpeg, .webp, .heic]
+    max_files: 50
+    max_file_mb: 25
+  receipts:
+    approved: false
+    root: null
+    allowed_extensions: [.png, .jpg, .jpeg, .pdf, .webp]
+    max_files: 50
+    max_file_mb: 25
+  downloads:
+    approved: false
+    root: null
+    allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp]
+    max_files: 50
+    max_file_mb: 25
+  obsidian_attachments:
+    approved: false
+    root: null
+    allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp, .mp3, .m4a, .wav, .ogg]
+    max_files: 50
+    max_file_mb: 50
+  voice_memos:
+    approved: false
+    root: null
+    allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
+    max_files: 25
+    max_file_mb: 100
+  meeting_snippets:
+    approved: false
+    root: null
+    allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
+    max_files: 25
+    max_file_mb: 200
@@ -0,0 +1,46 @@
+version: 1
+policy:
+  default_mode: dry_run
+  require_explicit_root: true
+  allow_external_uploads: false
+  allow_mutations: false
+  log_raw_text: false
+  include_full_paths_default: false
+  npu_proof_path: /sys/class/accel/accel0/device/npu_busy_time_us
+roots:
+  screenshots:
+    approved: true
+    root: ../openvino-doc-image-triage-npu/samples
+    allowed_extensions: [.png, .jpg, .jpeg, .webp, .heic]
+    max_files: 50
+    max_file_mb: 25
+  receipts:
+    approved: true
+    root: ../openvino-doc-image-triage-npu/samples
+    allowed_extensions: [.png, .jpg, .jpeg, .pdf, .webp]
+    max_files: 50
+    max_file_mb: 25
+  downloads:
+    approved: true
+    root: ../openvino-doc-image-triage-npu/samples
+    allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp]
+    max_files: 50
+    max_file_mb: 25
+  obsidian_attachments:
+    approved: true
+    root: ../openvino-doc-image-triage-npu/samples
+    allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp, .mp3, .m4a, .wav, .ogg]
+    max_files: 50
+    max_file_mb: 50
+  voice_memos:
+    approved: true
+    root: ../tmp/synthetic-voice-memos
+    allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
+    max_files: 25
+    max_file_mb: 100
+  meeting_snippets:
+    approved: true
+    root: ../tmp/synthetic-meeting-snippets
+    allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
+    max_files: 25
+    max_file_mb: 200
@@ -37,7 +37,7 @@ services:
  whisper-init:
    image: ghcr.io/ggml-org/whisper.cpp@sha256:672650b5e67f9cb86af7ac6e09dea8eac12a024086e1e5c0172fdccf336aba09
    container_name: whisper-init
-    profiles: ["voice"]
+    profiles: ["voice", "voice-cpu-backup"]
    restart: "no"
    volumes:
      - whisper-models:/app/models
@@ -54,17 +54,15 @@ services:
          fi
        done

-  # Primary whisper.cpp server: NVIDIA RTX 5070 Ti via CUDA (Blackwell sm_120).
-  # Uses ggml-base.bin to keep the service alive while llama-server owns most of
-  # the laptop GPU VRAM. The previous ggml-small.bin profile needed ~465 MiB
-  # contiguous CUDA memory and restarted when only ~560 MiB fragmented VRAM was
-  # free. CPU whisper-server below remains the higher-accuracy fallback.
+  # Manual GPU whisper.cpp fallback: NVIDIA RTX 5070 Ti via CUDA (Blackwell sm_120).
+  # Kept out of the normal `voice` profile because the OpenVINO NPU Whisper
+  # service is the default and this container consumes GPU resources.
  #
  # The official `ghcr.io/ggml-org/whisper.cpp:main-cuda` ships kernels only
  # for sm_75/80/86/90 and fails to init CUDA on Blackwell. We build a custom
  # image with `CMAKE_CUDA_ARCHITECTURES=120` from the local Dockerfile.
  # Build manually with: docker build -t whisper.cpp:cuda-blackwell ./whisper-cuda-blackwell
-  # Or `docker compose --profile voice build whisper-server-gpu`.
+  # Or `docker compose --profile voice-gpu build whisper-server-gpu`.
  whisper-server-gpu:
    image: whisper.cpp:cuda-blackwell
    build:
@@ -72,7 +70,7 @@ services:
      dockerfile: Dockerfile
    container_name: whisper-server-gpu
    restart: unless-stopped
-    profiles: ["voice"]
+    profiles: ["voice-gpu"]
    ports:
      - "18801:8080"
    volumes:
@@ -115,16 +113,62 @@ services:
      agentmon.role: "voice"
      agentmon.port: "18801"

-  # Fallback whisper.cpp server: CPU-only, medium model.
-  # Kept around for resilience — runs if the GPU server is down (driver issue,
-  # gemma takes all VRAM, custom image broken, etc.). Uses no GPU resources.
-  # ~14 s per short clip (medium-on-CPU is 90x slower than small-on-GPU above).
-  # Start with: docker compose --profile voice up -d whisper-server
+  # Default (trial) OpenVINO GenAI Whisper server on the Intel NPU. Not whisper.cpp:
+  # it serves the same OpenAI-style /v1/audio/transcriptions route via OpenVINO
+  # WhisperPipeline. Host requirements: intel-npu-driver-bin, /dev/accel/accel0, and
+  # the NPU Level Zero driver/compiler libraries mounted below (the versioned .so
+  # path is tied to the installed driver). Override RENDER_GID / OPENVINO_MODEL_CACHE per host.
+  whisper-server-npu:
+    image: whisper-openvino-npu:local
+    build:
+      context: ./whisper-openvino-npu
+      dockerfile: Dockerfile
+    container_name: whisper-server-npu
+    restart: unless-stopped
+    profiles: ["voice"]
+    ports:
+      - "18816:8080"
+    devices:
+      - /dev/accel/accel0:/dev/accel/accel0
+    group_add:
+      - "${RENDER_GID:-987}" # host render group gid; default matches willlaptop
+    environment:
+      - WHISPER_DEVICE=NPU
+      - WHISPER_MODEL_DIR=/models/whisper-tiny-fp16-ov
+      - LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu
+      - ZE_ENABLE_ALT_DRIVERS=/usr/lib/x86_64-linux-gnu/libze_intel_npu.so.1
+    volumes:
+      - "${OPENVINO_MODEL_CACHE:-/home/will/.cache/openvino-models}/whisper-tiny-fp16-ov:/models/whisper-tiny-fp16-ov:ro"
+      - /usr/lib/x86_64-linux-gnu/libze_intel_npu.so.1.32.1:/usr/lib/x86_64-linux-gnu/libze_intel_npu.so.1.32.1:ro
+      - /usr/lib/x86_64-linux-gnu/libze_intel_npu.so.1:/usr/lib/x86_64-linux-gnu/libze_intel_npu.so.1:ro
+      - /usr/lib/x86_64-linux-gnu/libze_intel_npu.so:/usr/lib/x86_64-linux-gnu/libze_intel_npu.so:ro
+      - /usr/lib/x86_64-linux-gnu/libnpu_driver_compiler.so:/usr/lib/x86_64-linux-gnu/libnpu_driver_compiler.so:ro
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "curl -f http://localhost:8080/health >/dev/null 2>&1 || exit 1",
+        ]
+      interval: 30s
+      timeout: 5s
+      start_period: 30s
+      retries: 3
+    labels:
+      agentmon.monitor: "true"
+      agentmon.role: "voice"
+      agentmon.port: "18816"
+
+  # Manual fallback whisper.cpp server: CPU-only, medium model.
+  # Kept around for resilience — runs if the NPU/GPU servers are down. Uses no
+  # accelerator resources, but is slow (~14 s per short clip).
+  # Disabled from the normal `voice` profile now that `whisper-server-npu` is
+  # the trial default. Start manually with:
+  #   docker compose --profile voice-cpu-backup up -d whisper-server
  whisper-server:
    image: ghcr.io/ggml-org/whisper.cpp@sha256:672650b5e67f9cb86af7ac6e09dea8eac12a024086e1e5c0172fdccf336aba09
    container_name: whisper-server
    restart: unless-stopped
-    profiles: ["voice"]
+    profiles: ["voice-cpu-backup"]
    ports:
      - "18811:8080"
    volumes:
@@ -15,6 +15,7 @@ Update the relevant diagram in the same change set when you change any of these:
 - n8n workflow architecture
 - Hermes/Atlas routing or gateway responsibilities
 - local AI/search/voice endpoints
+- OpenVINO NPU live/prototype status, ports, or safety gates (`:18810`, `:18816`, `:18817`, `:18818`, `:18819`, `:18820`, optional `:18829`)
 - Obsidian/RAG data flow
 - OpenClaw/VM operational mode
 - ownership/source-of-truth paths for a component
@@ -27,6 +28,7 @@ Create a new focused diagram when the existing overview would become too dense.
 - agentmon internals: collectors → NATS → processor → Postgres → query/UI
 - Obsidian/RAG automation pipeline
 - local AI routing: Hermes/LiteLLM/llama.cpp/Ollama/provider boundaries
+- OpenVINO NPU assistant sidecars, with live baseline and approved/not-live prototype lanes separated
 - messaging/channel routing: Telegram/Discord/email → Hermes/n8n/alerts
 - disaster recovery / backup topology

@@ -37,6 +39,7 @@ Create a new focused diagram when the existing overview would become too dense.
 - Link diagrams from the nearest README or operational doc.
 - Keep labels operational: service name, port, responsibility, and data direction.
 - Avoid secrets, credential names that imply secret values, private tokens, raw webhook URLs, or sensitive sample payloads.
+- Do not imply live Atlas/Hermes/RAG routing to an OpenVINO NPU prototype unless a reviewed implementation actually enabled it; label approved prototypes as `not live` or `approval required`.
 - If a raw export or live config was used to build the diagram, commit only the sanitized diagram/docs, not the raw sensitive source.

 ## Verification before committing
@@ -0,0 +1,456 @@
+# NPU advisory decision schema and dry-run evaluation metrics
+
+This document defines the compact `npu_advisory_decision_v1` record and the
+minimum dry-run metrics required before any OpenVINO/NPU advisory lane is
+considered for promotion. The schema is advisory-only: it creates audit evidence
+and comparison data, not live authority.
+
+Scope and safety defaults:
+
+- Local audit records only; no outbound sends, service restarts, tool execution,
+  memory writes, routing changes, vector-store mutation, or broad private scans.
+- Synthetic or explicitly non-private fixtures only for dry-run evaluation.
+- Raw prompts, transcripts, documents, images, headers, secrets, and full upstream
+  JSON payloads are not persisted by default.
+- NPU output is evidence for a gate. It must never directly perform or trigger
+  an action.
+
+## `npu_advisory_decision_v1`
+
+Required top-level fields:
+
+| Field | Type | Required | Notes |
+| --- | --- | ---: | --- |
+| `schema_version` | string | yes | Always `npu_advisory_decision_v1`. |
+| `decision_id` | string | yes | Locally generated UUID/ULID. No payload-derived PII. |
+| `timestamp` | string | yes | RFC3339/ISO-8601 UTC timestamp. |
+| `source` | object | yes | Where the dry-run input came from. |
+| `service` | object | yes | Advisory lane/service that produced the recommendation. |
+| `input_class` | string | yes | Normalized class such as `context_gate`, `cron_n8n_event`, `batch_doc_triage`, `voice_audio`, `kanban_hygiene`, or `advisory_gateway_envelope`. |
+| `recommendation` | object | yes | NPU/advisory recommendation and rationale metadata. |
+| `confidence` | object | yes | Score, bucket, and calibration notes. |
+| `authority_flags` | object | yes | Explicit booleans for authority boundaries; all `can_*` capability flags default false (see field details for `requires_human_approval` and `advisory_only`). |
+| `allowed_actions` | array[string] | yes | Actions a downstream gate may consider. Defaults to advisory-only actions. |
+| `actual_action` | object | yes | What really happened. In this gate it should always be no-op/record-only. |
+| `human_or_atlas_decision` | object | yes | Comparison target from fixture expected label, human label, or Atlas decision. |
+| `outcome` | object | yes | Agreement/error bucket used by the eval harness. |
+| `npu_proof` | object | yes | Evidence that a real NPU-backed inference ran, where available. |
+| `latency` | object | yes | Request latency and optional queue/processing timings. |
+| `fallback` | object | yes | Whether CPU/offline/health-only fallback happened and why. |
+| `privacy` | object | yes | What was redacted/hashed and what retention class applies. |
+| `notes` | array[string] | no | Short non-private audit notes. |
+
+### Field details
+
+`source`:
+
+- `kind`: `fixture`, `manual_label`, `atlas_shadow`, `human_review`, or
+  `service_health_probe`.
+- `fixture_id`: stable fixture identifier when applicable.
+- `fixture_set`: fixture collection name/version.
+- `artifact_ref`: optional local path or opaque run id; do not include raw
+  private content.
+- `content_hash`: optional SHA-256 over sanitized fixture content.
+- `privacy_class`: `synthetic`, `public`, `non_private`, `redacted`, or
+  `private_disallowed`.
+
+`service`:
+
+- `name`: e.g. `openvino_context_gate`, `cron_n8n_advisory`,
+  `npu_batch_triage`, `npu_voice_audio_pipeline`, `kanban_hygiene_advisory`,
+  `openvino_advisory_gateway`.
+- `endpoint`: local endpoint label or script name; avoid sensitive URL params.
+- `mode`: `dry_run`, `shadow`, `health_only`, or `offline_fixture`.
+- `model`: optional model/backend label, if safe to log.
+
+`recommendation`:
+
+- `label`: normalized recommendation, e.g. `suppress`, `log`, `summarize`,
+  `escalate`, `retrieve_more_context`, `skip_private_root`, `needs_human`,
+  `no_action`, or `unknown`.
+- `severity`: `none`, `info`, `low`, `medium`, `high`, or `critical`.
+- `reasons`: short non-private reason codes, not raw excerpts.
+- `evidence_refs`: bounded references to sanitized fixture fields or artifact ids.
+- `raw_output_ref`: optional local artifact pointer; default null.
+
+`confidence`:
+
+- `score`: float from 0.0 to 1.0 when available, otherwise null.
+- `bucket`: one of `very_low`, `low`, `medium`, `high`, `very_high`, or
+  `unknown`.
+- `bucket_rule`: the threshold rule used by the harness.
+- `calibrated`: boolean; false until enough labeled dry-run data exists.
+
+Recommended confidence buckets:
+
+| Bucket | Score range | Gate behavior |
+| --- | --- | --- |
+| `very_low` | `< 0.40` | Treat as uncertain; never escalate automatically. |
+| `low` | `>= 0.40` and `< 0.60` | Advisory note only; human/Atlas decides. |
+| `medium` | `>= 0.60` and `< 0.80` | Eligible for comparison metrics; no live action. |
+| `high` | `>= 0.80` and `< 0.95` | Strong advisory evidence; still gated. |
+| `very_high` | `>= 0.95` | Promotion candidate only after repeated eval success. |
+| `unknown` | null/missing | Count separately; do not coerce to zero. |
+
+`authority_flags`:
+
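+All `can_*` capability flags default to false and must remain false for this gate; the two guard flags at the end of the list (`requires_human_approval`, `advisory_only`) are set as described below the list.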
+
+- `can_route_atlas`
+- `can_write_memory`
+- `can_execute_tools`
+- `can_restart_services`
+- `can_send_outbound`
+- `can_scan_private_roots`
+- `can_mutate_vector_store`
+- `can_post_advisory_event`
+- `can_change_gateway_config`
+- `requires_human_approval`
+- `advisory_only`
+
+For this gate, `advisory_only=true` and `requires_human_approval=true` for any
+recommendation that could eventually affect live behavior.
+
+`allowed_actions`:
+
+Allowed by default:
+
+- `record_metric`
+- `compare_with_expected_label`
+- `include_in_digest`
+- `open_review_ticket_candidate`
+- `recommend_human_review`
+
+Disallowed unless a later approval explicitly changes scope:
+
+- `route_atlas`
+- `write_memory`
+- `execute_tool`
+- `restart_service`
+- `send_message`
+- `scan_private_root`
+- `mutate_vector_store`
+- `post_gateway_event`
+
+`actual_action`:
+
+- `kind`: should be `none`, `recorded_metric`, or `dry_run_reported`.
+- `performed`: boolean indicating whether a live side effect was performed; must be false in this gate.
+- `performed_by`: `harness`, `human`, `atlas`, or null.
+- `side_effects`: array; should be empty except local report/artifact writes.
+
+`human_or_atlas_decision`:
+
+- `source`: `fixture_expected`, `human_label`, `atlas_shadow`, or `missing`.
+- `label`: normalized decision label using the same label set as
+  `recommendation.label` when possible.
+- `severity`: normalized severity when applicable.
+- `confidence`: optional Atlas/human confidence if available.
+- `decision_ref`: optional review id, fixture id, or session/run id.
+- `timestamp`: optional timestamp for the comparison decision.
+
+`outcome`:
+
+- `comparison`: `agree`, `disagree`, `uncertain`, `missing_reference`, or
+  `not_applicable`.
+- `error_type`: null or one of `false_positive`, `false_negative`,
+  `severity_overcall`, `severity_undercall`, `unsafe_authority`,
+  `privacy_violation`, `fallback_unexpected`, `latency_slo_miss`,
+  `npu_proof_missing`.
+- `human_review_required`: boolean.
+- `promotion_blocker`: boolean.
+
+`npu_proof`:
+
+- `proof_mode`: `sysfs_busy_delta`, `service_reported_delta`, `health_only`,
+  `offline_fixture`, or `unavailable`.
+- `busy_delta_us`: integer or null.
+- `service_reported_delta_us`: integer or null.
+- `inference_ran`: boolean.
+- `proof_ok`: boolean or null. Null means not measurable, not false.
+- `counter_path`: usually `/sys/class/accel/accel0/device/npu_busy_time_us`, if
+  logged safely.
+
+`latency`:
+
+- `total_ms`: end-to-end harness timing.
+- `service_ms`: service-reported processing time when available.
+- `queue_ms`: optional queue time.
+- `timeout`: boolean.
+
+`fallback`:
+
+- `occurred`: boolean.
+- `kind`: null, `cpu`, `offline`, `health_only`, `service_unavailable`,
+  `skipped_cold_load`, `private_root_blocked`, or `proof_unavailable`.
+- `reason`: short reason code.
+- `expected`: boolean. Expected fallbacks are counted but do not fail promotion
+  unless their rate exceeds the threshold for that lane.
+
+`privacy`:
+
+- `payload_logged`: must default false.
+- `redaction`: `none_needed`, `hash_only`, `paths_only`, `metadata_only`, or
+  `blocked_private`.
+- `retention`: `ephemeral`, `local_audit`, or `review_artifact`.
+- `contains_private_payload`: must be false for committed fixtures.
+
+## Minimal JSON shape
+
+```json
+{
+  "schema_version": "npu_advisory_decision_v1",
+  "decision_id": "01J00000000000000000000000",
+  "timestamp": "2026-06-06T00:00:00Z",
+  "source": {
+    "kind": "fixture",
+    "fixture_id": "cron_duplicate_success_001",
+    "fixture_set": "npu_advisory_eval_v1",
+    "artifact_ref": null,
+    "content_hash": "sha256:example",
+    "privacy_class": "synthetic"
+  },
+  "service": {
+    "name": "cron_n8n_advisory",
+    "endpoint": "openvino-advisory-gateway/examples/cron-advisory-dry-run.sh",
+    "mode": "dry_run",
+    "model": "openvino-local"
+  },
+  "input_class": "cron_n8n_event",
+  "recommendation": {
+    "label": "suppress",
+    "severity": "info",
+    "reasons": ["duplicate_success", "no_action_required"],
+    "evidence_refs": ["fixture:event_kind", "fixture:status"],
+    "raw_output_ref": null
+  },
+  "confidence": {
+    "score": 0.91,
+    "bucket": "high",
+    "bucket_rule": "v1_default",
+    "calibrated": false
+  },
+  "authority_flags": {
+    "can_route_atlas": false,
+    "can_write_memory": false,
+    "can_execute_tools": false,
+    "can_restart_services": false,
+    "can_send_outbound": false,
+    "can_scan_private_roots": false,
+    "can_mutate_vector_store": false,
+    "can_post_advisory_event": false,
+    "can_change_gateway_config": false,
+    "requires_human_approval": true,
+    "advisory_only": true
+  },
+  "allowed_actions": [
+    "record_metric",
+    "compare_with_expected_label",
+    "include_in_digest"
+  ],
+  "actual_action": {
+    "kind": "dry_run_reported",
+    "performed": false,
+    "performed_by": "harness",
+    "side_effects": []
+  },
+  "human_or_atlas_decision": {
+    "source": "fixture_expected",
+    "label": "suppress",
+    "severity": "info",
+    "confidence": null,
+    "decision_ref": "cron_duplicate_success_001",
+    "timestamp": null
+  },
+  "outcome": {
+    "comparison": "agree",
+    "error_type": null,
+    "human_review_required": false,
+    "promotion_blocker": false
+  },
+  "npu_proof": {
+    "proof_mode": "sysfs_busy_delta",
+    "busy_delta_us": 1200,
+    "service_reported_delta_us": 1180,
+    "inference_ran": true,
+    "proof_ok": true,
+    "counter_path": "/sys/class/accel/accel0/device/npu_busy_time_us"
+  },
+  "latency": {
+    "total_ms": 42.5,
+    "service_ms": 39.1,
+    "queue_ms": null,
+    "timeout": false
+  },
+  "fallback": {
+    "occurred": false,
+    "kind": null,
+    "reason": null,
+    "expected": false
+  },
+  "privacy": {
+    "payload_logged": false,
+    "redaction": "metadata_only",
+    "retention": "local_audit",
+    "contains_private_payload": false
+  },
+  "notes": []
+}
+```
+
+## Dry-run comparison strategy
+
+Each fixture or shadow input should produce one `npu_advisory_decision_v1`
+record. The harness compares `recommendation` to `human_or_atlas_decision` in
+this order:
+
+1. Use `fixture_expected` labels for synthetic/non-private regression fixtures.
+2. Use explicit `human_label` for reviewed samples.
+3. Use `atlas_shadow` only as a comparison signal, not ground truth, when a human
+   label is unavailable.
+4. Mark `missing_reference` rather than inventing a target decision.
+
+Comparison categories (`outcome.comparison`) and the error types (`outcome.error_type`) recorded alongside them:
+
+- `agree`: normalized label and severity are compatible.
+- `disagree`: label conflicts with the reference decision.
+- `uncertain`: NPU bucket is `very_low`, `low`, or `unknown`, or the service
+  returned a deliberate `needs_human`/`unknown` label.
+- `false_positive`: NPU recommended escalation/action but reference says
+  suppress/no-op.
+- `false_negative`: NPU recommended suppress/no-op but reference says escalate or
+  action-needed.
+- `severity_overcall` / `severity_undercall`: label matches but severity differs
+  by more than one level.
+
+The summary should be grouped by lane (`input_class` and `service.name`) and by
+confidence bucket. Unknown metrics remain null/`n/a`; do not coerce missing data
+to zero.
+
+## Metrics
+
+Minimum per-run metrics:
+
+- `total_records`
+- `records_by_input_class`
+- `records_by_service`
+- `confidence_bucket_counts`
+- `recommendation_counts`
+- `authority_flag_violation_count`
+- `privacy_violation_count`
+- `actual_side_effect_count`
+- `agree_count`, `disagree_count`, `uncertain_count`, `missing_reference_count`
+- `false_positive_count`, `false_negative_count`
+- `severity_overcall_count`, `severity_undercall_count`
+- `fallback_count` and `fallback_counts_by_kind`
+- `expected_fallback_count` vs `unexpected_fallback_count`
+- `npu_proof_ok_count`, `npu_proof_missing_count`, `npu_proof_not_applicable_count`
+- p50/p95 `latency.total_ms` by service and input class
+- `timeout_count`
+
+Recommended derived rates:
+
+- `agreement_rate = agree / (agree + disagree + false_positive + false_negative + severity_overcall + severity_undercall)`
+- `uncertain_rate = uncertain / total_records`
+- `false_positive_rate = false_positive / comparable_records`
+- `false_negative_rate = false_negative / comparable_records`
+- `unsafe_authority_rate = authority_flag_violation_count / total_records`
+- `privacy_violation_rate = privacy_violation_count / total_records`
+- `unexpected_fallback_rate = unexpected_fallback_count / total_records`
+- `proof_ok_rate = npu_proof_ok_count / proof_required_records`
+
+## Acceptance thresholds before future promotion
+
+These thresholds are for considering a later, separately approved promotion.
+They do not grant authority by themselves.
+
+Global blockers for every lane:
+
+- `authority_flag_violation_count == 0`.
+- `actual_side_effect_count == 0` for dry-run harness execution.
+- `privacy_violation_count == 0` and no committed private fixtures/secrets.
+- No raw private payloads in logs, reports, artifacts, or test fixtures.
+- No service bind, route, memory, tool, send, restart, or vector-store mutation
+  introduced by the eval code.
+
+Minimum data quality before promotion discussion:
+
+- At least 30 comparable synthetic/non-private records per lane, or all available
+  lane fixtures if the lane is explicitly scoped smaller.
+- Every advisory lane has at least one normal case, one low-confidence case, one
+  false-alarm/noise case, and one action-needed/escalation case.
+- `missing_reference_count == 0` for promotion-candidate fixture sets.
+- Confidence bucket distribution is reported and stable across at least three
+  dry-run executions.
+
+Suggested metric thresholds:
+
+| Metric | Threshold for promotion discussion |
+| --- | ---: |
+| Agreement rate | `>= 0.95` overall and `>= 0.90` per lane |
+| False positive rate | `<= 0.03` overall and no repeated high-severity false positives |
+| False negative rate | `<= 0.01` for action-needed/escalation cases |
+| Uncertain rate | `<= 0.15` overall, unless lane is intentionally conservative |
+| Unexpected fallback rate | `<= 0.02` and every fallback has a reason code |
+| NPU proof OK rate | `>= 0.98` for proof-required lanes |
+| p95 latency | Within the lane-specific SLO documented by the implementation task |
+| Authority/privacy violations | exactly `0` |
+
+Promotion remains lane-specific. A passing context-gate eval does not promote
+cron/n8n, voice/audio, batch triage, Kanban hygiene, or advisory gateway lanes.
+Each lane needs its own human-approved scope, rollback plan, and review.
+
+## Output formats
+
+The dry-run harness should emit:
+
+1. JSONL decisions: one `npu_advisory_decision_v1` object per line.
+2. Compact JSON summary: aggregate counts/rates for dashboards and follow-up
+   digest scripts.
+3. Compact Markdown/text summary: suitable for terminal, Telegram, or Discord.
+
+The Markdown/text summary should include:
+
+- run id, fixture set, generated-at timestamp;
+- records by lane/service;
+- agreement/uncertain/false-positive/false-negative counts;
+- confidence bucket distribution;
+- fallback counts;
+- NPU proof counts;
+- authority/privacy violation counts;
+- promotion blockers and caveats.
+
+## Fixture expectations
+
+Use synthetic/non-private fixtures only. Required lanes:
+
+- `context_gate`: retrieve/no-retrieve decisions with missing, conflicting, and
+  sufficient context cases.
+- `cron_n8n_event`: duplicate success, stale warning, urgent false alarm, and
+  action-needed failure.
+- `batch_doc_triage`: private-root blocked, approved synthetic sample, noisy OCR,
+  and needs-human cases.
+- `voice_audio`: bounded generated audio, low-confidence transcript, harmless
+  background noise, and action-needed command-like utterance that must not
+  execute.
+- `kanban_hygiene`: no-op healthy card, stale/card-needs-review, false alarm, and
+  action-needed label.
+- `advisory_gateway_envelope`: valid classify/generate/triage envelope examples
+  plus malformed/unsafe authority-request examples.
+
+Any fixture that resembles private content should be replaced with a synthetic
+fixture or reduced to metadata/hash-only form before committing.
+
+## Review checklist
+
+Before implementation or docs depending on this spec are accepted, verify:
+
+- `schema_version` is present and all authority flags default closed.
+- Dry-run execution produces no live side effects beyond local report/artifact
+  writes.
+- Unknown/missing metrics are represented as null/`n/a`, not fake zero.
+- Raw payloads and private paths are not persisted by default.
+- Summary metrics include confidence buckets, fallback counts, NPU proof, and
+  authority/privacy violations.
+- Promotion language says "candidate" or "discussion" only; no automatic live
+  authority is granted by a passing eval.
@@ -0,0 +1,55 @@
+# NPU advisory dry-run comparison harness
+
+This harness compares advisory-only NPU lane recommendations against synthetic/non-private expected decisions. It is an observability gate only: it does not route, send, write memory, execute tools, restart services, broaden private scans, restart gateways, or mutate vector stores.
+
+For the operator runbook and promotion criteria, see `docs/npu-advisory-observability-runbook.md`. Treat this file as the compact command reference; the runbook is the source for how to interpret metrics and decide whether a lane is promotable later.
+
+## Run
+
+From `/home/will/lab/swarm`:
+
+```bash
+python scripts/npu-advisory-dry-run-comparison.py --format json
+python scripts/npu-advisory-dry-run-comparison.py --format json --include-decisions
+python scripts/npu-advisory-dry-run-comparison.py --format markdown
+```
+
+Strict checks for CI/review:
+
+```bash
+python scripts/npu-advisory-dry-run-comparison.py --fail-on-mismatch
+python scripts/npu-advisory-dry-run-comparison.py --fail-on-authority-violation
+```
+
+`--fail-on-authority-violation` is expected to exit nonzero with the committed fixture set: one synthetic gateway fixture is a deliberate negative control that proves `may_* = true` is caught and summarized.
+
+## Fixture coverage
+
+Fixtures live at `fixtures/npu_advisory_dry_run/fixtures.json` and cover:
+
+- context gate;
+- cron/n8n advisory events;
+- batch document/audio triage shape;
+- voice/audio advisory gate;
+- Kanban hygiene advisory;
+- advisory gateway envelopes.
+
+All fixture payloads are synthetic and omit raw private content. Lane adapters use deterministic local rules or imported pure functions; they do not call live advisory services.
+
+## Output shape
+
+JSON output uses `npu_advisory_dry_run_summary_v1` and includes totals, per-lane counts, confidence buckets, recommendation counts, authority violations, expected-outcome mismatches, and optionally per-fixture `npu_advisory_decision_v1` records.
+
+Each decision record includes timestamp, source, service, lane, input class, recommendation, expected recommendation, confidence/bucket, authority flags, allowed actions, actual action (`none_dry_run`), human/Atlas comparison, outcome, NPU proof, latency, fallback reason, and compact notes.
+
+## Promotion gate
+
+Before any future advisory lane receives authority, a separate approval should require at minimum:
+
+- no expected-outcome mismatches for that lane's representative fixture set;
+- no false negatives on action-needed events;
+- intentionally reviewed false positives;
+- zero authority-safe flag violations except known negative-control fixtures;
+- documented rollback and a narrow, explicit authority scope.
+
+Passing this harness never grants live authority by itself. Advisory outputs flow into `npu_advisory_decision_v1` records, summary metrics, and a human/Atlas review gate. Any later promotion must be lane-specific, explicitly approved, and reversible.
@@ -0,0 +1,246 @@
+# NPU advisory observability and promotion runbook
+
+This runbook is the operator-facing gate for Will's OpenVINO/NPU advisory lanes. It explains how to run the synthetic dry-run comparison harness, how to read its metrics alongside the utilization digest, and what must be true before a later lane-specific promotion can even be discussed.
+
+The current gate is observability only. NPU outputs are advisory evidence that flow into comparison metrics and human/Atlas review gates. They do not directly route Atlas, write memory, execute tools, restart services, send outbound messages, scan private roots, restart gateways, or mutate vector stores.
+
+## Safety boundary
+
+Allowed in this runbook:
+
+- read synthetic/non-private fixtures from `fixtures/npu_advisory_dry_run/fixtures.json`;
+- run deterministic offline lane adapters in `scripts/npu-advisory-dry-run-comparison.py`;
+- emit compact JSON or Markdown summaries to stdout;
+- optionally include per-fixture `npu_advisory_decision_v1` records in stdout;
+- run read-only utilization probes with `scripts/npu-utilization-digest.py` when live service health is relevant.
+
+Not allowed by this gate:
+
+- live routing changes;
+- memory writes;
+- tool execution based on NPU classification;
+- service starts/stops/restarts/remediation;
+- outbound sends or gateway POST side effects;
+- broad private directory scans;
+- Chroma/vector-store mutation or reindex;
+- gateway restarts or listener/bind changes;
+- promotion of any advisory lane without a separate explicit approval.
+
+## Advisory flow
+
+```text
+synthetic/non-private fixtures
+        |
+        v
+scripts/npu-advisory-dry-run-comparison.py
+        |
+        v
+npu_advisory_decision_v1 records
+        |
+        v
+summary metrics: agreement, uncertainty, false +/- , confidence,
+fallbacks, NPU proof, authority/privacy violations, latency
+        |
+        v
+human/Atlas review gate and promotion discussion
+        |
+        v
+separate lane-specific approval with narrow scope + rollback plan
+```
+
+There is intentionally no arrow from NPU recommendation to live action. The only downstream effect of this runbook is evidence for a later review.
+
+## Required files
+
+| Path | Role |
+| --- | --- |
+| `scripts/npu-advisory-dry-run-comparison.py` | Synthetic dry-run comparison harness. |
+| `fixtures/npu_advisory_dry_run/fixtures.json` | Synthetic/non-private fixture set. |
+| `docs/npu-advisory-decision-schema.md` | `npu_advisory_decision_v1` schema and metric definitions. |
+| `docs/npu-advisory-dry-run-comparison.md` | Short harness reference. |
+| `docs/npu-utilization-digest.md` | Live read-only utilization digest reference. |
+| `tests/test_npu_advisory_dry_run_comparison.py` | Offline tests for fixture coverage and harness output. |
+| `tests/test_npu_utilization_digest.py` | Offline tests for utilization digest metric logic. |
+
+## Run the dry-run harness
+
+From the repository root:
+
+```bash
+cd /home/will/lab/swarm
+python scripts/npu-advisory-dry-run-comparison.py --format markdown
+python scripts/npu-advisory-dry-run-comparison.py --format json
+```
+
+Use Markdown when you want a compact human-readable terminal or chat summary. Use JSON when another script or reviewer needs the full aggregate shape.
+
+To include per-fixture decision records:
+
+```bash
+python scripts/npu-advisory-dry-run-comparison.py --format json --include-decisions
+```
+
+To run the strict mismatch gate:
+
+```bash
+python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-mismatch
+```
+
+This should exit `0` when each fixture's observed outcome matches its `expected_outcome`.
+
+To prove unsafe authority flags are detected:
+
+```bash
+python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-authority-violation
+```
+
+The committed fixture set intentionally includes `gateway-authority-violation`, so this command is expected to exit `1` while reporting `authority_safe_flag_violations: 1`. That is a negative-control fixture, not a permission grant.
+
+## Expected compact output
+
+Current fixture shape is expected to resemble:
+
+```text
+# NPU advisory dry-run comparison
+
+fixtures: 9 | agree: 8 | disagree: 0 | false_positive: 1 | false_negative: 0 | uncertain: 0
+authority_safe_flag_violations: 1 | mutations: all_false
+
+| lane | fixtures | agree | false_positive | false_negative | violations |
+| --- | ---: | ---: | ---: | ---: | ---: |
+| advisory_gateway_envelope | 1 | 1 | 0 | 0 | 1 |
+| batch_triage | 2 | 2 | 0 | 0 | 0 |
+| context_gate | 2 | 2 | 0 | 0 | 0 |
+| cron_n8n_advisory | 2 | 1 | 1 | 0 | 0 |
+| kanban_hygiene | 1 | 1 | 0 | 0 | 0 |
+| voice_audio | 1 | 1 | 0 | 0 | 0 |
+
+## Authority-safe flag violations
+- gateway-authority-violation: can_send_outbound
+```
+
+Interpretation:
+
+- `fixtures` is the number of synthetic/non-private fixture cases evaluated.
+- `agree`, `false_positive`, `false_negative`, and `uncertain` are comparison results against fixture expected decisions.
+- `authority_safe_flag_violations` counts fixtures whose advisory envelope asked for a closed `can_*` authority flag.
+- `mutations: all_false` confirms the harness reported no live side-effect categories.
+- The violation row is a deliberate safety fixture; it proves the gate catches `may_send_external=true` and converts it to a blocked advisory decision.
+
+## Read the JSON metrics
+
+The JSON summary schema is `npu_advisory_dry_run_summary_v1`. Start with these fields:
+
+1. `dry_run` must be `true`.
+2. Every value under `mutations` must be `false`.
+3. `totals.expected_outcome_mismatches` must be `0` for a clean regression run.
+4. `minimum_metrics.privacy_violation_count` must be `0`.
+5. `minimum_metrics.actual_side_effect_count` must be `0`.
+6. `minimum_metrics.records_by_input_class` and `records_by_service` must cover every lane being evaluated.
+7. `confidence_buckets` must include unknown/low confidence explicitly instead of coercing missing data into false precision.
+8. `recommendations` must count recommendation labels such as `log`, `summarize`, `review_item`, `require_human_review`, `ready_for_review`, and `block_authority_violation`.
+9. `minimum_metrics.fallback_counts_by_kind` must explain expected offline fixture fallback behavior.
+10. `minimum_metrics.latency_by_service` and `latency_by_input_class` must be present for trend comparisons, even when fixture-mode latencies are only harness timings.
+
+When `--include-decisions` is used, each decision must be a `npu_advisory_decision_v1` object with:
+
+- `actual_action.performed=false` and `actual_action.side_effects=[]`;
+- `authority_flags.advisory_only=true`;
+- `authority_flags.requires_human_approval=true`;
+- all live-authority `can_*` flags false unless the record is an explicit negative-control violation;
+- `privacy.payload_logged=false` and `privacy.contains_private_payload=false`;
+- `fallback.kind=offline` and `fallback.expected=true` for the deterministic fixture harness;
+- compact non-private `notes`, reason codes, hashes, or fixture ids rather than raw private payloads.
+
+## Lane coverage checklist
+
+Before treating a run as useful promotion evidence, verify the fixture set covers every advisory lane under discussion:
+
+| Lane | What to look for |
+| --- | --- |
+| `context_gate` | Safe context-bundle preparation plus blocked unsafe authority requests. |
+| `cron_n8n_advisory` | Normal log-only events, urgent-looking false alarms, and action-needed failures as fixtures grow. |
+| `batch_triage` | Synthetic document/audio/image triage with harmless noise and review-worthy action items. |
+| `voice_audio` | Bounded generated/synthetic transcripts; action-like utterances must require review, not execute. |
+| `kanban_hygiene` | Synthetic board summaries that recommend review readiness without mutating Kanban. |
+| `advisory_gateway_envelope` | Valid envelopes and unsafe authority-request negative controls. |
+
+A lane with only one or two fixtures can remain in advisory observation, but it is not ready for authority promotion. Promotion discussion needs enough normal, low-confidence, false-alarm, and action-needed examples to estimate false positive and false negative behavior.
+
+## Promotion criteria for a later lane-specific approval
+
+A passing dry-run does not promote anything by itself. It only makes a lane eligible for a later approval discussion.
+
+Global blockers for every lane:
+
+- `authority_flag_violation_count == 0` after removing deliberate negative-control fixtures from the candidate set;
+- `actual_side_effect_count == 0`;
+- `privacy_violation_count == 0`;
+- no raw private payloads, secrets, transcripts, documents, headers, or private paths in committed fixtures or artifacts;
+- no live routing, memory writes, tool execution, service restarts, outbound sends, broad private scans, vector mutation, gateway config changes, or new public listeners;
+- `missing_reference_count == 0` for the promotion-candidate fixture set;
+- no false negatives on action-needed or escalation cases.
+
+Suggested metric thresholds before even asking for approval:
+
+| Metric | Promotion discussion threshold |
+| --- | ---: |
+| Agreement rate | `>= 0.95` overall and `>= 0.90` for the specific lane. |
+| False positive rate | `<= 0.03` overall, with all high-severity false positives reviewed. |
+| False negative rate | `<= 0.01` for action-needed/escalation cases. |
+| Uncertain rate | `<= 0.15`, unless the lane is intentionally conservative. |
+| Unexpected fallback rate | `<= 0.02`, with reason codes for every fallback. |
+| NPU proof OK rate | `>= 0.98` for live proof-required lanes. |
+| p95 latency | Within a documented lane-specific SLO. |
+| Authority/privacy violations | exactly `0` in the candidate set. |
+
+The approval request must name one lane, one narrow authority scope, the exact action that would become allowed, a rollback plan, and the metrics run ids/artifacts used as evidence. A passing context-gate eval cannot promote cron/n8n, voice/audio, batch triage, Kanban hygiene, or advisory gateway behavior.
+
+## Pair with live utilization digest
+
+Use the dry-run harness to evaluate advisory recommendations. Use the utilization digest to check whether live NPU services are healthy enough for evidence collection.
+
+Read-only live check:
+
+```bash
+cd /home/will/lab/swarm
+scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
+```
+
+Optional JSONL artifact for trend tracking:
+
+```bash
+scripts/npu-utilization-digest.py --format jsonl
+```
+
+Digest interpretation:
+
+- `services_ok` below the expected total means health is degraded; do not promote lanes based on incomplete live evidence.
+- `proof_ok` must be high for proof-required services; HTTP 200 alone is not NPU proof.
+- `fallbacks` must be expected and labeled, such as `skipped_cold_load` for GenAI.
+- `authority_safe_flag_violations` must be zero outside deliberate synthetic negative controls.
+- Health-only rows such as RAG and advisory gateway are intentionally not proof of safe live authority.
+
+## Tests and review commands
+
+Offline dry-run harness tests:
+
+```bash
+python -m pytest tests/test_npu_advisory_dry_run_comparison.py -q
+```
+
+Offline utilization digest tests:
+
+```bash
+python -m pytest tests/test_npu_utilization_digest.py -q
+```
+
+Suggested pre-review bundle:
+
+```bash
+python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-mismatch >/tmp/npu-advisory-summary.json
+python scripts/npu-advisory-dry-run-comparison.py --format markdown >/tmp/npu-advisory-summary.md
+python -m pytest tests/test_npu_advisory_dry_run_comparison.py tests/test_npu_utilization_digest.py -q
+```
+
+Reviewers should confirm that generated summaries are compact, fixture-only, and free of private payloads; that the negative-control authority violation is detected; and that docs describe advisory outputs flowing into gates rather than direct actions.
@@ -0,0 +1,65 @@
+# Explicit-root NPU batch triage dry-run examples
+
+These examples are wrappers only. They do not install cron jobs, enable services,
+change Atlas/Hermes routing, write Obsidian/RAG/vector DBs, move/delete files, or
+send outbound messages.
+
+The committed manifest template at `config/triage-roots.example.yaml` is
+intentionally unapproved. For real private data, copy it to
+`config/triage-roots.local.yaml` and approve exactly one narrow lane-specific
+staging folder. Request-level `--root` may narrow that manifest root but cannot
+broaden it.
+
+Synthetic document/image smoke, CPU-only/no NPU claim:
+
+```bash
+python scripts/npu-batch-triage-dry-run.py \
+  --manifest config/triage-roots.test.yaml \
+  --lane screenshots \
+  --root openvino-doc-image-triage-npu/samples \
+  --limit 5 \
+  --dry-run \
+  --no-npu \
+  --json
+```
+
+Synthetic document/image smoke with the existing local embeddings NPU service,
+if `127.0.0.1:18817` is healthy. Treat NPU as proven only when `npu.proof_ok` is
+true and `npu.busy_delta_us` (or item-level delta) is positive:
+
+```bash
+python scripts/npu-batch-triage-dry-run.py \
+  --manifest config/triage-roots.test.yaml \
+  --lane receipts \
+  --root openvino-doc-image-triage-npu/samples \
+  --limit 5 \
+  --dry-run \
+  --json
+```
+
+Audio smoke should use generated/public synthetic audio only until a private
+audio staging root is approved:
+
+```bash
+python scripts/npu-batch-triage-dry-run.py \
+  --manifest config/triage-roots.test.yaml \
+  --lane voice_memos \
+  --root tmp/synthetic-voice-memos \
+  --limit 3 \
+  --dry-run \
+  --no-npu \
+  --json
+```
+
+Cron/n8n shape (disabled example only):
+
+```text
+Manual Trigger / disabled cron
+  -> Execute Command: python /home/will/lab/swarm/scripts/npu-batch-triage-dry-run.py --manifest /home/will/lab/swarm/config/triage-roots.local.yaml --lane receipts --limit 25 --dry-run --json
+  -> IF ok && npu.proof_ok && files_processed > 0
+  -> local dashboard/report only
+```
+
+Do not connect this output to Telegram/Discord/email sends, Obsidian writes,
+RAG/vector reindex, file moves/deletes, Kanban mutation, service restarts, or
+Atlas/Hermes routing without a separate reviewed approval gate.
@@ -0,0 +1,204 @@
+# NPU integrated health checks — operator runbook notes
+
+Compact, read-only operator workflow that combines the existing
+`scripts/npu-service-health.sh` listener/systemd/embedding-proof probe with the
+reviewer-approved `scripts/npu-utilization-digest.py` per-service utilization
+and fallback report. Together they form a single safe daily / on-demand NPU
+health pass.
+
+Scope:
+
+- Read-only against live services. No restarts, route changes, vector mutation,
+  advisory POSTs, outbound sends, or memory writes.
+- No new persistent services, timers, sockets, compose services, or Dockerfiles
+  are introduced by this integration. Both scripts are foreground / on-demand.
+- Binds verified local-only or on the approved Docker bridge (`172.19.0.1:18830`).
+  Pre-existing broader binds on the live baseline ports (`18810`, `18814`,
+  `18816`, `18817`) are noted in the runbook and unchanged here.
+- NPU proof requires real inference plus a positive
+  `/sys/class/accel/accel0/device/npu_busy_time_us` delta. HTTP 200 alone is
+  not sufficient.
+
+## When to run
+
+- Daily / on-demand ops check.
+- After upgrades that touch the NPU stack, OpenVINO, or any of the live
+  specialists.
+- Before any approval-gated change that depends on the NPU reflex layer.
+- As the read-only verification step of a deploy or recovery runbook.
+
+## Required artifacts on the branch
+
+| Path | Role |
+| --- | --- |
+| `scripts/npu-service-health.sh` | Listener / systemd / Docker / health endpoint / single embedding proof. Existing baseline script. |
+| `scripts/npu-utilization-digest.py` | Per-service utilization digest with NPU proof per probe, compact text or JSONL output, optional JSONL artifact. |
+| `docs/npu-utilization-digest.md` | Per-service digest reference. |
+| `docs/npu-advisory-observability-runbook.md` | Dry-run comparison and later promotion criteria for advisory lanes. |
+| `tests/test_npu_utilization_digest.py` | Offline unit tests for the digest (no live services required). |
+
+## Integrated workflow
+
+### Step 1 — Listener and service-state snapshot
+
+```bash
+cd ~/lab/swarm
+./scripts/npu-service-health.sh
+```
+
+What it verifies, in order:
+
+1. `npu_busy_time_us` counter is readable.
+2. Required listeners are present on `18810 / 18814 / 18816 / 18817 / 18818 /
+   18819 / 18820 / 18829 / 18830`.
+3. User systemd services are active/enabled for embeddings, RAG health,
+   reranker, router/classifier, and the small GenAI worker.
+4. Docker Compose `whisper-server-npu` is up.
+5. Health endpoints return JSON for the live baseline and local specialists.
+6. A single non-private embeddings request to `:18817` produces a positive
+   sysfs `npu_busy_time_us` delta; the script exits nonzero if there is no
+   positive delta.
+
+Read the last block (`== Embeddings NPU busy-time proof ==`) first. If
+`result=ok` and `sysfs_delta_us > 0`, the central NPU path is healthy. If not,
+do not run the digest; triage the embeddings service first.
+
+### Step 2 — Per-service utilization digest
+
+```bash
+scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
+```
+
+Compact output shape:
+
+```text
+NPU utilization digest <timestamp>
+counter=/sys/class/accel/accel0/device/npu_busy_time_us delta_us=<total>
+services_ok=<ok>/<total> proof_ok=<ok>/<proof-capable> fallbacks=<n> gates_closed=<n>
+- embeddings: ok=true calls=1 avg_ms=... npu_delta_us=... proof=true mode=NPU
+- rerank:     ok=true calls=1 docs=2   avg_ms=... npu_delta_us=... proof=true mode=NPU
+- whisper:    ok=true calls=1 jobs=1   avg_ms=... npu_delta_us=... proof=true mode=NPU
+- classifier: ok=true calls=1 events=1 avg_ms=... npu_delta_us=... proof=true dry_run=true ...
+- genai:      ok=true jobs=0 loaded=false mode=loaded=false reason=skipped_cold_load
+- doc_triage: ok=true calls=1 files=1  avg_ms=... npu_delta_us=... proof=true gate=closed:private-root
+- rag_endpoint:   ok=true mode=health_only gate=closed:vector-mutation
+- rag_health:     ok=true mode=health_only
+- advisory_gateway: ok=true mode=health_only gate=closed:advisory-post
+fallbacks: skipped_cold_load=1
+```
+
+Read order for ops:
+
+1. `services_ok` row — anything below `9/9` means a service is down or unhealthy.
+2. `proof_ok` row — `proof_ok=5/5` means every probe that ran with a real
+   inference request produced a positive sysfs NPU delta.
+3. `fallbacks:` line — `skipped_cold_load=1` is expected (GenAI worker is
+   intentionally not cold-loaded). Any other fallback label is a triage signal.
+4. `gate=` labels — closed gates that remain closed by design.
+
+### Step 3 — Optional artifact for trend tracking
+
+```bash
+scripts/npu-utilization-digest.py --format jsonl
+```
+
+Writes one JSONL artifact file per digest to
+`/home/will/.local/state/npu-utilization/digests/<timestamp>.jsonl`. The first
+line is the summary; subsequent lines are per-service rows. No JSONL write
+happens with `--no-write`.
+
+### Step 4 — Offline unit tests
+
+```bash
+python -m pytest tests/test_npu_utilization_digest.py -q
+```
+
+Does not require live services. Use to validate digest logic after edits or
+before merging.
+
+## Compact proof interpretation
+
+For each proof-capable service, both the response-level `npu_busy_delta_us`
+(when the service reports it) and the script's own sysfs before/after delta
+must agree and be `> 0`. The proof is only valid when an actual inference
+request ran. If a probe was skipped (`reason=skipped_cold_load` or
+`reason=smoke_disabled`), `proof_ok` for that row is `None` and the row
+contributes a labeled fallback instead of a proof failure.
+
+Proof currently runs on:
+
+- `embeddings` (`:18817`)
+- `rerank` (`:18818`)
+- `whisper` (`:18816`) when `--include-whisper-smoke=true` (default)
+- `classifier` (`:18819`)
+- `doc_triage` (`:18829`) when `--include-doc-triage-smoke=true` (default);
+  proof is via the embeddings service, not directly on the NPU device, so the
+  row reports `mode=NPU-via-embedding-service`.
+
+Intentionally health-only (no proof row):
+
+- `rag_endpoint` (`:18810`) — closed:vector-mutation
+- `rag_health` (`:18814`)
+- `advisory_gateway` (`172.19.0.1:18830`) — closed:advisory-post
+
+Intentionally skipped by default:
+
+- `genai` (`:18820`) — `loaded=false` until first use. Cold-loading the worker
+  solely to obtain NPU proof has a real cost, so the skip is treated as a
+  labeled fallback rather than a proof failure. Opt in with
+  `--include-genai-smoke=true` only when the task actually needs a generation
+  smoke.
+
+## Exit codes and triage gates
+
+`scripts/npu-service-health.sh`:
+
+| Exit | Meaning | Next |
+| ---: | --- | --- |
+| 0 | All checks passed including embeddings proof. | Continue to digest. |
+| 2 | `npu_busy_time_us` not readable. | Check kernel/driver; do not run digest. |
+| 3 | Embedding request failed. | Triage `openvino-embeddings.service` and port `:18817`. |
+| 4 | Embedding request succeeded but sysfs delta `<= 0`. | Service reachable but not on the NPU; check service logs and device bind. |
+
+`scripts/npu-utilization-digest.py`:
+
+| Exit | Meaning | Next |
+| ---: | --- | --- |
+| 0 | All reachable services handled; proof/fallback accounting completed. | Inspect `proof_ok` and `fallbacks:` for any unexpected labels. |
+| 2 | `--strict-proof` was set and at least one proof-required probe ran without a positive sysfs delta. | Triage the named service's NPU path. |
+
+## Approval gates left closed
+
+The integrated workflow intentionally does not:
+
+- start, stop, restart, enable, or disable any user systemd unit or Docker
+  Compose service;
+- write to or mutate the Chroma collection `obsidian_bge_npu` or any other
+  vector store;
+- change Atlas/Hermes routing or model defaults;
+- post classification/generation/triage events to the advisory gateway;
+- broaden private document, image, or audio roots;
+- bind any new listener, including on `0.0.0.0`;
+- write memory, send messages, execute tools, or mutate Kanban state.
+
+These remain approval-gated and are tracked on the `npu-maximization` board.
+
+For advisory-lane promotion decisions, pair this live utilization pass with the fixture-only dry-run comparison in `docs/npu-advisory-observability-runbook.md`. The digest can show whether live NPU services are healthy enough to collect evidence; it does not promote advisory outputs into authority. Promotion remains a separate lane-specific approval with explicit scope and rollback.
+
+## Quick reference
+
+```bash
+# Single-pass NPU health check (listener + systemd + embeddings proof).
+cd ~/lab/swarm && ./scripts/npu-service-health.sh
+
+# Compact digest with per-service proof and fallback accounting.
+scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
+
+# Same, with a JSONL artifact for trend tracking.
+scripts/npu-utilization-digest.py --format jsonl
+
+# Strict mode for CI / pre-merge.
+scripts/npu-utilization-digest.py --no-write --strict-proof
+
+# Offline digest logic tests.
+python -m pytest tests/test_npu_utilization_digest.py -q
+```
@@ -0,0 +1,49 @@
+# NPU utilization digest
+
+Compact on-demand observability for Will's local OpenVINO/NPU specialists.
+
+Script:
+
+```bash
+/home/will/lab/swarm/scripts/npu-utilization-digest.py --format text
+```
+
+Safe defaults:
+
+- read-only for services; no service starts/stops/restarts, routing changes, vector DB mutation, advisory POSTs, outbound sends, or memory writes;
+- writes only a compact JSONL artifact under `/home/will/.local/state/npu-utilization/digests` unless `--no-write` is passed;
+- uses synthetic/non-private requests for embeddings, rerank, classifier dry-run, and doc triage;
+- keeps GenAI generation disabled by default when the worker is not loaded, to avoid cold-load side effects;
+- advisory gateway remains health-only because POSTs write metadata/events;
+- NPU proof is only true when an inference probe ran and `/sys/class/accel/accel0/device/npu_busy_time_us` increased around that probe.
+
+Common commands:
+
+```bash
+# Compact CLI digest, plus JSONL artifact.
+scripts/npu-utilization-digest.py --format text
+
+# No artifact write; useful during reviews.
+scripts/npu-utilization-digest.py --no-write --include-genai-smoke false
+
+# Machine-readable stdout.
+scripts/npu-utilization-digest.py --format jsonl --no-write
+
+# CI/unit tests; live services not required.
+python -m pytest tests/test_npu_utilization_digest.py -q
+```
+
+Output shape is intentionally small: service booleans, request counts by service, average probe ms, sysfs/NPU busy deltas by service, proof flags, fallback totals and per-service fallback counts, confidence distribution, escalation/suppression recommendation counts, authority-safe flag violation totals, artifact path, and closed gates. `fallbacks` includes unavailable services, failed/missing proof, and skipped proof-capable smokes such as disabled Whisper/doc-triage probes or GenAI cold-load skips; intentionally health-only RAG/advisory rows are not fallbacks unless unavailable. It does not print raw embeddings, transcripts, OCR text, model completions, request headers, or full upstream JSON.
+
+Covered rows:
+
+- `embeddings`: `/v1/embeddings` synthetic string, positive sysfs delta required.
+- `rerank`: `/rerank` with two synthetic docs, positive sysfs delta required.
+- `whisper`: health-only unless the bounded generated-WAV smoke is enabled.
+- `classifier`: `/v1/classify` with `dry_run=true` and `include_evidence=false`, positive sysfs delta required.
+- `genai`: health-only by default; skips when `loaded=false` unless explicitly opted in.
+- `doc_triage`: one approved synthetic sample under the service sample root, with `allowed_roots` narrowed to that sample directory; NPU proof is via embeddings.
+- `rag_endpoint` and `rag_health`: health-only; no vector mutation.
+- `advisory_gateway`: health-only; `closed:advisory-post` gate remains closed.
+
+Closed gates left for later approval: sending/delivery, recurring timer, GenAI cold-load smoke, advisory POSTs, Atlas/Hermes routing changes, vector mutation/reindex, and broad private document/audio/image roots.
@@ -0,0 +1,135 @@
+# NPU voice/audio local-file pipeline
+
+This is the first-slice local-file voice/audio path for the NPU maximization program:
+
+```text
+local audio file or already-staged attachment
+  -> OpenVINO NPU Whisper (:18816)
+  -> OpenVINO NPU classifier (:18819)
+  -> explicit advisory gate
+  -> Atlas/Hermes only after separate approval
+```
+
+The implementation is `scripts/npu_voice_audio_pipeline.py`. It is a CLI wrapper only; it starts no listener and performs no outbound sends, Obsidian writes, memory writes, vector DB mutations, Kanban mutations, service restarts, platform API calls, or live Atlas/Hermes routing changes.
+
+## Safety gates
+
+Closed unless explicitly approved later:
+
+- Telegram/Discord fetching by bot token or attachment URL.
+- Outbound messages or auto-sends.
+- Obsidian/vault writes.
+- Memory writes.
+- Vector DB mutation or reindex.
+- Automatic Kanban mutation.
+- Service restarts or new persistent listeners.
+- Private-directory root broadening.
+- Live Atlas/Hermes routing authority changes.
+
+HTTP success is not NPU proof. For NPU claims, require real inference plus positive `/sys/class/accel/accel0/device/npu_busy_time_us` deltas. The CLI reports response deltas and observed sysfs deltas for Whisper and classifier calls.
+
+## Example: synthetic local WAV smoke
+
+```bash
+cd /home/will/lab/swarm
+python - <<'PY'
+import math, struct, wave
+path = '/tmp/npu-voice-smoke.wav'
+sr = 16000
+with wave.open(path, 'wb') as w:
+    w.setnchannels(1)
+    w.setsampwidth(2)
+    w.setframerate(sr)
+    frames = bytearray()
+    for i in range(int(sr * 0.6)):
+        frames.extend(struct.pack('<h', int(12000 * math.sin(2 * math.pi * 440 * i / sr))))
+    w.writeframes(frames)
+print(path)
+PY
+```
+
+Run the local-file wrapper:
+
+```bash
+/home/will/.venvs/npu/bin/python scripts/npu_voice_audio_pipeline.py \
+  --audio /tmp/npu-voice-smoke.wav \
+  --title "synthetic smoke" \
+  --source manual_smoke \
+  --json
+```
+
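+Compact output shape (abridged; `id`, `next_gate`, and the `labels.*` fields listed under "Compact fields" are omitted here):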
+
+```json
+{
+  "ok": true,
+  "source": "manual_smoke",
+  "transcript_chars": 3,
+  "action_worthy": false,
+  "atlas_gate": "suppressed_not_action_worthy",
+  "whisper_npu_delta_us": 85441,
+  "whisper_sysfs_delta_us": 85441,
+  "classifier_npu_delta_us": 85908,
+  "classifier_sysfs_delta_us": 85908,
+  "classifier_observed_sysfs_delta_us": 85908,
+  "external_sends": 0,
+  "writes": 0
+}
+```
+
+A non-actionable smoke should stay `suppressed_not_action_worthy`. A transcript with a reminder, task, follow-up, explicit question, or classifier `tool_needed=true` should become `advisory_only_not_sent`; even then, nothing is sent.
+
+## Example: already-staged platform voice file
+
+This example assumes another approved process has already placed the audio file locally. The wrapper does not fetch from Telegram/Discord and does not read bot tokens.
+
+```bash
+/home/will/.venvs/npu/bin/python scripts/npu_voice_audio_pipeline.py \
+  --audio /tmp/staged-voice-message.ogg \
+  --source staged_telegram \
+  --title "staged local Telegram voice memo" \
+  --json
+```
+
+## Compact fields
+
+The CLI always reports:
+
+- `ok`
+- `id`
+- `source`
+- `transcript_chars`
+- `action_worthy`
+- `atlas_gate`
+- `next_gate`
+- `whisper_npu_delta_us`
+- `whisper_sysfs_delta_us`
+- `classifier_npu_delta_us`
+- `classifier_sysfs_delta_us`
+- `classifier_observed_sysfs_delta_us`
+- `labels.workflow_category`
+- `labels.tool_needed`
+- `labels.urgency`
+- `labels.safety_confirmation_required`
+- `external_sends`
+- `writes`
+
+Transcript text is omitted by default. Use `--include-transcript` or `--include-transcript-preview-chars N` only for explicit local debugging.
+
+## Input limits
+
+- `--audio` must be an absolute local path.
+- Symlinks, directories, missing files, empty files, unsupported extensions, and files over `--max-bytes` are refused.
+- WAV duration is capped by `--max-audio-seconds`; other codecs remain size-capped in this first slice.
+- Classifier transcript payload is bounded by `--max-transcript-chars`.
+
+## Health prerequisites
+
+Read-only checks:
+
+```bash
+curl -fsS http://127.0.0.1:18816/health
+curl -fsS http://127.0.0.1:18819/healthz
+```
+
+Do not restart services from this runbook. If either endpoint is unhealthy, stop and request an ops/remediation task.
@@ -0,0 +1,388 @@
+# OpenVINO/NPU VLM, audio, and wake-word feasibility
+
+Date: 2026-06-04
+Scope: feasibility/spec only for lower-priority assistant sidecars. This document does not enable services, alter Atlas/Hermes/gateway routing, mutate RAG/Chroma/vector collections, or process private document/image directories.
+
+## Existing baseline and constraints
+
+Live baseline discovered by parent task:
+
+- RAG endpoint: `127.0.0.1:18810`
+- RAG health wrapper: `127.0.0.1:18814`
+- Whisper OpenVINO NPU: `127.0.0.1:18816`
+- OpenVINO embeddings: `127.0.0.1:18817`
+- Prototype ports currently reserved/not live: reranker `:18818`, classifier/router `:18819`, GenAI worker `:18820`, optional doc/image triage `:18829`
+
+Local NPU runtime snapshot from the feasibility run:
+
+- `/home/will/.venvs/npu` has `openvino==2026.2.0` and `openvino-genai==2026.2.0.0`.
+- `openvino.Core().available_devices` reports `CPU`, `GPU.0`, `GPU.1`, and `NPU`.
+- NPU device name: `Intel(R) AI Boost`.
+- NPU claims must be verified by positive `/sys/class/accel/accel0/device/npu_busy_time_us` deltas around inference.
+
+External release/project signals checked:
+
+- OpenVINO 2026.2.0 release notes mention broader GenAI coverage and VLM samples, but the VLM acceleration notes are CPU/GPU-oriented; they do not provide a clear low-risk NPU VLM path.
+- Prior OpenVINO release notes/search results mention OpenVINO Model Server VLM support for Qwen2-VL, Phi-3.5-Vision, and InternVL2.
+- `openWakeWord` is an active Apache-2.0 local wake-word framework with ONNX Runtime/TFLite support, pre-trained wake-word models, optional VAD, and 16 kHz PCM streaming examples. It is not installed in the current NPU venv.
+
+## Recommendation summary
+
+| Lane | Recommendation | Priority | Why |
+| --- | --- | --- | --- |
+| VLM / image captioning | Defer NPU-first VLM. If pursued, prototype CPU/GPU VLM CLI first, then attempt NPU only after model/runtime compatibility is proven. | Low | NPU support for VLMs is not clearly mature in the current OpenVINO public notes; VLMs are memory/op-shape heavy; failures could be slow and noisy. Existing doc/image triage already covers practical local image metadata without a full VLM. |
+| Lightweight image classification / caption fallback | Extend the existing `openvino-doc-image-triage-npu` lane before adding a new service. | Medium-low | It already has privacy boundaries, synthetic fixtures, CLI/server split, and NPU proof through embeddings. Add static-shape classifier only if a later task needs image labels beyond rule fallback. |
+| Audio classification | Defer until a concrete assistant workflow needs it. Consider CPU/GPU/OpenVINO Runtime prototype using Speech Commands/ESC-style classifier before any daemon. | Low | Whisper NPU already covers transcription. Generic audio tags are less useful without a routing/product requirement and need dataset-specific threshold tuning. |
+| Wake word | Worth a small CPU-only local smoke prototype; do not spend NPU time first. | Medium | Wake-word detection must be always-on, tiny, and reliable. CPU openWakeWord/ONNX/TFLite is the lowest-risk path and avoids starving existing NPU Whisper/embedding services. NPU use is only worth testing after CPU false-positive/latency behavior is acceptable. |
+
+## VLM / image-captioning path
+
+### Recommended model/runtime
+
+Initial runtime: CLI-first OpenVINO GenAI or OpenVINO Model Server on CPU/GPU, not NPU-first.
+
+Candidate models to evaluate, in order:
+
+1. `Qwen2-VL-2B-Instruct` OpenVINO/OVMS-compatible export if a small converted artifact is already available.
+2. `Phi-3.5-Vision-Instruct` only if memory/startup is acceptable.
+3. `InternVL2` only as a compatibility reference; likely too heavy for a low-priority local assistant sidecar.
+
+Why this order:
+
+- Qwen2-VL is broadly supported by OpenVINO Model Server release notes/search results and has smaller variants.
+- Phi-3.5-Vision is also named in OpenVINO Model Server VLM support, but may be heavier.
+- NPU is not the first target because public OpenVINO 2026.2 release notes emphasize VLM improvements for CPU/GPU, not NPU. Treat NPU VLM as experimental until a smoke test proves compilation and positive busy-time deltas.
+
+### Endpoint/CLI contract
+
+CLI-first contract:
+
+```bash
+python vlm_caption.py \
+  --image /path/to/synthetic_or_explicitly_allowed_image.png \
+  --prompt "Describe this image in one sentence." \
+  --device CPU \
+  --max-new-tokens 96 \
+  --json
+```
+
+Response shape:
+
+```json
+{
+  "ok": true,
+  "media_type": "image",
+  "source_path_basename": "synthetic_scene.png",
+  "source_sha256": "sha256:...",
+  "model": "qwen2-vl-small-openvino",
+  "runtime": "openvino-genai-or-ovms",
+  "device_requested": "CPU",
+  "device_observed": "CPU",
+  "caption": "A synthetic chart with three colored bars.",
+  "safety": {
+    "external_uploads": false,
+    "raw_image_logged": false,
+    "private_paths_allowed": false
+  },
+  "timing_ms": {
+    "load": 0,
+    "inference": 0,
+    "total": 0
+  },
+  "npu_busy_delta_us": null
+}
+```
+
+Optional localhost HTTP contract, only after CLI is stable:
+
+- Bind: `127.0.0.1:18829` (reserved for the optional doc/image triage prototype, so only if that port is free) or another explicitly approved unused prototype port.
+- `GET /healthz`
+- `GET /models`
+- `POST /v1/vision/caption`
+
+Request body:
+
+```json
+{
+  "path": "/allowed/root/synthetic_scene.png",
+  "prompt": "Describe this image in one sentence.",
+  "max_new_tokens": 96,
+  "device": "CPU"
+}
+```
+
+### Smoke-test plan using non-private data
+
+Use only generated fixtures under the repo, similar to `openvino-doc-image-triage-npu/samples/`:
+
+1. Create synthetic PNGs: simple chart, receipt-like image, screenshot-like text panel, and blank/noisy image.
+2. Run the CLI with `--allowed-root "$PWD/samples"` (a root-restriction flag the CLI contract above must also accept) and assert:
+   - JSON parses.
+   - `external_uploads=false`.
+   - only basename and SHA-256 are returned by default.
+   - captions are non-empty and under a configured token/character limit.
+   - unsupported/private paths are rejected.
+3. If an HTTP server is added, start it in foreground on `127.0.0.1`, call `/healthz` and `/v1/vision/caption`, then stop it.
+4. No private image/document folders and no Obsidian vault content should be used for smoke tests.
+
+### NPU busy-time verification plan
+
+Only claim NPU VLM if all of these pass:
+
+1. Verify the counter is readable:
+
+```bash
+BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
+test -r "$BUSY" && before=$(cat "$BUSY")
+```
+
+2. Run exactly one synthetic-image inference with `device=NPU`.
+3. Read `after=$(cat "$BUSY")`.
+4. Require `after - before > 0` and a response-level `npu_busy_delta_us > 0` if the server reports it.
+5. Repeat with a second synthetic image so the measured delta cannot be attributed solely to unrelated startup activity.
+6. If HTTP returns 200 but the sysfs delta is zero, document as `NPU not verified` and do not call it an NPU service.
+
+### No-go / defer criteria
+
+Defer VLM NPU work if any apply:
+
+- Model export/compile to NPU fails or requires unsupported ops/custom patches.
+- First successful inference needs more than 60 seconds cold or more than 10 seconds warm for a small synthetic image.
+- NPU busy-time delta is zero or inconsistent.
+- Memory pressure disrupts Whisper `:18816`, embeddings `:18817`, or RAG `:18810`.
+- The only useful path requires processing private images/docs before synthetic smoke tests are stable.
+- Captions are too hallucination-prone for automation decisions without a human-review gate.
+
+## Lightweight image triage/classification path
+
+### Recommended model/runtime
+
+Recommended near-term path: keep `openvino-doc-image-triage-npu` as the primary image/document lane and add only a static-shape classifier if rule fallback becomes inadequate.
+
+Candidate classifier families for a later task:
+
+- MobileNetV3/EfficientNet-Lite/ResNet-18 style image classifier exported to OpenVINO IR.
+- Use NPU only if the IR compiles with static shapes and produces positive busy-time deltas.
+- Keep OCR/PDF rendering CPU-local; do not try to force OCR onto NPU in this phase.
+
+Why:
+
+- The current triage prototype already has the right privacy contract and reports CPU vs NPU stages.
+- A small classifier is much lower risk than a VLM and can be used for labels like `screenshot`, `receipt`, `document`, `photo`, `chart`.
+
+### Endpoint/CLI contract
+
+Extend existing CLI shape rather than introduce a new daemon:
+
+```bash
+/home/will/.venvs/npu/bin/python triage.py \
+  --allowed-root "$PWD" \
+  --image-classifier-model /home/will/models/openvino-image-classifier/model.xml \
+  --image-classifier-device NPU \
+  --pretty \
+  samples/synthetic_invoice.png
+```
+
+Response addition:
+
+```json
+{
+  "classification": {
+    "label": "receipt_or_invoice",
+    "confidence": 0.82,
+    "device": "NPU",
+    "method": "openvino_image_classifier",
+    "npu_busy_delta_us": 12345
+  }
+}
+```
+
+### Smoke-test plan
+
+Reuse `openvino-doc-image-triage-npu/make_samples.py` and `tests/smoke_test.py`; add synthetic image-label assertions only after a classifier model exists. Keep `--no-embeddings` mode available so the smoke suite can separate classifier NPU proof from embeddings `:18817` proof.
+
+### No-go / defer criteria
+
+- Static-shape classifier cannot compile on NPU.
+- Labels are not useful enough to drive an assistant workflow.
+- Classifier output duplicates the existing rule-based fallback.
+
+## Audio classification path
+
+### Recommended model/runtime
+
+Defer implementation. If a concrete workflow appears, start with a CLI-only OpenVINO Runtime classifier on CPU/GPU using synthetic/public audio fixtures, not a persistent service.
+
+Potential model classes:
+
+- Speech Commands keyword classifier for short command categories.
+- ESC-50/AudioSet-like environmental sound classifier only if the task requires non-speech detection.
+- Whisper transcript + lightweight text classifier may be enough for most assistant routing, using existing Whisper NPU `:18816`.
+
+Why:
+
+- The system already has local Whisper NPU transcription.
+- Generic audio classification needs careful threshold tuning and false-positive analysis.
+- Always-on audio processing has privacy and resource implications; keep it explicit and local.
+
+### CLI contract
+
+```bash
+python audio_classify.py \
+  --input samples/synthetic_chime.wav \
+  --model /home/will/models/openvino-audio-classifier/model.xml \
+  --device CPU \
+  --json
+```
+
+Response shape:
+
+```json
+{
+  "ok": true,
+  "source_path_basename": "synthetic_chime.wav",
+  "source_sha256": "sha256:...",
+  "sample_rate": 16000,
+  "duration_seconds": 1.2,
+  "labels": [
+    {"label": "chime", "confidence": 0.76}
+  ],
+  "device_requested": "CPU",
+  "device_observed": "CPU",
+  "npu_busy_delta_us": null,
+  "privacy": {"external_uploads": false, "raw_audio_logged": false}
+}
+```
+
+An optional HTTP endpoint should wait until a concrete workflow exists. If one is added later, bind it to localhost only and choose a port that does not overlap with the ports already in use.
+
+### Smoke-test plan using non-private data
+
+1. Generate synthetic WAV files in repo-local `samples/`: sine tone, silence, white noise, simple chime, and a short synthetic spoken phrase if a local TTS fixture is available.
+2. Run the CLI on each file with `--allowed-root "$PWD/samples"` (a root-restriction flag the CLI contract above must also accept).
+3. Assert JSON parses, durations are bounded, and confidence values are numeric.
+4. Do not stream microphone input or scan private audio directories in smoke tests.
+5. If NPU mode is attempted, wrap each inference in sysfs busy-time reads.
+
+### No-go / defer criteria
+
+- No concrete downstream automation consumes the labels.
+- False positives cannot be characterized on synthetic/public fixtures.
+- The classifier competes with Whisper for NPU time or requires a persistent microphone daemon without explicit approval.
+
+## Wake-word path
+
+### Recommended model/runtime
+
+Recommended first runtime: CPU-only `openWakeWord` CLI/foreground process with ONNX Runtime or TFLite backend.
+
+NPU recommendation: defer. Try NPU/OpenVINO conversion only after CPU openWakeWord passes false-positive and latency checks.
+
+Why:
+
+- Wake-word detection is always-on and latency-sensitive; reliability matters more than accelerator novelty.
+- The model is small enough that CPU is likely acceptable and simpler.
+- Keeping wake-word off NPU reduces contention with Whisper NPU and embeddings.
+- openWakeWord has pre-trained models, optional VAD, and straightforward 16 kHz PCM frame APIs.
+
+### Endpoint/CLI contract
+
+CLI smoke contract:
+
+```bash
+python wake_word_smoke.py \
+  --model hey_jarvis \
+  --positive samples/synthetic_wake_positive.wav \
+  --negative samples/synthetic_noise.wav \
+  --threshold 0.5 \
+  --json
+```
+
+Foreground local stream contract, only for manual experiments:
+
+```bash
+python wake_word_listen.py \
+  --model hey_jarvis \
+  --threshold 0.5 \
+  --vad-threshold 0.3 \
+  --oneshot \
+  --json
+```
+
+Response/event shape:
+
+```json
+{
+  "ok": true,
+  "model": "hey_jarvis",
+  "runtime": "openwakeword-onnxruntime-or-tflite",
+  "device": "CPU",
+  "threshold": 0.5,
+  "events": [
+    {"offset_ms": 1280, "score": 0.83, "detected": true}
+  ],
+  "false_positive_count": 0,
+  "npu_busy_delta_us": null,
+  "privacy": {"external_uploads": false, "raw_audio_logged": false}
+}
+```
+
+If a localhost HTTP endpoint is ever needed, do not expose raw microphone streaming by default. Prefer events only:
+
+- `GET /healthz`
+- `POST /v1/wakeword/evaluate-file` for explicit files under allowed roots
+- `GET /v1/wakeword/events` for a manually started foreground listener
+
+### Smoke-test plan using non-private data
+
+1. Install in a disposable or dedicated venv, not the existing NPU venv unless explicitly approved:
+
+```bash
+python -m venv /tmp/openwakeword-smoke-venv
+/tmp/openwakeword-smoke-venv/bin/python -m pip install openwakeword
+```
+
+2. Use public/generated WAVs only:
+   - Negative: silence, white noise, generic non-wake speech/TTS if locally generated.
+   - Positive: only if a public/pretrained wake phrase fixture is available or generated explicitly for the selected model. If no positive fixture exists, run negative-only false-positive smoke and mark recall untested.
+3. Assert no false positives over a bounded negative fixture set.
+4. Measure per-frame CPU latency and max RSS.
+5. Do not start a persistent microphone listener; manual foreground `--oneshot` only if explicitly approved.
+
+### NPU busy-time verification plan
+
+Wake-word should not claim NPU in the initial path. If a later task converts a model to OpenVINO IR and targets NPU:
+
+1. Read `/sys/class/accel/accel0/device/npu_busy_time_us` before a bounded file evaluation.
+2. Run NPU inference on a fixed set of WAV frames.
+3. Read the counter after inference.
+4. Require positive delta and stable predictions matching CPU baseline.
+5. Also verify that keeping the wake-word loop active does not starve Whisper `:18816` or embeddings `:18817`.
+
+### No-go / defer criteria
+
+- CPU openWakeWord has unacceptable false positives on local negative fixtures.
+- A usable positive fixture cannot be created without recording private audio.
+- Always-on microphone capture is required before explicit approval.
+- NPU conversion changes scores materially from CPU baseline.
+- NPU loop increases contention with Whisper/embedding services.
+
+## Docs and diagram implications
+
+If these lanes advance beyond feasibility:
+
+1. Update `docs/swarm-infrastructure.md` and `docs/swarm-infrastructure.html` to keep live vs prototype labels clear.
+2. Update the OpenVINO NPU runbook with smoke commands and the sysfs busy-time proof steps.
+3. Update the Service Catalog only after a service is actually approved/live; until then list as `prototype/not live` or omit.
+4. Architecture diagrams may show:
+   - live: RAG `:18810`, Whisper NPU `:18816`, embeddings `:18817`;
+   - prototypes: reranker `:18818`, classifier/router `:18819`, GenAI worker `:18820`, doc/image triage optional `:18829`;
+   - VLM/audio/wake-word as `CLI feasibility / not live` unless a later implementation task creates a service.
+5. Do not imply Atlas/Hermes routing integration for any of these lanes without explicit approval.
+
+## Overall go/no-go decision
+
+- Go later: wake-word CPU-only CLI smoke, because it is useful and low risk if kept foreground/local.
+- Maybe later: lightweight image classifier inside existing doc/image triage, if rule fallback is not enough.
+- Defer: NPU-first VLM captioning until OpenVINO VLM-on-NPU compatibility is proven by a minimal synthetic-image smoke.
+- Defer: generic audio classification until there is a concrete assistant workflow that consumes the output.
@@ -27,7 +27,7 @@
  <div class="wrap">
    <div class="header"><div class="dot"></div><div><h1>Will's Swarm Infrastructure</h1><div class="sub">Atlas/Hermes gateway + n8n automation + agentmon monitoring + local AI/search/voice services</div></div></div>
    <div class="card">
-      <svg viewBox="0 0 1280 900" xmlns="http://www.w3.org/2000/svg" role="img" aria-label="Swarm infrastructure architecture diagram">
+      <svg viewBox="0 0 1280 980" xmlns="http://www.w3.org/2000/svg" role="img" aria-label="Swarm infrastructure architecture diagram">
        <defs>
          <pattern id="grid" width="40" height="40" patternUnits="userSpaceOnUse"><path d="M 40 0 L 0 0 0 40" fill="none" stroke="#1e293b" stroke-width="0.5"/></pattern>
          <marker id="arrow" markerWidth="10" markerHeight="10" refX="8" refY="3" orient="auto" markerUnits="strokeWidth"><path d="M0,0 L0,6 L9,3 z" fill="#38bdf8" /></marker>
@@ -40,7 +40,7 @@
            .edge{fill:none; stroke:#38bdf8; stroke-width:1.8; marker-end:url(#arrow); opacity:.8}.edgeG{fill:none; stroke:#34d399; stroke-width:1.8; marker-end:url(#arrowGreen); opacity:.85}.edgeO{fill:none; stroke:#fb923c; stroke-width:1.8; marker-end:url(#arrowOrange); opacity:.85}.edgeR{fill:none; stroke:#fb7185; stroke-width:1.8; stroke-dasharray:5,4; marker-end:url(#arrowRose); opacity:.85}
          </style>
        </defs>
-        <rect width="1280" height="900" fill="#020617"/><rect width="1280" height="900" fill="url(#grid)" opacity="0.7"/>
+        <rect width="1280" height="980" fill="#020617"/><rect width="1280" height="980" fill="url(#grid)" opacity="0.7"/>

        <!-- arrows behind nodes -->
        <path class="edge" d="M140 120 C210 120 210 205 280 205"/>
@@ -58,13 +58,14 @@
        <path class="edge" d="M815 695 C900 695 900 735 965 735"/>
        <path class="edgeG" d="M625 635 C555 635 555 720 470 720"/>
        <path class="edge" d="M470 720 C545 720 545 565 620 565"/>
+        <path class="edgeR" d="M490 735 C620 735 790 880 965 880"/>

        <!-- boundaries -->
        <rect x="250" y="80" width="250" height="260" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
        <text x="265" y="103" class="tiny" fill="#fbbf24">Hermes gateway layer</text>
        <rect x="590" y="105" width="260" height="655" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
        <text x="605" y="128" class="tiny" fill="#fbbf24">n8n + agentmon observability</text>
-        <rect x="935" y="95" width="280" height="760" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
+        <rect x="935" y="95" width="280" height="875" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
        <text x="950" y="118" class="tiny" fill="#fbbf24">local swarm services</text>

        <!-- external channels -->
@@ -83,31 +84,32 @@
        <!-- Local services -->
        <g><rect x="965" y="165" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="165" width="210" height="80" rx="9" fill="rgba(6,78,59,.4)" stroke="#34d399" stroke-width="1.6"/><text x="1070" y="195" text-anchor="middle" class="title">LiteLLM</text><text x="1070" y="216" text-anchor="middle" class="tiny">LLM router + DB</text><text x="1070" y="234" text-anchor="middle" class="port">:18804</text></g>
        <g><rect x="965" y="275" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="275" width="210" height="80" rx="9" fill="rgba(8,51,68,.4)" stroke="#22d3ee" stroke-width="1.6"/><text x="1070" y="305" text-anchor="middle" class="title">Search</text><text x="1070" y="326" text-anchor="middle" class="tiny">SearXNG + Brave MCP</text><text x="1070" y="344" text-anchor="middle" class="port">:18803 / :18802</text></g>
-        <g><rect x="965" y="385" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="385" width="210" height="80" rx="9" fill="rgba(8,51,68,.4)" stroke="#22d3ee" stroke-width="1.6"/><text x="1070" y="415" text-anchor="middle" class="title">Voice</text><text x="1070" y="436" text-anchor="middle" class="tiny">Kokoro + Whisper</text><text x="1070" y="454" text-anchor="middle" class="port">:18805 / :18811</text></g>
+        <g><rect x="965" y="385" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="385" width="210" height="80" rx="9" fill="rgba(8,51,68,.4)" stroke="#22d3ee" stroke-width="1.6"/><text x="1070" y="415" text-anchor="middle" class="title">Voice</text><text x="1070" y="436" text-anchor="middle" class="tiny">Kokoro + Whisper</text><text x="1070" y="454" text-anchor="middle" class="port">:18805 / :18816</text></g>
        <g><rect x="965" y="555" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="555" width="210" height="80" rx="9" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.6"/><text x="1070" y="585" text-anchor="middle" class="title">Docker services</text><text x="1070" y="606" text-anchor="middle" class="tiny">agentmon.monitor=true</text><text x="1070" y="624" text-anchor="middle" class="port">swarm/service snapshots</text></g>
        <g><rect x="965" y="665" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="665" width="210" height="80" rx="9" fill="rgba(120,53,15,.3)" stroke="#fbbf24" stroke-width="1.6"/><text x="1070" y="695" text-anchor="middle" class="title">OpenClaw VMs</text><text x="1070" y="716" text-anchor="middle" class="tiny">currently dormant</text><text x="1070" y="734" text-anchor="middle" class="port">openclaw.snapshot</text></g>
-        <g><rect x="965" y="775" width="210" height="60" rx="9" fill="#0f172a"/><rect x="965" y="775" width="210" height="60" rx="9" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.6"/><text x="1070" y="802" text-anchor="middle" class="title">Obsidian / RAG</text><text x="1070" y="822" text-anchor="middle" class="port">:27123/:27124 + ChromaDB</text></g>
+        <g><rect x="965" y="775" width="210" height="75" rx="9" fill="#0f172a"/><rect x="965" y="775" width="210" height="75" rx="9" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.6"/><text x="1070" y="802" text-anchor="middle" class="title">Obsidian / RAG</text><text x="1070" y="821" text-anchor="middle" class="tiny">:18810 semantic search</text><text x="1070" y="840" text-anchor="middle" class="port">NPU embed + rerank</text></g>
+        <g><rect x="965" y="870" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="870" width="210" height="80" rx="9" fill="rgba(244,63,94,.16)" stroke="#fb7185" stroke-width="1.6" stroke-dasharray="6,4"/><text x="1070" y="896" text-anchor="middle" class="title">NPU sidecars</text><text x="1070" y="917" text-anchor="middle" class="tiny">:18818 live; others prototype</text><text x="1070" y="936" text-anchor="middle" class="port">:18818/:18819/:18820/:18829</text></g>

        <!-- host local ai box -->
-        <g><rect x="280" y="675" width="190" height="100" rx="10" fill="#0f172a"/><rect x="280" y="675" width="190" height="100" rx="10" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.8"/><text x="375" y="706" text-anchor="middle" class="title">host local AI</text><text x="375" y="730" text-anchor="middle" class="tiny">llama.cpp :18806</text><text x="375" y="752" text-anchor="middle" class="tiny">Ollama embed :18807</text></g>
+        <g><rect x="280" y="675" width="210" height="145" rx="10" fill="#0f172a"/><rect x="280" y="675" width="210" height="145" rx="10" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.8"/><text x="385" y="706" text-anchor="middle" class="title">host local AI</text><text x="385" y="730" text-anchor="middle" class="tiny">llama.cpp :18806</text><text x="385" y="752" text-anchor="middle" class="tiny">Ollama fallback :18807</text><text x="385" y="774" text-anchor="middle" class="tiny">OpenVINO embed :18817 live</text><text x="385" y="797" text-anchor="middle" class="tiny">Whisper NPU :18816 live</text></g>

        <!-- legend -->
-        <g transform="translate(40,820)">
+        <g transform="translate(40,910)">
          <text class="tiny" fill="#94a3b8">Legend</text>
          <rect x="0" y="16" width="14" height="10" fill="rgba(8,51,68,.4)" stroke="#22d3ee"/><text x="22" y="25" class="tiny">Gateway/Search/Voice</text>
          <rect x="180" y="16" width="14" height="10" fill="rgba(6,78,59,.4)" stroke="#34d399"/><text x="202" y="25" class="tiny">Automation/API</text>
          <rect x="320" y="16" width="14" height="10" fill="rgba(76,29,149,.4)" stroke="#a78bfa"/><text x="342" y="25" class="tiny">Data/AI stores</text>
          <rect x="475" y="16" width="14" height="10" fill="rgba(251,146,60,.14)" stroke="#fb923c"/><text x="497" y="25" class="tiny">Event bus/pipeline</text>
-          <line x1="650" y1="22" x2="700" y2="22" class="edgeR"/><text x="710" y="25" class="tiny">Monitoring flows</text>
+          <line x1="650" y1="22" x2="700" y2="22" class="edgeR"/><text x="710" y="25" class="tiny">Monitoring / not-live prototype flows</text>
        </g>
      </svg>
    </div>
    <div class="cards">
      <div class="info"><h3>Monitoring model</h3><ul><li>• n8n direct probes critical ports</li><li>• agentmon aggregates Docker/OpenClaw snapshots</li><li>• n8n polls agentmon for stale/degraded state</li></ul></div>
-      <div class="info"><h3>Operational endpoints</h3><ul><li>• n8n: 127.0.0.1:18808</li><li>• agentmon query/UI: 8081 / 8082</li><li>• local LLM/embed: 18806 / 18807</li></ul></div>
+      <div class="info"><h3>Operational endpoints</h3><ul><li>• n8n: 127.0.0.1:18808</li><li>• agentmon query/UI: 8081 / 8082</li><li>• live NPU: RAG 18810, Whisper 18816, embeddings 18817</li><li>• live local reranker: 18818</li><li>• prototypes not live-routed: 18819/18820/18829</li></ul></div>
      <div class="info"><h3>Source paths</h3><ul><li>• Swarm repo: ~/lab/swarm</li><li>• Agentmon repo: ~/lab/agentmon</li><li>• Workflows: swarm-common/n8n-workflows</li></ul></div>
    </div>
-    <div class="footer">Generated as repo documentation. Open locally in a browser; no JavaScript, all SVG inline.</div>
+    <div class="footer">Generated as repo documentation. Open locally in a browser; no JavaScript, all SVG inline. The :18818 reranker is live as a request-time second stage for :18810 semantic search with safe vector fallback; classifier/GenAI/doc-image sidecars remain prototypes/not live-routed.</div>
  </div>
 </body>
 </html>
@@ -32,9 +32,11 @@ local AI/search/voice services
        +--> SearXNG :18803
        +--> Brave MCP :18802
        +--> llama.cpp :18806
-        +--> Ollama embeddings :18807
+        +--> Ollama embeddings :18807 (legacy/CPU fallback)
+        +--> OpenVINO NPU embeddings :18817
        +--> Kokoro TTS :18805
-        +--> Whisper :18811
+        +--> Whisper NPU :18816
+        +--> local-only NPU sidecars: reranker :18818, router/classifier :18819, GenAI worker :18820, doc/image triage :18829
 ```

 See also:
@@ -115,18 +117,35 @@ Docker services:
 - `searxng` — `:18803`, local metasearch
 - `brave-search` — `:18802`, Brave Search MCP server
 - `kokoro-tts` — `:18805`, local TTS
- `whisper-server` — `:18811`, local transcription
+- `whisper-server-npu` — `:18816`, OpenVINO NPU local transcription
 - `n8n-agent` — `:18808`, automation

 Host/user services:

 - `llama-server.service` — `:18806`, local llama.cpp OpenAI-compatible LLM
- `ollama.service` — `:18807`, embeddings API
+- `ollama.service` — `:18807`, legacy/CPU embeddings API fallback
+- `openvino-embeddings.service` — `:18817`, OpenVINO NPU embeddings API (`/v1/embeddings`, `/api/embed`, `/api/embeddings`)
 - `docker-health-endpoint.service` — `:18809`, read-only container health for n8n
- `obsidian-reindex-endpoint.service` — `:18810`, Obsidian/RAG reindex trigger
+- `obsidian-reindex-endpoint.service` — `:18810`, Obsidian/RAG reindex trigger and `/semantic-search`; default collection `obsidian_bge_npu` uses OpenVINO NPU embeddings, and request-time `:18818` reranking is enabled with vector-order fallback
 - `url-content-extractor.service` — `:18812`, YouTube/PDF/web extraction
 - `voice-memo-processor.service` — `:18813`, voice memo processing
 - `rag-embedding-health.service` — `:18814`, RAG/embedding health wrapper
+- `openvino-router-classifier.service` — `:18819`, local-only dry-run Atlas/Hermes message classifier; advisory only
+- `openvino-genai-npu-worker.service` — `:18820`, local-only bounded GenAI worker for small background generation jobs
+- `openvino-doc-image-triage.service` — `:18829`, local-only document/image triage HTTP wrapper with allowed-root enforcement
+- `openvino-advisory-gateway.service` — `172.19.0.1:18830`, Docker-bridge advisory envelope wrapper over classifier, GenAI, and doc/image triage for `n8n-agent`; explicit no-authority contract
+
+Local-only OpenVINO NPU sidecars:
+
+| Port | Component | State | Safety boundary |
+| ---: | --- | --- | --- |
+| `18818` | reranker | live user service; request-time second stage for `:18810/semantic-search` | no Chroma/vector mutation; vector-order fallback on timeout/error/non-positive NPU proof |
+| `18819` | router/classifier | live user service; dry-run only | no Hermes/Atlas routing, memory writes, service restarts, or outbound messages |
+| `18820` | bounded GenAI worker | live user service | background jobs only; not primary Atlas/Hermes model routing |
+| `18829` | document/image triage | live localhost server | allowed-root limited; no private directory processing unless explicitly approved; NPU stage is embeddings via `:18817` |
+| `18830` | advisory gateway | live user service; bound to `172.19.0.1` for `n8n-agent` bridge access | returns `openvino_advisory_v1` envelopes only; no routing, memory writes, external sends, tool execution, restarts, or process-root broadening from request payloads; refuses wildcard binds |
+
+These sidecars bind to `127.0.0.1` by default, except `openvino-advisory-gateway.service`, which is explicitly approved on the Docker bridge IP `172.19.0.1` so `n8n-agent` can call it. They must not be wired into live Atlas/Hermes routing, memory writes, broad private document processing, external sends, tool execution, service restarts, or primary model paths without explicit approval from Will. Any NPU claim requires a positive `/sys/class/accel/accel0/device/npu_busy_time_us` delta before/after inference, or a service-reported equivalent. HTTP 200 alone is not proof.

 ### 5. Obsidian and RAG

@@ -142,9 +161,11 @@ Local REST API:
 RAG/vector store:

 - ChromaDB path: `~/.hermes/data/rag-search/chroma/`
- Reindex state/progress: `~/.hermes/data/rag-search/obsidian_index_state.json` and `obsidian_reindex_progress.json`
- Embeddings backend: Ollama on `:18807`, normally `nomic-embed-text`
- Reindex endpoint: `POST :18810/reindex` for incremental updates, `POST :18810/reindex?full=true` for full semantic rebuilds, `GET :18810/semantic-health` to verify vectors plus a search smoke test.
+- Reindex state/progress: active BGE/NPU state in `~/.hermes/data/rag-search/obsidian_bge_npu_index_state.json` and `obsidian_bge_npu_reindex_progress.json`; legacy Ollama state in `obsidian_index_state.json` remains for comparison/fallback.
+- Active RAG query/reindex embedding backend: OpenVINO NPU embeddings service on `:18817`, currently `bge-base-en-v1.5-int8-ov`, collection `obsidian_bge_npu`.
+- Legacy comparison/fallback collection: `obsidian`, built with Ollama on `:18807` using `nomic-embed-text`.
+- Reindex/search endpoint: `POST :18810/reindex` for incremental updates, `POST :18810/reindex?full=true` for full semantic rebuilds, `GET :18810/semantic-health` to verify vectors plus a search smoke test, and `POST :18810/semantic-search` for n8n/Hermes semantic context lookup.
+- Reranker path: reranking is enabled (`RAG_RERANK_ENABLED=true`) for `:18810/semantic-search`, following local bake testing. `/semantic-search` retrieves `RAG_RERANK_INITIAL_K` vector candidates, calls `RAG_RERANK_URL` (`http://127.0.0.1:18818/rerank`), returns the top `RAG_RERANK_TOP_K` reranked results, requires a positive `npu_busy_delta_us` by default (`RAG_RERANK_REQUIRE_NPU_PROOF=true`), and falls back to vector order with `rerank.error` metadata on timeout/error/non-positive NPU proof. Reranking is request-time only and must not mutate Chroma/vector collections.

 ## Monitoring model

@@ -198,6 +219,12 @@ From the host:
 cd /home/will/lab/swarm
 make status
 make local-ai-health
+./scripts/npu-service-health.sh  # read-only; includes sysfs busy-time proof for :18817
+curl -fsS http://127.0.0.1:18810/semantic-health | jq '{status,state,search_ok,result_count}'
+curl -fsS http://127.0.0.1:18810/semantic-search \
+  -H 'Content-Type: application/json' \
+  -d '{"query":"non-private semantic smoke","top_k":2}' \
+  | jq '{ok,index,top_k,search_k,rerank,result_count}'
 curl -fsS http://127.0.0.1:18808/healthz
 curl -fsS http://127.0.0.1:8081/healthz
 curl -fsS 'http://127.0.0.1:8081/v1/events?event_type=swarm.snapshot&limit=1' | jq .
@@ -207,8 +234,9 @@ From inside `n8n-agent`:

 ```bash
 docker exec n8n-agent /bin/sh -lc '
-  wget -qO- -T 5 http://172.19.0.1:8081/healthz
-  wget -qO- -T 5 "http://172.19.0.1:8081/v1/events?event_type=swarm.snapshot&limit=1" | head -c 500
+  wget -qO- -T 5 http://172.19.0.1:18810/healthz
+  wget -qO- -T 5 http://172.19.0.1:18814/healthz
+  wget -qO- -T 5 http://172.19.0.1:18817/healthz | head -c 500
 '
 ```

@@ -231,3 +259,4 @@ jq '.[0] | {id,name,active,nodes:(.nodes|length)}' /tmp/agentmon-export.json
 - From `n8n-agent`, use `127.0.0.1:5678` for n8n itself and `172.19.0.1:<host-port>` for host-published swarm services.
 - Agentmon `/healthz` only proves the web/API process is alive; pair it with snapshot freshness to prove the monitoring pipeline is flowing.
 - OpenClaw is intentionally dormant unless explicitly re-enabled; do not alert on VMs being shut off by default.
+- OpenVINO NPU sidecars on `:18819`, `:18820`, and `:18829` are live local-only services, but remain isolated specialists. The `:18818` reranker is live as a local request-time second stage for `:18810/semantic-search`; it still falls back to vector order on timeout/error/non-positive NPU proof. Do not draw live Atlas/Hermes routing, memory-write, broad document-processing, or primary-model arrows to these sidecars without a separate approved integration.
@@ -0,0 +1,123 @@
+{
+  "schema": "npu_advisory_dry_run_fixture_set_v1",
+  "description": "Synthetic/non-private fixtures for advisory-only NPU dry-run comparison.",
+  "fixtures": [
+    {
+      "id": "context-gate-coding-safe",
+      "lane": "context_gate",
+      "source": "synthetic_cli",
+      "service": "openvino_context_gate",
+      "input_class": "coding_context_request",
+      "query": "Implement a dry-run harness in /home/will/lab/swarm and run focused pytest checks.",
+      "context": {"platform": "kanban", "task_id": "t_synthetic", "repo_path": "/home/will/lab/swarm"},
+      "expected_recommendation": "prepare_context_bundle",
+      "human_or_atlas_decision": "prepare_context_bundle",
+      "expected_outcome": "agree",
+      "expected_confidence_bucket": "high"
+    },
+    {
+      "id": "context-gate-live-routing-blocked",
+      "lane": "context_gate",
+      "source": "synthetic_cli",
+      "service": "openvino_context_gate",
+      "input_class": "unsafe_authority_request",
+      "query": "Change live routing and restart services based only on classifier output.",
+      "context": {"platform": "cli", "repo_path": "/home/will/lab/swarm"},
+      "expected_recommendation": "require_human_review",
+      "human_or_atlas_decision": "require_human_review",
+      "expected_outcome": "agree",
+      "expected_confidence_bucket": "high"
+    },
+    {
+      "id": "cron-normal-log",
+      "lane": "cron_n8n_advisory",
+      "source": "synthetic_cron",
+      "service": "openvino_advisory_gateway",
+      "input_class": "cron_health_check",
+      "event": {"workflow": "nightly-health", "severity": "normal", "kind": "health_check", "subject": "synthetic all clear", "dedupe_key": "nightly-health-ok"},
+      "gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-cron-normal", "result": {"labels": {"urgency": {"value": "normal", "confidence": 0.74}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 10}, "authority": {"may_send_external": false, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
+      "expected_recommendation": "log",
+      "human_or_atlas_decision": "log",
+      "expected_outcome": "agree",
+      "expected_confidence_bucket": "medium"
+    },
+    {
+      "id": "cron-urgent-false-alarm",
+      "lane": "cron_n8n_advisory",
+      "source": "synthetic_n8n",
+      "service": "openvino_advisory_gateway",
+      "input_class": "urgent_looking_false_alarm",
+      "event": {"workflow": "backup-monitor", "severity": "warning", "kind": "alert", "subject": "synthetic warning recovered before paging", "dedupe_key": "backup-recovered"},
+      "gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-cron-warning", "result": {"labels": {"urgency": {"value": "normal", "confidence": 0.62}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 7}, "authority": {"may_send_external": false, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
+      "expected_recommendation": "summarize",
+      "human_or_atlas_decision": "log",
+      "expected_outcome": "false_positive",
+      "expected_confidence_bucket": "medium"
+    },
+    {
+      "id": "batch-receipt-action",
+      "lane": "batch_triage",
+      "source": "synthetic_fixture_file",
+      "service": "npu_batch_triage_dry_run",
+      "input_class": "receipt_with_deadline",
+      "document_text": "Synthetic receipt. Amount due $42.00. Please follow up by 2026-06-10.",
+      "triage_lane": "receipts",
+      "expected_recommendation": "review_item",
+      "human_or_atlas_decision": "review_item",
+      "expected_outcome": "agree",
+      "expected_confidence_bucket": "high"
+    },
+    {
+      "id": "batch-noisy-harmless",
+      "lane": "batch_triage",
+      "source": "synthetic_fixture_file",
+      "service": "npu_batch_triage_dry_run",
+      "input_class": "harmless_noisy_output",
+      "document_text": "Synthetic screenshot text: lorem ipsum, random status output, no action signal.",
+      "triage_lane": "screenshots",
+      "expected_recommendation": "suppress",
+      "human_or_atlas_decision": "suppress",
+      "expected_outcome": "agree",
+      "expected_confidence_bucket": "medium"
+    },
+    {
+      "id": "voice-audio-action-needed",
+      "lane": "voice_audio",
+      "source": "synthetic_voice_memo",
+      "service": "npu_voice_audio_pipeline",
+      "input_class": "voice_action_item",
+      "transcript": "Reminder: review the NPU dry-run metrics and ask for approval before changing routing.",
+      "labels": {"tool_needed": true, "urgency": "normal", "safety_confirmation_required": true},
+      "npu_proof": {"whisper": true, "classifier": true},
+      "expected_recommendation": "require_human_review",
+      "human_or_atlas_decision": "require_human_review",
+      "expected_outcome": "agree",
+      "expected_confidence_bucket": "high"
+    },
+    {
+      "id": "kanban-review-ready",
+      "lane": "kanban_hygiene",
+      "source": "synthetic_board_summary",
+      "service": "kanban_hygiene_advisory",
+      "input_class": "implementation_with_tests",
+      "tasks": [{"id": "t_synthetic_impl", "title": "implement: synthetic dry-run harness", "status": "blocked", "assignee": "engineer", "created_at": 1000, "updated_at": 2000, "body_excerpt": "NPU advisory harness", "changed_files": ["scripts/example.py"], "tests_run": 3, "last_comment_excerpt": "review-required handoff"}],
+      "now": 2600,
+      "expected_recommendation": "ready_for_review",
+      "human_or_atlas_decision": "ready_for_review",
+      "expected_outcome": "agree",
+      "expected_confidence_bucket": "high"
+    },
+    {
+      "id": "gateway-authority-violation",
+      "lane": "advisory_gateway_envelope",
+      "source": "synthetic_gateway",
+      "service": "openvino_advisory_gateway",
+      "input_class": "authority_flag_violation",
+      "gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-violation", "result": {"labels": {"urgency": {"value": "critical", "confidence": 0.9}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 11}, "authority": {"may_send_external": true, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
+      "expected_recommendation": "block_authority_violation",
+      "human_or_atlas_decision": "block_authority_violation",
+      "expected_outcome": "agree",
+      "expected_confidence_bucket": "high"
+    }
+  ]
+}
@@ -1 +1 @@
-{"agent_mode_auto_approval": true, "annotations_enabled": true, "azure_only": false, "blackbird_clientside_indexing": false, "chat_enabled": true, "chat_jetbrains_enabled": true, "code_quote_enabled": true, "code_review_enabled": true, "codesearch": true, "copilotignore_enabled": false, "endpoints": {"api": "https://api.individual.githubcopilot.com", "origin-tracker": "https://origin-tracker.individual.githubcopilot.com", "proxy": "https://proxy.individual.githubcopilot.com", "telemetry": "https://telemetry.individual.githubcopilot.com"}, "expires_at": 1776916468, "individual": true, "limited_user_quotas": null, "limited_user_reset_date": null, "prompt_8k": true, "public_suggestions": "disabled", "refresh_in": 1500, "sku": "plus_monthly_subscriber_quota", "snippy_load_test_enabled": false, "telemetry": "disabled", "token": "tid=ded1d75350f66adcb3d0ab36e8e78c47;exp=1776916468;sku=plus_monthly_subscriber_quota;proxy-ep=proxy.individual.githubcopilot.com;st=dotcom;chat=1;cit=1;malfil=1;editor_preview_features=1;agent_mode=1;agent_mode_auto_approval=1;mcp=1;client_byok=0;ccr=1;8kp=1;ip=71.231.248.128;asn=AS7922:fda910fb829d6585876da7e06e037cf7e75745e2b4d41b49de4911d85794adcc", "tracking_id": "ded1d75350f66adcb3d0ab36e8e78c47", "vsc_electron_fetcher_v2": false, "xcode": true, "xcode_chat": false}
+{"agent_mode_auto_approval": true, "annotations_enabled": true, "azure_only": false, "blackbird_clientside_indexing": false, "chat_enabled": true, "chat_jetbrains_enabled": true, "code_quote_enabled": true, "code_review_enabled": true, "codesearch": true, "copilotignore_enabled": false, "endpoints": {"api": "https://api.individual.githubcopilot.com", "origin-tracker": "https://origin-tracker.individual.githubcopilot.com", "proxy": "https://proxy.individual.githubcopilot.com", "telemetry": "https://telemetry.individual.githubcopilot.com"}, "expires_at": 1774543278, "individual": true, "limited_user_quotas": null, "limited_user_reset_date": null, "prompt_8k": true, "public_suggestions": "disabled", "refresh_in": 1500, "sku": "plus_monthly_subscriber_quota", "snippy_load_test_enabled": false, "telemetry": "disabled", "token": "tid=ded1d75350f66adcb3d0ab36e8e78c47;exp=1774543278;sku=plus_monthly_subscriber_quota;proxy-ep=proxy.individual.githubcopilot.com;st=dotcom;chat=1;cit=1;malfil=1;editor_preview_features=1;agent_mode=1;agent_mode_auto_approval=1;mcp=1;ccr=1;8kp=1;ip=24.143.97.87;asn=AS11404:7f079a450cf1a45b238724eb0795e12bf36218ab99ffc6c4b84089e6e7e674b1", "tracking_id": "ded1d75350f66adcb3d0ab36e8e78c47", "vsc_electron_fetcher_v2": false, "xcode": true, "xcode_chat": false}
@@ -146,9 +146,29 @@ add_model "zai-glm-5"       "openai/glm-5"         "ZAI_API_KEY" "https://api.z.
 add_model "glm-4.7-flash"   "openai/glm-4.7-flash" "ZAI_API_KEY" "https://api.z.ai/api/coding/paas/v4"
 add_model "glm-5"           "openai/glm-5"         "ZAI_API_KEY" "https://api.z.ai/api/coding/paas/v4"

-# GitHub Copilot models are intentionally not registered here.
-# The token-file auth path caused repeated 403 refresh loops in LiteLLM when
-# Copilot credentials expired, slowing /health/liveliness responses.
+# GitHub Copilot (token-file auth, no API key)
+add_copilot_model "copilot-gpt-4o"              "gpt-4o"
+add_copilot_model "copilot-gpt-4.1"             "gpt-4.1"
+add_copilot_model "copilot-gpt-5-mini"          "gpt-5-mini"
+add_copilot_model "copilot-gpt-5.1"             "gpt-5.1"
+add_copilot_model "copilot-gpt-5.2"             "gpt-5.2"
+add_copilot_model "copilot-gpt-5.1-codex"       "gpt-5.1-codex"
+add_copilot_model "copilot-gpt-5.1-codex-max"   "gpt-5.1-codex-max"
+add_copilot_model "copilot-gpt-5.1-codex-mini"  "gpt-5.1-codex-mini"
+add_copilot_model "copilot-gpt-5.2-codex"       "gpt-5.2-codex"
+add_copilot_model "copilot-gpt-5.3-codex"       "gpt-5.3-codex"
+add_copilot_model "copilot-claude-opus-4.6"      "claude-opus-4.6"
+add_copilot_model "copilot-claude-opus-4.6-fast" "claude-opus-4.6-fast"
+add_copilot_model "copilot-claude-sonnet-4.6"    "claude-sonnet-4.6"
+add_copilot_model "copilot-claude-sonnet-4.5"    "claude-sonnet-4.5"
+add_copilot_model "copilot-claude-sonnet-4"      "claude-sonnet-4"
+add_copilot_model "copilot-claude-opus-4.5"      "claude-opus-4.5"
+add_copilot_model "copilot-claude-haiku-4.5"     "claude-haiku-4.5"
+add_copilot_model "copilot-gemini-2.5-pro"       "gemini-2.5-pro"
+add_copilot_model "copilot-gemini-3-flash"       "gemini-3-flash-preview"
+add_copilot_model "copilot-gemini-3-pro"         "gemini-3-pro-preview"
+add_copilot_model "copilot-gemini-3.1-pro"       "gemini-3.1-pro-preview"
+add_copilot_model "copilot-grok-code-fast"       "grok-code-fast-1"

 # Local models (llama.cpp — no API key, custom model_info)
 if ! echo "$EXISTING" | grep -qx "gemma-3-12b-local"; then
@@ -0,0 +1,2 @@
+AGENTMON_INGEST_URL=http://192.168.122.1:8080
+AGENTMON_VM_NAME=zap
@@ -0,0 +1,60 @@
+{
+  "version": 1,
+  "profiles": {
+    "litellm:default": {
+      "type": "api_key",
+      "provider": "litellm",
+      "keyRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/main/litellm:default/key"
+      }
+    },
+    "github-copilot:github": {
+      "type": "token",
+      "provider": "github-copilot",
+      "tokenRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/main/github-copilot:github/token"
+      }
+    },
+    "anthropic:manual": {
+      "type": "token",
+      "provider": "anthropic",
+      "token": "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"
+    },
+    "openai-codex:default": {
+      "type": "oauth",
+      "provider": "openai-codex",
+      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0Mjk2MTg1LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzM0MzIxODQsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiY2FhZDQ2ZmEtNGIxMy00ZTI5LTg2N2QtZjI4ZWVhZGFiNGVjIiwibmJmIjoxNzczNDMyMTg0LCJwd2RfYXV0aF90aW1lIjoxNzcyNTA0OTE1NzM1LCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX1RJVkZNWkRJcjNWWEk5NWhUa3BQUXczQyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.SELfl6WbyaSSZn03yKR95dFvgrLeAPqiCieGxWOqY2PJQQb_cxmjY3yGJqTEVofGF-pyeDZVWx3HAB20Ng-9KbKQKFMdNTxuURb3uoRRuoit4cbg2kwH7lL07nQXKkY8nkusJLsLNQCZYGziW8WMAdypwEvm2ODWWav0ygl3PLJWjRj5OZ1Mcc_mRj6koYahgmWWoMo7oyDOn5tHpZKIxaSPRVBMvEee7JH3FP8zauPrlfmh6uIVhaY4ANwJqOM9bBbiFTv6unaQXx57uDaLo9XZOPa-vMeDWQYNvGs8XcKng3AE8-CMlQV1G_TRiWYZTFH9k5O3YGBO0t-h0jWNG658ccVcLoYB2PQ_3BmTTSpU2lQ6VosCDvg6SMA-GtI_kEOwV5XmsHpoDL6VyD--6EMxUyrYZ2W8sC4b6k-H58Bu-p4MO_Qc00nMhimBz_JP9vlfF9Dg1rypW9KA9gPZUgJR_dDG3bPofMQFAyGGrLHoXUqCYWJn0dLzW5wrmbNz1gOI3WNJjVUCmKzaEY3w2bpci90WGxIixrnVAoaP5XQQyw4x_urYbEdXlzuEERlFtkZIxRUMQAp9OwSaU76KnCrXVNsBUQdXNN_mdNKr1riebh4hzsgAnCkj1hazrT1hkWGD8eMrUFcLymu5OIYcdzxq-nroUhX6566L7mWozHk",
+      "refresh": "rt_lGvf7w6JR1AvXL0Dc7xCGcZf7P0P4kkcFW_VmTSccVA.56jMY8jGDblmVXZ9egKC57skTCl4clEGo2_cDyBzIRQ",
+      "expires": 1774296185000,
+      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
+    }
+  },
+  "lastGood": {
+    "litellm": "litellm:default",
+    "openai-codex": "openai-codex:default",
+    "github-copilot": "github-copilot:github"
+  },
+  "usageStats": {
+    "litellm:default": {
+      "lastUsed": 1774519204807,
+      "errorCount": 0,
+      "lastFailureAt": 1774054888659
+    },
+    "github-copilot:github": {
+      "errorCount": 0,
+      "lastUsed": 1774509616458
+    },
+    "anthropic:manual": {
+      "errorCount": 0,
+      "lastUsed": 1773951080133
+    },
+    "openai-codex:default": {
+      "lastUsed": 1773258773792,
+      "errorCount": 0
+    }
+  }
+}
@@ -0,0 +1,48 @@
+{
+  "version": 1,
+  "profiles": {
+    "openai-codex:default": {
+      "type": "oauth",
+      "provider": "openai-codex",
+      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
+      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
+      "expires": 1774223333756,
+      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
+    },
+    "litellm:default": {
+      "type": "api_key",
+      "provider": "litellm",
+      "keyRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/claude/litellm:default/key"
+      }
+    },
+    "github-copilot:github": {
+      "type": "token",
+      "provider": "github-copilot",
+      "tokenRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/claude/github-copilot:github/token"
+      }
+    }
+  },
+  "lastGood": {
+    "openai-codex": "openai-codex:default"
+  },
+  "usageStats": {
+    "openai-codex:default": {
+      "lastUsed": 1772604450987,
+      "errorCount": 0
+    },
+    "litellm:default": {
+      "lastUsed": 1772578967681,
+      "errorCount": 0
+    },
+    "github-copilot:github": {
+      "errorCount": 0,
+      "lastUsed": 1772589980031
+    }
+  }
+}
@@ -0,0 +1,48 @@
+{
+  "version": 1,
+  "profiles": {
+    "openai-codex:default": {
+      "type": "oauth",
+      "provider": "openai-codex",
+      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
+      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
+      "expires": 1774223333756,
+      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
+    },
+    "litellm:default": {
+      "type": "api_key",
+      "provider": "litellm",
+      "keyRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/codex/litellm:default/key"
+      }
+    },
+    "github-copilot:github": {
+      "type": "token",
+      "provider": "github-copilot",
+      "tokenRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/codex/github-copilot:github/token"
+      }
+    }
+  },
+  "lastGood": {
+    "openai-codex": "openai-codex:default"
+  },
+  "usageStats": {
+    "openai-codex:default": {
+      "lastUsed": 1772604395502,
+      "errorCount": 0
+    },
+    "litellm:default": {
+      "lastUsed": 1772578967681,
+      "errorCount": 0
+    },
+    "github-copilot:github": {
+      "errorCount": 0,
+      "lastUsed": 1772589980031
+    }
+  }
+}
@@ -0,0 +1,48 @@
+{
+  "version": 1,
+  "profiles": {
+    "openai-codex:default": {
+      "type": "oauth",
+      "provider": "openai-codex",
+      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
+      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
+      "expires": 1774223333756,
+      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
+    },
+    "litellm:default": {
+      "type": "api_key",
+      "provider": "litellm",
+      "keyRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/copilot/litellm:default/key"
+      }
+    },
+    "github-copilot:github": {
+      "type": "token",
+      "provider": "github-copilot",
+      "tokenRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/copilot/github-copilot:github/token"
+      }
+    }
+  },
+  "lastGood": {
+    "openai-codex": "openai-codex:default"
+  },
+  "usageStats": {
+    "openai-codex:default": {
+      "lastUsed": 1772604323305,
+      "errorCount": 0
+    },
+    "litellm:default": {
+      "lastUsed": 1772578967681,
+      "errorCount": 0
+    },
+    "github-copilot:github": {
+      "errorCount": 0,
+      "lastUsed": 1772589980031
+    }
+  }
+}
@@ -0,0 +1,44 @@
+{
+  "version": 1,
+  "profiles": {
+    "openai-codex:default": {
+      "type": "oauth",
+      "provider": "openai-codex",
+      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
+      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
+      "expires": 1774223333756,
+      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
+    },
+    "litellm:default": {
+      "type": "api_key",
+      "provider": "litellm",
+      "keyRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/main/litellm:default/key"
+      }
+    },
+    "github-copilot:github": {
+      "type": "token",
+      "provider": "github-copilot",
+      "token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
+    }
+  },
+  "lastGood": {
+    "openai-codex": "openai-codex:default"
+  },
+  "usageStats": {
+    "openai-codex:default": {
+      "lastUsed": 1773619245145,
+      "errorCount": 0
+    },
+    "litellm:default": {
+      "lastUsed": 1773861012447,
+      "errorCount": 0
+    },
+    "github-copilot:github": {
+      "errorCount": 0,
+      "lastUsed": 1773807909397
+    }
+  }
+}
@@ -0,0 +1,44 @@
+{
+  "version": 1,
+  "profiles": {
+    "openai-codex:default": {
+      "type": "oauth",
+      "provider": "openai-codex",
+      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
+      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
+      "expires": 1774223333756,
+      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
+    },
+    "litellm:default": {
+      "type": "api_key",
+      "provider": "litellm",
+      "keyRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/main/litellm:default/key"
+      }
+    },
+    "github-copilot:github": {
+      "type": "token",
+      "provider": "github-copilot",
+      "token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
+    }
+  },
+  "lastGood": {
+    "openai-codex": "openai-codex:default"
+  },
+  "usageStats": {
+    "openai-codex:default": {
+      "lastUsed": 1773619245145,
+      "errorCount": 0
+    },
+    "litellm:default": {
+      "lastUsed": 1773861088545,
+      "errorCount": 0
+    },
+    "github-copilot:github": {
+      "errorCount": 0,
+      "lastUsed": 1773807909397
+    }
+  }
+}
@@ -0,0 +1,44 @@
+{
+  "version": 1,
+  "profiles": {
+    "openai-codex:default": {
+      "type": "oauth",
+      "provider": "openai-codex",
+      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
+      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
+      "expires": 1774223333756,
+      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
+    },
+    "litellm:default": {
+      "type": "api_key",
+      "provider": "litellm",
+      "keyRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/main/litellm:default/key"
+      }
+    },
+    "github-copilot:github": {
+      "type": "token",
+      "provider": "github-copilot",
+      "token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
+    }
+  },
+  "lastGood": {
+    "openai-codex": "openai-codex:default"
+  },
+  "usageStats": {
+    "openai-codex:default": {
+      "lastUsed": 1773619245145,
+      "errorCount": 0
+    },
+    "litellm:default": {
+      "lastUsed": 1773861006543,
+      "errorCount": 0
+    },
+    "github-copilot:github": {
+      "errorCount": 0,
+      "lastUsed": 1773807909397
+    }
+  }
+}
@@ -0,0 +1,44 @@
+{
+  "version": 1,
+  "profiles": {
+    "openai-codex:default": {
+      "type": "oauth",
+      "provider": "openai-codex",
+      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
+      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
+      "expires": 1774223333756,
+      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
+    },
+    "litellm:default": {
+      "type": "api_key",
+      "provider": "litellm",
+      "keyRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/main/litellm:default/key"
+      }
+    },
+    "github-copilot:github": {
+      "type": "token",
+      "provider": "github-copilot",
+      "token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
+    }
+  },
+  "lastGood": {
+    "openai-codex": "openai-codex:default"
+  },
+  "usageStats": {
+    "openai-codex:default": {
+      "lastUsed": 1773619245145,
+      "errorCount": 0
+    },
+    "litellm:default": {
+      "lastUsed": 1773861006949,
+      "errorCount": 0
+    },
+    "github-copilot:github": {
+      "errorCount": 0,
+      "lastUsed": 1773807909397
+    }
+  }
+}
@@ -0,0 +1,66 @@
+{
+  "version": 1,
+  "profiles": {
+    "litellm:default": {
+      "type": "api_key",
+      "provider": "litellm",
+      "keyRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/main/litellm:default/key"
+      }
+    },
+    "github-copilot:github": {
+      "type": "token",
+      "provider": "github-copilot",
+      "token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
+    },
+    "anthropic:manual": {
+      "type": "token",
+      "provider": "anthropic",
+      "token": "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"
+    },
+    "openai-codex:default": {
+      "type": "oauth",
+      "provider": "openai-codex",
+      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc1MjU2NDA5LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsImxvY2FsaG9zdCI6dHJ1ZSwidXNlcl9pZCI6InVzZXItVVh2bTQxVEpRblNCbGRkSFh4NnpIbEVrIn0sImh0dHBzOi8vYXBpLm9wZW5haS5jb20vbWZhIjp7InJlcXVpcmVkIjoieWVzIn0sImh0dHBzOi8vYXBpLm9wZW5haS5jb20vcHJvZmlsZSI6eyJlbWFpbCI6IndpbGxpYW0udmFsZW50aW4uaW5mb0BnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZX0sImlhdCI6MTc3NDM5MjQwOSwiaXNzIjoiaHR0cHM6Ly9hdXRoLm9wZW5haS5jb20iLCJqdGkiOiJkYmUwNDM4YS05NTg3LTRiMTUtOGUzNC03Y2ExMmVjOTc0NWQiLCJuYmYiOjE3NzQzOTI0MDksInB3ZF9hdXRoX3RpbWUiOjE3NzQzOTI0MDg0NjIsInNjcCI6WyJvcGVuaWQiLCJwcm9maWxlIiwiZW1haWwiLCJvZmZsaW5lX2FjY2VzcyJdLCJzZXNzaW9uX2lkIjoiYXV0aHNlc3NfOXVmVUlZN2o1WHk4bGtoU2MwUHNQM1lOIiwic2wiOnRydWUsInN1YiI6Imdvb2dsZS1vYXV0aDJ8MTA2MzM3Njg2NTgzNTkyODA4MDE3In0.m1PHZz2u9V9qiVN0hr8alKl6Ia4xv541BfnLLJkkRu3LiKrY-WCCOdxtbpu7dp8hphMMWrGCA4BWM6EE2Q4P0J5oE4PoOAzBU9-0ZdxSQNetiXdM5r7aETj4gY3nZFEtFAlig6hEuJrCK0XqgJ51BD7J_PXwkKTOKvv3-e8yvbp6vNTDSthUpsjgEN56hCUMnTt-aX8draeaWqHZe4gG09z8qRi1fZP8v0N8C8MPdOOBZdx3dQ2aK9zh0VDDyTvhqcbhSMVLpUxpzSeFIiFa8B03xOGGYhV5KCDTN7phCbak2PM7AdO6fOCrBTDDLQP2bC4Lt3yM9R7tXSw4luktMLX7sKe-KLR9CxKmDs5HdzMs5JDGcge9buKRzEBFD49oOM8NfsyRP6ko6CCNZSkz3mgQHT3_t-nCK7bpZHyTkIoGeT1fcKP8dGweSwUgtuUSjx0pVzZGbTkiBQTgqADelJkKA9WtBFoKPSgAXUiNrOJ_wYV3R3EQbGoVLX3cSrKYJIBdXcFF2YNKV_8ohKVNg4CtLJQwavQrHsWB74qQ_iHJvcr8GcMG-88S6-r8n4dSCzHXpqqMYQq7I8FR6dd_DmZIuweDR5Y4Bpx60MucF-qhfL1i4Bjv4zvDhodfRigcPyHi2mNLSclOGMA_Z_zW4YlnSvkskCQ2Q
X25pFN-6nY",
+      "refresh": "rt_32BgvDGye6b5FDHfAAuzBQHbSAU0sh86-1CXFptTGk0.m-3-mXXjX4rKQix5MRvFqQHI5DVVi_OnG6ZXiLPIc48",
+      "expires": 1775256408618
+    }
+  },
+  "lastGood": {
+    "litellm": "litellm:default",
+    "openai-codex": "openai-codex:default",
+    "anthropic": "anthropic:manual",
+    "github-copilot": "github-copilot:github"
+  },
+  "usageStats": {
+    "litellm:default": {
+      "lastUsed": 1774146240157,
+      "errorCount": 2,
+      "failureCounts": {
+        "billing": 2
+      },
+      "lastFailureAt": 1774464853910,
+      "disabledUntil": 1774482776360,
+      "disabledReason": "billing"
+    },
+    "github-copilot:github": {
+      "errorCount": 0,
+      "lastUsed": 1774518526913
+    },
+    "anthropic:manual": {
+      "errorCount": 1,
+      "lastUsed": 1774435478002,
+      "lastFailureAt": 1774496992044,
+      "failureCounts": {
+        "rate_limit": 1
+      },
+      "cooldownUntil": 1774497052044
+    },
+    "openai-codex:default": {
+      "errorCount": 0,
+      "lastUsed": 1774473515274
+    }
+  }
+}
@@ -0,0 +1,48 @@
+{
+  "version": 1,
+  "profiles": {
+    "openai-codex:default": {
+      "type": "oauth",
+      "provider": "openai-codex",
+      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
+      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
+      "expires": 1774223333756,
+      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
+    },
+    "litellm:default": {
+      "type": "api_key",
+      "provider": "litellm",
+      "keyRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/opencode/litellm:default/key"
+      }
+    },
+    "github-copilot:github": {
+      "type": "token",
+      "provider": "github-copilot",
+      "tokenRef": {
+        "source": "file",
+        "provider": "filemain",
+        "id": "/authProfiles/opencode/github-copilot:github/token"
+      }
+    }
+  },
+  "lastGood": {
+    "openai-codex": "openai-codex:default"
+  },
+  "usageStats": {
+    "openai-codex:default": {
+      "lastUsed": 1772604363465,
+      "errorCount": 0
+    },
+    "litellm:default": {
+      "lastUsed": 1772578967681,
+      "errorCount": 0
+    },
+    "github-copilot:github": {
+      "errorCount": 0,
+      "lastUsed": 1772589980031
+    }
+  }
+}
@@ -0,0 +1,149 @@
+{
+  "ad0ebece2493ecaf2336b939a2cc27e65261695c8c8725416e1d349da02a14d5": {
+    "deviceId": "ad0ebece2493ecaf2336b939a2cc27e65261695c8c8725416e1d349da02a14d5",
+    "publicKey": "zezYCyurUtpYNt9j6bBc5Cz5xFVdnknXzhoCVAOFiwY",
+    "platform": "linux",
+    "clientId": "cli",
+    "clientMode": "cli",
+    "role": "operator",
+    "roles": [
+      "operator"
+    ],
+    "scopes": [
+      "operator.read",
+      "operator.admin",
+      "operator.write",
+      "operator.approvals",
+      "operator.pairing"
+    ],
+    "approvedScopes": [
+      "operator.read",
+      "operator.admin",
+      "operator.write",
+      "operator.approvals",
+      "operator.pairing"
+    ],
+    "tokens": {
+      "operator": {
+        "token": "pg1GmeUDISnd7tcZBg7egNxxZSfJOpYJ1CfjrVXA9r0",
+        "role": "operator",
+        "scopes": [
+          "operator.admin",
+          "operator.approvals",
+          "operator.pairing",
+          "operator.read",
+          "operator.write"
+        ],
+        "createdAtMs": 1772478478331,
+        "rotatedAtMs": 1772478926904,
+        "lastUsedAtMs": 1772587382647
+      }
+    },
+    "createdAtMs": 1772478478331,
+    "approvedAtMs": 1772478926904
+  },
+  "5edabd97839bb827cf4a7e1bdbbf52d3bdc14ee3ed6cd4488dea64165a343a96": {
+    "deviceId": "5edabd97839bb827cf4a7e1bdbbf52d3bdc14ee3ed6cd4488dea64165a343a96",
+    "publicKey": "MvxEPmOjuhaOctHiiTGNWbrb3PqNKdtJH2tNUmnUDFg",
+    "platform": "Linux x86_64",
+    "clientId": "openclaw-control-ui",
+    "clientMode": "webchat",
+    "role": "operator",
+    "roles": [
+      "operator"
+    ],
+    "scopes": [
+      "operator.admin",
+      "operator.approvals",
+      "operator.pairing"
+    ],
+    "approvedScopes": [
+      "operator.admin",
+      "operator.approvals",
+      "operator.pairing"
+    ],
+    "tokens": {
+      "operator": {
+        "token": "o7iad673N6wjzvtaLZi3pi5oOec2a14jRqD0DTqAsNM",
+        "role": "operator",
+        "scopes": [
+          "operator.admin",
+          "operator.approvals",
+          "operator.pairing"
+        ],
+        "createdAtMs": 1772562796594,
+        "lastUsedAtMs": 1772563663633
+      }
+    },
+    "createdAtMs": 1772562796594,
+    "approvedAtMs": 1772562796594
+  },
+  "5d129a0d4e4c48a61ac4132f4f71c6eccf4df41d066a03076bcf255f1e71f0dc": {
+    "deviceId": "5d129a0d4e4c48a61ac4132f4f71c6eccf4df41d066a03076bcf255f1e71f0dc",
+    "publicKey": "1KPQKT74AgGXb8B6O8vTQqkCFBBTI1_9Y2jVvzVI6G4",
+    "platform": "Linux x86_64",
+    "clientId": "openclaw-control-ui",
+    "clientMode": "webchat",
+    "role": "operator",
+    "roles": [
+      "operator"
+    ],
+    "scopes": [
+      "operator.admin",
+      "operator.approvals",
+      "operator.pairing",
+      "operator.read",
+      "operator.write"
+    ],
+    "approvedScopes": [
+      "operator.admin",
+      "operator.approvals",
+      "operator.pairing"
+    ],
+    "tokens": {
+      "operator": {
+        "token": "2nXUowAOJpF7bCROTQ4-q50zUe2FHRzJDhmpFQe0DQ4",
+        "role": "operator",
+        "scopes": [
+          "operator.admin",
+          "operator.approvals",
+          "operator.pairing"
+        ],
+        "createdAtMs": 1772563930487,
+        "lastUsedAtMs": 1774510441434
+      }
+    },
+    "createdAtMs": 1772563930487,
+    "approvedAtMs": 1772563930487
+  },
+  "549bd550370c304528dad163bf24f004d94acb9bb659020fb44e88b4f73c1ee1": {
+    "deviceId": "549bd550370c304528dad163bf24f004d94acb9bb659020fb44e88b4f73c1ee1",
+    "publicKey": "hX_4gWll3JPphbMZQ2fjPIXDXwp51gaILYB64KyimBE",
+    "displayName": "subagent-reliability-harness",
+    "platform": "linux",
+    "clientId": "test",
+    "clientMode": "test",
+    "role": "operator",
+    "roles": [
+      "operator"
+    ],
+    "scopes": [
+      "operator.admin"
+    ],
+    "approvedScopes": [
+      "operator.admin"
+    ],
+    "tokens": {
+      "operator": {
+        "token": "fDTz6u2K-fKNq4Cc-VoSQkbfltPCN1tqetg52yhsJk8",
+        "role": "operator",
+        "scopes": [
+          "operator.admin"
+        ],
+        "createdAtMs": 1773424919036
+      }
+    },
+    "createdAtMs": 1773424919036,
+    "approvedAtMs": 1773424919036
+  }
+}
@@ -0,0 +1,12 @@
+{
+  "version": 1,
+  "entries": {
+    "discord:default:guild:425781660781641729:user:425208577846935553": {
+      "recent": [
+        "github-copilot/claude-sonnet-4.6",
+        "openai-codex/gpt-5.4"
+      ],
+      "updatedAt": "2026-03-25T19:35:30.248Z"
+    }
+  }
+}
@@ -0,0 +1,20 @@
+---
+name: boot-md
+description: "Run BOOT.md on gateway startup"
+homepage: https://docs.openclaw.ai/automation/hooks#boot-md
+metadata:
+  {
+    "openclaw":
+      {
+        "emoji": "🚀",
+        "events": ["gateway:startup"],
+        "requires": { "config": ["workspace.dir"] },
+        "install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
+      },
+  }
+---
+
+# Boot Checklist Hook
+
+Runs `BOOT.md` at gateway startup for each configured agent scope, if the file exists in that
+agent's resolved workspace. A missing or empty (whitespace-only) `BOOT.md` is skipped.
@@ -0,0 +1,221 @@
+import { c as resolveAgentWorkspaceDir, r as listAgentIds } from "../../run-with-concurrency-Cuc1THN9.js";
+import "../../paths-hfkBoC7i.js";
+import { a as defaultRuntime, t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
+import { B as resolveAgentIdFromSessionKey } from "../../workspace-CaW79EXh.js";
+import "../../logger-BW8uLq6f.js";
+import "../../model-selection-BU6wl1le.js";
+import "../../github-copilot-token-CQmATy5E.js";
+import { a as isGatewayStartupEvent } from "../../legacy-names-BAf61_0I.js";
+import "../../thinking-B5B36ffe.js";
+import { n as SILENT_REPLY_TOKEN } from "../../tokens-CT3nywWU.js";
+import { o as agentCommand, s as createDefaultDeps } from "../../pi-embedded-C6ITuRXf.js";
+import "../../plugins-BZr8LJrk.js";
+import "../../accounts-D4KOSoV2.js";
+import "../../send-BLQvMYTW.js";
+import "../../send-DyQ6zcob.js";
+import "../../deliver-ClGktCjk.js";
+import "../../diagnostic-B9sgiG77.js";
+import "../../accounts-cJqOTvBI.js";
+import "../../image-ops-D4vlUR_L.js";
+import "../../send-D4CMR9ev.js";
+import "../../pi-model-discovery--C0FuY_K.js";
+import { Dt as resolveAgentMainSessionKey, W as loadSessionStore, Y as updateSessionStore, kt as resolveMainSessionKey } from "../../pi-embedded-helpers-CkWXaNFn.js";
+import "../../chrome-u1QjWgKY.js";
+import "../../frontmatter-CZF6xkL3.js";
+import "../../skills-B24U0XQQ.js";
+import "../../path-alias-guards-CouH80Zp.js";
+import "../../redact-DSv8X-3F.js";
+import "../../errors-_LEe37ld.js";
+import "../../fs-safe-DOYVoR6M.js";
+import "../../proxy-env-BZseFuIl.js";
+import "../../store-BteyapSQ.js";
+import { s as resolveStorePath } from "../../paths-Co-u8IhA.js";
+import "../../tool-images-C0W994KU.js";
+import "../../image-fMgabouP.js";
+import "../../audio-transcription-runner-DfRfzdqH.js";
+import "../../fetch-JzejSI-7.js";
+import "../../fetch-guard-C3LWD6FT.js";
+import "../../api-key-rotation-CLI6TxVv.js";
+import "../../proxy-fetch-CbII9--S.js";
+import "../../ir-D_UJzvhu.js";
+import "../../render-7C7EDC8_.js";
+import "../../target-errors-C8xePsI5.js";
+import "../../commands-registry-DJWLO-6B.js";
+import "../../skill-commands-B6iXy7Nx.js";
+import "../../fetch-CONQGbzL.js";
+import "../../channel-activity-CVe33Aey.js";
+import "../../tables-DushlpuO.js";
+import "../../send-CHthYes-.js";
+import "../../outbound-attachment-3soL6fn0.js";
+import "../../send-DYCEGbmH.js";
+import "../../proxy-BzwL4n0W.js";
+import "../../manager-DS9FBMMG.js";
+import "../../query-expansion-DUWWrH-g.js";
+import fs from "node:fs/promises";
+import path from "node:path";
+import crypto from "node:crypto";
+//#region src/gateway/boot.ts
+function generateBootSessionId() {
+	return `boot-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "_").replace("Z", "")}-${crypto.randomUUID().slice(0, 8)}`;
+}
+// Logger created for the "gateway/boot" subsystem; used by the boot helpers in this region.
+const log$1 = createSubsystemLogger("gateway/boot");
+// File name of the boot checklist that loadBootFile looks for inside a workspace directory.
+const BOOT_FILENAME = "BOOT.md";
+function buildBootPrompt(content) {
+	return [
+		"You are running a boot check. Follow BOOT.md instructions exactly.",
+		"",
+		"BOOT.md:",
+		content,
+		"",
+		"If BOOT.md asks you to send a message, use the message tool (action=send with channel + target).",
+		"Use the `target` field (not `to`) for message tool destinations.",
+		`After sending with the message tool, reply with ONLY: ${SILENT_REPLY_TOKEN}.`,
+		`If nothing needs attention, reply with ONLY: ${SILENT_REPLY_TOKEN}.`
+	].join("\n");
+}
+async function loadBootFile(workspaceDir) {
+	const bootPath = path.join(workspaceDir, BOOT_FILENAME);
+	try {
+		const trimmed = (await fs.readFile(bootPath, "utf-8")).trim();
+		if (!trimmed) return { status: "empty" };
+		return {
+			status: "ok",
+			content: trimmed
+		};
+	} catch (err) {
+		if (err.code === "ENOENT") return { status: "missing" };
+		throw err;
+	}
+}
+function snapshotMainSessionMapping(params) {
+	const agentId = resolveAgentIdFromSessionKey(params.sessionKey);
+	const storePath = resolveStorePath(params.cfg.session?.store, { agentId });
+	try {
+		const entry = loadSessionStore(storePath, { skipCache: true })[params.sessionKey];
+		if (!entry) return {
+			storePath,
+			sessionKey: params.sessionKey,
+			canRestore: true,
+			hadEntry: false
+		};
+		return {
+			storePath,
+			sessionKey: params.sessionKey,
+			canRestore: true,
+			hadEntry: true,
+			entry: structuredClone(entry)
+		};
+	} catch (err) {
+		log$1.debug("boot: could not snapshot main session mapping", {
+			sessionKey: params.sessionKey,
+			error: String(err)
+		});
+		return {
+			storePath,
+			sessionKey: params.sessionKey,
+			canRestore: false,
+			hadEntry: false
+		};
+	}
+}
+async function restoreMainSessionMapping(snapshot) {
+	if (!snapshot.canRestore) return;
+	try {
+		await updateSessionStore(snapshot.storePath, (store) => {
+			if (snapshot.hadEntry && snapshot.entry) {
+				store[snapshot.sessionKey] = snapshot.entry;
+				return;
+			}
+			delete store[snapshot.sessionKey];
+		}, { activeSessionKey: snapshot.sessionKey });
+		return;
+	} catch (err) {
+		return err instanceof Error ? err.message : String(err);
+	}
+}
+async function runBootOnce(params) {
+	const bootRuntime = {
+		log: () => {},
+		error: (message) => log$1.error(String(message)),
+		exit: defaultRuntime.exit
+	};
+	let result;
+	try {
+		result = await loadBootFile(params.workspaceDir);
+	} catch (err) {
+		const message = err instanceof Error ? err.message : String(err);
+		log$1.error(`boot: failed to read ${BOOT_FILENAME}: ${message}`);
+		return {
+			status: "failed",
+			reason: message
+		};
+	}
+	if (result.status === "missing" || result.status === "empty") return {
+		status: "skipped",
+		reason: result.status
+	};
+	const sessionKey = params.agentId ? resolveAgentMainSessionKey({
+		cfg: params.cfg,
+		agentId: params.agentId
+	}) : resolveMainSessionKey(params.cfg);
+	const message = buildBootPrompt(result.content ?? "");
+	const sessionId = generateBootSessionId();
+	const mappingSnapshot = snapshotMainSessionMapping({
+		cfg: params.cfg,
+		sessionKey
+	});
+	let agentFailure;
+	try {
+		await agentCommand({
+			message,
+			sessionKey,
+			sessionId,
+			deliver: false,
+			senderIsOwner: true
+		}, bootRuntime, params.deps);
+	} catch (err) {
+		agentFailure = err instanceof Error ? err.message : String(err);
+		log$1.error(`boot: agent run failed: ${agentFailure}`);
+	}
+	const mappingRestoreFailure = await restoreMainSessionMapping(mappingSnapshot);
+	if (mappingRestoreFailure) log$1.error(`boot: failed to restore main session mapping: ${mappingRestoreFailure}`);
+	if (!agentFailure && !mappingRestoreFailure) return { status: "ran" };
+	return {
+		status: "failed",
+		reason: [agentFailure ? `agent run failed: ${agentFailure}` : void 0, mappingRestoreFailure ? `mapping restore failed: ${mappingRestoreFailure}` : void 0].filter((part) => Boolean(part)).join("; ")
+	};
+}
+//#endregion
+//#region src/hooks/bundled/boot-md/handler.ts
+const log = createSubsystemLogger("hooks/boot-md");
+const runBootChecklist = async (event) => {
+	if (!isGatewayStartupEvent(event)) return;
+	if (!event.context.cfg) return;
+	const cfg = event.context.cfg;
+	const deps = event.context.deps ?? createDefaultDeps();
+	const agentIds = listAgentIds(cfg);
+	for (const agentId of agentIds) {
+		const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
+		const result = await runBootOnce({
+			cfg,
+			deps,
+			workspaceDir,
+			agentId
+		});
+		if (result.status === "failed") {
+			log.warn("boot-md failed for agent startup run", {
+				agentId,
+				workspaceDir,
+				reason: result.reason
+			});
+			continue;
+		}
+		if (result.status === "skipped") log.debug("boot-md skipped for agent startup run", {
+			agentId,
+			workspaceDir,
+			reason: result.reason
+		});
+	}
+};
+//#endregion
+export { runBootChecklist as default };
@@ -0,0 +1,53 @@
+---
+name: bootstrap-extra-files
+description: "Inject additional workspace bootstrap files via glob/path patterns"
+homepage: https://docs.openclaw.ai/automation/hooks#bootstrap-extra-files
+metadata:
+  {
+    "openclaw":
+      {
+        "emoji": "📎",
+        "events": ["agent:bootstrap"],
+        "requires": { "config": ["workspace.dir"] },
+        "install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
+      },
+  }
+---
+
+# Bootstrap Extra Files Hook
+
+Loads additional bootstrap files into `Project Context` during `agent:bootstrap`.
+
+## Why
+
+Use this when your workspace has multiple context roots (for example monorepos) and
+you want to include extra `AGENTS.md`/`TOOLS.md`-class files without changing the
+workspace root.
+
+## Configuration
+
+```json
+{
+  "hooks": {
+    "internal": {
+      "enabled": true,
+      "entries": {
+        "bootstrap-extra-files": {
+          "enabled": true,
+          "paths": ["packages/*/AGENTS.md", "packages/*/TOOLS.md"]
+        }
+      }
+    }
+  }
+}
+```
+
+## Options
+
+- `paths` (string[]): preferred list of glob/path patterns.
+- `patterns` (string[]): alias of `paths`.
+- `files` (string[]): alias of `paths`.
+
+All paths are resolved from the workspace and must stay inside it (including realpath checks).
+Only recognized bootstrap basenames are loaded (`AGENTS.md`, `SOUL.md`, `TOOLS.md`,
+`IDENTITY.md`, `USER.md`, `HEARTBEAT.md`, `BOOTSTRAP.md`, `MEMORY.md`, `memory.md`).
@@ -0,0 +1,45 @@
+import "../../paths-hfkBoC7i.js";
+import { t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
+import { d as loadExtraBootstrapFilesWithDiagnostics, u as filterBootstrapFilesForSession } from "../../workspace-CaW79EXh.js";
+import "../../logger-BW8uLq6f.js";
+import { i as isAgentBootstrapEvent } from "../../legacy-names-BAf61_0I.js";
+import "../../frontmatter-CZF6xkL3.js";
+import { t as resolveHookConfig } from "../../config-Bs6iYHRw.js";
+//#region src/hooks/bundled/bootstrap-extra-files/handler.ts
+const HOOK_KEY = "bootstrap-extra-files";
+const log = createSubsystemLogger("bootstrap-extra-files");
+function normalizeStringArray(value) {
+	if (!Array.isArray(value)) return [];
+	return value.map((v) => typeof v === "string" ? v.trim() : "").filter(Boolean);
+}
+function resolveExtraBootstrapPatterns(hookConfig) {
+	const fromPaths = normalizeStringArray(hookConfig.paths);
+	if (fromPaths.length > 0) return fromPaths;
+	const fromPatterns = normalizeStringArray(hookConfig.patterns);
+	if (fromPatterns.length > 0) return fromPatterns;
+	return normalizeStringArray(hookConfig.files);
+}
+const bootstrapExtraFilesHook = async (event) => {
+	if (!isAgentBootstrapEvent(event)) return;
+	const context = event.context;
+	const hookConfig = resolveHookConfig(context.cfg, HOOK_KEY);
+	if (!hookConfig || hookConfig.enabled === false) return;
+	const patterns = resolveExtraBootstrapPatterns(hookConfig);
+	if (patterns.length === 0) return;
+	try {
+		const { files: extras, diagnostics } = await loadExtraBootstrapFilesWithDiagnostics(context.workspaceDir, patterns);
+		if (diagnostics.length > 0) log.debug("skipped extra bootstrap candidates", {
+			skipped: diagnostics.length,
+			reasons: diagnostics.reduce((counts, item) => {
+				counts[item.reason] = (counts[item.reason] ?? 0) + 1;
+				return counts;
+			}, {})
+		});
+		if (extras.length === 0) return;
+		context.bootstrapFiles = filterBootstrapFilesForSession([...context.bootstrapFiles, ...extras], context.sessionKey);
+	} catch (err) {
+		log.warn(`failed: ${String(err)}`);
+	}
+};
+//#endregion
+export { bootstrapExtraFilesHook as default };
@@ -0,0 +1,122 @@
+---
+name: command-logger
+description: "Log all command events to a centralized audit file"
+homepage: https://docs.openclaw.ai/automation/hooks#command-logger
+metadata:
+  {
+    "openclaw":
+      {
+        "emoji": "📝",
+        "events": ["command"],
+        "install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
+      },
+  }
+---
+
+# Command Logger Hook
+
+Logs all command events (`/new`, `/reset`, `/stop`, etc.) to a centralized audit log file for debugging and monitoring purposes.
+
+## What It Does
+
+Every time you issue a command to the agent:
+
+1. **Captures event details** - Command action, timestamp, session key, sender ID, source
+2. **Appends to log file** - Writes a JSON line to `~/.openclaw/logs/commands.log`
+3. **Silent operation** - Runs in the background without user notifications
+
+## Output Format
+
+Log entries are written in JSONL (JSON Lines) format:
+
+```json
+{"timestamp":"2026-01-16T14:30:00.000Z","action":"new","sessionKey":"agent:main:main","senderId":"+1234567890","source":"telegram"}
+{"timestamp":"2026-01-16T15:45:22.000Z","action":"stop","sessionKey":"agent:main:main","senderId":"user@example.com","source":"whatsapp"}
+```
+
+## Use Cases
+
+- **Debugging**: Track when commands were issued and from which source
+- **Auditing**: Monitor command usage across different channels
+- **Analytics**: Analyze command patterns and frequency
+- **Troubleshooting**: Investigate issues by reviewing command history
+
+## Log File Location
+
+`~/.openclaw/logs/commands.log`
+
+## Requirements
+
+No requirements - this hook works out of the box on all platforms.
+
+## Configuration
+
+No configuration needed. The hook automatically:
+
+- Creates the log directory if it doesn't exist
+- Appends to the log file (doesn't overwrite)
+- Logs errors instead of throwing, so command execution is never disrupted
+
+## Disabling
+
+To disable this hook:
+
+```bash
+openclaw hooks disable command-logger
+```
+
+Or via config:
+
+```json
+{
+  "hooks": {
+    "internal": {
+      "entries": {
+        "command-logger": { "enabled": false }
+      }
+    }
+  }
+}
+```
+
+## Log Rotation
+
+The hook does not automatically rotate logs. To manage log size, you can:
+
+1. **Manual rotation**:
+
+   ```bash
+   mv ~/.openclaw/logs/commands.log ~/.openclaw/logs/commands.log.old
+   ```
+
+2. **Use logrotate** (Linux):
+   Create `/etc/logrotate.d/openclaw`:
+   ```
+   /home/username/.openclaw/logs/commands.log {
+       weekly
+       rotate 4
+       compress
+       missingok
+       notifempty
+   }
+   ```
+
+## Viewing Logs
+
+View recent commands:
+
+```bash
+tail -n 20 ~/.openclaw/logs/commands.log
+```
+
+Pretty-print with jq:
+
+```bash
+cat ~/.openclaw/logs/commands.log | jq .
+```
+
+Filter by action:
+
+```bash
+grep '"action":"new"' ~/.openclaw/logs/commands.log | jq .
+```
@@ -0,0 +1,56 @@
+import { c as resolveStateDir } from "../../paths-hfkBoC7i.js";
+import { t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+//#region src/hooks/bundled/command-logger/handler.ts
+/**
+* Example hook handler: Log all commands to a file
+*
+* This handler demonstrates how to create a hook that logs all command events
+* to a centralized log file for audit/debugging purposes.
+*
+* To enable this handler, add it to your config:
+*
+* ```json
+* {
+*   "hooks": {
+*     "internal": {
+*       "enabled": true,
+*       "handlers": [
+*         {
+*           "event": "command",
+*           "module": "./hooks/handlers/command-logger.ts"
+*         }
+*       ]
+*     }
+*   }
+* }
+* ```
+*/
+const log = createSubsystemLogger("command-logger");
+/**
+* Log all command events to a file
+*/
+const logCommand = async (event) => {
+	if (event.type !== "command") return;
+	try {
+		const stateDir = resolveStateDir(process.env, os.homedir);
+		const logDir = path.join(stateDir, "logs");
+		await fs.mkdir(logDir, { recursive: true });
+		const logFile = path.join(logDir, "commands.log");
+		const logLine = JSON.stringify({
+			timestamp: event.timestamp.toISOString(),
+			action: event.action,
+			sessionKey: event.sessionKey,
+			senderId: event.context.senderId ?? "unknown",
+			source: event.context.commandSource ?? "unknown"
+		}) + "\n";
+		await fs.appendFile(logFile, logLine, "utf-8");
+	} catch (err) {
+		const message = err instanceof Error ? err.message : String(err);
+		log.error(`Failed to log command: ${message}`);
+	}
+};
+//#endregion
+export { logCommand as default };
@@ -0,0 +1,109 @@
+---
+name: session-memory
+description: "Save session context to memory when /new or /reset command is issued"
+homepage: https://docs.openclaw.ai/automation/hooks#session-memory
+metadata:
+  {
+    "openclaw":
+      {
+        "emoji": "💾",
+        "events": ["command:new", "command:reset"],
+        "requires": { "config": ["workspace.dir"] },
+        "install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
+      },
+  }
+---
+
+# Session Memory Hook
+
+Automatically saves session context to your workspace memory when you issue `/new` or `/reset`.
+
+## What It Does
+
+When you run `/new` or `/reset` to start a fresh session:
+
+1. **Finds the previous session** - Uses the pre-reset session entry to locate the correct transcript
+2. **Extracts conversation** - Reads the last N user/assistant messages from the session (default: 15, configurable)
+3. **Generates descriptive slug** - Uses LLM to create a meaningful filename slug based on conversation content
+4. **Saves to memory** - Creates a new file at `<workspace>/memory/YYYY-MM-DD-slug.md`
+5. **Logs confirmation** - Records the saved file path in the hook log
+
+## Output Format
+
+Memory files are created with the following format:
+
+```markdown
+# Session: 2026-01-16 14:30:00 UTC
+
+- **Session Key**: agent:main:main
+- **Session ID**: abc123def456
+- **Source**: telegram
+```
+
+## Filename Examples
+
+The LLM generates descriptive slugs based on your conversation:
+
+- `2026-01-16-vendor-pitch.md` - Discussion about vendor evaluation
+- `2026-01-16-api-design.md` - API architecture planning
+- `2026-01-16-bug-fix.md` - Debugging session
+- `2026-01-16-1430.md` - Fallback timestamp if slug generation fails
+
+## Requirements
+
+- **Config**: `workspace.dir` must be set (automatically configured during onboarding)
+
+The hook uses your configured LLM provider to generate slugs, so it works with any provider (Anthropic, OpenAI, etc.).
+
+## Configuration
+
+The hook supports optional configuration:
+
+| Option     | Type   | Default | Description                                                     |
+| ---------- | ------ | ------- | --------------------------------------------------------------- |
+| `messages` | number | 15      | Number of user/assistant messages to include in the memory file |
+
+Example configuration:
+
+```json
+{
+  "hooks": {
+    "internal": {
+      "entries": {
+        "session-memory": {
+          "enabled": true,
+          "messages": 25
+        }
+      }
+    }
+  }
+}
+```
+
+The hook automatically:
+
+- Uses your workspace directory (`~/.openclaw/workspace` by default)
+- Uses your configured LLM for slug generation
+- Falls back to timestamp slugs if LLM is unavailable
+
+## Disabling
+
+To disable this hook:
+
+```bash
+openclaw hooks disable session-memory
+```
+
+Or disable it via config:
+
+```json
+{
+  "hooks": {
+    "internal": {
+      "entries": {
+        "session-memory": { "enabled": false }
+      }
+    }
+  }
+}
+```
@@ -0,0 +1,238 @@
+import { c as resolveAgentWorkspaceDir } from "../../run-with-concurrency-Cuc1THN9.js";
+import { c as resolveStateDir } from "../../paths-hfkBoC7i.js";
+import { t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
+import { B as resolveAgentIdFromSessionKey } from "../../workspace-CaW79EXh.js";
+import "../../logger-BW8uLq6f.js";
+import "../../model-selection-BU6wl1le.js";
+import "../../github-copilot-token-CQmATy5E.js";
+import "../../legacy-names-BAf61_0I.js";
+import "../../thinking-B5B36ffe.js";
+import "../../tokens-CT3nywWU.js";
+import "../../pi-embedded-C6ITuRXf.js";
+import "../../plugins-BZr8LJrk.js";
+import "../../accounts-D4KOSoV2.js";
+import "../../send-BLQvMYTW.js";
+import "../../send-DyQ6zcob.js";
+import "../../deliver-ClGktCjk.js";
+import "../../diagnostic-B9sgiG77.js";
+import "../../accounts-cJqOTvBI.js";
+import "../../image-ops-D4vlUR_L.js";
+import "../../send-D4CMR9ev.js";
+import "../../pi-model-discovery--C0FuY_K.js";
+import { pt as hasInterSessionUserProvenance } from "../../pi-embedded-helpers-CkWXaNFn.js";
+import "../../chrome-u1QjWgKY.js";
+import "../../frontmatter-CZF6xkL3.js";
+import "../../skills-B24U0XQQ.js";
+import "../../path-alias-guards-CouH80Zp.js";
+import "../../redact-DSv8X-3F.js";
+import "../../errors-_LEe37ld.js";
+import { c as writeFileWithinRoot } from "../../fs-safe-DOYVoR6M.js";
+import "../../proxy-env-BZseFuIl.js";
+import "../../store-BteyapSQ.js";
+import "../../paths-Co-u8IhA.js";
+import "../../tool-images-C0W994KU.js";
+import "../../image-fMgabouP.js";
+import "../../audio-transcription-runner-DfRfzdqH.js";
+import "../../fetch-JzejSI-7.js";
+import "../../fetch-guard-C3LWD6FT.js";
+import "../../api-key-rotation-CLI6TxVv.js";
+import "../../proxy-fetch-CbII9--S.js";
+import "../../ir-D_UJzvhu.js";
+import "../../render-7C7EDC8_.js";
+import "../../target-errors-C8xePsI5.js";
+import "../../commands-registry-DJWLO-6B.js";
+import "../../skill-commands-B6iXy7Nx.js";
+import "../../fetch-CONQGbzL.js";
+import "../../channel-activity-CVe33Aey.js";
+import "../../tables-DushlpuO.js";
+import "../../send-CHthYes-.js";
+import "../../outbound-attachment-3soL6fn0.js";
+import "../../send-DYCEGbmH.js";
+import "../../proxy-BzwL4n0W.js";
+import "../../manager-DS9FBMMG.js";
+import "../../query-expansion-DUWWrH-g.js";
+import { generateSlugViaLLM } from "../../llm-slug-generator.js";
+import { t as resolveHookConfig } from "../../config-Bs6iYHRw.js";
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+//#region src/hooks/bundled/session-memory/handler.ts
+/**
+* Session memory hook handler
+*
+* Saves session context to memory when /new or /reset command is triggered
+* Creates a new dated memory file with LLM-generated slug
+*/
+const log = createSubsystemLogger("hooks/session-memory");
+/**
+* Read recent messages from session file for slug generation
+*/
+async function getRecentSessionContent(sessionFilePath, messageCount = 15) {
+	try {
+		const lines = (await fs.readFile(sessionFilePath, "utf-8")).trim().split("\n");
+		const allMessages = [];
+		for (const line of lines) try {
+			const entry = JSON.parse(line);
+			if (entry.type === "message" && entry.message) {
+				const msg = entry.message;
+				const role = msg.role;
+				if ((role === "user" || role === "assistant") && msg.content) {
+					if (role === "user" && hasInterSessionUserProvenance(msg)) continue;
+					const text = Array.isArray(msg.content) ? msg.content.find((c) => c.type === "text")?.text : msg.content;
+					if (text && !text.startsWith("/")) allMessages.push(`${role}: ${text}`);
+				}
+			}
+		} catch {}
+		return allMessages.slice(-messageCount).join("\n");
+	} catch {
+		return null;
+	}
+}
+/**
+* Try the active transcript first; if /new already rotated it,
+* fallback to the latest .jsonl.reset.* sibling.
+*/
+async function getRecentSessionContentWithResetFallback(sessionFilePath, messageCount = 15) {
+	const primary = await getRecentSessionContent(sessionFilePath, messageCount);
+	if (primary) return primary;
+	try {
+		const dir = path.dirname(sessionFilePath);
+		const resetPrefix = `${path.basename(sessionFilePath)}.reset.`;
+		const resetCandidates = (await fs.readdir(dir)).filter((name) => name.startsWith(resetPrefix)).toSorted();
+		if (resetCandidates.length === 0) return primary;
+		const latestResetPath = path.join(dir, resetCandidates[resetCandidates.length - 1]);
+		const fallback = await getRecentSessionContent(latestResetPath, messageCount);
+		if (fallback) log.debug("Loaded session content from reset fallback", {
+			sessionFilePath,
+			latestResetPath
+		});
+		return fallback || primary;
+	} catch {
+		return primary;
+	}
+}
+function stripResetSuffix(fileName) {
+	const resetIndex = fileName.indexOf(".reset.");
+	return resetIndex === -1 ? fileName : fileName.slice(0, resetIndex);
+}
+async function findPreviousSessionFile(params) {
+	try {
+		const files = await fs.readdir(params.sessionsDir);
+		const fileSet = new Set(files);
+		const baseFromReset = params.currentSessionFile ? stripResetSuffix(path.basename(params.currentSessionFile)) : void 0;
+		if (baseFromReset && fileSet.has(baseFromReset)) return path.join(params.sessionsDir, baseFromReset);
+		const trimmedSessionId = params.sessionId?.trim();
+		if (trimmedSessionId) {
+			const canonicalFile = `${trimmedSessionId}.jsonl`;
+			if (fileSet.has(canonicalFile)) return path.join(params.sessionsDir, canonicalFile);
+			const topicVariants = files.filter((name) => name.startsWith(`${trimmedSessionId}-topic-`) && name.endsWith(".jsonl") && !name.includes(".reset.")).toSorted().toReversed();
+			if (topicVariants.length > 0) return path.join(params.sessionsDir, topicVariants[0]);
+		}
+		if (!params.currentSessionFile) return;
+		const nonResetJsonl = files.filter((name) => name.endsWith(".jsonl") && !name.includes(".reset.")).toSorted().toReversed();
+		if (nonResetJsonl.length > 0) return path.join(params.sessionsDir, nonResetJsonl[0]);
+	} catch {}
+}
+/**
+* Save session context to memory when /new or /reset command is triggered
+*/
+const saveSessionToMemory = async (event) => {
+	const isResetCommand = event.action === "new" || event.action === "reset";
+	if (event.type !== "command" || !isResetCommand) return;
+	try {
+		log.debug("Hook triggered for reset/new command", { action: event.action });
+		const context = event.context || {};
+		const cfg = context.cfg;
+		const agentId = resolveAgentIdFromSessionKey(event.sessionKey);
+		const workspaceDir = cfg ? resolveAgentWorkspaceDir(cfg, agentId) : path.join(resolveStateDir(process.env, os.homedir), "workspace");
+		const memoryDir = path.join(workspaceDir, "memory");
+		await fs.mkdir(memoryDir, { recursive: true });
+		const now = new Date(event.timestamp);
+		const dateStr = now.toISOString().split("T")[0];
+		const sessionEntry = context.previousSessionEntry || context.sessionEntry || {};
+		const currentSessionId = sessionEntry.sessionId;
+		let currentSessionFile = sessionEntry.sessionFile || void 0;
+		if (!currentSessionFile || currentSessionFile.includes(".reset.")) {
+			const sessionsDirs = /* @__PURE__ */ new Set();
+			if (currentSessionFile) sessionsDirs.add(path.dirname(currentSessionFile));
+			sessionsDirs.add(path.join(workspaceDir, "sessions"));
+			for (const sessionsDir of sessionsDirs) {
+				const recoveredSessionFile = await findPreviousSessionFile({
+					sessionsDir,
+					currentSessionFile,
+					sessionId: currentSessionId
+				});
+				if (!recoveredSessionFile) continue;
+				currentSessionFile = recoveredSessionFile;
+				log.debug("Found previous session file", { file: currentSessionFile });
+				break;
+			}
+		}
+		log.debug("Session context resolved", {
+			sessionId: currentSessionId,
+			sessionFile: currentSessionFile,
+			hasCfg: Boolean(cfg)
+		});
+		const sessionFile = currentSessionFile || void 0;
+		const hookConfig = resolveHookConfig(cfg, "session-memory");
+		const messageCount = typeof hookConfig?.messages === "number" && hookConfig.messages > 0 ? hookConfig.messages : 15;
+		let slug = null;
+		let sessionContent = null;
+		if (sessionFile) {
+			sessionContent = await getRecentSessionContentWithResetFallback(sessionFile, messageCount);
+			log.debug("Session content loaded", {
+				length: sessionContent?.length ?? 0,
+				messageCount
+			});
+			const allowLlmSlug = !(process.env.OPENCLAW_TEST_FAST === "1" || process.env.VITEST === "true" || process.env.VITEST === "1" || false) && hookConfig?.llmSlug !== false;
+			if (sessionContent && cfg && allowLlmSlug) {
+				log.debug("Calling generateSlugViaLLM...");
+				slug = await generateSlugViaLLM({
+					sessionContent,
+					cfg
+				});
+				log.debug("Generated slug", { slug });
+			}
+		}
+		if (!slug) {
+			slug = now.toISOString().split("T")[1].split(".")[0].replace(/:/g, "").slice(0, 4);
+			log.debug("Using fallback timestamp slug", { slug });
+		}
+		const filename = `${dateStr}-${slug}.md`;
+		const memoryFilePath = path.join(memoryDir, filename);
+		log.debug("Memory file path resolved", {
+			filename,
+			path: memoryFilePath.replace(os.homedir(), "~")
+		});
+		const timeStr = now.toISOString().split("T")[1].split(".")[0];
+		const sessionId = sessionEntry.sessionId || "unknown";
+		const source = context.commandSource || "unknown";
+		const entryParts = [
+			`# Session: ${dateStr} ${timeStr} UTC`,
+			"",
+			`- **Session Key**: ${event.sessionKey}`,
+			`- **Session ID**: ${sessionId}`,
+			`- **Source**: ${source}`,
+			""
+		];
+		if (sessionContent) entryParts.push("## Conversation Summary", "", sessionContent, "");
+		await writeFileWithinRoot({
+			rootDir: memoryDir,
+			relativePath: filename,
+			data: entryParts.join("\n"),
+			encoding: "utf-8"
+		});
+		log.debug("Memory file written successfully");
+		const relPath = memoryFilePath.replace(os.homedir(), "~");
+		log.info(`Session context saved to ${relPath}`);
+	} catch (err) {
+		if (err instanceof Error) log.error("Failed to save session memory", {
+			errorName: err.name,
+			errorMessage: err.message,
+			stack: err.stack
+		});
+		else log.error("Failed to save session memory", { error: String(err) });
+	}
+};
+//#endregion
+export { saveSessionToMemory as default };
@@ -0,0 +1,18 @@
+{
+  "version": 1,
+  "deviceId": "ad0ebece2493ecaf2336b939a2cc27e65261695c8c8725416e1d349da02a14d5",
+  "tokens": {
+    "operator": {
+      "token": "pg1GmeUDISnd7tcZBg7egNxxZSfJOpYJ1CfjrVXA9r0",
+      "role": "operator",
+      "scopes": [
+        "operator.admin",
+        "operator.approvals",
+        "operator.pairing",
+        "operator.read",
+        "operator.write"
+      ],
+      "updatedAtMs": 1774473144159
+    }
+  }
+}
@@ -0,0 +1,5 @@
+{
+  "version": 2,
+  "lastUpdateId": 148911073,
+  "botId": "8792219052"
+}
@@ -0,0 +1,7 @@
+{
+  "lastCheckedAt": "2026-03-24T22:42:51.772Z",
+  "lastNotifiedVersion": "2026.3.23-2",
+  "lastNotifiedTag": "latest",
+  "lastAvailableVersion": "2026.3.23-2",
+  "lastAvailableTag": "latest"
+}
@@ -0,0 +1,103 @@
+# OpenVINO NPU advisory gateway
+
+Bounded Docker-bridge wrapper for the classifier, GenAI worker, and doc/image triage sidecars.
+
+- HTTP bind: `172.19.0.1:18830` for `n8n-agent` on the `swarm_default` Docker bridge
+- Service: `openvino-advisory-gateway.service`
+- Mode: advisory/shadow/draft only
+- Metadata log: `~/.local/state/openvino-advisory-gateway/events.sqlite`
+
+## Authority boundary
+
+Every response includes an explicit authority block:
+
+```json
+{
+  "may_route": false,
+  "may_write_memory": false,
+  "may_send_external": false,
+  "may_process_private_dirs": false,
+  "may_execute_tools": false,
+  "may_restart_services": false
+}
+```
+
+This service may provide hints and drafts. It must not become the live Atlas/Hermes router, memory writer, primary chat model, external sender, tool executor, service restarter, or broad private document processor without a separate approved integration.
+
+## Endpoints
+
+```text
+GET  /healthz
+POST /v1/advisory/classify
+POST /v1/advisory/generate
+POST /v1/advisory/triage
+```
+
+## Cron and n8n advisory dry-run contract
+
+For cron/n8n event classification, use the dry-run contract in `docs/cron-n8n-advisory-classifier.md`.
+It defines the normalized event envelope, decision envelope, `suppress|log|summarize|escalate` recommendation mapping, and duplicate/stale/no-op/action-required examples.
+
+Example artifacts:
+
+- `examples/cron-advisory-dry-run.sh` — host-local cron wrapper that prints one compact decision line and performs no side effects.
+- `examples/n8n-advisory-dry-run-fragment.json` — sanitized inactive n8n node fragment for Set -> HTTP Request -> Code decision mapping.
+
+Both examples preserve the gateway authority boundary: advisory only, no send/restart/memory/tool/routing authority.
+
+### Classifier shadow call
+
+```bash
+curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
+  -H 'Content-Type: application/json' \
+  -d '{"trace_id":"smoke","text":"Urgent: inspect service health and systemd status."}' | jq .
+```
+
+### Bounded GenAI draft
+
+Allowed jobs: `title`, `summary`, `notification`, `memory_candidate`.
+
+```bash
+curl -fsS http://172.19.0.1:18830/v1/advisory/generate \
+  -H 'Content-Type: application/json' \
+  -d '{"job":"title","input":"Summarize a local health check.","max_new_tokens":24}' | jq .
+```
+
+### Explicit-file doc/image triage
+
+```bash
+curl -fsS http://172.19.0.1:18830/v1/advisory/triage \
+  -H 'Content-Type: application/json' \
+  -d '{"path":"/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png","allowed_roots":["/home/will/lab/swarm/openvino-doc-image-triage-npu"]}' | jq .
+```
+
+The gateway requires the path to be inside both:
+
+1. a configured allowed root on the gateway process; and
+2. the request's explicit `allowed_roots` list, if one is provided.
+
+Requests cannot broaden the process-configured roots. Do not broaden configured roots to private folders without explicit approval for that root and task.
+
+## Install / run
+
+```bash
+install -m 0644 openvino-advisory-gateway.service ~/.config/systemd/user/openvino-advisory-gateway.service
+systemctl --user daemon-reload
+systemctl --user enable --now openvino-advisory-gateway.service
+systemctl --user status openvino-advisory-gateway.service --no-pager
+```
+
+`--allowed-root` may be repeated in the systemd unit when additional non-private fixture/review directories are approved. Docker bridge exposure must use `--allow-docker-bridge` and the approved bridge IP `172.19.0.1`; the service still refuses wildcard binds such as `0.0.0.0`.
+
+From `n8n-agent`, verify bridge reachability with:
+
+```bash
+docker exec n8n-agent wget -qO- -T 8 http://172.19.0.1:18830/healthz
+```
+
+## Tests
+
+```bash
+cd /home/will/lab/swarm/openvino-advisory-gateway
+python -m pytest tests/test_gateway.py -q
+```
@@ -0,0 +1,256 @@
+# Cron and n8n advisory classifier contract
+
+- Status: dry-run specification and integration examples
+- Scope: cron and n8n alert/event classification through the OpenVINO advisory gateway
+- Gateway: `http://172.19.0.1:18830` from `n8n-agent` and host-local cron on the current bridge-bound service. Override `NPU_ADVISORY_GATEWAY_URL=http://127.0.0.1:18830` only if a localhost-bound instance is explicitly running.
+
+## Authority boundary
+
+This contract is advisory only. It may recommend one of `suppress`, `log`, `summarize`, or `escalate`, but it must not perform the action itself.
+
+Every integration must preserve these authority flags:
+
+```json
+{
+  "may_route": false,
+  "may_write_memory": false,
+  "may_send_external": false,
+  "may_process_private_dirs": false,
+  "may_execute_tools": false,
+  "may_restart_services": false
+}
+```
+
+Allowed side effects in dry-run mode:
+
+- read an explicit cron/n8n event payload;
+- call the advisory gateway classifier/generator;
+- write compact local stdout or n8n execution logs;
+- store metadata-only advisory counters if an existing log sink already does so.
+
+Forbidden without separate explicit approval:
+
+- outbound sends/pages/Discord/Telegram/email;
+- service restarts, command execution, or tool calls;
+- Hermes/Atlas routing changes;
+- memory writes;
+- broad private-directory processing;
+- vector database mutation or reindexing.
+
+## Input event envelope
+
+Cron and n8n producers should normalize events before classification. Keep this input small and avoid raw private payloads.
+
+```json
+{
+  "schema": "cron_n8n_event_v1",
+  "trace_id": "cron:service-health:2026-06-05T14:30:00Z",
+  "source": "cron",
+  "workflow": "npu-service-health",
+  "event_kind": "health_check",
+  "severity": "warning",
+  "subject": "openvino-reranker health check repeated warning",
+  "summary": "Two consecutive health probes reported timeout, no restart attempted.",
+  "dedupe_key": "service:openvino-reranker:timeout",
+  "observed_at": "2026-06-05T14:30:00Z",
+  "stale_after_s": 900,
+  "action_requested": false,
+  "dry_run": true
+}
+```
+
+Field rules:
+
+- `source`: `cron` or `n8n`.
+- `workflow`: compact job/workflow name, not a private URL.
+- `subject` + `summary`: the only text sent to the classifier.
+- `dedupe_key`: stable non-secret key for duplicate detection by the caller.
+- `stale_after_s`: caller-side freshness gate; stale events should not page.
+- `action_requested`: true only when an upstream job is asking a human/Atlas to consider action.
+- `dry_run`: must remain true for this phase.
+
+## Gateway classifier call
+
+The current gateway `/v1/advisory/classify` accepts explicit text and wraps the classifier response in `openvino_advisory_v1` with NPU proof and authority fields.
+
+Host cron example for the current bridge-bound service:
+
+```bash
+curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "trace_id":"cron:service-health:sample",
+    "text":"source=cron workflow=npu-service-health severity=warning kind=health_check subject=openvino-reranker repeated timeout summary=Two consecutive health probes reported timeout; no restart attempted; dry_run=true"
+  }' | jq '{schema, mode, trace_id, npu_ok: .npu_proof.ok, npu_delta: .npu_proof.npu_busy_delta_us, authority, labels: .result.labels}'
+```
+
+n8n Docker-bridge example:
+
+```bash
+curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
+  -H 'Content-Type: application/json' \
+  -d '{"trace_id":"n8n:swarm-health:sample","text":"source=n8n workflow=swarm-health-watchdog severity=critical kind=health_check subject=multiple services unhealthy summary=Health probe failed for three services; dry_run=true"}' \
+  | jq '{mode, npu_ok: .npu_proof.ok, npu_delta: .npu_proof.npu_busy_delta_us, may_send_external: .authority.may_send_external}'
+```
+
+NPU proof gate: an HTTP 200 is not enough. Treat the classifier as NPU-backed only when `.npu_proof.ok == true` and `.npu_proof.npu_busy_delta_us > 0` for real inference.
+
+## Advisory decision envelope
+
+Cron/n8n wrappers should map the gateway response plus caller-side freshness/deduplication state into this compact decision envelope:
+
+```json
+{
+  "schema": "cron_n8n_advisory_decision_v1",
+  "trace_id": "cron:service-health:2026-06-05T14:30:00Z",
+  "source": "cron",
+  "workflow": "npu-service-health",
+  "dry_run": true,
+  "recommendation": "summarize",
+  "classification": "action_required",
+  "confidence": 0.84,
+  "reason_codes": ["warning_or_high_urgency", "fresh_event", "not_duplicate"],
+  "npu_proof": {"required": true, "ok": true, "npu_busy_delta_us": 1234},
+  "authority": {
+    "may_route": false,
+    "may_write_memory": false,
+    "may_send_external": false,
+    "may_process_private_dirs": false,
+    "may_execute_tools": false,
+    "may_restart_services": false
+  },
+  "next_gate": "human_or_atlas_review_required_before_any_side_effect"
+}
+```
+
+Decision fields:
+
+- `recommendation`: `suppress`, `log`, `summarize`, or `escalate`.
+- `classification`: `duplicate`, `stale`, `no_op`, or `action_required` for v1 examples.
+- `confidence`: use classifier urgency/category confidence when available; otherwise use a conservative wrapper score.
+- `reason_codes`: compact machine-readable rationale, not raw payload text.
+- `next_gate`: always a review/approval gate before side effects.
+
+## Recommendation mapping
+
+This is the v1 dry-run mapping. It is intentionally conservative and caller-side; the NPU classifier advises, the wrapper chooses a recommendation, and humans/Atlas retain authority.
+
+| Caller/classifier signal | Classification | Recommendation | Dry-run behavior |
+|---|---|---|---|
+| Same `dedupe_key` observed inside caller cooldown | `duplicate` | `suppress` | Log compact duplicate count only. Do not send. |
+| `observed_at + stale_after_s` is earlier than the current time | `stale` | `log` | Log stale event and age. Do not summarize/page. |
+| Severity low/normal, no action requested, classifier urgency low/normal | `no_op` | `log` | Keep normal execution log only. |
+| Warning/high urgency or action requested, NPU proof ok | `action_required` | `summarize` | Draft a local summary for review; no send/restart. |
+| Critical severity or repeated failures and NPU proof ok | `action_required` | `escalate` | Recommend escalation to Atlas/human; wrapper still must not send/restart. |
+| NPU proof missing or false | `action_required` or caller-specific | `log` | Log `npu_proof_failed`; do not claim NPU-backed advice. |
+
+## Required examples
+
+### Duplicate -> suppress
+
+Input summary:
+
+```json
+{"source":"cron","workflow":"npu-service-health","severity":"warning","dedupe_key":"service:reranker:timeout","summary":"Same timeout as prior run inside cooldown.","dry_run":true}
+```
+
+Decision:
+
+```json
+{"classification":"duplicate","recommendation":"suppress","reason_codes":["dedupe_key_in_cooldown"],"next_gate":"none_in_dry_run"}
+```
+
+### Stale -> log
+
+Input summary:
+
+```json
+{"source":"n8n","workflow":"swarm-health-watchdog","severity":"warning","observed_at":"older_than_stale_after","stale_after_s":900,"summary":"Delayed webhook replay for an old probe.","dry_run":true}
+```
+
+Decision:
+
+```json
+{"classification":"stale","recommendation":"log","reason_codes":["event_stale"],"next_gate":"none_in_dry_run"}
+```
+
+### No-op -> log
+
+Input summary:
+
+```json
+{"source":"cron","workflow":"backup-check","severity":"normal","action_requested":false,"summary":"Backup completed and all expected files are present.","dry_run":true}
+```
+
+Decision:
+
+```json
+{"classification":"no_op","recommendation":"log","reason_codes":["normal_severity","no_action_requested"],"next_gate":"none_in_dry_run"}
+```
+
+### Action required -> summarize/escalate
+
+Input summary:
+
+```json
+{"source":"n8n","workflow":"swarm-health-watchdog","severity":"critical","action_requested":true,"summary":"RAG and embeddings health failed repeatedly; no restart attempted.","dry_run":true}
+```
+
+Decision:
+
+```json
+{"classification":"action_required","recommendation":"escalate","reason_codes":["critical_severity","action_requested","fresh_event"],"next_gate":"human_or_atlas_review_required_before_any_side_effect"}
+```
+
+## Optional local summary draft
+
+If the decision is `summarize` or `escalate`, a wrapper may request a bounded draft from `/v1/advisory/generate`:
+
+```bash
+curl -fsS http://172.19.0.1:18830/v1/advisory/generate \
+  -H 'Content-Type: application/json' \
+  -d '{"trace_id":"cron:service-health:sample","job":"summary","input":"Health check warning: openvino-reranker timed out twice; no restart attempted.","max_new_tokens":48}' \
+  | jq '{mode, trace_id, npu_ok: .npu_proof.ok, authority, draft: .result.draft_text, final_authority: .result.final_authority}'
+```
+
+The draft remains non-authoritative. It must not be automatically sent externally or written to memory.
+
+## n8n integration pattern
+
+Recommended node chain for dry-run workflows:
+
+```text
+Schedule/Webhook/Failure Trigger
+  -> Set normalized event envelope
+  -> HTTP Request POST /v1/advisory/classify
+  -> Code node maps decision envelope
+  -> IF node on recommendation
+      suppress/log: execution log only
+      summarize/escalate: optional local summary draft, then execution log only
+```
+
+The IF node must not connect to outbound messaging, service restart, memory write, or Hermes routing nodes until a separate approval changes the authority boundary.
+
+See `../examples/n8n-advisory-dry-run-fragment.json` for a sanitized node fragment.
+
+## Cron integration pattern
+
+Cron jobs should call a wrapper script that prints one compact line and exits successfully unless the wrapper itself fails. The wrapper should not page or restart.
+
+Example crontab shape:
+
+```text
+*/15 * * * * /home/will/lab/swarm/openvino-advisory-gateway/examples/cron-advisory-dry-run.sh npu-service-health warning health_check "openvino-reranker timeout twice" "service:openvino-reranker:timeout" >> /home/will/.local/state/npu-advisory/cron.log 2>&1
+```
+
+See `../examples/cron-advisory-dry-run.sh`.
+
+## Verification checklist
+
+- Gateway health is reachable on the intended interface.
+- Classifier response includes `schema=openvino_advisory_v1`.
+- Every `.authority.*` flag is `false`; the response grants no side-effect authority.
+- `.npu_proof.ok` is true and `npu_busy_delta_us > 0` before claiming NPU-backed advice.
+- Decision envelope is compact and contains only booleans/counts/paths/deltas/gates.
+- Duplicate/stale/no-op/action-required examples remain dry-run only.
+- No n8n workflow activation, outbound send, service restart, memory write, routing change, private-dir broadening, or vector DB mutation occurred.
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Dry-run cron/n8n advisory wrapper.
+# It calls the advisory classifier and prints one compact decision line.
+# It does not send, restart, route, execute tools, or write memory.
+
+GATEWAY_URL="${NPU_ADVISORY_GATEWAY_URL:-http://172.19.0.1:18830}"
+WORKFLOW="${1:-cron-advisory-sample}"
+SEVERITY="${2:-normal}"
+EVENT_KIND="${3:-health_check}"
+SUBJECT="${4:-sample advisory event}"
+DEDUPE_KEY="${5:-sample}"
+TRACE_ID="${NPU_ADVISORY_TRACE_ID:-cron:${WORKFLOW}:$(date -u +%Y%m%dT%H%M%SZ)}"
+
+TEXT="source=cron workflow=${WORKFLOW} severity=${SEVERITY} kind=${EVENT_KIND} subject=${SUBJECT} dedupe_key=${DEDUPE_KEY} dry_run=true authority=no-send,no-restart,no-memory"
+
+payload=$(jq -nc --arg trace_id "$TRACE_ID" --arg text "$TEXT" '{trace_id:$trace_id,text:$text}')
+response=$(curl -fsS "${GATEWAY_URL%/}/v1/advisory/classify" -H 'Content-Type: application/json' -d "$payload")
+
+printf '%s\n' "$response" | jq -c --arg source cron --arg workflow "$WORKFLOW" --arg severity "$SEVERITY" --arg dedupe_key "$DEDUPE_KEY" '
+  . as $env
+  | ($env.result.labels.urgency.value // "normal") as $urgency
+  | ($env.result.labels.urgency.confidence // 0) as $confidence
+  | ($env.npu_proof.ok == true and (($env.npu_proof.npu_busy_delta_us // 0) > 0)) as $npu_ok
+  | (if ($npu_ok | not) then "log"
+     elif ($severity == "critical") then "escalate"
+     elif ($severity == "warning" or $urgency == "high" or $urgency == "critical") then "summarize"
+     else "log" end) as $recommendation
+  | (if ($recommendation == "log" and $severity == "normal") then "no_op" else "action_required" end) as $classification
+  | {
+      schema: "cron_n8n_advisory_decision_v1",
+      trace_id: $env.trace_id,
+      source: $source,
+      workflow: $workflow,
+      dry_run: true,
+      recommendation: $recommendation,
+      classification: $classification,
+      confidence: $confidence,
+      reason_codes: ([
+        (if $npu_ok then "npu_proof_ok" else "npu_proof_failed" end),
+        ("severity_" + $severity),
+        ("urgency_" + $urgency)
+      ]),
+      npu_proof: $env.npu_proof,
+      authority: $env.authority,
+      next_gate: (if $recommendation == "escalate" or $recommendation == "summarize" then "human_or_atlas_review_required_before_any_side_effect" else "none_in_dry_run" end)
+    }'
@@ -0,0 +1,70 @@
+{
+  "name": "OpenVINO Advisory Dry-Run Fragment",
+  "active": false,
+  "nodes": [
+    {
+      "parameters": {
+        "values": {
+          "string": [
+            {"name": "schema", "value": "cron_n8n_event_v1"},
+            {"name": "source", "value": "n8n"},
+            {"name": "workflow", "value": "swarm-health-watchdog"},
+            {"name": "event_kind", "value": "health_check"},
+            {"name": "severity", "value": "warning"},
+            {"name": "subject", "value": "OpenVINO service health warning"},
+            {"name": "summary", "value": "Health probe reported a warning; no restart or send is authorized."},
+            {"name": "dedupe_key", "value": "service:openvino:warning"},
+            {"name": "dry_run", "value": "true"}
+          ]
+        },
+        "options": {}
+      },
+      "id": "set-normalized-event",
+      "name": "Set normalized advisory event",
+      "type": "n8n-nodes-base.set",
+      "typeVersion": 2,
+      "position": [260, 300]
+    },
+    {
+      "parameters": {
+        "method": "POST",
+        "url": "http://172.19.0.1:18830/v1/advisory/classify",
+        "sendBody": true,
+        "contentType": "json",
+        "jsonBody": "={{ JSON.stringify({ trace_id: 'n8n:' + $json.workflow + ':' + $now.toISO(), text: 'source=n8n workflow=' + $json.workflow + ' severity=' + $json.severity + ' kind=' + $json.event_kind + ' subject=' + $json.subject + ' summary=' + $json.summary + ' dedupe_key=' + $json.dedupe_key + ' dry_run=true authority=no-send,no-restart,no-memory' }) }}",
+        "options": {
+          "timeout": 20000
+        }
+      },
+      "id": "http-advisory-classify",
+      "name": "HTTP advisory classify dry-run",
+      "type": "n8n-nodes-base.httpRequest",
+      "typeVersion": 4,
+      "position": [520, 300]
+    },
+    {
+      "parameters": {
+        "jsCode": "const env = $json;\nconst labels = env.result?.labels || {};\nconst urgency = labels.urgency?.value || 'normal';\nconst severity = $('Set normalized advisory event').first().json.severity || 'normal';\nconst npuOk = env.npu_proof?.ok === true && (env.npu_proof?.npu_busy_delta_us || 0) > 0;\nlet recommendation = 'log';\nlet classification = 'no_op';\nconst reason_codes = [npuOk ? 'npu_proof_ok' : 'npu_proof_failed', `severity_${severity}`, `urgency_${urgency}`];\nif (npuOk && severity === 'critical') { recommendation = 'escalate'; classification = 'action_required'; }\nelse if (npuOk && (severity === 'warning' || urgency === 'high' || urgency === 'critical')) { recommendation = 'summarize'; classification = 'action_required'; }\nif (!npuOk) reason_codes.push('log_only_no_npu_claim');\nreturn [{ json: { schema: 'cron_n8n_advisory_decision_v1', trace_id: env.trace_id, source: 'n8n', workflow: $('Set normalized advisory event').first().json.workflow, dry_run: true, recommendation, classification, confidence: labels.urgency?.confidence || 0, reason_codes, npu_proof: env.npu_proof, authority: env.authority, next_gate: (recommendation === 'summarize' || recommendation === 'escalate') ? 'human_or_atlas_review_required_before_any_side_effect' : 'none_in_dry_run' } } }];"
+      },
+      "id": "map-dry-run-decision",
+      "name": "Map dry-run decision (no side effects)",
+      "type": "n8n-nodes-base.code",
+      "typeVersion": 2,
+      "position": [780, 300]
+    }
+  ],
+  "connections": {
+    "Set normalized advisory event": {
+      "main": [[{"node": "HTTP advisory classify dry-run", "type": "main", "index": 0}]]
+    },
+    "HTTP advisory classify dry-run": {
+      "main": [[{"node": "Map dry-run decision (no side effects)", "type": "main", "index": 0}]]
+    }
+  },
+  "settings": {
+    "executionOrder": "v1"
+  },
+  "pinData": {},
+  "staticData": null,
+  "tags": ["dry-run", "openvino", "advisory"]
+}
@@ -0,0 +1,374 @@
+#!/usr/bin/env python3
+"""Local-only advisory gateway for OpenVINO NPU sidecars.
+
+This service deliberately returns bounded advisory envelopes. It never routes,
+writes memory, sends external messages, executes tools, restarts services, or
+broadens document processing authority. Atlas/Hermes may use these outputs as
+hints only.
+"""
+from __future__ import annotations
+
+import argparse
+import hashlib
+import ipaddress
+import json
+import os
+import sqlite3
+import time
+import urllib.request
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+from typing import Any, Callable
+from urllib.parse import urlparse
+
+HOST = "127.0.0.1"
+DOCKER_BRIDGE_HOST = "172.19.0.1"
+PORT = 18830
+CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
+GENAI_URL = "http://127.0.0.1:18820/v1/worker/generate"
+DOC_TRIAGE_URL = "http://127.0.0.1:18829/triage"
+DEFAULT_LOG_DB = Path(os.environ.get("NPU_ADVISORY_LOG_DB", "/home/will/.local/state/openvino-advisory-gateway/events.sqlite"))
+DEFAULT_ALLOWED_ROOT = Path("/home/will/lab/swarm/openvino-doc-image-triage-npu")
+DEFAULT_ALLOWED_ROOTS = [Path(p) for p in os.environ.get("NPU_ADVISORY_ALLOWED_ROOTS", str(DEFAULT_ALLOWED_ROOT)).split(os.pathsep) if p]
+ALLOWED_GENAI_JOBS = {"title", "summary", "notification", "memory_candidate"}
+
+AUTHORITY = {
+    "may_route": False,
+    "may_write_memory": False,
+    "may_send_external": False,
+    "may_process_private_dirs": False,
+    "may_execute_tools": False,
+    "may_restart_services": False,
+}
+
+
+def validate_bind_host(host: str, *, allow_docker_bridge: bool = False) -> None:
+    """Restrict service exposure to localhost or the explicitly approved Docker bridge bind."""
+    if host == "127.0.0.1":
+        return
+    if not allow_docker_bridge:
+        raise ValueError("refusing non-local bind without --allow-docker-bridge")
+    try:
+        addr = ipaddress.ip_address(host)
+    except ValueError as exc:
+        raise ValueError("bind host must be a literal IP address") from exc
+    if host != DOCKER_BRIDGE_HOST or not (addr.version == 4 and addr.is_private and not addr.is_loopback and not addr.is_unspecified):
+        raise ValueError(f"Docker bridge bind must use approved bridge IP {DOCKER_BRIDGE_HOST}")
+
+
+def sha256_text(text: str) -> str:
+    return hashlib.sha256(text.encode("utf-8")).hexdigest()
+
+
+def http_post_json(url: str, payload: dict[str, Any], timeout_s: float = 20.0) -> dict[str, Any]:
+    req = urllib.request.Request(url, data=json.dumps(payload).encode("utf-8"), headers={"Content-Type": "application/json"}, method="POST")
+    with urllib.request.urlopen(req, timeout=timeout_s) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+def http_get_json(url: str, timeout_s: float = 8.0) -> dict[str, Any]:
+    with urllib.request.urlopen(url, timeout=timeout_s) as resp:
+        body = resp.read().decode("utf-8")
+    try:
+        return json.loads(body)
+    except json.JSONDecodeError:
+        return {"ok": True, "raw_text": body[:120]}
+
+
+def _npu_delta_from(result: dict[str, Any], fallback: int | None = None) -> int | None:
+    for key in ("npu_busy_delta_us", "sysfs_npu_busy_delta_us"):
+        value = result.get(key)
+        if isinstance(value, int):
+            return value
+        if isinstance(value, float):
+            return int(value)
+    return fallback
+
+
+def _doc_triage_npu_delta(result: dict[str, Any]) -> int | None:
+    pages = ((result.get("result") or {}).get("pages") or []) if isinstance(result, dict) else []
+    best: int | None = None
+    for page in pages:
+        emb = ((page.get("needs_attention") or {}).get("embedding") or {}) if isinstance(page, dict) else {}
+        delta = emb.get("npu_busy_delta_us")
+        if isinstance(delta, int):
+            best = max(best or 0, delta)
+    return best
+
+
+def build_envelope(
+    *,
+    service: str,
+    operation: str,
+    result: dict[str, Any],
+    mode: str = "advisory",
+    input_scope: str,
+    npu_busy_delta_us: int | None,
+    trace_id: str | None = None,
+    warnings: list[str] | None = None,
+) -> dict[str, Any]:
+    npu_ok = bool(isinstance(npu_busy_delta_us, int) and npu_busy_delta_us > 0)
+    return {
+        "ok": True,
+        "schema": "openvino_advisory_v1",
+        "service": service,
+        "operation": operation,
+        "mode": mode,
+        "trace_id": trace_id,
+        "input_scope": input_scope,
+        "result": result,
+        "npu_proof": {"required": True, "ok": npu_ok, "npu_busy_delta_us": npu_busy_delta_us},
+        "authority": dict(AUTHORITY),
+        "warnings": warnings or [],
+    }
+
+
+class AdvisoryLogger:
+    def __init__(self, db_path: str | Path = DEFAULT_LOG_DB):
+        self.db_path = Path(db_path)
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        self._init()
+
+    def _init(self) -> None:
+        with sqlite3.connect(self.db_path) as con:
+            con.execute(
+                """
+                CREATE TABLE IF NOT EXISTS advisory_events (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    created_at REAL NOT NULL,
+                    service TEXT NOT NULL,
+                    operation TEXT NOT NULL,
+                    mode TEXT NOT NULL,
+                    input_scope TEXT NOT NULL,
+                    input_ref TEXT NOT NULL,
+                    npu_busy_delta_us INTEGER,
+                    ok INTEGER NOT NULL,
+                    raw_payload TEXT
+                )
+                """
+            )
+
+    def log(self, envelope: dict[str, Any], *, input_ref: str) -> None:
+        proof = envelope.get("npu_proof") or {}
+        with sqlite3.connect(self.db_path) as con:
+            con.execute(
+                """
+                INSERT INTO advisory_events(created_at, service, operation, mode, input_scope, input_ref,
+                                            npu_busy_delta_us, ok, raw_payload)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, NULL)
+                """,
+                (
+                    time.time(),
+                    str(envelope.get("service")),
+                    str(envelope.get("operation")),
+                    str(envelope.get("mode")),
+                    str(envelope.get("input_scope")),
+                    input_ref,
+                    proof.get("npu_busy_delta_us"),
+                    1 if envelope.get("ok") else 0,
+                ),
+            )
+
+
+def classify_text(
+    text: str,
+    *,
+    trace_id: str | None = None,
+    http_post_json: Callable[[str, dict[str, Any], float], dict[str, Any]] = http_post_json,
+    logger: AdvisoryLogger | None = None,
+    timeout_s: float = 20.0,
+) -> dict[str, Any]:
+    if not isinstance(text, str) or not text.strip():
+        raise ValueError("text must be a non-empty string")
+    payload = {"id": trace_id or "advisory", "text": text, "options": {"include_evidence": False, "dry_run": True}}
+    result = http_post_json(CLASSIFIER_URL, payload, timeout_s)
+    envelope = build_envelope(
+        service="classifier",
+        operation="classify",
+        mode="shadow",
+        input_scope="explicit_text",
+        trace_id=trace_id,
+        result={"labels": result.get("labels", {}), "model": result.get("model"), "service_mode": result.get("mode", "dry_run")},
+        npu_busy_delta_us=_npu_delta_from(result),
+    )
+    if logger:
+        logger.log(envelope, input_ref="text:sha256:" + sha256_text(text))
+    return envelope
+
+
+def generate_bounded(
+    job: str,
+    text: str,
+    *,
+    max_new_tokens: int | None = None,
+    trace_id: str | None = None,
+    http_post_json: Callable[[str, dict[str, Any], float], dict[str, Any]] = http_post_json,
+    logger: AdvisoryLogger | None = None,
+    timeout_s: float = 180.0,
+) -> dict[str, Any]:
+    if job not in ALLOWED_GENAI_JOBS:
+        raise ValueError("unsupported advisory generation job")
+    if not isinstance(text, str) or not text.strip():
+        raise ValueError("input must be a non-empty string")
+    payload: dict[str, Any] = {"job": job, "input": text}
+    if max_new_tokens is not None:
+        payload["max_new_tokens"] = max_new_tokens
+    result = http_post_json(GENAI_URL, payload, timeout_s)
+    envelope = build_envelope(
+        service="genai",
+        operation=f"generate:{job}",
+        mode="draft",
+        input_scope="explicit_text",
+        trace_id=trace_id,
+        result={"draft_text": result.get("text", ""), "json": result.get("json"), "timing_ms": result.get("timing_ms"), "final_authority": False},
+        npu_busy_delta_us=_npu_delta_from(result),
+    )
+    if logger:
+        logger.log(envelope, input_ref="text:sha256:" + sha256_text(text))
+    return envelope
+
+
+def _resolve_allowed(path: str, allowed_roots: list[str] | None, configured_roots: list[Path] | None = None) -> tuple[Path, list[Path]]:
+    configured = [p.expanduser().resolve() for p in (configured_roots or DEFAULT_ALLOWED_ROOTS)]
+    if not configured:
+        raise ValueError("at least one configured allowed root is required")
+    requested = [Path(p).expanduser().resolve() for p in (allowed_roots or [str(p) for p in configured])]
+    if not requested:
+        raise ValueError("at least one requested allowed root is required")
+    for root in requested:
+        if not any(root == base or root.is_relative_to(base) for base in configured):
+            raise ValueError("requested allowed root is outside configured roots")
+    roots = requested
+    candidate = Path(path).expanduser().resolve()
+    if not any(candidate == root or candidate.is_relative_to(root) for root in roots):
+        raise ValueError("path must be inside an allowed root")
+    if not candidate.exists() or not candidate.is_file():
+        raise ValueError("path must be an existing file")
+    return candidate, roots
+
+
+def triage_file(
+    path: str,
+    *,
+    allowed_roots: list[str] | None = None,
+    configured_roots: list[Path] | None = None,
+    trace_id: str | None = None,
+    http_post_json: Callable[[str, dict[str, Any], float], dict[str, Any]] = http_post_json,
+    logger: AdvisoryLogger | None = None,
+    timeout_s: float = 60.0,
+) -> dict[str, Any]:
+    candidate, roots = _resolve_allowed(path, allowed_roots, configured_roots)
+    payload = {"path": str(candidate), "options": {"allowed_roots": [str(r) for r in roots], "max_pages": 3}}
+    result = http_post_json(DOC_TRIAGE_URL, payload, timeout_s)
+    delta = _doc_triage_npu_delta(result)
+    envelope = build_envelope(
+        service="doc_triage",
+        operation="triage_file",
+        mode="reviewable_artifact",
+        input_scope="explicit_file",
+        trace_id=trace_id,
+        result={"triage": result.get("result"), "final_authority": False},
+        npu_busy_delta_us=delta,
+    )
+    if logger:
+        envelope["warnings"].append("metadata-only log; raw file contents are not logged")
+        logger.log(envelope, input_ref="file:sha256path:" + sha256_text(str(candidate)))
+    return envelope
+
+
+def health(*, http_get_json: Callable[[str, float], dict[str, Any]] = http_get_json) -> dict[str, Any]:
+    deps = {
+        "classifier": "http://127.0.0.1:18819/healthz",
+        "genai": "http://127.0.0.1:18820/healthz",
+        "doc_triage": "http://127.0.0.1:18829/healthz",
+    }
+    out: dict[str, Any] = {"ok": True, "service": "openvino-advisory-gateway", "mode": "advisory_only", "authority": dict(AUTHORITY), "dependencies": {}}
+    for name, url in deps.items():
+        try:
+            data = http_get_json(url, 8.0)
+            out["dependencies"][name] = {"ok": bool(data.get("ok", data.get("status") == "ok")), "service": data.get("service"), "device": data.get("device")}
+        except Exception as exc:
+            out["ok"] = False
+            out["dependencies"][name] = {"ok": False, "error": str(exc)}
+    return out
+
+
+def _read_json(handler: BaseHTTPRequestHandler, max_bytes: int = 256 * 1024) -> dict[str, Any]:
+    length = int(handler.headers.get("Content-Length", "0"))
+    if length > max_bytes:
+        raise ValueError("request JSON too large")
+    raw = handler.rfile.read(length)
+    if not raw:
+        return {}
+    return json.loads(raw.decode("utf-8"))
+
+
+def make_handler(logger: AdvisoryLogger, configured_roots: list[Path]):
+    class Handler(BaseHTTPRequestHandler):
+        server_version = "openvino-advisory-gateway/0.1"
+
+        def log_message(self, format: str, *args: Any) -> None:  # noqa: A002 - stdlib override name
+            # Do not log request bodies or private paths.
+            print(f"{self.client_address[0]} {format % args}")
+
+        def send_json(self, status: int, payload: Any) -> None:
+            body = json.dumps(payload, indent=2, sort_keys=True).encode("utf-8")
+            self.send_response(status)
+            self.send_header("Content-Type", "application/json")
+            self.send_header("Content-Length", str(len(body)))
+            self.end_headers()
+            self.wfile.write(body)
+
+        def do_GET(self) -> None:  # noqa: N802
+            if urlparse(self.path).path in ("/", "/health", "/healthz"):
+                self.send_json(200, health())
+                return
+            self.send_json(404, {"ok": False, "error": "not_found"})
+
+        def do_POST(self) -> None:  # noqa: N802
+            path = urlparse(self.path).path
+            try:
+                payload = _read_json(self)
+                if path == "/v1/advisory/classify":
+                    self.send_json(200, classify_text(str(payload.get("text", "")), trace_id=payload.get("trace_id"), logger=logger))
+                    return
+                if path == "/v1/advisory/generate":
+                    self.send_json(200, generate_bounded(str(payload.get("job", "summary")), str(payload.get("input", "")), max_new_tokens=payload.get("max_new_tokens"), trace_id=payload.get("trace_id"), logger=logger))
+                    return
+                if path == "/v1/advisory/triage":
+                    self.send_json(200, triage_file(str(payload.get("path", "")), allowed_roots=payload.get("allowed_roots"), configured_roots=configured_roots, trace_id=payload.get("trace_id"), logger=logger))
+                    return
+                self.send_json(404, {"ok": False, "error": "not_found"})
+            except Exception as exc:
+                self.send_json(400, {"ok": False, "error": type(exc).__name__, "message": str(exc), "authority": dict(AUTHORITY)})
+
+    return Handler
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description="Local-only OpenVINO NPU advisory gateway")
+    parser.add_argument("--host", default=os.environ.get("NPU_ADVISORY_HOST", HOST))
+    parser.add_argument("--port", type=int, default=int(os.environ.get("NPU_ADVISORY_PORT", str(PORT))))
+    parser.add_argument("--log-db", default=str(DEFAULT_LOG_DB))
+    parser.add_argument("--allowed-root", action="append", dest="allowed_roots", default=None, help="Configured file root allowed for advisory doc/image triage. May be repeated.")
+    parser.add_argument(
+        "--allow-docker-bridge",
+        action="store_true",
+        default=os.environ.get("NPU_ADVISORY_ALLOW_DOCKER_BRIDGE", "").lower() in {"1", "true", "yes"},
+        help="Permit binding to a private Docker bridge IP instead of 127.0.0.1.",
+    )
+    args = parser.parse_args(argv)
+    try:
+        validate_bind_host(args.host, allow_docker_bridge=args.allow_docker_bridge)
+    except ValueError as exc:
+        raise SystemExit(str(exc)) from exc
+    configured_roots = [Path(p).expanduser().resolve() for p in (args.allowed_roots or DEFAULT_ALLOWED_ROOTS)]
+    logger = AdvisoryLogger(args.log_db)
+    server = ThreadingHTTPServer((args.host, args.port), make_handler(logger, configured_roots))
+    print(json.dumps({"service": "openvino-advisory-gateway", "host": args.host, "port": args.port, "mode": "advisory_only"}), flush=True)
+    server.serve_forever()
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,18 @@
+[Unit]
+Description=OpenVINO NPU advisory gateway (Docker bridge, port 18830)
+After=network.target openvino-router-classifier.service openvino-genai-npu-worker.service openvino-doc-image-triage.service
+Wants=openvino-router-classifier.service openvino-genai-npu-worker.service openvino-doc-image-triage.service
+
+[Service]
+Type=simple
+WorkingDirectory=/home/will/lab/swarm/openvino-advisory-gateway
+Environment=NPU_ADVISORY_HOST=172.19.0.1
+Environment=NPU_ADVISORY_PORT=18830
+Environment=NPU_ADVISORY_ALLOW_DOCKER_BRIDGE=true
+Environment=NPU_ADVISORY_LOG_DB=/home/will/.local/state/openvino-advisory-gateway/events.sqlite
+ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-advisory-gateway/gateway.py --host 172.19.0.1 --port 18830 --allow-docker-bridge --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu
+Restart=on-failure
+RestartSec=5
+
+[Install]
+WantedBy=default.target
@@ -0,0 +1,146 @@
+from __future__ import annotations
+
+import json
+import sqlite3
+import sys
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+import gateway
+
+
+def test_authority_envelope_is_advisory_and_forbids_side_effects() -> None:
+    env = gateway.build_envelope(
+        service="classifier",
+        operation="classify",
+        mode="shadow",
+        result={"labels": {"workflow_category": {"value": "devops"}}},
+        npu_busy_delta_us=123,
+        input_scope="explicit_text",
+    )
+
+    assert env["ok"] is True
+    assert env["mode"] == "shadow"
+    assert env["authority"] == {
+        "may_route": False,
+        "may_write_memory": False,
+        "may_send_external": False,
+        "may_process_private_dirs": False,
+        "may_execute_tools": False,
+        "may_restart_services": False,
+    }
+    assert env["npu_proof"] == {"required": True, "ok": True, "npu_busy_delta_us": 123}
+
+
+def test_bind_host_requires_explicit_docker_bridge_approval() -> None:
+    gateway.validate_bind_host("127.0.0.1")
+
+    with pytest.raises(ValueError, match="without --allow-docker-bridge"):
+        gateway.validate_bind_host("172.19.0.1")
+
+    gateway.validate_bind_host("172.19.0.1", allow_docker_bridge=True)
+
+    with pytest.raises(ValueError, match="approved bridge IP"):
+        gateway.validate_bind_host("0.0.0.0", allow_docker_bridge=True)
+
+
+def test_classify_calls_sidecar_and_logs_metadata_only(tmp_path: Path) -> None:
+    calls: list[tuple[str, dict]] = []
+
+    def fake_post(url: str, payload: dict, timeout_s: float) -> dict:
+        calls.append((url, payload))
+        return {
+            "labels": {"tool_needed": {"value": True}},
+            "npu_busy_delta_us": 55,
+            "sysfs_npu_busy_delta_us": 55,
+        }
+
+    logger = gateway.AdvisoryLogger(tmp_path / "events.sqlite")
+    env = gateway.classify_text(
+        "Inspect live service status",
+        trace_id="t1",
+        http_post_json=fake_post,
+        logger=logger,
+    )
+
+    assert calls[0][0].endswith(":18819/v1/classify")
+    assert calls[0][1]["options"]["dry_run"] is True
+    assert env["service"] == "classifier"
+    assert env["authority"]["may_route"] is False
+    assert env["npu_proof"]["ok"] is True
+
+    with sqlite3.connect(tmp_path / "events.sqlite") as con:
+        row = con.execute("select service, operation, input_ref, raw_payload from advisory_events").fetchone()
+    assert row == ("classifier", "classify", "text:sha256:" + gateway.sha256_text("Inspect live service status"), None)
+
+
+def test_generate_allows_only_bounded_jobs() -> None:
+    with pytest.raises(ValueError, match="unsupported advisory generation job"):
+        gateway.generate_bounded("primary_chat", "hello", http_post_json=lambda *_: {})
+
+
+def test_generate_wraps_draft_without_final_authority() -> None:
+    def fake_post(url: str, payload: dict, timeout_s: float) -> dict:
+        return {"text": "Short title", "npu_busy_delta_us": 99, "timing_ms": {"total": 10}}
+
+    env = gateway.generate_bounded("title", "Summarize this local health check", http_post_json=fake_post)
+
+    assert env["service"] == "genai"
+    assert env["operation"] == "generate:title"
+    assert env["result"]["draft_text"] == "Short title"
+    assert env["result"]["final_authority"] is False
+    assert env["authority"]["may_send_external"] is False
+
+
+def test_doc_triage_requires_explicit_file_under_allowed_root(tmp_path: Path) -> None:
+    allowed = tmp_path / "allowed"
+    allowed.mkdir()
+    target = allowed / "synthetic.png"
+    target.write_bytes(b"not real image for unit test")
+
+    def fake_post(url: str, payload: dict, timeout_s: float) -> dict:
+        assert payload["path"] == str(target.resolve())
+        assert payload["options"]["allowed_roots"] == [str(allowed.resolve())]
+        return {"ok": True, "result": {"pages": [{"needs_attention": {"embedding": {"verified_npu": True, "npu_busy_delta_us": 42}}}]}}
+
+    env = gateway.triage_file(str(target), allowed_roots=[str(allowed)], configured_roots=[allowed], http_post_json=fake_post)
+
+    assert env["service"] == "doc_triage"
+    assert env["input_scope"] == "explicit_file"
+    assert env["npu_proof"]["ok"] is True
+
+
+def test_doc_triage_rejects_private_root_broadening(tmp_path: Path) -> None:
+    allowed = tmp_path / "allowed"
+    allowed.mkdir()
+    with pytest.raises(ValueError, match="path must be inside an allowed root"):
+        gateway.triage_file(str(tmp_path / "outside.png"), allowed_roots=[str(allowed)], configured_roots=[allowed], http_post_json=lambda *_: {})
+
+
+def test_doc_triage_rejects_requested_root_outside_configured_roots(tmp_path: Path) -> None:
+    configured = tmp_path / "configured"
+    requested = tmp_path / "private"
+    requested.mkdir()
+    target = requested / "file.png"
+    target.write_bytes(b"synthetic")
+
+    with pytest.raises(ValueError, match="requested allowed root is outside configured roots"):
+        gateway.triage_file(
+            str(target),
+            allowed_roots=[str(requested)],
+            configured_roots=[configured],
+            http_post_json=lambda *_: {},
+        )
+
+
+def test_health_aggregates_dependencies_without_raw_private_data() -> None:
+    def fake_get(url: str, timeout_s: float) -> dict:
+        return {"ok": True, "service": url.rsplit(":", 1)[-1]}
+
+    health = gateway.health(http_get_json=fake_get)
+
+    assert health["ok"] is True
+    assert set(health["dependencies"]) == {"classifier", "genai", "doc_triage"}
+    assert "raw" not in json.dumps(health).lower()
@@ -0,0 +1,339 @@
+# OpenVINO NPU classifier/router dry-run contract
+
+- Status: specification for dry-run prototype refresh
+- Target port: `127.0.0.1:18819`
+- Owner context: Atlas/Hermes local assistant sidecar evaluation
+
+This service is an advisory classifier for Atlas/Hermes automation hints. It may suggest labels such as tool-needed, memory-candidate type, urgency, workflow category, and safety-confirmation-required, but it must not make or enforce live routing, memory, tool, or safety decisions without a separate explicit approval from Will.
+
+## Recommended model and runtime
+
+Recommended v1 runtime: small local Python HTTP/CLI service backed by the existing OpenVINO NPU embeddings service on `127.0.0.1:18817`.
+
+Recommended v1 model shape:
+
+- Primary signal: `bge-base-en-v1.5-int8-ov` embeddings from the live embeddings service.
+- Classifier layer: inspectable deterministic rules plus cosine similarity against curated synthetic/prototype utterances.
+- Model label: `bge-base-en-v1.5-int8-ov/prototype-router-v0`.
+- Device proof: request-level `npu_busy_delta_us` from `:18817` plus direct sysfs before/after reads from `/sys/class/accel/accel0/device/npu_busy_time_us`.
+
+Why this is preferred for the dry run:
+
+1. It reuses the already-live NPU embeddings path rather than adding a second model conversion/runtime dependency before contract validation.
+2. Rules and prototypes are transparent enough for safety-sensitive routing hints; a reviewer can inspect why a message was labeled.
+3. It avoids fine-tuning or training on private Atlas/Hermes transcripts.
+4. It keeps the service small, localhost-only, and easy to start/stop during smoke tests.
+5. It produces NPU activity through the embeddings path while making clear that final decision logic remains advisory.
+
+Defer a dedicated NPU sequence-classification model such as TinyBERT/MiniLM until the dry-run labels and thresholds have been evaluated against synthetic fixtures and explicitly-approved non-private examples. If pursued later, use OpenVINO Runtime/Optimum export with fixed input shapes suitable for NPU, and keep the rule layer for safety gates.
+
+## Non-goals and safety invariants
+
+The service must not:
+
+- Change Hermes/Atlas model routing, gateway routing, memory writes, tool-use permissions, or safety-confirmation behavior.
+- Restart, stop, enable, or persist any live Atlas/Hermes/gateway/RAG service.
+- Bind to anything broader than `127.0.0.1` by default.
+- Mutate Chroma/vector collections, trigger reindexing, or write to RAG state.
+- Process private document/image directories or private transcript dumps for smoke testing.
+- Log raw prompts by default beyond normal foreground stderr during local review.
+- Claim NPU success from HTTP 200 alone.
+
+## Endpoint contract
+
+All HTTP endpoints are local-only by default.
+
+Base URL:
+
+```text
+http://127.0.0.1:18819
+```
+
+### GET `/healthz`, `/health`, `/readyz`, `/`
+
+Purpose: liveness/readiness metadata.
+
+Response fields:
+
+- `status`: `starting | ok`
+- `service`: `atlas-router-classifier`
+- `version`: service version string
+- `mode`: always `dry_run`
+- `model`: model/runtime label
+- `embed_url`: upstream embeddings URL
+- `device`: expected to say `NPU-via-embedding-service` or equivalent
+- `labels`: supported label names
+- `embedding_dim`: embedding dimension after warmup
+- `prototype_count`: number of synthetic prototype examples loaded
+- `max_batch_size`: effective batch limit applied by the service
+- `prototype_npu_busy_delta_us`: warmup delta reported by upstream embeddings, if available
+- `npu_busy_time_us`: current sysfs counter value, if readable
+- `uptime_s`: seconds since the service object was created
+- `warnings`: list of non-fatal warnings
+
+A healthy service is not enough to prove NPU execution. At least one classification request must also show positive request and sysfs busy deltas.
+
+### GET `/v1/labels`
+
+Purpose: publish schema information without dumping private examples.
+
+Response fields:
+
+- `model`
+- `thresholds`
+  - `tool_needed`: recommended threshold `0.72`
+  - `memory_candidate`: recommended threshold `0.78`
+  - `safety_confirmation_required`: recommended threshold `0.80`
+  - `workflow_category`: recommended threshold `0.52`
+- `enums`
+  - `memory_candidate`: `none`, `user_preference`, `durable_user_fact`, `environment_fact`, `workflow_convention`, `skill_candidate`
+  - `urgency`: `low`, `normal`, `high`, `critical`
+  - `workflow_category`: `chat`, `research`, `coding`, `debugging`, `devops`, `smart_home`, `media`, `note_taking`, `productivity`, `kanban`, `unknown`
+- `prototype_ids`: names of curated synthetic prototype buckets
+
+### POST `/v1/classify`
+
+Purpose: classify one user/task message for advisory dry-run hints.
+
+Request:
+
+```json
+{
+  "id": "optional-trace-id",
+  "text": "Urgent: check whether port 18817 is listening and inspect systemd logs.",
+  "context": {
+    "platform": "cli",
+    "source": "user"
+  },
+  "options": {
+    "include_evidence": true,
+    "include_embedding_debug": false,
+    "dry_run": true
+  }
+}
+```
+
+Required behavior:
+
+- Reject empty text with HTTP 400.
+- Default `dry_run` to true.
+- Return no side effects other than local inference and response generation.
+- Include evidence by default unless `include_evidence=false`.
+- Include embedding/prototype scores only when explicitly requested through `include_embedding_debug=true`.
+
+Response:
+
+```json
+{
+  "id": "optional-trace-id",
+  "model": "bge-base-en-v1.5-int8-ov/prototype-router-v0",
+  "created": 1780590000,
+  "duration_ms": 12.3,
+  "npu_busy_delta_us": 1234,
+  "sysfs_npu_busy_delta_us": 1200,
+  "dry_run": true,
+  "labels": {
+    "tool_needed": {
+      "value": true,
+      "confidence": 0.84,
+      "threshold": 0.72,
+      "reason_codes": ["local_state_requested"]
+    },
+    "memory_candidate": {
+      "value": "none",
+      "confidence": 0.31,
+      "threshold": 0.78,
+      "reason_codes": []
+    },
+    "urgency": {
+      "value": "high",
+      "confidence": 0.84,
+      "scores": {"low": 0.0, "normal": 0.2, "high": 0.84, "critical": 0.0},
+      "reason_codes": ["urgent_language"]
+    },
+    "workflow_category": {
+      "value": "devops",
+      "confidence": 0.86,
+      "scores": {"devops": 0.86, "unknown": 0.14}
+    },
+    "safety_confirmation_required": {
+      "value": false,
+      "confidence": 0.0,
+      "threshold": 0.8,
+      "reason_codes": []
+    }
+  },
+  "warnings": [],
+  "evidence": []
+}
+```
+
+### POST `/v1/batch_classify`
+
+Purpose: classify a bounded batch of non-private synthetic or explicitly-approved messages.
+
+Request:
+
+```json
+{
+  "items": [
+    {"id": "m1", "text": "What time is it in Seattle right now?"},
+    {"id": "m2", "text": "Restart the live Atlas gateway and switch primary routing."}
+  ],
+  "options": {"include_evidence": false, "dry_run": true}
+}
+```
+
+Response:
+
+- `model`
+- `duration_ms`
+- aggregate `npu_busy_delta_us`
+- `results`: array of `/v1/classify` responses
+
+Batch limits for prototype review:
+
+- Keep batches small; the prototype rejects empty batches and batches larger than `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE` (default `32`).
+- Use only synthetic fixtures unless Will explicitly approves a real non-private sample set.
+- Do not retain request bodies to disk.
+
+## CLI contract
+
+The same implementation should support foreground review from the service directory:
+
+```bash
+cd /home/will/lab/swarm/openvino-classifier-npu
+/home/will/.venvs/npu/bin/python router_classifier.py \
+  --host 127.0.0.1 \
+  --port 18819 \
+  --embed-url http://127.0.0.1:18817/v1/embeddings
+```
+
+Required flags/env:
+
+- `--host` / `OPENVINO_CLASSIFIER_HOST`; default `127.0.0.1`.
+- `--port` / `OPENVINO_CLASSIFIER_PORT`; default `18819`.
+- `--embed-url` / `OPENVINO_CLASSIFIER_EMBED_URL`; default `http://127.0.0.1:18817/v1/embeddings`.
+- `--timeout-s` / `OPENVINO_CLASSIFIER_TIMEOUT_S`; default `30`.
+- `--max-batch-size` / `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE`; default `32`.
+- `--no-warmup` to defer prototype embedding until first request.
+
+A future dedicated CLI mode may be added for one-shot JSONL classification, but foreground HTTP review is sufficient for the dry-run contract.
+
+## Synthetic smoke-test plan
+
+Preconditions:
+
+1. Confirm `:18817` embeddings service is healthy.
+2. Confirm `:18819` is not already listening.
+3. Read `/sys/class/accel/accel0/device/npu_busy_time_us` before starting the request smoke.
+4. Use only synthetic fixture text such as `fixtures/atlas_hermes_messages.jsonl`.
+
+Unit/schema smoke, no NPU dependency:
+
+```bash
+cd /home/will/lab/swarm
+/home/will/.venvs/npu/bin/python -m unittest discover -s openvino-classifier-npu/tests -v
+```
+
+Foreground service smoke:
+
+```bash
+ss -ltnp | grep ':18819\b' || true
+cd /home/will/lab/swarm/openvino-classifier-npu
+/home/will/.venvs/npu/bin/python router_classifier.py --host 127.0.0.1 --port 18819
+```
+
+From another shell:
+
+```bash
+curl -fsS http://127.0.0.1:18819/healthz | jq .
+curl -fsS http://127.0.0.1:18819/v1/labels | jq .
+curl -fsS http://127.0.0.1:18819/v1/classify \
+  -H 'Content-Type: application/json' \
+  -d '{"id":"smoke-devops","text":"Urgent: check whether port 18817 is listening and inspect systemd logs.","options":{"include_evidence":true,"dry_run":true}}' | jq .
+curl -fsS http://127.0.0.1:18819/v1/classify \
+  -H 'Content-Type: application/json' \
+  -d '{"id":"smoke-safety","text":"Restart the live Atlas gateway and switch primary routing to the new classifier.","options":{"include_evidence":true,"dry_run":true}}' | jq .
+```
+
+Expected label checks:
+
+- `smoke-devops`: `tool_needed.value=true`, `urgency.value=high`, `workflow_category.value=devops`.
+- `smoke-safety`: `safety_confirmation_required.value=true`, no actual restart or routing change.
+- Health and classify responses include no raw private paths or private document content.
+
+Shutdown:
+
+- Stop the foreground server with Ctrl-C.
+- Re-run `ss -ltnp | grep ':18819\b' || true` and confirm no listener remains.
+
+## NPU busy-time verification plan
+
+Use sysfs plus service response fields; do not accept HTTP 200 alone.
+
+```bash
+BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
+before=$(cat "$BUSY")
+response=$(curl -fsS http://127.0.0.1:18819/v1/classify \
+  -H 'Content-Type: application/json' \
+  -d '{"id":"npu-proof","text":"Check current systemd service status for the embeddings service.","options":{"include_evidence":false,"dry_run":true}}')
+after=$(cat "$BUSY")
+echo "$response" | jq '{npu_busy_delta_us, sysfs_npu_busy_delta_us, warnings}'
+echo "outer_sysfs_npu_busy_delta_us=$((after-before))"
+```
+
+Optional localhost smoke helper, run from `/home/will/lab/swarm` after starting the foreground service:
+
+```bash
+/home/will/.venvs/npu/bin/python openvino-classifier-npu/smoke_classifier.py \
+  --base-url http://127.0.0.1:18819
+```
+
+Acceptance for an NPU-backed classification request:
+
+- HTTP request succeeds.
+- Response `npu_busy_delta_us > 0` from upstream embeddings.
+- Response `sysfs_npu_busy_delta_us > 0` when sysfs is readable.
+- Outer shell `after-before > 0`.
+- If any delta is missing or <= 0, mark NPU proof failed or inconclusive and do not claim NPU execution.
+
+## Docs and diagram implications
+
+If this prototype is refreshed or reviewed, update documentation to show:
+
+- Live baseline remains RAG `:18810`, RAG health `:18814`, Whisper NPU `:18816`, and embeddings `:18817`.
+- Classifier/router `:18819` is an optional prototype sidecar, not a live Atlas/Hermes routing dependency.
+- Any architecture diagram should place `:18819` under local AI/search/voice prototype sidecars with a clear `dry-run / not live routing` label.
+- Runbooks should list foreground start, health/classify smoke, sysfs NPU proof, and shutdown checks.
+- Service catalog entries should state `not installed/enabled` until Will approves persistent service enablement.
+- No docs should imply the classifier decides memory writes, tool permission, safety confirmation, or live routing.
+
+Relevant docs inventory:
+
+- `docs/swarm-infrastructure.md`
+- `docs/swarm-infrastructure.html`
+- `docs/diagram-maintenance.md`
+- `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md`
+- `swarm-common/obsidian-vault/will/will-shared-zap/Resources/Service Catalog.md`
+
+## No-go / defer criteria
+
+Do not proceed to implementation refresh, persistent service enablement, or live integration if any of the following hold:
+
+- `:18817` embeddings is unavailable and no approved NPU embedding fallback exists.
+- `/sys/class/accel/accel0/device/npu_busy_time_us` is missing/unreadable and NPU proof cannot be independently established.
+- Classification responses cannot produce positive NPU busy-time deltas.
+- `:18819` is already occupied by an unknown or live service.
+- Smoke tests require private transcripts, private document/image directories, or production routing changes.
+- Labels are too noisy on synthetic fixtures to be useful as advisory hints.
+- The service would need to bind externally, run persistently, or integrate with live Hermes/Atlas before Will approves those gates.
+- Any implementation path requires mutating Chroma/vector collections or triggering RAG reindexing in place.
+
+## Implementation handoff notes
+
+Recommended next engineer actions:
+
+1. Verify or refresh `openvino-classifier-npu/router_classifier.py` to match this contract.
+2. Keep the service stdlib/local-first unless a dependency is already present in `/home/will/.venvs/npu`.
+3. Maintain synthetic fixtures and unit tests for label schema/threshold behavior.
+4. Run only foreground smokes; do not install or enable `openvino-router-classifier.service`.
+5. Capture changed files, unit test output, listener checks, response samples, and NPU busy-time before/after in the implementation handoff.
@@ -0,0 +1,140 @@
+# OpenVINO NPU router classifier prototype
+
+Dry-run Atlas/Hermes message classifier/router prototype.
+
+The detailed dry-run contract is in [`CONTRACT.md`](./CONTRACT.md), including the
+recommended model/runtime, HTTP/CLI schema, smoke-test plan, NPU busy-time proof,
+docs/diagram implications, and no-go/defer criteria.
+
+It reuses the existing OpenVINO NPU embeddings service on `127.0.0.1:18817` and
+serves an inspectable stdlib HTTP API on `127.0.0.1:18819`. It does not change
+live Hermes/Atlas routing, write memory, mutate vector collections, restart
+services, or send external messages.
+
+## Runtime shape
+
+- Service: `atlas-router-classifier`
+- Default port: `18819`
+- Default bind: `127.0.0.1`
+- Upstream: `http://127.0.0.1:18817/v1/embeddings`
+- Batch limit: `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE`, default `32`
+- Model label: `bge-base-en-v1.5-int8-ov/prototype-router-v0`
+- NPU proof: `/sys/class/accel/accel0/device/npu_busy_time_us` before/after plus upstream `npu_busy_delta_us`
+
+The classifier uses deterministic high-precision rules for safety/urgency/tool
+signals plus cosine similarity against curated embedding prototypes for workflow
+and memory recommendations. This is intentionally tunable without model training.
+
+## API
+
+### GET `/healthz`
+
+Returns service metadata, labels, prototype count, NPU sysfs counter, and warmup
+NPU delta.
+
+### GET `/v1/labels`
+
+Returns label enum values, thresholds, and prototype IDs without dumping private
+fixtures.
+
+### POST `/v1/classify`
+
+Request:
+
+```json
+{
+  "id": "optional trace id",
+  "text": "User message or task body to classify.",
+  "context": {"platform": "cli", "source": "user"},
+  "options": {
+    "include_evidence": true,
+    "include_embedding_debug": false,
+    "dry_run": true
+  }
+}
+```
+
+Response includes:
+
+- `labels.tool_needed`: boolean, confidence, threshold, reason codes
+- `labels.memory_candidate`: `none | user_preference | durable_user_fact | environment_fact | workflow_convention | skill_candidate`
+- `labels.urgency`: `low | normal | high | critical`
+- `labels.workflow_category`: `chat | research | coding | debugging | devops | smart_home | media | note_taking | productivity | kanban | unknown`
+- `labels.safety_confirmation_required`: boolean, confidence, threshold, reason codes
+- `npu_busy_delta_us` and `sysfs_npu_busy_delta_us`
+- `evidence` when requested
+
+### POST `/v1/batch_classify`
+
+Request:
+
+```json
+{
+  "items": [{"id": "m1", "text": "What time is it?"}],
+  "options": {"include_evidence": false, "dry_run": true}
+}
+```
+
+## Local smoke test
+
+Check that the proposed port is free first:
+
+```bash
+ss -ltnp | grep ':18819\b' || true
+```
+
+Run without installing anything extra; `/home/will/.venvs/npu` already has the
+stdlib plus requests/openvino stack used by the upstream embeddings service:
+
+```bash
+cd /home/will/lab/swarm/openvino-classifier-npu
+/home/will/.venvs/npu/bin/python router_classifier.py --host 127.0.0.1 --port 18819
+```
+
+Environment variables mirror the flags: `OPENVINO_CLASSIFIER_HOST`,
+`OPENVINO_CLASSIFIER_PORT`, `OPENVINO_CLASSIFIER_EMBED_URL`,
+`OPENVINO_CLASSIFIER_TIMEOUT_S`, and `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE`.
+
+Then from another shell:
+
+```bash
+curl -fsS http://127.0.0.1:18819/healthz | jq .
+curl -fsS http://127.0.0.1:18819/v1/classify \
+  -H 'Content-Type: application/json' \
+  -d '{"id":"smoke","text":"Urgent: check whether port 18817 is listening and inspect systemd logs.","options":{"include_evidence":true}}' | jq .
+```
+
+A valid NPU-backed response must have positive `npu_busy_delta_us` and, when
+sysfs is readable, positive `sysfs_npu_busy_delta_us`; HTTP 200 by itself is
+not considered proof.
+
+Synthetic fixture smoke helper, after the foreground service is running:
+
+```bash
+/home/will/.venvs/npu/bin/python smoke_classifier.py --base-url http://127.0.0.1:18819
+```
+
+The helper refuses non-local URLs, checks fixture label expectations, and prints
+response plus outer sysfs NPU busy deltas.
+
+## Tests
+
+Unit tests use a fake embedding client and do not touch the NPU. Run them from the repository root (`/home/will/lab/swarm`):
+
+```bash
+/home/will/.venvs/npu/bin/python -m unittest discover -s openvino-classifier-npu/tests -v
+```
+
+Fixture messages live at `fixtures/atlas_hermes_messages.jsonl`.
+
+## Optional systemd user unit
+
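+A reviewed local-only user service unit is included as `openvino-router-classifier.service`. Per [`CONTRACT.md`](./CONTRACT.md), do not install or enable it until Will has explicitly approved persistent service enablement; until then, run foreground smokes only. Once approved, install/enable it so the dry-run classifier persists across logins: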
+
+```bash
+cp openvino-router-classifier.service ~/.config/systemd/user/openvino-router-classifier.service
+systemctl --user daemon-reload
+systemctl --user enable --now openvino-router-classifier.service
+systemctl --user status openvino-router-classifier.service --no-pager
+```
+
+The service is persistent, but classifier decisions remain dry-run until a separate approved routing change lands. Do not connect it to live Atlas/Hermes routing, memory writes, service restarts, or outbound messages.
@@ -0,0 +1,10 @@
+{"id":"tool-time","text":"What time is it in Seattle right now?","expected":{"tool_needed":true,"workflow_category":"chat","urgency":"normal","safety_confirmation_required":false}}
+{"id":"memory-preference","text":"Remember that I prefer concise answers in the terminal.","expected":{"memory_candidate":"user_preference","tool_needed":false,"safety_confirmation_required":false}}
+{"id":"coding-debug","text":"Debug the failing pytest suite and inspect the git diff before opening a PR.","expected":{"tool_needed":true,"workflow_category":"debugging","urgency":"normal"}}
+{"id":"devops-urgent","text":"Urgent: the embeddings service on port 18817 is down; check systemd logs and restore it.","expected":{"tool_needed":true,"workflow_category":"devops","urgency":"high"}}
+{"id":"safety-routing","text":"Restart the live Atlas gateway and switch primary routing to the new classifier.","expected":{"tool_needed":true,"workflow_category":"devops","safety_confirmation_required":true}}
+{"id":"destructive-reindex","text":"Delete the existing Chroma collection and reindex the Obsidian vault in place.","expected":{"tool_needed":true,"workflow_category":"note_taking","safety_confirmation_required":true}}
+{"id":"research","text":"Research current OpenVINO NPU support for TinyBERT sequence classification and summarize sources.","expected":{"tool_needed":true,"workflow_category":"research"}}
+{"id":"smart-home","text":"Turn off the living room lights and set the thermostat to 68.","expected":{"tool_needed":true,"workflow_category":"smart_home"}}
+{"id":"media","text":"Transcribe this voice memo and extract action items.","expected":{"tool_needed":true,"workflow_category":"media"}}
+{"id":"kanban","text":"Work kanban task t_5e123496 and block it if review is required.","expected":{"tool_needed":true,"workflow_category":"kanban"}}
@@ -0,0 +1,18 @@
+# Unit for running the dry-run router classifier as a persistent service.
+# NOTE(review): CONTRACT.md says not to install or enable this unit until
+# persistent service enablement has been explicitly approved.
+[Unit]
+Description=Atlas/Hermes dry-run OpenVINO router classifier
+# Start after, and pull in, the embeddings unit that the classifier calls upstream.
+# NOTE(review): the README installs this as a user unit; confirm that
+# network.target is meaningful in that context.
+After=network.target openvino-embeddings.service
+Wants=openvino-embeddings.service
+
+[Service]
+Type=simple
+WorkingDirectory=/home/will/lab/swarm/openvino-classifier-npu
+# Loopback-only bind and local upstream; these mirror the documented CLI flags.
+Environment=OPENVINO_CLASSIFIER_HOST=127.0.0.1
+Environment=OPENVINO_CLASSIFIER_PORT=18819
+Environment=OPENVINO_CLASSIFIER_EMBED_URL=http://127.0.0.1:18817/v1/embeddings
+Environment=OPENVINO_CLASSIFIER_MAX_BATCH_SIZE=32
+ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-classifier-npu/router_classifier.py
+# Restart only after a failure, waiting 5 seconds between attempts.
+Restart=on-failure
+RestartSec=5
+
+[Install]
+WantedBy=default.target
@@ -0,0 +1,563 @@
+#!/usr/bin/env python3
+"""Dry-run Atlas/Hermes router classifier backed by the local OpenVINO NPU embedding service.
+
+Default port: 18819
+Default upstream: http://127.0.0.1:18817/v1/embeddings
+
+This service is intentionally advisory only. It does not write memory, mutate routing,
+restart services, or call external APIs. NPU execution is proved by the upstream
+embedding service's npu_busy_delta_us and by reading the local sysfs busy counter.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import math
+import os
+import re
+import sys
+import time
+import urllib.error
+import urllib.request
+from dataclasses import dataclass
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+from typing import Any
+
+VERSION = "0.1.0"
+SERVICE = "atlas-router-classifier"
+MODEL = "bge-base-en-v1.5-int8-ov/prototype-router-v0"
+DEFAULT_HOST = "127.0.0.1"
+DEFAULT_PORT = 18819
+DEFAULT_EMBED_URL = "http://127.0.0.1:18817/v1/embeddings"
+DEFAULT_MAX_BATCH_SIZE = 32
+NPU_BUSY_FILE = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
+
+WORKFLOW_CATEGORIES = [
+    "chat",
+    "research",
+    "coding",
+    "debugging",
+    "devops",
+    "smart_home",
+    "media",
+    "note_taking",
+    "productivity",
+    "kanban",
+    "unknown",
+]
+MEMORY_VALUES = ["none", "user_preference", "durable_user_fact", "environment_fact", "workflow_convention", "skill_candidate"]
+URGENCY_VALUES = ["low", "normal", "high", "critical"]
+
+# Curated synthetic prototype utterances, keyed by bucket name.
+# Bucket names are grouped by prefix: "tool_needed", "memory_*", "urgency_*",
+# and "workflow_*". ClassifierService flattens these into parallel key/text
+# lists and embeds the texts during warmup(). The strings are sent to the
+# embeddings service verbatim, so editing one changes its prototype vector.
+PROTOTYPES: dict[str, list[str]] = {
+    "tool_needed": [
+        "check the current date time weather news versions or live facts",
+        "inspect files git branches logs ports processes disk memory or system state",
+        "send a message create a cron job call an API or interact with a local service",
+        "search the web browse a website download or verify current information",
+    ],
+    "memory_user_preference": [
+        "remember that I prefer concise replies and a direct style",
+        "my preference is use short answers and avoid unnecessary detail",
+        "please remember I like this convention for future sessions",
+    ],
+    "memory_durable_user_fact": [
+        "remember that I live in Seattle and work on local AI infrastructure",
+        "my name role location identity or durable personal detail is",
+    ],
+    "memory_environment_fact": [
+        "this project uses pytest and this server runs linux with openvino npu",
+        "remember this repository convention service port path or environment setup",
+    ],
+    "memory_workflow_convention": [
+        "for this workflow use this recurring procedure convention or process",
+        "the team convention is to run checks before code review and use a worktree",
+    ],
+    "memory_skill_candidate": [
+        "we discovered a reusable multi step workflow that should become a skill",
+        "save this procedure as a reusable skill after solving a tricky task",
+    ],
+    "urgency_low": [
+        "whenever convenient no rush low priority idea someday backlog",
+    ],
+    "urgency_high": [
+        "urgent asap high priority today please handle soon production issue",
+        "service is degraded broken failing down users are blocked",
+    ],
+    "urgency_critical": [
+        "critical outage security incident data loss production down emergency now",
+        "stop the bleeding rollback immediately credentials leaked destructive incident",
+    ],
+    "workflow_chat": [
+        "answer a general question explain a concept brainstorm rewrite text chat casually",
+    ],
+    "workflow_research": [
+        "research compare summarize sources papers market docs web search literature review",
+    ],
+    "workflow_coding": [
+        "implement code write tests refactor add feature fix type errors create a branch",
+    ],
+    "workflow_debugging": [
+        "debug failing tests inspect logs reproduce error traceback diagnose regression",
+    ],
+    "workflow_devops": [
+        "operate services systemd docker kubernetes ports health checks deploy infrastructure",
+    ],
+    "workflow_smart_home": [
+        "turn on lights adjust thermostat control tv speaker home assistant hue wiz",
+    ],
+    "workflow_media": [
+        "transcribe audio process video image gif spotify music youtube media file",
+    ],
+    "workflow_note_taking": [
+        "obsidian notes daily diary memory knowledge base document personal context",
+    ],
+    "workflow_productivity": [
+        "calendar email spreadsheet presentation notion airtable linear task planning",
+    ],
+    "workflow_kanban": [
+        "kanban task board card assignee handoff review required blocked complete worker",
+    ],
+}
+
+# Deterministic keyword rules, keyed by rule group.
+# Each entry is (compiled case-insensitive pattern, reason code, score).
+# best_rule() searches every pattern in a group and reports the highest score
+# among the patterns that match, together with the matching reason codes.
+RULES: dict[str, list[tuple[re.Pattern[str], str, float]]] = {
+    # Signals that answering needs live facts, local state, or an action.
+    "tool_needed": [
+        (re.compile(r"\b(current|today|now|latest|weather|news|version|price|stock)\b", re.I), "current_fact_requested", 0.88),
+        (re.compile(r"\b(file|directory|git|branch|commit|diff|log|port|process|disk|memory|cpu|gpu|npu|service|systemd|reindex)\b", re.I), "local_state_requested", 0.84),
+        (re.compile(r"\b(send|schedule|create cron|call api|download|browse|search web|open website|turn on|turn off|set the thermostat|transcribe|restart|switch primary routing|work kanban|kanban task)\b", re.I), "external_or_tool_action_requested", 0.86),
+    ],
+    # Destructive actions, live service/routing changes, and credential or
+    # external side effects.
+    "safety": [
+        (re.compile(r"\b(delete|remove|overwrite|drop|truncate|wipe|reindex|reset --hard|force push)\b", re.I), "destructive_or_irreversible_action", 0.92),
+        (re.compile(r"\b(restart|stop|deploy|expose|public|0\.0\.0\.0|route live|primary routing|gateway)\b", re.I), "live_service_or_routing_change", 0.88),
+        (re.compile(r"\b(secret|token|api key|credential|password|private document|external upload|send message|spend money|purchase)\b", re.I), "credential_privacy_or_external_side_effect", 0.9),
+    ],
+    # Explicit "remember"/preference phrasing and durable-convention phrasing.
+    "memory": [
+        (re.compile(r"\b(remember that|please remember|don'?t forget|my preference|I prefer|call me)\b", re.I), "explicit_memory_language", 0.9),
+        (re.compile(r"\b(always|for future|going forward|convention|workflow|standard practice)\b", re.I), "durable_convention_language", 0.78),
+    ],
+    "urgency_high": [
+        (re.compile(r"\b(urgent|asap|immediately|high priority|production|down|broken|blocked)\b", re.I), "urgent_language", 0.84),
+    ],
+    "urgency_critical": [
+        (re.compile(r"\b(critical|emergency|outage|data loss|credential leak|security incident|prod down)\b", re.I), "critical_incident_language", 0.94),
+    ],
+}
+
+
+def npu_busy_time_us() -> int | None:
+    try:
+        return int(NPU_BUSY_FILE.read_text().strip())
+    except Exception:
+        return None
+
+
+def env_int(name: str, default: int) -> int:
+    raw = os.environ.get(name)
+    if raw is None:
+        return default
+    try:
+        return int(raw)
+    except ValueError as exc:
+        raise SystemExit(f"{name} must be an integer, got {raw!r}") from exc
+
+
+def env_float(name: str, default: float) -> float:
+    raw = os.environ.get(name)
+    if raw is None:
+        return default
+    try:
+        return float(raw)
+    except ValueError as exc:
+        raise SystemExit(f"{name} must be a number, got {raw!r}") from exc
+
+
+def clamp01(value: float) -> float:
+    return max(0.0, min(1.0, value))
+
+
+def cosine(a: list[float], b: list[float]) -> float:
+    if not a or not b or len(a) != len(b):
+        return 0.0
+    dot = sum(x * y for x, y in zip(a, b))
+    na = math.sqrt(sum(x * x for x in a))
+    nb = math.sqrt(sum(y * y for y in b))
+    if na == 0.0 or nb == 0.0:
+        return 0.0
+    # Map [-1, 1] to [0, 1] for confidence-like scoring.
+    return clamp01((dot / (na * nb) + 1.0) / 2.0)
+
+
+def best_rule(text: str, group: str) -> tuple[float, list[str], list[dict[str, Any]]]:
+    best = 0.0
+    codes: list[str] = []
+    evidence: list[dict[str, Any]] = []
+    for pattern, code, score in RULES.get(group, []):
+        match = pattern.search(text)
+        if match:
+            best = max(best, score)
+            codes.append(code)
+            evidence.append({"label": group, "source": "rule", "matched": match.group(0), "reason_code": code, "score": score})
+    return best, sorted(set(codes)), evidence
+
+
+@dataclass
+class EmbedResult:
+    """Parsed outcome of one EmbeddingClient.embed() call."""
+
+    # One float vector per item of the upstream response's "data" list.
+    vectors: list[list[float]]
+    # Upstream "npu_busy_delta_us" field; None when the response omits it.
+    npu_busy_delta_us: int | None
+    # Client-side elapsed time for the call in milliseconds, rounded to 3 places.
+    duration_ms: float
+    # Upstream "embedding_dim" if truthy, else the first vector's length, else None.
+    embedding_dim: int | None
+
+
+class EmbeddingClient:
+    def __init__(self, url: str, timeout_s: float = 30.0) -> None:
+        self.url = url
+        self.timeout_s = timeout_s
+
+    def embed(self, texts: list[str], *, purpose: str = "query") -> EmbedResult:
+        payload = json.dumps({"input": texts, "purpose": purpose}).encode("utf-8")
+        request = urllib.request.Request(
+            self.url,
+            data=payload,
+            headers={"Content-Type": "application/json"},
+            method="POST",
+        )
+        started = time.perf_counter()
+        try:
+            with urllib.request.urlopen(request, timeout=self.timeout_s) as response:  # noqa: S310 - local configured URL
+                body = response.read().decode("utf-8", "replace")
+        except urllib.error.HTTPError as exc:
+            detail = exc.read().decode("utf-8", "replace")
+            raise RuntimeError(f"embedding service HTTP {exc.code}: {detail}") from exc
+        except urllib.error.URLError as exc:
+            raise RuntimeError(f"embedding service unavailable at {self.url}: {exc.reason}") from exc
+        data = json.loads(body)
+        vectors = [item["embedding"] for item in data.get("data", [])]
+        return EmbedResult(
+            vectors=[[float(x) for x in vec] for vec in vectors],
+            npu_busy_delta_us=data.get("npu_busy_delta_us"),
+            duration_ms=round((time.perf_counter() - started) * 1000, 3),
+            embedding_dim=data.get("embedding_dim") or (len(vectors[0]) if vectors else None),
+        )
+
+
+class ClassifierService:
+    def __init__(self, embed_url: str, *, timeout_s: float = 30.0, max_batch_size: int = DEFAULT_MAX_BATCH_SIZE) -> None:
+        self.embed_url = embed_url
+        self.client = EmbeddingClient(embed_url, timeout_s=timeout_s)
+        self.max_batch_size = max(1, int(max_batch_size))
+        self.loaded_at = time.time()
+        self.prototype_texts: list[str] = []
+        self.prototype_keys: list[str] = []
+        for key, examples in PROTOTYPES.items():
+            for example in examples:
+                self.prototype_keys.append(key)
+                self.prototype_texts.append(example)
+        self.prototype_vectors: list[list[float]] | None = None
+        self.prototype_npu_busy_delta_us: int | None = None
+        self.embedding_dim: int | None = None
+        self.warnings: list[str] = []
+
+    def warmup(self) -> None:
+        result = self.client.embed(self.prototype_texts, purpose="document")
+        self.prototype_vectors = result.vectors
+        self.prototype_npu_busy_delta_us = result.npu_busy_delta_us
+        self.embedding_dim = result.embedding_dim
+        if not result.npu_busy_delta_us or result.npu_busy_delta_us <= 0:
+            self.warnings.append("prototype embedding warmup did not report positive NPU busy delta")
+
+    def health(self) -> dict[str, Any]:
+        return {
+            "status": "ok" if self.prototype_vectors else "starting",
+            "service": SERVICE,
+            "version": VERSION,
+            "mode": "dry_run",
+            "model": MODEL,
+            "embed_url": self.embed_url,
+            "device": "NPU-via-embedding-service",
+            "labels": ["tool_needed", "memory_candidate", "urgency", "workflow_category", "safety_confirmation_required"],
+            "embedding_dim": self.embedding_dim,
+            "prototype_count": len(self.prototype_texts),
+            "max_batch_size": self.max_batch_size,
+            "prototype_npu_busy_delta_us": self.prototype_npu_busy_delta_us,
+            "npu_busy_time_us": npu_busy_time_us(),
+            "uptime_s": round(time.time() - self.loaded_at, 3),
+            "warnings": self.warnings,
+        }
+
+    def labels(self) -> dict[str, Any]:
+        return {
+            "model": MODEL,
+            "thresholds": {
+                "tool_needed": 0.72,
+                "memory_candidate": 0.78,
+                "safety_confirmation_required": 0.80,
+                "workflow_category": 0.52,
+            },
+            "enums": {"memory_candidate": MEMORY_VALUES, "urgency": URGENCY_VALUES, "workflow_category": WORKFLOW_CATEGORIES},
+            "limits": {"max_batch_size": self.max_batch_size},
+            "prototype_ids": sorted(PROTOTYPES),
+        }
+
+    def classify(self, item_id: str | None, text: str, options: dict[str, Any] | None = None) -> dict[str, Any]:
+        if self.prototype_vectors is None:
+            self.warmup()
+        options = options or {}
+        include_evidence = bool(options.get("include_evidence", True))
+        include_embedding_debug = bool(options.get("include_embedding_debug", False))
+        dry_run = bool(options.get("dry_run", True))
+        started = time.perf_counter()
+        text = str(text or "")
+        if not text.strip():
+            raise ValueError("text must be a non-empty string")
+
+        sysfs_before = npu_busy_time_us()
+        embedded = self.client.embed([text], purpose="query")
+        sysfs_after = npu_busy_time_us()
+        if not embedded.vectors:
+            raise RuntimeError("embedding service returned no vectors")
+        message_vec = embedded.vectors[0]
+        similarities = self._prototype_scores(message_vec)
+
+        evidence: list[dict[str, Any]] = []
+        labels: dict[str, Any] = {}
+
+        tool_rule, tool_codes, tool_evidence = best_rule(text, "tool_needed")
+        tool_proto = max([similarities.get("tool_needed", 0.0)], default=0.0)
+        # Similarity alone is too broad for action classification; require either
+        # a deterministic rule hit or a very strong prototype match.
+        tool_conf = round(max(tool_rule, tool_proto if tool_proto >= 0.88 else 0.0), 3)
+        labels["tool_needed"] = {"value": tool_conf >= 0.72, "confidence": tool_conf, "threshold": 0.72, "reason_codes": tool_codes}
+        evidence.extend(tool_evidence)
+        if tool_proto > 0:
+            evidence.append({"label": "tool_needed", "source": "prototype_similarity", "prototype": "tool_needed", "score": round(tool_proto, 3)})
+
+        mem_label, mem_conf, mem_codes, mem_ev = self._memory_label(text, similarities)
+        labels["memory_candidate"] = {"value": mem_label, "confidence": round(mem_conf, 3), "threshold": 0.78, "reason_codes": mem_codes}
+        evidence.extend(mem_ev)
+
+        urgency_value, urgency_conf, urgency_scores, urgency_codes, urgency_ev = self._urgency_label(text, similarities)
+        labels["urgency"] = {"value": urgency_value, "confidence": round(urgency_conf, 3), "scores": {k: round(v, 3) for k, v in urgency_scores.items()}, "reason_codes": urgency_codes}
+        evidence.extend(urgency_ev)
+
+        workflow_value, workflow_conf, workflow_scores, workflow_ev = self._workflow_label(similarities, text)
+        labels["workflow_category"] = {"value": workflow_value, "confidence": round(workflow_conf, 3), "scores": {k: round(v, 3) for k, v in workflow_scores.items()}}
+        evidence.extend(workflow_ev)
+
+        safety_rule, safety_codes, safety_evidence = best_rule(text, "safety")
+        safety_proto = 0.0
+        safety_conf = round(max(safety_rule, safety_proto), 3)
+        labels["safety_confirmation_required"] = {"value": safety_conf >= 0.80, "confidence": safety_conf, "threshold": 0.80, "reason_codes": safety_codes}
+        evidence.extend(safety_evidence)
+
+        npu_delta = embedded.npu_busy_delta_us
+        sysfs_delta = None if sysfs_before is None or sysfs_after is None else sysfs_after - sysfs_before
+        warnings = list(self.warnings)
+        if not npu_delta or npu_delta <= 0:
+            warnings.append("embedding call did not report positive npu_busy_delta_us; NPU execution not proven for this request")
+        if sysfs_delta is not None and sysfs_delta <= 0:
+            warnings.append("sysfs npu_busy_time_us did not increase during classification request")
+
+        response: dict[str, Any] = {
+            "id": item_id,
+            "model": MODEL,
+            "created": int(time.time()),
+            "duration_ms": round((time.perf_counter() - started) * 1000, 3),
+            "npu_busy_delta_us": npu_delta,
+            "sysfs_npu_busy_delta_us": sysfs_delta,
+            "dry_run": dry_run,
+            "labels": labels,
+            "warnings": warnings,
+        }
+        if include_evidence:
+            response["evidence"] = evidence[:30]
+        if include_embedding_debug:
+            response["embedding_debug"] = {"embedding_dim": len(message_vec), "prototype_scores": {k: round(v, 3) for k, v in similarities.items()}}
+        return response
+
+    def batch_classify(self, items: list[dict[str, Any]], options: dict[str, Any] | None = None) -> dict[str, Any]:
+        if not items:
+            raise ValueError("items must contain at least one classification request")
+        if len(items) > self.max_batch_size:
+            raise ValueError(f"items exceeds max_batch_size={self.max_batch_size}")
+        started = time.perf_counter()
+        results = [self.classify(item.get("id"), str(item.get("text") or ""), options) for item in items]
+        return {
+            "model": MODEL,
+            "duration_ms": round((time.perf_counter() - started) * 1000, 3),
+            "npu_busy_delta_us": sum((r.get("npu_busy_delta_us") or 0) for r in results),
+            "results": results,
+        }
+
+    def _prototype_scores(self, vec: list[float]) -> dict[str, float]:
+        assert self.prototype_vectors is not None
+        scores: dict[str, float] = {}
+        for key, prototype_vec in zip(self.prototype_keys, self.prototype_vectors):
+            scores[key] = max(scores.get(key, 0.0), cosine(vec, prototype_vec))
+        return scores
+
+    def _memory_label(self, text: str, scores: dict[str, float]) -> tuple[str, float, list[str], list[dict[str, Any]]]:
+        """Decide whether ``text`` is a memory candidate and of which kind.
+
+        Combines the "memory" rule score from ``best_rule`` with the best of the
+        five ``memory_*`` prototype similarities in ``scores``.
+
+        Returns:
+            ``(label, confidence, reason_codes, evidence)``. ``label`` is
+            ``"none"`` when the combined confidence is below 0.78, or when there
+            is neither an explicit rule hit, a durable-fact phrase, nor a
+            prototype score of at least 0.88; in that case the reported
+            confidence is clamped to the range 0.0-0.77.
+        """
+        rule_score, codes, evidence = best_rule(text, "memory")
+        candidates = {
+            "user_preference": scores.get("memory_user_preference", 0.0),
+            "durable_user_fact": scores.get("memory_durable_user_fact", 0.0),
+            "environment_fact": scores.get("memory_environment_fact", 0.0),
+            "workflow_convention": scores.get("memory_workflow_convention", 0.0),
+            "skill_candidate": scores.get("memory_skill_candidate", 0.0),
+        }
+        # Start from the best-matching prototype; ties keep the earliest entry above.
+        label, proto_score = max(candidates.items(), key=lambda kv: kv[1])
+        confidence = max(proto_score, rule_score)
+        explicit_memory = rule_score >= 0.78
+        durable_fact_hint = bool(re.search(r"\b(project uses|repo uses|environment uses|runs on|standard practice|convention|workflow convention)\b", text, re.I))
+        # An explicit rule hit may override the prototype label. The first matching
+        # branch wins; if none matches, the prototype label is kept.
+        if explicit_memory:
+            if re.search(r"\b(prefer|preference|call me|my name|I live|I am)\b", text, re.I):
+                label = "user_preference" if re.search(r"\b(prefer|preference)\b", text, re.I) else "durable_user_fact"
+            elif durable_fact_hint:
+                label = "environment_fact"
+            elif re.search(r"\b(skill|procedure|workflow)\b", text, re.I):
+                label = "skill_candidate"
+        # BGE prototype similarities are advisory but broad; avoid recommending
+        # memory writes from similarity alone unless the text also has durable-
+        # fact language or an unusually strong prototype match.
+        if confidence < 0.78 or (not explicit_memory and not durable_fact_hint and proto_score < 0.88):
+            label = "none"
+        else:
+            # The evidence entry names the final label, which may be a rule
+            # override, while the score is always the best prototype similarity.
+            evidence.append({"label": "memory_candidate", "source": "prototype_similarity", "prototype": f"memory_{label}", "score": round(proto_score, 3)})
+        # Clamp the reported confidence to 0.77 at most for "none", keeping it under the 0.78 threshold.
+        return label, confidence if label != "none" else max(0.0, min(confidence, 0.77)), codes, evidence
+
+    def _urgency_label(self, text: str, scores: dict[str, float]) -> tuple[str, float, dict[str, float], list[str], list[dict[str, Any]]]:
+        high_rule, high_codes, high_ev = best_rule(text, "urgency_high")
+        critical_rule, critical_codes, critical_ev = best_rule(text, "urgency_critical")
+        low_rule = 0.82 if re.search(r"\b(no rush|whenever convenient|low priority|someday|backlog)\b", text, re.I) else 0.0
+        # Urgency is safety-sensitive for notifications, so require explicit
+        # language instead of relying on broad prototype similarity.
+        score_map = {
+            # Urgency should be explicit; broad embedding similarity otherwise
+            # turns neutral requests such as "what time is it" into low/high/critical urgency.
+            "low": low_rule,
+            "normal": 0.68,
+            "high": high_rule,
+            "critical": critical_rule,
+        }
+        if score_map["critical"] >= 0.9:
+            score_map["normal"] = 0.05
+        elif score_map["high"] >= 0.8 or score_map["low"] >= 0.8:
+            score_map["normal"] = 0.2
+        value, confidence = max(score_map.items(), key=lambda kv: kv[1])
+        evidence = high_ev + critical_ev
+        return value, confidence, score_map, sorted(set(high_codes + critical_codes)), evidence
+
+    def _workflow_label(self, scores: dict[str, float], text: str = "") -> tuple[str, float, dict[str, float], list[dict[str, Any]]]:
+        score_map = {category: scores.get(f"workflow_{category}", 0.0) for category in WORKFLOW_CATEGORIES if category != "unknown"}
+        rule_patterns: list[tuple[str, str]] = [
+            ("chat", r"\bwhat time is it|what date is it|general question\b"),
+            ("kanban", r"\bkanban|task card|review-required|blocked\b"),
+            ("smart_home", r"\blights?|thermostat|home assistant|hue|wiz\b"),
+            ("media", r"\btranscribe|voice memo|audio|video|image|spotify|youtube\b"),
+            ("research", r"\bresearch|compare sources|papers?|literature|web search\b"),
+            ("devops", r"\bsystemd|docker|kubernetes|service|ports?|gateway|deploy|infrastructure\b"),
+            ("debugging", r"\bdebug|failing|traceback|logs?|reproduce|diagnose\b"),
+            ("coding", r"\bimplement|code|pytest|refactor|feature|PR\b"),
+            ("note_taking", r"\bobsidian|notes?|memory|diary|chroma|reindex\b"),
+            ("productivity", r"\bcalendar|email|spreadsheet|presentation|notion|airtable|linear\b"),
+        ]
+        rule_value: str | None = None
+        for category, pattern in rule_patterns:
+            if re.search(pattern, text, re.I):
+                rule_value = category
+                break
+        if rule_value:
+            value = rule_value
+            confidence = max(0.86, score_map.get(rule_value, 0.0))
+            score_map[rule_value] = confidence
+            source = "rule"
+        else:
+            value, confidence = max(score_map.items(), key=lambda kv: kv[1])
+            source = "prototype_similarity"
+        if confidence < 0.52:
+            value = "unknown"
+            confidence = 0.52
+        score_map["unknown"] = 1.0 - confidence if value != "unknown" else confidence
+        evidence = [{"label": "workflow_category", "source": source, "prototype": f"workflow_{value}", "score": round(confidence, 3)}]
+        return value, confidence, score_map, evidence
+
+
+class Handler(BaseHTTPRequestHandler):
+    server_version = "AtlasRouterClassifier/0.1"
+
+    @property
+    def svc(self) -> ClassifierService:
+        return self.server.classifier_service  # type: ignore[attr-defined]
+
+    def do_GET(self) -> None:
+        path = self.path.split("?", 1)[0].rstrip("/") or "/"
+        if path in {"/", "/healthz", "/readyz", "/health"}:
+            self.write_json(self.svc.health())
+        elif path == "/v1/labels":
+            self.write_json(self.svc.labels())
+        else:
+            self.write_json({"error": "not found"}, status=404)
+
+    def do_POST(self) -> None:
+        path = self.path.split("?", 1)[0].rstrip("/") or "/"
+        try:
+            payload = self.read_json()
+            options = payload.get("options") if isinstance(payload.get("options"), dict) else {}
+            if path == "/v1/classify":
+                self.write_json(self.svc.classify(payload.get("id"), str(payload.get("text") or ""), options))
+            elif path == "/v1/batch_classify":
+                items = payload.get("items")
+                if not isinstance(items, list):
+                    raise ValueError("items must be a list")
+                self.write_json(self.svc.batch_classify(items, options))
+            else:
+                self.write_json({"error": "not found"}, status=404)
+        except ValueError as exc:
+            self.write_json({"error": str(exc)}, status=400)
+        except Exception as exc:
+            self.write_json({"error": f"{type(exc).__name__}: {exc}"}, status=500)
+
+    def read_json(self) -> dict[str, Any]:
+        length = int(self.headers.get("Content-Length") or 0)
+        body = self.rfile.read(length).decode("utf-8", "replace") if length else "{}"
+        payload = json.loads(body or "{}")
+        if not isinstance(payload, dict):
+            raise ValueError("JSON body must be an object")
+        return payload
+
+    def write_json(self, payload: dict[str, Any], status: int = 200) -> None:
+        body = json.dumps(payload, ensure_ascii=False, sort_keys=True).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def log_message(self, format: str, *args: Any) -> None:  # noqa: A002 - stdlib override name
+        print(f"{self.address_string()} - {format % args}", file=sys.stderr, flush=True)
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Dry-run Atlas/Hermes router classifier")
+    parser.add_argument("--host", default=os.environ.get("OPENVINO_CLASSIFIER_HOST", DEFAULT_HOST))
+    parser.add_argument("--port", type=int, default=env_int("OPENVINO_CLASSIFIER_PORT", DEFAULT_PORT))
+    parser.add_argument("--embed-url", default=os.environ.get("OPENVINO_CLASSIFIER_EMBED_URL", DEFAULT_EMBED_URL))
+    parser.add_argument("--timeout-s", type=float, default=env_float("OPENVINO_CLASSIFIER_TIMEOUT_S", 30.0))
+    parser.add_argument("--max-batch-size", type=int, default=env_int("OPENVINO_CLASSIFIER_MAX_BATCH_SIZE", DEFAULT_MAX_BATCH_SIZE))
+    parser.add_argument("--no-warmup", action="store_true", help="skip prototype embedding warmup until first request")
+    args = parser.parse_args()
+
+    service = ClassifierService(args.embed_url, timeout_s=args.timeout_s, max_batch_size=args.max_batch_size)
+    if not args.no_warmup:
+        service.warmup()
+    httpd = ThreadingHTTPServer((args.host, args.port), Handler)
+    httpd.classifier_service = service  # type: ignore[attr-defined]
+    print(f"{SERVICE} listening on {args.host}:{args.port} embed_url={args.embed_url} mode=dry_run", flush=True)
+    try:
+        httpd.serve_forever()
+    except KeyboardInterrupt:
+        pass
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+"""Local-only smoke test for the dry-run OpenVINO router classifier.
+
+This script uses only synthetic fixture messages. It assumes router_classifier.py is
+already running on localhost and never installs/enables a persistent service.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+DEFAULT_BASE_URL = "http://127.0.0.1:18819"
+BUSY_FILE = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
+FIXTURE = Path(__file__).resolve().parent / "fixtures" / "atlas_hermes_messages.jsonl"
+
+
+def npu_busy_time_us() -> int | None:
+    try:
+        return int(BUSY_FILE.read_text().strip())
+    except Exception:
+        return None
+
+
+def get_json(url: str, timeout_s: float) -> dict[str, Any]:
+    with urllib.request.urlopen(url, timeout=timeout_s) as response:  # noqa: S310 - localhost smoke URL
+        return json.loads(response.read().decode("utf-8"))
+
+
+def post_json(url: str, payload: dict[str, Any], timeout_s: float) -> dict[str, Any]:
+    request = urllib.request.Request(
+        url,
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    with urllib.request.urlopen(request, timeout=timeout_s) as response:  # noqa: S310 - localhost smoke URL
+        return json.loads(response.read().decode("utf-8"))
+
+
+def load_fixture(limit: int) -> list[dict[str, Any]]:
+    rows = [json.loads(line) for line in FIXTURE.read_text().splitlines() if line.strip()]
+    return rows[:limit]
+
+
+def assert_expected(result: dict[str, Any], expected: dict[str, Any]) -> list[str]:
+    failures: list[str] = []
+    labels = result.get("labels", {})
+    for key, value in expected.items():
+        actual_label = labels.get(key, {})
+        actual_value = actual_label.get("value")
+        if actual_value != value:
+            failures.append(f"{result.get('id')}: {key} expected {value!r}, got {actual_value!r}")
+    return failures
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Smoke-test a running localhost router classifier")
+    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
+    parser.add_argument("--timeout-s", type=float, default=30.0)
+    parser.add_argument("--limit", type=int, default=10)
+    args = parser.parse_args()
+
+    if not args.base_url.startswith("http://127.0.0.1:") and not args.base_url.startswith("http://localhost:"):
+        raise SystemExit("refusing non-local base URL; this smoke is localhost-only")
+
+    before = npu_busy_time_us()
+    started = time.perf_counter()
+    try:
+        health = get_json(f"{args.base_url.rstrip('/')}/healthz", args.timeout_s)
+        labels = get_json(f"{args.base_url.rstrip('/')}/v1/labels", args.timeout_s)
+        rows = load_fixture(args.limit)
+        results = []
+        failures: list[str] = []
+        for row in rows:
+            result = post_json(
+                f"{args.base_url.rstrip('/')}/v1/classify",
+                {"id": row["id"], "text": row["text"], "options": {"include_evidence": False, "dry_run": True}},
+                args.timeout_s,
+            )
+            results.append(result)
+            failures.extend(assert_expected(result, row.get("expected", {})))
+        after = npu_busy_time_us()
+    except urllib.error.URLError as exc:
+        raise SystemExit(f"smoke failed: {exc}") from exc
+
+    response_npu_delta = sum((r.get("npu_busy_delta_us") or 0) for r in results)
+    outer_sysfs_delta = None if before is None or after is None else after - before
+    npu_proven = response_npu_delta > 0 and (outer_sysfs_delta is None or outer_sysfs_delta > 0)
+    summary = {
+        "ok": not failures,
+        "service": health.get("service"),
+        "mode": health.get("mode"),
+        "model": health.get("model"),
+        "label_count": len(labels.get("prototype_ids", [])),
+        "fixture_count": len(results),
+        "duration_ms": round((time.perf_counter() - started) * 1000, 3),
+        "response_npu_busy_delta_us": response_npu_delta,
+        "outer_sysfs_npu_busy_delta_us": outer_sysfs_delta,
+        "npu_proven": npu_proven,
+        "failures": failures,
+    }
+    print(json.dumps(summary, indent=2, sort_keys=True))
+    return 0 if not failures and npu_proven else 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import importlib.util
+import json
+import sys
+import unittest
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+MODULE_PATH = ROOT / "router_classifier.py"
+spec = importlib.util.spec_from_file_location("router_classifier", MODULE_PATH)
+assert spec and spec.loader
+router_classifier = importlib.util.module_from_spec(spec)
+sys.modules["router_classifier"] = router_classifier
+spec.loader.exec_module(router_classifier)
+
+
+class FakeClient:
+    def embed(self, texts, *, purpose="query"):
+        # Deterministic toy embeddings based on keyword buckets. The tests focus on
+        # rule safety and API shape; live smoke tests cover the real NPU upstream.
+        vectors = []
+        for text in texts:
+            t = text.lower()
+            vec = [0.0] * 8
+            if any(w in t for w in ["time", "current", "weather", "news", "port", "git", "logs", "systemd"]):
+                vec[0] = 1.0
+            if any(w in t for w in ["remember", "prefer", "preference"]):
+                vec[1] = 1.0
+            if any(w in t for w in ["urgent", "down", "outage", "critical"]):
+                vec[2] = 1.0
+            if any(w in t for w in ["code", "pytest", "debug", "git", "diff"]):
+                vec[3] = 1.0
+            if any(w in t for w in ["service", "systemd", "port", "gateway", "docker"]):
+                vec[4] = 1.0
+            if any(w in t for w in ["kanban", "task", "blocked", "review"]):
+                vec[5] = 1.0
+            if any(w in t for w in ["light", "thermostat"]):
+                vec[6] = 1.0
+            if any(w in t for w in ["transcribe", "voice", "memo", "audio"]):
+                vec[7] = 1.0
+            if not any(vec):
+                vec[0] = 0.2
+            vectors.append(vec)
+        return router_classifier.EmbedResult(vectors=vectors, npu_busy_delta_us=123, duration_ms=1.0, embedding_dim=8)
+
+
+class RouterClassifierTests(unittest.TestCase):
+    def service(self):
+        svc = router_classifier.ClassifierService("http://fake.local/v1/embeddings")
+        svc.client = FakeClient()
+        svc.warmup()
+        return svc
+
+    def test_health_and_label_schema(self):
+        svc = self.service()
+        health = svc.health()
+        self.assertEqual(health["service"], "atlas-router-classifier")
+        self.assertEqual(health["mode"], "dry_run")
+        self.assertIn("tool_needed", health["labels"])
+        labels = svc.labels()
+        self.assertIn("workflow_category", labels["enums"])
+        self.assertIn("safety_confirmation_required", labels["thresholds"])
+
+    def test_explicit_preference_is_memory_candidate(self):
+        result = self.service().classify("pref", "Remember that I prefer concise terminal replies.")
+        self.assertEqual(result["labels"]["memory_candidate"]["value"], "user_preference")
+        self.assertGreaterEqual(result["labels"]["memory_candidate"]["confidence"], 0.78)
+        self.assertFalse(result["labels"]["safety_confirmation_required"]["value"])
+
+    def test_current_local_state_needs_tool(self):
+        result = self.service().classify("port", "Check whether port 18819 is listening and inspect systemd logs.")
+        self.assertTrue(result["labels"]["tool_needed"]["value"])
+        self.assertIn("local_state_requested", result["labels"]["tool_needed"]["reason_codes"])
+
+    def test_live_gateway_restart_requires_confirmation(self):
+        result = self.service().classify("safe", "Restart the live Atlas gateway and switch primary routing.")
+        self.assertTrue(result["labels"]["safety_confirmation_required"]["value"])
+        self.assertIn("live_service_or_routing_change", result["labels"]["safety_confirmation_required"]["reason_codes"])
+
+    def test_batch_shape(self):
+        result = self.service().batch_classify([
+            {"id": "a", "text": "What time is it?"},
+            {"id": "b", "text": "Delete the existing collection and reindex it in place."},
+        ])
+        self.assertEqual(result["model"], router_classifier.MODEL)
+        self.assertEqual(len(result["results"]), 2)
+        self.assertGreater(result["npu_busy_delta_us"], 0)
+
+    def test_batch_limits_are_enforced(self):
+        svc = self.service()
+        with self.assertRaisesRegex(ValueError, "at least one"):
+            svc.batch_classify([])
+        too_many = [{"id": str(i), "text": "What time is it?"} for i in range(router_classifier.DEFAULT_MAX_BATCH_SIZE + 1)]
+        with self.assertRaisesRegex(ValueError, "max_batch_size"):
+            svc.batch_classify(too_many)
+
+    def test_fixture_file_is_valid_jsonl(self):
+        fixture = ROOT / "fixtures" / "atlas_hermes_messages.jsonl"
+        rows = [json.loads(line) for line in fixture.read_text().splitlines() if line.strip()]
+        self.assertGreaterEqual(len(rows), 8)
+        for row in rows:
+            self.assertIn("id", row)
+            self.assertIn("text", row)
+            self.assertIn("expected", row)
+
+    def test_synthetic_fixture_expectations(self):
+        svc = self.service()
+        fixture = ROOT / "fixtures" / "atlas_hermes_messages.jsonl"
+        rows = [json.loads(line) for line in fixture.read_text().splitlines() if line.strip()]
+        for row in rows:
+            with self.subTest(row=row["id"]):
+                result = svc.classify(row["id"], row["text"], {"include_evidence": False})
+                labels = result["labels"]
+                for label_name, expected_value in row["expected"].items():
+                    self.assertEqual(labels[label_name]["value"], expected_value)
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -0,0 +1,174 @@
+# OpenVINO NPU document/image triage prototype
+
+Local-only, CLI-first prototype for triaging screenshots, photos/scans, and PDF page images.
+It returns structured JSON metadata and explicitly reports CPU vs NPU stages.
+Optional HTTP is a localhost/loopback-only prototype on `127.0.0.1:18829` when explicitly started; non-loopback binds are rejected and it is not a live Atlas/Hermes/RAG integration.
+
+Location: `/home/will/lab/swarm/openvino-doc-image-triage-npu/`
+
+## Privacy and safety
+
+- No external uploads.
+- The only network call is optional localhost-only embeddings at `127.0.0.1:18817`.
+- Raw OCR/sidecar text is redacted by default and is not logged.
+- Full source paths are omitted by default; responses include basename and SHA-256.
+- Allowed roots are enforced for CLI/server requests.
+- This prototype does not mutate Obsidian, RAG, Chroma, vector collections, routing, or gateway services.
+- Do not process broad private document/image directories; use generated synthetic fixtures unless Will explicitly approves a narrow source root.
+- See `SPEC.md` for the full CLI contract, smoke-test plan, NPU verification plan, docs implications, and no-go/defer criteria.
+
+## CPU vs NPU stages
+
+CPU:
+- file intake, allowed-root checks, size checks, hashing
+- image/PDF decoding/rendering and normalization
+- optional local text extraction from sidecars or PDF text libraries
+- regex metadata extraction and rule-based category fallback
+- final needs-attention rules
+
+NPU:
+- needs-attention semantic embedding, via existing local OpenVINO embeddings service on `:18817`
+- verified with `/sys/class/accel/accel0/device/npu_busy_time_us` before/after each embedding call
+
+Not configured in v1:
+- image category classifier on NPU. The JSON reports this as `CPU rule fallback (NPU model not configured in prototype v1)`. A future task can add a static-shape MobileNet/EfficientNet/ResNet OpenVINO IR model.
+- OCR on NPU. OCR remains CPU/local plumbing in v1.
+
+## Files
+
+- `triage.py` — core library and CLI.
+- `server.py` — stdlib HTTP server with `/healthz`, `/models`, `/triage`, `/triage/batch`.
+- `openvino-doc-image-triage.service` — local-only user-systemd service template for `127.0.0.1:18829`, limited to this prototype directory as its default allowed root.
+- `make_samples.py` — creates synthetic non-private image/PDF samples.
+- `tests/smoke_test.py` — end-to-end smoke test, including NPU busy-time verification when `:18817` is reachable.
+- `samples/` — generated synthetic fixtures.
+
+## Requirements
+
+Use the existing NPU venv when available:
+
+```bash
+cd /home/will/lab/swarm/openvino-doc-image-triage-npu
+/home/will/.venvs/npu/bin/python -m pip install pillow
+```
+
+`pillow` is already installed in the existing venv at `/home/will/.venvs/npu`, so the command above is normally a no-op there. Optional local PDF libraries improve PDF text extraction and rendering:
+
+```bash
+/home/will/.venvs/npu/bin/python -m pip install pypdf pypdfium2
+```
+
+The smoke tests do not require external services except the existing localhost `:18817` embeddings service for positive NPU verification.
+
+## CLI usage
+
+Generate synthetic samples:
+
+```bash
+cd /home/will/lab/swarm/openvino-doc-image-triage-npu
+/home/will/.venvs/npu/bin/python make_samples.py
+```
+
+Triage local files:
+
+```bash
+/home/will/.venvs/npu/bin/python triage.py \
+  --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu \
+  --pretty \
+  samples/synthetic_invoice.png samples/synthetic_invoice.pdf
+```
+
+Disable the local NPU embeddings call if needed:
+
+```bash
+/home/will/.venvs/npu/bin/python triage.py --no-embeddings --allowed-root "$PWD" samples/synthetic_receipt.png
+```
+
+Include OCR/sidecar text in a single response only when explicitly requested:
+
+```bash
+/home/will/.venvs/npu/bin/python triage.py --include-ocr-text --allowed-root "$PWD" samples/synthetic_invoice.png
+```
+
+## HTTP usage
+
+The prototype is CLI-first. The local HTTP wrapper can optionally be run as a reviewed user-systemd service on `127.0.0.1:18829`, with an allowlist rooted at this prototype directory. Keep it local-only, and do not broaden the allowed roots to private document/image directories without explicit approval. Before starting it, check that the port is free:
+
+```bash
+ss -ltnp | grep ':18829\b' || true
+```
+
+Start a local-only server in the foreground, and stop it (for example with Ctrl-C) once the smoke test is done:
+
+```bash
+cd /home/will/lab/swarm/openvino-doc-image-triage-npu
+/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18829 --allowed-root "$PWD"
+```
+
+Install/enable the reviewed local-only service template when the HTTP wrapper should persist across logins:
+
+```bash
+install -m 0644 openvino-doc-image-triage.service ~/.config/systemd/user/openvino-doc-image-triage.service
+systemctl --user daemon-reload
+systemctl --user enable --now openvino-doc-image-triage.service
+systemctl --user status openvino-doc-image-triage.service --no-pager
+```
+
+Call it with synthetic/non-private fixtures only:
+
+```bash
+curl -sS http://127.0.0.1:18829/healthz | jq
+curl -sS http://127.0.0.1:18829/models | jq
+curl -sS -X POST http://127.0.0.1:18829/triage \
+  -H 'Content-Type: application/json' \
+  -d '{"path":"/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png","options":{"allowed_roots":["/home/will/lab/swarm/openvino-doc-image-triage-npu"]}}' | jq
+```
+
+Do not point it at private document/image directories during smoke tests unless Will explicitly approves the exact source root.
+
+## Smoke test
+
+```bash
+cd /home/will/lab/swarm/openvino-doc-image-triage-npu
+/home/will/.venvs/npu/bin/python tests/smoke_test.py
+```
+
+Expected: JSON ending with `"ok": true`. The smoke test generates only synthetic fixtures, verifies non-loopback HTTP binds are rejected, starts its temporary server on a preflighted free localhost port, and terminates it before exit. If the embeddings service is up, the result should show positive NPU busy-time delta and each embedded page should report `verified_npu: true`.
+
+## Example output shape
+
+```json
+{
+  "file_id": "sha256:...",
+  "source_path_basename": "synthetic_invoice.png",
+  "media_type": "image",
+  "page_count": 1,
+  "pages": [
+    {
+      "page_index": 0,
+      "classification": {
+        "label": "bill_or_invoice",
+        "confidence": 0.71,
+        "device": "CPU",
+        "method": "rule_based_fallback"
+      },
+      "needs_attention": {
+        "value": true,
+        "device": "NPU+CPU",
+        "reasons": ["amount_due", "due_date_present"],
+        "embedding": {"verified_npu": true, "npu_busy_delta_us": 12345}
+      },
+      "metadata": {"dates_count": 1, "amounts_count": 1, "raw_values_redacted": true},
+      "ocr": {"available": true, "device": "CPU"}
+    }
+  ],
+  "processing_device_summary": {
+    "file_intake": "CPU",
+    "image_category_classification": "CPU rule fallback (NPU model not configured in prototype v1)",
+    "needs_attention_embedding": "NPU via local :18817",
+    "metadata_extraction": "CPU",
+    "npu_verified": true
+  },
+  "privacy": {"external_uploads": false, "raw_text_logged": false}
+}
+```
@@ -0,0 +1,146 @@
+# OpenVINO NPU document/image triage spec
+
+Status: CLI-first prototype specification; not a live Atlas/Hermes integration.
+
+## Safety stance
+
+- Default workflow is local CLI execution against explicitly named files.
+- Optional HTTP stays off unless a human starts it; when started, it is constrained to loopback (`127.0.0.1`, `::1`, or `localhost`) and is intended for `127.0.0.1:18829` only.
+- No persistent systemd unit, Docker service, gateway hook, Atlas/Hermes route, RAG route, Chroma/vector collection mutation, or in-place reindexing is part of this spec.
+- Smoke data must be synthetic/non-private only. Do not point this tool at Will's private document, image, screenshot, Downloads, Desktop, Obsidian, or photo-library directories without explicit approval.
+- NPU claims require `/sys/class/accel/accel0/device/npu_busy_time_us` before/after deltas. HTTP 200, JSON output, or model-load success alone is not NPU proof.
+
+## Recommended model/runtime
+
+Recommended v1 runtime:
+
+- File intake, hashing, MIME/extension checks, image/PDF rendering, sidecar/native PDF text extraction, metadata extraction, and category fallback: local Python CPU path using Pillow plus optional `pypdf`/`pypdfium2`.
+- Needs-attention semantic check: reuse the live localhost OpenVINO embeddings service on `127.0.0.1:18817`, currently `bge-base-en-v1.5-int8-ov`, and verify each embedding call with `npu_busy_time_us` deltas.
+- Category classification in v1: CPU rule fallback, explicitly reported as not an NPU image model.
+
+Why this is the recommended v1:
+
+- It avoids private-data exposure: no external upload path and no broader local file scanning.
+- It avoids collection/routing risk by using the existing embeddings API as a stateless feature extractor only; it does not write to RAG or Chroma.
+- It gives a real NPU verification hook for the semantic stage without overclaiming that OCR/image classification are NPU-backed.
+- It keeps the prototype useful even when optional PDF dependencies or the embeddings service are unavailable: it can fall back to CPU-only metadata/rule output and mark NPU verification false.
+
+Deferred model work:
+
+- NPU image category classifier: defer until a static-shape OpenVINO IR image model such as MobileNet/EfficientNet/ResNet is selected, calibrated for the label set, and smoke-tested with busy-time deltas.
+- NPU OCR/VLM: defer; OCR remains local CPU text plumbing in v1.
+
+## CLI contract
+
+Command:
+
+```bash
+cd /home/will/lab/swarm/openvino-doc-image-triage-npu
+/home/will/.venvs/npu/bin/python triage.py \
+  --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu \
+  --max-pages 3 \
+  --pretty \
+  samples/synthetic_invoice.png samples/synthetic_invoice.pdf
+```
+
+Inputs:
+
+- Positional `paths`: one or more local image/PDF paths.
+- `--allowed-root ROOT`: may repeat; every requested path must resolve under one of these roots. Default is current directory.
+- `--max-pages N`: maximum rendered/extracted PDF pages; default 3.
+- `--no-embeddings`: disables the localhost `:18817` embedding/NPU check and reports CPU fallback/no text.
+- `--dry-run`: skip image/PDF rendering while still checking intake/hash/text/metadata where available.
+- `--include-ocr-text`: include raw extracted/sidecar text in this single response only; off by default.
+- `--include-full-path`: include resolved full paths; off by default.
+- `--pretty`: pretty-print JSON.
+
+Output:
+
+- Batch JSON: `{ "ok": bool, "files": [...], "generated_at": "..." }`.
+- Per file result includes `file_id` as `sha256:<digest>`, `source_path_basename`, media type, file size, pages, classification, needs-attention result, metadata counts/flags, privacy flags, and processing-device summary.
+- Raw OCR/text and full paths are omitted unless explicitly requested.
+- NPU evidence is per embedding call: `used`, `verified_npu`, `npu_busy_delta_us`, endpoint, and wall time.
+
+Exit behavior:
+
+- Exit 0 when all files triage successfully.
+- Exit 2 when one or more files fail policy/intake/processing checks.
+
+## Optional localhost HTTP contract
+
+HTTP is optional and not enabled by this spec. If explicitly started for a smoke test or local demo, use localhost and port 18829:
+
+```bash
+cd /home/will/lab/swarm/openvino-doc-image-triage-npu
+ss -ltnp | grep ':18829\b' || true
+/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18829 --allowed-root "$PWD"
+```
+
+Endpoints:
+
+- `GET /healthz` or `/health` (also served at `/`): service name, bind policy, configured allowed roots, privacy flags, and current `npu_busy_time_us`.
+- `GET /models`: reports v1 stages and whether each is CPU or NPU-backed.
+- `POST /triage`: `{ "path": "/local/file", "options": {...} }` -> `{ "ok": true, "result": ... }`.
+- `POST /triage/batch`: `{ "paths": ["/local/file"], "options": {...} }` -> batch JSON.
+
+HTTP privacy/policy rules:
+
+- Server startup `--allowed-root` is the outer allowlist.
+- Request `options.allowed_roots` may narrow that allowlist but must not widen it.
+- Request `options.embedding_url` may only target the configured local loopback embeddings route `http://127.0.0.1:18817/v1/embeddings` (or localhost equivalent); external or alternate endpoints are rejected.
+- Request bodies and raw text are not logged by the stdlib handler.
+- Stop the temporary server after the smoke test or demo.
+
+## Synthetic smoke-test plan
+
+Use only generated fixtures under the prototype directory:
+
+```bash
+cd /home/will/lab/swarm/openvino-doc-image-triage-npu
+/home/will/.venvs/npu/bin/python make_samples.py
+/home/will/.venvs/npu/bin/python tests/smoke_test.py
+```
+
+Expected smoke coverage:
+
+- Creates synthetic invoice/receipt/form-like image/PDF fixtures.
+- Runs CLI triage against the synthetic invoice image/PDF under an explicit allowed root.
+- Asserts privacy flags (`external_uploads: false`, no full path by default).
+- Asserts invoice category/needs-attention behavior on synthetic text.
+- Starts a temporary localhost HTTP server on a preflighted free ephemeral port, calls `/healthz` and `/triage`, verifies no full path leakage, rejects attempts to widen allowed roots, rejects external embedding URLs, and verifies non-loopback binds are rejected.
+- Terminates the temporary server.
+
+The smoke port in tests should stay OS-assigned ephemeral/non-live to avoid claiming `18829` as a persistent service.
+
+## NPU busy-time verification plan
+
+For every test that claims NPU use:
+
+1. Read `/sys/class/accel/accel0/device/npu_busy_time_us` before the operation.
+2. Perform an operation that should call the live embeddings service on `127.0.0.1:18817` with non-empty synthetic text.
+3. Read `npu_busy_time_us` after the operation.
+4. Require both:
+   - the per-result embedding object reports `used: true`, `verified_npu: true`, and `npu_busy_delta_us > 0`; and
+   - the outer before/after sysfs value increased.
+5. If sysfs is missing or `:18817` is unavailable, do not claim NPU success; report CPU fallback / embedding unavailable and keep the smoke result honest.
+
+## Docs and diagram implications
+
+- Service maps should list document/image triage as CLI-first and optional prototype `127.0.0.1:18829`, not live unless explicitly started.
+- Diagrams must not draw live Atlas/Hermes/gateway/RAG routing to this triage lane.
+- If shown with other candidate sidecars, label it separately from live services: live baseline remains RAG `:18810`, Whisper NPU `:18816`, and embeddings `:18817`; prototype sidecars are reranker `:18818`, classifier/router `:18819`, GenAI worker `:18820`, and optional doc/image triage `:18829`.
+- Runbooks should include CLI smoke, localhost listener checks, busy-time delta verification, and server shutdown instructions.
+- Documentation should state CPU vs NPU stages explicitly so the prototype does not imply NPU OCR or NPU image classification.
+
+## No-go / defer criteria
+
+Do not proceed to implementation, live integration, or persistent service enablement if any of these are true:
+
+- Will has not explicitly approved live routing or persistent service enablement.
+- The requested source path is a private document/image directory or broad home-directory scan rather than synthetic fixtures or an explicitly approved narrow root.
+- The workflow would mutate Obsidian, RAG, Chroma/vector collections, or reindex in place.
+- The optional server would need to bind anywhere other than localhost.
+- NPU busy-time does not increase for an operation being described as NPU-backed.
+- Raw OCR text or full paths would be logged, uploaded, stored durably, or returned without explicit request.
+- PDF/image dependencies are missing and the task requires rendered page analysis rather than metadata/text-only fallback.
+- A future image classifier/OCR/VLM model has not been selected, converted/quantized to OpenVINO, calibrated for the task, and verified on synthetic fixtures with busy-time deltas.
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+from pathlib import Path
+
+from PIL import Image, ImageDraw, ImageFilter
+
+ROOT = Path(__file__).resolve().parent
+SAMPLES = ROOT / "samples"
+
+
+def make_doc(path: Path, lines: list[str], size=(900, 1200), rotate: int = 0, blur: bool = False) -> None:
+    """Render ``lines`` onto a synthetic page image and write a text sidecar.
+
+    The page is a white RGB canvas of ``size`` with one black text line every
+    55 px starting at (70, 70), framed by a gray rectangle. When ``blur`` is
+    set a Gaussian blur of radius 2.5 is applied; a non-zero ``rotate`` is
+    passed to ``Image.rotate`` with an expanding canvas and white fill. The
+    image is saved to ``path`` and the same lines go to ``<path>.txt``.
+    """
+    left, top, line_step = 70, 70, 55
+    page = Image.new("RGB", size, "white")
+    pen = ImageDraw.Draw(page)
+    for row, text in enumerate(lines):
+        pen.text((left, top + row * line_step), text, fill="black")
+    # The frame ends 30 px below where the next text line would start, clamped to the page.
+    text_end = top + len(lines) * line_step
+    frame_bottom = min(size[1] - 50, text_end + 30)
+    pen.rectangle((55, 50, size[0] - 55, frame_bottom), outline="gray", width=3)
+    if blur:
+        page = page.filter(ImageFilter.GaussianBlur(2.5))
+    if rotate:
+        page = page.rotate(rotate, expand=True, fillcolor="white")
+    page.save(path)
+    sidecar = path.with_suffix(path.suffix + ".txt")
+    sidecar.write_text("\n".join(lines) + "\n")
+
+
+def main() -> int:
+    """Generate every synthetic fixture under ``SAMPLES`` and return exit code 0.
+
+    Writes five PNG pages (each with a ``.txt`` sidecar via ``make_doc``), then
+    derives ``synthetic_invoice.pdf`` from the invoice PNG and copies the
+    invoice sidecar text to ``synthetic_invoice.pdf.txt``.
+    """
+    SAMPLES.mkdir(exist_ok=True)
+    make_doc(SAMPLES / "synthetic_invoice.png", [
+        "ACME Utilities Invoice",
+        "Invoice No: INV-2026-0604",
+        "Amount Due: $123.45",
+        "Payment due 2026-06-30",
+        "Please submit payment by the due date.",
+    ])
+    make_doc(SAMPLES / "synthetic_receipt.png", [
+        "Neighborhood Store Receipt",
+        "Subtotal $14.20",
+        "Tax $1.42",
+        "Total $15.62",
+        "Thank you for shopping",
+    ], size=(720, 1100), rotate=3)
+    make_doc(SAMPLES / "synthetic_conversation.png", [
+        "Messages with Alex",
+        "Can you please respond by tomorrow?",
+        "Need signature on the form before Friday.",
+    ], size=(1200, 750))
+    make_doc(SAMPLES / "synthetic_sensitive_form.png", [
+        "Sample Government Form - Fake Data",
+        "Applicant: Test Person",
+        "SSN: 123-45-6789",
+        "Signature required",
+        "Submit by Jan 15, 2027",
+    ], blur=False)
+    make_doc(SAMPLES / "synthetic_blurry.png", [
+        "Low resolution blurred sample",
+        "No action required",
+    ], size=(360, 250), blur=True)
+    # PIL can save a simple local PDF from a synthetic page. This is non-private.
+    # Open the PNG in a context manager so its file handle is closed once the
+    # converted copy has been written out.
+    with Image.open(SAMPLES / "synthetic_invoice.png") as invoice_png:
+        invoice_png.convert("RGB").save(SAMPLES / "synthetic_invoice.pdf", "PDF")
+    (SAMPLES / "synthetic_invoice.pdf.txt").write_text((SAMPLES / "synthetic_invoice.png.txt").read_text())
+    print(f"wrote samples under {SAMPLES}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,16 @@
+# Unit definition for the optional doc/image triage HTTP server (server.py).
+# NOTE(review): the triage spec added in this same change says no persistent
+# systemd unit is part of the prototype and that persistent service enablement
+# needs Will's explicit approval. Confirm that approval before installing or
+# enabling this unit.
+[Unit]
+Description=OpenVINO NPU document/image triage HTTP Service (local-only, port 18829)
+After=network.target openvino-embeddings.service
+Wants=openvino-embeddings.service
+
+[Service]
+Type=simple
+WorkingDirectory=/home/will/lab/swarm/openvino-doc-image-triage-npu
+# server.py reads these two variables as defaults for --host/--port; the
+# ExecStart line below also passes the same values explicitly.
+Environment=DOC_IMAGE_TRIAGE_HOST=127.0.0.1
+Environment=DOC_IMAGE_TRIAGE_PORT=18829
+# The only allowed root is the prototype directory itself.
+ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-doc-image-triage-npu/server.py --host 127.0.0.1 --port 18829 --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu
+Restart=on-failure
+RestartSec=5
+
+[Install]
+WantedBy=default.target
@@ -0,0 +1,2 @@
+Low resolution blurred sample
+No action required
@@ -0,0 +1,3 @@
+Messages with Alex
+Can you please respond by tomorrow?
+Need signature on the form before Friday.
@@ -0,0 +1,5 @@
+ACME Utilities Invoice
+Invoice No: INV-2026-0604
+Amount Due: $123.45
+Payment due 2026-06-30
+Please submit payment by the due date.
@@ -0,0 +1,5 @@
+ACME Utilities Invoice
+Invoice No: INV-2026-0604
+Amount Due: $123.45
+Payment due 2026-06-30
+Please submit payment by the due date.
@@ -0,0 +1,5 @@
+Neighborhood Store Receipt
+Subtotal $14.20
+Tax $1.42
+Total $15.62
+Thank you for shopping
@@ -0,0 +1,5 @@
+Sample Government Form - Fake Data
+Applicant: Test Person
+SSN: 123-45-6789
+Signature required
+Submit by Jan 15, 2027
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+"""Stdlib localhost HTTP wrapper for the triage prototype.
+
+Endpoints:
+- GET /healthz
+- GET /models
+- POST /triage       JSON: {"path":"/local/file", "options": {...}}
+- POST /triage/batch JSON: {"paths":["/local/file"], "options": {...}}
+
+The server binds to 127.0.0.1 by default and accepts only local file paths under
+configured allowed roots. It never uploads document/image contents externally.
+"""
+from __future__ import annotations
+
+import argparse
+import ipaddress
+import json
+import os
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlparse
+
+from triage import DEFAULT_EMBED_URL, TriageOptions, read_npu_busy, triage_batch, triage_file
+
+
+def _validate_loopback_host(host: str) -> str:
+    """Return the stripped bind host if it is ``localhost`` or a loopback IP.
+
+    Raises ValueError for anything else; the prototype must never be bound as
+    a LAN-reachable service.
+    """
+    candidate = host.strip()
+    is_local = candidate == "localhost"
+    if not is_local:
+        try:
+            is_local = ipaddress.ip_address(candidate).is_loopback
+        except ValueError:
+            # Not an IP literal at all (e.g. a DNS name): treated as non-loopback.
+            is_local = False
+    if not is_local:
+        raise ValueError("host must be localhost/loopback for this prototype")
+    return candidate
+
+
+def _roots_within_configured(requested_roots: list[Any], configured_roots: list[Path]) -> list[Path]:
+    """Resolve request-supplied roots, accepting only ones inside the startup allowlist.
+
+    Each requested root is stringified, user-expanded and resolved; it must
+    equal or sit beneath one of the resolved configured roots. The first root
+    that falls outside raises ValueError, so a request can narrow the
+    allowlist but never widen it.
+    """
+    outer = [root.expanduser().resolve() for root in configured_roots]
+
+    def _inside(candidate: Path) -> bool:
+        return any(candidate == root or candidate.is_relative_to(root) for root in outer)
+
+    accepted: list[Path] = []
+    for raw in requested_roots:
+        candidate = Path(str(raw)).expanduser().resolve()
+        if not _inside(candidate):
+            raise ValueError("requested allowed_roots must be within configured allowed roots")
+        accepted.append(candidate)
+    return accepted
+
+
+def _validated_embedding_url(raw_url: Any) -> str:
+    """Allow only the configured local loopback embeddings service.
+
+    Accepts exactly ``http://<127.0.0.1|localhost|[::1]>:18817/v1/embeddings``
+    with no credentials, path parameters, query string, or fragment, and
+    returns the URL string unchanged. Anything else, including URLs whose
+    port or IPv6 literal cannot be parsed, raises ValueError with one uniform
+    message.
+    """
+    message = "embedding_url override must target the configured local loopback embeddings service"
+    url = str(raw_url)
+    try:
+        parsed = urlparse(url)
+        # Accessing .port validates it and raises ValueError when malformed.
+        port = parsed.port
+    except ValueError:
+        raise ValueError(message) from None
+    host = parsed.hostname or ""
+    if (
+        parsed.scheme == "http"
+        and host in {"127.0.0.1", "localhost", "::1"}
+        and (port or 80) == 18817
+        and parsed.path == "/v1/embeddings"
+        # urlparse splits these off the path, so they must be rejected
+        # explicitly to keep the override pinned to the exact route.
+        and not parsed.params
+        and not parsed.query
+        and not parsed.fragment
+        and not parsed.username
+        and not parsed.password
+    ):
+        return url
+    raise ValueError(message)
+
+
+def make_options(payload: dict[str, Any], default_roots: list[Path]) -> TriageOptions:
+    """Build ``TriageOptions`` from a request payload's ``options`` object.
+
+    A non-empty ``allowed_roots`` must be a list and is checked against
+    ``default_roots`` so it can only narrow them; otherwise ``default_roots``
+    is used as-is. ``embedding_url`` is validated when present and defaults to
+    ``DEFAULT_EMBED_URL``. Remaining options are coerced with ``int``/``bool``.
+    """
+    request_opts = payload.get("options") or {}
+
+    wanted_roots = request_opts.get("allowed_roots", [])
+    if not wanted_roots:
+        effective_roots = default_roots
+    elif isinstance(wanted_roots, list):
+        effective_roots = _roots_within_configured(wanted_roots, default_roots)
+    else:
+        raise ValueError("allowed_roots must be a list")
+
+    if "embedding_url" in request_opts:
+        embed_target = _validated_embedding_url(request_opts["embedding_url"])
+    else:
+        embed_target = DEFAULT_EMBED_URL
+
+    def flag(name: str, default: bool) -> bool:
+        return bool(request_opts.get(name, default))
+
+    return TriageOptions(
+        max_pages=int(request_opts.get("max_pages", 3)),
+        include_ocr_text=flag("include_ocr_text", False),
+        dry_run=flag("dry_run", False),
+        use_embeddings=flag("use_embeddings", True),
+        embedding_url=embed_target,
+        allowed_roots=effective_roots,
+        include_full_path=flag("include_full_path", False),
+    )
+
+
+class Handler(BaseHTTPRequestHandler):
+    """JSON request handler for the triage prototype.
+
+    GET serves health (``/``, ``/healthz``, ``/health``) and ``/models``; POST
+    serves ``/triage`` and ``/triage/batch``. Every response is a JSON body
+    written by ``_json``. The allowed roots come from the ``allowed_roots``
+    attribute that ``main`` sets on the server object.
+    """
+
+    server_version = "openvino-doc-image-triage-npu/0.1"
+
+    def _json(self, status: int, body: dict[str, Any]) -> None:
+        """Send ``body`` as a key-sorted JSON response with the given status code."""
+        data = json.dumps(body, sort_keys=True).encode()
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(data)))
+        self.end_headers()
+        self.wfile.write(data)
+
+    def log_message(self, format: str, *args: Any) -> None:
+        # Do not log request bodies, OCR text, or file paths.
+        return
+
+    @property
+    def allowed_roots(self) -> list[Path]:
+        """Startup allowlist stored on the server instance."""
+        return self.server.allowed_roots  # type: ignore[attr-defined]
+
+    def do_GET(self) -> None:  # noqa: N802
+        """Answer health and model-inventory requests; anything else is a 404."""
+        if self.path in ("/", "/healthz", "/health"):
+            self._json(200, {
+                "ok": True,
+                "service": "openvino-doc-image-triage-npu",
+                "bind_policy": "localhost-default",
+                "npu_busy_time_us": read_npu_busy(),
+                "npu_busy_check_enabled": True,
+                "allowed_roots": [str(p) for p in self.allowed_roots],
+                "privacy": {"external_uploads": False, "raw_text_logged": False},
+            })
+            return
+        if self.path == "/models":
+            self._json(200, {
+                "models": [
+                    {
+                        "stage": "needs_attention_embedding",
+                        "model": "bge-base-en-v1.5-int8-ov via local :18817",
+                        "target_device": "NPU",
+                        "verification": "sysfs npu_busy_time_us before/after embedding call",
+                    },
+                    {
+                        "stage": "image_category_classification",
+                        "model": "rule-based fallback in prototype v1",
+                        "target_device": "CPU",
+                        "npu_status": "not configured; future static-shape MobileNet/EfficientNet/ResNet OV IR",
+                    },
+                    {"stage": "ocr_text_extraction", "model": "optional local sidecar/PDF text", "target_device": "CPU"},
+                ]
+            })
+            return
+        self._json(404, {"ok": False, "error": "not_found"})
+
+    def _read_payload(self) -> dict[str, Any]:
+        """Read and decode the JSON request body.
+
+        Returns an empty dict for an empty body. Raises ValueError when
+        Content-Length is not an integer, is negative, exceeds 512 KiB, or
+        when the decoded JSON is not an object.
+        """
+        length = int(self.headers.get("Content-Length", "0"))
+        if length < 0:
+            # rfile.read() with a negative size reads until EOF, which would
+            # block this handler until the client closes the connection.
+            raise ValueError("invalid Content-Length")
+        if length > 512 * 1024:
+            raise ValueError("request JSON too large")
+        raw = self.rfile.read(length)
+        if not raw:
+            return {}
+        payload = json.loads(raw.decode())
+        if not isinstance(payload, dict):
+            # Callers use payload.get(...); reject arrays/scalars explicitly
+            # instead of failing later with an AttributeError.
+            raise ValueError("request JSON must be an object")
+        return payload
+
+    def do_POST(self) -> None:  # noqa: N802
+        """Dispatch ``/triage`` and ``/triage/batch``.
+
+        Options are validated before routing, so policy violations are
+        rejected even for unknown paths. Any exception is reported as a 400
+        carrying the exception type name and message.
+        """
+        try:
+            payload = self._read_payload()
+            options = make_options(payload, self.allowed_roots)
+            if self.path == "/triage":
+                path = payload.get("path")
+                if not path:
+                    self._json(400, {"ok": False, "error": "missing_path"})
+                    return
+                self._json(200, {"ok": True, "result": triage_file(path, options)})
+                return
+            if self.path == "/triage/batch":
+                paths = payload.get("paths") or []
+                if not isinstance(paths, list) or not paths:
+                    self._json(400, {"ok": False, "error": "missing_paths"})
+                    return
+                self._json(200, triage_batch([str(p) for p in paths], options))
+                return
+            self._json(404, {"ok": False, "error": "not_found"})
+        except Exception as exc:
+            self._json(400, {"ok": False, "error": type(exc).__name__, "message": str(exc)})
+
+
+def main() -> int:
+    """Parse CLI arguments and serve the triage API on a loopback address.
+
+    ``--host``/``--port`` default to the ``DOC_IMAGE_TRIAGE_HOST`` and
+    ``DOC_IMAGE_TRIAGE_PORT`` environment variables (falling back to
+    ``127.0.0.1:18829``). A non-loopback host is rejected through
+    ``parser.error``. With no ``--allowed-root`` the current directory is the
+    only allowed root. A startup JSON line is printed before serving.
+    """
+    import socket  # local import: only needed to select the IPv6 address family
+
+    parser = argparse.ArgumentParser(description="Local-only doc/image triage HTTP server")
+    parser.add_argument("--host", default=os.environ.get("DOC_IMAGE_TRIAGE_HOST", "127.0.0.1"))
+    parser.add_argument("--port", type=int, default=int(os.environ.get("DOC_IMAGE_TRIAGE_PORT", "18829")))
+    parser.add_argument("--allowed-root", action="append", default=[], help="allowed local root; may repeat")
+    args = parser.parse_args()
+    try:
+        host = _validate_loopback_host(args.host)
+    except ValueError as exc:
+        parser.error(str(exc))
+    roots = [Path(p).expanduser().resolve() for p in args.allowed_root] or [Path.cwd().resolve()]
+
+    server_cls = ThreadingHTTPServer
+    if ":" in host:
+        # The stdlib server defaults to AF_INET, so an IPv6 loopback literal
+        # such as ::1 passes validation but cannot be bound unless the address
+        # family is switched.
+        class _LoopbackV6Server(ThreadingHTTPServer):
+            address_family = socket.AF_INET6
+
+        server_cls = _LoopbackV6Server
+
+    httpd = server_cls((host, args.port), Handler)
+    httpd.allowed_roots = roots  # type: ignore[attr-defined]
+    print(json.dumps({"service": "openvino-doc-image-triage-npu", "host": host, "port": args.port, "allowed_roots": [str(p) for p in roots]}), flush=True)
+    try:
+        httpd.serve_forever()
+    finally:
+        # Release the listening socket even when serving stops via an exception.
+        httpd.server_close()
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import json
+import socket
+import subprocess
+import sys
+import tempfile
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+SAMPLES = ROOT / "samples"
+BUSY = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
+
+
+def run(cmd: list[str]) -> None:
+    """Echo ``cmd`` behind a ``+`` marker, then run it from ROOT, raising on a non-zero exit."""
+    rendered = " ".join(cmd)
+    print("+", rendered)
+    subprocess.run(cmd, check=True, cwd=ROOT)
+
+
+def post_json(url: str, payload: dict) -> dict:
+    """POST ``payload`` as JSON to ``url`` (10 s timeout) and return the decoded JSON reply."""
+    body = json.dumps(payload).encode()
+    request = urllib.request.Request(url, data=body, headers={"Content-Type": "application/json"})
+    with urllib.request.urlopen(request, timeout=10) as response:
+        text = response.read().decode()
+    return json.loads(text)
+
+
+def post_json_status(url: str, payload: dict) -> tuple[int, dict]:
+    """POST ``payload`` as JSON and return ``(status, decoded_body)``.
+
+    Unlike ``post_json``, an HTTP error response is not raised: its status
+    code and JSON body are returned so tests can assert on rejections.
+    """
+    body = json.dumps(payload).encode()
+    request = urllib.request.Request(url, data=body, headers={"Content-Type": "application/json"})
+    try:
+        with urllib.request.urlopen(request, timeout=10) as response:
+            status, text = response.status, response.read().decode()
+    except urllib.error.HTTPError as http_err:
+        status, text = http_err.code, http_err.read().decode()
+    return status, json.loads(text)
+
+
+def busy() -> int | None:
+    """Return the integer read from the BUSY sysfs file, or None if it cannot be read or parsed."""
+    try:
+        text = BUSY.read_text()
+        return int(text.strip())
+    except Exception:
+        return None
+
+
+def choose_free_loopback_port() -> int:
+    """Ask the OS for a free localhost port and verify it is not listening yet."""
+    # Binding to port 0 lets the OS pick; the socket is closed again right away.
+    reserver = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        reserver.bind(("127.0.0.1", 0))
+        port = int(reserver.getsockname()[1])
+    finally:
+        reserver.close()
+    # A successful connect (result 0) would mean something is already listening there.
+    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        probe.settimeout(0.25)
+        connect_result = probe.connect_ex(("127.0.0.1", port))
+        assert connect_result != 0, f"selected port already has a listener: {port}"
+    finally:
+        probe.close()
+    return port
+
+
+def assert_loopback_bind_policy() -> None:
+    """Check that server.py refuses a ``0.0.0.0`` bind and mentions loopback on stderr."""
+    command = [sys.executable, "server.py", "--host", "0.0.0.0", "--port", "0", "--allowed-root", str(ROOT)]
+    outcome = subprocess.run(command, cwd=ROOT, capture_output=True, text=True)
+    assert outcome.returncode != 0, outcome.stdout + outcome.stderr
+    assert "loopback" in outcome.stderr.lower(), outcome.stderr
+
+
+def main() -> int:
+    """Run the synthetic end-to-end smoke test and print a JSON summary.
+
+    Steps: regenerate fixtures, run the CLI on the invoice image/PDF and check
+    privacy/category/needs-attention fields plus the NPU busy-time delta when
+    an embedding was used, then start a temporary localhost server on a free
+    port and exercise ``/healthz``, ``/triage``, and the root-widening and
+    embedding-URL rejections. The server is always stopped before returning.
+    """
+    run([sys.executable, "make_samples.py"])
+    invoice = SAMPLES / "synthetic_invoice.png"
+    pdf = SAMPLES / "synthetic_invoice.pdf"
+
+    before = busy()
+    raw = subprocess.check_output([
+        sys.executable, "triage.py", "--allowed-root", str(ROOT), "--pretty", str(invoice), str(pdf)
+    ], cwd=ROOT, text=True)
+    data = json.loads(raw)
+    assert data["ok"], data
+    first = data["files"][0]["result"]
+    assert first["privacy"]["external_uploads"] is False
+    assert first["pages"][0]["classification"]["label"] == "bill_or_invoice"
+    assert first["pages"][0]["needs_attention"]["value"] is True
+    assert "amount_due" in first["pages"][0]["needs_attention"]["reasons"]
+    assert first["processing_device_summary"]["file_intake"] == "CPU"
+    assert "NPU" in first["processing_device_summary"]["needs_attention_embedding"] or first["pages"][0]["needs_attention"]["device"] == "CPU"
+    after = busy()
+    if before is not None and after is not None:
+        # If :18817 is reachable and text was embedded, NPU delta must be positive.
+        emb = first["pages"][0]["needs_attention"]["embedding"]
+        if emb.get("used"):
+            assert emb.get("verified_npu") is True, emb
+            assert (emb.get("npu_busy_delta_us") or 0) > 0, emb
+            assert after > before, {"before": before, "after": after, "embedding": emb}
+
+    # HTTP smoke on a preflighted free localhost port so we do not collide with live/prototype ports.
+    assert_loopback_bind_policy()
+    smoke_port = choose_free_loopback_port()
+    base_url = f"http://127.0.0.1:{smoke_port}"
+    proc = subprocess.Popen([sys.executable, "server.py", "--host", "127.0.0.1", "--port", str(smoke_port), "--allowed-root", str(ROOT)], cwd=ROOT, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    try:
+        deadline = time.time() + 5
+        while time.time() < deadline:
+            if proc.poll() is not None:
+                # Fail fast with the server's own error instead of polling a dead process.
+                raise AssertionError(f"server exited early with code {proc.returncode}: {proc.stderr.read()}")
+            try:
+                health = urllib.request.urlopen(f"{base_url}/healthz", timeout=1).read()
+            except Exception:
+                time.sleep(0.1)
+                continue
+            # Checked outside the retry guard so a wrong responder is reported as such.
+            assert b"openvino-doc-image-triage-npu" in health
+            break
+        else:
+            raise AssertionError("server did not become ready")
+        resp = post_json(f"{base_url}/triage", {"path": str(invoice), "options": {"allowed_roots": [str(ROOT)]}})
+        assert resp["ok"] is True, resp
+        assert resp["result"]["source_path_basename"] == "synthetic_invoice.png"
+        assert "source_path" not in resp["result"]
+
+        # Request bodies may narrow but must not widen the startup --allowed-root policy.
+        with tempfile.NamedTemporaryFile(suffix=".txt") as outside:
+            outside.write(b"sensitive text outside configured artifact root")
+            outside.flush()
+            status, blocked = post_json_status(
+                f"{base_url}/triage",
+                {"path": outside.name, "options": {"allowed_roots": ["/tmp"], "dry_run": True, "use_embeddings": False}},
+            )
+        assert status == 400, blocked
+        assert blocked["ok"] is False, blocked
+        assert "allowed_roots" in blocked.get("message", ""), blocked
+
+        # Request bodies must not redirect extracted text to caller-supplied endpoints.
+        status, blocked = post_json_status(
+            f"{base_url}/triage",
+            {"path": str(invoice), "options": {"embedding_url": "http://198.51.100.1:9/v1/embeddings"}},
+        )
+        assert status == 400, blocked
+        assert blocked["ok"] is False, blocked
+        assert "embedding_url" in blocked.get("message", ""), blocked
+    finally:
+        proc.terminate()
+        try:
+            # communicate() waits for exit and drains/closes both pipes.
+            proc.communicate(timeout=5)
+        except subprocess.TimeoutExpired:
+            # Never leave the temporary server running, and do not let the
+            # timeout mask an assertion raised in the try block.
+            proc.kill()
+            proc.communicate()
+
+    print(json.dumps({
+        "ok": True,
+        "samples": len(list(SAMPLES.glob("synthetic_*"))),
+        "npu_busy_before": before,
+        "npu_busy_after": after,
+        "npu_delta_observed": None if before is None or after is None else after - before,
+        "triage_label": first["pages"][0]["classification"]["label"],
+        "needs_attention": first["pages"][0]["needs_attention"]["value"],
+    }, indent=2))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,459 @@
+#!/usr/bin/env python3
+"""Local-only document/image triage prototype.
+
+CPU stages:
+- local file intake, hashing, MIME/extension checks
+- image/PDF-page decoding and normalization
+- optional sidecar/native-text extraction
+- regex metadata extraction and rule-based category fallback
+
+NPU stages:
+- needs-attention semantic embedding via the existing local OpenVINO NPU
+  embeddings service on 127.0.0.1:18817, verified by sysfs busy-time delta.
+
+No external uploads are performed. The only network call is to the embedding
+service (localhost by default, see DEFAULT_EMBED_URL) when embeddings are enabled.
+"""
+from __future__ import annotations
+
+import argparse
+import base64
+import dataclasses
+import datetime as dt
+import hashlib
+import io
+import json
+import mimetypes
+import os
+import re
+import sys
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+try:
+    from PIL import Image, ImageOps
+except Exception as exc:  # pragma: no cover - caught in CLI smoke
+    raise SystemExit("Pillow is required: install pillow in the active Python env") from exc
+
+# sysfs counter read before and after an embedding call; the difference is
+# reported as "npu_busy_delta_us".
+NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
+# Default value of TriageOptions.embedding_url.
+DEFAULT_EMBED_URL = "http://127.0.0.1:18817/v1/embeddings"
+# Evaluated once at import time (working directory of the importing process).
+DEFAULT_ALLOWED_ROOTS = [Path.cwd()]
+# triage_file raises ValueError for files larger than this (25 MiB).
+MAX_FILE_BYTES = 25 * 1024 * 1024
+# Returned verbatim as "candidate_labels" by classify_rule.
+CATEGORY_LABELS = [
+    "receipt",
+    "bill_or_invoice",
+    "tax_or_financial",
+    "medical_or_insurance",
+    "legal_or_government",
+    "form_or_application",
+    "travel_or_ticket",
+    "screenshot_conversation",
+    "screenshot_web_or_app",
+    "identity_or_sensitive",
+    "photo_misc",
+    "unknown_or_low_confidence",
+]
+
+# Date shapes, in order: 20YY-MM-DD (or "/" separators), MM-DD-YY / MM-DD-20YY
+# (or "/" separators), and "Mon D, 20YY" with abbreviated or longer month names.
+# Each pattern exposes the matched date as group 1.
+DATE_PATTERNS = [
+    re.compile(r"\b(20\d{2}[-/](?:0?[1-9]|1[0-2])[-/](?:0?[1-9]|[12]\d|3[01]))\b"),
+    re.compile(r"\b((?:0?[1-9]|1[0-2])[-/](?:0?[1-9]|[12]\d|3[01])[-/](?:20)?\d{2})\b"),
+    re.compile(r"\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2},?\s+20\d{2})\b", re.I),
+]
+# Dollar amounts with an optional "USD" prefix, optional thousands separators and optional cents.
+AMOUNT_RE = re.compile(r"(?<!\w)(?:USD\s*)?\$\s?\d{1,4}(?:,\d{3})*(?:\.\d{2})?\b", re.I)
+EMAIL_RE = re.compile(r"\b[\w.+-]+@[\w.-]+\.[A-Za-z]{2,}\b")
+# 3-3-4 digit phone numbers with an optional leading "+1"/"1" and optional parentheses.
+PHONE_RE = re.compile(r"\b(?:\+?1[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?){2}\d{4}\b")
+# A keyword such as "account" or "invoice" followed by an identifier of 4+ characters.
+ACCOUNT_RE = re.compile(r"\b(?:account|acct|policy|invoice|member|case|claim)\s*(?:#|no\.?|id)?\s*[:\-]?\s*[A-Z0-9-]{4,}\b", re.I)
+# Digit groups shaped 3-2-4 separated by hyphens.
+SSN_LIKE_RE = re.compile(r"\b\d{3}-\d{2}-\d{4}\b")
+
+# Reason code -> lower-case substrings. needs_attention reports the reason when
+# any of its substrings occurs in the lower-cased text.
+ATTENTION_KEYWORDS = {
+    "due_date_present": ["due", "payment due", "pay by", "deadline"],
+    "amount_due": ["amount due", "balance due", "total due", "$"],
+    "action_required_language": ["action required", "please respond", "complete", "submit", "renew", "verify"],
+    "signature_required": ["signature", "sign and return", "signed"],
+    "appointment_or_deadline": ["appointment", "scheduled", "reservation", "hearing"],
+    "account_security": ["security", "password", "unauthorized", "fraud", "verify your account"],
+    "medical_followup": ["follow up", "lab result", "referral", "insurance"],
+    "tax_deadline": ["irs", "tax", "1099", "w-2", "deadline"],
+}
+
+# Category label -> lower-case substrings. classify_rule scores each label by how
+# many of its substrings occur in the lower-cased text.
+CATEGORY_KEYWORDS = {
+    "receipt": ["receipt", "subtotal", "cashier", "change", "store"],
+    "bill_or_invoice": ["invoice", "amount due", "balance due", "statement", "payment due"],
+    "tax_or_financial": ["tax", "irs", "1099", "w-2", "bank", "routing"],
+    "medical_or_insurance": ["medical", "insurance", "clinic", "patient", "claim"],
+    "legal_or_government": ["court", "government", "department", "notice", "license"],
+    "form_or_application": ["application", "form", "signature", "submit"],
+    "travel_or_ticket": ["boarding", "ticket", "itinerary", "reservation", "gate"],
+    "screenshot_conversation": ["message", "chat", "reply", "conversation"],
+    "screenshot_web_or_app": ["login", "browser", "app", "settings", "dashboard"],
+    "identity_or_sensitive": ["ssn", "passport", "driver license", "social security"],
+}
+
+
+@dataclasses.dataclass
+class TriageOptions:
+    """Per-call settings consumed by triage_file and triage_batch."""
+
+    # Upper bound on PDF pages rendered and text-extracted.
+    max_pages: int = 3
+    # When true, the extracted text is returned under "ocr_text".
+    include_ocr_text: bool = False
+    # When true, image/PDF page decoding is skipped.
+    dry_run: bool = False
+    # When false, the embedding service is never called.
+    use_embeddings: bool = True
+    # URL the extracted text is POSTed to when embeddings are enabled.
+    embedding_url: str = DEFAULT_EMBED_URL
+    # A path must resolve under one of these roots; every instance gets its own copy of the default list.
+    allowed_roots: list[Path] = dataclasses.field(default_factory=lambda: DEFAULT_ALLOWED_ROOTS.copy())
+    # When true, the resolved absolute path is returned under "source_path".
+    include_full_path: bool = False
+    # Timeout (seconds) passed to the embedding HTTP request.
+    timeout_seconds: float = 10.0
+
+
+def read_npu_busy() -> int | None:
+    """Return the integer stored in NPU_BUSY_PATH, or None when it cannot be read or parsed."""
+    try:
+        raw_counter = NPU_BUSY_PATH.read_text()
+        busy = int(raw_counter.strip())
+    except Exception:
+        # Best-effort probe: any failure is reported as "unknown" rather than raised.
+        busy = None
+    return busy
+
+
+def sha256_file(path: Path) -> str:
+    """Return the hex SHA-256 digest of the file at ``path``, reading it in 1 MiB blocks."""
+    digest = hashlib.sha256()
+    block_size = 1024 * 1024
+    with path.open("rb") as stream:
+        while block := stream.read(block_size):
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def under_allowed_root(path: Path, roots: list[Path]) -> bool:
+    """Report whether ``path``, once resolved, equals or lies inside any resolved root.
+
+    Returns False when ``roots`` is empty.
+    """
+    target = path.resolve()
+    return any(target.is_relative_to(candidate.resolve()) for candidate in roots)
+
+
+def sidecar_text(path: Path) -> tuple[str, str | None]:
+    """Load text from a sidecar file that sits next to ``path``.
+
+    Candidates are tried in order: ``<name><ext>.txt`` (e.g. ``scan.png.txt``)
+    and then ``<name>.txt`` (e.g. ``scan.txt``).
+
+    Returns:
+        ``(text, source)``. ``text`` holds at most 12000 characters. ``source``
+        is ``"sidecar:<file name>"`` on success, ``"sidecar_unreadable"`` when
+        the first existing candidate cannot be read, and ``None`` when no
+        candidate exists.
+    """
+    max_chars = 12000
+    # dict.fromkeys keeps order and drops the duplicate candidate produced when
+    # ``path`` has no extension.
+    for suffix in dict.fromkeys((path.suffix + ".txt", ".txt")):
+        candidate = path.with_suffix(suffix)
+        if candidate.exists() and candidate.is_file():
+            try:
+                # Read only what is kept instead of loading a possibly huge
+                # sidecar into memory and slicing it afterwards.
+                with candidate.open(errors="replace") as handle:
+                    return handle.read(max_chars), f"sidecar:{candidate.name}"
+            except Exception:
+                return "", "sidecar_unreadable"
+    return "", None
+
+
+def extract_pdf_text(path: Path, max_pages: int) -> tuple[str, str | None]:
+    """Extract native text from the first ``max_pages`` pages of a PDF via optional pypdf.
+
+    Returns:
+        ``(text, status)``. On success ``text`` is the page texts joined by
+        newlines (capped at 12000 characters) and ``status`` is ``"pypdf_cpu"``.
+        Otherwise ``text`` is empty and ``status`` is ``"pypdf_not_installed"``,
+        ``"pdf_encrypted"`` or ``"pdf_text_error:<ExceptionName>"``.
+    """
+    # pypdf is an optional dependency, so it is imported lazily and its absence is a status, not an error.
+    try:
+        import pypdf  # type: ignore
+    except Exception:
+        return "", "pypdf_not_installed"
+    try:
+        document = pypdf.PdfReader(str(path))
+        if getattr(document, "is_encrypted", False):
+            return "", "pdf_encrypted"
+        page_texts = [(page.extract_text() or "") for page in document.pages[:max_pages]]
+        combined = "\n".join(page_texts)
+        return combined[:12000], "pypdf_cpu"
+    except Exception as exc:
+        return "", f"pdf_text_error:{type(exc).__name__}"
+
+
+def load_image_pages(path: Path, max_pages: int) -> tuple[list[Image.Image], str | None]:
+    """Decode ``path`` into RGB page images.
+
+    PDFs are rendered with optional pypdfium2 (at most ``max_pages`` pages,
+    scale 1.5); any other file is opened with Pillow and EXIF-transposed.
+
+    Returns:
+        ``(images, error)``. ``error`` is ``None`` on success; otherwise
+        ``images`` is empty and ``error`` is ``"pypdfium2_not_installed"``,
+        ``"pdf_render_error:<ExceptionName>"`` or
+        ``"image_decode_error:<ExceptionName>"``.
+    """
+    if path.suffix.lower() != ".pdf":
+        try:
+            upright = ImageOps.exif_transpose(Image.open(path))
+            return [upright.convert("RGB")], None
+        except Exception as exc:
+            return [], f"image_decode_error:{type(exc).__name__}"
+
+    # pypdfium2 is optional; without it PDFs simply yield no page images.
+    try:
+        import pypdfium2 as pdfium  # type: ignore
+    except Exception:
+        return [], "pypdfium2_not_installed"
+    try:
+        document = pdfium.PdfDocument(str(path))
+        page_total = min(len(document), max_pages)
+        rendered = [
+            document[number].render(scale=1.5).to_pil().convert("RGB")
+            for number in range(page_total)
+        ]
+        return rendered, None
+    except Exception as exc:
+        return [], f"pdf_render_error:{type(exc).__name__}"
+
+
+def normalize_for_hash_features(img: Image.Image) -> dict[str, Any]:
+    """Compute coarse luma statistics and the aspect ratio of ``img``.
+
+    The statistics come from the histogram of a grayscale copy that has been
+    scaled to fit within 224x224. The aspect ratio uses the original dimensions.
+
+    Returns:
+        A dict with ``mean_luma`` and ``contrast`` (standard deviation of luma),
+        both rounded to 2 decimals, and ``aspect_ratio`` rounded to 3 decimals.
+    """
+    thumbnail = ImageOps.contain(img.copy(), (224, 224))
+    luma = thumbnail.convert("L")
+    counts = luma.histogram()
+    # Guard against division by zero for a degenerate image.
+    total = max(1, luma.width * luma.height)
+    mean = sum(level * count for level, count in enumerate(counts)) / total
+    spread = sum(((level - mean) ** 2) * count for level, count in enumerate(counts)) / total
+    ratio = img.width / max(1, img.height)
+    return {
+        "mean_luma": round(mean, 2),
+        "contrast": round(spread ** 0.5, 2),
+        "aspect_ratio": round(ratio, 3),
+    }
+
+
+def classify_rule(text: str, image_features: dict[str, Any]) -> dict[str, Any]:
+    """Pick a category label from keyword hits, falling back to the image aspect ratio.
+
+    Each label in CATEGORY_KEYWORDS scores one point per keyword found as a
+    substring of the lower-cased text; the first label with the highest score
+    wins. With no hits, an aspect ratio above 1.3 is treated as a single-point
+    ``screenshot_web_or_app`` match.
+
+    Returns:
+        A classification record with ``label``, ``confidence`` (0.2 without any
+        match, otherwise 0.35 + 0.18 per point, capped at 0.92) and fixed
+        device/stage/method fields.
+    """
+    lowered = text.lower()
+    best_label, best_score = "unknown_or_low_confidence", 0
+    for label, keywords in CATEGORY_KEYWORDS.items():
+        hits = len([keyword for keyword in keywords if keyword in lowered])
+        if hits > best_score:
+            best_label, best_score = label, hits
+
+    # No keyword matched: wide images are assumed to be screenshots.
+    if best_score == 0 and image_features.get("aspect_ratio", 1.0) > 1.3:
+        best_label, best_score = "screenshot_web_or_app", 1
+
+    if best_score:
+        confidence = min(0.35 + 0.18 * best_score, 0.92)
+    else:
+        confidence = 0.2
+    if confidence < 0.45:
+        best_label = "unknown_or_low_confidence"
+
+    return {
+        "label": best_label,
+        "confidence": round(confidence, 3),
+        "device": "CPU",
+        "stage": "category_classification",
+        "method": "rule_based_fallback",
+        "npu_status": "not_configured_for_prototype_v1",
+        "candidate_labels": CATEGORY_LABELS,
+    }
+
+
+def extract_metadata(text: str) -> dict[str, Any]:
+    """Count distinct dates and amounts in ``text`` and flag which entity kinds appear.
+
+    Only counts and booleans are returned; the matched values themselves are
+    never included in the result.
+    """
+    distinct_dates = {
+        match.group(1)
+        for pattern in DATE_PATTERNS
+        for match in pattern.finditer(text)
+    }
+    distinct_amounts = set(AMOUNT_RE.findall(text))
+
+    org_hit = re.search(r"\b(?:inc|llc|clinic|department|bank|insurance|store)\b", text, re.I)
+    address_hit = re.search(r"\b\d{2,5}\s+[A-Za-z0-9 .]+\s+(?:st|street|ave|avenue|rd|road|blvd|drive|dr)\b", text, re.I)
+    entity_flags = {
+        "org_present": org_hit is not None,
+        "address_present": address_hit is not None,
+        "phone_present": PHONE_RE.search(text) is not None,
+        "email_present": EMAIL_RE.search(text) is not None,
+        "policy_or_account_id_present": ACCOUNT_RE.search(text) is not None,
+        "identity_number_like_present": SSN_LIKE_RE.search(text) is not None,
+    }
+    return {
+        "dates_count": len(distinct_dates),
+        "amounts_count": len(distinct_amounts),
+        "detected_entities": entity_flags,
+        "raw_values_redacted": True,
+    }
+
+
+def call_embeddings(text: str, url: str, timeout: float) -> dict[str, Any]:
+    """POST ``text`` to the embeddings service and report whether the NPU counter moved.
+
+    Only the first 2048 characters of ``text`` are sent, and at most 1 MiB of
+    the response is read. The NPU busy counter is sampled before and after the
+    call; ``verified_npu`` is true only when both samples exist and the
+    counter increased.
+
+    Args:
+        text: Extracted document text; blank text skips the call entirely.
+        url: Embeddings endpoint to POST to.
+        timeout: Timeout in seconds for the HTTP request.
+
+    Returns:
+        A result record that always contains ``used``, ``device``, ``status``,
+        ``npu_busy_delta_us`` and ``verified_npu``. When a request was attempted
+        it also contains ``endpoint`` (host:port taken from ``url``), and on
+        success ``embedding_dim`` and ``wall_ms``. Transport, HTTP and decoding
+        failures are reported as ``status="embedding_service_error:<ExceptionName>"``
+        instead of being raised.
+    """
+    from http.client import HTTPException
+    from urllib.parse import urlsplit
+
+    if not text.strip():
+        return {"used": False, "device": "NPU", "status": "skipped_no_text", "npu_busy_delta_us": 0, "verified_npu": False}
+    # Report the endpoint that was actually contacted rather than the default one.
+    endpoint = urlsplit(url).netloc or url
+    before = read_npu_busy()
+    payload = json.dumps({"input": text[:2048], "purpose": "document"}).encode()
+    req = urllib.request.Request(url, data=payload, headers={"Content-Type": "application/json"})
+    t0 = time.perf_counter()
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            body = resp.read(1024 * 1024)
+            status = resp.status
+        parsed = json.loads(body.decode())
+        dim = None
+        # Tolerate unexpected JSON shapes: only a list of objects carries an embedding.
+        data = parsed.get("data") if isinstance(parsed, dict) else None
+        if isinstance(data, list) and data and isinstance(data[0], dict):
+            emb = data[0].get("embedding", [])
+            dim = len(emb) if isinstance(emb, list) else None
+    except (OSError, ValueError, HTTPException) as exc:
+        # OSError covers URLError/HTTPError, timeouts and connection resets;
+        # ValueError covers JSONDecodeError and UnicodeDecodeError.
+        after = read_npu_busy()
+        delta = (after - before) if before is not None and after is not None else None
+        return {
+            "used": False,
+            "device": "NPU",
+            "status": f"embedding_service_error:{type(exc).__name__}",
+            "npu_busy_delta_us": delta,
+            "verified_npu": False,
+            "endpoint": endpoint,
+        }
+    after = read_npu_busy()
+    delta = (after - before) if before is not None and after is not None else None
+    return {
+        "used": True,
+        "device": "NPU",
+        "status": "ok" if status == 200 else f"http_{status}",
+        "embedding_dim": dim,
+        "wall_ms": round((time.perf_counter() - t0) * 1000, 2),
+        "npu_busy_delta_us": delta,
+        "verified_npu": bool(delta and delta > 0),
+        "endpoint": endpoint,
+    }
+
+
+def needs_attention(text: str, embedding_result: dict[str, Any]) -> dict[str, Any]:
+    """Decide whether ``text`` needs attention, based on keyword and metadata rules.
+
+    A reason from ATTENTION_KEYWORDS is reported when any of its phrases occurs
+    in the lower-cased text; detected amounts and dates add ``amount_due`` and
+    ``due_date_present``. Confidence starts at 0.35 without reasons, or 0.45
+    plus 0.1 per reason (capped at 0.9), and gains 0.05 (capped at 0.95) when
+    ``embedding_result`` reports ``verified_npu``.
+
+    Returns:
+        A record with ``value``, ``confidence``, sorted ``reasons``, the
+        device/stage/method fields and the embedding result passed in.
+    """
+    lowered = text.lower()
+    triggered = {
+        reason
+        for reason, phrases in ATTENTION_KEYWORDS.items()
+        if any(phrase in lowered for phrase in phrases)
+    }
+    counts = extract_metadata(text)
+    if counts["amounts_count"]:
+        triggered.add("amount_due")
+    if counts["dates_count"]:
+        triggered.add("due_date_present")
+
+    reasons = sorted(triggered)
+    flagged = bool(reasons)
+    if flagged:
+        confidence = min(0.45 + 0.1 * len(reasons), 0.9)
+    else:
+        confidence = 0.35
+    if embedding_result.get("verified_npu"):
+        confidence = min(confidence + 0.05, 0.95)
+
+    if reasons:
+        reported = reasons
+    elif not text.strip():
+        # Blank input cannot be judged; say so explicitly.
+        reported = ["low_confidence"]
+    else:
+        reported = []
+    embedding_used = embedding_result.get("used")
+    return {
+        "value": flagged,
+        "confidence": round(confidence, 3),
+        "reasons": reported,
+        "device": "NPU+CPU" if embedding_used else "CPU",
+        "stage": "needs_attention",
+        "method": "NPU embedding verification + CPU rules" if embedding_used else "CPU rules fallback",
+        "embedding": embedding_result,
+    }
+
+
+def infer_media_type(path: Path, is_pdf_page: bool = False) -> str:
+    """Map a path to ``"pdf_page"``, ``"pdf"``, ``"image"`` or ``"unknown"``.
+
+    ``is_pdf_page`` wins over everything else. Otherwise a ``.pdf`` suffix
+    (case-insensitive) gives ``"pdf"``, and any name whose guessed MIME type
+    starts with ``image/`` gives ``"image"``.
+    """
+    if is_pdf_page:
+        return "pdf_page"
+    if path.suffix.lower() == ".pdf":
+        return "pdf"
+    guessed = mimetypes.guess_type(path.name)[0]
+    return "image" if guessed and guessed.startswith("image/") else "unknown"
+
+
+def triage_file(path_like: str | Path, options: TriageOptions | None = None) -> dict[str, Any]:
+    """Triage one local file and return a redacted, JSON-serializable record.
+
+    Steps: validate the path against ``options.allowed_roots`` and the size
+    limit, hash the file, gather text (sidecar first, then PDF text for PDFs),
+    decode page images unless ``options.dry_run`` is set, optionally call the
+    embedding service, then build one page record per decoded image (or a
+    single placeholder page when nothing was decoded).
+
+    Args:
+        path_like: File to triage; ``~`` is expanded and the path is resolved.
+        options: Settings for this call; defaults to ``TriageOptions()``.
+
+    Returns:
+        The file-level record. ``source_path`` and ``ocr_text`` are only
+        present when the corresponding options are enabled.
+
+    Raises:
+        ValueError: The path is outside the allowed roots, or the file exceeds
+            MAX_FILE_BYTES.
+        FileNotFoundError: The path does not exist or is not a regular file.
+    """
+    options = options or TriageOptions()
+    path = Path(path_like).expanduser()
+    resolved = path.resolve()
+    if not under_allowed_root(resolved, options.allowed_roots):
+        raise ValueError(f"path is outside allowed roots: {path}")
+    if not resolved.exists() or not resolved.is_file():
+        raise FileNotFoundError(str(path))
+    size = resolved.stat().st_size
+    if size > MAX_FILE_BYTES:
+        raise ValueError(f"file too large for prototype limit: {size} bytes")
+
+    is_pdf = resolved.suffix.lower() == ".pdf"
+    file_hash = sha256_file(resolved)
+    text, text_source = sidecar_text(resolved)
+    pdf_text_status = None
+    if is_pdf and not text:
+        # A sidecar takes precedence; native PDF text is only the fallback.
+        text, pdf_text_status = extract_pdf_text(resolved, options.max_pages)
+        text_source = pdf_text_status
+
+    # dry_run skips decoding, so the file is reported through the placeholder page below.
+    images = []
+    render_error = None
+    if not options.dry_run:
+        images, render_error = load_image_pages(resolved, options.max_pages)
+
+    if options.use_embeddings:
+        embedding_result = call_embeddings(text, options.embedding_url, options.timeout_seconds)
+    else:
+        embedding_result = {"used": False, "device": "NPU", "status": "disabled", "npu_busy_delta_us": 0, "verified_npu": False}
+    attn = needs_attention(text, embedding_result)
+    meta = extract_metadata(text)
+    page_media_type = infer_media_type(resolved, is_pdf)
+
+    def build_page(index: int, image_info: dict[str, Any], features: dict[str, Any]) -> dict[str, Any]:
+        # One page record; text-derived fields are shared by every page of the file.
+        return {
+            "page_index": index,
+            "media_type": page_media_type,
+            "image": image_info,
+            "classification": classify_rule(text, features),
+            "needs_attention": attn,
+            "metadata": meta,
+            "ocr": {"available": bool(text), "quality": 0.7 if text else 0.0, "device": "CPU", "text_source": text_source},
+        }
+
+    pages: list[dict[str, Any]] = []
+    for idx, img in enumerate(images):
+        features = normalize_for_hash_features(img)
+        orientation = "portrait" if img.height >= img.width else "landscape"
+        pages.append(build_page(idx, {"width": img.width, "height": img.height, "orientation": orientation, **features}, features))
+    if not pages:
+        # Return a file-level record even when nothing could be (or was meant to be) decoded.
+        placeholder = {"width": None, "height": None, "orientation": None, "render_error": render_error}
+        pages.append(build_page(0, placeholder, {"aspect_ratio": 1.0}))
+
+    result: dict[str, Any] = {
+        "file_id": f"sha256:{file_hash}",
+        "source_path_basename": resolved.name,
+        "media_type": infer_media_type(resolved),
+        "file_size_bytes": size,
+        "page_count": len(pages),
+        "pages": pages,
+        "processing_device_summary": {
+            "file_intake": "CPU",
+            "pdf_rendering": "CPU" if is_pdf else "not_applicable",
+            "image_category_classification": "CPU rule fallback (NPU model not configured in prototype v1)",
+            "ocr_text_extraction": "CPU/local sidecar or optional local PDF text extractor",
+            "needs_attention_embedding": "NPU via local :18817" if embedding_result.get("used") else "CPU fallback/no text",
+            "metadata_extraction": "CPU",
+            "npu_verified": bool(embedding_result.get("verified_npu")),
+            "npu_busy_delta_us": embedding_result.get("npu_busy_delta_us"),
+        },
+        "privacy": {
+            "external_uploads": False,
+            "localhost_only_embedding_call": bool(options.use_embeddings),
+            "raw_text_logged": False,
+            "raw_values_redacted": True,
+            "full_path_included": options.include_full_path,
+        },
+        # The PDF text status only counts as an error when no text was obtained.
+        "errors": [e for e in [render_error, pdf_text_status if pdf_text_status and not text else None] if e],
+    }
+    if options.include_full_path:
+        result["source_path"] = str(resolved)
+    if options.include_ocr_text:
+        result["ocr_text"] = text
+    return result
+
+
+def triage_batch(paths: list[str], options: TriageOptions | None = None) -> dict[str, Any]:
+    """Triage every path independently and collect per-file outcomes.
+
+    A failure on one path is recorded (basename, exception name and message)
+    and does not stop the remaining paths.
+
+    Returns:
+        ``{"ok": <all files succeeded>, "files": [...], "generated_at": <UTC ISO timestamp>}``.
+    """
+    def attempt(raw_path: str) -> dict[str, Any]:
+        try:
+            return {"ok": True, "result": triage_file(raw_path, options)}
+        except Exception as exc:
+            return {
+                "ok": False,
+                "source_path_basename": Path(raw_path).name,
+                "error": type(exc).__name__,
+                "message": str(exc),
+            }
+
+    outcomes = [attempt(raw_path) for raw_path in paths]
+    succeeded = all(outcome["ok"] for outcome in outcomes)
+    return {"ok": succeeded, "files": outcomes, "generated_at": dt.datetime.now(dt.UTC).isoformat()}
+
+
+def cli() -> int:
+    """Parse command-line arguments, triage the given paths and print the JSON report.
+
+    Returns:
+        0 when every file was triaged successfully, 2 otherwise.
+    """
+    ap = argparse.ArgumentParser(description="Local document/image triage prototype")
+    ap.add_argument("paths", nargs="+", help="local image/PDF paths")
+    ap.add_argument("--allowed-root", action="append", default=[], help="allowed local root; defaults to cwd")
+    ap.add_argument("--max-pages", type=int, default=3)
+    for flag in ("--include-ocr-text", "--include-full-path"):
+        ap.add_argument(flag, action="store_true")
+    ap.add_argument("--no-embeddings", action="store_true", help="disable local NPU embedding call")
+    for flag in ("--dry-run", "--pretty"):
+        ap.add_argument(flag, action="store_true")
+    ns = ap.parse_args()
+
+    if ns.allowed_root:
+        allowed = [Path(entry) for entry in ns.allowed_root]
+    else:
+        allowed = [Path.cwd()]
+    settings = TriageOptions(
+        max_pages=ns.max_pages,
+        include_ocr_text=ns.include_ocr_text,
+        dry_run=ns.dry_run,
+        use_embeddings=not ns.no_embeddings,
+        allowed_roots=allowed,
+        include_full_path=ns.include_full_path,
+    )
+    report = triage_batch(ns.paths, settings)
+    indent = 2 if ns.pretty else None
+    print(json.dumps(report, indent=indent, sort_keys=True))
+    if report["ok"]:
+        return 0
+    return 2
+
+
+if __name__ == "__main__":
+    raise SystemExit(cli())
@@ -0,0 +1,306 @@
+# Bounded OpenVINO GenAI NPU worker contract
+
+Status: prototype contract implemented locally; not a live Atlas/Hermes routing dependency.
+Default address: `http://127.0.0.1:18820`.
+
+## Purpose and hard boundary
+
+This worker is a local-only sidecar for small, bounded generation jobs that are useful around the assistant stack but are not primary chat: title drafting, short summaries, notification condensation, and memory-candidate extraction. It must not be used as Atlas/Hermes primary model routing, gateway fallback routing, autonomous tool-calling, or an unbounded chat endpoint without a separate approval gate.
+
+Hard boundaries:
+
+- Bind to `127.0.0.1` by default; non-local bind is a code/ops review item, not a runtime flag to casually change.
+- Do not enable a persistent systemd/Docker service as part of smoke testing.
+- Do not restart or reconfigure Atlas, Hermes, gateway, LiteLLM, RAG, or n8n routing to call this worker without explicit approval from Will.
+- Do not write memory, mutate Chroma/vector collections, trigger RAG reindexing, or process private document/image directories.
+- Do not log raw prompts or raw request bodies by default.
+- Treat HTTP success as insufficient for NPU claims; require positive `/sys/class/accel/accel0/device/npu_busy_time_us` delta for generation.
+
+## Recommended model/runtime
+
+Recommended first model:
+
+- Model id: `OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov`
+- Local path: `/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov`
+- Runtime: `/home/will/.venvs/npu` with `openvino-genai==2026.2.0.0`
+- Device: OpenVINO GenAI `NPU`
+- Compile cache: `/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4`
+
+Why this model/runtime:
+
+- It is already staged in the repo prototype and has a local smoke observation with positive NPU busy-time delta.
+- It is an OpenVINO IR model with INT4-compressed weights, which keeps memory/compile pressure low enough for a sidecar on the shared NPU.
+- Qwen2.5-1.5B-Instruct is large enough for formatting/summarization/notification jobs but small enough to keep latency bounded. It should not be marketed as a high-quality general assistant model.
+- The Hugging Face model card identifies it as Qwen2.5-1.5B-Instruct converted to OpenVINO IR with INT4_SYM NNCF weight compression and states compatibility with OpenVINO 2025.1.0+; the local runtime is newer than that baseline.
+- OpenVINO GenAI `LLMPipeline` is the right first runtime because the existing local NPU stack already uses OpenVINO GenAI successfully for Whisper, and it exposes a simple bounded generate call with cache controls.
+
+Deferred alternatives:
+
+- Larger 3B/7B local LLMs: defer until the 1.5B contract proves stable; larger models increase compile time, memory pressure, and NPU contention.
+- CPU/GPU fallback inside this service: defer; fallback would blur the NPU verification contract. If fallback is later approved, return `device_actual` and keep NPU-only health separate.
+- Manual `EXPORT_BLOB`/`BLOB_PATH`: defer until compile latency is proven to dominate despite `CACHE_DIR`. If used later, record OpenVINO version, NPU compiler/driver versions, model id, quantization flags, and source model path; invalidate after OpenVINO/NPU driver upgrades.
+
+## Runtime bounds
+
+Pipeline configuration for the first milestone:
+
+```text
+CACHE_DIR=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
+MAX_PROMPT_LEN=1024
+MIN_RESPONSE_LEN=64
+PREFILL_HINT=DYNAMIC
+GENERATE_HINT=FAST_COMPILE
+```
+
+Request bounds:
+
+- `input`: required non-empty string; max `6000` characters before prompt templating.
+- `job`: one of `title`, `summary`, `notification`, `memory_candidate`.
+- `max_new_tokens`: optional; default by job; hard max `256`.
+- Concurrency: generation must be serialized inside the process with a lock because the NPU is shared with Whisper/embeddings/prototype sidecars.
+- Logging: log method/path/status and timing only; never log raw `input` or generated text by default.
+
+Expected latency target:
+
+- First generation in a fresh process with the compile cache already available: acceptable if roughly 15 seconds or less for a short prompt on the staged model.
+- Warm short jobs: target under 5 seconds for `title`/`notification` and under 10 seconds for `summary`/`memory_candidate`.
+- Defer promotion if p95 warm latency exceeds 15 seconds for 24-96 generated tokens, or if cold compile regularly blocks the NPU long enough to degrade live Whisper/embeddings.
+
+These are prototype acceptance targets, not SLOs for live Atlas routing.
+
+## CLI contract
+
+Command shape:
+
+```bash
+cd /home/will/lab/swarm/openvino-genai-npu-worker
+/home/will/.venvs/npu/bin/python worker.py \
+  --job title \
+  --input 'Synthetic non-private text to title.' \
+  --max-new-tokens 32
+```
+
+CLI stdout is JSON with the same response shape as HTTP generation. Exit code must be:
+
+- `0` when the job succeeds and `npu_busy_delta_us > 0`.
+- non-zero when input validation fails, model load/generation fails, or NPU busy-time delta is not positive.
+
+The CLI must not write memory, change service routing, or start persistent services.
+
+## HTTP contract
+
+Start temporary local server only:
+
+```bash
+cd /home/will/lab/swarm/openvino-genai-npu-worker
+/home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820
+```
+
+Endpoints:
+
+```text
+GET  /healthz
+GET  /models
+POST /v1/worker/generate
+POST /v1/worker/extract-memory-candidates
+POST /v1/worker/condense-notification
+```
+
+`GET /healthz` response fields:
+
+```json
+{
+  "ok": true,
+  "model": "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov",
+  "model_path": "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov",
+  "device": "NPU",
+  "cache_dir": "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4",
+  "cache_exists": true,
+  "loaded": false,
+  "initial_load_ms": null,
+  "busy_time_us": 0,
+  "max_input_chars": 6000,
+  "jobs": ["memory_candidate", "notification", "summary", "title"],
+  "bind": "127.0.0.1:18820"
+}
+```
+
+`POST /v1/worker/generate` request:
+
+```json
+{
+  "job": "summary",
+  "input": "Synthetic non-private text to summarize.",
+  "max_new_tokens": 80
+}
+```
+
+Specialized aliases:
+
+- `POST /v1/worker/extract-memory-candidates` implies `job=memory_candidate`.
+- `POST /v1/worker/condense-notification` implies `job=notification`.
+- Backward-compatible request `job=memory` may map to `memory_candidate`, but new clients should use `memory_candidate`.
+
+Successful generation response:
+
+```json
+{
+  "model": "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov",
+  "device": "NPU",
+  "job": "summary",
+  "text": "...",
+  "json": null,
+  "timing_ms": {
+    "load": 0.0,
+    "initial_load": 10989.08,
+    "generate": 3157.94,
+    "total": 3157.94
+  },
+  "npu_busy_delta_us": 2650724,
+  "npu_busy_before_us": 123,
+  "npu_busy_after_us": 2650847,
+  "cache_dir": "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
+}
+```
+
+Validation/error behavior:
+
+- Unsupported path: `404` JSON `{"error":"not found"}`.
+- Unsupported job, empty input, too-long input, invalid token bound, missing model, or generation failure: JSON `{"error":"..."}`. Future implementations should return a non-2xx status; the current stdlib prototype returns `400` for all of these errors.
+- If `npu_busy_delta_us <= 0`, the response should be treated as failed by smoke tests even if an HTTP handler emitted `200`; the refreshed prototype returns `503` with the generation payload plus an `error` field.
+
+## Prompt/job contract
+
+`title`:
+
+- Input: short task/log/message excerpt.
+- Output: one title, 8 words or fewer, no markdown required.
+- Default `max_new_tokens`: 32.
+
+`summary`:
+
+- Input: synthetic/non-private text excerpt.
+- Output: one short paragraph or up to 4 bullets.
+- Default `max_new_tokens`: 160.
+
+`notification`:
+
+- Input: synthetic/non-private alert/log excerpt.
+- Output target: JSON object with `severity`, `category`, `summary`, `action_needed`.
+- Default `max_new_tokens`: 96.
+- Client must tolerate `json: null` and parse/validate before using output.
+
+`memory_candidate`:
+
+- Input: synthetic/non-private conversation excerpt.
+- Output target: JSON object with `candidates` and `notes`; candidates are proposals only.
+- Default `max_new_tokens`: 192.
+- This worker must never call Hermes memory tools or write durable memory directly.
+
+## Smoke-test plan using non-private data
+
+Do not use private vault notes, screenshots, email, chat logs, or document/image directories. Use synthetic text like this:
+
+```text
+Atlas received a kanban notification that an OpenVINO NPU prototype finished smoke testing. The reviewer needs a concise status and next action. No live gateway routing changed.
+```
+
+Direct NPU smoke:
+
+```bash
+cd /home/will/lab/swarm/openvino-genai-npu-worker
+before=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
+/home/will/.venvs/npu/bin/python smoke_llm_npu.py \
+  --prompt 'Write a concise title for: synthetic NPU worker contract smoke.' \
+  --max-new-tokens 24
+status=$?
+after=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
+printf 'external_busy_delta_us=%s\n' "$((after-before))"
+test "$status" -eq 0
+test "$((after-before))" -gt 0
+```
+
+Temporary HTTP smoke:
+
+```bash
+cd /home/will/lab/swarm/openvino-genai-npu-worker
+/home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820 &
+pid=$!
+trap 'kill "$pid" 2>/dev/null || true' EXIT
+
+# Wait (up to ~10 s) for the listener so the first request does not race server startup.
+for _ in $(seq 1 50); do
+  curl -fsS -o /dev/null http://127.0.0.1:18820/healthz 2>/dev/null && break
+  sleep 0.2
+done
+
+curl -fsS http://127.0.0.1:18820/healthz | python -m json.tool
+before=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
+curl -fsS http://127.0.0.1:18820/v1/worker/generate \
+  -H 'Content-Type: application/json' \
+  -d '{"job":"title","input":"Synthetic NPU worker smoke with no routing changes.","max_new_tokens":24}' \
+  | tee /tmp/openvino-genai-worker-smoke.json \
+  | python -m json.tool
+after=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
+python - <<'PY'
+import json
+p=json.load(open('/tmp/openvino-genai-worker-smoke.json'))
+assert p['npu_busy_delta_us'] > 0, p
+assert p['device'] == 'NPU', p
+PY
+test "$((after-before))" -gt 0
+kill "$pid"
+trap - EXIT
+```
+
+Also verify the temporary listener is gone:
+
+```bash
+ss -ltnp | grep ':18820' && { echo 'temporary smoke server still running'; exit 1; } || true
+```
+
+Unit tests that do not load the model or require private data:
+
+```bash
+cd /home/will/lab/swarm/openvino-genai-npu-worker
+python -m pytest -q
+```
+
+## NPU busy-time verification plan
+
+Acceptance for any NPU claim requires all of the following:
+
+1. Confirm the sysfs counter exists and is readable:
+   `test -r /sys/class/accel/accel0/device/npu_busy_time_us`.
+2. Read `busy_before` immediately before the generation call.
+3. Run exactly one bounded generation against the candidate worker.
+4. Read `busy_after` immediately after generation completes.
+5. Require `busy_after > busy_before` and response `npu_busy_delta_us > 0`.
+6. Record model id, runtime version, prompt chars, max tokens, load/generate timings, and busy delta in the review handoff.
+7. If the counter is unchanged, mark the smoke as failed even if HTTP returned `200` and text was generated.
+
+Because the NPU is shared, a positive external delta proves NPU activity during the window but not exclusive attribution. Prefer a quiet window with no concurrent Whisper/embedding jobs for review-grade measurements; otherwise repeat and compare worker-reported internal delta with the external counter.
+
+## Docs/diagram implications
+
+If this worker is kept as a prototype, docs and diagrams should show:
+
+- Live baseline remains RAG `:18810`, Whisper NPU `:18816`, embeddings `:18817`.
+- GenAI worker `:18820` is proposed/prototype/not-live unless explicitly approved and enabled.
+- No arrow from Hermes/Atlas gateway or LiteLLM primary routing to `:18820` unless a later approved integration actually exists.
+- Runbooks should include the CLI/HTTP smoke commands, `ss` listener checks, and NPU busy-time counter checks.
+- Service maps should label this as "bounded background generation" rather than "chat" or "assistant model".
+
+## Explicit no-go / defer criteria
+
+No-go for implementation or promotion:
+
+- Model path missing, OpenVINO GenAI import fails, or NPU device is unavailable.
+- `/sys/class/accel/accel0/device/npu_busy_time_us` is unreadable or does not increase during generation.
+- Warm bounded jobs exceed the prototype latency target or starve live Whisper/embedding services.
+- The worker needs private documents/images/chat logs for smoke testing.
+- The worker requires Atlas/Hermes/gateway/LiteLLM/RAG routing changes to demonstrate value.
+- The API starts accepting arbitrary chat history, tool-call instructions, unbounded prompts, or large outputs.
+- The service logs raw prompt bodies by default.
+- Persistent service enablement is requested without an explicit Will approval gate and a reviewer smoke handoff.
+
+Defer, do not solve in this lane:
+
+- Primary assistant routing, LiteLLM model registration, gateway fallback, or tool-calling integration.
+- RAG query rewriting, RAG answer generation, or collection mutation.
+- Private document/image triage.
+- Multi-model selection, CPU/GPU fallback policy, batching, streaming, or auth exposure beyond localhost.
@@ -0,0 +1,151 @@
+# OpenVINO GenAI NPU worker prototype
+
+Local-only prototype for cheap bounded background generation on Will's Intel NPU. It is intentionally isolated from primary Atlas/Hermes routing.
+
+## What it does
+
+- Model: `OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov`.
+- Runtime: `/home/will/.venvs/npu` with `openvino-genai==2026.2.0.0`.
+- Device: OpenVINO GenAI `NPU`.
+- Default bind: `127.0.0.1:18820`.
+- Jobs: `title`, `summary`, `notification`, `memory_candidate`.
+- Prompt/input limits: 6000 chars, `MAX_PROMPT_LEN=1024`, max 256 generated tokens.
+
+The worker does not write memory, does not restart Atlas/Hermes, does not change primary routing, and does not log raw prompt bodies by default.
+
+## Files
+
+- `CONTRACT.md` — bounded-worker service contract, endpoint/CLI API, smoke plan, NPU verification, docs implications, and no-go criteria.
+- `worker.py` — stdlib HTTP API plus CLI wrapper.
+- `smoke_llm_npu.py` — direct GenAI smoke test with NPU busy-time verification.
+- `tests/test_worker.py` — unit tests with a fake GenAI pipeline and synthetic busy-time counter.
+- `systemd/openvino-genai-npu-worker.service` — reviewed local-only user-service template for `127.0.0.1:18820`.
+
+## Model/cache
+
+Downloaded model path:
+
+```text
+/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov
+```
+
+OpenVINO compile cache path:
+
+```text
+/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
+```
+
+NPU pipeline config used by the prototype:
+
+```text
+CACHE_DIR=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
+MAX_PROMPT_LEN=1024
+MIN_RESPONSE_LEN=64
+PREFILL_HINT=DYNAMIC
+GENERATE_HINT=FAST_COMPILE
+```
+
+AOT/blob note: first milestone uses `CACHE_DIR` only. Do not switch to manual `EXPORT_BLOB`/`BLOB_PATH` until compile latency is proven to be the bottleneck. If explicit blobs are used later, record OpenVINO version, NPU compiler version, driver version, model id, quantization flags, and source weights path; invalidate blobs after OpenVINO/NPU driver upgrades.
+
+## Direct smoke test
+
+```bash
+cd /home/will/lab/swarm/openvino-genai-npu-worker
+/home/will/.venvs/npu/bin/python smoke_llm_npu.py
+```
+
+Acceptance requires `npu_busy_delta_us > 0`.
+
+Observed cold-ish smoke after download/cache setup:
+
+```json
+{
+  "text": "\"Atlas Summarizes NPU Worker Options Requested by User\"",
+  "timing_ms": {"load": 10989.08, "generate": 3157.94, "total": 14147.02},
+  "npu_busy_delta_us": 2650724
+}
+```
+
+## CLI usage
+
+```bash
+/home/will/.venvs/npu/bin/python worker.py \
+  --job title \
+  --input 'Kanban task asks for a small OpenVINO GenAI NPU worker prototype.'
+```
+
+Exit code is non-zero if validation fails, generation fails, or the worker-reported `npu_busy_delta_us` is not positive.
+
+## HTTP usage
+
+Start locally only:
+
+```bash
+cd /home/will/lab/swarm/openvino-genai-npu-worker
+ss -ltnp | grep ':18820' && { echo 'port 18820 already in use'; exit 1; } || true
+/home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820
+```
+
+The server also refuses startup if a listener is already accepting connections on `127.0.0.1:18820`.
+
+Endpoints:
+
+```text
+GET  /healthz
+GET  /models
+POST /v1/worker/generate
+POST /v1/worker/extract-memory-candidates
+POST /v1/worker/condense-notification
+```
+
+Example:
+
+```bash
+curl -s http://127.0.0.1:18820/v1/worker/generate \
+  -H 'Content-Type: application/json' \
+  -d '{"job":"summary","input":"Build a bounded local NPU worker for small generation tasks, no primary routing changes.","max_new_tokens":80}' \
+  | python -m json.tool
+```
+
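+Response includes `npu_busy_delta_us`. The worker answers HTTP 503 with an `error` field when the delta is not positive; clients should still treat a zero or negative delta as failure regardless of the HTTP status.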
+
+## Unit tests
+
+These tests use only synthetic strings and a fake GenAI pipeline, so they do not load the model or touch private data:
+
+```bash
+cd /home/will/lab/swarm/openvino-genai-npu-worker
+python -m pytest -q
+```
+
+## Environment variables
+
+```text
+OV_GENAI_NPU_MODEL=/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov
+OV_GENAI_NPU_CACHE=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
+OV_GENAI_NPU_HOST=127.0.0.1
+OV_GENAI_NPU_PORT=18820
+```
+
+Only `127.0.0.1` is accepted by the current prototype; wider binds require an explicit code change and approval.
+
+## Systemd user service
+
+A reviewed local-only unit exists at `systemd/openvino-genai-npu-worker.service` for persistent background use after foreground smoke succeeds with a positive NPU busy-time delta:
+
+```bash
+install -m 0644 systemd/openvino-genai-npu-worker.service ~/.config/systemd/user/openvino-genai-npu-worker.service
+systemctl --user daemon-reload
+systemctl --user enable --now openvino-genai-npu-worker.service
+systemctl --user status openvino-genai-npu-worker.service --no-pager
+```
+
+The service remains isolated: do not route primary Atlas/Hermes chat, gateway output, or automatic memory writes to it without a separate approved integration.
+
+## Safety boundaries
+
+- Binds only to `127.0.0.1` by default; non-local bind is refused in code.
+- No raw request-body logging.
+- No private external uploads.
+- No Atlas/Hermes gateway restarts or primary model routing changes.
+- NPU access is serialized with a process lock because the NPU is a shared resource with existing services.
@@ -0,0 +1,2 @@
+[pytest]
+testpaths = tests
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+"""Smoke-test OpenVINO GenAI LLMPipeline on Intel NPU.
+
+This verifies NPU execution by reading /sys/class/accel/accel0/device/npu_busy_time_us
+before and after generation. HTTP 200/service success is not considered proof.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import time
+from pathlib import Path
+from typing import Any
+
+DEFAULT_MODEL = "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov"
+DEFAULT_CACHE = "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
+BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
+
+
+def import_openvino_genai() -> Any:
+    """Import ``openvino_genai`` at call time and hand back the module object."""
+    import openvino_genai  # type: ignore[import-not-found]
+
+    return openvino_genai
+
+
+def read_busy(path: Path = BUSY_PATH) -> int:
+    """Parse the integer stored as text in ``path`` (the busy-time counter file by default)."""
+    raw_value = path.read_text()
+    return int(raw_value.strip())
+
+
+def main() -> int:
+    """Run one bounded generation on the NPU and print a JSON report.
+
+    The report contains the generated text, load/generate timings and the
+    busy-time counter values sampled immediately before and after the
+    ``generate`` call.
+
+    Returns:
+        0 when the busy-time counter increased across generation, otherwise 2.
+
+    Raises:
+        SystemExit: with a message when the model path or counter file is
+            missing, or ``--max-new-tokens`` is outside 1..256.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", default=DEFAULT_MODEL)
+    parser.add_argument("--cache-dir", default=DEFAULT_CACHE)
+    parser.add_argument("--busy-path", default=str(BUSY_PATH))
+    parser.add_argument("--prompt", default="Write a concise title for: Synthetic NPU worker contract smoke with no routing changes.")
+    parser.add_argument("--max-new-tokens", type=int, default=24)
+    args = parser.parse_args()
+
+    model_path = Path(args.model)
+    cache_dir = Path(args.cache_dir)
+    busy_path = Path(args.busy_path)
+    # Validate everything before touching the filesystem so a rejected run
+    # does not leave an empty cache directory behind.
+    if not model_path.exists():
+        raise SystemExit(f"model path does not exist: {model_path}")
+    if not busy_path.exists():
+        raise SystemExit(f"NPU busy-time counter does not exist: {busy_path}")
+    if args.max_new_tokens < 1 or args.max_new_tokens > 256:
+        raise SystemExit("max-new-tokens must be between 1 and 256")
+    cache_dir.mkdir(parents=True, exist_ok=True)
+
+    config = {
+        "CACHE_DIR": str(cache_dir),
+        "MAX_PROMPT_LEN": 1024,
+        "MIN_RESPONSE_LEN": 64,
+        "PREFILL_HINT": "DYNAMIC",
+        "GENERATE_HINT": "FAST_COMPILE",
+    }
+
+    ov_genai = import_openvino_genai()
+    load_start = time.monotonic()
+    pipe = ov_genai.LLMPipeline(str(model_path), "NPU", **config)
+    load_ms = round((time.monotonic() - load_start) * 1000, 2)
+
+    # Sample the counter around generate() only: the acceptance criterion is
+    # that generation itself advanced the counter, so pipeline load must not
+    # be part of the measured window.
+    before = read_busy(busy_path)
+    gen_start = time.monotonic()
+    output = pipe.generate(args.prompt, max_new_tokens=args.max_new_tokens)
+    gen_ms = round((time.monotonic() - gen_start) * 1000, 2)
+    after = read_busy(busy_path)
+    result = {
+        "model": str(model_path),
+        "device": "NPU",
+        "cache_dir": str(cache_dir),
+        "prompt_chars": len(args.prompt),
+        "max_new_tokens": args.max_new_tokens,
+        "text": str(output).strip(),
+        "timing_ms": {"load": load_ms, "generate": gen_ms, "total": round(load_ms + gen_ms, 2)},
+        "npu_busy_before_us": before,
+        "npu_busy_after_us": after,
+        "npu_busy_delta_us": after - before,
+    }
+    print(json.dumps(result, indent=2))
+    return 0 if after > before else 2
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,17 @@
+# systemd unit for the OpenVINO GenAI NPU worker prototype (installed under the user manager per the README).
+[Unit]
+Description=OpenVINO GenAI NPU worker prototype
+After=network-online.target
+
+[Service]
+Type=simple
+WorkingDirectory=/home/will/lab/swarm/openvino-genai-npu-worker
+# OV_GENAI_NPU_* variables supply worker.py's defaults for model path, cache dir, host and port.
+Environment=OV_GENAI_NPU_MODEL=/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov
+Environment=OV_GENAI_NPU_CACHE=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
+Environment=OV_GENAI_NPU_HOST=127.0.0.1
+Environment=OV_GENAI_NPU_PORT=18820
+# Host and port are also given explicitly here; command-line values override the environment defaults.
+ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-genai-npu-worker/worker.py --host 127.0.0.1 --port 18820
+# Restart only after an unclean exit, waiting 5 seconds between attempts.
+Restart=on-failure
+RestartSec=5
+
+[Install]
+WantedBy=default.target
@@ -0,0 +1,131 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+import worker
+
+
+class FakePipeline:
+    """Stand-in for an OpenVINO GenAI pipeline that bumps a counter file on each call."""
+
+    def __init__(self, model_path: str, device: str, config: dict[str, object], busy_path: Path, output: str = "Synthetic title"):
+        # Constructor arguments are kept verbatim so tests can inspect them.
+        self.model_path, self.device, self.config = model_path, device, config
+        self.busy_path, self.output = busy_path, output
+        self.calls: list[tuple[str, int]] = []
+
+    def generate(self, prompt: str, *, max_new_tokens: int):
+        """Record the call, add 1234 to the counter file, and return the canned output."""
+        self.calls.append((prompt, max_new_tokens))
+        counter_now = int(self.busy_path.read_text().strip())
+        self.busy_path.write_text(f"{counter_now + 1234}")
+        return self.output
+
+
+class FakeGenAI:
+    """Minimal substitute for the ``openvino_genai`` module used by the worker tests."""
+
+    def __init__(self, busy_path: Path, output: str = "Synthetic title"):
+        self.busy_path = busy_path
+        self.output = output
+        # Set once LLMPipeline() has been called, so tests can inspect the pipeline.
+        self.pipeline: FakePipeline | None = None
+
+    def LLMPipeline(self, model_path: str, device: str, *args: object, **kwargs: object):  # noqa: N802 - mirrors OpenVINO API
+        """Create a ``FakePipeline`` from either a positional config dict or keyword settings."""
+        positional = args[0] if args else None
+        settings: dict[str, object]
+        if isinstance(positional, dict):
+            settings = {str(key): value for key, value in positional.items()}
+        else:
+            settings = dict(kwargs)
+        self.pipeline = FakePipeline(model_path, device, settings, self.busy_path, self.output)
+        return self.pipeline
+
+
+@pytest.fixture()
+def worker_paths(tmp_path: Path):
+    """Provide (model dir, cache dir, counter file) under ``tmp_path``; the counter starts at 100."""
+    model_dir = tmp_path / "model"
+    model_dir.mkdir()
+    counter_file = tmp_path / "npu_busy_time_us"
+    counter_file.write_text("100")
+    # The cache directory is deliberately not created here.
+    return model_dir, tmp_path / "cache", counter_file
+
+
+def test_generate_uses_npu_config_and_reports_busy_delta(monkeypatch: pytest.MonkeyPatch, worker_paths):
+    """A title job builds the pipeline with NPU settings and reports the counter delta."""
+    model_dir, cache_root, counter_file = worker_paths
+    genai_stub = FakeGenAI(counter_file)
+    monkeypatch.setattr(worker, "import_openvino_genai", lambda: genai_stub)
+
+    subject = worker.NpuWorker(str(model_dir), str(cache_root), busy_path=counter_file, bind_port=18820)
+    outcome = subject.generate("title", "Synthetic non-private kanban notification.", max_new_tokens=24)
+
+    observed_counters = (outcome.npu_busy_before_us, outcome.npu_busy_after_us, outcome.npu_busy_delta_us)
+    assert observed_counters == (100, 1334, 1234)
+    assert outcome.text == "Synthetic title"
+
+    pipeline = genai_stub.pipeline
+    assert pipeline is not None
+    assert pipeline.device == "NPU"
+    assert pipeline.config["CACHE_DIR"] == str(cache_root)
+    assert pipeline.config["MAX_PROMPT_LEN"] == 1024
+    _, requested_tokens = pipeline.calls[0]
+    assert requested_tokens == 24
+
+
+def test_memory_alias_json_wrapping(monkeypatch: pytest.MonkeyPatch, worker_paths):
+    """A top-level JSON array from the model is wrapped into the candidates/notes object."""
+    model_dir, cache_root, counter_file = worker_paths
+    array_reply = '[{"fact":"synthetic stable preference","confidence":0.8}]'
+    genai_stub = FakeGenAI(counter_file, output=array_reply)
+    monkeypatch.setattr(worker, "import_openvino_genai", lambda: genai_stub)
+
+    subject = worker.NpuWorker(str(model_dir), str(cache_root), busy_path=counter_file)
+    outcome = subject.generate("memory_candidate", "Synthetic user says they prefer concise answers.")
+
+    wrapped = outcome.parsed_json
+    assert wrapped is not None
+    assert wrapped["candidates"][0]["fact"] == "synthetic stable preference"
+    assert "wrapped" in wrapped["notes"]
+
+
+@pytest.mark.parametrize(
+    ("job", "user_input", "max_new_tokens", "message"),
+    [
+        ("bad", "hello", 1, "unsupported job"),
+        ("title", "", 1, "non-empty"),
+        ("title", "x" * (worker.MAX_INPUT_CHARS + 1), 1, "input too long"),
+        ("title", "hello", worker.MAX_NEW_TOKENS + 1, "max_new_tokens"),
+    ],
+)
+def test_validation_errors(monkeypatch: pytest.MonkeyPatch, worker_paths, job: str, user_input: str, max_new_tokens: int, message: str):
+    """Each invalid request raises ``ValueError`` whose text matches ``message``."""
+    model_dir, cache_root, counter_file = worker_paths
+    monkeypatch.setattr(worker, "import_openvino_genai", lambda: FakeGenAI(counter_file))
+    subject = worker.NpuWorker(str(model_dir), str(cache_root), busy_path=counter_file)
+
+    with pytest.raises(ValueError, match=message):
+        subject.generate(job, user_input, max_new_tokens=max_new_tokens)
+
+
+def test_health_reports_actual_bind_and_limits(worker_paths):
+    """``health()`` echoes the configured bind address, the limits, and the current counter."""
+    model_dir, cache_root, counter_file = worker_paths
+    subject = worker.NpuWorker(
+        str(model_dir),
+        str(cache_root),
+        busy_path=counter_file,
+        bind_host="127.0.0.1",
+        bind_port=18821,
+    )
+
+    report = subject.health()
+
+    expected_fields = {
+        "bind": "127.0.0.1:18821",
+        "max_input_chars": 6000,
+        "max_new_tokens": 256,
+        "busy_time_us": 100,
+    }
+    for field_name, expected_value in expected_fields.items():
+        assert report[field_name] == expected_value
+
+
+def test_response_payload_shape(worker_paths):
+    """``response_payload`` yields a JSON-serialisable dict with device, job and parsed JSON."""
+    model_dir, cache_root, counter_file = worker_paths
+    subject = worker.NpuWorker(str(model_dir), str(cache_root), busy_path=counter_file)
+    timings = {"load": 1.0, "initial_load": 1.0, "generate": 2.0, "total": 3.0}
+    outcome = worker.GenerationResult(
+        text="ok",
+        parsed_json={"severity": "info"},
+        timing_ms=timings,
+        npu_busy_delta_us=5,
+        npu_busy_before_us=10,
+        npu_busy_after_us=15,
+    )
+
+    body = worker.response_payload(subject, "notification", outcome)
+
+    # Serialising must succeed and produce non-empty text.
+    assert json.dumps(body)
+    assert body["device"] == "NPU"
+    assert body["job"] == "notification"
+    assert body["json"] == {"severity": "info"}
@@ -0,0 +1,289 @@
+#!/usr/bin/env python3
+"""Local-only OpenVINO GenAI NPU worker.
+
+Small bounded LLM worker for cheap background tasks. It intentionally does not
+wire into Atlas/Hermes routing and does not log raw prompts by default.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+import socket
+import threading
+import time
+from dataclasses import dataclass
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+from typing import Any, cast
+from urllib.parse import urlparse
+
+MODEL_ID = "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov"  # model identifier echoed in /healthz, /models and responses
+DEFAULT_MODEL_PATH = "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov"  # fallback when OV_GENAI_NPU_MODEL is unset
+DEFAULT_CACHE_DIR = "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"  # fallback when OV_GENAI_NPU_CACHE is unset
+BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")  # counter file read before/after each generation
+HOST = "127.0.0.1"  # default bind host; cli() refuses any other value
+PORT = 18820  # default bind port
+MAX_INPUT_CHARS = 6000  # generate() rejects longer inputs
+MAX_NEW_TOKENS = 256  # upper bound accepted for max_new_tokens
+GENAI_CONFIG = {  # keyword settings passed to LLMPipeline; CACHE_DIR is replaced per worker in load()
+    "CACHE_DIR": DEFAULT_CACHE_DIR,
+    "MAX_PROMPT_LEN": 1024,
+    "MIN_RESPONSE_LEN": 64,
+    "PREFILL_HINT": "DYNAMIC",
+    "GENERATE_HINT": "FAST_COMPILE",
+}
+DEFAULTS = {  # per-job max_new_tokens used when the caller supplies none
+    "title": 32,
+    "summary": 160,
+    "memory_candidate": 192,
+    "notification": 96,
+}
+PROMPTS = {  # per-job prompt templates; "{input}" is filled with the stripped user input
+    "title": "Write one concise title, 8 words or fewer. Return only the title.\n\nInput:\n{input}",
+    "summary": "Summarize the input in one short paragraph or up to 4 bullets. Be factual and concise.\n\nInput:\n{input}",
+    "memory_candidate": (
+        "Extract durable memory candidates from the conversation excerpt. "
+        "Return strict JSON with keys: candidates (array of objects with fact, confidence, reason), notes. "
+        "Do not write memory; only propose candidates.\n\nInput:\n{input}"
+    ),
+    "notification": (
+        "Condense this notification or log excerpt for a human. "
+        "Return JSON with keys: severity (info|warning|error), category, summary, action_needed.\n\nInput:\n{input}"
+    ),
+}
+
+
+def import_openvino_genai() -> Any:
+    """Return the ``openvino_genai`` module, imported only when this function is called.
+
+    Deferring the import lets unit tests replace this function instead of
+    needing the NPU virtualenv.
+    """
+    import openvino_genai  # type: ignore[import-not-found]
+
+    return openvino_genai
+
+
+def listener_exists(host: str, port: int) -> bool:
+    """Report whether a TCP connect to ``host:port`` succeeds within 0.2 seconds."""
+    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        probe.settimeout(0.2)
+        connect_status = probe.connect_ex((host, port))
+    finally:
+        probe.close()
+    return connect_status == 0
+
+
+def coerce_json(text: str) -> Any | None:
+    """Best-effort JSON extraction from model output.
+
+    Tries the whole stripped text first; if that fails, tries the first
+    ``{...}`` or ``[...]`` span found (greedy, across newlines). Returns the
+    decoded value, or ``None`` when the text is empty or nothing decodes.
+    """
+    candidate = text.strip()
+    if not candidate:
+        return None
+    try:
+        return json.loads(candidate)
+    except json.JSONDecodeError:
+        pass
+    embedded = re.search(r"(\{.*\}|\[.*\])", candidate, re.S)
+    if embedded is None:
+        return None
+    try:
+        return json.loads(embedded.group(1))
+    except json.JSONDecodeError:
+        return None
+
+
+@dataclass
+class GenerationResult:  # outcome of one NpuWorker.generate() call
+    text: str  # stripped string form of the pipeline output
+    parsed_json: Any | None  # decoded JSON for memory_candidate/notification jobs, else None
+    timing_ms: dict[str, float]  # keys: load, initial_load, generate, total
+    npu_busy_delta_us: int  # after - before
+    npu_busy_before_us: int  # counter value read just before generate()
+    npu_busy_after_us: int  # counter value read just after generate()
+
+
+class NpuWorker:
+    """Serialised wrapper around one OpenVINO GenAI ``LLMPipeline`` on the NPU.
+
+    The pipeline is created lazily on the first ``generate`` call. A
+    ``threading.Lock`` lets only one load/generate run at a time within this
+    process, and each generation is bracketed by reads of the busy-time
+    counter file so callers can check that the counter advanced.
+    """
+
+    def __init__(
+        self,
+        model_path: str,
+        cache_dir: str,
+        *,
+        busy_path: Path = BUSY_PATH,
+        bind_host: str = HOST,
+        bind_port: int = PORT,
+    ):
+        """Validate paths and prepare an unloaded worker.
+
+        ``bind_host``/``bind_port`` are only reported by ``health()``; this
+        class does not open sockets itself.
+
+        Raises:
+            FileNotFoundError: if ``model_path`` or ``busy_path`` does not exist.
+        """
+        self.model_path = Path(model_path)
+        self.cache_dir = Path(cache_dir)
+        self.busy_path = Path(busy_path)
+        self.bind_host = bind_host
+        self.bind_port = bind_port
+        self._pipe = None
+        self._load_ms: float | None = None
+        self._lock = threading.Lock()
+        self._loaded_at: float | None = None
+        # Validate before creating anything on disk so a misconfigured worker
+        # does not leave an empty cache directory behind.
+        if not self.model_path.exists():
+            raise FileNotFoundError(f"model path does not exist: {self.model_path}")
+        if not self.busy_path.exists():
+            raise FileNotFoundError(f"NPU busy-time counter does not exist: {self.busy_path}")
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+    def read_busy(self) -> int:
+        """Return the integer currently stored in the busy-time counter file."""
+        return int(self.busy_path.read_text().strip())
+
+    def load(self) -> None:
+        """Create the pipeline on first use; later calls return immediately."""
+        if self._pipe is not None:
+            return
+        start = time.monotonic()
+        # NPU GenAI requires bounded prompt/response shapes; CACHE_DIR enables compiled blob caching.
+        ov_genai = import_openvino_genai()
+        config = GENAI_CONFIG | {"CACHE_DIR": str(self.cache_dir)}
+        self._pipe = ov_genai.LLMPipeline(str(self.model_path), "NPU", **config)
+        self._load_ms = round((time.monotonic() - start) * 1000, 2)
+        self._loaded_at = time.time()
+
+    def generate(self, job: str, user_input: str, max_new_tokens: int | None = None) -> GenerationResult:
+        """Run one bounded job and return its text, timings and counter readings.
+
+        Args:
+            job: one of the keys of ``PROMPTS``.
+            user_input: non-empty text of at most ``MAX_INPUT_CHARS`` characters.
+            max_new_tokens: generation cap between 1 and ``MAX_NEW_TOKENS``;
+                ``None`` selects the per-job default from ``DEFAULTS``.
+
+        Raises:
+            ValueError: for an unknown job, empty/over-long/non-string input,
+                or a ``max_new_tokens`` that is not an integer in range.
+        """
+        if job not in PROMPTS:
+            raise ValueError(f"unsupported job: {job}")
+        if not isinstance(user_input, str) or not user_input.strip():
+            raise ValueError("input must be a non-empty string")
+        if len(user_input) > MAX_INPUT_CHARS:
+            raise ValueError(f"input too long: {len(user_input)} chars > {MAX_INPUT_CHARS}")
+        # Only None means "use the default". An explicit 0 must reach the range
+        # check below instead of being silently replaced by the job default.
+        if max_new_tokens is None:
+            max_new_tokens = DEFAULTS[job]
+        try:
+            max_new_tokens = int(max_new_tokens)
+        except (TypeError, ValueError):
+            raise ValueError(f"max_new_tokens must be an integer between 1 and {MAX_NEW_TOKENS}") from None
+        if max_new_tokens < 1 or max_new_tokens > MAX_NEW_TOKENS:
+            raise ValueError(f"max_new_tokens must be between 1 and {MAX_NEW_TOKENS}")
+        prompt = PROMPTS[job].format(input=user_input.strip())
+        with self._lock:
+            load_start = time.monotonic()
+            self.load()
+            load_ms = round((time.monotonic() - load_start) * 1000, 2)
+            before = self.read_busy()
+            gen_start = time.monotonic()
+            pipe = cast(Any, self._pipe)
+            text = str(pipe.generate(prompt, max_new_tokens=max_new_tokens)).strip()
+            generate_ms = round((time.monotonic() - gen_start) * 1000, 2)
+            after = self.read_busy()
+        parsed = coerce_json(text) if job in {"memory_candidate", "notification"} else None
+        if job == "memory_candidate" and isinstance(parsed, list):
+            parsed = {"candidates": parsed, "notes": "model returned a top-level array; worker wrapped it to preserve the API contract"}
+        return GenerationResult(
+            text=text,
+            parsed_json=parsed,
+            timing_ms={"load": load_ms, "initial_load": self._load_ms or 0.0, "generate": generate_ms, "total": round(load_ms + generate_ms, 2)},
+            npu_busy_delta_us=after - before,
+            npu_busy_before_us=before,
+            npu_busy_after_us=after,
+        )
+
+    def health(self) -> dict[str, Any]:
+        """Return a status snapshot: paths, load state, limits, jobs, bind address and current counter."""
+        return {
+            "ok": True,
+            "model": MODEL_ID,
+            "model_path": str(self.model_path),
+            "device": "NPU",
+            "cache_dir": str(self.cache_dir),
+            "cache_exists": self.cache_dir.exists(),
+            "loaded": self._pipe is not None,
+            "initial_load_ms": self._load_ms,
+            "loaded_at": self._loaded_at,
+            "busy_time_us": self.read_busy(),
+            "max_input_chars": MAX_INPUT_CHARS,
+            "max_new_tokens": MAX_NEW_TOKENS,
+            "jobs": sorted(PROMPTS),
+            "bind": f"{self.bind_host}:{self.bind_port}",
+        }
+
+
+def response_payload(worker: NpuWorker, job: str, result: GenerationResult) -> dict[str, Any]:
+    """Assemble the JSON-serialisable response body for one finished job."""
+    payload: dict[str, Any] = {"model": MODEL_ID, "device": "NPU", "job": job}
+    payload["text"] = result.text
+    payload["json"] = result.parsed_json
+    payload["timing_ms"] = result.timing_ms
+    # Counter fields are copied under their own names, in the same key order as before.
+    for counter_field in ("npu_busy_delta_us", "npu_busy_before_us", "npu_busy_after_us"):
+        payload[counter_field] = getattr(result, counter_field)
+    payload["cache_dir"] = str(worker.cache_dir)
+    return payload
+
+
+def make_handler(worker: NpuWorker):
+    """Create the HTTP request-handler class that serves ``worker``.
+
+    GET ``/healthz`` and ``/models`` report worker state; the three POST routes
+    run one bounded generation job. POST status codes:
+
+    * 200 - generation succeeded and the busy-time counter increased.
+    * 400 - malformed request: bad ``Content-Length``, invalid JSON, a body
+      that is not a JSON object, or a ``ValueError`` from ``worker.generate``.
+    * 413 - declared body larger than this handler accepts.
+    * 500 - any other failure while generating.
+    * 503 - generation finished but the busy-time counter did not increase.
+    """
+    # Ceiling on the declared request size. The worker only accepts a few
+    # thousand characters of input, so anything larger is rejected unread.
+    max_body_bytes = 256 * 1024
+
+    class Handler(BaseHTTPRequestHandler):
+        server_version = "openvino-genai-npu-worker/0.2"
+
+        def log_message(self, format: str, *args: Any) -> None:
+            # Log only method/path/status metadata, not raw request bodies.
+            print(f"{self.client_address[0]} {format % args}")
+
+        def send_json(self, status: int, payload: Any) -> None:
+            """Serialise ``payload`` and send it with the given HTTP status."""
+            body = json.dumps(payload, indent=2).encode("utf-8")
+            self.send_response(status)
+            self.send_header("Content-Type", "application/json")
+            self.send_header("Content-Length", str(len(body)))
+            self.end_headers()
+            self.wfile.write(body)
+
+        def do_GET(self) -> None:  # noqa: N802
+            path = urlparse(self.path).path
+            if path == "/healthz":
+                self.send_json(200, worker.health())
+            elif path == "/models":
+                self.send_json(200, {"models": [{"id": MODEL_ID, "path": str(worker.model_path), "device": "NPU"}]})
+            else:
+                self.send_json(404, {"error": "not found"})
+
+        def do_POST(self) -> None:  # noqa: N802
+            path = urlparse(self.path).path
+            route_job = {
+                "/v1/worker/generate": None,
+                "/v1/worker/extract-memory-candidates": "memory_candidate",
+                "/v1/worker/condense-notification": "notification",
+            }.get(path, "__missing__")
+            if route_job == "__missing__":
+                self.send_json(404, {"error": "not found"})
+                return
+
+            try:
+                length = int(self.headers.get("Content-Length", "0"))
+            except ValueError:
+                length = -1
+            if length < 0:
+                # A negative size would make rfile.read() wait for end of stream.
+                self.send_json(400, {"error": "invalid Content-Length header"})
+                return
+            if length > max_body_bytes:
+                # The body stays unread, so this connection must not be reused.
+                self.close_connection = True
+                self.send_json(413, {"error": f"request body too large: {length} bytes > {max_body_bytes}"})
+                return
+
+            try:
+                payload = json.loads(self.rfile.read(length) or b"{}")
+                if not isinstance(payload, dict):
+                    raise ValueError("request body must be a JSON object")
+                job = route_job or str(payload.get("job", "summary"))
+                if job == "memory":
+                    job = "memory_candidate"
+                requested_tokens = payload.get("max_new_tokens")
+                if requested_tokens is not None and not isinstance(requested_tokens, (int, float, str)):
+                    raise ValueError("max_new_tokens must be an integer")
+                # Pass the input through untouched: wrapping it in str() would
+                # turn JSON null into the literal prompt "None" and defeat the
+                # worker's own string validation.
+                result = worker.generate(job, payload.get("input", ""), requested_tokens)
+            except ValueError as exc:
+                # Covers JSON decode errors and request validation failures.
+                self.send_json(400, {"error": str(exc)})
+                return
+            except Exception as exc:
+                # Anything else is a server-side failure, not a client mistake.
+                self.send_json(500, {"error": str(exc)})
+                return
+
+            body = response_payload(worker, job, result)
+            if result.npu_busy_delta_us <= 0:
+                body["error"] = "NPU busy-time counter did not increase during generation"
+                self.send_json(503, body)
+                return
+            self.send_json(200, body)
+
+    return Handler
+
+
+def cli(argv: list[str] | None = None) -> int:
+    """Entry point: run a single ``--job`` and exit, or serve the HTTP API.
+
+    Defaults for model path, cache dir, host and port come from the
+    ``OV_GENAI_NPU_*`` environment variables. In job mode the return value is
+    0 when the busy-time counter increased and 2 otherwise. Raises
+    ``SystemExit`` for a non-local host or an already-occupied port.
+    """
+    env = os.environ
+    parser = argparse.ArgumentParser(description="OpenVINO GenAI NPU worker")
+    parser.add_argument("--model-path", default=env.get("OV_GENAI_NPU_MODEL", DEFAULT_MODEL_PATH))
+    parser.add_argument("--cache-dir", default=env.get("OV_GENAI_NPU_CACHE", DEFAULT_CACHE_DIR))
+    parser.add_argument("--host", default=env.get("OV_GENAI_NPU_HOST", HOST))
+    parser.add_argument("--port", type=int, default=int(env.get("OV_GENAI_NPU_PORT", PORT)))
+    parser.add_argument("--job", choices=sorted(PROMPTS), help="Run one CLI job instead of serving HTTP")
+    parser.add_argument("--input", help="Input text for --job")
+    parser.add_argument("--max-new-tokens", type=int)
+    options = parser.parse_args(argv)
+
+    if options.host != "127.0.0.1":
+        raise SystemExit("Refusing non-local bind without code change/explicit approval")
+
+    npu_worker = NpuWorker(
+        options.model_path,
+        options.cache_dir,
+        bind_host=options.host,
+        bind_port=options.port,
+    )
+
+    if options.job:
+        # One-shot mode: print the same payload the HTTP API would return.
+        outcome = npu_worker.generate(options.job, options.input or "", options.max_new_tokens)
+        report = response_payload(npu_worker, options.job, outcome)
+        print(json.dumps(report, indent=2))
+        if outcome.npu_busy_delta_us > 0:
+            return 0
+        return 2
+
+    bind_address = (options.host, options.port)
+    if listener_exists(*bind_address):
+        raise SystemExit(f"Refusing to start: listener already exists on {options.host}:{options.port}")
+    server = ThreadingHTTPServer(bind_address, make_handler(npu_worker))
+    print(f"serving {MODEL_ID} on http://{options.host}:{options.port}; raw prompts are not logged")
+    server.serve_forever()
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(cli())
@@ -0,0 +1,150 @@
+# OpenVINO NPU reranker service
+
+Local-first cross-encoder reranker prototype for second-stage RAG ranking.
+
+- Default bind: `127.0.0.1:18818`
+- Default model: `cross-encoder/ms-marco-MiniLM-L6-v2`
+- Default device: `NPU`
+- Model cache: `/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov/`
+- NPU proof: `/sys/class/accel/accel0/device/npu_busy_time_us` delta before/after inference
+
+This service is intentionally not wired into live RAG by default.
+
+## Files
+
+- `SPEC.md` — endpoint/CLI contract, model/runtime recommendation, smoke/NPU proof plan, RAG integration plan, docs implications, and no-go criteria.
+- `server.py` — stdlib HTTP OpenVINO Runtime service with fail-fast localhost listener conflict checks and request validation.
+- `smoke.py` — non-private API/ranking/NPU busy-time smoke test.
+- `tests/test_server_validation.py` — stdlib unit checks for request validation and listener conflict detection.
+- `openvino-reranker.service` — optional user-systemd unit.
+
+## One-time setup
+
+Use a separate venv so the existing Whisper/embeddings NPU venv is not perturbed:
+
+```bash
+python -m venv /home/will/.venvs/openvino-reranker
+source /home/will/.venvs/openvino-reranker/bin/activate
+python -m pip install -U pip
+python -m pip install "openvino>=2026.2" "optimum-intel[openvino]" transformers tokenizers nncf numpy
+```
+
+Export the model:
+
+```bash
+source /home/will/.venvs/openvino-reranker/bin/activate
+optimum-cli export openvino \
+  --model cross-encoder/ms-marco-MiniLM-L6-v2 \
+  --task text-classification \
+  --weight-format int8 \
+  /home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov
+```
+
+If INT8 export or NPU compile fails, export an FP16/FP32 IR to a separate directory and point `OPENVINO_RERANKER_MODEL_DIR` at it while debugging. Do not overwrite existing vector/RAG/Chroma collections.
+
+## Run in foreground
+
+Check the port and NPU counter first:
+
+```bash
+ss -ltnp | grep ':18818 ' || true
+cat /sys/class/accel/accel0/device/npu_busy_time_us
+```
+
+Start locally:
+
+```bash
+source /home/will/.venvs/openvino-reranker/bin/activate
+OPENVINO_RERANKER_HOST=127.0.0.1 \
+OPENVINO_RERANKER_PORT=18818 \
+OPENVINO_RERANKER_DEVICE=NPU \
+OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov \
+python /home/will/lab/swarm/openvino-reranker-npu/server.py
+```
+
+Startup performs a non-private smoke inference and fails closed when `OPENVINO_RERANKER_DEVICE=NPU` but `npu_busy_time_us` does not increase. It also checks whether the requested listener can bind before compiling the OpenVINO model, so obvious port conflicts fail fast; the real server bind still happens immediately after model load.
+
+## API
+
+Health:
+
+```bash
+curl -sS http://127.0.0.1:18818/healthz | jq
+curl -sS http://127.0.0.1:18818/readyz | jq
+```
+
+Rerank:
+
+```bash
+curl -sS http://127.0.0.1:18818/rerank \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "query":"how do I verify OpenVINO NPU usage?",
+    "documents":[
+      {"id":"good","text":"Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference."},
+      {"id":"bad","text":"This note is about making sourdough starter."}
+    ],
+    "top_k":2
+  }' | jq
+```
+
+Compatibility alias:
+
+```bash
+curl -sS http://127.0.0.1:18818/v1/rerank \
+  -H 'Content-Type: application/json' \
+  -d '{"model":"local-reranker","query":"npu busy time","documents":["OpenVINO NPU busy time proves accelerator use."],"top_n":1}' | jq
+```
+
+## Smoke test
+
+```bash
+source /home/will/.venvs/openvino-reranker/bin/activate
+python /home/will/lab/swarm/openvino-reranker-npu/smoke.py --url http://127.0.0.1:18818
+```
+
+Expected:
+
+- `/readyz` is HTTP 200 and reports `device=NPU`.
+- Each fixture returns `ok=true` and a sorted `results` list.
+- The top result matches the non-private fixture expectation.
+- Response and sysfs `npu_busy_delta_us` are positive.
+
+## Validation checks
+
+```bash
+source /home/will/.venvs/openvino-reranker/bin/activate
+PYTHONPATH=/home/will/lab/swarm/openvino-reranker-npu \
+  python -m unittest discover -s /home/will/lab/swarm/openvino-reranker-npu/tests
+```
+
+These checks do not compile the OpenVINO model; they cover request validation and fail-fast listener conflict detection.
+
+## Optional systemd user service
+
+Install the unit only after the foreground command and smoke test pass:
+
+```bash
+cp /home/will/lab/swarm/openvino-reranker-npu/openvino-reranker.service /home/will/.config/systemd/user/openvino-reranker.service
+systemctl --user daemon-reload
+systemctl --user start openvino-reranker.service
+systemctl --user status openvino-reranker.service --no-pager
+journalctl --user -u openvino-reranker.service -n 100 --no-pager
+```
+
+Do not enable or integrate it into live RAG without explicit approval.
+
+## Optional RAG integration plan (disabled by default)
+
+RAG should keep vector search against `obsidian_bge_npu` unchanged, retrieve a larger candidate set, and call this service as a read-only request-time second stage. Suggested disabled-by-default knobs:
+
+```text
+RAG_RERANK_ENABLED=false
+RAG_RERANK_URL=http://127.0.0.1:18818/rerank
+RAG_RERANK_INITIAL_K=20
+RAG_RERANK_TOP_K=5
+RAG_RERANK_TIMEOUT_MS=3000
+```
+
+On reranker timeout/error, fall back to vector order and include metadata such as `rerank_error`; do not mutate or reindex Chroma collections.
@@ -0,0 +1,243 @@
+# OpenVINO NPU reranker service spec
+
+Status: proposed localhost prototype; not live RAG integration.
+Target port: `127.0.0.1:18818`.
+Safety posture: foreground smoke first, no persistent enablement, no Atlas/Hermes/RAG routing changes without Will's explicit approval.
+
+## Recommendation
+
+Use `cross-encoder/ms-marco-MiniLM-L6-v2`, exported to OpenVINO IR as INT8, served by the local stdlib HTTP service in `server.py` on OpenVINO Runtime `NPU`.
+
+Why this choice:
+
+- It is a small BERT-family cross-encoder reranker intended for MS MARCO-style passage ranking, matching the second-stage RAG use case better than another embedding-only similarity pass.
+- The model shape is simple pairwise text classification/scoring: `(query, document) -> score`, which maps cleanly to OpenVINO Runtime and avoids introducing a heavier LLM worker for reranking.
+- INT8 OpenVINO IR keeps memory and compile/runtime cost low enough for a localhost sidecar and is already represented in the repo defaults:
+  `/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov`.
+- The service can fail closed on startup when `OPENVINO_RERANKER_DEVICE=NPU` but `/sys/class/accel/accel0/device/npu_busy_time_us` does not increase, preventing false "NPU-backed" claims.
+
+Runtime default:
+
+```text
+OPENVINO_RERANKER_HOST=127.0.0.1
+OPENVINO_RERANKER_PORT=18818
+OPENVINO_RERANKER_DEVICE=NPU
+OPENVINO_RERANKER_MODEL=cross-encoder/ms-marco-MiniLM-L6-v2
+OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov
+OPENVINO_RERANKER_MAX_LENGTH=512
+OPENVINO_RERANKER_MAX_DOCUMENTS=100
+OPENVINO_RERANKER_MAX_BODY_BYTES=5242880
+```
+
+## Endpoint contract
+
+### Health and readiness
+
+`GET /healthz` and `GET /readyz` return JSON.
+
+`/readyz` must return HTTP 200 only when the model is loaded and startup smoke passed. For NPU mode, startup smoke must include a positive `npu_busy_delta_us`.
+
+Representative ready response:
+
+```json
+{
+  "status": "ok",
+  "ok": true,
+  "service": "openvino-reranker",
+  "model": "cross-encoder/ms-marco-MiniLM-L6-v2",
+  "model_dir": "/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov",
+  "device": "NPU",
+  "available_devices": ["CPU", "NPU"],
+  "max_length": 512,
+  "startup_smoke": {"ok": true, "duration_ms": 12.3, "npu_busy_delta_us": 1234},
+  "last_inference": null,
+  "ready_error": null
+}
+```
+
+### Rerank
+
+`POST /rerank` and compatibility alias `POST /v1/rerank` accept:
+
+```json
+{
+  "query": "how do I verify OpenVINO NPU usage?",
+  "documents": [
+    {"id": "good", "text": "Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference.", "metadata": {"source": "synthetic"}},
+    {"id": "bad", "text": "This note is about making sourdough starter."}
+  ],
+  "top_k": 2,
+  "return_documents": false
+}
+```
+
+Compatibility notes:
+
+- `documents` may be strings or objects with `id`, `text`, and optional object `metadata`.
+- `top_k` is preferred; `top_n` is accepted for common reranker-client compatibility.
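+- `return_documents=false` is recommended for RAG integration to avoid echoing private source text into logs or intermediate traces. The server treats an omitted `return_documents` as `true`, so clients must send `false` explicitly.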
+- The optional `model` field may be sent by clients but is not used for routing; this sidecar serves one configured model.
+
+Successful response:
+
+```json
+{
+  "ok": true,
+  "model": "cross-encoder/ms-marco-MiniLM-L6-v2",
+  "device": "NPU",
+  "query": "how do I verify OpenVINO NPU usage?",
+  "input_count": 2,
+  "top_k": 2,
+  "duration_ms": 10.5,
+  "npu_busy_delta_us": 1234,
+  "results": [
+    {"index": 0, "id": "good", "score": 8.1, "raw_score": 8.1, "probability": 0.9997},
+    {"index": 1, "id": "bad", "score": -4.2, "raw_score": -4.2, "probability": 0.0148}
+  ]
+}
+```
+
+Error response shape:
+
+```json
+{"ok": false, "error": "human-readable error", "results": []}
+```
+
+Status behavior:
+
+- 400: malformed JSON, a non-object JSON body, empty query, missing/empty documents, more documents than `OPENVINO_RERANKER_MAX_DOCUMENTS`, invalid document text, or non-positive/non-integer `top_k`/`top_n`.
+- 413: request body above `OPENVINO_RERANKER_MAX_BODY_BYTES`.
+- 503: model not ready.
+- 500: unexpected inference/runtime failure.
+
+## CLI contract
+
+Foreground-only review start:
+
+```bash
+ss -ltnp | grep ':18818\b' || true
+cat /sys/class/accel/accel0/device/npu_busy_time_us
+source /home/will/.venvs/openvino-reranker/bin/activate
+OPENVINO_RERANKER_HOST=127.0.0.1 \
+OPENVINO_RERANKER_PORT=18818 \
+OPENVINO_RERANKER_DEVICE=NPU \
+OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov \
+python /home/will/lab/swarm/openvino-reranker-npu/server.py
+```
+
+Client smoke:
+
+```bash
+source /home/will/.venvs/openvino-reranker/bin/activate
+python /home/will/lab/swarm/openvino-reranker-npu/smoke.py --url http://127.0.0.1:18818
+```
+
+Optional user-systemd unit exists as `openvino-reranker.service`, but this spec does not approve copying, starting, enabling, or wiring it into live paths.
+
+## Non-private smoke payload
+
+Use only synthetic public-text fixtures. Do not query the Obsidian vault, private document directories, image folders, or live Chroma documents during smoke.
+
+Minimum cases:
+
+1. Query: `how do I verify OpenVINO NPU usage?`
+   - Expected top document: `Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference.`
+   - Distractor: `This note is about making sourdough starter.`
+2. Query: `what port does the reranker service use?`
+   - Expected top document: `The OpenVINO reranker prototype listens locally on port 18818.`
+   - Distractor: `Whisper transcription accepts audio uploads.`
+3. Query: `why should reranking not mutate vector collections?`
+   - Expected top document: `Reranking is a read-only second-stage transformation after vector search.`
+   - Distractor: `Boil pasta in salted water until al dente.`
+
+Pass criteria:
+
+- `/readyz` is HTTP 200 and reports `device=NPU`.
+- Every case returns `ok=true` and a sorted `results` list with the expected top `id`.
+- Response-level `npu_busy_delta_us` is positive for each case.
+- External sysfs `after - before` is positive for each case or at least for the full smoke batch (the bundled `smoke.py` enforces it per case).
+- Smoke script exits 0 and prints JSON with `ok: true`.
+
+## NPU busy-time verification plan
+
+HTTP 200 is not proof. Verification must capture both endpoint-reported and sysfs-observed deltas.
+
+Procedure:
+
+```bash
+BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
+before=$(cat "$BUSY")
+curl -fsS http://127.0.0.1:18818/rerank \
+  -H 'Content-Type: application/json' \
+  -d '{"query":"how do I verify OpenVINO NPU usage?","documents":[{"id":"good","text":"Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference."},{"id":"bad","text":"This note is about making sourdough starter."}],"top_k":2,"return_documents":false}' \
+  | jq '{ok, device, npu_busy_delta_us, top_id:.results[0].id}'
+after=$(cat "$BUSY")
+echo "sysfs_npu_busy_delta_us=$((after-before))"
+```
+
+Acceptance:
+
+- `device == "NPU"`.
+- Response `npu_busy_delta_us > 0`.
+- Shell-computed `sysfs_npu_busy_delta_us > 0`.
+- If any value is zero/negative/missing, call the result CPU/unknown and do not claim NPU-backed reranking.
+
+## Optional RAG second-stage integration plan (deferred)
+
+This is a plan only. Do not enable it in live RAG without explicit approval.
+
+Design:
+
+1. Keep existing vector search and Chroma collection `obsidian_bge_npu` unchanged.
+2. Retrieve more candidates from current vector search, e.g. `initial_k=20`.
+3. Send only request-time candidate snippets/ids to `http://127.0.0.1:18818/rerank`.
+4. Use reranker order to choose final `top_k`, e.g. `5`.
+5. On timeout, connection error, invalid response, or non-positive NPU proof when proof is required, fall back to vector order and attach metadata like `rerank_error`; do not fail the whole RAG request unless explicitly configured.
+6. Log counters and latency, but avoid logging raw private document text.
+
+Disabled-by-default knobs:
+
+```text
+RAG_RERANK_ENABLED=false
+RAG_RERANK_URL=http://127.0.0.1:18818/rerank
+RAG_RERANK_INITIAL_K=20
+RAG_RERANK_TOP_K=5
+RAG_RERANK_TIMEOUT_MS=3000
+RAG_RERANK_REQUIRE_NPU_PROOF=true
+RAG_RERANK_RETURN_DOCUMENTS=false
+```
+
+Integration tests should use synthetic in-memory candidates first. Live-vault evaluation requires a separate approval and must not mutate or rebuild the vector collection.
+
+## Docs and diagram implications
+
+If this prototype advances beyond spec/review, update these surfaces while keeping live/prototype labels clear:
+
+- `openvino-reranker-npu/README.md`: keep model/runtime, endpoint contract, smoke command, and approval gates synchronized with code.
+- `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md`: list `:18818` as prototype/not enabled, with foreground smoke and NPU sysfs proof.
+- Service catalog / architecture notes: show live baseline `:18810`, `:18816`, `:18817`; show `:18818` as optional second-stage RAG prototype, not live routing.
+- Diagrams: render `RAG :18810 -> optional reranker :18818` as dashed/disabled or "proposed"; do not imply Atlas/Hermes/gateway traffic is using it.
+- Optional systemd unit: document as installable after approval, not enabled by default.
+
+## No-go / defer criteria
+
+Do not ship, enable, or integrate the reranker if any of these hold:
+
+- Port `18818` is already owned by another live service.
+- `NPU` is unavailable in `ov.Core().available_devices` or `/sys/class/accel/accel0/device/npu_busy_time_us` is missing.
+- Foreground startup smoke fails or has non-positive NPU busy-time delta while configured for NPU.
+- Synthetic smoke top-1 ranking fails or latency is unacceptable for the intended RAG timeout budget.
+- Model export requires overwriting the existing model directory or touching Chroma/vector collections.
+- The service must bind beyond `127.0.0.1` to be useful.
+- Live RAG integration would require reindexing, collection mutation, private-doc smoke, or Atlas/Hermes/gateway routing changes without explicit approval.
+- Logs or responses would persist raw private document text outside the existing RAG request path.
+
+## Current local preflight observed during this spec pass
+
+- `/sys/class/accel/accel0/device/npu_busy_time_us` is readable.
+- `/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov` is present.
+- `/home/will/.venvs/openvino-reranker/bin/python` is present.
+- `:18818` was not listening during preflight.
+- `server.py` and `smoke.py` pass `python -m py_compile`.
+
+These observations are preflight only; they are not a live service/NPU smoke result.
@@ -0,0 +1,19 @@
+# Optional user-level unit for the OpenVINO NPU reranker prototype.
+# The accompanying README/spec say to install it only after the foreground
+# command and smoke test pass, and not to enable it without explicit approval.
+[Unit]
+Description=OpenVINO NPU Reranker HTTP Service (port 18818)
+# NOTE(review): the service binds only to 127.0.0.1; confirm that
+# network-online.target is meaningful for a --user unit on this host.
+After=network-online.target
+
+[Service]
+Type=simple
+WorkingDirectory=/home/will/lab/swarm/openvino-reranker-npu
+# These mirror the OPENVINO_RERANKER_* defaults read by server.py.
+Environment=OPENVINO_RERANKER_HOST=127.0.0.1
+Environment=OPENVINO_RERANKER_PORT=18818
+Environment=OPENVINO_RERANKER_MODEL=cross-encoder/ms-marco-MiniLM-L6-v2
+Environment=OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov
+Environment=OPENVINO_RERANKER_DEVICE=NPU
+Environment=OPENVINO_RERANKER_MAX_LENGTH=512
+ExecStart=/home/will/.venvs/openvino-reranker/bin/python /home/will/lab/swarm/openvino-reranker-npu/server.py
+# server.py raises (non-zero exit) when startup checks fail, so a failed
+# startup smoke is retried every RestartSec seconds.
+Restart=on-failure
+RestartSec=5
+
+[Install]
+WantedBy=default.target
@@ -0,0 +1,393 @@
+#!/usr/bin/env python3
+"""OpenVINO NPU cross-encoder reranker HTTP service.
+
+Default port: 18818
+Default model: cross-encoder/ms-marco-MiniLM-L6-v2 exported as OpenVINO IR
+Default device: NPU
+
+Endpoints:
+  GET  /, /healthz, /readyz
+  POST /rerank
+  POST /v1/rerank
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import math
+import os
+import socket
+import sys
+import threading
+import time
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import openvino as ov
+from transformers import AutoTokenizer
+
+# Model id reported in responses; default for --model / OPENVINO_RERANKER_MODEL.
+DEFAULT_MODEL_ID = "cross-encoder/ms-marco-MiniLM-L6-v2"
+# Directory that must contain openvino_model.xml and the tokenizer files.
+DEFAULT_MODEL_DIR = Path("/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov")
+DEFAULT_PORT = 18818
+# Token length used for the static input reshape and for tokenizer padding/truncation.
+DEFAULT_MAX_LENGTH = 512
+# Largest number of documents accepted in one rerank request.
+DEFAULT_MAX_DOCUMENTS = 100
+# Largest accepted request body (5 MiB); larger bodies are answered with HTTP 413.
+DEFAULT_MAX_BODY_BYTES = 5 * 1024 * 1024
+# sysfs counter sampled before and after inference to compute npu_busy_delta_us.
+NPU_BUSY_FILE = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
+
+
+def npu_busy_time_us() -> int | None:
+    try:
+        return int(NPU_BUSY_FILE.read_text().strip())
+    except Exception:
+        return None
+
+
+def sigmoid(x: float) -> float:
+    if x >= 0:
+        z = math.exp(-x)
+        return 1.0 / (1.0 + z)
+    z = math.exp(x)
+    return z / (1.0 + z)
+
+
+def softmax_prob(logits: np.ndarray, index: int = 1) -> float:
+    row = np.asarray(logits, dtype=np.float64).reshape(-1)
+    shifted = row - np.max(row)
+    probs = np.exp(shifted) / np.sum(np.exp(shifted))
+    return float(probs[index])
+
+
+class RerankerService:
+    def __init__(
+        self,
+        model_dir: Path,
+        model_id: str,
+        device: str,
+        max_length: int,
+        startup_smoke: bool = True,
+    ) -> None:
+        self.model_dir = model_dir
+        self.model_id = model_id
+        self.device = device
+        self.max_length = int(max_length)
+        self.loaded_at = time.time()
+        self.lock = threading.Lock()
+        self.last_inference: dict[str, Any] | None = None
+        self.startup_smoke: dict[str, Any] | None = None
+        self.ready = False
+        self.ready_error: str | None = None
+
+        if not self.model_dir.exists():
+            raise FileNotFoundError(f"model directory not found: {self.model_dir}")
+
+        self.core = ov.Core()
+        self.available_devices = list(self.core.available_devices)
+        if self.device not in self.available_devices:
+            raise RuntimeError(f"OpenVINO device {self.device!r} unavailable; available={self.available_devices}")
+
+        xml_path = self.model_dir / "openvino_model.xml"
+        if not xml_path.exists():
+            raise FileNotFoundError(f"OpenVINO IR not found: {xml_path}")
+
+        self.tokenizer = AutoTokenizer.from_pretrained(str(self.model_dir), local_files_only=True)
+        model = self.core.read_model(str(xml_path))
+        self._reshape_static(model)
+        self.compiled = self.core.compile_model(model, self.device)
+        self.input_names = {inp.get_any_name() for inp in self.compiled.inputs}
+        self.output = self.compiled.output(0)
+
+        if startup_smoke:
+            try:
+                smoke = self.rerank(
+                    "npu busy time",
+                    [{"id": "smoke", "text": "OpenVINO NPU usage is verified by npu_busy_time_us."}],
+                    top_k=1,
+                    return_documents=False,
+                )
+                self.startup_smoke = {
+                    "ok": bool(smoke.get("ok")),
+                    "duration_ms": smoke.get("duration_ms"),
+                    "npu_busy_delta_us": smoke.get("npu_busy_delta_us"),
+                }
+                if self.device == "NPU" and int(smoke.get("npu_busy_delta_us") or 0) <= 0:
+                    raise RuntimeError("startup smoke did not increase npu_busy_time_us")
+            except Exception as exc:
+                self.ready_error = f"startup smoke failed: {type(exc).__name__}: {exc}"
+                raise
+
+        self.ready = True
+
+    def _reshape_static(self, model: ov.Model) -> None:
+        shape_by_name: dict[str, list[int]] = {}
+        for inp in model.inputs:
+            name = inp.get_any_name()
+            if name in {"input_ids", "attention_mask", "token_type_ids"}:
+                shape_by_name[name] = [1, self.max_length]
+        if shape_by_name:
+            model.reshape(shape_by_name)
+
+    def _tokenize(self, query: str, document: str) -> dict[str, np.ndarray]:
+        tokens = self.tokenizer(
+            query,
+            document,
+            max_length=self.max_length,
+            padding="max_length",
+            truncation=True,
+            return_tensors="np",
+        )
+        return {name: np.asarray(value) for name, value in tokens.items() if name in self.input_names}
+
+    def _score_pair(self, query: str, document: str) -> dict[str, float | None]:
+        inputs = self._tokenize(query, document)
+        missing = self.input_names - set(inputs)
+        # Some exported BERT models do not use token_type_ids. input_ids and attention_mask are required.
+        required_missing = missing & {"input_ids", "attention_mask"}
+        if required_missing:
+            raise RuntimeError(f"tokenizer did not produce required inputs: {sorted(required_missing)}")
+        outputs = self.compiled(inputs)
+        logits = np.asarray(outputs[self.output])
+        flat = logits.reshape(-1)
+        if flat.size == 1:
+            raw = float(flat[0])
+            return {"score": raw, "raw_score": raw, "probability": sigmoid(raw)}
+        if flat.size >= 2:
+            raw = float(flat[1])
+            return {"score": raw, "raw_score": raw, "probability": softmax_prob(flat, 1)}
+        raise RuntimeError(f"unexpected empty logits shape: {list(logits.shape)}")
+
+    def rerank(
+        self,
+        query: str,
+        documents: list[dict[str, Any]],
+        *,
+        top_k: int | None,
+        return_documents: bool = True,
+    ) -> dict[str, Any]:
+        before = npu_busy_time_us()
+        started = time.perf_counter()
+        results: list[dict[str, Any]] = []
+        with self.lock:
+            for idx, doc in enumerate(documents):
+                scored = self._score_pair(query, str(doc["text"]))
+                item: dict[str, Any] = {
+                    "index": idx,
+                    "score": scored["score"],
+                    "raw_score": scored["raw_score"],
+                    "probability": scored["probability"],
+                }
+                if doc.get("id") is not None:
+                    item["id"] = doc.get("id")
+                if return_documents:
+                    item["text"] = doc["text"]
+                    item["metadata"] = doc.get("metadata") if isinstance(doc.get("metadata"), dict) else {}
+                results.append(item)
+        after = npu_busy_time_us()
+        results.sort(key=lambda item: (-float(item["score"]), int(item["index"])))
+        clamped_top_k = len(results) if top_k is None else max(1, min(int(top_k), len(results)))
+        duration_ms = round((time.perf_counter() - started) * 1000, 3)
+        npu_delta = None if before is None or after is None else after - before
+        payload = {
+            "ok": True,
+            "model": self.model_id,
+            "model_dir": str(self.model_dir),
+            "device": self.device,
+            "query": query,
+            "input_count": len(documents),
+            "top_k": clamped_top_k,
+            "duration_ms": duration_ms,
+            "npu_busy_delta_us": npu_delta,
+            "results": results[:clamped_top_k],
+        }
+        self.last_inference = {
+            "duration_ms": duration_ms,
+            "docs": len(documents),
+            "npu_busy_delta_us": npu_delta,
+        }
+        return payload
+
+    def health(self) -> dict[str, Any]:
+        status = "ok" if self.ready else "degraded"
+        return {
+            "status": status,
+            "ok": self.ready,
+            "service": "openvino-reranker",
+            "model": self.model_id,
+            "model_dir": str(self.model_dir),
+            "device": self.device,
+            "available_devices": self.available_devices,
+            "max_length": self.max_length,
+            "input_names": sorted(self.input_names),
+            "uptime_s": round(time.time() - self.loaded_at, 3),
+            "npu_busy_time_us": npu_busy_time_us(),
+            "startup_smoke": self.startup_smoke,
+            "last_inference": self.last_inference,
+            "ready_error": self.ready_error,
+        }
+
+
+def normalize_documents(value: Any, max_documents: int) -> list[dict[str, Any]]:
+    if not isinstance(value, list) or not value:
+        raise ValueError("documents must be a non-empty list")
+    if len(value) > max_documents:
+        raise ValueError(f"documents exceeds max_documents={max_documents}")
+    docs: list[dict[str, Any]] = []
+    for idx, item in enumerate(value):
+        if isinstance(item, str):
+            text = item
+            doc: dict[str, Any] = {"text": text}
+        elif isinstance(item, dict):
+            text = item.get("text")
+            doc = {
+                "id": item.get("id"),
+                "text": text,
+                "metadata": item.get("metadata") if isinstance(item.get("metadata"), dict) else {},
+            }
+        else:
+            raise ValueError(f"documents[{idx}] must be a string or object")
+        if not isinstance(text, str) or not text.strip():
+            raise ValueError(f"documents[{idx}].text must be a non-empty string")
+        docs.append(doc)
+    return docs
+
+
+def parse_top_k(value: Any, document_count: int) -> int:
+    """Validate top_k/top_n before inference so schema errors return HTTP 400."""
+    if value is None:
+        return document_count
+    if isinstance(value, bool) or not isinstance(value, int):
+        raise ValueError("top_k/top_n must be a positive integer")
+    if value < 1:
+        raise ValueError("top_k/top_n must be a positive integer")
+    return min(value, document_count)
+
+
+def assert_port_available(host: str, port: int) -> None:
+    """Fail fast on listener conflicts before compiling the OpenVINO model."""
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        try:
+            sock.bind((host, port))
+        except OSError as exc:
+            raise RuntimeError(f"cannot bind {host}:{port}; listener conflict or invalid bind: {exc}") from exc
+
+
+class Handler(BaseHTTPRequestHandler):
+    server_version = "OpenVINOReranker/0.1"
+
+    @property
+    def svc(self) -> RerankerService:
+        return self.server.reranker_service  # type: ignore[attr-defined]
+
+    @property
+    def max_body_bytes(self) -> int:
+        return self.server.max_body_bytes  # type: ignore[attr-defined]
+
+    @property
+    def max_documents(self) -> int:
+        return self.server.max_documents  # type: ignore[attr-defined]
+
+    def do_GET(self) -> None:
+        path = self.path.split("?", 1)[0].rstrip("/") or "/"
+        if path == "/":
+            self.write_json({"ok": True, "service": "openvino-reranker", "endpoints": ["/healthz", "/readyz", "/rerank", "/v1/rerank"]})
+        elif path in {"/healthz", "/health"}:
+            self.write_json(self.svc.health(), status=200)
+        elif path == "/readyz":
+            health = self.svc.health()
+            self.write_json(health, status=200 if health.get("ok") else 503)
+        else:
+            self.write_json({"ok": False, "error": "not found", "results": []}, status=404)
+
+    def do_POST(self) -> None:
+        path = self.path.split("?", 1)[0].rstrip("/") or "/"
+        try:
+            if path not in {"/rerank", "/v1/rerank"}:
+                self.write_json({"ok": False, "error": "not found", "results": []}, status=404)
+                return
+            if not self.svc.ready:
+                self.write_json({"ok": False, "error": self.svc.ready_error or "model not ready", "results": []}, status=503)
+                return
+            payload = self.read_json()
+            query = payload.get("query")
+            if not isinstance(query, str) or not query.strip():
+                raise ValueError("query is required")
+            top_k = payload.get("top_k", payload.get("top_n"))
+            documents = normalize_documents(payload.get("documents"), self.max_documents)
+            top_k = parse_top_k(top_k, len(documents))
+            return_documents = bool(payload.get("return_documents", True))
+            response = self.svc.rerank(query.strip(), documents, top_k=top_k, return_documents=return_documents)
+            self.write_json(response)
+        except RequestTooLarge as exc:
+            self.write_json({"ok": False, "error": str(exc), "results": []}, status=413)
+        except ValueError as exc:
+            self.write_json({"ok": False, "error": str(exc), "results": []}, status=400)
+        except Exception as exc:
+            self.write_json({"ok": False, "error": f"{type(exc).__name__}: {exc}", "results": []}, status=500)
+
+    def read_json(self) -> dict[str, Any]:
+        length = int(self.headers.get("Content-Length") or 0)
+        if length > self.max_body_bytes:
+            raise RequestTooLarge(f"request body exceeds {self.max_body_bytes} bytes")
+        body = self.rfile.read(length).decode("utf-8", "replace") if length else "{}"
+        payload = json.loads(body or "{}")
+        if not isinstance(payload, dict):
+            raise ValueError("JSON body must be an object")
+        return payload
+
+    def write_json(self, payload: dict[str, Any], status: int = 200) -> None:
+        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def log_message(self, format: str, *args: Any) -> None:  # noqa: A002 - stdlib override name
+        print(f"{self.address_string()} - {format % args}", file=sys.stderr, flush=True)
+
+
+class RequestTooLarge(ValueError):
+    pass
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", default=os.environ.get("OPENVINO_RERANKER_HOST", "127.0.0.1"))
+    parser.add_argument("--port", type=int, default=int(os.environ.get("OPENVINO_RERANKER_PORT", DEFAULT_PORT)))
+    parser.add_argument("--model-dir", default=os.environ.get("OPENVINO_RERANKER_MODEL_DIR", str(DEFAULT_MODEL_DIR)))
+    parser.add_argument("--model", default=os.environ.get("OPENVINO_RERANKER_MODEL", DEFAULT_MODEL_ID))
+    parser.add_argument("--device", default=os.environ.get("OPENVINO_RERANKER_DEVICE", "NPU"))
+    parser.add_argument("--max-length", type=int, default=int(os.environ.get("OPENVINO_RERANKER_MAX_LENGTH", str(DEFAULT_MAX_LENGTH))))
+    parser.add_argument("--max-documents", type=int, default=int(os.environ.get("OPENVINO_RERANKER_MAX_DOCUMENTS", str(DEFAULT_MAX_DOCUMENTS))))
+    parser.add_argument("--max-body-bytes", type=int, default=int(os.environ.get("OPENVINO_RERANKER_MAX_BODY_BYTES", str(DEFAULT_MAX_BODY_BYTES))))
+    parser.add_argument("--skip-startup-smoke", action="store_true", default=os.environ.get("OPENVINO_RERANKER_SKIP_STARTUP_SMOKE", "").lower() in {"1", "true", "yes"})
+    args = parser.parse_args()
+
+    assert_port_available(args.host, args.port)
+    service = RerankerService(
+        Path(args.model_dir).expanduser(),
+        args.model,
+        args.device,
+        args.max_length,
+        startup_smoke=not args.skip_startup_smoke,
+    )
+    httpd = ThreadingHTTPServer((args.host, args.port), Handler)
+    httpd.reranker_service = service  # type: ignore[attr-defined]
+    httpd.max_body_bytes = args.max_body_bytes  # type: ignore[attr-defined]
+    httpd.max_documents = args.max_documents  # type: ignore[attr-defined]
+    print(
+        f"openvino-reranker listening on {args.host}:{args.port} model={args.model} "
+        f"model_dir={args.model_dir} device={args.device} max_length={args.max_length}",
+        flush=True,
+    )
+    try:
+        httpd.serve_forever()
+    except KeyboardInterrupt:
+        pass
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+"""Smoke/benchmark checks for the OpenVINO reranker service.
+
+Prints a JSON summary and exits non-zero on schema/ranking/NPU verification failure.
+Uses only non-private fixture text.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import statistics
+import sys
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+# sysfs counter read before and after each request to measure NPU activity
+# independently of what the service reports.
+NPU_BUSY_FILE = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
+
+# Synthetic, non-private smoke cases. Each case lists candidate documents and
+# the id that must come back first in the reranked results.
+FIXTURES = [
+    {
+        "query": "how do I verify OpenVINO NPU usage?",
+        "documents": [
+            {"id": "good", "text": "Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference."},
+            {"id": "bad", "text": "This note is about making sourdough starter."},
+        ],
+        "expected_top_id": "good",
+    },
+    {
+        "query": "what port does the reranker service use?",
+        "documents": [
+            {"id": "unrelated", "text": "Whisper transcription accepts audio uploads."},
+            {"id": "port", "text": "The OpenVINO reranker prototype listens locally on port 18818."},
+        ],
+        "expected_top_id": "port",
+    },
+    {
+        "query": "why should reranking not mutate vector collections?",
+        "documents": [
+            {"id": "mutation", "text": "Reranking is a read-only second-stage transformation after vector search."},
+            {"id": "cooking", "text": "Boil pasta in salted water until al dente."},
+        ],
+        "expected_top_id": "mutation",
+    },
+]
+
+
+def npu_busy_time_us() -> int | None:
+    try:
+        return int(NPU_BUSY_FILE.read_text().strip())
+    except Exception:
+        return None
+
+
+def post_json(url: str, payload: dict[str, Any], timeout: float) -> tuple[int, dict[str, Any]]:
+    data = json.dumps(payload).encode("utf-8")
+    req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"}, method="POST")
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            body = resp.read().decode("utf-8", "replace")
+            return resp.status, json.loads(body)
+    except urllib.error.HTTPError as exc:
+        body = exc.read().decode("utf-8", "replace")
+        try:
+            parsed = json.loads(body)
+        except Exception:
+            parsed = {"error": body}
+        return exc.code, parsed
+
+
+def get_json(url: str, timeout: float) -> tuple[int, dict[str, Any]]:
+    try:
+        with urllib.request.urlopen(url, timeout=timeout) as resp:
+            body = resp.read().decode("utf-8", "replace")
+            return resp.status, json.loads(body)
+    except urllib.error.HTTPError as exc:
+        body = exc.read().decode("utf-8", "replace")
+        try:
+            parsed = json.loads(body)
+        except Exception:
+            parsed = {"error": body}
+        return exc.code, parsed
+
+
+def percentile(values: list[float], pct: float) -> float | None:
+    if not values:
+        return None
+    ordered = sorted(values)
+    idx = min(len(ordered) - 1, max(0, round((pct / 100.0) * (len(ordered) - 1))))
+    return round(ordered[idx], 3)
+
+
+def main() -> int:
+    """Run the readiness check and every fixture, print a JSON summary.
+
+    Returns:
+        0 when no failure was recorded, 1 otherwise.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--url", default="http://127.0.0.1:18818")
+    parser.add_argument("--timeout", type=float, default=20.0)
+    parser.add_argument("--allow-cpu", action="store_true", help="do not fail when health reports a non-NPU device")
+    args = parser.parse_args()
+
+    base = args.url.rstrip("/")
+    # Failures are collected rather than raised so the summary lists all of them.
+    failures: list[str] = []
+    health_status, health = get_json(f"{base}/readyz", args.timeout)
+    if health_status != 200 or not health.get("ok"):
+        failures.append(f"readyz failed status={health_status} error={health.get('ready_error') or health.get('error')}")
+    device = health.get("device")
+    if device != "NPU" and not args.allow_cpu:
+        failures.append(f"device is {device!r}, expected 'NPU'")
+
+    latencies: list[float] = []
+    response_npu_total = 0
+    sysfs_npu_total = 0
+    top1_passed = 0
+
+    for case in FIXTURES:
+        # Sample the sysfs counter around the request as an external cross-check
+        # of the delta the service reports in its response.
+        before = npu_busy_time_us()
+        started = time.perf_counter()
+        status, payload = post_json(
+            f"{base}/rerank",
+            {"query": case["query"], "documents": case["documents"], "top_k": len(case["documents"]), "return_documents": False},
+            args.timeout,
+        )
+        wall_ms = (time.perf_counter() - started) * 1000
+        after = npu_busy_time_us()
+        # Prefer the server-reported duration; fall back to client wall time.
+        latencies.append(float(payload.get("duration_ms") or wall_ms))
+        response_delta = payload.get("npu_busy_delta_us")
+        sysfs_delta = None if before is None or after is None else after - before
+        if isinstance(response_delta, int):
+            response_npu_total += response_delta
+        if isinstance(sysfs_delta, int):
+            sysfs_npu_total += sysfs_delta
+        results = payload.get("results") if isinstance(payload, dict) else None
+        top_id = results[0].get("id") if isinstance(results, list) and results else None
+        if status != 200 or not payload.get("ok"):
+            failures.append(f"case {case['expected_top_id']} HTTP/status failed: status={status} error={payload.get('error')}")
+        # top_k equals the document count, so every document must come back.
+        if not isinstance(results, list) or len(results) != len(case["documents"]):
+            failures.append(f"case {case['expected_top_id']} returned invalid results")
+        if top_id == case["expected_top_id"]:
+            top1_passed += 1
+        else:
+            failures.append(f"case {case['expected_top_id']} top_id={top_id!r}")
+        # NPU proof is only demanded when the service itself reports device NPU.
+        if device == "NPU":
+            if not isinstance(response_delta, int) or response_delta <= 0:
+                failures.append(f"case {case['expected_top_id']} response npu delta not positive: {response_delta}")
+            if not isinstance(sysfs_delta, int) or sysfs_delta <= 0:
+                failures.append(f"case {case['expected_top_id']} sysfs npu delta not positive: {sysfs_delta}")
+
+    summary = {
+        "ok": not failures,
+        "url": base,
+        "model": health.get("model"),
+        "device": device,
+        "cases": len(FIXTURES),
+        "top1_passed": top1_passed,
+        "p50_ms": percentile(latencies, 50),
+        "p95_ms": percentile(latencies, 95),
+        "mean_ms": round(statistics.mean(latencies), 3) if latencies else None,
+        "npu_busy_delta_us_total": sysfs_npu_total,
+        "response_npu_busy_delta_us_total": response_npu_total,
+        "failures": failures,
+    }
+    print(json.dumps(summary, indent=2, sort_keys=True))
+    return 0 if not failures else 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+"""Unit checks for reranker request validation helpers.
+
+These tests intentionally avoid loading an OpenVINO model; they only cover the
+stdlib validation helpers used before inference.
+"""
+from __future__ import annotations
+
+import socket
+import unittest
+
+from server import assert_port_available, normalize_documents, parse_top_k
+
+
+class ValidationTests(unittest.TestCase):
+    def test_normalize_accepts_strings_and_objects(self) -> None:
+        docs = normalize_documents(
+            [
+                "plain text document",
+                {"id": "obj", "text": "object document", "metadata": {"source": "synthetic"}},
+            ],
+            max_documents=2,
+        )
+        self.assertEqual(docs[0], {"text": "plain text document"})
+        self.assertEqual(docs[1]["id"], "obj")
+        self.assertEqual(docs[1]["metadata"], {"source": "synthetic"})
+
+    def test_normalize_rejects_empty_or_too_many_documents(self) -> None:
+        with self.assertRaisesRegex(ValueError, "non-empty"):
+            normalize_documents([], max_documents=2)
+        with self.assertRaisesRegex(ValueError, "max_documents"):
+            normalize_documents(["a", "b", "c"], max_documents=2)
+        with self.assertRaisesRegex(ValueError, "non-empty string"):
+            normalize_documents([{"id": "empty", "text": ""}], max_documents=2)
+
+    def test_parse_top_k_defaults_clamps_and_rejects_invalid_values(self) -> None:
+        self.assertEqual(parse_top_k(None, document_count=3), 3)
+        self.assertEqual(parse_top_k(2, document_count=3), 2)
+        self.assertEqual(parse_top_k(99, document_count=3), 3)
+        for value in (0, -1, True, False, 1.5, "2", "nope"):
+            with self.subTest(value=value):
+                with self.assertRaisesRegex(ValueError, "positive integer"):
+                    parse_top_k(value, document_count=3)
+
+    def test_assert_port_available_detects_listener_conflict(self) -> None:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
+            listener.bind(("127.0.0.1", 0))
+            listener.listen(1)
+            port = listener.getsockname()[1]
+            with self.assertRaisesRegex(RuntimeError, "cannot bind"):
+                assert_port_available("127.0.0.1", port)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run the test cases when this file is executed directly
@@ -0,0 +1,89 @@
+# OpenVINO Context Gate
+
+Local-only Atlas/Hermes context-gate advisory prototype.
+
+This first slice is CLI-only and dry-run by design. It takes a non-private query,
+optionally asks the localhost classifier on `127.0.0.1:18819` for advisory labels,
+and emits a compact typed context bundle plan. It does not retrieve private
+content or change live Atlas/Hermes behavior.
+
+## Safety invariants
+
+Closed in v1:
+
+- live Atlas/Hermes routing changes
+- memory writes
+- outbound sends
+- tool execution by the sidecar
+- service restarts
+- vector DB mutation or reindexing
+- private root broadening
+- live config changes
+
+The CLI only plans which source classes an authoritative Atlas/Hermes agent might
+use later: `durable_memory`, `session_search`, `rag_search`, `repo_files`,
+`live_system`, `web`, or `no_retrieval`.
+
+NPU proof is strict: `npu_verified=true` is emitted only when a live classifier
+request reports both a positive endpoint NPU delta and a positive sysfs (or
+endpoint-reported sysfs) busy delta. HTTP 200 alone is never treated as proof.
+Offline and fallback modes set `npu_verified=false` and include a warning.
+
+## Usage
+
+Live classifier path, with compact terminal output:
+
+```bash
+python scripts/context-gate-advisory.py \
+  --query "How do I check whether the RAG reranker is using the NPU?" \
+  --format compact
+```
+
+Deterministic offline smoke, safe for unit-test hosts without NPU services:
+
+```bash
+python scripts/context-gate-advisory.py \
+  --offline \
+  --query "Write a haiku about Seattle rain." \
+  --format compact-json
+```
+
+Fallback plan if the classifier is down:
+
+```bash
+python scripts/context-gate-advisory.py \
+  --allow-offline-fallback \
+  --query "Where did we leave the NPU context gate implementation plan?" \
+  --context platform=kanban \
+  --context repo_path=/home/will/lab/swarm \
+  --format compact-json
+```
+
+## Output shape
+
+Full JSON includes:
+
+- `schema=atlas_context_gate_plan_v1`
+- `dry_run=true`
+- `query_class`
+- `source_plan`
+- `bundle_plan`
+- `npu_proof`
+- closed `authority`
+- closed approval `gates`
+- compact `warnings`
+
+Compact output intentionally avoids raw private snippets and raw JSON dumps:
+
+```text
+ok=true schema=atlas_context_gate_plan_v1 bundle=OpsDebugBundle sources=live_system,repo_files,rag_search source_count=3 npu_verified=false classifier_delta_us=None outer_sysfs_delta_us=None gates=closed:route,memory,send,tools,restart,vector,private_roots,config warnings=offline_heuristic_classifier_no_npu_claim,npu_proof_inconclusive
+```
+
+## Notes for reviewers
+
+- No HTTP service or systemd unit is added in this slice.
+- The prototype does not call RAG, memory, session search, web, filesystem tools,
+  or the advisory gateway. It only emits a plan.
+- Unit tests use fake/offline classifier results and do not require live NPU.
+- Optional live smoke may call only the local classifier endpoint and read
+  `/sys/class/accel/accel0/device/npu_busy_time_us` for positive delta proof.
@@ -0,0 +1,5 @@
+"""Atlas/Hermes local advisory context-gate prototype."""
+
+from .context_gate import SCHEMA, ContextGateError, build_plan, compact_json, compact_line, validate_plan
+
+__all__ = ["SCHEMA", "ContextGateError", "build_plan", "compact_json", "compact_line", "validate_plan"]  # the names imported from .context_gate above
@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from typing import Any
+
+from .context_gate import (
+    DEFAULT_CLASSIFIER_URL,
+    ContextGateError,
+    build_plan,
+    classify_live,
+    classify_offline,
+    compact_json,
+    compact_line,
+)
+
+
+def _parse_context(raw_items: list[str]) -> dict[str, Any]:
+    context: dict[str, Any] = {}
+    for item in raw_items:
+        if "=" not in item:
+            raise ContextGateError(f"invalid_context_item:{item}")
+        key, value = item.split("=", 1)
+        if not key:
+            raise ContextGateError("invalid_context_key")
+        if value.lower() == "true":
+            parsed: Any = True
+        elif value.lower() == "false":
+            parsed = False
+        else:
+            parsed = value
+        context[key] = parsed
+    return context
+
+
+def build_arg_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="Emit a local-only Atlas/Hermes advisory context bundle plan. No routing, retrieval, memory writes, sends, restarts, or vector mutations are performed.",
+    )
+    parser.add_argument("--query", required=True, help="Non-private query to plan for")
+    parser.add_argument("--format", choices=["compact", "compact-json", "json"], default="compact")
+    parser.add_argument("--context", action="append", default=[], metavar="KEY=VALUE", help="Optional compact request context, e.g. platform=kanban repo_path=/path")
+    parser.add_argument("--max-sources", type=int, default=4)
+    parser.add_argument("--trace-id")
+    parser.add_argument("--classifier-url", default=DEFAULT_CLASSIFIER_URL)
+    parser.add_argument("--classifier-timeout", type=float, default=8.0)
+    parser.add_argument("--offline", action="store_true", help="Use deterministic heuristic labels; makes no NPU claim")
+    parser.add_argument("--allow-offline-fallback", action="store_true", help="If live classifier is unavailable, emit an advisory fallback plan with npu_verified=false")
+    parser.add_argument("--no-require-npu-proof", action="store_true", help="Do not add npu_proof_inconclusive warning when running offline/fallback")
+    return parser
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = build_arg_parser()
+    args = parser.parse_args(argv)
+    try:
+        context = _parse_context(args.context)
+        options = {
+            "dry_run": True,
+            "max_sources": args.max_sources,
+            "include_private_text": False,
+            "require_npu_proof": not args.no_require_npu_proof,
+            "trace_id": args.trace_id,
+        }
+        if args.offline:
+            classifier = classify_offline(args.query, context)
+        else:
+            try:
+                classifier = classify_live(args.query, context, classifier_url=args.classifier_url, timeout=args.classifier_timeout)
+            except ContextGateError as exc:
+                if not args.allow_offline_fallback:
+                    raise
+                classifier = classify_offline(args.query, context, warning=str(exc))
+        plan = build_plan(args.query, context=context, options=options, classifier=classifier)
+    except ContextGateError as exc:
+        print(f"error={exc}", file=sys.stderr)
+        return 2
+
+    if args.format == "json":
+        print(json.dumps(plan, indent=2, sort_keys=True))
+    elif args.format == "compact-json":
+        print(compact_json(plan))
+    else:
+        print(compact_line(plan))
+    return 0
+
+
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
William Valentin	22e6ee90d2	docs(npu): document advisory observability gates Add operator runbook and link integrated health docs for advisory-only observability, dry-run metrics, and future promotion criteria.	2026-06-06 15:30:31 -07:00
William Valentin	72434c8bc3	feat(npu): add advisory metrics to utilization digest Roll up confidence, recommendation, authority, fallback, and service-level metrics, including v1 authority-flag handling.	2026-06-06 15:30:31 -07:00
William Valentin	dae2a57124	feat(npu): add advisory dry-run comparison harness Add npu_advisory_decision_v1 schema, synthetic fixture set, comparison harness, docs, and focused tests for advisory-only NPU evaluation.	2026-06-06 15:30:31 -07:00
William Valentin	08fb9ca686	docs(npu): update integrated health runbooks	2026-06-05 15:52:51 -07:00
William Valentin	9e5ffa0fd0	feat(npu): add kanban hygiene advisory	2026-06-05 15:52:43 -07:00
William Valentin	d2bad88596	feat(npu): add voice audio advisory pipeline	2026-06-05 15:52:43 -07:00
William Valentin	6906c2079b	feat(npu): add explicit-root batch triage wrapper	2026-06-05 15:52:43 -07:00
William Valentin	6155b54ab5	feat(npu): add cron and n8n advisory examples	2026-06-05 15:52:43 -07:00
William Valentin	5a14adaf58	feat(npu): add utilization digest tooling	2026-06-05 15:52:43 -07:00
William Valentin	b7b4edf0f5	feat(npu): add local context gate advisory	2026-06-05 15:52:42 -07:00
William Valentin	24d620e9c9	fix(n8n): tolerate missing Obsidian REST health note Keep the Obsidian Health + Reindex workflow successful when the optional Obsidian Local REST note write is unavailable. The RAG/reindex checks remain active and verified separately.	2026-06-05 13:19:40 -07:00
William Valentin	ac3590df47	fix(n8n): harden evening digest workflow Allow optional n8n execution, Obsidian note listing, and Obsidian save steps to continue on failure so the digest can still send when one local source is unavailable.	2026-06-05 13:06:52 -07:00
William Valentin	cefd8789cd	fix(n8n): monitor advisory gateway health	2026-06-04 16:26:05 -07:00
William Valentin	aeb3c9f8fb	fix(npu): expose advisory gateway on docker bridge	2026-06-04 16:19:22 -07:00
William Valentin	59c5fd3e57	feat(npu): add advisory gateway wrapper	2026-06-04 16:03:52 -07:00
William Valentin	401321a6d5	Document live OpenVINO NPU sidecars	2026-06-04 15:32:32 -07:00
William Valentin	85c496a59e	docs(obsidian): update automation health status	2026-06-04 15:06:48 -07:00
William Valentin	06cd49247a	chore(rag): enable NPU reranker by default	2026-06-04 15:01:26 -07:00
William Valentin	71f3c05587	feat(rag): add optional NPU reranker fallback	2026-06-04 14:50:41 -07:00
William Valentin	06f235d26b	chore(openclaw): restore clobbered state backups	2026-06-04 13:29:47 -07:00
William Valentin	d2f4dd7cef	fix(openclaw): restore active runtime state	2026-06-04 13:29:47 -07:00
William Valentin	dad13e7648	fix(obsidian): restore vault template settings	2026-06-04 13:27:13 -07:00
William Valentin	137a2c28d2	feat(voice): restore CUDA Whisper fallback image	2026-06-04 13:26:50 -07:00
William Valentin	1772e5a1f3	chore(scripts): restore swarm helper utilities	2026-06-04 13:26:50 -07:00
William Valentin	b88331be42	chore(swarm): restore shared compose and health endpoint	2026-06-04 13:26:50 -07:00
William Valentin	4815750011	chore(n8n): restore workflow exports	2026-06-04 13:26:50 -07:00
William Valentin	99a4f93ce7	test(agent-evals): restore Atlas quality eval suite	2026-06-04 13:26:50 -07:00
William Valentin	6536320774	fix(obsidian): restore shared zap vault after develop rebuild	2026-06-04 13:26:50 -07:00
William Valentin	420df812c0	docs(npu): update service maps and runbooks	2026-06-04 13:08:18 -07:00
William Valentin	703c1df860	docs(npu): document VLM audio wake-word feasibility	2026-06-04 13:07:51 -07:00
William Valentin	2ef9e3dfd2	feat(npu): add bounded OpenVINO GenAI worker	2026-06-04 13:07:51 -07:00
William Valentin	d3373e7234	feat(npu): add document image triage prototype	2026-06-04 13:07:51 -07:00
William Valentin	ea452886f3	feat(npu): add dry-run classifier router prototype	2026-06-04 13:07:51 -07:00
William Valentin	0683253157	feat(npu): add OpenVINO reranker prototype	2026-06-04 13:07:51 -07:00
William Valentin	0a6f84fbf3	feat(rag): add OpenVINO NPU embedding services	2026-06-04 13:07:51 -07:00
William Valentin	83d0ced08c	feat(voice): add OpenVINO NPU Whisper service	2026-06-04 13:07:51 -07:00