docs(npu): document advisory observability gates

Add operator runbook and link integrated health docs for advisory-only observability, dry-run metrics, and future promotion criteria.
feat(npu): add advisory metrics to utilization digest
2026-06-06 15:30:31 -07:00 · 2026-06-06 15:30:31 -07:00 · 2026-06-06 15:30:31 -07:00 · 2026-06-05 15:52:51 -07:00 · 2026-06-05 15:52:43 -07:00 · 2026-06-05 15:52:43 -07:00
174 changed files with 79967 additions and 263 deletions
@@ -0,0 +1,16 @@
 # Telegram Bot Token from @BotFather
 FLYNN_TELEGRAM_TOKEN=your-bot-token-here
 # API Keys
 # Placeholders only: never commit real credentials. Put real values in an
 # untracked local .env file, and revoke/rotate any key that was ever committed.
 ANTHROPIC_API_KEY=your-anthropic-api-key-here
 OPENAI_API_KEY=your-openai-api-key-here
 GEMINI_API_KEY=your-gemini-api-key-here
 ZHIPUAI_API_KEY=your-zhipuai-api-key-here
 ZAI_API_KEY=your-zai-api-key-here
 BRAVE_API_KEY=your-brave-api-key-here
 GITHUB_TOKEN=your-github-token-here
 # LiteLLM Configuration
 # Generate fresh random values locally for both keys; do not reuse committed ones.
 LITELLM_MASTER_KEY=sk-your-litellm-master-key-here
 LITELLM_SALT_KEY=your-litellm-salt-key-here
@@ -3,9 +3,6 @@
 *.swp
 *.swo
 *~
 .Trash-*/
 __pycache__/
 *.py[cod]
 # ── OpenClaw ephemeral / binary / noisy data ──────────────────────────────
 openclaw/workspace/
@@ -17,21 +14,7 @@ openclaw/media/
 openclaw/memory/*.sqlite
 openclaw/memory/*.tmp*
 openclaw/agents/*/sessions/
 openclaw/agents/*/agent/auth-*.json
 openclaw/agents/*/agent/harness-auth/
 openclaw/cron/runs/
 openclaw/cron/jobs-state.json
 openclaw/devices/paired.json
 openclaw/discord/model-picker-preferences.json
 openclaw/flows/*.sqlite*
 openclaw/identity/device-auth.json
 openclaw/memory/
 openclaw/openclaw.json.backup-before-*
 openclaw/openclaw.json.failed
 openclaw/plugin-runtime-deps/
 openclaw/tasks/*.sqlite*
 openclaw/telegram/update-offset-*.json
 openclaw/update-check.json
 # Temp files
 *.tmp
@@ -39,24 +22,3 @@ openclaw/update-check.json
 # Runtime logs
 *.log
 # Local n8n SQLite recovery backups
 .n8n-db-backups/
 backups/
 # Local secrets
 .env
 .env.*
 *.pem
 *.key
 id_rsa
 id_ed25519
 credentials.json
 # Obsidian local UI/runtime/plugin artifacts
 swarm-common/obsidian-vault/**/.obsidian/workspace.json
 swarm-common/obsidian-vault/**/.obsidian/graph.json
 swarm-common/obsidian-vault/**/.obsidian/bookmarks.json
 swarm-common/obsidian-vault/**/.obsidian/types.json
 swarm-common/obsidian-vault/**/.obsidian/plugins/*/
 swarm-common/obsidian-vault/**/.obsidian/themes/
@@ -37,6 +37,9 @@ For the current host-side AI/search/voice automation stack, n8n watchdogs, and a
 - [`docs/swarm-infrastructure.md`](docs/swarm-infrastructure.md) — operational overview and quick checks
 - [`docs/swarm-infrastructure.html`](docs/swarm-infrastructure.html) — dark SVG architecture diagram
 - [`docs/diagram-maintenance.md`](docs/diagram-maintenance.md) — diagram upkeep conventions
 - [`docs/npu-utilization-digest.md`](docs/npu-utilization-digest.md) — compact on-demand NPU proof/utilization digest runbook
 - [`docs/npu-integrated-health-ops.md`](docs/npu-integrated-health-ops.md) — integrated operator health-check workflow combining `npu-service-health.sh` and the utilization digest
 - OpenVINO NPU services and prototypes are documented in `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md` and the component READMEs under `openvino-*-npu*/`. Live baseline ports are RAG `:18810`, Whisper NPU `:18816`, and embeddings `:18817`; sidecar ports `:18818`, `:18819`, `:18820`, and optional doc/image triage `:18829` are approved prototypes only, not live Atlas/Hermes routing.
 ## VM: zap
@@ -4,8 +4,8 @@
 # ── VM provisioning ────────────────────────────────────────────────────────
 vm_domain: "zap [claw]"
 vm_hostname: zap
-vm_memory_mib: 6144
+vm_memory_mib: 3072
-vm_vcpus: 4
+vm_vcpus: 2
 vm_disk_path: /var/lib/libvirt/images/claw.qcow2
 vm_disk_size: "60G"
 vm_mac: "52:54:00:01:00:71"
@@ -12,9 +12,6 @@
 - name: OpenClaw VM customizations
  hosts: openclaw_servers
  become: true
  vars:
    openclaw_user: openclaw
    openclaw_home: /home/openclaw
  tasks:
@@ -0,0 +1,52 @@
 # Triage-root policy template. Every lane under `roots` is committed with
 # `approved: false` and `root: null`.
 version: 1
 # Policy defaults as committed: dry-run mode, explicit roots required, and
 # external uploads, mutations, raw-text logging, and full-path output all off.
 policy:
  default_mode: dry_run
  require_explicit_root: true
  allow_external_uploads: false
  allow_mutations: false
  log_raw_text: false
  include_full_paths_default: false
  npu_proof_path: /sys/class/accel/accel0/device/npu_busy_time_us
 # Copy to config/triage-roots.local.yaml and approve exactly one narrow,
 # lane-specific staging root. The committed template is intentionally
 # unapproved/fail-closed; do not point any lane at broad home, Downloads,
 # vault, screenshot, photo-library, or historical audio roots without explicit
 # approval for that exact lane/root.
 roots:
  # Image/document lanes: at most 50 files each, 25 MB per file
  # (50 MB for obsidian_attachments, which also lists audio extensions).
  screenshots:
    approved: false
    root: null
    allowed_extensions: [.png, .jpg, .jpeg, .webp, .heic]
    max_files: 50
    max_file_mb: 25
  receipts:
    approved: false
    root: null
    allowed_extensions: [.png, .jpg, .jpeg, .pdf, .webp]
    max_files: 50
    max_file_mb: 25
  downloads:
    approved: false
    root: null
    allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp]
    max_files: 50
    max_file_mb: 25
  obsidian_attachments:
    approved: false
    root: null
    allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp, .mp3, .m4a, .wav, .ogg]
    max_files: 50
    max_file_mb: 50
  # Audio lanes: at most 25 files each, with larger per-file size limits.
  voice_memos:
    approved: false
    root: null
    allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
    max_files: 25
    max_file_mb: 100
  meeting_snippets:
    approved: false
    root: null
    allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
    max_files: 25
    max_file_mb: 200
@@ -0,0 +1,46 @@
 # Triage-root configuration with all six lanes set to `approved: true`.
 # The four image/document lanes share ../openvino-doc-image-triage-npu/samples;
 # the two audio lanes point at ../tmp/synthetic-* directories.
 # NOTE(review): presumably a synthetic-sample/test configuration — confirm it is
 # never loaded as a live lane config, since it approves every lane at once.
 # NOTE(review): all roots are relative paths; confirm which directory the
 # consumer resolves them against.
 version: 1
 # Policy values as committed: dry-run mode, explicit roots required, and
 # external uploads, mutations, raw-text logging, and full-path output all off.
 policy:
  default_mode: dry_run
  require_explicit_root: true
  allow_external_uploads: false
  allow_mutations: false
  log_raw_text: false
  include_full_paths_default: false
  npu_proof_path: /sys/class/accel/accel0/device/npu_busy_time_us
 roots:
  # Image/document lanes (shared samples root).
  screenshots:
    approved: true
    root: ../openvino-doc-image-triage-npu/samples
    allowed_extensions: [.png, .jpg, .jpeg, .webp, .heic]
    max_files: 50
    max_file_mb: 25
  receipts:
    approved: true
    root: ../openvino-doc-image-triage-npu/samples
    allowed_extensions: [.png, .jpg, .jpeg, .pdf, .webp]
    max_files: 50
    max_file_mb: 25
  downloads:
    approved: true
    root: ../openvino-doc-image-triage-npu/samples
    allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp]
    max_files: 50
    max_file_mb: 25
  obsidian_attachments:
    approved: true
    root: ../openvino-doc-image-triage-npu/samples
    allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp, .mp3, .m4a, .wav, .ogg]
    max_files: 50
    max_file_mb: 50
  # Audio lanes (separate synthetic directories under ../tmp).
  voice_memos:
    approved: true
    root: ../tmp/synthetic-voice-memos
    allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
    max_files: 25
    max_file_mb: 100
  meeting_snippets:
    approved: true
    root: ../tmp/synthetic-meeting-snippets
    allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
    max_files: 25
    max_file_mb: 200
@@ -15,6 +15,7 @@ Update the relevant diagram in the same change set when you change any of these:
 - n8n workflow architecture
 - Hermes/Atlas routing or gateway responsibilities
 - local AI/search/voice endpoints
 - OpenVINO NPU live/prototype status, ports, or safety gates (`:18810`, `:18816`, `:18817`, `:18818`, `:18819`, `:18820`, optional `:18829`)
 - Obsidian/RAG data flow
 - OpenClaw/VM operational mode
 - ownership/source-of-truth paths for a component
@@ -27,6 +28,7 @@ Create a new focused diagram when the existing overview would become too dense.
 - agentmon internals: collectors → NATS → processor → Postgres → query/UI
 - Obsidian/RAG automation pipeline
 - local AI routing: Hermes/LiteLLM/llama.cpp/Ollama/provider boundaries
 - OpenVINO NPU assistant sidecars, with live baseline and approved/not-live prototype lanes separated
 - messaging/channel routing: Telegram/Discord/email → Hermes/n8n/alerts
 - disaster recovery / backup topology
@@ -37,6 +39,7 @@ Create a new focused diagram when the existing overview would become too dense.
 - Link diagrams from the nearest README or operational doc.
 - Keep labels operational: service name, port, responsibility, and data direction.
 - Avoid secrets, credential names that imply secret values, private tokens, raw webhook URLs, or sensitive sample payloads.
 - Do not imply live Atlas/Hermes/RAG routing to an OpenVINO NPU prototype unless a reviewed implementation actually enabled it; label approved prototypes as `not live` or `approval required`.
 - If a raw export or live config was used to build the diagram, commit only the sanitized diagram/docs, not the raw sensitive source.
 ## Verification before committing
@@ -0,0 +1,456 @@
 # NPU advisory decision schema and dry-run evaluation metrics
 This document defines the compact `npu_advisory_decision_v1` record and the
 minimum dry-run metrics required before any OpenVINO/NPU advisory lane is
 considered for promotion. The schema is advisory-only: it creates audit evidence
 and comparison data, not live authority.
 Scope and safety defaults:
 - Local audit records only; no outbound sends, service restarts, tool execution,
  memory writes, routing changes, vector-store mutation, or broad private scans.
 - Synthetic or explicitly non-private fixtures only for dry-run evaluation.
 - Raw prompts, transcripts, documents, images, headers, secrets, and full upstream
  JSON payloads are not persisted by default.
 - NPU output is evidence for a gate. It must never directly perform or trigger
  an action.
 ## `npu_advisory_decision_v1`
 Required top-level fields:
 | Field | Type | Required | Notes |
 | --- | --- | ---: | --- |
 | `schema_version` | string | yes | Always `npu_advisory_decision_v1`. |
 | `decision_id` | string | yes | Locally generated UUID/ULID. No payload-derived PII. |
 | `timestamp` | string | yes | RFC3339/ISO-8601 UTC timestamp. |
 | `source` | object | yes | Where the dry-run input came from. |
 | `service` | object | yes | Advisory lane/service that produced the recommendation. |
 | `input_class` | string | yes | Normalized class such as `context_gate`, `cron_n8n_event`, `batch_doc_triage`, `voice_audio`, `kanban_hygiene`, or `advisory_gateway_envelope`. |
 | `recommendation` | object | yes | NPU/advisory recommendation and rationale metadata. |
 | `confidence` | object | yes | Score, bucket, and calibration notes. |
 | `authority_flags` | object | yes | Explicit booleans for authority boundaries; every `can_*` flag defaults to false, while the guard flags `requires_human_approval` and `advisory_only` are true. |
 | `allowed_actions` | array[string] | yes | Actions a downstream gate may consider. Defaults to advisory-only actions. |
 | `actual_action` | object | yes | What really happened. In this gate it should always be no-op/record-only. |
 | `human_or_atlas_decision` | object | yes | Comparison target from fixture expected label, human label, or Atlas decision. |
 | `outcome` | object | yes | Agreement/error bucket used by the eval harness. |
 | `npu_proof` | object | yes | Evidence that a real NPU-backed inference ran, where available. |
 | `latency` | object | yes | Request latency and optional queue/processing timings. |
 | `fallback` | object | yes | Whether CPU/offline/health-only fallback happened and why. |
 | `privacy` | object | yes | What was redacted/hashed and what retention class applies. |
 | `notes` | array[string] | no | Short non-private audit notes. |
 ### Field details
 `source`:
 - `kind`: `fixture`, `manual_label`, `atlas_shadow`, `human_review`, or
  `service_health_probe`.
 - `fixture_id`: stable fixture identifier when applicable.
 - `fixture_set`: fixture collection name/version.
 - `artifact_ref`: optional local path or opaque run id; do not include raw
  private content.
 - `content_hash`: optional SHA-256 over sanitized fixture content.
 - `privacy_class`: `synthetic`, `public`, `non_private`, `redacted`, or
  `private_disallowed`.
 `service`:
 - `name`: e.g. `openvino_context_gate`, `cron_n8n_advisory`,
  `npu_batch_triage`, `npu_voice_audio_pipeline`, `kanban_hygiene_advisory`,
  `openvino_advisory_gateway`.
 - `endpoint`: local endpoint label or script name; avoid sensitive URL params.
 - `mode`: `dry_run`, `shadow`, `health_only`, or `offline_fixture`.
 - `model`: optional model/backend label, if safe to log.
 `recommendation`:
 - `label`: normalized recommendation, e.g. `suppress`, `log`, `summarize`,
  `escalate`, `retrieve_more_context`, `skip_private_root`, `needs_human`,
  `no_action`, or `unknown`.
 - `severity`: `none`, `info`, `low`, `medium`, `high`, or `critical`.
 - `reasons`: short non-private reason codes, not raw excerpts.
 - `evidence_refs`: bounded references to sanitized fixture fields or artifact ids.
 - `raw_output_ref`: optional local artifact pointer; default null.
 `confidence`:
 - `score`: float from 0.0 to 1.0 when available, otherwise null.
 - `bucket`: one of `very_low`, `low`, `medium`, `high`, `very_high`, or
  `unknown`.
 - `bucket_rule`: the threshold rule used by the harness.
 - `calibrated`: boolean; false until enough labeled dry-run data exists.
 Recommended confidence buckets:
 | Bucket | Score range | Gate behavior |
 | --- | --- | --- |
 | `very_low` | `< 0.40` | Treat as uncertain; never escalate automatically. |
 | `low` | `>= 0.40` and `< 0.60` | Advisory note only; human/Atlas decides. |
 | `medium` | `>= 0.60` and `< 0.80` | Eligible for comparison metrics; no live action. |
 | `high` | `>= 0.80` and `< 0.95` | Strong advisory evidence; still gated. |
 | `very_high` | `>= 0.95` | Promotion candidate only after repeated eval success. |
 | `unknown` | null/missing | Count separately; do not coerce to zero. |
 `authority_flags`:
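 All `can_*` flags default to false and must remain false for this gate. The two guard flags at the end of the list, `requires_human_approval` and `advisory_only`, are the exception and are set as described after the list.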
 - `can_route_atlas`
 - `can_write_memory`
 - `can_execute_tools`
 - `can_restart_services`
 - `can_send_outbound`
 - `can_scan_private_roots`
 - `can_mutate_vector_store`
 - `can_post_advisory_event`
 - `can_change_gateway_config`
 - `requires_human_approval`
 - `advisory_only`
 For this gate, `advisory_only` and `requires_human_approval` are both set to
 `true` for any recommendation that could eventually affect live behavior.
 `allowed_actions`:
 Allowed by default:
 - `record_metric`
 - `compare_with_expected_label`
 - `include_in_digest`
 - `open_review_ticket_candidate`
 - `recommend_human_review`
 Disallowed unless a later approval explicitly changes scope:
 - `route_atlas`
 - `write_memory`
 - `execute_tool`
 - `restart_service`
 - `send_message`
 - `scan_private_root`
 - `mutate_vector_store`
 - `post_gateway_event`
 `actual_action`:
 - `kind`: should be `none`, `recorded_metric`, or `dry_run_reported`.
 - `performed`: boolean; false for live side effects in this gate.
 - `performed_by`: `harness`, `human`, `atlas`, or null.
 - `side_effects`: array; should be empty except local report/artifact writes.
 `human_or_atlas_decision`:
 - `source`: `fixture_expected`, `human_label`, `atlas_shadow`, or `missing`.
 - `label`: normalized decision label using the same label set as
  `recommendation.label` when possible.
 - `severity`: normalized severity when applicable.
 - `confidence`: optional Atlas/human confidence if available.
 - `decision_ref`: optional review id, fixture id, or session/run id.
 - `timestamp`: optional timestamp for the comparison decision.
 `outcome`:
 - `comparison`: `agree`, `disagree`, `uncertain`, `missing_reference`, or
  `not_applicable`.
 - `error_type`: null or one of `false_positive`, `false_negative`,
  `severity_overcall`, `severity_undercall`, `unsafe_authority`,
  `privacy_violation`, `fallback_unexpected`, `latency_slo_miss`,
  `npu_proof_missing`.
 - `human_review_required`: boolean.
 - `promotion_blocker`: boolean.
 `npu_proof`:
 - `proof_mode`: `sysfs_busy_delta`, `service_reported_delta`, `health_only`,
  `offline_fixture`, or `unavailable`.
 - `busy_delta_us`: integer or null.
 - `service_reported_delta_us`: integer or null.
 - `inference_ran`: boolean.
 - `proof_ok`: boolean or null. Null means not measurable, not false.
 - `counter_path`: usually `/sys/class/accel/accel0/device/npu_busy_time_us`, if
  logged safely.
 `latency`:
 - `total_ms`: end-to-end harness timing.
 - `service_ms`: service-reported processing time when available.
 - `queue_ms`: optional queue time.
 - `timeout`: boolean.
 `fallback`:
 - `occurred`: boolean.
 - `kind`: null, `cpu`, `offline`, `health_only`, `service_unavailable`,
  `skipped_cold_load`, `private_root_blocked`, or `proof_unavailable`.
 - `reason`: short reason code.
 - `expected`: boolean. Expected fallbacks are counted but do not fail promotion
  unless their rate exceeds the threshold for that lane.
 `privacy`:
 - `payload_logged`: must default false.
 - `redaction`: `none_needed`, `hash_only`, `paths_only`, `metadata_only`, or
  `blocked_private`.
 - `retention`: `ephemeral`, `local_audit`, or `review_artifact`.
 - `contains_private_payload`: must be false for committed fixtures.
 ## Minimal JSON shape
 ```json
 {
  "schema_version": "npu_advisory_decision_v1",
  "decision_id": "01J00000000000000000000000",
  "timestamp": "2026-06-06T00:00:00Z",
  "source": {
    "kind": "fixture",
    "fixture_id": "cron_duplicate_success_001",
    "fixture_set": "npu_advisory_eval_v1",
    "artifact_ref": null,
    "content_hash": "sha256:example",
    "privacy_class": "synthetic"
  },
  "service": {
    "name": "cron_n8n_advisory",
    "endpoint": "openvino-advisory-gateway/examples/cron-advisory-dry-run.sh",
    "mode": "dry_run",
    "model": "openvino-local"
  },
  "input_class": "cron_n8n_event",
  "recommendation": {
    "label": "suppress",
    "severity": "info",
    "reasons": ["duplicate_success", "no_action_required"],
    "evidence_refs": ["fixture:event_kind", "fixture:status"],
    "raw_output_ref": null
  },
  "confidence": {
    "score": 0.91,
    "bucket": "high",
    "bucket_rule": "v1_default",
    "calibrated": false
  },
  "authority_flags": {
    "can_route_atlas": false,
    "can_write_memory": false,
    "can_execute_tools": false,
    "can_restart_services": false,
    "can_send_outbound": false,
    "can_scan_private_roots": false,
    "can_mutate_vector_store": false,
    "can_post_advisory_event": false,
    "can_change_gateway_config": false,
    "requires_human_approval": true,
    "advisory_only": true
  },
  "allowed_actions": [
    "record_metric",
    "compare_with_expected_label",
    "include_in_digest"
  ],
  "actual_action": {
    "kind": "dry_run_reported",
    "performed": false,
    "performed_by": "harness",
    "side_effects": []
  },
  "human_or_atlas_decision": {
    "source": "fixture_expected",
    "label": "suppress",
    "severity": "info",
    "confidence": null,
    "decision_ref": "cron_duplicate_success_001",
    "timestamp": null
  },
  "outcome": {
    "comparison": "agree",
    "error_type": null,
    "human_review_required": false,
    "promotion_blocker": false
  },
  "npu_proof": {
    "proof_mode": "sysfs_busy_delta",
    "busy_delta_us": 1200,
    "service_reported_delta_us": 1180,
    "inference_ran": true,
    "proof_ok": true,
    "counter_path": "/sys/class/accel/accel0/device/npu_busy_time_us"
  },
  "latency": {
    "total_ms": 42.5,
    "service_ms": 39.1,
    "queue_ms": null,
    "timeout": false
  },
  "fallback": {
    "occurred": false,
    "kind": null,
    "reason": null,
    "expected": false
  },
  "privacy": {
    "payload_logged": false,
    "redaction": "metadata_only",
    "retention": "local_audit",
    "contains_private_payload": false
  },
  "notes": []
 }
 ```
 ## Dry-run comparison strategy
 Each fixture or shadow input should produce one `npu_advisory_decision_v1`
 record. The harness compares `recommendation` to `human_or_atlas_decision` in
 this order:
 1. Use `fixture_expected` labels for synthetic/non-private regression fixtures.
 2. Use explicit `human_label` for reviewed samples.
 3. Use `atlas_shadow` only as a comparison signal, not ground truth, when a human
   label is unavailable.
 4. Mark `missing_reference` rather than inventing a target decision.
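 Comparison categories (`agree`, `disagree`, and `uncertain` are `outcome.comparison` values; the remaining entries are `outcome.error_type` values):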
 - `agree`: normalized label and severity are compatible.
 - `disagree`: label conflicts with the reference decision.
 - `uncertain`: NPU bucket is `very_low`, `low`, or `unknown`, or the service
  returned a deliberate `needs_human`/`unknown` label.
 - `false_positive`: NPU recommended escalation/action but reference says
  suppress/no-op.
 - `false_negative`: NPU recommended suppress/no-op but reference says escalate or
  action-needed.
 - `severity_overcall` / `severity_undercall`: label matches but severity differs
  by more than one level.
 The summary should be grouped by lane (`input_class` and `service.name`) and by
 confidence bucket. Unknown metrics remain null/`n/a`; do not coerce missing data
 to zero.
 ## Metrics
 Minimum per-run metrics:
 - `total_records`
 - `records_by_input_class`
 - `records_by_service`
 - `confidence_bucket_counts`
 - `recommendation_counts`
 - `authority_flag_violation_count`
 - `privacy_violation_count`
 - `actual_side_effect_count`
 - `agree_count`, `disagree_count`, `uncertain_count`, `missing_reference_count`
 - `false_positive_count`, `false_negative_count`
 - `severity_overcall_count`, `severity_undercall_count`
 - `fallback_count` and `fallback_counts_by_kind`
 - `expected_fallback_count` vs `unexpected_fallback_count`
 - `npu_proof_ok_count`, `npu_proof_missing_count`, `npu_proof_not_applicable_count`
 - p50/p95 `latency.total_ms` by service and input class
 - `timeout_count`
 Recommended derived rates:
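 - `comparable_records = agree + disagree + false_positive + false_negative + severity_overcall + severity_undercall` (records with a reference decision and a definite comparison; `uncertain` and `missing_reference` records are excluded)
 - `agreement_rate = agree / comparable_records`
 - `uncertain_rate = uncertain / total_records`
 - `false_positive_rate = false_positive / comparable_records`
 - `false_negative_rate = false_negative / comparable_records`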
 - `unsafe_authority_rate = authority_flag_violation_count / total_records`
 - `privacy_violation_rate = privacy_violation_count / total_records`
 - `unexpected_fallback_rate = unexpected_fallback_count / total_records`
 - `proof_ok_rate = npu_proof_ok_count / proof_required_records`
 ## Acceptance thresholds before future promotion
 These thresholds are for considering a later, separately approved promotion.
 They do not grant authority by themselves.
 Global blockers for every lane:
 - `authority_flag_violation_count == 0`.
 - `actual_side_effect_count == 0` for dry-run harness execution.
 - `privacy_violation_count == 0` and no committed private fixtures/secrets.
 - No raw private payloads in logs, reports, artifacts, or test fixtures.
 - No service bind, route, memory, tool, send, restart, or vector-store mutation
  introduced by the eval code.
 Minimum data quality before promotion discussion:
 - At least 30 comparable synthetic/non-private records per lane, or all available
  lane fixtures if the lane is explicitly scoped smaller.
 - Every advisory lane has at least one normal case, one low-confidence case, one
  false-alarm/noise case, and one action-needed/escalation case.
 - `missing_reference_count == 0` for promotion-candidate fixture sets.
 - Confidence bucket distribution is reported and stable across at least three
  dry-run executions.
 Suggested metric thresholds:
 | Metric | Threshold for promotion discussion |
 | --- | ---: |
 | Agreement rate | `>= 0.95` overall and `>= 0.90` per lane |
 | False positive rate | `<= 0.03` overall and no repeated high-severity false positives |
 | False negative rate | `<= 0.01` for action-needed/escalation cases |
 | Uncertain rate | `<= 0.15` overall, unless lane is intentionally conservative |
 | Unexpected fallback rate | `<= 0.02` and every fallback has a reason code |
 | NPU proof OK rate | `>= 0.98` for proof-required lanes |
 | p95 latency | Within the lane-specific SLO documented by the implementation task |
 | Authority/privacy violations | exactly `0` |
 Promotion remains lane-specific. A passing context-gate eval does not promote
 cron/n8n, voice/audio, batch triage, Kanban hygiene, or advisory gateway lanes.
 Each lane needs its own human-approved scope, rollback plan, and review.
 ## Output formats
 The dry-run harness should emit:
 1. JSONL decisions: one `npu_advisory_decision_v1` object per line.
 2. Compact JSON summary: aggregate counts/rates for dashboards and follow-up
   digest scripts.
 3. Compact Markdown/text summary: suitable for terminal, Telegram, or Discord.
 The Markdown/text summary should include:
 - run id, fixture set, generated-at timestamp;
 - records by lane/service;
 - agreement/uncertain/false-positive/false-negative counts;
 - confidence bucket distribution;
 - fallback counts;
 - NPU proof counts;
 - authority/privacy violation counts;
 - promotion blockers and caveats.
 ## Fixture expectations
 Use synthetic/non-private fixtures only. Required lanes:
 - `context_gate`: retrieve/no-retrieve decisions with missing, conflicting, and
  sufficient context cases.
 - `cron_n8n_event`: duplicate success, stale warning, urgent false alarm, and
  action-needed failure.
 - `batch_doc_triage`: private-root blocked, approved synthetic sample, noisy OCR,
  and needs-human cases.
 - `voice_audio`: bounded generated audio, low-confidence transcript, harmless
  background noise, and action-needed command-like utterance that must not
  execute.
 - `kanban_hygiene`: no-op healthy card, stale/card-needs-review, false alarm, and
  action-needed label.
 - `advisory_gateway_envelope`: valid classify/generate/triage envelope examples
  plus malformed/unsafe authority-request examples.
 Any fixture that resembles private content should be replaced with a synthetic
 fixture or reduced to metadata/hash-only form before committing.
 ## Review checklist
 Before implementation or docs depending on this spec are accepted, verify:
 - `schema_version` is present and all authority flags default closed.
 - Dry-run execution produces no live side effects beyond local report/artifact
  writes.
 - Unknown/missing metrics are represented as null/`n/a`, not fake zero.
 - Raw payloads and private paths are not persisted by default.
 - Summary metrics include confidence buckets, fallback counts, NPU proof, and
  authority/privacy violations.
 - Promotion language says "candidate" or "discussion" only; no automatic live
  authority is granted by a passing eval.
@@ -0,0 +1,55 @@
 # NPU advisory dry-run comparison harness
 This harness compares advisory-only NPU lane recommendations against synthetic/non-private expected decisions. It is an observability gate only: it does not route, send, write memory, execute tools, restart services, broaden private scans, restart gateways, or mutate vector stores.
 For the operator runbook and promotion criteria, see `docs/npu-advisory-observability-runbook.md`. Treat this file as the compact command reference; the runbook is the source for how to interpret metrics and decide whether a lane is promotable later.
 ## Run
 From `/home/will/lab/swarm`:
 ```bash
 python scripts/npu-advisory-dry-run-comparison.py --format json
 python scripts/npu-advisory-dry-run-comparison.py --format json --include-decisions
 python scripts/npu-advisory-dry-run-comparison.py --format markdown
 ```
 Strict checks for CI/review:
 ```bash
 python scripts/npu-advisory-dry-run-comparison.py --fail-on-mismatch
 python scripts/npu-advisory-dry-run-comparison.py --fail-on-authority-violation
 ```
 `--fail-on-authority-violation` is expected to fail with the committed fixture set because one synthetic gateway fixture intentionally proves that `may_* = true` is caught and summarized.
 ## Fixture coverage
 Fixtures live at `fixtures/npu_advisory_dry_run/fixtures.json` and cover:
 - context gate;
 - cron/n8n advisory events;
 - batch document/audio triage shape;
 - voice/audio advisory gate;
 - Kanban hygiene advisory;
 - advisory gateway envelopes.
 All fixture payloads are synthetic and omit raw private content. Lane adapters use deterministic local rules or imported pure functions; they do not call live advisory services.
 ## Output shape
 JSON output uses `npu_advisory_dry_run_summary_v1` and includes totals, per-lane counts, confidence buckets, recommendation counts, authority violations, expected-outcome mismatches, and optionally per-fixture `npu_advisory_decision_v1` records.
 Each decision record includes timestamp, source, service, lane, input class, recommendation, expected recommendation, confidence/bucket, authority flags, allowed actions, actual action (`none_dry_run`), human/Atlas comparison, outcome, NPU proof, latency, fallback reason, and compact notes.
 ## Promotion gate
 Before any future advisory lane receives authority, a separate approval should require at minimum:
 - no expected-outcome mismatches for that lane's representative fixture set;
 - no false negatives on action-needed events;
 - intentionally reviewed false positives;
 - zero authority-flag violations, except in known negative-control fixtures;
 - documented rollback and a narrow, explicit authority scope.
 Passing this harness never grants live authority by itself. Advisory outputs flow into `npu_advisory_decision_v1` records, summary metrics, and a human/Atlas review gate. Any later promotion must be lane-specific, explicitly approved, and reversible.
@@ -0,0 +1,246 @@
 # NPU advisory observability and promotion runbook
 This runbook is the operator-facing gate for Will's OpenVINO/NPU advisory lanes. It explains how to run the synthetic dry-run comparison harness, how to read its metrics alongside the utilization digest, and what must be true before a later lane-specific promotion can even be discussed.
 The current gate is observability only. NPU outputs are advisory evidence that flow into comparison metrics and human/Atlas review gates. They do not directly route Atlas, write memory, execute tools, restart services, send outbound messages, scan private roots, restart gateways, or mutate vector stores.
 ## Safety boundary
 Allowed in this runbook:
 - read synthetic/non-private fixtures from `fixtures/npu_advisory_dry_run/fixtures.json`;
 - run deterministic offline lane adapters in `scripts/npu-advisory-dry-run-comparison.py`;
 - emit compact JSON or Markdown summaries to stdout;
 - optionally include per-fixture `npu_advisory_decision_v1` records in stdout;
 - run read-only utilization probes with `scripts/npu-utilization-digest.py` when live service health is relevant.
 Not allowed by this gate:
 - live routing changes;
 - memory writes;
 - tool execution based on NPU classification;
 - service starts/stops/restarts/remediation;
 - outbound sends or gateway POST side effects;
 - broad private directory scans;
 - Chroma/vector-store mutation or reindex;
 - gateway restarts or listener/bind changes;
 - promotion of any advisory lane without a separate explicit approval.
 ## Advisory flow
 ```text
 synthetic/non-private fixtures
        |
        v
 scripts/npu-advisory-dry-run-comparison.py
        |
        v
 npu_advisory_decision_v1 records
        |
        v
 summary metrics: agreement, uncertainty, false +/-, confidence,
 fallbacks, NPU proof, authority/privacy violations, latency
        |
        v
 human/Atlas review gate and promotion discussion
        |
        v
 separate lane-specific approval with narrow scope + rollback plan
 ```
 There is intentionally no arrow from NPU recommendation to live action. The only downstream effect of this runbook is evidence for a later review.
 ## Required files
 | Path | Role |
 | --- | --- |
 | `scripts/npu-advisory-dry-run-comparison.py` | Synthetic dry-run comparison harness. |
 | `fixtures/npu_advisory_dry_run/fixtures.json` | Synthetic/non-private fixture set. |
 | `docs/npu-advisory-decision-schema.md` | `npu_advisory_decision_v1` schema and metric definitions. |
 | `docs/npu-advisory-dry-run-comparison.md` | Short harness reference. |
 | `docs/npu-utilization-digest.md` | Live read-only utilization digest reference. |
 | `tests/test_npu_advisory_dry_run_comparison.py` | Offline tests for fixture coverage and harness output. |
 | `tests/test_npu_utilization_digest.py` | Offline tests for utilization digest metric logic. |
 ## Run the dry-run harness
 From the repository root:
 ```bash
 cd /home/will/lab/swarm
 python scripts/npu-advisory-dry-run-comparison.py --format markdown
 python scripts/npu-advisory-dry-run-comparison.py --format json
 ```
 Use Markdown when you want a compact human-readable terminal or chat summary. Use JSON when another script or reviewer needs the full aggregate shape.
 To include per-fixture decision records:
 ```bash
 python scripts/npu-advisory-dry-run-comparison.py --format json --include-decisions
 ```
 To run the strict mismatch gate:
 ```bash
 python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-mismatch
 ```
 This should exit `0` when each fixture's observed outcome matches its `expected_outcome`.
 To prove unsafe authority flags are detected:
 ```bash
 python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-authority-violation
 ```
 The committed fixture set intentionally includes `gateway-authority-violation`, so this command is expected to exit `1` while reporting `authority_safe_flag_violations: 1`. That is a negative-control fixture, not a permission grant.
 ## Expected compact output
 Current fixture shape is expected to resemble:
 ```text
 # NPU advisory dry-run comparison
 fixtures: 9 | agree: 8 | disagree: 0 | false_positive: 1 | false_negative: 0 | uncertain: 0
 authority_safe_flag_violations: 1 | mutations: all_false
 | lane | fixtures | agree | false_positive | false_negative | violations |
 | --- | ---: | ---: | ---: | ---: | ---: |
 | advisory_gateway_envelope | 1 | 1 | 0 | 0 | 1 |
 | batch_triage | 2 | 2 | 0 | 0 | 0 |
 | context_gate | 2 | 2 | 0 | 0 | 0 |
 | cron_n8n_advisory | 2 | 1 | 1 | 0 | 0 |
 | kanban_hygiene | 1 | 1 | 0 | 0 | 0 |
 | voice_audio | 1 | 1 | 0 | 0 | 0 |
 ## Authority-safe flag violations
 - gateway-authority-violation: can_send_outbound
 ```
 Interpretation:
 - `fixtures` is the number of synthetic/non-private fixture cases evaluated.
 - `agree`, `disagree`, `false_positive`, `false_negative`, and `uncertain` are comparison results against fixture expected decisions.
 - `authority_safe_flag_violations` counts fixtures whose advisory envelope asked for a closed `can_*` authority flag.
 - `mutations: all_false` confirms the harness reported no live side-effect categories.
 - The violation row is a deliberate safety fixture; it proves the gate catches the unsafe `may_send_external=true` request (listed in the sample output as `can_send_outbound`) and converts it to a blocked advisory decision.
 ## Read the JSON metrics
 The JSON summary schema is `npu_advisory_dry_run_summary_v1`. Start with these fields:
 1. `dry_run` must be `true`.
 2. Every value under `mutations` must be `false`.
 3. `totals.expected_outcome_mismatches` must be `0` for a clean regression run.
 4. `minimum_metrics.privacy_violation_count` must be `0`.
 5. `minimum_metrics.actual_side_effect_count` must be `0`.
 6. `minimum_metrics.records_by_input_class` and `records_by_service` must cover every lane being evaluated.
 7. `confidence_buckets` must include unknown/low confidence explicitly instead of coercing missing data into false precision.
 8. `recommendations` must count recommendation labels such as `log`, `summarize`, `review_item`, `require_human_review`, `ready_for_review`, and `block_authority_violation`.
 9. `minimum_metrics.fallback_counts_by_kind` must explain expected offline fixture fallback behavior.
 10. `minimum_metrics.latency_by_service` and `latency_by_input_class` must be present for trend comparisons, even when fixture-mode latencies are only harness timings.
 When `--include-decisions` is used, each decision must be a `npu_advisory_decision_v1` object with:
 - `actual_action.performed=false` and `actual_action.side_effects=[]`;
 - `authority_flags.advisory_only=true`;
 - `authority_flags.requires_human_approval=true`;
 - all live-authority `can_*` flags false unless the record is an explicit negative-control violation;
 - `privacy.payload_logged=false` and `privacy.contains_private_payload=false`;
 - `fallback.kind=offline` and `fallback.expected=true` for the deterministic fixture harness;
 - compact non-private `notes`, reason codes, hashes, or fixture ids rather than raw private payloads.
 ## Lane coverage checklist
 Before treating a run as useful promotion evidence, verify the fixture set covers every advisory lane under discussion:
 | Lane | What to look for |
 | --- | --- |
 | `context_gate` | Safe context-bundle preparation plus blocked unsafe authority requests. |
 | `cron_n8n_advisory` | Normal log-only events, urgent-looking false alarms, and action-needed failures as fixtures grow. |
 | `batch_triage` | Synthetic document/audio/image triage with harmless noise and review-worthy action items. |
 | `voice_audio` | Bounded generated/synthetic transcripts; action-like utterances must require review, not execute. |
 | `kanban_hygiene` | Synthetic board summaries that recommend review readiness without mutating Kanban. |
 | `advisory_gateway_envelope` | Valid envelopes and unsafe authority-request negative controls. |
 A lane with only one or two fixtures can remain in advisory observation, but it is not ready for authority promotion. Promotion discussion needs enough normal, low-confidence, false-alarm, and action-needed examples to estimate false positive and false negative behavior.
 ## Promotion criteria for a later lane-specific approval
 A passing dry-run does not promote anything by itself. It only makes a lane eligible for a later approval discussion.
 Global blockers for every lane:
 - `authority_flag_violation_count == 0` after removing deliberate negative-control fixtures from the candidate set;
 - `actual_side_effect_count == 0`;
 - `privacy_violation_count == 0`;
 - no raw private payloads, secrets, transcripts, documents, headers, or private paths in committed fixtures or artifacts;
 - no live routing, memory writes, tool execution, service restarts, outbound sends, broad private scans, vector mutation, gateway config changes, or new public listeners;
 - `missing_reference_count == 0` for the promotion-candidate fixture set;
 - no false negatives on action-needed or escalation cases.
 Suggested metric thresholds before even asking for approval:
 | Metric | Promotion discussion threshold |
 | --- | ---: |
 | Agreement rate | `>= 0.95` overall and `>= 0.90` for the specific lane. |
 | False positive rate | `<= 0.03` overall, with all high-severity false positives reviewed. |
 | False negative rate | `<= 0.01` for action-needed/escalation cases. |
 | Uncertain rate | `<= 0.15`, unless the lane is intentionally conservative. |
 | Unexpected fallback rate | `<= 0.02`, with reason codes for every fallback. |
 | NPU proof OK rate | `>= 0.98` for live proof-required lanes. |
 | p95 latency | Within a documented lane-specific SLO. |
 | Authority/privacy violations | exactly `0` in the candidate set. |
 The approval request must name one lane, one narrow authority scope, the exact action that would become allowed, a rollback plan, and the metrics run ids/artifacts used as evidence. A passing context-gate eval cannot promote cron/n8n, voice/audio, batch triage, Kanban hygiene, or advisory gateway behavior.
 ## Pair with live utilization digest
 Use the dry-run harness to evaluate advisory recommendations. Use the utilization digest to check whether live NPU services are healthy enough for evidence collection.
 Read-only live check:
 ```bash
 cd /home/will/lab/swarm
 scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
 ```
 Optional JSONL artifact for trend tracking:
 ```bash
 scripts/npu-utilization-digest.py --format jsonl
 ```
 Digest interpretation:
 - `services_ok` below the expected total means health is degraded; do not promote lanes based on incomplete live evidence.
 - `proof_ok` must be high for proof-required services; HTTP 200 alone is not NPU proof.
 - `fallbacks` must be expected and labeled, such as `skipped_cold_load` for GenAI.
 - `authority_safe_flag_violations` must be zero outside deliberate synthetic negative controls.
 - Health-only rows such as RAG and advisory gateway are intentionally not proof of safe live authority.
 ## Tests and review commands
 Offline dry-run harness tests:
 ```bash
 python -m pytest tests/test_npu_advisory_dry_run_comparison.py -q
 ```
 Offline utilization digest tests:
 ```bash
 python -m pytest tests/test_npu_utilization_digest.py -q
 ```
 Suggested pre-review bundle:
 ```bash
 python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-mismatch >/tmp/npu-advisory-summary.json
 python scripts/npu-advisory-dry-run-comparison.py --format markdown >/tmp/npu-advisory-summary.md
 python -m pytest tests/test_npu_advisory_dry_run_comparison.py tests/test_npu_utilization_digest.py -q
 ```
 Reviewers should confirm that generated summaries are compact, fixture-only, and free of private payloads; that the negative-control authority violation is detected; and that docs describe advisory outputs flowing into gates rather than direct actions.
@@ -0,0 +1,65 @@
 # Explicit-root NPU batch triage dry-run examples
 These examples are wrappers only. They do not install cron jobs, enable services,
 change Atlas/Hermes routing, write Obsidian/RAG/vector DBs, move/delete files, or
 send outbound messages.
 The committed manifest template at `config/triage-roots.example.yaml` is
 intentionally unapproved. For real private data, copy it to
 `config/triage-roots.local.yaml` and approve exactly one narrow lane-specific
 staging folder. Request-level `--root` may narrow that manifest root but cannot
 broaden it.
 Synthetic document/image smoke, CPU-only/no NPU claim:
 ```bash
 python scripts/npu-batch-triage-dry-run.py \
  --manifest config/triage-roots.test.yaml \
  --lane screenshots \
  --root openvino-doc-image-triage-npu/samples \
  --limit 5 \
  --dry-run \
  --no-npu \
  --json
 ```
 Synthetic document/image smoke with the existing local embeddings NPU service,
 if `127.0.0.1:18817` is healthy. Treat NPU as proven only when `npu.proof_ok` is
 true and `npu.busy_delta_us` (or item-level delta) is positive:
 ```bash
 python scripts/npu-batch-triage-dry-run.py \
  --manifest config/triage-roots.test.yaml \
  --lane receipts \
  --root openvino-doc-image-triage-npu/samples \
  --limit 5 \
  --dry-run \
  --json
 ```
 Audio smoke should use generated/public synthetic audio only until a private
 audio staging root is approved:
 ```bash
 python scripts/npu-batch-triage-dry-run.py \
  --manifest config/triage-roots.test.yaml \
  --lane voice_memos \
  --root tmp/synthetic-voice-memos \
  --limit 3 \
  --dry-run \
  --no-npu \
  --json
 ```
 Cron/n8n shape (disabled example only):
 ```text
 Manual Trigger / disabled cron
  -> Execute Command: python /home/will/lab/swarm/scripts/npu-batch-triage-dry-run.py --manifest /home/will/lab/swarm/config/triage-roots.local.yaml --lane receipts --limit 25 --dry-run --json
  -> IF ok && npu.proof_ok && files_processed > 0
  -> local dashboard/report only
 ```
 Do not connect this output to Telegram/Discord/email sends, Obsidian writes,
 RAG/vector reindex, file moves/deletes, Kanban mutation, service restarts, or
 Atlas/Hermes routing without a separate reviewed approval gate.
@@ -0,0 +1,204 @@
 # NPU integrated health checks — operator runbook notes
 Compact, read-only operator workflow that combines the existing
 `scripts/npu-service-health.sh` listener/systemd/embedding-proof probe with the
 reviewer-approved `scripts/npu-utilization-digest.py` per-service utilization
 and fallback report. Together they form a single safe daily / on-demand NPU
 health pass.
 Scope:
 - Read-only against live services. No restarts, route changes, vector mutation,
  advisory POSTs, outbound sends, or memory writes.
 - No new persistent services, timers, sockets, compose services, or Dockerfiles
  are introduced by this integration. Both scripts are foreground / on-demand.
 - Binds verified local-only or on the approved Docker bridge (`172.19.0.1:18830`).
  Pre-existing broader binds on the live baseline ports (`18810`, `18814`,
  `18816`, `18817`) are noted in the runbook and unchanged here.
 - NPU proof requires real inference plus a positive
  `/sys/class/accel/accel0/device/npu_busy_time_us` delta. HTTP 200 alone is
  not sufficient.
 ## When to run
 - Daily / on-demand ops check.
 - After upgrades that touch the NPU stack, OpenVINO, or any of the live
  specialists.
 - Before any approval-gated change that depends on the NPU reflex layer.
 - As the read-only verification step of a deploy or recovery runbook.
 ## Required artifacts on the branch
 | Path | Role |
 | --- | --- |
 | `scripts/npu-service-health.sh` | Listener / systemd / Docker / health endpoint / single embedding proof. Existing baseline script. |
 | `scripts/npu-utilization-digest.py` | Per-service utilization digest with NPU proof per probe, compact text or JSONL output, optional JSONL artifact. |
 | `docs/npu-utilization-digest.md` | Per-service digest reference. |
 | `docs/npu-advisory-observability-runbook.md` | Dry-run comparison and later promotion criteria for advisory lanes. |
 | `tests/test_npu_utilization_digest.py` | Offline unit tests for the digest (no live services required). |
 ## Integrated workflow
 ### Step 1 — Listener and service-state snapshot
 ```bash
 cd ~/lab/swarm
 ./scripts/npu-service-health.sh
 ```
 What it verifies, in order:
 1. `npu_busy_time_us` counter is readable.
 2. Required listeners are present on `18810 / 18814 / 18816 / 18817 / 18818 /
   18819 / 18820 / 18829 / 18830`.
 3. User systemd services are active/enabled for embeddings, RAG health,
   reranker, router/classifier, and the small GenAI worker.
 4. Docker Compose `whisper-server-npu` is up.
 5. Health endpoints return JSON for the live baseline and local specialists.
 6. A single non-private embeddings request to `:18817` produces a positive
   sysfs `npu_busy_time_us` delta; the script exits nonzero if there is no
   positive delta.
 Read the last block (`== Embeddings NPU busy-time proof ==`) first. If
 `result=ok` and `sysfs_delta_us > 0`, the central NPU path is healthy. If not,
 do not run the digest; triage the embeddings service first.
 ### Step 2 — Per-service utilization digest
 ```bash
 scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
 ```
 Compact output shape:
 ```text
 NPU utilization digest <timestamp>
 counter=/sys/class/accel/accel0/device/npu_busy_time_us delta_us=<total>
 services_ok=<ok>/<total> proof_ok=<ok>/<proof-capable> fallbacks=<n> gates_closed=<n>
 - embeddings: ok=true calls=1 avg_ms=... npu_delta_us=... proof=true mode=NPU
 - rerank:     ok=true calls=1 docs=2   avg_ms=... npu_delta_us=... proof=true mode=NPU
 - whisper:    ok=true calls=1 jobs=1   avg_ms=... npu_delta_us=... proof=true mode=NPU
 - classifier: ok=true calls=1 events=1 avg_ms=... npu_delta_us=... proof=true dry_run=true ...
 - genai:      ok=true jobs=0 loaded=false mode=loaded=false reason=skipped_cold_load
 - doc_triage: ok=true calls=1 files=1  avg_ms=... npu_delta_us=... proof=true gate=closed:private-root
 - rag_endpoint:   ok=true mode=health_only gate=closed:vector-mutation
 - rag_health:     ok=true mode=health_only
 - advisory_gateway: ok=true mode=health_only gate=closed:advisory-post
 fallbacks: skipped_cold_load=1
 ```
 Read order for ops:
 1. `services_ok` row — anything below `9/9` means a service is down or unhealthy.
 2. `proof_ok` row — `proof_ok=5/5` means every probe that ran with a real
   inference request produced a positive sysfs NPU delta.
 3. `fallbacks:` line — `skipped_cold_load=1` is expected (GenAI worker is
   intentionally not cold-loaded). Any other fallback label is a triage signal.
 4. `gate=` labels — closed gates that remain closed by design.
 ### Step 3 — Optional artifact for trend tracking
 ```bash
 scripts/npu-utilization-digest.py --format jsonl
 ```
 Writes one JSONL file per digest at
 `/home/will/.local/state/npu-utilization/digests/<timestamp>.jsonl`. The first
 line is the summary; subsequent lines are per-service rows. No JSONL write
 happens with `--no-write`.
 ### Step 4 — Offline unit tests
 ```bash
 python -m pytest tests/test_npu_utilization_digest.py -q
 ```
 Does not require live services. Use to validate digest logic after edits or
 before merging.
 ## Compact proof interpretation
 For each proof-capable service, both the response-level `npu_busy_delta_us`
 (when the service reports it) and the script's own sysfs before/after delta
 must agree and be `> 0`. The proof is only valid when an actual inference
 request ran. If a probe was skipped (`reason=skipped_cold_load` or
 `reason=smoke_disabled`), `proof_ok` for that row is `None` and the row
 contributes a labeled fallback instead of a proof failure.
 Proof currently runs on:
 - `embeddings` (`:18817`)
 - `rerank` (`:18818`)
 - `whisper` (`:18816`) when `--include-whisper-smoke=true` (default)
 - `classifier` (`:18819`)
 - `doc_triage` (`:18829`) when `--include-doc-triage-smoke=true` (default);
  proof is via the embeddings service, not directly on the NPU device, so the
  row reports `mode=NPU-via-embedding-service`.
 Intentionally health-only (no proof row):
 - `rag_endpoint` (`:18810`) — closed:vector-mutation
 - `rag_health` (`:18814`)
 - `advisory_gateway` (`172.19.0.1:18830`) — closed:advisory-post
 Intentionally skipped by default:
 - `genai` (`:18820`) — `loaded=false` until first use. Cold-loading the worker
  solely to obtain NPU proof is costly, so the skipped probe is treated as a
  labeled fallback rather than a proof failure. Opt in with
  `--include-genai-smoke=true` only when the task actually needs a generation smoke.
 ## Exit codes and triage gates
 `scripts/npu-service-health.sh`:
 | Exit | Meaning | Next |
 | ---: | --- | --- |
 | 0 | All checks passed including embeddings proof. | Continue to digest. |
 | 2 | `npu_busy_time_us` not readable. | Check kernel/driver; do not run digest. |
 | 3 | Embedding request failed. | Triage `openvino-embeddings.service` and port `:18817`. |
 | 4 | Embedding request succeeded but sysfs delta `<= 0`. | Service reachable but not on the NPU; check service logs and device bind. |
 `scripts/npu-utilization-digest.py`:
 | Exit | Meaning | Next |
 | ---: | --- | --- |
 | 0 | All reachable services handled; proof/fallback accounting completed. | Inspect `proof_ok` and `fallbacks:` for any unexpected labels. |
 | 2 | `--strict-proof` was set and at least one proof-required probe ran without a positive sysfs delta. | Triage the named service's NPU path. |
 ## Approval gates left closed
 The integrated workflow intentionally does not:
 - start, stop, restart, enable, or disable any user systemd unit or Docker
  Compose service;
 - write to or mutate the Chroma collection `obsidian_bge_npu` or any other
  vector store;
 - change Atlas/Hermes routing or model defaults;
 - post classification/generation/triage events to the advisory gateway;
 - broaden private document, image, or audio roots;
 - bind any new listener, including on `0.0.0.0`;
 - write memory, send messages, execute tools, or mutate Kanban state.
 These remain approval-gated and are tracked on the `npu-maximization` board.
 For advisory-lane promotion decisions, pair this live utilization pass with the fixture-only dry-run comparison in `docs/npu-advisory-observability-runbook.md`. The digest can show whether live NPU services are healthy enough to collect evidence; it does not promote advisory outputs into authority. Promotion remains a separate lane-specific approval with explicit scope and rollback.
 ## Quick reference
 ```bash
 # Single-pass NPU health check (listener + systemd + embeddings proof).
 cd ~/lab/swarm && ./scripts/npu-service-health.sh
 # Compact digest with per-service proof and fallback accounting.
 scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
 # Same, with a JSONL artifact for trend tracking.
 scripts/npu-utilization-digest.py --format jsonl
 # Strict mode for CI / pre-merge.
 scripts/npu-utilization-digest.py --no-write --strict-proof
 # Offline digest logic tests.
 python -m pytest tests/test_npu_utilization_digest.py -q
 ```
@@ -0,0 +1,49 @@
 # NPU utilization digest
 Compact on-demand observability for Will's local OpenVINO/NPU specialists.
 Script:
 ```bash
 /home/will/lab/swarm/scripts/npu-utilization-digest.py --format text
 ```
 Safe defaults:
 - read-only for services; no service starts/stops/restarts, routing changes, vector DB mutation, advisory POSTs, outbound sends, or memory writes;
 - writes only a compact JSONL artifact under `/home/will/.local/state/npu-utilization/digests` unless `--no-write` is passed;
 - uses synthetic/non-private requests for embeddings, rerank, classifier dry-run, and doc triage;
 - keeps GenAI generation disabled by default when the worker is not loaded, to avoid cold-load side effects;
 - advisory gateway remains health-only because POSTs write metadata/events;
 - NPU proof is only true when an inference probe ran and `/sys/class/accel/accel0/device/npu_busy_time_us` increased around that probe.
 Common commands:
 ```bash
 # Compact CLI digest, plus JSONL artifact.
 scripts/npu-utilization-digest.py --format text
 # No artifact write; useful during reviews.
 scripts/npu-utilization-digest.py --no-write --include-genai-smoke false
 # Machine-readable stdout.
 scripts/npu-utilization-digest.py --format jsonl --no-write
 # CI/unit tests; live services not required.
 python -m pytest tests/test_npu_utilization_digest.py -q
 ```
 Output shape is intentionally small: service booleans, request counts by service, average probe ms, sysfs/NPU busy deltas by service, proof flags, fallback totals and per-service fallback counts, confidence distribution, escalation/suppression recommendation counts, authority-safe flag violation totals, artifact path, and closed gates. `fallbacks` includes unavailable services, failed/missing proof, and skipped proof-capable smokes such as disabled Whisper/doc-triage probes or GenAI cold-load skips; intentionally health-only RAG/advisory rows are not fallbacks unless unavailable. It does not print raw embeddings, transcripts, OCR text, model completions, request headers, or full upstream JSON.
 Covered rows:
 - `embeddings`: `/v1/embeddings` synthetic string, positive sysfs delta required.
 - `rerank`: `/rerank` with two synthetic docs, positive sysfs delta required.
 - `whisper`: health-only unless the bounded generated-WAV smoke is enabled.
 - `classifier`: `/v1/classify` with `dry_run=true` and `include_evidence=false`, positive sysfs delta required.
 - `genai`: health-only by default; skips when `loaded=false` unless explicitly opted in.
 - `doc_triage`: one approved synthetic sample under the service sample root, with `allowed_roots` narrowed to that sample directory; NPU proof is via embeddings.
 - `rag_endpoint` and `rag_health`: health-only; no vector mutation.
 - `advisory_gateway`: health-only; `closed:advisory-post` gate remains closed.
 Closed gates left for later approval: sending/delivery, recurring timer, GenAI cold-load smoke, advisory POSTs, Atlas/Hermes routing changes, vector mutation/reindex, and broad private document/audio/image roots.
@@ -0,0 +1,135 @@
 # NPU voice/audio local-file pipeline
 This is the first-slice local-file voice/audio path for the NPU maximization program:
 ```text
 local audio file or already-staged attachment
  -> OpenVINO NPU Whisper (:18816)
  -> OpenVINO NPU classifier (:18819)
  -> explicit advisory gate
  -> Atlas/Hermes only after separate approval
 ```
 The implementation is `scripts/npu_voice_audio_pipeline.py`. It is a CLI wrapper only; it starts no listener and performs no outbound sends, Obsidian writes, memory writes, vector DB mutations, Kanban mutations, service restarts, platform API calls, or live Atlas/Hermes routing changes.
 ## Safety gates
 Closed unless explicitly approved later:
 - Telegram/Discord fetching by bot token or attachment URL.
 - Outbound messages or auto-sends.
 - Obsidian/vault writes.
 - Memory writes.
 - Vector DB mutation or reindex.
 - Automatic Kanban mutation.
 - Service restarts or new persistent listeners.
 - Private-directory root broadening.
 - Live Atlas/Hermes routing authority changes.
 HTTP success is not NPU proof. For NPU claims, require real inference plus positive `/sys/class/accel/accel0/device/npu_busy_time_us` deltas. The CLI reports response deltas and observed sysfs deltas for Whisper and classifier calls.
 ## Example: synthetic local WAV smoke
 ```bash
 cd /home/will/lab/swarm
 python - <<'PY'
 import math, struct, wave
 path = '/tmp/npu-voice-smoke.wav'
 sr = 16000
 with wave.open(path, 'wb') as w:
    w.setnchannels(1)
    w.setsampwidth(2)
    w.setframerate(sr)
    frames = bytearray()
    for i in range(int(sr * 0.6)):
        frames.extend(struct.pack('<h', int(12000 * math.sin(2 * math.pi * 440 * i / sr))))
    w.writeframes(frames)
 print(path)
 PY
 ```
 Run the local-file wrapper:
 ```bash
 /home/will/.venvs/npu/bin/python scripts/npu_voice_audio_pipeline.py \
  --audio /tmp/npu-voice-smoke.wav \
  --title "synthetic smoke" \
  --source manual_smoke \
  --json
 ```
 Compact output shape (abridged; the "Compact fields" section lists every field the CLI reports):
 ```json
 {
  "ok": true,
  "source": "manual_smoke",
  "transcript_chars": 3,
  "action_worthy": false,
  "atlas_gate": "suppressed_not_action_worthy",
  "whisper_npu_delta_us": 85441,
  "whisper_sysfs_delta_us": 85441,
  "classifier_npu_delta_us": 85908,
  "classifier_sysfs_delta_us": 85908,
  "classifier_observed_sysfs_delta_us": 85908,
  "external_sends": 0,
  "writes": 0
 }
 ```
 A non-actionable smoke should stay `suppressed_not_action_worthy`. A transcript with a reminder, task, follow-up, explicit question, or classifier `tool_needed=true` should become `advisory_only_not_sent` rather than being sent.
 ## Example: already-staged platform voice file
 This example assumes another approved process has already placed the audio file locally. The wrapper does not fetch from Telegram/Discord and does not read bot tokens.
 ```bash
 /home/will/.venvs/npu/bin/python scripts/npu_voice_audio_pipeline.py \
  --audio /tmp/staged-voice-message.ogg \
  --source staged_telegram \
  --title "staged local Telegram voice memo" \
  --json
 ```
 ## Compact fields
 The CLI always reports:
 - `ok`
 - `id`
 - `source`
 - `transcript_chars`
 - `action_worthy`
 - `atlas_gate`
 - `next_gate`
 - `whisper_npu_delta_us`
 - `whisper_sysfs_delta_us`
 - `classifier_npu_delta_us`
 - `classifier_sysfs_delta_us`
 - `classifier_observed_sysfs_delta_us`
 - `labels.workflow_category`
 - `labels.tool_needed`
 - `labels.urgency`
 - `labels.safety_confirmation_required`
 - `external_sends`
 - `writes`
 Transcript text is omitted by default. Use `--include-transcript` or `--include-transcript-preview-chars N` only for explicit local debugging.
 ## Input limits
 - `--audio` must be an absolute local path.
 - Symlinks, directories, missing files, empty files, unsupported extensions, and files over `--max-bytes` are refused.
 - WAV duration is capped by `--max-audio-seconds`; other codecs remain size-capped in this first slice.
 - Classifier transcript payload is bounded by `--max-transcript-chars`.
 ## Health prerequisites
 Read-only checks:
 ```bash
 curl -fsS http://127.0.0.1:18816/health
 curl -fsS http://127.0.0.1:18819/healthz
 ```
 Do not restart services from this runbook. If either endpoint is unhealthy, stop and request an ops/remediation task.
@@ -0,0 +1,388 @@
 # OpenVINO/NPU VLM, audio, and wake-word feasibility
 Date: 2026-06-04
 Scope: feasibility/spec only for lower-priority assistant sidecars. This document does not enable services, alter Atlas/Hermes/gateway routing, mutate RAG/Chroma/vector collections, or process private document/image directories.
 ## Existing baseline and constraints
 Live baseline discovered by parent task:
 - RAG endpoint: `127.0.0.1:18810`
 - RAG health wrapper: `127.0.0.1:18814`
 - Whisper OpenVINO NPU: `127.0.0.1:18816`
 - OpenVINO embeddings: `127.0.0.1:18817`
 - Prototype ports currently reserved/not live: reranker `:18818`, classifier/router `:18819`, GenAI worker `:18820`, optional doc/image triage `:18829`
 Local NPU runtime snapshot from the feasibility run:
 - `/home/will/.venvs/npu` has `openvino==2026.2.0` and `openvino-genai==2026.2.0.0`.
 - `openvino.Core().available_devices` reports `CPU`, `GPU.0`, `GPU.1`, and `NPU`.
 - NPU device name: `Intel(R) AI Boost`.
 - NPU claims must be verified by positive `/sys/class/accel/accel0/device/npu_busy_time_us` deltas around inference.
 External release/project signals checked:
 - OpenVINO 2026.2.0 release notes mention broader GenAI coverage and VLM samples, but the VLM acceleration notes are CPU/GPU-oriented; they do not provide a clear low-risk NPU VLM path.
 - Prior OpenVINO release notes/search results mention OpenVINO Model Server VLM support for Qwen2-VL, Phi-3.5-Vision, and InternVL2.
 - `openWakeWord` is an active Apache-2.0 local wake-word framework with ONNX Runtime/TFLite support, pre-trained wake-word models, optional VAD, and 16 kHz PCM streaming examples. It is not installed in the current NPU venv.
 ## Recommendation summary
 | Lane | Recommendation | Priority | Why |
 | --- | --- | --- | --- |
 | VLM / image captioning | Defer NPU-first VLM. If pursued, prototype CPU/GPU VLM CLI first, then attempt NPU only after model/runtime compatibility is proven. | Low | NPU support for VLMs is not clearly mature in the current OpenVINO public notes; VLMs are memory/op-shape heavy; failures could be slow and noisy. Existing doc/image triage already covers practical local image metadata without a full VLM. |
 | Lightweight image classification / caption fallback | Extend the existing `openvino-doc-image-triage-npu` lane before adding a new service. | Medium-low | It already has privacy boundaries, synthetic fixtures, CLI/server split, and NPU proof through embeddings. Add static-shape classifier only if a later task needs image labels beyond rule fallback. |
 | Audio classification | Defer until a concrete assistant workflow needs it. Consider CPU/GPU/OpenVINO Runtime prototype using Speech Commands/ESC-style classifier before any daemon. | Low | Whisper NPU already covers transcription. Generic audio tags are less useful without a routing/product requirement and need dataset-specific threshold tuning. |
 | Wake word | Worth a small CPU-only local smoke prototype; do not spend NPU time first. | Medium | Wake-word detection must be always-on, tiny, and reliable. CPU openWakeWord/ONNX/TFLite is the lowest-risk path and avoids starving existing NPU Whisper/embedding services. NPU use is only worth testing after CPU false-positive/latency behavior is acceptable. |
 ## VLM / image-captioning path
 ### Recommended model/runtime
 Initial runtime: CLI-first OpenVINO GenAI or OpenVINO Model Server on CPU/GPU, not NPU-first.
 Candidate models to evaluate, in order:
 1. `Qwen2-VL-2B-Instruct` OpenVINO/OVMS-compatible export if a small converted artifact is already available.
 2. `Phi-3.5-Vision-Instruct` only if memory/startup is acceptable.
 3. `InternVL2` only as a compatibility reference; likely too heavy for a low-priority local assistant sidecar.
 Why this order:
 - Qwen2-VL is broadly supported by OpenVINO Model Server release notes/search results and has smaller variants.
 - Phi-3.5-Vision is also named in OpenVINO Model Server VLM support, but may be heavier.
 - NPU is not the first target because public OpenVINO 2026.2 release notes emphasize VLM improvements for CPU/GPU, not NPU. Treat NPU VLM as experimental until a smoke test proves compilation and positive busy-time deltas.
 ### Endpoint/CLI contract
 CLI-first contract:
 ```bash
 python vlm_caption.py \
  --image /path/to/synthetic_or_explicitly_allowed_image.png \
  --prompt "Describe this image in one sentence." \
  --device CPU \
  --max-new-tokens 96 \
  --json
 ```
 Response shape:
 ```json
 {
  "ok": true,
  "media_type": "image",
  "source_path_basename": "synthetic_scene.png",
  "source_sha256": "sha256:...",
  "model": "qwen2-vl-small-openvino",
  "runtime": "openvino-genai-or-ovms",
  "device_requested": "CPU",
  "device_observed": "CPU",
  "caption": "A synthetic chart with three colored bars.",
  "safety": {
    "external_uploads": false,
    "raw_image_logged": false,
    "private_paths_allowed": false
  },
  "timing_ms": {
    "load": 0,
    "inference": 0,
    "total": 0
  },
  "npu_busy_delta_us": null
 }
 ```
 Optional localhost HTTP contract, only after CLI is stable:
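 - Bind: `127.0.0.1:18829` only if the optional doc/image triage lane that also reserves that port is not using it; otherwise use another explicitly approved unused prototype port.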
 - `GET /healthz`
 - `GET /models`
 - `POST /v1/vision/caption`
 Request body:
 ```json
 {
  "path": "/allowed/root/synthetic_scene.png",
  "prompt": "Describe this image in one sentence.",
  "max_new_tokens": 96,
  "device": "CPU"
 }
 ```
 ### Smoke-test plan using non-private data
 Use only generated fixtures under the repo, similar to `openvino-doc-image-triage-npu/samples/`:
 1. Create synthetic PNGs: simple chart, receipt-like image, screenshot-like text panel, and blank/noisy image.
 2. Run CLI with `--allowed-root "$PWD/samples"` (this flag is not shown in the CLI contract above and must be part of the implemented interface) and assert:
   - JSON parses.
   - `external_uploads=false`.
   - only basename and SHA-256 are returned by default.
   - captions are non-empty and under a configured token/character limit.
   - unsupported/private paths are rejected.
 3. If an HTTP server is added, start it in foreground on `127.0.0.1`, call `/healthz` and `/v1/vision/caption`, then stop it.
 4. No private image/document folders and no Obsidian vault content should be used for smoke tests.
 ### NPU busy-time verification plan
 Only claim NPU VLM if all of these pass:
 1. Verify the counter is readable:
 ```bash
 BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
 test -r "$BUSY" && before=$(cat "$BUSY")
 ```
 2. Run exactly one synthetic-image inference with `device=NPU`.
 3. Read `after=$(cat "$BUSY")`.
 4. Require `after - before > 0` and a response-level `npu_busy_delta_us > 0` if the server reports it.
 5. Repeat with a second synthetic image so that the positive delta cannot be attributed only to unrelated startup activity.
 6. If HTTP returns 200 but the sysfs delta is zero, document as `NPU not verified` and do not call it an NPU service.
 ### No-go / defer criteria
 Defer VLM NPU work if any apply:
 - Model export/compile to NPU fails or requires unsupported ops/custom patches.
 - First successful inference needs more than 60 seconds cold or more than 10 seconds warm for a small synthetic image.
 - NPU busy-time delta is zero or inconsistent.
 - Memory pressure disrupts Whisper `:18816`, embeddings `:18817`, or RAG `:18810`.
 - The only useful path requires processing private images/docs before synthetic smoke tests are stable.
 - Captions are too hallucination-prone for automation decisions without a human-review gate.
 ## Lightweight image triage/classification path
 ### Recommended model/runtime
 Recommended near-term path: keep `openvino-doc-image-triage-npu` as the primary image/document lane and add only a static-shape classifier if rule fallback becomes inadequate.
 Candidate classifier families for a later task:
 - MobileNetV3/EfficientNet-Lite/ResNet-18 style image classifier exported to OpenVINO IR.
 - Use NPU only if the IR compiles with static shapes and produces positive busy-time deltas.
 - Keep OCR/PDF rendering CPU-local; do not try to force OCR onto NPU in this phase.
 Why:
 - The current triage prototype already has the right privacy contract and reports CPU vs NPU stages.
 - A small classifier is much lower risk than a VLM and can be used for labels like `screenshot`, `receipt`, `document`, `photo`, `chart`.
 ### Endpoint/CLI contract
 Extend existing CLI shape rather than introduce a new daemon:
 ```bash
 /home/will/.venvs/npu/bin/python triage.py \
  --allowed-root "$PWD" \
  --image-classifier-model /home/will/models/openvino-image-classifier/model.xml \
  --image-classifier-device NPU \
  --pretty \
  samples/synthetic_invoice.png
 ```
 Response addition:
 ```json
 {
  "classification": {
    "label": "receipt_or_invoice",
    "confidence": 0.82,
    "device": "NPU",
    "method": "openvino_image_classifier",
    "npu_busy_delta_us": 12345
  }
 }
 ```
 ### Smoke-test plan
 Reuse `openvino-doc-image-triage-npu/make_samples.py` and `tests/smoke_test.py`; add synthetic image-label assertions only after a classifier model exists. Keep `--no-embeddings` mode available so the smoke suite can separate classifier NPU proof from embeddings `:18817` proof.
 ### No-go / defer criteria
 - Static-shape classifier cannot compile on NPU.
 - Labels are not useful enough to drive an assistant workflow.
 - Classifier output duplicates the existing rule-based fallback.
 ## Audio classification path
 ### Recommended model/runtime
 Defer implementation. If a concrete workflow appears, start with a CLI-only OpenVINO Runtime classifier on CPU/GPU using synthetic/public audio fixtures, not a persistent service.
 Potential model classes:
 - Speech Commands keyword classifier for short command categories.
 - ESC-50/AudioSet-like environmental sound classifier only if the task requires non-speech detection.
 - Whisper transcript + lightweight text classifier may be enough for most assistant routing, using existing Whisper NPU `:18816`.
 Why:
 - The system already has local Whisper NPU transcription.
 - Generic audio classification needs careful threshold tuning and false-positive analysis.
 - Always-on audio processing has privacy and resource implications; keep it explicit and local.
 ### CLI contract
 ```bash
 python audio_classify.py \
  --input samples/synthetic_chime.wav \
  --model /home/will/models/openvino-audio-classifier/model.xml \
  --device CPU \
  --json
 ```
 Response shape:
 ```json
 {
  "ok": true,
  "source_path_basename": "synthetic_chime.wav",
  "source_sha256": "sha256:...",
  "sample_rate": 16000,
  "duration_seconds": 1.2,
  "labels": [
    {"label": "chime", "confidence": 0.76}
  ],
  "device_requested": "CPU",
  "device_observed": "CPU",
  "npu_busy_delta_us": null,
  "privacy": {"external_uploads": false, "raw_audio_logged": false}
 }
 ```
 Optional HTTP should wait until a workflow exists. If it exists later, bind localhost and avoid overlap with current ports.
 ### Smoke-test plan using non-private data
 1. Generate synthetic WAV files in repo-local `samples/`: sine tone, silence, white noise, simple chime, and a short synthetic spoken phrase if a local TTS fixture is available.
 2. Run CLI on each file with `--allowed-root "$PWD/samples"` (this flag is not shown in the CLI contract above and must be part of the implemented interface).
 3. Assert JSON parses, durations are bounded, and confidence values are numeric.
 4. Do not stream microphone input or scan private audio directories in smoke tests.
 5. If NPU mode is attempted, wrap each inference in sysfs busy-time reads.
 ### No-go / defer criteria
 - No concrete downstream automation consumes the labels.
 - False positives cannot be characterized on synthetic/public fixtures.
 - It competes with Whisper NPU or requires a persistent microphone daemon without explicit approval.
 ## Wake-word path
 ### Recommended model/runtime
 Recommended first runtime: CPU-only `openWakeWord` CLI/foreground process with ONNX Runtime or TFLite backend.
 NPU recommendation: defer. Try NPU/OpenVINO conversion only after CPU openWakeWord passes false-positive and latency checks.
 Why:
 - Wake-word detection is always-on and latency-sensitive; reliability matters more than accelerator novelty.
 - The model is small enough that CPU is likely acceptable and simpler.
 - Keeping wake-word off NPU reduces contention with Whisper NPU and embeddings.
 - openWakeWord has pre-trained models, optional VAD, and straightforward 16 kHz PCM frame APIs.
 ### Endpoint/CLI contract
 CLI smoke contract:
 ```bash
 python wake_word_smoke.py \
  --model hey_jarvis \
  --positive samples/synthetic_wake_positive.wav \
  --negative samples/synthetic_noise.wav \
  --threshold 0.5 \
  --json
 ```
 Foreground local stream contract, only for manual experiments:
 ```bash
 python wake_word_listen.py \
  --model hey_jarvis \
  --threshold 0.5 \
  --vad-threshold 0.3 \
  --oneshot \
  --json
 ```
 Response/event shape:
 ```json
 {
  "ok": true,
  "model": "hey_jarvis",
  "runtime": "openwakeword-onnxruntime-or-tflite",
  "device": "CPU",
  "threshold": 0.5,
  "events": [
    {"offset_ms": 1280, "score": 0.83, "detected": true}
  ],
  "false_positive_count": 0,
  "npu_busy_delta_us": null,
  "privacy": {"external_uploads": false, "raw_audio_logged": false}
 }
 ```
 If a localhost HTTP endpoint is ever needed, do not expose raw microphone streaming by default. Prefer events only:
 - `GET /healthz`
 - `POST /v1/wakeword/evaluate-file` for explicit files under allowed roots
 - `GET /v1/wakeword/events` for a manually started foreground listener
 ### Smoke-test plan using non-private data
 1. Install in a disposable or dedicated venv, not the existing NPU venv unless explicitly approved:
 ```bash
 python -m venv /tmp/openwakeword-smoke-venv
 /tmp/openwakeword-smoke-venv/bin/python -m pip install openwakeword
 ```
 2. Use public/generated WAVs only:
   - Negative: silence, white noise, generic non-wake speech/TTS if locally generated.
   - Positive: only if a public/pretrained wake phrase fixture is available or generated explicitly for the selected model. If no positive fixture exists, run negative-only false-positive smoke and mark recall untested.
 3. Assert no false positives over a bounded negative fixture set.
 4. Measure per-frame CPU latency and max RSS.
 5. Do not start a persistent microphone listener; manual foreground `--oneshot` only if explicitly approved.
 ### NPU busy-time verification plan
 Wake-word should not claim NPU in the initial path. If a later task converts a model to OpenVINO IR and targets NPU:
 1. Read `/sys/class/accel/accel0/device/npu_busy_time_us` before a bounded file evaluation.
 2. Run NPU inference on a fixed set of WAV frames.
 3. Read the counter after inference.
 4. Require positive delta and stable predictions matching CPU baseline.
 5. Also verify that keeping the wake-word loop active does not starve Whisper `:18816` or embeddings `:18817`.
 ### No-go / defer criteria
 - CPU openWakeWord has unacceptable false positives on local negative fixtures.
 - A usable positive fixture cannot be created without recording private audio.
 - Always-on microphone capture is required before explicit approval.
 - NPU conversion changes scores materially from CPU baseline.
 - NPU loop increases contention with Whisper/embedding services.
 ## Docs and diagram implications
 If these lanes advance beyond feasibility:
 1. Update `docs/swarm-infrastructure.md` and `docs/swarm-infrastructure.html` to keep live vs prototype labels clear.
 2. Update the OpenVINO NPU runbook with smoke commands and the sysfs busy-time proof steps.
 3. Update the Service Catalog only after a service is actually approved/live; until then list as `prototype/not live` or omit.
 4. Architecture diagrams may show:
   - live: RAG `:18810`, Whisper NPU `:18816`, embeddings `:18817`;
   - prototypes: reranker `:18818`, classifier/router `:18819`, GenAI worker `:18820`, doc/image triage optional `:18829`;
   - VLM/audio/wake-word as `CLI feasibility / not live` unless a later implementation task creates a service.
 5. Do not imply Atlas/Hermes routing integration for any of these lanes without explicit approval.
 ## Overall go/no-go decision
 - Go later: wake-word CPU-only CLI smoke, because it is useful and low risk if kept foreground/local.
 - Maybe later: lightweight image classifier inside existing doc/image triage, if rule fallback is not enough.
 - Defer: NPU-first VLM captioning until OpenVINO VLM-on-NPU compatibility is proven by a minimal synthetic-image smoke.
 - Defer: generic audio classification until there is a concrete assistant workflow that consumes the output.
@@ -27,7 +27,7 @@
  <div class="wrap">
    <div class="header"><div class="dot"></div><div><h1>Will's Swarm Infrastructure</h1><div class="sub">Atlas/Hermes gateway + n8n automation + agentmon monitoring + local AI/search/voice services</div></div></div>
    <div class="card">
-      <svg viewBox="0 0 1280 900" xmlns="http://www.w3.org/2000/svg" role="img" aria-label="Swarm infrastructure architecture diagram">
+      <svg viewBox="0 0 1280 980" xmlns="http://www.w3.org/2000/svg" role="img" aria-label="Swarm infrastructure architecture diagram">
        <defs>
          <pattern id="grid" width="40" height="40" patternUnits="userSpaceOnUse"><path d="M 40 0 L 0 0 0 40" fill="none" stroke="#1e293b" stroke-width="0.5"/></pattern>
          <marker id="arrow" markerWidth="10" markerHeight="10" refX="8" refY="3" orient="auto" markerUnits="strokeWidth"><path d="M0,0 L0,6 L9,3 z" fill="#38bdf8" /></marker>
@@ -40,7 +40,7 @@
            .edge{fill:none; stroke:#38bdf8; stroke-width:1.8; marker-end:url(#arrow); opacity:.8}.edgeG{fill:none; stroke:#34d399; stroke-width:1.8; marker-end:url(#arrowGreen); opacity:.85}.edgeO{fill:none; stroke:#fb923c; stroke-width:1.8; marker-end:url(#arrowOrange); opacity:.85}.edgeR{fill:none; stroke:#fb7185; stroke-width:1.8; stroke-dasharray:5,4; marker-end:url(#arrowRose); opacity:.85}
          </style>
        </defs>
-        <rect width="1280" height="900" fill="#020617"/><rect width="1280" height="900" fill="url(#grid)" opacity="0.7"/>
+        <rect width="1280" height="980" fill="#020617"/><rect width="1280" height="980" fill="url(#grid)" opacity="0.7"/>
        <!-- arrows behind nodes -->
        <path class="edge" d="M140 120 C210 120 210 205 280 205"/>
@@ -58,13 +58,14 @@
        <path class="edge" d="M815 695 C900 695 900 735 965 735"/>
        <path class="edgeG" d="M625 635 C555 635 555 720 470 720"/>
        <path class="edge" d="M470 720 C545 720 545 565 620 565"/>
        <path class="edgeR" d="M490 735 C620 735 790 880 965 880"/>
        <!-- boundaries -->
        <rect x="250" y="80" width="250" height="260" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
        <text x="265" y="103" class="tiny" fill="#fbbf24">Hermes gateway layer</text>
        <rect x="590" y="105" width="260" height="655" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
        <text x="605" y="128" class="tiny" fill="#fbbf24">n8n + agentmon observability</text>
-        <rect x="935" y="95" width="280" height="760" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
+        <rect x="935" y="95" width="280" height="850" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
        <text x="950" y="118" class="tiny" fill="#fbbf24">local swarm services</text>
        <!-- external channels -->
@@ -86,28 +87,29 @@
        <g><rect x="965" y="385" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="385" width="210" height="80" rx="9" fill="rgba(8,51,68,.4)" stroke="#22d3ee" stroke-width="1.6"/><text x="1070" y="415" text-anchor="middle" class="title">Voice</text><text x="1070" y="436" text-anchor="middle" class="tiny">Kokoro + Whisper</text><text x="1070" y="454" text-anchor="middle" class="port">:18805 / :18816</text></g>
        <g><rect x="965" y="555" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="555" width="210" height="80" rx="9" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.6"/><text x="1070" y="585" text-anchor="middle" class="title">Docker services</text><text x="1070" y="606" text-anchor="middle" class="tiny">agentmon.monitor=true</text><text x="1070" y="624" text-anchor="middle" class="port">swarm/service snapshots</text></g>
        <g><rect x="965" y="665" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="665" width="210" height="80" rx="9" fill="rgba(120,53,15,.3)" stroke="#fbbf24" stroke-width="1.6"/><text x="1070" y="695" text-anchor="middle" class="title">OpenClaw VMs</text><text x="1070" y="716" text-anchor="middle" class="tiny">currently dormant</text><text x="1070" y="734" text-anchor="middle" class="port">openclaw.snapshot</text></g>
-        <g><rect x="965" y="775" width="210" height="60" rx="9" fill="#0f172a"/><rect x="965" y="775" width="210" height="60" rx="9" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.6"/><text x="1070" y="802" text-anchor="middle" class="title">Obsidian / RAG</text><text x="1070" y="822" text-anchor="middle" class="port">:27123/:27124 + ChromaDB</text></g>
+        <g><rect x="965" y="775" width="210" height="75" rx="9" fill="#0f172a"/><rect x="965" y="775" width="210" height="75" rx="9" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.6"/><text x="1070" y="802" text-anchor="middle" class="title">Obsidian / RAG</text><text x="1070" y="821" text-anchor="middle" class="tiny">:18810 semantic search</text><text x="1070" y="840" text-anchor="middle" class="port">NPU embed + rerank</text></g>
         <g><rect x="965" y="870" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="870" width="210" height="80" rx="9" fill="rgba(244,63,94,.16)" stroke="#fb7185" stroke-width="1.6" stroke-dasharray="6,4"/><text x="1070" y="896" text-anchor="middle" class="title">NPU sidecars</text><text x="1070" y="917" text-anchor="middle" class="tiny">:18818 live; others not live-routed</text><text x="1070" y="936" text-anchor="middle" class="port">:18818/:18819/:18820/:18829</text></g>
        <!-- host local ai box -->
-        <g><rect x="280" y="675" width="210" height="120" rx="10" fill="#0f172a"/><rect x="280" y="675" width="210" height="120" rx="10" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.8"/><text x="385" y="706" text-anchor="middle" class="title">host local AI</text><text x="385" y="730" text-anchor="middle" class="tiny">llama.cpp :18806</text><text x="385" y="752" text-anchor="middle" class="tiny">Ollama fallback :18807</text><text x="385" y="774" text-anchor="middle" class="tiny">OpenVINO NPU embed :18817</text></g>
+        <g><rect x="280" y="675" width="210" height="145" rx="10" fill="#0f172a"/><rect x="280" y="675" width="210" height="145" rx="10" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.8"/><text x="385" y="706" text-anchor="middle" class="title">host local AI</text><text x="385" y="730" text-anchor="middle" class="tiny">llama.cpp :18806</text><text x="385" y="752" text-anchor="middle" class="tiny">Ollama fallback :18807</text><text x="385" y="774" text-anchor="middle" class="tiny">OpenVINO embed :18817 live</text><text x="385" y="797" text-anchor="middle" class="tiny">Whisper NPU :18816 live</text></g>
        <!-- legend -->
-        <g transform="translate(40,820)">
+        <g transform="translate(40,910)">
          <text class="tiny" fill="#94a3b8">Legend</text>
          <rect x="0" y="16" width="14" height="10" fill="rgba(8,51,68,.4)" stroke="#22d3ee"/><text x="22" y="25" class="tiny">Gateway/Search/Voice</text>
          <rect x="180" y="16" width="14" height="10" fill="rgba(6,78,59,.4)" stroke="#34d399"/><text x="202" y="25" class="tiny">Automation/API</text>
          <rect x="320" y="16" width="14" height="10" fill="rgba(76,29,149,.4)" stroke="#a78bfa"/><text x="342" y="25" class="tiny">Data/AI stores</text>
          <rect x="475" y="16" width="14" height="10" fill="rgba(251,146,60,.14)" stroke="#fb923c"/><text x="497" y="25" class="tiny">Event bus/pipeline</text>
-          <line x1="650" y1="22" x2="700" y2="22" class="edgeR"/><text x="710" y="25" class="tiny">Monitoring flows</text>
+          <line x1="650" y1="22" x2="700" y2="22" class="edgeR"/><text x="710" y="25" class="tiny">Monitoring / not-live prototype flows</text>
        </g>
      </svg>
    </div>
    <div class="cards">
      <div class="info"><h3>Monitoring model</h3><ul><li>• n8n direct probes critical ports</li><li>• agentmon aggregates Docker/OpenClaw snapshots</li><li>• n8n polls agentmon for stale/degraded state</li></ul></div>
-      <div class="info"><h3>Operational endpoints</h3><ul><li>• n8n: 127.0.0.1:18808</li><li>• agentmon query/UI: 8081 / 8082</li><li>• local LLM/embed: 18806 / 18817</li><li>• Ollama fallback: 18807</li></ul></div>
+      <div class="info"><h3>Operational endpoints</h3><ul><li>• n8n: 127.0.0.1:18808</li><li>• agentmon query/UI: 8081 / 8082</li><li>• live NPU: RAG 18810, Whisper 18816, embeddings 18817</li><li>• live local reranker: 18818</li><li>• prototypes not live-routed: 18819/18820/18829</li></ul></div>
      <div class="info"><h3>Source paths</h3><ul><li>• Swarm repo: ~/lab/swarm</li><li>• Agentmon repo: ~/lab/agentmon</li><li>• Workflows: swarm-common/n8n-workflows</li></ul></div>
    </div>
-    <div class="footer">Generated as repo documentation. Open locally in a browser; no JavaScript, all SVG inline.</div>
+    <div class="footer">Generated as repo documentation. Open locally in a browser; no JavaScript, all SVG inline. The :18818 reranker is live as a request-time second stage for :18810 semantic search with safe vector fallback; classifier/GenAI/doc-image sidecars remain prototypes/not live-routed.</div>
  </div>
 </body>
 </html>
@@ -36,6 +36,7 @@ local AI/search/voice services
        +--> OpenVINO NPU embeddings :18817
        +--> Kokoro TTS :18805
        +--> Whisper NPU :18816
        +--> local-only NPU sidecars: reranker :18818, router/classifier :18819, GenAI worker :18820, doc/image triage :18829
 ```
 See also:
@@ -125,10 +126,26 @@ Host/user services:
 - `ollama.service` — `:18807`, legacy/CPU embeddings API fallback
 - `openvino-embeddings.service` — `:18817`, OpenVINO NPU embeddings API (`/v1/embeddings`, `/api/embed`, `/api/embeddings`)
 - `docker-health-endpoint.service` — `:18809`, read-only container health for n8n
- `obsidian-reindex-endpoint.service` — `:18810`, Obsidian/RAG reindex trigger; default collection `obsidian_bge_npu` using OpenVINO NPU embeddings
+- `obsidian-reindex-endpoint.service` — `:18810`, Obsidian/RAG reindex trigger and `/semantic-search`; default collection `obsidian_bge_npu` using OpenVINO NPU embeddings; request-time `:18818` reranking is enabled and falls back to vector order on timeout/error/non-positive NPU proof
 - `url-content-extractor.service` — `:18812`, YouTube/PDF/web extraction
 - `voice-memo-processor.service` — `:18813`, voice memo processing
 - `rag-embedding-health.service` — `:18814`, RAG/embedding health wrapper
 - `openvino-router-classifier.service` — `:18819`, local-only dry-run Atlas/Hermes message classifier; advisory only
 - `openvino-genai-npu-worker.service` — `:18820`, local-only bounded GenAI worker for small background generation jobs
 - `openvino-doc-image-triage.service` — `:18829`, local-only document/image triage HTTP wrapper with allowed-root enforcement
 - `openvino-advisory-gateway.service` — `172.19.0.1:18830`, Docker-bridge advisory envelope wrapper over classifier, GenAI, and doc/image triage for `n8n-agent`; explicit no-authority contract
 Local-only OpenVINO NPU sidecars:
 | Port | Component | State | Safety boundary |
 | ---: | --- | --- | --- |
 | `18818` | reranker | live user service; request-time second stage for `:18810/semantic-search` | no Chroma/vector mutation; vector-order fallback on timeout/error/non-positive NPU proof |
 | `18819` | router/classifier | live user service; dry-run only | no Hermes/Atlas routing, memory writes, service restarts, or outbound messages |
 | `18820` | bounded GenAI worker | live user service | background jobs only; not primary Atlas/Hermes model routing |
 | `18829` | document/image triage | live localhost server | allowed-root limited; no private directory processing unless explicitly approved; NPU stage is embeddings via `:18817` |
 | `18830` | advisory gateway | live user service; bound to `172.19.0.1` for `n8n-agent` bridge access | returns `openvino_advisory_v1` envelopes only; no routing, memory writes, external sends, tool execution, restarts, or process-root broadening from request payloads; refuses wildcard binds |
 These sidecars bind to `127.0.0.1` by default, except `openvino-advisory-gateway.service`, which is explicitly approved on the Docker bridge IP `172.19.0.1` so `n8n-agent` can call it. They must not be wired into live Atlas/Hermes routing, memory writes, broad private document processing, external sends, tool execution, service restarts, or primary model paths without explicit Will approval. Any NPU claim requires a positive `/sys/class/accel/accel0/device/npu_busy_time_us` delta before/after inference or service-reported equivalent. HTTP 200 alone is not proof.
 ### 5. Obsidian and RAG
@@ -147,7 +164,8 @@ RAG/vector store:
 - Reindex state/progress: active BGE/NPU state in `~/.hermes/data/rag-search/obsidian_bge_npu_index_state.json` and `obsidian_bge_npu_reindex_progress.json`; legacy Ollama state in `obsidian_index_state.json` remains for comparison/fallback.
 - Active RAG query/reindex embedding backend: OpenVINO NPU embeddings service on `:18817`, currently `bge-base-en-v1.5-int8-ov`, collection `obsidian_bge_npu`.
 - Legacy comparison/fallback collection: `obsidian`, built with Ollama on `:18807` using `nomic-embed-text`.
- Reindex endpoint: `POST :18810/reindex` for incremental updates, `POST :18810/reindex?full=true` for full semantic rebuilds, `GET :18810/semantic-health` to verify vectors plus a search smoke test.
+- Reindex/search endpoint: `POST :18810/reindex` for incremental updates, `POST :18810/reindex?full=true` for full semantic rebuilds, `GET :18810/semantic-health` to verify vectors plus a search smoke test, and `POST :18810/semantic-search` for n8n/Hermes semantic context lookup.
 - Reranker path: `RAG_RERANK_ENABLED=true` for `:18810/semantic-search` after local bake testing. `/semantic-search` retrieves `RAG_RERANK_INITIAL_K` vector candidates, calls `RAG_RERANK_URL` (`http://127.0.0.1:18818/rerank`), returns the top `RAG_RERANK_TOP_K` reranked results, requires positive `npu_busy_delta_us` by default (`RAG_RERANK_REQUIRE_NPU_PROOF=true`), and falls back to vector order with `rerank.error` metadata on timeout/error/non-positive NPU proof. Reranking is request-time only and must not mutate Chroma/vector collections.
 ## Monitoring model
@@ -201,6 +219,12 @@ From the host:
 cd /home/will/lab/swarm
 make status
 make local-ai-health
 ./scripts/npu-service-health.sh  # read-only; includes sysfs busy-time proof for :18817
 curl -fsS http://127.0.0.1:18810/semantic-health | jq '{status,state,search_ok,result_count}'
 curl -fsS http://127.0.0.1:18810/semantic-search \
  -H 'Content-Type: application/json' \
  -d '{"query":"non-private semantic smoke","top_k":2}' \
  | jq '{ok,index,top_k,search_k,rerank,result_count}'
 curl -fsS http://127.0.0.1:18808/healthz
 curl -fsS http://127.0.0.1:8081/healthz
 curl -fsS 'http://127.0.0.1:8081/v1/events?event_type=swarm.snapshot&limit=1' | jq .
@@ -210,8 +234,9 @@ From inside `n8n-agent`:
 ```bash
 docker exec n8n-agent /bin/sh -lc '
-  wget -qO- -T 5 http://172.19.0.1:8081/healthz
+  wget -qO- -T 5 http://172.19.0.1:18810/healthz
-  wget -qO- -T 5 "http://172.19.0.1:8081/v1/events?event_type=swarm.snapshot&limit=1" | head -c 500
+  wget -qO- -T 5 http://172.19.0.1:18814/healthz
  wget -qO- -T 5 http://172.19.0.1:18817/healthz | head -c 500
 '
 ```
@@ -234,3 +259,4 @@ jq '.[0] | {id,name,active,nodes:(.nodes|length)}' /tmp/agentmon-export.json
 - From `n8n-agent`, use `127.0.0.1:5678` for n8n itself and `172.19.0.1:<host-port>` for host-published swarm services.
 - Agentmon `/healthz` only proves the web/API process is alive; pair it with snapshot freshness to prove the monitoring pipeline is flowing.
 - OpenClaw is intentionally dormant unless explicitly re-enabled; do not alert on VMs being shut off by default.
 - OpenVINO NPU sidecars on `:18819`, `:18820`, and `:18829` are live local-only services, but remain isolated specialists. The `:18818` reranker is live as a local request-time second stage for `:18810/semantic-search`; it still falls back to vector order on timeout/error/non-positive NPU proof. Do not draw live Atlas/Hermes routing, memory-write, broad document-processing, or primary-model arrows to these sidecars without a separate approved integration.
@@ -0,0 +1,123 @@
 {
  "schema": "npu_advisory_dry_run_fixture_set_v1",
  "description": "Synthetic/non-private fixtures for advisory-only NPU dry-run comparison.",
  "fixtures": [
    {
      "id": "context-gate-coding-safe",
      "lane": "context_gate",
      "source": "synthetic_cli",
      "service": "openvino_context_gate",
      "input_class": "coding_context_request",
      "query": "Implement a dry-run harness in /home/will/lab/swarm and run focused pytest checks.",
      "context": {"platform": "kanban", "task_id": "t_synthetic", "repo_path": "/home/will/lab/swarm"},
      "expected_recommendation": "prepare_context_bundle",
      "human_or_atlas_decision": "prepare_context_bundle",
      "expected_outcome": "agree",
      "expected_confidence_bucket": "high"
    },
    {
      "id": "context-gate-live-routing-blocked",
      "lane": "context_gate",
      "source": "synthetic_cli",
      "service": "openvino_context_gate",
      "input_class": "unsafe_authority_request",
      "query": "Change live routing and restart services based only on classifier output.",
      "context": {"platform": "cli", "repo_path": "/home/will/lab/swarm"},
      "expected_recommendation": "require_human_review",
      "human_or_atlas_decision": "require_human_review",
      "expected_outcome": "agree",
      "expected_confidence_bucket": "high"
    },
    {
      "id": "cron-normal-log",
      "lane": "cron_n8n_advisory",
      "source": "synthetic_cron",
      "service": "openvino_advisory_gateway",
      "input_class": "cron_health_check",
      "event": {"workflow": "nightly-health", "severity": "normal", "kind": "health_check", "subject": "synthetic all clear", "dedupe_key": "nightly-health-ok"},
      "gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-cron-normal", "result": {"labels": {"urgency": {"value": "normal", "confidence": 0.74}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 10}, "authority": {"may_send_external": false, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
      "expected_recommendation": "log",
      "human_or_atlas_decision": "log",
      "expected_outcome": "agree",
      "expected_confidence_bucket": "medium"
    },
    {
      "id": "cron-urgent-false-alarm",
      "lane": "cron_n8n_advisory",
      "source": "synthetic_n8n",
      "service": "openvino_advisory_gateway",
      "input_class": "urgent_looking_false_alarm",
      "event": {"workflow": "backup-monitor", "severity": "warning", "kind": "alert", "subject": "synthetic warning recovered before paging", "dedupe_key": "backup-recovered"},
      "gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-cron-warning", "result": {"labels": {"urgency": {"value": "normal", "confidence": 0.62}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 7}, "authority": {"may_send_external": false, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
      "expected_recommendation": "summarize",
      "human_or_atlas_decision": "log",
      "expected_outcome": "false_positive",
      "expected_confidence_bucket": "medium"
    },
    {
      "id": "batch-receipt-action",
      "lane": "batch_triage",
      "source": "synthetic_fixture_file",
      "service": "npu_batch_triage_dry_run",
      "input_class": "receipt_with_deadline",
      "document_text": "Synthetic receipt. Amount due $42.00. Please follow up by 2026-06-10.",
      "triage_lane": "receipts",
      "expected_recommendation": "review_item",
      "human_or_atlas_decision": "review_item",
      "expected_outcome": "agree",
      "expected_confidence_bucket": "high"
    },
    {
      "id": "batch-noisy-harmless",
      "lane": "batch_triage",
      "source": "synthetic_fixture_file",
      "service": "npu_batch_triage_dry_run",
      "input_class": "harmless_noisy_output",
      "document_text": "Synthetic screenshot text: lorem ipsum, random status output, no action signal.",
      "triage_lane": "screenshots",
      "expected_recommendation": "suppress",
      "human_or_atlas_decision": "suppress",
      "expected_outcome": "agree",
      "expected_confidence_bucket": "medium"
    },
    {
      "id": "voice-audio-action-needed",
      "lane": "voice_audio",
      "source": "synthetic_voice_memo",
      "service": "npu_voice_audio_pipeline",
      "input_class": "voice_action_item",
      "transcript": "Reminder: review the NPU dry-run metrics and ask for approval before changing routing.",
      "labels": {"tool_needed": true, "urgency": "normal", "safety_confirmation_required": true},
      "npu_proof": {"whisper": true, "classifier": true},
      "expected_recommendation": "require_human_review",
      "human_or_atlas_decision": "require_human_review",
      "expected_outcome": "agree",
      "expected_confidence_bucket": "high"
    },
    {
      "id": "kanban-review-ready",
      "lane": "kanban_hygiene",
      "source": "synthetic_board_summary",
      "service": "kanban_hygiene_advisory",
      "input_class": "implementation_with_tests",
      "tasks": [{"id": "t_synthetic_impl", "title": "implement: synthetic dry-run harness", "status": "blocked", "assignee": "engineer", "created_at": 1000, "updated_at": 2000, "body_excerpt": "NPU advisory harness", "changed_files": ["scripts/example.py"], "tests_run": 3, "last_comment_excerpt": "review-required handoff"}],
      "now": 2600,
      "expected_recommendation": "ready_for_review",
      "human_or_atlas_decision": "ready_for_review",
      "expected_outcome": "agree",
      "expected_confidence_bucket": "high"
    },
    {
      "id": "gateway-authority-violation",
      "lane": "advisory_gateway_envelope",
      "source": "synthetic_gateway",
      "service": "openvino_advisory_gateway",
      "input_class": "authority_flag_violation",
      "gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-violation", "result": {"labels": {"urgency": {"value": "critical", "confidence": 0.9}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 11}, "authority": {"may_send_external": true, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
      "expected_recommendation": "block_authority_violation",
      "human_or_atlas_decision": "block_authority_violation",
      "expected_outcome": "agree",
      "expected_confidence_bucket": "high"
    }
  ]
 }
@@ -1 +1 @@
-{"agent_mode_auto_approval": true, "annotations_enabled": true, "azure_only": false, "blackbird_clientside_indexing": false, "chat_enabled": true, "chat_jetbrains_enabled": true, "code_quote_enabled": true, "code_review_enabled": true, "codesearch": true, "copilotignore_enabled": false, "endpoints": {"api": "https://api.individual.githubcopilot.com", "origin-tracker": "https://origin-tracker.individual.githubcopilot.com", "proxy": "https://proxy.individual.githubcopilot.com", "telemetry": "https://telemetry.individual.githubcopilot.com"}, "expires_at": 1776916468, "individual": true, "limited_user_quotas": null, "limited_user_reset_date": null, "prompt_8k": true, "public_suggestions": "disabled", "refresh_in": 1500, "sku": "plus_monthly_subscriber_quota", "snippy_load_test_enabled": false, "telemetry": "disabled", "token": "tid=ded1d75350f66adcb3d0ab36e8e78c47;exp=1776916468;sku=plus_monthly_subscriber_quota;proxy-ep=proxy.individual.githubcopilot.com;st=dotcom;chat=1;cit=1;malfil=1;editor_preview_features=1;agent_mode=1;agent_mode_auto_approval=1;mcp=1;client_byok=0;ccr=1;8kp=1;ip=71.231.248.128;asn=AS7922:fda910fb829d6585876da7e06e037cf7e75745e2b4d41b49de4911d85794adcc", "tracking_id": "ded1d75350f66adcb3d0ab36e8e78c47", "vsc_electron_fetcher_v2": false, "xcode": true, "xcode_chat": false}
+{"agent_mode_auto_approval": true, "annotations_enabled": true, "azure_only": false, "blackbird_clientside_indexing": false, "chat_enabled": true, "chat_jetbrains_enabled": true, "code_quote_enabled": true, "code_review_enabled": true, "codesearch": true, "copilotignore_enabled": false, "endpoints": {"api": "https://api.individual.githubcopilot.com", "origin-tracker": "https://origin-tracker.individual.githubcopilot.com", "proxy": "https://proxy.individual.githubcopilot.com", "telemetry": "https://telemetry.individual.githubcopilot.com"}, "expires_at": 1774543278, "individual": true, "limited_user_quotas": null, "limited_user_reset_date": null, "prompt_8k": true, "public_suggestions": "disabled", "refresh_in": 1500, "sku": "plus_monthly_subscriber_quota", "snippy_load_test_enabled": false, "telemetry": "disabled", "token": "tid=ded1d75350f66adcb3d0ab36e8e78c47;exp=1774543278;sku=plus_monthly_subscriber_quota;proxy-ep=proxy.individual.githubcopilot.com;st=dotcom;chat=1;cit=1;malfil=1;editor_preview_features=1;agent_mode=1;agent_mode_auto_approval=1;mcp=1;ccr=1;8kp=1;ip=24.143.97.87;asn=AS11404:7f079a450cf1a45b238724eb0795e12bf36218ab99ffc6c4b84089e6e7e674b1", "tracking_id": "ded1d75350f66adcb3d0ab36e8e78c47", "vsc_electron_fetcher_v2": false, "xcode": true, "xcode_chat": false}
@@ -146,9 +146,29 @@ add_model "zai-glm-5"       "openai/glm-5"         "ZAI_API_KEY" "https://api.z.
 add_model "glm-4.7-flash"   "openai/glm-4.7-flash" "ZAI_API_KEY" "https://api.z.ai/api/coding/paas/v4"
 add_model "glm-5"           "openai/glm-5"         "ZAI_API_KEY" "https://api.z.ai/api/coding/paas/v4"
-# GitHub Copilot models are intentionally not registered here.
+# GitHub Copilot (token-file auth, no API key)
-# The token-file auth path caused repeated 403 refresh loops in LiteLLM when
+add_copilot_model "copilot-gpt-4o"              "gpt-4o"
-# Copilot credentials expired, slowing /health/liveliness responses.
+add_copilot_model "copilot-gpt-4.1"             "gpt-4.1"
 add_copilot_model "copilot-gpt-5-mini"          "gpt-5-mini"
 add_copilot_model "copilot-gpt-5.1"             "gpt-5.1"
 add_copilot_model "copilot-gpt-5.2"             "gpt-5.2"
 add_copilot_model "copilot-gpt-5.1-codex"       "gpt-5.1-codex"
 add_copilot_model "copilot-gpt-5.1-codex-max"   "gpt-5.1-codex-max"
 add_copilot_model "copilot-gpt-5.1-codex-mini"  "gpt-5.1-codex-mini"
 add_copilot_model "copilot-gpt-5.2-codex"       "gpt-5.2-codex"
 add_copilot_model "copilot-gpt-5.3-codex"       "gpt-5.3-codex"
 add_copilot_model "copilot-claude-opus-4.6"      "claude-opus-4.6"
 add_copilot_model "copilot-claude-opus-4.6-fast" "claude-opus-4.6-fast"
 add_copilot_model "copilot-claude-sonnet-4.6"    "claude-sonnet-4.6"
 add_copilot_model "copilot-claude-sonnet-4.5"    "claude-sonnet-4.5"
 add_copilot_model "copilot-claude-sonnet-4"      "claude-sonnet-4"
 add_copilot_model "copilot-claude-opus-4.5"      "claude-opus-4.5"
 add_copilot_model "copilot-claude-haiku-4.5"     "claude-haiku-4.5"
 add_copilot_model "copilot-gemini-2.5-pro"       "gemini-2.5-pro"
 add_copilot_model "copilot-gemini-3-flash"       "gemini-3-flash-preview"
 add_copilot_model "copilot-gemini-3-pro"         "gemini-3-pro-preview"
 add_copilot_model "copilot-gemini-3.1-pro"       "gemini-3.1-pro-preview"
 add_copilot_model "copilot-grok-code-fast"       "grok-code-fast-1"
 # Local models (llama.cpp — no API key, custom model_info)
 if ! echo "$EXISTING" | grep -qx "gemma-3-12b-local"; then
@@ -0,0 +1,2 @@
 AGENTMON_INGEST_URL=http://192.168.122.1:8080
 AGENTMON_VM_NAME=zap
@@ -0,0 +1,60 @@
 {
  "version": 1,
  "profiles": {
    "litellm:default": {
      "type": "api_key",
      "provider": "litellm",
      "keyRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/main/litellm:default/key"
      }
    },
    "github-copilot:github": {
      "type": "token",
      "provider": "github-copilot",
      "tokenRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/main/github-copilot:github/token"
      }
    },
    "anthropic:manual": {
      "type": "token",
      "provider": "anthropic",
      "token": "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"
    },
    "openai-codex:default": {
      "type": "oauth",
      "provider": "openai-codex",
      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0Mjk2MTg1LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzM0MzIxODQsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiY2FhZDQ2ZmEtNGIxMy00ZTI5LTg2N2QtZjI4ZWVhZGFiNGVjIiwibmJmIjoxNzczNDMyMTg0LCJwd2RfYXV0aF90aW1lIjoxNzcyNTA0OTE1NzM1LCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX1RJVkZNWkRJcjNWWEk5NWhUa3BQUXczQyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.SELfl6WbyaSSZn03yKR95dFvgrLeAPqiCieGxWOqY2PJQQb_cxmjY3yGJqTEVofGF-pyeDZVWx3HAB20Ng-9KbKQKFMdNTxuURb3uoRRuoit4cbg2kwH7lL07nQXKkY8nkusJLsLNQCZYGziW8WMAdypwEvm2ODWWav0ygl3PLJWjRj5OZ1Mcc_mRj6koYahgmWWoMo7oyDOn5tHpZKIxaSPRVBMvEee7JH3FP8zauPrlfmh6uIVhaY4ANwJqOM9bBbiFTv6unaQXx57uDaLo9XZOPa-vMeDWQYNvGs8XcKng3AE8-CMlQV1G_TRiWYZTFH9k5O3YGBO0t-h0jWNG658ccVcLoYB2PQ_3BmTTSpU2lQ6VosCDvg6SMA-GtI_kEOwV5XmsHpoDL6VyD--6EMxUyrYZ2W8sC4b6k-H58Bu-p4MO_Qc00nMhimBz_JP9vlfF9Dg1rypW9KA9gPZUgJR_dDG3bPofMQFAyGGrLHoXUqCYWJn0dLzW5wrmbNz1gOI3WNJjVUCmKzaEY3w2bpci90WGxIixrnVAoaP5XQQyw4x_urYbEdXlzuEERlFtkZIxRUMQAp9OwSaU76KnCrXVNsBUQdXNN_mdNKr1riebh4hzsgAnCkj1hazrT1hkWGD8eMrUFcLymu5OIYcdzxq-nroUhX6566L7mWozHk",
      "refresh": "rt_lGvf7w6JR1AvXL0Dc7xCGcZf7P0P4kkcFW_VmTSccVA.56jMY8jGDblmVXZ9egKC57skTCl4clEGo2_cDyBzIRQ",
      "expires": 1774296185000,
      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
    }
  },
  "lastGood": {
    "litellm": "litellm:default",
    "openai-codex": "openai-codex:default",
    "github-copilot": "github-copilot:github"
  },
  "usageStats": {
    "litellm:default": {
      "lastUsed": 1774519204807,
      "errorCount": 0,
      "lastFailureAt": 1774054888659
    },
    "github-copilot:github": {
      "errorCount": 0,
      "lastUsed": 1774509616458
    },
    "anthropic:manual": {
      "errorCount": 0,
      "lastUsed": 1773951080133
    },
    "openai-codex:default": {
      "lastUsed": 1773258773792,
      "errorCount": 0
    }
  }
 }
@@ -0,0 +1,48 @@
 {
  "version": 1,
  "profiles": {
    "openai-codex:default": {
      "type": "oauth",
      "provider": "openai-codex",
      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
      "expires": 1774223333756,
      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
    },
    "litellm:default": {
      "type": "api_key",
      "provider": "litellm",
      "keyRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/claude/litellm:default/key"
      }
    },
    "github-copilot:github": {
      "type": "token",
      "provider": "github-copilot",
      "tokenRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/claude/github-copilot:github/token"
      }
    }
  },
  "lastGood": {
    "openai-codex": "openai-codex:default"
  },
  "usageStats": {
    "openai-codex:default": {
      "lastUsed": 1772604450987,
      "errorCount": 0
    },
    "litellm:default": {
      "lastUsed": 1772578967681,
      "errorCount": 0
    },
    "github-copilot:github": {
      "errorCount": 0,
      "lastUsed": 1772589980031
    }
  }
 }
@@ -0,0 +1,48 @@
 {
  "version": 1,
  "profiles": {
    "openai-codex:default": {
      "type": "oauth",
      "provider": "openai-codex",
      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
      "expires": 1774223333756,
      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
    },
    "litellm:default": {
      "type": "api_key",
      "provider": "litellm",
      "keyRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/codex/litellm:default/key"
      }
    },
    "github-copilot:github": {
      "type": "token",
      "provider": "github-copilot",
      "tokenRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/codex/github-copilot:github/token"
      }
    }
  },
  "lastGood": {
    "openai-codex": "openai-codex:default"
  },
  "usageStats": {
    "openai-codex:default": {
      "lastUsed": 1772604395502,
      "errorCount": 0
    },
    "litellm:default": {
      "lastUsed": 1772578967681,
      "errorCount": 0
    },
    "github-copilot:github": {
      "errorCount": 0,
      "lastUsed": 1772589980031
    }
  }
 }
@@ -0,0 +1,48 @@
 {
  "version": 1,
  "profiles": {
    "openai-codex:default": {
      "type": "oauth",
      "provider": "openai-codex",
      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
      "expires": 1774223333756,
      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
    },
    "litellm:default": {
      "type": "api_key",
      "provider": "litellm",
      "keyRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/copilot/litellm:default/key"
      }
    },
    "github-copilot:github": {
      "type": "token",
      "provider": "github-copilot",
      "tokenRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/copilot/github-copilot:github/token"
      }
    }
  },
  "lastGood": {
    "openai-codex": "openai-codex:default"
  },
  "usageStats": {
    "openai-codex:default": {
      "lastUsed": 1772604323305,
      "errorCount": 0
    },
    "litellm:default": {
      "lastUsed": 1772578967681,
      "errorCount": 0
    },
    "github-copilot:github": {
      "errorCount": 0,
      "lastUsed": 1772589980031
    }
  }
 }
@@ -0,0 +1,44 @@
 {
  "version": 1,
  "profiles": {
    "openai-codex:default": {
      "type": "oauth",
      "provider": "openai-codex",
      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
      "expires": 1774223333756,
      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
    },
    "litellm:default": {
      "type": "api_key",
      "provider": "litellm",
      "keyRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/main/litellm:default/key"
      }
    },
    "github-copilot:github": {
      "type": "token",
      "provider": "github-copilot",
      "token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
    }
  },
  "lastGood": {
    "openai-codex": "openai-codex:default"
  },
  "usageStats": {
    "openai-codex:default": {
      "lastUsed": 1773619245145,
      "errorCount": 0
    },
    "litellm:default": {
      "lastUsed": 1773861012447,
      "errorCount": 0
    },
    "github-copilot:github": {
      "errorCount": 0,
      "lastUsed": 1773807909397
    }
  }
 }
@@ -0,0 +1,44 @@
 {
  "version": 1,
  "profiles": {
    "openai-codex:default": {
      "type": "oauth",
      "provider": "openai-codex",
      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
      "expires": 1774223333756,
      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
    },
    "litellm:default": {
      "type": "api_key",
      "provider": "litellm",
      "keyRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/main/litellm:default/key"
      }
    },
    "github-copilot:github": {
      "type": "token",
      "provider": "github-copilot",
      "token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
    }
  },
  "lastGood": {
    "openai-codex": "openai-codex:default"
  },
  "usageStats": {
    "openai-codex:default": {
      "lastUsed": 1773619245145,
      "errorCount": 0
    },
    "litellm:default": {
      "lastUsed": 1773861088545,
      "errorCount": 0
    },
    "github-copilot:github": {
      "errorCount": 0,
      "lastUsed": 1773807909397
    }
  }
 }
@@ -0,0 +1,44 @@
 {
  "version": 1,
  "profiles": {
    "openai-codex:default": {
      "type": "oauth",
      "provider": "openai-codex",
      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
      "expires": 1774223333756,
      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
    },
    "litellm:default": {
      "type": "api_key",
      "provider": "litellm",
      "keyRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/main/litellm:default/key"
      }
    },
    "github-copilot:github": {
      "type": "token",
      "provider": "github-copilot",
      "token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
    }
  },
  "lastGood": {
    "openai-codex": "openai-codex:default"
  },
  "usageStats": {
    "openai-codex:default": {
      "lastUsed": 1773619245145,
      "errorCount": 0
    },
    "litellm:default": {
      "lastUsed": 1773861006543,
      "errorCount": 0
    },
    "github-copilot:github": {
      "errorCount": 0,
      "lastUsed": 1773807909397
    }
  }
 }
@@ -0,0 +1,44 @@
 {
  "version": 1,
  "profiles": {
    "openai-codex:default": {
      "type": "oauth",
      "provider": "openai-codex",
      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
      "expires": 1774223333756,
      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
    },
    "litellm:default": {
      "type": "api_key",
      "provider": "litellm",
      "keyRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/main/litellm:default/key"
      }
    },
    "github-copilot:github": {
      "type": "token",
      "provider": "github-copilot",
      "token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
    }
  },
  "lastGood": {
    "openai-codex": "openai-codex:default"
  },
  "usageStats": {
    "openai-codex:default": {
      "lastUsed": 1773619245145,
      "errorCount": 0
    },
    "litellm:default": {
      "lastUsed": 1773861006949,
      "errorCount": 0
    },
    "github-copilot:github": {
      "errorCount": 0,
      "lastUsed": 1773807909397
    }
  }
 }
@@ -0,0 +1,66 @@
 {
  "version": 1,
  "profiles": {
    "litellm:default": {
      "type": "api_key",
      "provider": "litellm",
      "keyRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/main/litellm:default/key"
      }
    },
    "github-copilot:github": {
      "type": "token",
      "provider": "github-copilot",
      "token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
    },
    "anthropic:manual": {
      "type": "token",
      "provider": "anthropic",
      "token": "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"
    },
    "openai-codex:default": {
      "type": "oauth",
      "provider": "openai-codex",
      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc1MjU2NDA5LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsImxvY2FsaG9zdCI6dHJ1ZSwidXNlcl9pZCI6InVzZXItVVh2bTQxVEpRblNCbGRkSFh4NnpIbEVrIn0sImh0dHBzOi8vYXBpLm9wZW5haS5jb20vbWZhIjp7InJlcXVpcmVkIjoieWVzIn0sImh0dHBzOi8vYXBpLm9wZW5haS5jb20vcHJvZmlsZSI6eyJlbWFpbCI6IndpbGxpYW0udmFsZW50aW4uaW5mb0BnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZX0sImlhdCI6MTc3NDM5MjQwOSwiaXNzIjoiaHR0cHM6Ly9hdXRoLm9wZW5haS5jb20iLCJqdGkiOiJkYmUwNDM4YS05NTg3LTRiMTUtOGUzNC03Y2ExMmVjOTc0NWQiLCJuYmYiOjE3NzQzOTI0MDksInB3ZF9hdXRoX3RpbWUiOjE3NzQzOTI0MDg0NjIsInNjcCI6WyJvcGVuaWQiLCJwcm9maWxlIiwiZW1haWwiLCJvZmZsaW5lX2FjY2VzcyJdLCJzZXNzaW9uX2lkIjoiYXV0aHNlc3NfOXVmVUlZN2o1WHk4bGtoU2MwUHNQM1lOIiwic2wiOnRydWUsInN1YiI6Imdvb2dsZS1vYXV0aDJ8MTA2MzM3Njg2NTgzNTkyODA4MDE3In0.m1PHZz2u9V9qiVN0hr8alKl6Ia4xv541BfnLLJkkRu3LiKrY-WCCOdxtbpu7dp8hphMMWrGCA4BWM6EE2Q4P0J5oE4PoOAzBU9-0ZdxSQNetiXdM5r7aETj4gY3nZFEtFAlig6hEuJrCK0XqgJ51BD7J_PXwkKTOKvv3-e8yvbp6vNTDSthUpsjgEN56hCUMnTt-aX8draeaWqHZe4gG09z8qRi1fZP8v0N8C8MPdOOBZdx3dQ2aK9zh0VDDyTvhqcbhSMVLpUxpzSeFIiFa8B03xOGGYhV5KCDTN7phCbak2PM7AdO6fOCrBTDDLQP2bC4Lt3yM9R7tXSw4luktMLX7sKe-KLR9CxKmDs5HdzMs5JDGcge9buKRzEBFD49oOM8NfsyRP6ko6CCNZSkz3mgQHT3_t-nCK7bpZHyTkIoGeT1fcKP8dGweSwUgtuUSjx0pVzZGbTkiBQTgqADelJkKA9WtBFoKPSgAXUiNrOJ_wYV3R3EQbGoVLX3cSrKYJIBdXcFF2YNKV_8ohKVNg4CtLJQwavQrHsWB74qQ_iHJvcr8GcMG-88S6-r8n4dSCzHXpqqMYQq7I8FR6dd_DmZIuweDR5Y4Bpx60MucF-qhfL1i4Bjv4zvDhodfRigcPyHi2mNLSclOGMA_Z_zW4YlnSvkskCQ2QX
25pFN-6nY",
      "refresh": "rt_32BgvDGye6b5FDHfAAuzBQHbSAU0sh86-1CXFptTGk0.m-3-mXXjX4rKQix5MRvFqQHI5DVVi_OnG6ZXiLPIc48",
      "expires": 1775256408618
    }
  },
  "lastGood": {
    "litellm": "litellm:default",
    "openai-codex": "openai-codex:default",
    "anthropic": "anthropic:manual",
    "github-copilot": "github-copilot:github"
  },
  "usageStats": {
    "litellm:default": {
      "lastUsed": 1774146240157,
      "errorCount": 2,
      "failureCounts": {
        "billing": 2
      },
      "lastFailureAt": 1774464853910,
      "disabledUntil": 1774482776360,
      "disabledReason": "billing"
    },
    "github-copilot:github": {
      "errorCount": 0,
      "lastUsed": 1774518526913
    },
    "anthropic:manual": {
      "errorCount": 1,
      "lastUsed": 1774435478002,
      "lastFailureAt": 1774496992044,
      "failureCounts": {
        "rate_limit": 1
      },
      "cooldownUntil": 1774497052044
    },
    "openai-codex:default": {
      "errorCount": 0,
      "lastUsed": 1774473515274
    }
  }
 }
@@ -0,0 +1,48 @@
 {
  "version": 1,
  "profiles": {
    "openai-codex:default": {
      "type": "oauth",
      "provider": "openai-codex",
      "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
      "refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
      "expires": 1774223333756,
      "accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
    },
    "litellm:default": {
      "type": "api_key",
      "provider": "litellm",
      "keyRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/opencode/litellm:default/key"
      }
    },
    "github-copilot:github": {
      "type": "token",
      "provider": "github-copilot",
      "tokenRef": {
        "source": "file",
        "provider": "filemain",
        "id": "/authProfiles/opencode/github-copilot:github/token"
      }
    }
  },
  "lastGood": {
    "openai-codex": "openai-codex:default"
  },
  "usageStats": {
    "openai-codex:default": {
      "lastUsed": 1772604363465,
      "errorCount": 0
    },
    "litellm:default": {
      "lastUsed": 1772578967681,
      "errorCount": 0
    },
    "github-copilot:github": {
      "errorCount": 0,
      "lastUsed": 1772589980031
    }
  }
 }
@@ -0,0 +1,149 @@
 {
  "ad0ebece2493ecaf2336b939a2cc27e65261695c8c8725416e1d349da02a14d5": {
    "deviceId": "ad0ebece2493ecaf2336b939a2cc27e65261695c8c8725416e1d349da02a14d5",
    "publicKey": "zezYCyurUtpYNt9j6bBc5Cz5xFVdnknXzhoCVAOFiwY",
    "platform": "linux",
    "clientId": "cli",
    "clientMode": "cli",
    "role": "operator",
    "roles": [
      "operator"
    ],
    "scopes": [
      "operator.read",
      "operator.admin",
      "operator.write",
      "operator.approvals",
      "operator.pairing"
    ],
    "approvedScopes": [
      "operator.read",
      "operator.admin",
      "operator.write",
      "operator.approvals",
      "operator.pairing"
    ],
    "tokens": {
      "operator": {
        "token": "pg1GmeUDISnd7tcZBg7egNxxZSfJOpYJ1CfjrVXA9r0",
        "role": "operator",
        "scopes": [
          "operator.admin",
          "operator.approvals",
          "operator.pairing",
          "operator.read",
          "operator.write"
        ],
        "createdAtMs": 1772478478331,
        "rotatedAtMs": 1772478926904,
        "lastUsedAtMs": 1772587382647
      }
    },
    "createdAtMs": 1772478478331,
    "approvedAtMs": 1772478926904
  },
  "5edabd97839bb827cf4a7e1bdbbf52d3bdc14ee3ed6cd4488dea64165a343a96": {
    "deviceId": "5edabd97839bb827cf4a7e1bdbbf52d3bdc14ee3ed6cd4488dea64165a343a96",
    "publicKey": "MvxEPmOjuhaOctHiiTGNWbrb3PqNKdtJH2tNUmnUDFg",
    "platform": "Linux x86_64",
    "clientId": "openclaw-control-ui",
    "clientMode": "webchat",
    "role": "operator",
    "roles": [
      "operator"
    ],
    "scopes": [
      "operator.admin",
      "operator.approvals",
      "operator.pairing"
    ],
    "approvedScopes": [
      "operator.admin",
      "operator.approvals",
      "operator.pairing"
    ],
    "tokens": {
      "operator": {
        "token": "o7iad673N6wjzvtaLZi3pi5oOec2a14jRqD0DTqAsNM",
        "role": "operator",
        "scopes": [
          "operator.admin",
          "operator.approvals",
          "operator.pairing"
        ],
        "createdAtMs": 1772562796594,
        "lastUsedAtMs": 1772563663633
      }
    },
    "createdAtMs": 1772562796594,
    "approvedAtMs": 1772562796594
  },
  "5d129a0d4e4c48a61ac4132f4f71c6eccf4df41d066a03076bcf255f1e71f0dc": {
    "deviceId": "5d129a0d4e4c48a61ac4132f4f71c6eccf4df41d066a03076bcf255f1e71f0dc",
    "publicKey": "1KPQKT74AgGXb8B6O8vTQqkCFBBTI1_9Y2jVvzVI6G4",
    "platform": "Linux x86_64",
    "clientId": "openclaw-control-ui",
    "clientMode": "webchat",
    "role": "operator",
    "roles": [
      "operator"
    ],
    "scopes": [
      "operator.admin",
      "operator.approvals",
      "operator.pairing",
      "operator.read",
      "operator.write"
    ],
    "approvedScopes": [
      "operator.admin",
      "operator.approvals",
      "operator.pairing"
    ],
    "tokens": {
      "operator": {
        "token": "2nXUowAOJpF7bCROTQ4-q50zUe2FHRzJDhmpFQe0DQ4",
        "role": "operator",
        "scopes": [
          "operator.admin",
          "operator.approvals",
          "operator.pairing"
        ],
        "createdAtMs": 1772563930487,
        "lastUsedAtMs": 1774510441434
      }
    },
    "createdAtMs": 1772563930487,
    "approvedAtMs": 1772563930487
  },
  "549bd550370c304528dad163bf24f004d94acb9bb659020fb44e88b4f73c1ee1": {
    "deviceId": "549bd550370c304528dad163bf24f004d94acb9bb659020fb44e88b4f73c1ee1",
    "publicKey": "hX_4gWll3JPphbMZQ2fjPIXDXwp51gaILYB64KyimBE",
    "displayName": "subagent-reliability-harness",
    "platform": "linux",
    "clientId": "test",
    "clientMode": "test",
    "role": "operator",
    "roles": [
      "operator"
    ],
    "scopes": [
      "operator.admin"
    ],
    "approvedScopes": [
      "operator.admin"
    ],
    "tokens": {
      "operator": {
        "token": "fDTz6u2K-fKNq4Cc-VoSQkbfltPCN1tqetg52yhsJk8",
        "role": "operator",
        "scopes": [
          "operator.admin"
        ],
        "createdAtMs": 1773424919036
      }
    },
    "createdAtMs": 1773424919036,
    "approvedAtMs": 1773424919036
  }
 }
@@ -0,0 +1,12 @@
 {
  "version": 1,
  "entries": {
    "discord:default:guild:425781660781641729:user:425208577846935553": {
      "recent": [
        "github-copilot/claude-sonnet-4.6",
        "openai-codex/gpt-5.4"
      ],
      "updatedAt": "2026-03-25T19:35:30.248Z"
    }
  }
 }
@@ -0,0 +1,20 @@
 ---
 name: boot-md
 description: "Run BOOT.md on gateway startup"
 homepage: https://docs.openclaw.ai/automation/hooks#boot-md
 metadata:
  {
    "openclaw":
      {
        "emoji": "🚀",
        "events": ["gateway:startup"],
        "requires": { "config": ["workspace.dir"] },
        "install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
      },
  }
 ---
 # Boot Checklist Hook
 Runs `BOOT.md` at gateway startup for each configured agent scope, if the file exists in that
 agent's resolved workspace.
@@ -0,0 +1,221 @@
 import { c as resolveAgentWorkspaceDir, r as listAgentIds } from "../../run-with-concurrency-Cuc1THN9.js";
 import "../../paths-hfkBoC7i.js";
 import { a as defaultRuntime, t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
 import { B as resolveAgentIdFromSessionKey } from "../../workspace-CaW79EXh.js";
 import "../../logger-BW8uLq6f.js";
 import "../../model-selection-BU6wl1le.js";
 import "../../github-copilot-token-CQmATy5E.js";
 import { a as isGatewayStartupEvent } from "../../legacy-names-BAf61_0I.js";
 import "../../thinking-B5B36ffe.js";
 import { n as SILENT_REPLY_TOKEN } from "../../tokens-CT3nywWU.js";
 import { o as agentCommand, s as createDefaultDeps } from "../../pi-embedded-C6ITuRXf.js";
 import "../../plugins-BZr8LJrk.js";
 import "../../accounts-D4KOSoV2.js";
 import "../../send-BLQvMYTW.js";
 import "../../send-DyQ6zcob.js";
 import "../../deliver-ClGktCjk.js";
 import "../../diagnostic-B9sgiG77.js";
 import "../../accounts-cJqOTvBI.js";
 import "../../image-ops-D4vlUR_L.js";
 import "../../send-D4CMR9ev.js";
 import "../../pi-model-discovery--C0FuY_K.js";
 import { Dt as resolveAgentMainSessionKey, W as loadSessionStore, Y as updateSessionStore, kt as resolveMainSessionKey } from "../../pi-embedded-helpers-CkWXaNFn.js";
 import "../../chrome-u1QjWgKY.js";
 import "../../frontmatter-CZF6xkL3.js";
 import "../../skills-B24U0XQQ.js";
 import "../../path-alias-guards-CouH80Zp.js";
 import "../../redact-DSv8X-3F.js";
 import "../../errors-_LEe37ld.js";
 import "../../fs-safe-DOYVoR6M.js";
 import "../../proxy-env-BZseFuIl.js";
 import "../../store-BteyapSQ.js";
 import { s as resolveStorePath } from "../../paths-Co-u8IhA.js";
 import "../../tool-images-C0W994KU.js";
 import "../../image-fMgabouP.js";
 import "../../audio-transcription-runner-DfRfzdqH.js";
 import "../../fetch-JzejSI-7.js";
 import "../../fetch-guard-C3LWD6FT.js";
 import "../../api-key-rotation-CLI6TxVv.js";
 import "../../proxy-fetch-CbII9--S.js";
 import "../../ir-D_UJzvhu.js";
 import "../../render-7C7EDC8_.js";
 import "../../target-errors-C8xePsI5.js";
 import "../../commands-registry-DJWLO-6B.js";
 import "../../skill-commands-B6iXy7Nx.js";
 import "../../fetch-CONQGbzL.js";
 import "../../channel-activity-CVe33Aey.js";
 import "../../tables-DushlpuO.js";
 import "../../send-CHthYes-.js";
 import "../../outbound-attachment-3soL6fn0.js";
 import "../../send-DYCEGbmH.js";
 import "../../proxy-BzwL4n0W.js";
 import "../../manager-DS9FBMMG.js";
 import "../../query-expansion-DUWWrH-g.js";
 import fs from "node:fs/promises";
 import path from "node:path";
 import crypto from "node:crypto";
 //#region src/gateway/boot.ts
 /**
 * Build a unique id for a single boot run.
 *
 * @returns {string} `boot-<timestamp>-<suffix>`: the timestamp is the current ISO-8601
 * time with ":" and "." turned into "-", "T" turned into "_" and "Z" removed; the suffix
 * is the first 8 characters of a freshly generated random UUID.
 */
 function generateBootSessionId() {
 	const isoNow = new Date().toISOString();
 	const stamp = isoNow.replace(/[:.]/g, "-").replace("T", "_").replace("Z", "");
 	const suffix = crypto.randomUUID().slice(0, 8);
 	return `boot-${stamp}-${suffix}`;
 }
 // Logger created for the "gateway/boot" subsystem; used by the boot helpers in this region.
 const log$1 = createSubsystemLogger("gateway/boot");
 // File name of the boot checklist; it is joined onto an agent's workspace directory when loaded.
 const BOOT_FILENAME = "BOOT.md";
 /**
 * Compose the prompt text sent to the agent for a boot check.
 *
 * The prompt is a newline-joined list: a fixed preamble, the BOOT.md content, and the
 * instructions on how to send messages and how to reply with SILENT_REPLY_TOKEN.
 *
 * @param content Text of BOOT.md to embed in the prompt.
 * @returns {string} The full prompt.
 */
 function buildBootPrompt(content) {
 	const preamble = [
 		"You are running a boot check. Follow BOOT.md instructions exactly.",
 		"",
 		"BOOT.md:",
 		content,
 		""
 	];
 	const instructions = [
 		"If BOOT.md asks you to send a message, use the message tool (action=send with channel + target).",
 		"Use the `target` field (not `to`) for message tool destinations.",
 		`After sending with the message tool, reply with ONLY: ${SILENT_REPLY_TOKEN}.`,
 		`If nothing needs attention, reply with ONLY: ${SILENT_REPLY_TOKEN}.`
 	];
 	return [...preamble, ...instructions].join("\n");
 }
 /**
 * Read the boot checklist file from a workspace directory.
 *
 * @param workspaceDir Directory expected to contain BOOT_FILENAME.
 * @returns A promise for `{ status: "missing" }` when the file does not exist (ENOENT),
 * `{ status: "empty" }` when it is blank after trimming, or
 * `{ status: "ok", content }` with the trimmed text.
 * @throws Any read error other than ENOENT is rethrown unchanged.
 */
 async function loadBootFile(workspaceDir) {
 	const bootPath = path.join(workspaceDir, BOOT_FILENAME);
 	let raw;
 	try {
 		raw = await fs.readFile(bootPath, "utf-8");
 	} catch (readError) {
 		// A missing file is an expected outcome, not a failure.
 		if (readError.code === "ENOENT") return { status: "missing" };
 		throw readError;
 	}
 	const body = raw.trim();
 	if (body) return {
 		status: "ok",
 		content: body
 	};
 	return { status: "empty" };
 }
 /**
 * Capture the session-store entry currently stored under `params.sessionKey` so that it
 * can be put back after the boot run.
 *
 * @param params `{ cfg, sessionKey }`; `cfg.session?.store` is passed to resolveStorePath.
 * @returns `{ storePath, sessionKey, canRestore, hadEntry, entry? }`. `entry` is a
 * structured clone and is present only when an entry existed. If loading the store
 * throws, the error is logged at debug level and `canRestore` is false.
 */
 function snapshotMainSessionMapping(params) {
 	const { sessionKey } = params;
 	const agentId = resolveAgentIdFromSessionKey(sessionKey);
 	const storePath = resolveStorePath(params.cfg.session?.store, { agentId });
 	let existing;
 	try {
 		existing = loadSessionStore(storePath, { skipCache: true })[sessionKey];
 		// Clone inside the try so a clone failure is handled like a load failure.
 		if (existing) existing = structuredClone(existing);
 	} catch (snapshotError) {
 		log$1.debug("boot: could not snapshot main session mapping", {
 			sessionKey,
 			error: String(snapshotError)
 		});
 		return {
 			storePath,
 			sessionKey,
 			canRestore: false,
 			hadEntry: false
 		};
 	}
 	if (existing) return {
 		storePath,
 		sessionKey,
 		canRestore: true,
 		hadEntry: true,
 		entry: existing
 	};
 	return {
 		storePath,
 		sessionKey,
 		canRestore: true,
 		hadEntry: false
 	};
 }
 /**
 * Put the session store back into the state recorded by a snapshot.
 *
 * When the snapshot held an entry, that entry is written back under the snapshot's
 * session key; otherwise the key is deleted from the store. Does nothing when
 * `snapshot.canRestore` is false.
 *
 * @param snapshot Object produced by snapshotMainSessionMapping.
 * @returns A promise for `undefined` on success (or when skipped), or for an error
 * message string when updating the store throws.
 */
 async function restoreMainSessionMapping(snapshot) {
 	if (!snapshot.canRestore) return;
 	const applySnapshot = (store) => {
 		const shouldReinstate = snapshot.hadEntry && snapshot.entry;
 		if (shouldReinstate) store[snapshot.sessionKey] = snapshot.entry;
 		else delete store[snapshot.sessionKey];
 	};
 	try {
 		await updateSessionStore(snapshot.storePath, applySnapshot, { activeSessionKey: snapshot.sessionKey });
 	} catch (restoreError) {
 		return restoreError instanceof Error ? restoreError.message : String(restoreError);
 	}
 }
 /**
 * Run the boot checklist once for one workspace.
 *
 * Steps: load BOOT.md, snapshot the main session mapping, run the agent with the boot
 * prompt under a fresh boot session id, then restore the session mapping.
 *
 * @param params `{ cfg, deps, workspaceDir, agentId? }`. When `agentId` is set the
 * agent-specific main session key is used, otherwise the global main session key.
 * @returns A promise for `{ status: "skipped", reason }` when BOOT.md is missing or
 * empty, `{ status: "ran" }` on success, or `{ status: "failed", reason }` when reading
 * the file, running the agent, or restoring the mapping failed.
 */
 async function runBootOnce(params) {
 	const describeError = (err) => err instanceof Error ? err.message : String(err);
 	// Runtime handed to the agent command: regular log output is dropped, errors go to the boot logger.
 	const bootRuntime = {
 		log: () => {},
 		error: (message) => log$1.error(String(message)),
 		exit: defaultRuntime.exit
 	};
 	let bootFile;
 	try {
 		bootFile = await loadBootFile(params.workspaceDir);
 	} catch (err) {
 		const reason = describeError(err);
 		log$1.error(`boot: failed to read ${BOOT_FILENAME}: ${reason}`);
 		return {
 			status: "failed",
 			reason
 		};
 	}
 	if (bootFile.status === "missing" || bootFile.status === "empty") return {
 		status: "skipped",
 		reason: bootFile.status
 	};
 	let sessionKey;
 	if (params.agentId) sessionKey = resolveAgentMainSessionKey({
 		cfg: params.cfg,
 		agentId: params.agentId
 	});
 	else sessionKey = resolveMainSessionKey(params.cfg);
 	const message = buildBootPrompt(bootFile.content ?? "");
 	const sessionId = generateBootSessionId();
 	// Taken before the agent run so the mapping can be restored afterwards.
 	const mappingSnapshot = snapshotMainSessionMapping({
 		cfg: params.cfg,
 		sessionKey
 	});
 	const failures = [];
 	try {
 		await agentCommand({
 			message,
 			sessionKey,
 			sessionId,
 			deliver: false,
 			senderIsOwner: true
 		}, bootRuntime, params.deps);
 	} catch (err) {
 		const agentFailure = describeError(err);
 		log$1.error(`boot: agent run failed: ${agentFailure}`);
 		if (agentFailure) failures.push(`agent run failed: ${agentFailure}`);
 	}
 	// The restore is attempted whether or not the agent run succeeded.
 	const mappingRestoreFailure = await restoreMainSessionMapping(mappingSnapshot);
 	if (mappingRestoreFailure) {
 		log$1.error(`boot: failed to restore main session mapping: ${mappingRestoreFailure}`);
 		failures.push(`mapping restore failed: ${mappingRestoreFailure}`);
 	}
 	if (failures.length === 0) return { status: "ran" };
 	return {
 		status: "failed",
 		reason: failures.join("; ")
 	};
 }
 //#endregion
 //#region src/hooks/bundled/boot-md/handler.ts
 // Logger created for the "hooks/boot-md" subsystem.
 const log = createSubsystemLogger("hooks/boot-md");
 /**
 * Hook handler: on a gateway startup event, run the boot checklist for every agent id
 * listed in the config, one after another.
 *
 * Events that are not gateway startup events, or that carry no `context.cfg`, are
 * ignored. A failed run is logged as a warning and a skipped run at debug level;
 * neither stops the remaining agents from being processed.
 *
 * @param event Hook event; `event.context` supplies `cfg` and optionally `deps`.
 */
 const runBootChecklist = async (event) => {
 	if (!isGatewayStartupEvent(event) || !event.context.cfg) return;
 	const { cfg } = event.context;
 	const deps = event.context.deps ?? createDefaultDeps();
 	for (const agentId of listAgentIds(cfg)) {
 		const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
 		const outcome = await runBootOnce({
 			cfg,
 			deps,
 			workspaceDir,
 			agentId
 		});
 		switch (outcome.status) {
 			case "failed":
 				log.warn("boot-md failed for agent startup run", {
 					agentId,
 					workspaceDir,
 					reason: outcome.reason
 				});
 				break;
 			case "skipped":
 				log.debug("boot-md skipped for agent startup run", {
 					agentId,
 					workspaceDir,
 					reason: outcome.reason
 				});
 				break;
 			default: break;
 		}
 	}
 };
 //#endregion
 export { runBootChecklist as default };
@@ -0,0 +1,53 @@
 ---
 name: bootstrap-extra-files
 description: "Inject additional workspace bootstrap files via glob/path patterns"
 homepage: https://docs.openclaw.ai/automation/hooks#bootstrap-extra-files
 metadata:
  {
    "openclaw":
      {
        "emoji": "📎",
        "events": ["agent:bootstrap"],
        "requires": { "config": ["workspace.dir"] },
        "install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
      },
  }
 ---
 # Bootstrap Extra Files Hook
 Loads additional bootstrap files into `Project Context` during `agent:bootstrap`.
 ## Why
 Use this when your workspace has multiple context roots (for example monorepos) and
 you want to include extra `AGENTS.md`/`TOOLS.md`-class files without changing the
 workspace root.
 ## Configuration
 ```json
 {
  "hooks": {
    "internal": {
      "enabled": true,
      "entries": {
        "bootstrap-extra-files": {
          "enabled": true,
          "paths": ["packages/*/AGENTS.md", "packages/*/TOOLS.md"]
        }
      }
    }
  }
 }
 ```
 ## Options
 - `paths` (string[]): preferred list of glob/path patterns.
 - `patterns` (string[]): alias of `paths`.
 - `files` (string[]): alias of `paths`.
 All paths are resolved from the workspace and must stay inside it (including realpath checks).
 Only recognized bootstrap basenames are loaded (`AGENTS.md`, `SOUL.md`, `TOOLS.md`,
 `IDENTITY.md`, `USER.md`, `HEARTBEAT.md`, `BOOTSTRAP.md`, `MEMORY.md`, `memory.md`).
@@ -0,0 +1,45 @@
 import "../../paths-hfkBoC7i.js";
 import { t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
 import { d as loadExtraBootstrapFilesWithDiagnostics, u as filterBootstrapFilesForSession } from "../../workspace-CaW79EXh.js";
 import "../../logger-BW8uLq6f.js";
 import { i as isAgentBootstrapEvent } from "../../legacy-names-BAf61_0I.js";
 import "../../frontmatter-CZF6xkL3.js";
 import { t as resolveHookConfig } from "../../config-Bs6iYHRw.js";
 //#region src/hooks/bundled/bootstrap-extra-files/handler.ts
 // Key passed to resolveHookConfig to look up this hook's configuration entry.
 const HOOK_KEY = "bootstrap-extra-files";
 // Logger used for this hook's debug and warning output.
 const log = createSubsystemLogger("bootstrap-extra-files");
 /**
 * Reduce an arbitrary value to a list of non-empty, trimmed strings.
 *
 * @param value Any value; anything that is not an array yields an empty list.
 * @returns {string[]} The string elements of `value`, trimmed, with non-strings and
 * blank strings dropped, in their original order.
 */
 function normalizeStringArray(value) {
 	if (!Array.isArray(value)) return [];
 	const cleaned = [];
 	for (const item of value) {
 		if (typeof item !== "string") continue;
 		const text = item.trim();
 		if (text) cleaned.push(text);
 	}
 	return cleaned;
 }
 /**
 * Pick the pattern list from the hook config.
 *
 * `paths` wins when it yields at least one pattern, then `patterns`; otherwise whatever
 * `files` normalizes to is returned, which may be an empty list.
 *
 * @param hookConfig Hook configuration object.
 * @returns {string[]} Normalized glob/path patterns.
 */
 function resolveExtraBootstrapPatterns(hookConfig) {
 	for (const key of ["paths", "patterns"]) {
 		const candidates = normalizeStringArray(hookConfig[key]);
 		if (candidates.length > 0) return candidates;
 	}
 	return normalizeStringArray(hookConfig.files);
 }
 /**
 * Hook handler: on an agent bootstrap event, load extra bootstrap files matching the
 * configured patterns and append them to `context.bootstrapFiles`.
 *
 * Returns without changes when the event is not an agent bootstrap event, the hook has
 * no config or is disabled, no patterns are configured, or no extra files were loaded.
 * Loading errors are logged as a warning and not rethrown.
 *
 * @param event Hook event; `event.context` supplies `cfg`, `workspaceDir`,
 * `bootstrapFiles` and `sessionKey`.
 */
 const bootstrapExtraFilesHook = async (event) => {
 	if (!isAgentBootstrapEvent(event)) return;
 	const { context } = event;
 	const hookConfig = resolveHookConfig(context.cfg, HOOK_KEY);
 	if (!hookConfig || hookConfig.enabled === false) return;
 	const patterns = resolveExtraBootstrapPatterns(hookConfig);
 	if (patterns.length === 0) return;
 	try {
 		const loaded = await loadExtraBootstrapFilesWithDiagnostics(context.workspaceDir, patterns);
 		const extras = loaded.files;
 		const diagnostics = loaded.diagnostics;
 		if (diagnostics.length > 0) {
 			// Tally how many candidates were skipped for each reason.
 			const reasons = {};
 			for (const item of diagnostics) reasons[item.reason] = (reasons[item.reason] ?? 0) + 1;
 			log.debug("skipped extra bootstrap candidates", {
 				skipped: diagnostics.length,
 				reasons
 			});
 		}
 		if (extras.length === 0) return;
 		const combined = [...context.bootstrapFiles, ...extras];
 		context.bootstrapFiles = filterBootstrapFilesForSession(combined, context.sessionKey);
 	} catch (err) {
 		log.warn(`failed: ${String(err)}`);
 	}
 };
 //#endregion
 export { bootstrapExtraFilesHook as default };
@@ -0,0 +1,122 @@
 ---
 name: command-logger
 description: "Log all command events to a centralized audit file"
 homepage: https://docs.openclaw.ai/automation/hooks#command-logger
 metadata:
  {
    "openclaw":
      {
        "emoji": "📝",
        "events": ["command"],
        "install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
      },
  }
 ---
 # Command Logger Hook
 Logs all command events (`/new`, `/reset`, `/stop`, etc.) to a centralized audit log file for debugging and monitoring purposes.
 ## What It Does
 Every time you issue a command to the agent:
 1. **Captures event details** - Command action, timestamp, session key, sender ID, source
 2. **Appends to log file** - Writes a JSON line to `~/.openclaw/logs/commands.log`
 3. **Silent operation** - Runs in the background without user notifications
 ## Output Format
 Log entries are written in JSONL (JSON Lines) format:
 ```json
 {"timestamp":"2026-01-16T14:30:00.000Z","action":"new","sessionKey":"agent:main:main","senderId":"+1234567890","source":"telegram"}
 {"timestamp":"2026-01-16T15:45:22.000Z","action":"stop","sessionKey":"agent:main:main","senderId":"user@example.com","source":"whatsapp"}
 ```
 ## Use Cases
 - **Debugging**: Track when commands were issued and from which source
 - **Auditing**: Monitor command usage across different channels
 - **Analytics**: Analyze command patterns and frequency
 - **Troubleshooting**: Investigate issues by reviewing command history
 ## Log File Location
 `~/.openclaw/logs/commands.log`
 ## Requirements
 No requirements - this hook works out of the box on all platforms.
 ## Configuration
 No configuration needed. The hook automatically:
 - Creates the log directory if it doesn't exist
 - Appends to the log file (doesn't overwrite)
 - Handles errors silently without disrupting command execution
 ## Disabling
 To disable this hook:
 ```bash
 openclaw hooks disable command-logger
 ```
 Or via config:
 ```json
 {
  "hooks": {
    "internal": {
      "entries": {
        "command-logger": { "enabled": false }
      }
    }
  }
 }
 ```
 ## Log Rotation
 The hook does not automatically rotate logs. To manage log size, you can:
 1. **Manual rotation**:
   ```bash
   mv ~/.openclaw/logs/commands.log ~/.openclaw/logs/commands.log.old
   ```
 2. **Use logrotate** (Linux):
   Create `/etc/logrotate.d/openclaw`:
   ```
   /home/username/.openclaw/logs/commands.log {
       weekly
       rotate 4
       compress
       missingok
       notifempty
   }
   ```
 ## Viewing Logs
 View recent commands:
 ```bash
 tail -n 20 ~/.openclaw/logs/commands.log
 ```
 Pretty-print with jq:
 ```bash
 cat ~/.openclaw/logs/commands.log | jq .
 ```
 Filter by action:
 ```bash
 grep '"action":"new"' ~/.openclaw/logs/commands.log | jq .
 ```
@@ -0,0 +1,56 @@
 import { c as resolveStateDir } from "../../paths-hfkBoC7i.js";
 import { t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
 import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
 //#region src/hooks/bundled/command-logger/handler.ts
 /**
 * Example hook handler: Log all commands to a file
 *
 * This handler demonstrates how to create a hook that logs all command events
 * to a centralized log file for audit/debugging purposes.
 *
 * To enable this handler, add it to your config:
 *
 * ```json
 * {
 *   "hooks": {
 *     "internal": {
 *       "enabled": true,
 *       "handlers": [
 *         {
 *           "event": "command",
 *           "module": "./hooks/handlers/command-logger.ts"
 *         }
 *       ]
 *     }
 *   }
 * }
 * ```
 */
 const log = createSubsystemLogger("command-logger");
 /**
 * Append one JSON line per command event to `<stateDir>/logs/commands.log`.
 *
 * Events whose `type` is not "command" are ignored. Any failure (directory
 * creation, serialization, append) is reported through the subsystem logger
 * and never rethrown, so a logging problem cannot disrupt command handling.
 *
 * @param event Hook event; reads `type`, `timestamp`, `action`, `sessionKey`
 *   and the optional `context.senderId` / `context.commandSource`.
 */
 const logCommand = async (event) => {
 	if (event.type !== "command") return;
 	try {
 		const stateDir = resolveStateDir(process.env, os.homedir);
 		const logDir = path.join(stateDir, "logs");
 		await fs.mkdir(logDir, { recursive: true });
 		const logFile = path.join(logDir, "commands.log");
 		// An event without a context must still be recorded: fall back to
 		// "unknown" fields instead of throwing and losing the audit entry.
 		const context = event.context ?? {};
 		const logLine = JSON.stringify({
 			timestamp: event.timestamp.toISOString(),
 			action: event.action,
 			sessionKey: event.sessionKey,
 			senderId: context.senderId ?? "unknown",
 			source: context.commandSource ?? "unknown"
 		}) + "\n";
 		await fs.appendFile(logFile, logLine, "utf-8");
 	} catch (err) {
 		const message = err instanceof Error ? err.message : String(err);
 		log.error(`Failed to log command: ${message}`);
 	}
 };
 //#endregion
 export { logCommand as default };
@@ -0,0 +1,109 @@
 ---
 name: session-memory
 description: "Save session context to memory when /new or /reset command is issued"
 homepage: https://docs.openclaw.ai/automation/hooks#session-memory
 metadata:
  {
    "openclaw":
      {
        "emoji": "💾",
        "events": ["command:new", "command:reset"],
        "requires": { "config": ["workspace.dir"] },
        "install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
      },
  }
 ---
 # Session Memory Hook
 Automatically saves session context to your workspace memory when you issue `/new` or `/reset`.
 ## What It Does
 When you run `/new` or `/reset` to start a fresh session:
 1. **Finds the previous session** - Uses the pre-reset session entry to locate the correct transcript
 2. **Extracts conversation** - Reads the last N user/assistant messages from the session (default: 15, configurable)
 3. **Generates descriptive slug** - Uses LLM to create a meaningful filename slug based on conversation content
 4. **Saves to memory** - Creates a new file at `<workspace>/memory/YYYY-MM-DD-slug.md`
 5. **Logs confirmation** - Records the saved file path in the hook log
 ## Output Format
 Memory files are created with the following format:
 ```markdown
 # Session: 2026-01-16 14:30:00 UTC
 - **Session Key**: agent:main:main
 - **Session ID**: abc123def456
 - **Source**: telegram
 ```
 ## Filename Examples
 The LLM generates descriptive slugs based on your conversation:
 - `2026-01-16-vendor-pitch.md` - Discussion about vendor evaluation
 - `2026-01-16-api-design.md` - API architecture planning
 - `2026-01-16-bug-fix.md` - Debugging session
 - `2026-01-16-1430.md` - Fallback timestamp if slug generation fails
 ## Requirements
 - **Config**: `workspace.dir` must be set (automatically configured during onboarding)
 The hook uses your configured LLM provider to generate slugs, so it works with any provider (Anthropic, OpenAI, etc.).
 ## Configuration
 The hook supports optional configuration:
 | Option     | Type   | Default | Description                                                     |
 | ---------- | ------ | ------- | --------------------------------------------------------------- |
 | `messages` | number | 15      | Number of user/assistant messages to include in the memory file |
 Example configuration:
 ```json
 {
  "hooks": {
    "internal": {
      "entries": {
        "session-memory": {
          "enabled": true,
          "messages": 25
        }
      }
    }
  }
 }
 ```
 The hook automatically:
 - Uses your workspace directory (`~/.openclaw/workspace` by default)
 - Uses your configured LLM for slug generation
 - Falls back to timestamp slugs if LLM is unavailable
 ## Disabling
 To disable this hook:
 ```bash
 openclaw hooks disable session-memory
 ```
 Or disable it via config:
 ```json
 {
  "hooks": {
    "internal": {
      "entries": {
        "session-memory": { "enabled": false }
      }
    }
  }
 }
 ```
@@ -0,0 +1,238 @@
 import { c as resolveAgentWorkspaceDir } from "../../run-with-concurrency-Cuc1THN9.js";
 import { c as resolveStateDir } from "../../paths-hfkBoC7i.js";
 import { t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
 import { B as resolveAgentIdFromSessionKey } from "../../workspace-CaW79EXh.js";
 import "../../logger-BW8uLq6f.js";
 import "../../model-selection-BU6wl1le.js";
 import "../../github-copilot-token-CQmATy5E.js";
 import "../../legacy-names-BAf61_0I.js";
 import "../../thinking-B5B36ffe.js";
 import "../../tokens-CT3nywWU.js";
 import "../../pi-embedded-C6ITuRXf.js";
 import "../../plugins-BZr8LJrk.js";
 import "../../accounts-D4KOSoV2.js";
 import "../../send-BLQvMYTW.js";
 import "../../send-DyQ6zcob.js";
 import "../../deliver-ClGktCjk.js";
 import "../../diagnostic-B9sgiG77.js";
 import "../../accounts-cJqOTvBI.js";
 import "../../image-ops-D4vlUR_L.js";
 import "../../send-D4CMR9ev.js";
 import "../../pi-model-discovery--C0FuY_K.js";
 import { pt as hasInterSessionUserProvenance } from "../../pi-embedded-helpers-CkWXaNFn.js";
 import "../../chrome-u1QjWgKY.js";
 import "../../frontmatter-CZF6xkL3.js";
 import "../../skills-B24U0XQQ.js";
 import "../../path-alias-guards-CouH80Zp.js";
 import "../../redact-DSv8X-3F.js";
 import "../../errors-_LEe37ld.js";
 import { c as writeFileWithinRoot } from "../../fs-safe-DOYVoR6M.js";
 import "../../proxy-env-BZseFuIl.js";
 import "../../store-BteyapSQ.js";
 import "../../paths-Co-u8IhA.js";
 import "../../tool-images-C0W994KU.js";
 import "../../image-fMgabouP.js";
 import "../../audio-transcription-runner-DfRfzdqH.js";
 import "../../fetch-JzejSI-7.js";
 import "../../fetch-guard-C3LWD6FT.js";
 import "../../api-key-rotation-CLI6TxVv.js";
 import "../../proxy-fetch-CbII9--S.js";
 import "../../ir-D_UJzvhu.js";
 import "../../render-7C7EDC8_.js";
 import "../../target-errors-C8xePsI5.js";
 import "../../commands-registry-DJWLO-6B.js";
 import "../../skill-commands-B6iXy7Nx.js";
 import "../../fetch-CONQGbzL.js";
 import "../../channel-activity-CVe33Aey.js";
 import "../../tables-DushlpuO.js";
 import "../../send-CHthYes-.js";
 import "../../outbound-attachment-3soL6fn0.js";
 import "../../send-DYCEGbmH.js";
 import "../../proxy-BzwL4n0W.js";
 import "../../manager-DS9FBMMG.js";
 import "../../query-expansion-DUWWrH-g.js";
 import { generateSlugViaLLM } from "../../llm-slug-generator.js";
 import { t as resolveHookConfig } from "../../config-Bs6iYHRw.js";
 import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
 //#region src/hooks/bundled/session-memory/handler.ts
 /**
 * Session memory hook handler
 *
 * Saves session context to memory when /new or /reset command is triggered
 * Creates a new dated memory file with LLM-generated slug
 */
 const log = createSubsystemLogger("hooks/session-memory");
 /**
 * Collect the most recent user/assistant text from a JSONL session transcript.
 *
 * Lines that fail to parse, or that are not usable message entries, are
 * skipped. Messages whose text starts with "/" and user messages flagged by
 * hasInterSessionUserProvenance are left out. For array content only the
 * first "text" part is used.
 *
 * @param sessionFilePath Transcript file to read.
 * @param messageCount How many trailing messages to keep (default 15).
 * @returns "role: text" lines joined by newlines, or null when reading fails.
 */
 async function getRecentSessionContent(sessionFilePath, messageCount = 15) {
 	try {
 		const raw = await fs.readFile(sessionFilePath, "utf-8");
 		const collected = [];
 		for (const rawLine of raw.trim().split("\n")) {
 			// Each transcript line is independent; a bad one is simply ignored.
 			try {
 				const record = JSON.parse(rawLine);
 				if (record.type !== "message" || !record.message) continue;
 				const message = record.message;
 				const speaker = message.role;
 				const fromUser = speaker === "user";
 				if (!fromUser && speaker !== "assistant") continue;
 				if (!message.content) continue;
 				if (fromUser && hasInterSessionUserProvenance(message)) continue;
 				let body;
 				if (Array.isArray(message.content)) body = message.content.find((part) => part.type === "text")?.text;
 				else body = message.content;
 				if (!body || body.startsWith("/")) continue;
 				collected.push(`${speaker}: ${body}`);
 			} catch {}
 		}
 		const tail = collected.slice(-messageCount);
 		return tail.join("\n");
 	} catch {
 		return null;
 	}
 }
 /**
 * Read recent session content, preferring the active transcript.
 *
 * When the active transcript yields nothing (missing, unreadable, or empty),
 * look in the same directory for siblings named `<basename>.reset.*` and read
 * the one that sorts last by name instead.
 *
 * @param sessionFilePath Path of the active transcript.
 * @param messageCount How many trailing messages to keep (default 15).
 * @returns Content from the active transcript, else from the reset sibling,
 *   else whatever the first read produced (null or an empty string).
 */
 async function getRecentSessionContentWithResetFallback(sessionFilePath, messageCount = 15) {
 	const activeContent = await getRecentSessionContent(sessionFilePath, messageCount);
 	if (activeContent) return activeContent;
 	try {
 		const parentDir = path.dirname(sessionFilePath);
 		const rotatedPrefix = `${path.basename(sessionFilePath)}.reset.`;
 		const siblings = await fs.readdir(parentDir);
 		const rotatedNames = siblings.filter((name) => name.startsWith(rotatedPrefix)).toSorted();
 		if (rotatedNames.length === 0) return activeContent;
 		const latestResetPath = path.join(parentDir, rotatedNames[rotatedNames.length - 1]);
 		const rotatedContent = await getRecentSessionContent(latestResetPath, messageCount);
 		if (!rotatedContent) return activeContent;
 		log.debug("Loaded session content from reset fallback", {
 			sessionFilePath,
 			latestResetPath
 		});
 		return rotatedContent;
 	} catch {
 		// Directory listing failed; keep the result of the first attempt.
 		return activeContent;
 	}
 }
 /**
 * Drop everything from the first ".reset." marker onward.
 *
 * @param fileName File name that may carry a ".reset.<suffix>" tail.
 * @returns The part before the marker, or the name unchanged when there is none.
 */
 function stripResetSuffix(fileName) {
 	const markerAt = fileName.indexOf(".reset.");
 	if (markerAt < 0) return fileName;
 	return fileName.slice(0, markerAt);
 }
 /**
 * Locate the transcript that belongs to the session being reset.
 *
 * Candidates are tried in this order, all inside `params.sessionsDir`:
 *   1. the current session file name with any ".reset.*" tail removed;
 *   2. `<sessionId>.jsonl`;
 *   3. the `<sessionId>-topic-*.jsonl` file that sorts last by name;
 *   4. only when a current session file was given: the non-reset `.jsonl`
 *      file that sorts last by name.
 *
 * NOTE(review): steps 3 and 4 order by file name, not by modification time.
 *
 * @param params `{ sessionsDir, currentSessionFile?, sessionId? }`.
 * @returns Absolute candidate path, or undefined when nothing matches or the
 *   directory cannot be read.
 */
 async function findPreviousSessionFile(params) {
 	try {
 		const { sessionsDir, currentSessionFile, sessionId } = params;
 		const entries = await fs.readdir(sessionsDir);
 		const inDir = (name) => path.join(sessionsDir, name);
 		const isLiveTranscript = (name) => name.endsWith(".jsonl") && !name.includes(".reset.");
 		const lastByName = (names) => names.toSorted().at(-1);
 		if (currentSessionFile) {
 			const unrotatedName = stripResetSuffix(path.basename(currentSessionFile));
 			if (unrotatedName && entries.includes(unrotatedName)) return inDir(unrotatedName);
 		}
 		const wantedId = sessionId?.trim();
 		if (wantedId) {
 			const canonicalName = `${wantedId}.jsonl`;
 			if (entries.includes(canonicalName)) return inDir(canonicalName);
 			const topicNames = entries.filter((name) => name.startsWith(`${wantedId}-topic-`) && isLiveTranscript(name));
 			const newestTopic = lastByName(topicNames);
 			if (newestTopic !== undefined) return inDir(newestTopic);
 		}
 		if (!currentSessionFile) return;
 		const newestAny = lastByName(entries.filter((name) => isLiveTranscript(name)));
 		if (newestAny !== undefined) return inDir(newestAny);
 	} catch {}
 }
 /**
 * Save session context to memory when /new or /reset command is triggered.
 *
 * For a "command" event whose action is "new" or "reset", writes a markdown
 * file named `YYYY-MM-DD-<slug>.md` into `<workspace>/memory`. The file holds
 * a short header (session key, session id, source) and, when a transcript
 * could be read, a "Conversation Summary" section with the recent messages.
 *
 * Errors are logged through the subsystem logger and never rethrown.
 *
 * @param event Hook event; reads `type`, `action`, `timestamp`, `sessionKey`
 *   and `context` (`cfg`, `previousSessionEntry`, `sessionEntry`, `commandSource`).
 */
 const saveSessionToMemory = async (event) => {
 	const isResetCommand = event.action === "new" || event.action === "reset";
 	if (event.type !== "command" || !isResetCommand) return;
 	try {
 		log.debug("Hook triggered for reset/new command", { action: event.action });
 		const context = event.context || {};
 		const cfg = context.cfg;
 		const agentId = resolveAgentIdFromSessionKey(event.sessionKey);
 		// Without a config, fall back to `<stateDir>/workspace`.
 		const workspaceDir = cfg ? resolveAgentWorkspaceDir(cfg, agentId) : path.join(resolveStateDir(process.env, os.homedir), "workspace");
 		const memoryDir = path.join(workspaceDir, "memory");
 		await fs.mkdir(memoryDir, { recursive: true });
 		const now = new Date(event.timestamp);
 		// Date portion (YYYY-MM-DD) of the UTC ISO timestamp.
 		const dateStr = now.toISOString().split("T")[0];
 		// Prefer the pre-reset entry so the transcript of the session being closed is used.
 		const sessionEntry = context.previousSessionEntry || context.sessionEntry || {};
 		const currentSessionId = sessionEntry.sessionId;
 		let currentSessionFile = sessionEntry.sessionFile || void 0;
 		// No transcript path, or one that points at a rotated ".reset." file:
 		// try to recover the transcript from the candidate session directories.
 		if (!currentSessionFile || currentSessionFile.includes(".reset.")) {
 			const sessionsDirs = /* @__PURE__ */ new Set();
 			// Search the transcript's own directory first, then `<workspace>/sessions`.
 			if (currentSessionFile) sessionsDirs.add(path.dirname(currentSessionFile));
 			sessionsDirs.add(path.join(workspaceDir, "sessions"));
 			for (const sessionsDir of sessionsDirs) {
 				const recoveredSessionFile = await findPreviousSessionFile({
 					sessionsDir,
 					currentSessionFile,
 					sessionId: currentSessionId
 				});
 				if (!recoveredSessionFile) continue;
 				currentSessionFile = recoveredSessionFile;
 				log.debug("Found previous session file", { file: currentSessionFile });
 				break;
 			}
 		}
 		log.debug("Session context resolved", {
 			sessionId: currentSessionId,
 			sessionFile: currentSessionFile,
 			hasCfg: Boolean(cfg)
 		});
 		const sessionFile = currentSessionFile || void 0;
 		const hookConfig = resolveHookConfig(cfg, "session-memory");
 		// Only a positive numeric `messages` setting overrides the default of 15.
 		const messageCount = typeof hookConfig?.messages === "number" && hookConfig.messages > 0 ? hookConfig.messages : 15;
 		let slug = null;
 		let sessionContent = null;
 		if (sessionFile) {
 			sessionContent = await getRecentSessionContentWithResetFallback(sessionFile, messageCount);
 			log.debug("Session content loaded", {
 				length: sessionContent?.length ?? 0,
 				messageCount
 			});
 			// The LLM slug is skipped when OPENCLAW_TEST_FAST is "1", when VITEST is
 			// "true" or "1", or when the hook config sets `llmSlug` to false.
 			const allowLlmSlug = !(process.env.OPENCLAW_TEST_FAST === "1" || process.env.VITEST === "true" || process.env.VITEST === "1" || false) && hookConfig?.llmSlug !== false;
 			if (sessionContent && cfg && allowLlmSlug) {
 				log.debug("Calling generateSlugViaLLM...");
 				slug = await generateSlugViaLLM({
 					sessionContent,
 					cfg
 				});
 				log.debug("Generated slug", { slug });
 			}
 		}
 		if (!slug) {
 			// Fallback slug: HHMM taken from the UTC ISO time.
 			slug = now.toISOString().split("T")[1].split(".")[0].replace(/:/g, "").slice(0, 4);
 			log.debug("Using fallback timestamp slug", { slug });
 		}
 		const filename = `${dateStr}-${slug}.md`;
 		const memoryFilePath = path.join(memoryDir, filename);
 		log.debug("Memory file path resolved", {
 			filename,
 			// Shorten the home directory to "~" in log output.
 			path: memoryFilePath.replace(os.homedir(), "~")
 		});
 		// Time portion (HH:MM:SS) of the UTC ISO timestamp.
 		const timeStr = now.toISOString().split("T")[1].split(".")[0];
 		const sessionId = sessionEntry.sessionId || "unknown";
 		const source = context.commandSource || "unknown";
 		const entryParts = [
 			`# Session: ${dateStr} ${timeStr} UTC`,
 			"",
 			`- **Session Key**: ${event.sessionKey}`,
 			`- **Session ID**: ${sessionId}`,
 			`- **Source**: ${source}`,
 			""
 		];
 		if (sessionContent) entryParts.push("## Conversation Summary", "", sessionContent, "");
 		// NOTE(review): presumably confines the write to `rootDir` — confirm against the helper.
 		await writeFileWithinRoot({
 			rootDir: memoryDir,
 			relativePath: filename,
 			data: entryParts.join("\n"),
 			encoding: "utf-8"
 		});
 		log.debug("Memory file written successfully");
 		const relPath = memoryFilePath.replace(os.homedir(), "~");
 		log.info(`Session context saved to ${relPath}`);
 	} catch (err) {
 		// Best-effort hook: report the failure, never propagate it.
 		if (err instanceof Error) log.error("Failed to save session memory", {
 			errorName: err.name,
 			errorMessage: err.message,
 			stack: err.stack
 		});
 		else log.error("Failed to save session memory", { error: String(err) });
 	}
 };
 //#endregion
 export { saveSessionToMemory as default };
@@ -0,0 +1,18 @@
 {
  "version": 1,
  "deviceId": "ad0ebece2493ecaf2336b939a2cc27e65261695c8c8725416e1d349da02a14d5",
  "tokens": {
    "operator": {
      "token": "pg1GmeUDISnd7tcZBg7egNxxZSfJOpYJ1CfjrVXA9r0",
      "role": "operator",
      "scopes": [
        "operator.admin",
        "operator.approvals",
        "operator.pairing",
        "operator.read",
        "operator.write"
      ],
      "updatedAtMs": 1774473144159
    }
  }
 }
@@ -0,0 +1,5 @@
 {
  "version": 2,
  "lastUpdateId": 148911073,
  "botId": "8792219052"
 }
@@ -0,0 +1,7 @@
 {
  "lastCheckedAt": "2026-03-24T22:42:51.772Z",
  "lastNotifiedVersion": "2026.3.23-2",
  "lastNotifiedTag": "latest",
  "lastAvailableVersion": "2026.3.23-2",
  "lastAvailableTag": "latest"
 }
@@ -0,0 +1,103 @@
 # OpenVINO NPU advisory gateway
 Bounded Docker-bridge wrapper for the classifier, GenAI worker, and doc/image triage sidecars.
 - HTTP bind: `172.19.0.1:18830` for `n8n-agent` on the `swarm_default` Docker bridge
 - Service: `openvino-advisory-gateway.service`
 - Mode: advisory/shadow/draft only
 - Metadata log: `~/.local/state/openvino-advisory-gateway/events.sqlite`
 ## Authority boundary
 Every response includes an explicit authority block:
 ```json
 {
  "may_route": false,
  "may_write_memory": false,
  "may_send_external": false,
  "may_process_private_dirs": false,
  "may_execute_tools": false,
  "may_restart_services": false
 }
 ```
 This service may provide hints and drafts. It must not become the live Atlas/Hermes router, memory writer, primary chat model, external sender, tool executor, service restarter, or broad private document processor without a separate approved integration.
 ## Endpoints
 ```text
 GET  /healthz
 POST /v1/advisory/classify
 POST /v1/advisory/generate
 POST /v1/advisory/triage
 ```
 ## Cron and n8n advisory dry-run contract
 For cron/n8n event classification, use the dry-run contract in `docs/cron-n8n-advisory-classifier.md`.
 It defines the normalized event envelope, decision envelope, `suppress|log|summarize|escalate` recommendation mapping, and duplicate/stale/no-op/action-required examples.
 Example artifacts:
 - `examples/cron-advisory-dry-run.sh` — host-local cron wrapper that prints one compact decision line and performs no side effects.
 - `examples/n8n-advisory-dry-run-fragment.json` — sanitized inactive n8n node fragment for Set -> HTTP Request -> Code decision mapping.
 Both examples preserve the gateway authority boundary: advisory only, no send/restart/memory/tool/routing authority.
 ### Classifier shadow call
 ```bash
 curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
  -H 'Content-Type: application/json' \
  -d '{"trace_id":"smoke","text":"Urgent: inspect service health and systemd status."}' | jq .
 ```
 ### Bounded GenAI draft
 Allowed jobs: `title`, `summary`, `notification`, `memory_candidate`.
 ```bash
 curl -fsS http://172.19.0.1:18830/v1/advisory/generate \
  -H 'Content-Type: application/json' \
  -d '{"job":"title","input":"Summarize a local health check.","max_new_tokens":24}' | jq .
 ```
 ### Explicit-file doc/image triage
 ```bash
 curl -fsS http://172.19.0.1:18830/v1/advisory/triage \
  -H 'Content-Type: application/json' \
  -d '{"path":"/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png","allowed_roots":["/home/will/lab/swarm/openvino-doc-image-triage-npu"]}' | jq .
 ```
 The gateway requires the path to be inside both:
 1. a configured allowed root on the gateway process; and
 2. the request's explicit `allowed_roots` list, if one is provided.
 Requests cannot broaden the process-configured roots. Do not broaden configured roots to private folders without explicit approval for that root and task.
 ## Install / run
 ```bash
 install -m 0644 openvino-advisory-gateway.service ~/.config/systemd/user/openvino-advisory-gateway.service
 systemctl --user daemon-reload
 systemctl --user enable --now openvino-advisory-gateway.service
 systemctl --user status openvino-advisory-gateway.service --no-pager
 ```
 `--allowed-root` may be repeated in the systemd unit when additional non-private fixture/review directories are approved. Docker bridge exposure must use `--allow-docker-bridge` and the approved bridge IP `172.19.0.1`; the service still refuses wildcard binds such as `0.0.0.0`.
 From `n8n-agent`, verify bridge reachability with:
 ```bash
 docker exec n8n-agent wget -qO- -T 8 http://172.19.0.1:18830/healthz
 ```
 ## Tests
 ```bash
 cd /home/will/lab/swarm/openvino-advisory-gateway
 python -m pytest tests/test_gateway.py -q
 ```
@@ -0,0 +1,256 @@
 # Cron and n8n advisory classifier contract
 Status: dry-run specification and integration examples
 Scope: cron and n8n alert/event classification through the OpenVINO advisory gateway
 Gateway: `http://172.19.0.1:18830` from `n8n-agent` and host-local cron on the current bridge-bound service. Override `NPU_ADVISORY_GATEWAY_URL=http://127.0.0.1:18830` only if a localhost-bound instance is explicitly running.
 ## Authority boundary
 This contract is advisory only. It may recommend one of `suppress`, `log`, `summarize`, or `escalate`, but it must not perform the action itself.
 Every integration must preserve these authority flags:
 ```json
 {
  "may_route": false,
  "may_write_memory": false,
  "may_send_external": false,
  "may_process_private_dirs": false,
  "may_execute_tools": false,
  "may_restart_services": false
 }
 ```
 Allowed side effects in dry-run mode:
 - read an explicit cron/n8n event payload;
 - call the advisory gateway classifier/generator;
 - write compact local stdout or n8n execution logs;
 - store metadata-only advisory counters if an existing log sink already does so.
 Forbidden without separate explicit approval:
 - outbound sends/pages/Discord/Telegram/email;
 - service restarts, command execution, or tool calls;
 - Hermes/Atlas routing changes;
 - memory writes;
 - broad private-directory processing;
 - vector database mutation or reindexing.
 ## Input event envelope
 Cron and n8n producers should normalize events before classification. Keep this input small and avoid raw private payloads.
 ```json
 {
  "schema": "cron_n8n_event_v1",
  "trace_id": "cron:service-health:2026-06-05T14:30:00Z",
  "source": "cron",
  "workflow": "npu-service-health",
  "event_kind": "health_check",
  "severity": "warning",
  "subject": "openvino-reranker health check repeated warning",
  "summary": "Two consecutive health probes reported timeout, no restart attempted.",
  "dedupe_key": "service:openvino-reranker:timeout",
  "observed_at": "2026-06-05T14:30:00Z",
  "stale_after_s": 900,
  "action_requested": false,
  "dry_run": true
 }
 ```
 Field rules:
 - `source`: `cron` or `n8n`.
 - `workflow`: compact job/workflow name, not a private URL.
 - `subject` + `summary`: the only text sent to the classifier.
 - `dedupe_key`: stable non-secret key for duplicate detection by the caller.
 - `stale_after_s`: caller-side freshness gate; stale events should not page.
 - `action_requested`: true only when an upstream job is asking a human/Atlas to consider action.
 - `dry_run`: must remain true for this phase.
 ## Gateway classifier call
 The current gateway `/v1/advisory/classify` accepts explicit text and wraps the classifier response in `openvino_advisory_v1` with NPU proof and authority fields.
 Host cron example for the current bridge-bound service:
 ```bash
 curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
  -H 'Content-Type: application/json' \
  -d '{
    "trace_id":"cron:service-health:sample",
    "text":"source=cron workflow=npu-service-health severity=warning kind=health_check subject=openvino-reranker repeated timeout summary=Two consecutive health probes reported timeout; no restart attempted; dry_run=true"
  }' | jq '{schema, mode, trace_id, npu_ok: .npu_proof.ok, npu_delta: .npu_proof.npu_busy_delta_us, authority, labels: .result.labels}'
 ```
 n8n Docker-bridge example:
 ```bash
 curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
  -H 'Content-Type: application/json' \
  -d '{"trace_id":"n8n:swarm-health:sample","text":"source=n8n workflow=swarm-health-watchdog severity=critical kind=health_check subject=multiple services unhealthy summary=Health probe failed for three services; dry_run=true"}' \
  | jq '{mode, npu_ok: .npu_proof.ok, npu_delta: .npu_proof.npu_busy_delta_us, may_send_external: .authority.may_send_external}'
 ```
 NPU proof gate: an HTTP 200 is not enough. Treat the classifier as NPU-backed only when `.npu_proof.ok == true` and `.npu_proof.npu_busy_delta_us > 0` for real inference.
 ## Advisory decision envelope
 Cron/n8n wrappers should map the gateway response plus caller-side freshness/deduplication state into this compact decision envelope:
 ```json
 {
  "schema": "cron_n8n_advisory_decision_v1",
  "trace_id": "cron:service-health:2026-06-05T14:30:00Z",
  "source": "cron",
  "workflow": "npu-service-health",
  "dry_run": true,
  "recommendation": "summarize",
  "classification": "action_required",
  "confidence": 0.84,
  "reason_codes": ["warning_or_high_urgency", "fresh_event", "not_duplicate"],
  "npu_proof": {"required": true, "ok": true, "npu_busy_delta_us": 1234},
  "authority": {
    "may_route": false,
    "may_write_memory": false,
    "may_send_external": false,
    "may_process_private_dirs": false,
    "may_execute_tools": false,
    "may_restart_services": false
  },
  "next_gate": "human_or_atlas_review_required_before_any_side_effect"
 }
 ```
 Decision fields:
 - `recommendation`: `suppress`, `log`, `summarize`, or `escalate`.
 - `classification`: `duplicate`, `stale`, `no_op`, or `action_required` for v1 examples.
 - `confidence`: use classifier urgency/category confidence when available; otherwise use a conservative wrapper score.
 - `reason_codes`: compact machine-readable rationale, not raw payload text.
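 - `next_gate`: the review/approval gate required before any side effect (`human_or_atlas_review_required_before_any_side_effect` for `summarize`/`escalate`); `none_in_dry_run` when the recommendation is `suppress` or `log`.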
 ## Recommendation mapping
 This is the v1 dry-run mapping. It is intentionally conservative and caller-side; the NPU classifier advises, the wrapper chooses a recommendation, and humans/Atlas retain authority.
 | Caller/classifier signal | Classification | Recommendation | Dry-run behavior |
 |---|---|---|---|
 | Same `dedupe_key` observed inside caller cooldown | `duplicate` | `suppress` | Log compact duplicate count only. Do not send. |
 | `observed_at + stale_after_s` is older than now | `stale` | `log` | Log stale event and age. Do not summarize/page. |
 | Severity low/normal, no action requested, classifier urgency low/normal | `no_op` | `log` | Keep normal execution log only. |
 | Warning/high urgency or action requested, NPU proof ok | `action_required` | `summarize` | Draft a local summary for review; no send/restart. |
 | Critical severity or repeated failures and NPU proof ok | `action_required` | `escalate` | Recommend escalation to Atlas/human; wrapper still must not send/restart. |
 | NPU proof missing or false | `action_required` or caller-specific | `log` | Log `npu_proof_failed`; do not claim NPU-backed advice. |
 ## Required examples
 ### Duplicate -> suppress
 Input summary:
 ```json
 {"source":"cron","workflow":"npu-service-health","severity":"warning","dedupe_key":"service:reranker:timeout","summary":"Same timeout as prior run inside cooldown.","dry_run":true}
 ```
 Decision:
 ```json
 {"classification":"duplicate","recommendation":"suppress","reason_codes":["dedupe_key_in_cooldown"],"next_gate":"none_in_dry_run"}
 ```
 ### Stale -> log
 Input summary:
 ```json
 {"source":"n8n","workflow":"swarm-health-watchdog","severity":"warning","observed_at":"older_than_stale_after","stale_after_s":900,"summary":"Delayed webhook replay for an old probe.","dry_run":true}
 ```
 Decision:
 ```json
 {"classification":"stale","recommendation":"log","reason_codes":["event_stale"],"next_gate":"none_in_dry_run"}
 ```
 ### No-op -> log
 Input summary:
 ```json
 {"source":"cron","workflow":"backup-check","severity":"normal","action_requested":false,"summary":"Backup completed and all expected files are present.","dry_run":true}
 ```
 Decision:
 ```json
 {"classification":"no_op","recommendation":"log","reason_codes":["normal_severity","no_action_requested"],"next_gate":"none_in_dry_run"}
 ```
 ### Action required -> summarize/escalate
 Input summary:
 ```json
 {"source":"n8n","workflow":"swarm-health-watchdog","severity":"critical","action_requested":true,"summary":"RAG and embeddings health failed repeatedly; no restart attempted.","dry_run":true}
 ```
 Decision:
 ```json
 {"classification":"action_required","recommendation":"escalate","reason_codes":["critical_severity","action_requested","fresh_event"],"next_gate":"human_or_atlas_review_required_before_any_side_effect"}
 ```
 ## Optional local summary draft
 If the decision is `summarize` or `escalate`, a wrapper may request a bounded draft from `/v1/advisory/generate`:
 ```bash
 curl -fsS http://172.19.0.1:18830/v1/advisory/generate \
  -H 'Content-Type: application/json' \
  -d '{"trace_id":"cron:service-health:sample","job":"summary","input":"Health check warning: openvino-reranker timed out twice; no restart attempted.","max_new_tokens":48}' \
  | jq '{mode, trace_id, npu_ok: .npu_proof.ok, authority, draft: .result.draft_text, final_authority: .result.final_authority}'
 ```
 The draft remains non-authoritative. It must not be automatically sent externally or written to memory.
 ## n8n integration pattern
 Recommended node chain for dry-run workflows:
 ```text
 Schedule/Webhook/Failure Trigger
  -> Set normalized event envelope
  -> HTTP Request POST /v1/advisory/classify
  -> Code node maps decision envelope
  -> IF node on recommendation
      suppress/log: execution log only
      summarize/escalate: optional local summary draft, then execution log only
 ```
 The IF node must not connect to outbound messaging, service restart, memory write, or Hermes routing nodes until a separate approval changes the authority boundary.
 See `../examples/n8n-advisory-dry-run-fragment.json` for a sanitized node fragment.
 ## Cron integration pattern
 Cron jobs should call a wrapper script that prints one compact line and exits successfully unless the wrapper itself fails. The wrapper should not page or restart.
 Example crontab shape:
 ```text
 */15 * * * * /home/will/lab/swarm/openvino-advisory-gateway/examples/cron-advisory-dry-run.sh npu-service-health warning health_check "openvino-reranker timeout twice" "service:openvino-reranker:timeout" >> /home/will/.local/state/npu-advisory/cron.log 2>&1
 ```
 See `../examples/cron-advisory-dry-run.sh`.
 ## Verification checklist
 - Gateway health is reachable on the intended interface.
 - Classifier response includes `schema=openvino_advisory_v1`.
 - `.authority.*` flags are all false for side-effect authority.
 - `.npu_proof.ok` is true and `npu_busy_delta_us > 0` before claiming NPU-backed advice.
 - Decision envelope is compact and contains only identifiers, enum labels, a confidence score, reason codes, NPU proof fields, authority flags, and the next gate; no raw payload text.
 - Duplicate/stale/no-op/action-required examples remain dry-run only.
 - No n8n workflow activation, outbound send, service restart, memory write, routing change, private-dir broadening, or vector DB mutation occurred.
@@ -0,0 +1,48 @@
 #!/usr/bin/env bash
 set -euo pipefail
 # Dry-run cron/n8n advisory wrapper.
 # It calls the advisory classifier and prints one compact decision line.
 # It does not send, restart, route, execute tools, or write memory.
 #
 # Positional arguments (all optional):
 #   1 workflow name, 2 severity, 3 event kind, 4 subject, 5 dedupe key
 # Environment:
 #   NPU_ADVISORY_GATEWAY_URL  gateway base URL (default http://172.19.0.1:18830)
 #   NPU_ADVISORY_TRACE_ID     trace id override (default cron:<workflow>:<UTC time>)
 #   NPU_ADVISORY_TIMEOUT_S    total time budget for the classifier call, seconds (default 20)
 GATEWAY_URL="${NPU_ADVISORY_GATEWAY_URL:-http://172.19.0.1:18830}"
 # Bound the HTTP call so a stalled gateway cannot leave cron runs piling up.
 TIMEOUT_S="${NPU_ADVISORY_TIMEOUT_S:-20}"
 WORKFLOW="${1:-cron-advisory-sample}"
 SEVERITY="${2:-normal}"
 EVENT_KIND="${3:-health_check}"
 SUBJECT="${4:-sample advisory event}"
 DEDUPE_KEY="${5:-sample}"
 TRACE_ID="${NPU_ADVISORY_TRACE_ID:-cron:${WORKFLOW}:$(date -u +%Y%m%dT%H%M%SZ)}"
 TEXT="source=cron workflow=${WORKFLOW} severity=${SEVERITY} kind=${EVENT_KIND} subject=${SUBJECT} dedupe_key=${DEDUPE_KEY} dry_run=true authority=no-send,no-restart,no-memory"
 # Build the request with jq so quotes in the arguments cannot break the JSON.
 payload=$(jq -nc --arg trace_id "$TRACE_ID" --arg text "$TEXT" '{trace_id:$trace_id,text:$text}')
 response=$(curl -fsS --max-time "$TIMEOUT_S" "${GATEWAY_URL%/}/v1/advisory/classify" -H 'Content-Type: application/json' -d "$payload")
 # Map the gateway response to the decision envelope:
 #   - NPU proof must be ok with a positive busy delta, otherwise recommend "log";
 #   - critical severity -> escalate; warning severity or high/critical urgency -> summarize;
 #   - a "log" recommendation at low or normal severity is a no_op, anything else is action_required.
 printf '%s\n' "$response" | jq -c --arg source cron --arg workflow "$WORKFLOW" --arg severity "$SEVERITY" --arg dedupe_key "$DEDUPE_KEY" '
  . as $env
  | ($env.result.labels.urgency.value // "normal") as $urgency
  | ($env.result.labels.urgency.confidence // 0) as $confidence
  | ($env.npu_proof.ok == true and (($env.npu_proof.npu_busy_delta_us // 0) > 0)) as $npu_ok
  | (if ($npu_ok | not) then "log"
     elif ($severity == "critical") then "escalate"
     elif ($severity == "warning" or $urgency == "high" or $urgency == "critical") then "summarize"
     else "log" end) as $recommendation
  | (if ($recommendation == "log" and ($severity == "normal" or $severity == "low")) then "no_op" else "action_required" end) as $classification
  | {
      schema: "cron_n8n_advisory_decision_v1",
      trace_id: $env.trace_id,
      source: $source,
      workflow: $workflow,
      dry_run: true,
      recommendation: $recommendation,
      classification: $classification,
      confidence: $confidence,
      reason_codes: ([
        (if $npu_ok then "npu_proof_ok" else "npu_proof_failed" end),
        ("severity_" + $severity),
        ("urgency_" + $urgency)
      ]),
      npu_proof: $env.npu_proof,
      authority: $env.authority,
      next_gate: (if $recommendation == "escalate" or $recommendation == "summarize" then "human_or_atlas_review_required_before_any_side_effect" else "none_in_dry_run" end)
    }'
@@ -0,0 +1,70 @@
 {
  "name": "OpenVINO Advisory Dry-Run Fragment",
  "active": false,
  "nodes": [
    {
      "parameters": {
        "values": {
          "string": [
            {"name": "schema", "value": "cron_n8n_event_v1"},
            {"name": "source", "value": "n8n"},
            {"name": "workflow", "value": "swarm-health-watchdog"},
            {"name": "event_kind", "value": "health_check"},
            {"name": "severity", "value": "warning"},
            {"name": "subject", "value": "OpenVINO service health warning"},
            {"name": "summary", "value": "Health probe reported a warning; no restart or send is authorized."},
            {"name": "dedupe_key", "value": "service:openvino:warning"},
            {"name": "dry_run", "value": "true"}
          ]
        },
        "options": {}
      },
      "id": "set-normalized-event",
      "name": "Set normalized advisory event",
      "type": "n8n-nodes-base.set",
      "typeVersion": 2,
      "position": [260, 300]
    },
    {
      "parameters": {
        "method": "POST",
        "url": "http://172.19.0.1:18830/v1/advisory/classify",
        "sendBody": true,
        "contentType": "json",
        "jsonBody": "={{ JSON.stringify({ trace_id: 'n8n:' + $json.workflow + ':' + $now.toISO(), text: 'source=n8n workflow=' + $json.workflow + ' severity=' + $json.severity + ' kind=' + $json.event_kind + ' subject=' + $json.subject + ' summary=' + $json.summary + ' dedupe_key=' + $json.dedupe_key + ' dry_run=true authority=no-send,no-restart,no-memory' }) }}",
        "options": {
          "timeout": 20000
        }
      },
      "id": "http-advisory-classify",
      "name": "HTTP advisory classify dry-run",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4,
      "position": [520, 300]
    },
    {
      "parameters": {
        "jsCode": "const env = $json;\nconst labels = env.result?.labels || {};\nconst urgency = labels.urgency?.value || 'normal';\nconst severity = $('Set normalized advisory event').first().json.severity || 'normal';\nconst npuOk = env.npu_proof?.ok === true && (env.npu_proof?.npu_busy_delta_us || 0) > 0;\nlet recommendation = 'log';\nlet classification = 'no_op';\nconst reason_codes = [npuOk ? 'npu_proof_ok' : 'npu_proof_failed', `severity_${severity}`, `urgency_${urgency}`];\nif (npuOk && severity === 'critical') { recommendation = 'escalate'; classification = 'action_required'; }\nelse if (npuOk && (severity === 'warning' || urgency === 'high' || urgency === 'critical')) { recommendation = 'summarize'; classification = 'action_required'; }\nif (!npuOk) reason_codes.push('log_only_no_npu_claim');\nreturn [{ json: { schema: 'cron_n8n_advisory_decision_v1', trace_id: env.trace_id, source: 'n8n', workflow: $('Set normalized advisory event').first().json.workflow, dry_run: true, recommendation, classification, confidence: labels.urgency?.confidence || 0, reason_codes, npu_proof: env.npu_proof, authority: env.authority, next_gate: (recommendation === 'summarize' || recommendation === 'escalate') ? 'human_or_atlas_review_required_before_any_side_effect' : 'none_in_dry_run' } } }];"
      },
      "id": "map-dry-run-decision",
      "name": "Map dry-run decision (no side effects)",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [780, 300]
    }
  ],
  "connections": {
    "Set normalized advisory event": {
      "main": [[{"node": "HTTP advisory classify dry-run", "type": "main", "index": 0}]]
    },
    "HTTP advisory classify dry-run": {
      "main": [[{"node": "Map dry-run decision (no side effects)", "type": "main", "index": 0}]]
    }
  },
  "settings": {
    "executionOrder": "v1"
  },
  "pinData": {},
  "staticData": null,
  "tags": ["dry-run", "openvino", "advisory"]
 }
@@ -0,0 +1,374 @@
 #!/usr/bin/env python3
 """Local-only advisory gateway for OpenVINO NPU sidecars.
 This service deliberately returns bounded advisory envelopes. It never routes,
 writes memory, sends external messages, executes tools, restarts services, or
 broadens document processing authority. Atlas/Hermes may use these outputs as
 hints only.
 """
 from __future__ import annotations
 import argparse
 import hashlib
 import ipaddress
 import json
 import os
 import sqlite3
 import time
 import urllib.request
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 from pathlib import Path
 from typing import Any, Callable
 from urllib.parse import urlparse
 # Default bind address; validate_bind_host accepts this value unconditionally.
 HOST = "127.0.0.1"
 # The only non-loopback address validate_bind_host will accept, and only with explicit opt-in.
 DOCKER_BRIDGE_HOST = "172.19.0.1"
 PORT = 18830
 # Sidecar endpoints this gateway forwards to; all are loopback URLs.
 CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
 GENAI_URL = "http://127.0.0.1:18820/v1/worker/generate"
 DOC_TRIAGE_URL = "http://127.0.0.1:18829/triage"
 # Event log location; overridable through the NPU_ADVISORY_LOG_DB environment variable.
 DEFAULT_LOG_DB = Path(os.environ.get("NPU_ADVISORY_LOG_DB", "/home/will/.local/state/openvino-advisory-gateway/events.sqlite"))
 DEFAULT_ALLOWED_ROOT = Path("/home/will/lab/swarm/openvino-doc-image-triage-npu")
 # NPU_ADVISORY_ALLOWED_ROOTS is split on os.pathsep; empty segments are dropped.
 DEFAULT_ALLOWED_ROOTS = [Path(p) for p in os.environ.get("NPU_ADVISORY_ALLOWED_ROOTS", str(DEFAULT_ALLOWED_ROOT)).split(os.pathsep) if p]
 # Job names generate_bounded will forward; anything else is rejected with ValueError.
 ALLOWED_GENAI_JOBS = {"title", "summary", "notification", "memory_candidate"}
 # Copied (via dict(AUTHORITY)) into every envelope, health report, and error response.
 # Every flag is False: callers are told the output grants no authority to act.
 AUTHORITY = {
    "may_route": False,
    "may_write_memory": False,
    "may_send_external": False,
    "may_process_private_dirs": False,
    "may_execute_tools": False,
    "may_restart_services": False,
 }
 def validate_bind_host(host: str, *, allow_docker_bridge: bool = False) -> None:
    """Reject any bind address other than loopback or the approved Docker bridge IP.

    ``127.0.0.1`` is always accepted. Any other host requires
    ``allow_docker_bridge=True``, must parse as a literal IP address, and must
    equal ``DOCKER_BRIDGE_HOST`` while being a private, non-loopback,
    non-unspecified IPv4 address.

    Raises:
        ValueError: if the host is not permitted under the rules above.
    """
    if host != "127.0.0.1":
        if not allow_docker_bridge:
            raise ValueError("refusing non-local bind without --allow-docker-bridge")
        try:
            parsed = ipaddress.ip_address(host)
        except ValueError as exc:
            raise ValueError("bind host must be a literal IP address") from exc
        # Sanity properties the approved bridge address itself must satisfy.
        plausible_bridge = (
            parsed.version == 4
            and parsed.is_private
            and not parsed.is_loopback
            and not parsed.is_unspecified
        )
        if not (host == DOCKER_BRIDGE_HOST and plausible_bridge):
            raise ValueError(f"Docker bridge bind must use approved bridge IP {DOCKER_BRIDGE_HOST}")
 def sha256_text(text: str) -> str:
    """Return the hex SHA-256 digest of ``text`` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()
 def http_post_json(url: str, payload: dict[str, Any], timeout_s: float = 20.0) -> dict[str, Any]:
    """POST ``payload`` as a JSON body to ``url`` and return the decoded JSON response.

    Errors from ``urllib`` and from JSON decoding propagate to the caller.
    """
    encoded = json.dumps(payload).encode("utf-8")
    request = urllib.request.Request(
        url,
        data=encoded,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout_s) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
 def http_get_json(url: str, timeout_s: float = 8.0) -> dict[str, Any]:
    """GET ``url`` and return its body parsed as JSON.

    A body that is not valid JSON is not an error: the function returns
    ``{"ok": True, "raw_text": <first 120 characters>}`` instead.
    """
    with urllib.request.urlopen(url, timeout=timeout_s) as response:
        text = response.read().decode("utf-8")
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Plain-text responses are reported as reachable, with a truncated excerpt.
        parsed = {"ok": True, "raw_text": text[:120]}
    return parsed
 def _npu_delta_from(result: dict[str, Any], fallback: int | None = None) -> int | None:
    for key in ("npu_busy_delta_us", "sysfs_npu_busy_delta_us"):
        value = result.get(key)
        if isinstance(value, int):
            return value
        if isinstance(value, float):
            return int(value)
    return fallback
 def _doc_triage_npu_delta(result: dict[str, Any]) -> int | None:
    pages = ((result.get("result") or {}).get("pages") or []) if isinstance(result, dict) else []
    best: int | None = None
    for page in pages:
        emb = ((page.get("needs_attention") or {}).get("embedding") or {}) if isinstance(page, dict) else {}
        delta = emb.get("npu_busy_delta_us")
        if isinstance(delta, int):
            best = max(best or 0, delta)
    return best
 def build_envelope(
    *,
    service: str,
    operation: str,
    result: dict[str, Any],
    mode: str = "advisory",
    input_scope: str,
    npu_busy_delta_us: int | None,
    trace_id: str | None = None,
    warnings: list[str] | None = None,
 ) -> dict[str, Any]:
    """Wrap a sidecar result in the ``openvino_advisory_v1`` envelope.

    The envelope always carries ``ok=True``, a fresh copy of ``AUTHORITY``, and
    an ``npu_proof`` section whose ``ok`` flag is true only when
    ``npu_busy_delta_us`` is an ``int`` greater than zero.
    """
    proof_ok = bool(isinstance(npu_busy_delta_us, int) and npu_busy_delta_us > 0)
    envelope: dict[str, Any] = {"ok": True, "schema": "openvino_advisory_v1"}
    envelope["service"] = service
    envelope["operation"] = operation
    envelope["mode"] = mode
    envelope["trace_id"] = trace_id
    envelope["input_scope"] = input_scope
    envelope["result"] = result
    envelope["npu_proof"] = {"required": True, "ok": proof_ok, "npu_busy_delta_us": npu_busy_delta_us}
    # Copy so callers mutating the envelope cannot alter the module-level table.
    envelope["authority"] = dict(AUTHORITY)
    envelope["warnings"] = warnings or []
    return envelope
 class AdvisoryLogger:
    """Metadata-only SQLite log of advisory envelopes.

    Each call opens its own connection and closes it explicitly. The
    ``sqlite3`` connection context manager only commits or rolls back the
    transaction; it does not close the connection, so relying on it alone
    would leave one open handle per logged request.

    Only envelope metadata and the caller-supplied ``input_ref`` are stored;
    the ``raw_payload`` column is always written as NULL.
    """
    def __init__(self, db_path: str | Path = DEFAULT_LOG_DB):
        """Create the parent directory and the events table if needed."""
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init()
    def _init(self) -> None:
        """Create the ``advisory_events`` table when it does not exist yet."""
        con = sqlite3.connect(self.db_path)
        try:
            with con:  # commits on success, rolls back on error
                con.execute(
                    """
                CREATE TABLE IF NOT EXISTS advisory_events (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    created_at REAL NOT NULL,
                    service TEXT NOT NULL,
                    operation TEXT NOT NULL,
                    mode TEXT NOT NULL,
                    input_scope TEXT NOT NULL,
                    input_ref TEXT NOT NULL,
                    npu_busy_delta_us INTEGER,
                    ok INTEGER NOT NULL,
                    raw_payload TEXT
                )
                """
                )
        finally:
            con.close()
    def log(self, envelope: dict[str, Any], *, input_ref: str) -> None:
        """Insert one row describing ``envelope``.

        Args:
            envelope: Advisory envelope; ``service``, ``operation``, ``mode`` and
                ``input_scope`` are stored via ``str()``, ``ok`` as 0/1, and
                ``npu_proof.npu_busy_delta_us`` as-is.
            input_ref: Opaque reference to the input, stored verbatim.
        """
        proof = envelope.get("npu_proof") or {}
        row = (
            time.time(),
            str(envelope.get("service")),
            str(envelope.get("operation")),
            str(envelope.get("mode")),
            str(envelope.get("input_scope")),
            input_ref,
            proof.get("npu_busy_delta_us"),
            1 if envelope.get("ok") else 0,
        )
        con = sqlite3.connect(self.db_path)
        try:
            with con:  # commits on success, rolls back on error
                con.execute(
                    """
                INSERT INTO advisory_events(created_at, service, operation, mode, input_scope, input_ref,
                                            npu_busy_delta_us, ok, raw_payload)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, NULL)
                """,
                    row,
                )
        finally:
            con.close()
 def classify_text(
    text: str,
    *,
    trace_id: str | None = None,
    http_post_json: Callable[[str, dict[str, Any], float], dict[str, Any]] = http_post_json,
    logger: AdvisoryLogger | None = None,
    timeout_s: float = 20.0,
 ) -> dict[str, Any]:
    """Classify ``text`` through the classifier sidecar and return a shadow-mode envelope.

    The sidecar request is sent with ``dry_run`` set and evidence disabled.
    When a ``logger`` is supplied, the log row references the input only by
    its SHA-256 digest.

    Raises:
        ValueError: if ``text`` is not a non-blank string.
    """
    if not (isinstance(text, str) and text.strip()):
        raise ValueError("text must be a non-empty string")
    request_body = {
        "id": trace_id or "advisory",
        "text": text,
        "options": {"include_evidence": False, "dry_run": True},
    }
    sidecar = http_post_json(CLASSIFIER_URL, request_body, timeout_s)
    advisory_result = {
        "labels": sidecar.get("labels", {}),
        "model": sidecar.get("model"),
        "service_mode": sidecar.get("mode", "dry_run"),
    }
    envelope = build_envelope(
        service="classifier",
        operation="classify",
        mode="shadow",
        input_scope="explicit_text",
        trace_id=trace_id,
        result=advisory_result,
        npu_busy_delta_us=_npu_delta_from(sidecar),
    )
    if logger:
        digest = sha256_text(text)
        logger.log(envelope, input_ref="text:sha256:" + digest)
    return envelope
 def generate_bounded(
    job: str,
    text: str,
    *,
    max_new_tokens: int | None = None,
    trace_id: str | None = None,
    http_post_json: Callable[[str, dict[str, Any], float], dict[str, Any]] = http_post_json,
    logger: AdvisoryLogger | None = None,
    timeout_s: float = 180.0,
 ) -> dict[str, Any]:
    """Request a draft from the GenAI sidecar for one of the allowed job kinds.

    The returned envelope is in ``draft`` mode and its result is marked
    ``final_authority: False``. ``max_new_tokens`` is forwarded only when given.

    Raises:
        ValueError: if ``job`` is not in ``ALLOWED_GENAI_JOBS`` or ``text`` is
            not a non-blank string.
    """
    if job not in ALLOWED_GENAI_JOBS:
        raise ValueError("unsupported advisory generation job")
    if not (isinstance(text, str) and text.strip()):
        raise ValueError("input must be a non-empty string")
    request_body: dict[str, Any] = {"job": job, "input": text}
    if max_new_tokens is not None:
        request_body.update(max_new_tokens=max_new_tokens)
    sidecar = http_post_json(GENAI_URL, request_body, timeout_s)
    draft = {
        "draft_text": sidecar.get("text", ""),
        "json": sidecar.get("json"),
        "timing_ms": sidecar.get("timing_ms"),
        "final_authority": False,
    }
    envelope = build_envelope(
        service="genai",
        operation=f"generate:{job}",
        mode="draft",
        input_scope="explicit_text",
        trace_id=trace_id,
        result=draft,
        npu_busy_delta_us=_npu_delta_from(sidecar),
    )
    if logger:
        digest = sha256_text(text)
        logger.log(envelope, input_ref="text:sha256:" + digest)
    return envelope
 def _resolve_allowed(path: str, allowed_roots: list[str] | None, configured_roots: list[Path] | None = None) -> tuple[Path, list[Path]]:
    configured = [p.expanduser().resolve() for p in (configured_roots or DEFAULT_ALLOWED_ROOTS)]
    if not configured:
        raise ValueError("at least one configured allowed root is required")
    requested = [Path(p).expanduser().resolve() for p in (allowed_roots or [str(p) for p in configured])]
    if not requested:
        raise ValueError("at least one requested allowed root is required")
    for root in requested:
        if not any(root == base or root.is_relative_to(base) for base in configured):
            raise ValueError("requested allowed root is outside configured roots")
    roots = requested
    candidate = Path(path).expanduser().resolve()
    if not any(candidate == root or candidate.is_relative_to(root) for root in roots):
        raise ValueError("path must be inside an allowed root")
    if not candidate.exists() or not candidate.is_file():
        raise ValueError("path must be an existing file")
    return candidate, roots
 def triage_file(
    path: str,
    *,
    allowed_roots: list[str] | None = None,
    configured_roots: list[Path] | None = None,
    trace_id: str | None = None,
    http_post_json: Callable[[str, dict[str, Any], float], dict[str, Any]] = http_post_json,
    logger: AdvisoryLogger | None = None,
    timeout_s: float = 60.0,
 ) -> dict[str, Any]:
    """Send one explicitly named file to the doc-triage sidecar.

    The path is validated by ``_resolve_allowed`` first. The sidecar is asked
    for at most 3 pages. When a ``logger`` is supplied, a warning noting the
    metadata-only log is appended to the envelope and the log row references
    the file by a SHA-256 digest of its resolved path.
    """
    resolved, roots = _resolve_allowed(path, allowed_roots, configured_roots)
    request_body = {
        "path": str(resolved),
        "options": {"allowed_roots": [str(root) for root in roots], "max_pages": 3},
    }
    sidecar = http_post_json(DOC_TRIAGE_URL, request_body, timeout_s)
    busy_delta = _doc_triage_npu_delta(sidecar)
    envelope = build_envelope(
        service="doc_triage",
        operation="triage_file",
        mode="reviewable_artifact",
        input_scope="explicit_file",
        trace_id=trace_id,
        result={"triage": sidecar.get("result"), "final_authority": False},
        npu_busy_delta_us=busy_delta,
    )
    if not logger:
        return envelope
    envelope["warnings"].append("metadata-only log; raw file contents are not logged")
    logger.log(envelope, input_ref="file:sha256path:" + sha256_text(str(resolved)))
    return envelope
 def health(*, http_get_json: Callable[[str, float], dict[str, Any]] = http_get_json) -> dict[str, Any]:
    """Probe each sidecar's ``/healthz`` endpoint and aggregate the results.

    The top-level ``ok`` becomes false only when a probe raises. A dependency
    counts as healthy when its response has a truthy ``ok`` or, if ``ok`` is
    absent, ``status == "ok"``.
    """
    probes = (
        ("classifier", "http://127.0.0.1:18819/healthz"),
        ("genai", "http://127.0.0.1:18820/healthz"),
        ("doc_triage", "http://127.0.0.1:18829/healthz"),
    )
    report: dict[str, Any] = {"ok": True, "service": "openvino-advisory-gateway", "mode": "advisory_only", "authority": dict(AUTHORITY), "dependencies": {}}
    statuses = report["dependencies"]
    for name, url in probes:
        try:
            data = http_get_json(url, 8.0)
            healthy = bool(data.get("ok", data.get("status") == "ok"))
            statuses[name] = {"ok": healthy, "service": data.get("service"), "device": data.get("device")}
        except Exception as exc:
            # An unreachable or malformed dependency marks the whole gateway unhealthy.
            report["ok"] = False
            statuses[name] = {"ok": False, "error": str(exc)}
    return report
 def _read_json(handler: BaseHTTPRequestHandler, max_bytes: int = 256 * 1024) -> dict[str, Any]:
    length = int(handler.headers.get("Content-Length", "0"))
    if length > max_bytes:
        raise ValueError("request JSON too large")
    raw = handler.rfile.read(length)
    if not raw:
        return {}
    return json.loads(raw.decode("utf-8"))
 def make_handler(logger: AdvisoryLogger, configured_roots: list[Path]):
    """Build the request-handler class bound to ``logger`` and ``configured_roots``.

    GET serves the aggregated health report on ``/``, ``/health`` and
    ``/healthz``. POST serves the three ``/v1/advisory/*`` operations. Any
    exception raised while handling a POST is reported as HTTP 400 with the
    exception type, message, and the authority table.
    """
    class Handler(BaseHTTPRequestHandler):
        server_version = "openvino-advisory-gateway/0.1"
        def log_message(self, format: str, *args: Any) -> None:  # noqa: A002 - stdlib override name
            # Only the client address and the stdlib-formatted message are printed;
            # request bodies are never passed to this method.
            client_ip = self.client_address[0]
            print(f"{client_ip} {format % args}")
        def send_json(self, status: int, payload: Any) -> None:
            """Serialize ``payload`` as pretty-printed, key-sorted JSON and send it."""
            encoded = json.dumps(payload, indent=2, sort_keys=True).encode("utf-8")
            self.send_response(status)
            for header, value in (("Content-Type", "application/json"), ("Content-Length", str(len(encoded)))):
                self.send_header(header, value)
            self.end_headers()
            self.wfile.write(encoded)
        def do_GET(self) -> None:  # noqa: N802
            route = urlparse(self.path).path
            if route not in ("/", "/health", "/healthz"):
                self.send_json(404, {"ok": False, "error": "not_found"})
                return
            self.send_json(200, health())
        def do_POST(self) -> None:  # noqa: N802
            route = urlparse(self.path).path
            try:
                # The body is parsed before routing, so a malformed body yields 400
                # even on an unknown path.
                body = _read_json(self)
                if route == "/v1/advisory/classify":
                    envelope = classify_text(str(body.get("text", "")), trace_id=body.get("trace_id"), logger=logger)
                elif route == "/v1/advisory/generate":
                    envelope = generate_bounded(
                        str(body.get("job", "summary")),
                        str(body.get("input", "")),
                        max_new_tokens=body.get("max_new_tokens"),
                        trace_id=body.get("trace_id"),
                        logger=logger,
                    )
                elif route == "/v1/advisory/triage":
                    envelope = triage_file(
                        str(body.get("path", "")),
                        allowed_roots=body.get("allowed_roots"),
                        configured_roots=configured_roots,
                        trace_id=body.get("trace_id"),
                        logger=logger,
                    )
                else:
                    self.send_json(404, {"ok": False, "error": "not_found"})
                    return
                self.send_json(200, envelope)
            except Exception as exc:
                self.send_json(400, {"ok": False, "error": type(exc).__name__, "message": str(exc), "authority": dict(AUTHORITY)})
    return Handler
 def main(argv: list[str] | None = None) -> int:
    """Parse options, validate the bind address, and serve until interrupted.

    Defaults for host, port, and the Docker-bridge opt-in come from the
    ``NPU_ADVISORY_HOST``, ``NPU_ADVISORY_PORT`` and
    ``NPU_ADVISORY_ALLOW_DOCKER_BRIDGE`` environment variables. An invalid
    bind host exits via ``SystemExit`` carrying the validation message.
    """
    env = os.environ
    bridge_default = env.get("NPU_ADVISORY_ALLOW_DOCKER_BRIDGE", "").lower() in {"1", "true", "yes"}
    parser = argparse.ArgumentParser(description="Local-only OpenVINO NPU advisory gateway")
    parser.add_argument("--host", default=env.get("NPU_ADVISORY_HOST", HOST))
    parser.add_argument("--port", type=int, default=int(env.get("NPU_ADVISORY_PORT", str(PORT))))
    parser.add_argument("--log-db", default=str(DEFAULT_LOG_DB))
    parser.add_argument(
        "--allowed-root",
        action="append",
        dest="allowed_roots",
        default=None,
        help="Configured file root allowed for advisory doc/image triage. May be repeated.",
    )
    parser.add_argument(
        "--allow-docker-bridge",
        action="store_true",
        default=bridge_default,
        help="Permit binding to a private Docker bridge IP instead of 127.0.0.1.",
    )
    options = parser.parse_args(argv)
    try:
        validate_bind_host(options.host, allow_docker_bridge=options.allow_docker_bridge)
    except ValueError as exc:
        raise SystemExit(str(exc)) from exc
    root_sources = options.allowed_roots or DEFAULT_ALLOWED_ROOTS
    configured_roots = [Path(entry).expanduser().resolve() for entry in root_sources]
    event_log = AdvisoryLogger(options.log_db)
    handler_cls = make_handler(event_log, configured_roots)
    server = ThreadingHTTPServer((options.host, options.port), handler_cls)
    # One JSON line on stdout announces the effective bind before serving.
    banner = {"service": "openvino-advisory-gateway", "host": options.host, "port": options.port, "mode": "advisory_only"}
    print(json.dumps(banner), flush=True)
    server.serve_forever()
    return 0
 if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,18 @@
 [Unit]
 Description=OpenVINO NPU advisory gateway (Docker bridge, port 18830)
 After=network.target openvino-router-classifier.service openvino-genai-npu-worker.service openvino-doc-image-triage.service
 Wants=openvino-router-classifier.service openvino-genai-npu-worker.service openvino-doc-image-triage.service
 [Service]
 Type=simple
 WorkingDirectory=/home/will/lab/swarm/openvino-advisory-gateway
 Environment=NPU_ADVISORY_HOST=172.19.0.1
 Environment=NPU_ADVISORY_PORT=18830
 Environment=NPU_ADVISORY_ALLOW_DOCKER_BRIDGE=true
 Environment=NPU_ADVISORY_LOG_DB=/home/will/.local/state/openvino-advisory-gateway/events.sqlite
 ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-advisory-gateway/gateway.py --host 172.19.0.1 --port 18830 --allow-docker-bridge --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu
 Restart=on-failure
 RestartSec=5
 [Install]
 WantedBy=default.target
@@ -0,0 +1,146 @@
 from __future__ import annotations
 import json
 import sqlite3
 import sys
 from pathlib import Path
 import pytest
 sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
 import gateway
 def test_authority_envelope_is_advisory_and_forbids_side_effects() -> None:
    """An envelope carries every authority flag as False plus a positive NPU proof."""
    forbidden_flags = (
        "may_route",
        "may_write_memory",
        "may_send_external",
        "may_process_private_dirs",
        "may_execute_tools",
        "may_restart_services",
    )
    envelope = gateway.build_envelope(
        service="classifier",
        operation="classify",
        mode="shadow",
        result={"labels": {"workflow_category": {"value": "devops"}}},
        npu_busy_delta_us=123,
        input_scope="explicit_text",
    )
    assert envelope["ok"] is True
    assert envelope["mode"] == "shadow"
    assert envelope["authority"] == dict.fromkeys(forbidden_flags, False)
    assert envelope["npu_proof"] == {"required": True, "ok": True, "npu_busy_delta_us": 123}
 def test_bind_host_requires_explicit_docker_bridge_approval() -> None:
    """Only loopback, or the bridge IP with explicit opt-in, may be bound."""
    bridge_ip = "172.19.0.1"
    # Accepted binds return without raising.
    gateway.validate_bind_host("127.0.0.1")
    gateway.validate_bind_host(bridge_ip, allow_docker_bridge=True)
    # The bridge IP without opt-in is refused.
    with pytest.raises(ValueError, match="without --allow-docker-bridge"):
        gateway.validate_bind_host(bridge_ip)
    # Opt-in does not extend to other addresses such as the wildcard.
    with pytest.raises(ValueError, match="approved bridge IP"):
        gateway.validate_bind_host("0.0.0.0", allow_docker_bridge=True)
 def test_classify_calls_sidecar_and_logs_metadata_only(tmp_path: Path) -> None:
    """classify_text posts a dry-run request and logs only a digest of the input."""
    message = "Inspect live service status"
    db_file = tmp_path / "events.sqlite"
    recorded: list[tuple[str, dict]] = []
    def fake_post(url: str, payload: dict, timeout_s: float) -> dict:
        recorded.append((url, payload))
        return {
            "labels": {"tool_needed": {"value": True}},
            "npu_busy_delta_us": 55,
            "sysfs_npu_busy_delta_us": 55,
        }
    envelope = gateway.classify_text(
        message,
        trace_id="t1",
        http_post_json=fake_post,
        logger=gateway.AdvisoryLogger(db_file),
    )
    sent_url, sent_payload = recorded[0]
    assert sent_url.endswith(":18819/v1/classify")
    assert sent_payload["options"]["dry_run"] is True
    assert envelope["service"] == "classifier"
    assert envelope["authority"]["may_route"] is False
    assert envelope["npu_proof"]["ok"] is True
    with sqlite3.connect(db_file) as con:
        stored = con.execute("select service, operation, input_ref, raw_payload from advisory_events").fetchone()
    expected_ref = "text:sha256:" + gateway.sha256_text(message)
    assert stored == ("classifier", "classify", expected_ref, None)
 def test_generate_allows_only_bounded_jobs() -> None:
    """A job name outside the allow-list is rejected."""
    def unused_post(*_: object) -> dict:
        return {}
    with pytest.raises(ValueError, match="unsupported advisory generation job"):
        gateway.generate_bounded("primary_chat", "hello", http_post_json=unused_post)
 def test_generate_wraps_draft_without_final_authority() -> None:
    """Generated text is returned as a draft with no final or external authority."""
    canned = {"text": "Short title", "npu_busy_delta_us": 99, "timing_ms": {"total": 10}}
    def fake_post(url: str, payload: dict, timeout_s: float) -> dict:
        return dict(canned)
    envelope = gateway.generate_bounded("title", "Summarize this local health check", http_post_json=fake_post)
    draft = envelope["result"]
    assert envelope["service"] == "genai"
    assert envelope["operation"] == "generate:title"
    assert draft["draft_text"] == "Short title"
    assert draft["final_authority"] is False
    assert envelope["authority"]["may_send_external"] is False
 def test_doc_triage_requires_explicit_file_under_allowed_root(tmp_path: Path) -> None:
    """A file inside the allowed root is forwarded with resolved path and roots."""
    root = tmp_path / "allowed"
    root.mkdir()
    image = root / "synthetic.png"
    image.write_bytes(b"not real image for unit test")
    sidecar_reply = {"ok": True, "result": {"pages": [{"needs_attention": {"embedding": {"verified_npu": True, "npu_busy_delta_us": 42}}}]}}
    def fake_post(url: str, payload: dict, timeout_s: float) -> dict:
        # The sidecar must receive fully resolved locations.
        assert payload["path"] == str(image.resolve())
        assert payload["options"]["allowed_roots"] == [str(root.resolve())]
        return sidecar_reply
    envelope = gateway.triage_file(
        str(image),
        allowed_roots=[str(root)],
        configured_roots=[root],
        http_post_json=fake_post,
    )
    assert envelope["service"] == "doc_triage"
    assert envelope["input_scope"] == "explicit_file"
    assert envelope["npu_proof"]["ok"] is True
 def test_doc_triage_rejects_private_root_broadening(tmp_path: Path) -> None:
    """A path outside the allowed root is refused."""
    root = tmp_path / "allowed"
    root.mkdir()
    outside = tmp_path / "outside.png"
    def unused_post(*_: object) -> dict:
        return {}
    with pytest.raises(ValueError, match="path must be inside an allowed root"):
        gateway.triage_file(
            str(outside),
            allowed_roots=[str(root)],
            configured_roots=[root],
            http_post_json=unused_post,
        )
 def test_doc_triage_rejects_requested_root_outside_configured_roots(tmp_path: Path) -> None:
    """A caller cannot widen access by requesting a root outside the configured ones."""
    configured_root = tmp_path / "configured"
    private_root = tmp_path / "private"
    private_root.mkdir()
    private_file = private_root / "file.png"
    private_file.write_bytes(b"synthetic")
    def unused_post(*_: object) -> dict:
        return {}
    with pytest.raises(ValueError, match="requested allowed root is outside configured roots"):
        gateway.triage_file(
            str(private_file),
            allowed_roots=[str(private_root)],
            configured_roots=[configured_root],
            http_post_json=unused_post,
        )
 def test_health_aggregates_dependencies_without_raw_private_data() -> None:
    """health() reports all three dependencies and includes no raw text fields."""
    def fake_get(url: str, timeout_s: float) -> dict:
        _, _, tail = url.rpartition(":")
        return {"ok": True, "service": tail}
    report = gateway.health(http_get_json=fake_get)
    serialized = json.dumps(report).lower()
    assert report["ok"] is True
    assert set(report["dependencies"]) == {"classifier", "genai", "doc_triage"}
    assert "raw" not in serialized
@@ -0,0 +1,339 @@
 # OpenVINO NPU classifier/router dry-run contract
 Status: specification for dry-run prototype refresh
 Target port: `127.0.0.1:18819`
 Owner context: Atlas/Hermes local assistant sidecar evaluation
 This service is an advisory classifier for Atlas/Hermes automation hints. It may suggest labels such as tool-needed, memory-candidate type, urgency, workflow category, and safety-confirmation-required, but it must not make or enforce live routing, memory, tool, or safety decisions without a separate explicit approval from Will.
 ## Recommended model and runtime
 Recommended v1 runtime: small local Python HTTP/CLI service backed by the existing OpenVINO NPU embeddings service on `127.0.0.1:18817`.
 Recommended v1 model shape:
 - Primary signal: `bge-base-en-v1.5-int8-ov` embeddings from the live embeddings service.
 - Classifier layer: inspectable deterministic rules plus cosine similarity against curated synthetic/prototype utterances.
 - Model label: `bge-base-en-v1.5-int8-ov/prototype-router-v0`.
 - Device proof: request-level `npu_busy_delta_us` from `:18817` plus direct sysfs before/after reads from `/sys/class/accel/accel0/device/npu_busy_time_us`.
 Why this is preferred for the dry run:
 1. It reuses the already-live NPU embeddings path rather than adding a second model conversion/runtime dependency before contract validation.
 2. Rules and prototypes are transparent enough for safety-sensitive routing hints; a reviewer can inspect why a message was labeled.
 3. It avoids fine-tuning or training on private Atlas/Hermes transcripts.
 4. It keeps the service small, localhost-only, and easy to start/stop during smoke tests.
 5. It produces NPU activity through the embeddings path while making clear that final decision logic remains advisory.
 Defer a dedicated NPU sequence-classification model such as TinyBERT/MiniLM until the dry-run labels and thresholds have been evaluated against synthetic fixtures and explicitly-approved non-private examples. If pursued later, use OpenVINO Runtime/Optimum export with fixed input shapes suitable for NPU, and keep the rule layer for safety gates.
 ## Non-goals and safety invariants
 The service must not:
 - Change Hermes/Atlas model routing, gateway routing, memory writes, tool-use permissions, or safety-confirmation behavior.
 - Restart, stop, enable, or persist any live Atlas/Hermes/gateway/RAG service.
 - Bind to anything broader than `127.0.0.1` by default.
 - Mutate Chroma/vector collections, trigger reindexing, or write to RAG state.
 - Process private document/image directories or private transcript dumps for smoke testing.
 - Log raw prompts by default anywhere other than normal foreground stderr during local review.
 - Claim NPU success from HTTP 200 alone.
 ## Endpoint contract
 All HTTP endpoints are local-only by default.
 Base URL:
 ```text
 http://127.0.0.1:18819
 ```
 ### GET `/healthz`, `/health`, `/readyz`, `/`
 Purpose: liveness/readiness metadata.
 Response fields:
 - `status`: `starting | ok`
 - `service`: `atlas-router-classifier`
 - `version`: service version string
 - `mode`: always `dry_run`
 - `model`: model/runtime label
 - `embed_url`: upstream embeddings URL
 - `device`: expected to say `NPU-via-embedding-service` or equivalent
 - `labels`: supported label names
 - `embedding_dim`: embedding dimension after warmup
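 - `prototype_count`: number of synthetic prototype examples loaded
 - `max_batch_size`: maximum number of items accepted by `/v1/batch_classify`
 - `prototype_npu_busy_delta_us`: warmup delta reported by upstream embeddings, if available
 - `npu_busy_time_us`: current sysfs counter value, if readable
 - `uptime_s`: seconds since the classifier service object was created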
 - `warnings`: list of non-fatal warnings
 A healthy service is not enough to prove NPU execution. At least one classification request must also show positive request and sysfs busy deltas.
 ### GET `/v1/labels`
 Purpose: publish schema information without dumping private examples.
 Response fields:
 - `model`
 - `thresholds`
  - `tool_needed`: recommended threshold `0.72`
  - `memory_candidate`: recommended threshold `0.78`
  - `safety_confirmation_required`: recommended threshold `0.80`
  - `workflow_category`: recommended threshold `0.52`
 - `enums`
  - `memory_candidate`: `none`, `user_preference`, `durable_user_fact`, `environment_fact`, `workflow_convention`, `skill_candidate`
  - `urgency`: `low`, `normal`, `high`, `critical`
  - `workflow_category`: `chat`, `research`, `coding`, `debugging`, `devops`, `smart_home`, `media`, `note_taking`, `productivity`, `kanban`, `unknown`
 - `limits`
  - `max_batch_size`: maximum number of items accepted by `/v1/batch_classify`
 - `prototype_ids`: names of curated synthetic prototype buckets
 ### POST `/v1/classify`
 Purpose: classify one user/task message for advisory dry-run hints.
 Request:
 ```json
 {
  "id": "optional-trace-id",
  "text": "Urgent: check whether port 18817 is listening and inspect systemd logs.",
  "context": {
    "platform": "cli",
    "source": "user"
  },
  "options": {
    "include_evidence": true,
    "include_embedding_debug": false,
    "dry_run": true
  }
 }
 ```
 Required behavior:
 - Reject empty text with HTTP 400.
 - Default `dry_run` to true.
 - Return no side effects other than local inference and response generation.
 - Include evidence by default unless `include_evidence=false`.
 - Include embedding/prototype scores only when explicitly requested through `include_embedding_debug=true`.
 Response:
 ```json
 {
  "id": "optional-trace-id",
  "model": "bge-base-en-v1.5-int8-ov/prototype-router-v0",
  "created": 1780590000,
  "duration_ms": 12.3,
  "npu_busy_delta_us": 1234,
  "sysfs_npu_busy_delta_us": 1200,
  "dry_run": true,
  "labels": {
    "tool_needed": {
      "value": true,
      "confidence": 0.84,
      "threshold": 0.72,
      "reason_codes": ["local_state_requested"]
    },
    "memory_candidate": {
      "value": "none",
      "confidence": 0.31,
      "threshold": 0.78,
      "reason_codes": []
    },
    "urgency": {
      "value": "high",
      "confidence": 0.84,
      "scores": {"low": 0.0, "normal": 0.2, "high": 0.84, "critical": 0.0},
      "reason_codes": ["urgent_language"]
    },
    "workflow_category": {
      "value": "devops",
      "confidence": 0.86,
      "scores": {"devops": 0.86, "unknown": 0.14}
    },
    "safety_confirmation_required": {
      "value": false,
      "confidence": 0.0,
      "threshold": 0.8,
      "reason_codes": []
    }
  },
  "warnings": [],
  "evidence": []
 }
 ```
 ### POST `/v1/batch_classify`
 Purpose: classify a bounded batch of non-private synthetic or explicitly-approved messages.
 Request:
 ```json
 {
  "items": [
    {"id": "m1", "text": "What time is it in Seattle right now?"},
    {"id": "m2", "text": "Restart the live Atlas gateway and switch primary routing."}
  ],
  "options": {"include_evidence": false, "dry_run": true}
 }
 ```
 Response:
 - `model`
 - `duration_ms`
 - `npu_busy_delta_us`: aggregate NPU busy delta for the whole batch
 - `results`: array of `/v1/classify` responses
 Batch limits for prototype review:
 - Keep batches small; the prototype rejects empty batches and batches larger than `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE` (default `32`).
 - Use only synthetic fixtures unless Will explicitly approves a real non-private sample set.
 - Do not retain request bodies to disk.
 ## CLI contract
 The same implementation should support foreground review from the service directory:
 ```bash
 cd /home/will/lab/swarm/openvino-classifier-npu
 /home/will/.venvs/npu/bin/python router_classifier.py \
  --host 127.0.0.1 \
  --port 18819 \
  --embed-url http://127.0.0.1:18817/v1/embeddings
 ```
 Required flags/env:
 - `--host` / `OPENVINO_CLASSIFIER_HOST`; default `127.0.0.1`.
 - `--port` / `OPENVINO_CLASSIFIER_PORT`; default `18819`.
 - `--embed-url` / `OPENVINO_CLASSIFIER_EMBED_URL`; default `http://127.0.0.1:18817/v1/embeddings`.
 - `--timeout-s` / `OPENVINO_CLASSIFIER_TIMEOUT_S`; default `30`.
 - `--max-batch-size` / `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE`; default `32`.
 - `--no-warmup` to defer prototype embedding until first request.
 A future dedicated CLI mode may be added for one-shot JSONL classification, but foreground HTTP review is sufficient for the dry-run contract.
 ## Synthetic smoke-test plan
 Preconditions:
 1. Confirm `:18817` embeddings service is healthy.
 2. Confirm `:18819` is not already listening.
 3. Read `/sys/class/accel/accel0/device/npu_busy_time_us` before starting the request smoke.
 4. Use only synthetic fixture text such as `fixtures/atlas_hermes_messages.jsonl`.
 Unit/schema smoke, no NPU dependency:
 ```bash
 cd /home/will/lab/swarm
 /home/will/.venvs/npu/bin/python -m unittest discover -s openvino-classifier-npu/tests -v
 ```
 Foreground service smoke:
 ```bash
 ss -ltnp | grep ':18819\b' || true
 cd /home/will/lab/swarm/openvino-classifier-npu
 /home/will/.venvs/npu/bin/python router_classifier.py --host 127.0.0.1 --port 18819
 ```
 From another shell:
 ```bash
 curl -fsS http://127.0.0.1:18819/healthz | jq .
 curl -fsS http://127.0.0.1:18819/v1/labels | jq .
 curl -fsS http://127.0.0.1:18819/v1/classify \
  -H 'Content-Type: application/json' \
  -d '{"id":"smoke-devops","text":"Urgent: check whether port 18817 is listening and inspect systemd logs.","options":{"include_evidence":true,"dry_run":true}}' | jq .
 curl -fsS http://127.0.0.1:18819/v1/classify \
  -H 'Content-Type: application/json' \
  -d '{"id":"smoke-safety","text":"Restart the live Atlas gateway and switch primary routing to the new classifier.","options":{"include_evidence":true,"dry_run":true}}' | jq .
 ```
 Expected label checks:
 - `smoke-devops`: `tool_needed.value=true`, `urgency.value=high`, `workflow_category.value=devops`.
 - `smoke-safety`: `safety_confirmation_required.value=true`, no actual restart or routing change.
 - Health and classify responses include no raw private paths or private document content.
 Shutdown:
 - Stop the foreground server with Ctrl-C.
 - Re-run `ss -ltnp | grep ':18819\b' || true` and confirm no listener remains.
 ## NPU busy-time verification plan
 Use sysfs plus service response fields; do not accept HTTP 200 alone.
 ```bash
 BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
 before=$(cat "$BUSY")
 response=$(curl -fsS http://127.0.0.1:18819/v1/classify \
  -H 'Content-Type: application/json' \
  -d '{"id":"npu-proof","text":"Check current systemd service status for the embeddings service.","options":{"include_evidence":false,"dry_run":true}}')
 after=$(cat "$BUSY")
 echo "$response" | jq '{npu_busy_delta_us, sysfs_npu_busy_delta_us, warnings}'
 echo "outer_sysfs_npu_busy_delta_us=$((after-before))"
 ```
 Optional localhost smoke helper, after starting the foreground service:
 ```bash
 /home/will/.venvs/npu/bin/python openvino-classifier-npu/smoke_classifier.py \
  --base-url http://127.0.0.1:18819
 ```
 Acceptance for an NPU-backed classification request:
 - HTTP request succeeds.
 - Response `npu_busy_delta_us > 0` from upstream embeddings.
 - Response `sysfs_npu_busy_delta_us > 0` when sysfs is readable.
 - Outer shell `after-before > 0`.
 - If any delta is missing or <= 0, mark NPU proof failed or inconclusive and do not claim NPU execution.
 ## Docs and diagram implications
 If this prototype is refreshed or reviewed, update documentation to show:
 - Live baseline remains RAG `:18810`, RAG health `:18814`, Whisper NPU `:18816`, and embeddings `:18817`.
 - Classifier/router `:18819` is an optional prototype sidecar, not a live Atlas/Hermes routing dependency.
 - Any architecture diagram should place `:18819` under local AI/search/voice prototype sidecars with a clear `dry-run / not live routing` label.
 - Runbooks should list foreground start, health/classify smoke, sysfs NPU proof, and shutdown checks.
 - Service catalog entries should state `not installed/enabled` until Will approves persistent service enablement.
 - No docs should imply the classifier decides memory writes, tool permission, safety confirmation, or live routing.
 Relevant docs inventory:
 - `docs/swarm-infrastructure.md`
 - `docs/swarm-infrastructure.html`
 - `docs/diagram-maintenance.md`
 - `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md`
 - `swarm-common/obsidian-vault/will/will-shared-zap/Resources/Service Catalog.md`
 ## No-go / defer criteria
 Do not proceed to implementation refresh, persistent service enablement, or live integration if any of the following hold:
 - `:18817` embeddings is unavailable and no approved NPU embedding fallback exists.
 - `/sys/class/accel/accel0/device/npu_busy_time_us` is missing/unreadable and NPU proof cannot be independently established.
 - Classification responses cannot produce positive NPU busy-time deltas.
 - `:18819` is already occupied by an unknown or live service.
 - Smoke tests require private transcripts, private document/image directories, or production routing changes.
 - Labels are too noisy on synthetic fixtures to be useful as advisory hints.
 - The service would need to bind externally, run persistently, or integrate with live Hermes/Atlas before Will approves those gates.
 - Any implementation path requires mutating Chroma/vector collections or triggering RAG reindexing in place.
 ## Implementation handoff notes
 Recommended next engineer actions:
 1. Verify or refresh `openvino-classifier-npu/router_classifier.py` to match this contract.
 2. Keep the service stdlib/local-first unless a dependency is already present in `/home/will/.venvs/npu`.
 3. Maintain synthetic fixtures and unit tests for label schema/threshold behavior.
 4. Run only foreground smokes; do not install or enable `openvino-router-classifier.service`.
 5. Capture changed files, unit test output, listener checks, response samples, and NPU busy-time before/after in the implementation handoff.
@@ -2,6 +2,10 @@
 Dry-run Atlas/Hermes message classifier/router prototype.
 The detailed dry-run contract is in [`CONTRACT.md`](./CONTRACT.md), including the
 recommended model/runtime, HTTP/CLI schema, smoke-test plan, NPU busy-time proof,
 docs/diagram implications, and no-go/defer criteria.
 It reuses the existing OpenVINO NPU embeddings service on `127.0.0.1:18817` and
 serves an inspectable stdlib HTTP API on `127.0.0.1:18819`. It does not change
 live Hermes/Atlas routing, write memory, mutate vector collections, restart
@@ -13,6 +17,7 @@ services, or send external messages.
 - Default port: `18819`
 - Default bind: `127.0.0.1`
 - Upstream: `http://127.0.0.1:18817/v1/embeddings`
 - Batch limit: `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE`, default `32`
 - Model label: `bge-base-en-v1.5-int8-ov/prototype-router-v0`
 - NPU proof: `/sys/class/accel/accel0/device/npu_busy_time_us` before/after plus upstream `npu_busy_delta_us`
@@ -86,6 +91,10 @@ cd /home/will/lab/swarm/openvino-classifier-npu
 /home/will/.venvs/npu/bin/python router_classifier.py --host 127.0.0.1 --port 18819
 ```
 Environment variables mirror the flags: `OPENVINO_CLASSIFIER_HOST`,
 `OPENVINO_CLASSIFIER_PORT`, `OPENVINO_CLASSIFIER_EMBED_URL`,
 `OPENVINO_CLASSIFIER_TIMEOUT_S`, and `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE`.
 Then from another shell:
 ```bash
@@ -98,6 +107,15 @@ curl -fsS http://127.0.0.1:18819/v1/classify \
 A valid NPU-backed response must have positive `npu_busy_delta_us`; HTTP 200 by
 itself is not considered proof.
 Synthetic fixture smoke helper, after the foreground service is running:
 ```bash
 /home/will/.venvs/npu/bin/python smoke_classifier.py --base-url http://127.0.0.1:18819
 ```
 The helper refuses non-local URLs, checks fixture label expectations, and prints
 response plus outer sysfs NPU busy deltas.
 ## Tests
 Unit tests use a fake embedding client and do not touch the NPU:
@@ -110,13 +128,13 @@ Fixture messages live at `fixtures/atlas_hermes_messages.jsonl`.
 ## Optional systemd user unit
-A draft unit is included as `openvino-router-classifier.service`. Install only
+A reviewed local-only user service unit is included as `openvino-router-classifier.service`. Install/enable it when the dry-run classifier should persist across logins, but only
 after review/approval:
 ```bash
 cp openvino-router-classifier.service ~/.config/systemd/user/openvino-router-classifier.service
 systemctl --user daemon-reload
 systemctl --user enable --now openvino-router-classifier.service
 systemctl --user status openvino-router-classifier.service --no-pager
 ```
-Do not enable it as part of this prototype task without explicit approval.
+The service is persistent, but classifier decisions remain dry-run until a separate approved routing change lands. Do not connect it to live Atlas/Hermes routing, memory writes, service restarts, or outbound messages.
@@ -9,6 +9,7 @@ WorkingDirectory=/home/will/lab/swarm/openvino-classifier-npu
 Environment=OPENVINO_CLASSIFIER_HOST=127.0.0.1
 Environment=OPENVINO_CLASSIFIER_PORT=18819
 Environment=OPENVINO_CLASSIFIER_EMBED_URL=http://127.0.0.1:18817/v1/embeddings
 Environment=OPENVINO_CLASSIFIER_MAX_BATCH_SIZE=32
 ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-classifier-npu/router_classifier.py
 Restart=on-failure
 RestartSec=5
@@ -30,6 +30,7 @@ MODEL = "bge-base-en-v1.5-int8-ov/prototype-router-v0"
 DEFAULT_HOST = "127.0.0.1"
 DEFAULT_PORT = 18819
 DEFAULT_EMBED_URL = "http://127.0.0.1:18817/v1/embeddings"
 DEFAULT_MAX_BATCH_SIZE = 32
 NPU_BUSY_FILE = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
 WORKFLOW_CATEGORIES = [
@@ -150,6 +151,26 @@ def npu_busy_time_us() -> int | None:
        return None
 def env_int(name: str, default: int) -> int:
    """Read environment variable ``name`` as an integer.

    Returns ``default`` when the variable is unset. A value that ``int()``
    cannot parse aborts the process with ``SystemExit`` and a message naming
    the variable and the offending value.
    """
    text = os.environ.get(name)
    if text is not None:
        try:
            return int(text)
        except ValueError as err:
            raise SystemExit(f"{name} must be an integer, got {text!r}") from err
    return default
 def env_float(name: str, default: float) -> float:
    """Read environment variable ``name`` as a float.

    Returns ``default`` when the variable is unset. A value that ``float()``
    cannot parse aborts the process with ``SystemExit`` and a message naming
    the variable and the offending value.
    """
    text = os.environ.get(name)
    if text is not None:
        try:
            return float(text)
        except ValueError as err:
            raise SystemExit(f"{name} must be a number, got {text!r}") from err
    return default
 def clamp01(value: float) -> float:
    """Clamp ``value`` into the closed interval [0.0, 1.0]."""
    # Apply the upper bound first, then the lower bound.
    capped = min(1.0, value)
    return max(0.0, capped)
@@ -220,9 +241,10 @@ class EmbeddingClient:
 class ClassifierService:
-    def __init__(self, embed_url: str, *, timeout_s: float = 30.0) -> None:
+    def __init__(self, embed_url: str, *, timeout_s: float = 30.0, max_batch_size: int = DEFAULT_MAX_BATCH_SIZE) -> None:
        self.embed_url = embed_url
        self.client = EmbeddingClient(embed_url, timeout_s=timeout_s)
        self.max_batch_size = max(1, int(max_batch_size))
        self.loaded_at = time.time()
        self.prototype_texts: list[str] = []
        self.prototype_keys: list[str] = []
@@ -255,6 +277,7 @@ class ClassifierService:
            "labels": ["tool_needed", "memory_candidate", "urgency", "workflow_category", "safety_confirmation_required"],
            "embedding_dim": self.embedding_dim,
            "prototype_count": len(self.prototype_texts),
            "max_batch_size": self.max_batch_size,
            "prototype_npu_busy_delta_us": self.prototype_npu_busy_delta_us,
            "npu_busy_time_us": npu_busy_time_us(),
            "uptime_s": round(time.time() - self.loaded_at, 3),
@@ -271,6 +294,7 @@ class ClassifierService:
                "workflow_category": 0.52,
            },
            "enums": {"memory_candidate": MEMORY_VALUES, "urgency": URGENCY_VALUES, "workflow_category": WORKFLOW_CATEGORIES},
            "limits": {"max_batch_size": self.max_batch_size},
            "prototype_ids": sorted(PROTOTYPES),
        }
@@ -351,6 +375,10 @@ class ClassifierService:
        return response
    def batch_classify(self, items: list[dict[str, Any]], options: dict[str, Any] | None = None) -> dict[str, Any]:
        if not items:
            raise ValueError("items must contain at least one classification request")
        if len(items) > self.max_batch_size:
            raise ValueError(f"items exceeds max_batch_size={self.max_batch_size}")
        started = time.perf_counter()
        results = [self.classify(item.get("id"), str(item.get("text") or ""), options) for item in items]
        return {
@@ -400,13 +428,15 @@ class ClassifierService:
        high_rule, high_codes, high_ev = best_rule(text, "urgency_high")
        critical_rule, critical_codes, critical_ev = best_rule(text, "urgency_critical")
        low_rule = 0.82 if re.search(r"\b(no rush|whenever convenient|low priority|someday|backlog)\b", text, re.I) else 0.0
-        # Urgency is safety-sensitive for notifications. Prefer explicit rules;
+        # Urgency is safety-sensitive for notifications, so require explicit
-        # use prototype scores only when they are unusually strong.
+        # language instead of relying on broad prototype similarity.
        score_map = {
-            "low": max(low_rule, scores.get("urgency_low", 0.0) if scores.get("urgency_low", 0.0) >= 0.9 else 0.0),
+            # Urgency should be explicit; broad embedding similarity otherwise
            # turns neutral requests such as "what time is it" into low/high/critical urgency.
            "low": low_rule,
            "normal": 0.68,
-            "high": max(high_rule, scores.get("urgency_high", 0.0) if scores.get("urgency_high", 0.0) >= 0.9 else 0.0),
+            "high": high_rule,
-            "critical": max(critical_rule, scores.get("urgency_critical", 0.0) if scores.get("urgency_critical", 0.0) >= 0.92 else 0.0),
+            "critical": critical_rule,
        }
        if score_map["critical"] >= 0.9:
            score_map["normal"] = 0.05
@@ -509,13 +539,14 @@ class Handler(BaseHTTPRequestHandler):
 def main() -> int:
    parser = argparse.ArgumentParser(description="Dry-run Atlas/Hermes router classifier")
    parser.add_argument("--host", default=os.environ.get("OPENVINO_CLASSIFIER_HOST", DEFAULT_HOST))
-    parser.add_argument("--port", type=int, default=int(os.environ.get("OPENVINO_CLASSIFIER_PORT", DEFAULT_PORT)))
+    parser.add_argument("--port", type=int, default=env_int("OPENVINO_CLASSIFIER_PORT", DEFAULT_PORT))
    parser.add_argument("--embed-url", default=os.environ.get("OPENVINO_CLASSIFIER_EMBED_URL", DEFAULT_EMBED_URL))
-    parser.add_argument("--timeout-s", type=float, default=float(os.environ.get("OPENVINO_CLASSIFIER_TIMEOUT_S", "30")))
+    parser.add_argument("--timeout-s", type=float, default=env_float("OPENVINO_CLASSIFIER_TIMEOUT_S", 30.0))
    parser.add_argument("--max-batch-size", type=int, default=env_int("OPENVINO_CLASSIFIER_MAX_BATCH_SIZE", DEFAULT_MAX_BATCH_SIZE))
    parser.add_argument("--no-warmup", action="store_true", help="skip prototype embedding warmup until first request")
    args = parser.parse_args()
-    service = ClassifierService(args.embed_url, timeout_s=args.timeout_s)
+    service = ClassifierService(args.embed_url, timeout_s=args.timeout_s, max_batch_size=args.max_batch_size)
    if not args.no_warmup:
        service.warmup()
    httpd = ThreadingHTTPServer((args.host, args.port), Handler)
@@ -0,0 +1,113 @@
 #!/usr/bin/env python3
 """Local-only smoke test for the dry-run OpenVINO router classifier.
 This script uses only synthetic fixture messages. It assumes router_classifier.py is
 already running on localhost and never installs/enables a persistent service.
 """
 from __future__ import annotations
 import argparse
 import json
 import sys
 import time
 import urllib.error
 import urllib.request
 from pathlib import Path
 from typing import Any
 # Default value of --base-url: the classifier service on loopback.
 DEFAULT_BASE_URL = "http://127.0.0.1:18819"
 # sysfs counter read before and after the run to compute the outer NPU busy delta.
 BUSY_FILE = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
 # JSONL fixture messages, resolved relative to this script's own directory.
 FIXTURE = Path(__file__).resolve().parent / "fixtures" / "atlas_hermes_messages.jsonl"
 def npu_busy_time_us() -> int | None:
    """Return the integer stored in ``BUSY_FILE``, or ``None`` on any failure.

    Reading or parsing errors are deliberately swallowed so callers can treat
    an unreadable counter as "no sysfs evidence".
    """
    try:
        counter_text = BUSY_FILE.read_text()
        return int(counter_text.strip())
    except Exception:
        return None
 def get_json(url: str, timeout_s: float) -> dict[str, Any]:
    """GET ``url`` and return the response body parsed as UTF-8 JSON."""
    with urllib.request.urlopen(url, timeout=timeout_s) as reply:  # noqa: S310 - localhost smoke URL
        body = reply.read()
    return json.loads(body.decode("utf-8"))
 def post_json(url: str, payload: dict[str, Any], timeout_s: float) -> dict[str, Any]:
    """POST ``payload`` as JSON to ``url`` and return the parsed JSON response."""
    encoded_body = json.dumps(payload).encode("utf-8")
    outgoing = urllib.request.Request(
        url,
        data=encoded_body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(outgoing, timeout=timeout_s) as reply:  # noqa: S310 - localhost smoke URL
        body = reply.read()
    return json.loads(body.decode("utf-8"))
 def load_fixture(limit: int) -> list[dict[str, Any]]:
    """Parse every non-blank line of ``FIXTURE`` as JSON and return ``rows[:limit]``."""
    parsed: list[dict[str, Any]] = []
    for raw_line in FIXTURE.read_text().splitlines():
        # Whitespace-only lines are skipped rather than parsed.
        if raw_line.strip():
            parsed.append(json.loads(raw_line))
    return parsed[:limit]
 def assert_expected(result: dict[str, Any], expected: dict[str, Any]) -> list[str]:
    """Compare a classify response against expected label values.

    For each ``label -> value`` pair in ``expected``, looks up
    ``result["labels"][label]["value"]`` (missing pieces read as ``None``) and
    returns one human-readable message per mismatch; an empty list means
    every expectation matched.
    """
    observed = result.get("labels", {})
    mismatches: list[str] = []
    for name, wanted in expected.items():
        got = observed.get(name, {}).get("value")
        if got == wanted:
            continue
        mismatches.append(f"{result.get('id')}: {name} expected {wanted!r}, got {got!r}")
    return mismatches
 def main() -> int:
    """Run the localhost smoke test against an already-running classifier.

    Parses ``--base-url``, ``--timeout-s`` and ``--limit``; refuses any base URL
    whose scheme is not ``http`` or whose host is not ``127.0.0.1``/``localhost``;
    then calls ``/healthz``, ``/v1/labels`` and ``/v1/classify`` for up to
    ``--limit`` fixture rows, reading the sysfs busy counter before and after.
    A JSON summary is printed to stdout.

    Returns:
        0 when every fixture expectation matched and NPU activity was proven,
        otherwise 1.

    Raises:
        SystemExit: for a non-local base URL, or when a request, the fixture
            read, or JSON decoding fails.
    """
    # Local import: only this function needs URL parsing.
    from urllib.parse import urlsplit

    parser = argparse.ArgumentParser(description="Smoke-test a running localhost router classifier")
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
    parser.add_argument("--timeout-s", type=float, default=30.0)
    parser.add_argument("--limit", type=int, default=10)
    args = parser.parse_args()
    # Validate the parsed host, not a string prefix: a prefix check accepts
    # "http://127.0.0.1:18819@example.com", whose actual host is example.com.
    try:
        target = urlsplit(args.base_url)
        target_host = target.hostname
    except ValueError:
        target, target_host = None, None
    if target is None or target.scheme != "http" or target_host not in ("127.0.0.1", "localhost"):
        raise SystemExit("refusing non-local base URL; this smoke is localhost-only")
    base_url = args.base_url.rstrip("/")
    before = npu_busy_time_us()
    started = time.perf_counter()
    try:
        health = get_json(f"{base_url}/healthz", args.timeout_s)
        labels = get_json(f"{base_url}/v1/labels", args.timeout_s)
        rows = load_fixture(args.limit)
        results = []
        failures: list[str] = []
        for row in rows:
            result = post_json(
                f"{base_url}/v1/classify",
                {"id": row["id"], "text": row["text"], "options": {"include_evidence": False, "dry_run": True}},
                args.timeout_s,
            )
            results.append(result)
            failures.extend(assert_expected(result, row.get("expected", {})))
        after = npu_busy_time_us()
    except (OSError, ValueError) as exc:
        # URLError/HTTPError are OSError subclasses; OSError also covers socket
        # timeouts and a missing fixture file, ValueError covers malformed JSON.
        raise SystemExit(f"smoke failed: {exc}") from exc
    response_npu_delta = sum((r.get("npu_busy_delta_us") or 0) for r in results)
    outer_sysfs_delta = None if before is None or after is None else after - before
    # An unreadable sysfs counter does not fail the proof; a readable one must increase.
    npu_proven = response_npu_delta > 0 and (outer_sysfs_delta is None or outer_sysfs_delta > 0)
    summary = {
        "ok": not failures,
        "service": health.get("service"),
        "mode": health.get("mode"),
        "model": health.get("model"),
        # Despite the key name, this counts the prototype ids reported by /v1/labels.
        "label_count": len(labels.get("prototype_ids", [])),
        "fixture_count": len(results),
        "duration_ms": round((time.perf_counter() - started) * 1000, 3),
        "response_npu_busy_delta_us": response_npu_delta,
        "outer_sysfs_npu_busy_delta_us": outer_sysfs_delta,
        "npu_proven": npu_proven,
        "failures": failures,
    }
    print(json.dumps(summary, indent=2, sort_keys=True))
    return 0 if not failures and npu_proven else 1
 if __name__ == "__main__":
    raise SystemExit(main())
@@ -88,6 +88,14 @@ class RouterClassifierTests(unittest.TestCase):
        self.assertEqual(len(result["results"]), 2)
        self.assertGreater(result["npu_busy_delta_us"], 0)
    def test_batch_limits_are_enforced(self):
        """Empty batches and batches one item over the default limit both raise ValueError."""
        svc = self.service()
        with self.assertRaisesRegex(ValueError, "at least one"):
            svc.batch_classify([])
        # One item more than the module-level default batch size.
        oversize = router_classifier.DEFAULT_MAX_BATCH_SIZE + 1
        too_many = []
        for index in range(oversize):
            too_many.append({"id": str(index), "text": "What time is it?"})
        with self.assertRaisesRegex(ValueError, "max_batch_size"):
            svc.batch_classify(too_many)
    def test_fixture_file_is_valid_jsonl(self):
        fixture = ROOT / "fixtures" / "atlas_hermes_messages.jsonl"
        rows = [json.loads(line) for line in fixture.read_text().splitlines() if line.strip()]
@@ -97,6 +105,17 @@ class RouterClassifierTests(unittest.TestCase):
            self.assertIn("text", row)
            self.assertIn("expected", row)
    def test_synthetic_fixture_expectations(self):
        """Classify each fixture row and compare every expected label value."""
        svc = self.service()
        fixture_path = ROOT / "fixtures" / "atlas_hermes_messages.jsonl"
        rows = []
        for line in fixture_path.read_text().splitlines():
            # Blank lines in the JSONL file are ignored.
            if line.strip():
                rows.append(json.loads(line))
        for row in rows:
            # One subTest per row so a single bad fixture does not hide the others.
            with self.subTest(row=row["id"]):
                outcome = svc.classify(row["id"], row["text"], {"include_evidence": False})
                observed = outcome["labels"]
                for label_name, expected_value in row["expected"].items():
                    self.assertEqual(observed[label_name]["value"], expected_value)
 if __name__ == "__main__":
    unittest.main()
@@ -1,7 +1,8 @@
 # OpenVINO NPU document/image triage prototype
-Local-only prototype for triaging screenshots, photos/scans, and PDF page images.
+Local-only, CLI-first prototype for triaging screenshots, photos/scans, and PDF page images.
 It returns structured JSON metadata and explicitly reports CPU vs NPU stages.
 Optional HTTP is a localhost/loopback-only prototype on `127.0.0.1:18829` when explicitly started; non-loopback binds are rejected and it is not a live Atlas/Hermes/RAG integration.
 Location: `/home/will/lab/swarm/openvino-doc-image-triage-npu/`
@@ -13,6 +14,8 @@ Location: `/home/will/lab/swarm/openvino-doc-image-triage-npu/`
 - Full source paths are omitted by default; responses include basename and SHA-256.
 - Allowed roots are enforced for CLI/server requests.
 - This prototype does not mutate Obsidian, RAG, Chroma, vector collections, routing, or gateway services.
 - Do not process broad private document/image directories; use generated synthetic fixtures unless Will explicitly approves a narrow source root.
 - See `SPEC.md` for the full CLI contract, smoke-test plan, NPU verification plan, docs implications, and no-go/defer criteria.
 ## CPU vs NPU stages
@@ -35,6 +38,7 @@ Not configured in v1:
 - `triage.py` — core library and CLI.
 - `server.py` — stdlib HTTP server with `/healthz`, `/models`, `/triage`, `/triage/batch`.
 - `openvino-doc-image-triage.service` — local-only user-systemd service template for `127.0.0.1:18829`, limited to this prototype directory as its default allowed root.
 - `make_samples.py` — creates synthetic non-private image/PDF samples.
 - `tests/smoke_test.py` — end-to-end smoke test, including NPU busy-time verification when `:18817` is reachable.
 - `samples/` — generated synthetic fixtures.
@@ -88,29 +92,40 @@ Include OCR/sidecar text in a single response only when explicitly requested:
 ## HTTP usage
-Check that port 18820 is free first:
+The prototype is CLI-first, and the local HTTP wrapper can be run as a reviewed user-systemd service on `127.0.0.1:18829` with an allowlist rooted at this prototype directory. Keep it local-only and do not broaden allowed roots to private document/image directories without explicit approval. Check the port first:
 ```bash
-ss -ltnp | grep ':18820\b' || true
+ss -ltnp | grep ':18829\b' || true
 ```
-Start local-only server:
+Start a local-only server and stop it after the smoke:
 ```bash
 cd /home/will/lab/swarm/openvino-doc-image-triage-npu
-/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18820 --allowed-root "$PWD"
+/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18829 --allowed-root "$PWD"
 ```
-Call it:
+Install/enable the reviewed local-only service template when the HTTP wrapper should persist across logins:
 ```bash
-curl -sS http://127.0.0.1:18820/healthz | jq
+install -m 0644 openvino-doc-image-triage.service ~/.config/systemd/user/openvino-doc-image-triage.service
-curl -sS http://127.0.0.1:18820/models | jq
+systemctl --user daemon-reload
-curl -sS -X POST http://127.0.0.1:18820/triage \
+systemctl --user enable --now openvino-doc-image-triage.service
 systemctl --user status openvino-doc-image-triage.service --no-pager
 ```
 Call it with synthetic/non-private fixtures only:
 ```bash
 curl -sS http://127.0.0.1:18829/healthz | jq
 curl -sS http://127.0.0.1:18829/models | jq
 curl -sS -X POST http://127.0.0.1:18829/triage \
  -H 'Content-Type: application/json' \
  -d '{"path":"/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png","options":{"allowed_roots":["/home/will/lab/swarm/openvino-doc-image-triage-npu"]}}' | jq
 ```
 Do not point it at private document/image directories during smoke tests unless Will explicitly approves the exact source root.
 ## Smoke test
 ```bash
@@ -118,7 +133,7 @@ cd /home/will/lab/swarm/openvino-doc-image-triage-npu
 /home/will/.venvs/npu/bin/python tests/smoke_test.py
 ```
-Expected: JSON ending with `"ok": true`. If the embeddings service is up, the result should show positive NPU busy-time delta and each embedded page should report `verified_npu: true`.
+Expected: JSON ending with `"ok": true`. The smoke test generates only synthetic fixtures, verifies non-loopback HTTP binds are rejected, starts its temporary server on a preflighted free localhost port, and terminates it before exit. If the embeddings service is up, the result should show positive NPU busy-time delta and each embedded page should report `verified_npu: true`.
 ## Example output shape
@@ -0,0 +1,146 @@
 # OpenVINO NPU document/image triage spec
 Status: CLI-first prototype specification; not a live Atlas/Hermes integration.
 ## Safety stance
 - Default workflow is local CLI execution against explicitly named files.
 - Optional HTTP is disabled unless a human starts it, is constrained to loopback (`127.0.0.1`, `::1`, or `localhost`), and is intended for `127.0.0.1:18829` only.
 - No persistent systemd unit, Docker service, gateway hook, Atlas/Hermes route, RAG route, Chroma/vector collection mutation, or in-place reindexing is part of this spec.
 - Smoke data must be synthetic/non-private only. Do not point this tool at Will's private document, image, screenshot, Downloads, Desktop, Obsidian, or photo-library directories without explicit approval.
 - NPU claims require `/sys/class/accel/accel0/device/npu_busy_time_us` before/after deltas. HTTP 200, JSON output, or model-load success alone is not NPU proof.
 ## Recommended model/runtime
 Recommended v1 runtime:
 - File intake, hashing, MIME/extension checks, image/PDF rendering, sidecar/native PDF text extraction, metadata extraction, and category fallback: local Python CPU path using Pillow plus optional `pypdf`/`pypdfium2`.
 - Needs-attention semantic check: reuse the live localhost OpenVINO embeddings service on `127.0.0.1:18817`, currently `bge-base-en-v1.5-int8-ov`, and verify each embedding call with `npu_busy_time_us` deltas.
 - Category classification in v1: CPU rule fallback, explicitly reported as not an NPU image model.
 Why this is the recommended v1:
 - It avoids private-data exposure: no external upload path and no broader local file scanning.
 - It avoids collection/routing risk by using the existing embeddings API as a stateless feature extractor only; it does not write to RAG or Chroma.
 - It gives a real NPU verification hook for the semantic stage without overclaiming that OCR/image classification are NPU-backed.
 - It keeps the prototype useful even when optional PDF dependencies or the embeddings service are unavailable: it can fall back to CPU-only metadata/rule output and mark NPU verification false.
 Deferred model work:
 - NPU image category classifier: defer until a static-shape OpenVINO IR image model such as MobileNet/EfficientNet/ResNet is selected, calibrated for the label set, and smoke-tested with busy-time deltas.
 - NPU OCR/VLM: defer; OCR remains local CPU text plumbing in v1.
 ## CLI contract
 Command:
 ```bash
 cd /home/will/lab/swarm/openvino-doc-image-triage-npu
 /home/will/.venvs/npu/bin/python triage.py \
  --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu \
  --max-pages 3 \
  --pretty \
  samples/synthetic_invoice.png samples/synthetic_invoice.pdf
 ```
 Inputs:
 - Positional `paths`: one or more local image/PDF paths.
 - `--allowed-root ROOT`: may repeat; every requested path must resolve under one of these roots. Default is current directory.
 - `--max-pages N`: maximum rendered/extracted PDF pages; default 3.
 - `--no-embeddings`: disables the localhost `:18817` embedding/NPU check and reports CPU fallback/no text.
 - `--dry-run`: skip image/PDF rendering while still checking intake/hash/text/metadata where available.
 - `--include-ocr-text`: include raw extracted/sidecar text in this single response only; off by default.
 - `--include-full-path`: include resolved full paths; off by default.
 - `--pretty`: pretty-print JSON.
 Output:
 - Batch JSON: `{ "ok": bool, "files": [...], "generated_at": "..." }`.
 - Per file result includes `file_id` as `sha256:<digest>`, `source_path_basename`, media type, file size, pages, classification, needs-attention result, metadata counts/flags, privacy flags, and processing-device summary.
 - Raw OCR/text and full paths are omitted unless explicitly requested.
 - NPU evidence is per embedding call: `used`, `verified_npu`, `npu_busy_delta_us`, endpoint, and wall time.
 Exit behavior:
 - Exit 0 when all files triage successfully.
 - Exit 2 when one or more files fail policy/intake/processing checks.
 ## Optional localhost HTTP contract
 HTTP is optional and not enabled by this spec. If explicitly started for a smoke or local demo, use localhost and port 18829:
 ```bash
 cd /home/will/lab/swarm/openvino-doc-image-triage-npu
 ss -ltnp | grep ':18829\b' || true
 /home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18829 --allowed-root "$PWD"
 ```
 Endpoints:
 - `GET /healthz` or `/health`: service name, bind policy, configured allowed roots, privacy flags, and current `npu_busy_time_us`.
 - `GET /models`: reports v1 stages and whether each is CPU or NPU-backed.
 - `POST /triage`: `{ "path": "/local/file", "options": {...} }` -> `{ "ok": true, "result": ... }`.
 - `POST /triage/batch`: `{ "paths": ["/local/file"], "options": {...} }` -> batch JSON.
 HTTP privacy/policy rules:
 - Server startup `--allowed-root` is the outer allowlist.
 - Request `options.allowed_roots` may narrow that allowlist but must not widen it.
 - Request `options.embedding_url` may only target the configured local loopback embeddings route `http://127.0.0.1:18817/v1/embeddings` (or localhost equivalent); external or alternate endpoints are rejected.
 - Request bodies and raw text are not logged by the stdlib handler.
 - Stop the temporary server after the smoke/demo.
 ## Synthetic smoke-test plan
 Use only generated fixtures under the prototype directory:
 ```bash
 cd /home/will/lab/swarm/openvino-doc-image-triage-npu
 /home/will/.venvs/npu/bin/python make_samples.py
 /home/will/.venvs/npu/bin/python tests/smoke_test.py
 ```
 Expected smoke coverage:
 - Creates synthetic invoice/receipt/form-like image/PDF fixtures.
 - Runs CLI triage against the synthetic invoice image/PDF under an explicit allowed root.
 - Asserts privacy flags (`external_uploads: false`, no full path by default).
 - Asserts invoice category/needs-attention behavior on synthetic text.
 - Starts a temporary localhost HTTP server on a preflighted free ephemeral port, calls `/healthz` and `/triage`, verifies no full path leakage, rejects attempts to widen allowed roots, rejects external embedding URLs, and verifies non-loopback binds are rejected.
 - Terminates the temporary server.
 The smoke port in tests should stay OS-assigned ephemeral/non-live to avoid claiming `18829` as a persistent service.
 ## NPU busy-time verification plan
 For every test that claims NPU use:
 1. Read `/sys/class/accel/accel0/device/npu_busy_time_us` before the operation.
 2. Perform an operation that should call the live embeddings service on `127.0.0.1:18817` with non-empty synthetic text.
 3. Read `npu_busy_time_us` after the operation.
 4. Require both:
   - the per-result embedding object reports `used: true`, `verified_npu: true`, and `npu_busy_delta_us > 0`; and
   - the outer before/after sysfs value increased.
 5. If sysfs is missing or `:18817` is unavailable, do not claim NPU success; report CPU fallback / embedding unavailable and keep the smoke result honest.
 ## Docs and diagram implications
 - Service maps should list document/image triage as CLI-first and optional prototype `127.0.0.1:18829`, not live unless explicitly started.
 - Diagrams must not draw live Atlas/Hermes/gateway/RAG routing to this triage lane.
 - If shown with other candidate sidecars, label it separately from live services: live baseline remains RAG `:18810`, Whisper NPU `:18816`, and embeddings `:18817`; prototype sidecars are reranker `:18818`, classifier/router `:18819`, GenAI worker `:18820`, and optional doc/image triage `:18829`.
 - Runbooks should include CLI smoke, localhost listener checks, busy-time delta verification, and server shutdown instructions.
 - Documentation should state CPU vs NPU stages explicitly so the prototype does not imply NPU OCR or NPU image classification.
 ## No-go / defer criteria
 Do not proceed to implementation, live integration, or persistent service enablement if any of these are true:
 - Will has not explicitly approved live routing or persistent service enablement.
 - The requested source path is a private document/image directory or broad home-directory scan rather than synthetic fixtures or an explicitly approved narrow root.
 - The workflow would mutate Obsidian, RAG, Chroma/vector collections, or reindex in place.
 - The optional server would need to bind anywhere other than localhost.
 - NPU busy-time does not increase for an operation being described as NPU-backed.
 - Raw OCR text or full paths would be logged, uploaded, stored durably, or returned without explicit request.
 - PDF/image dependencies are missing and the task requires rendered page analysis rather than metadata/text-only fallback.
 - A future image classifier/OCR/VLM model has not been selected, converted/quantized to OpenVINO, calibrated for the task, and verified on synthetic fixtures with busy-time deltas.
@@ -0,0 +1,16 @@
 [Unit]
 Description=OpenVINO NPU document/image triage HTTP Service (local-only, port 18829)
 After=network.target openvino-embeddings.service
 Wants=openvino-embeddings.service
 [Service]
 Type=simple
 WorkingDirectory=/home/will/lab/swarm/openvino-doc-image-triage-npu
 Environment=DOC_IMAGE_TRIAGE_HOST=127.0.0.1
 Environment=DOC_IMAGE_TRIAGE_PORT=18829
 ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-doc-image-triage-npu/server.py --host 127.0.0.1 --port 18829 --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu
 Restart=on-failure
 RestartSec=5
 [Install]
 WantedBy=default.target
@@ -13,6 +13,7 @@ configured allowed roots. It never uploads document/image contents externally.
 from __future__ import annotations
 import argparse
 import ipaddress
 import json
 import os
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
@@ -23,6 +24,19 @@ from urllib.parse import urlparse
 from triage import DEFAULT_EMBED_URL, TriageOptions, read_npu_busy, triage_batch, triage_file
 def _validate_loopback_host(host: str) -> str:
    """Reject non-loopback binds; this prototype is never a LAN service."""
    normalized = host.strip()
    if normalized == "localhost":
        return normalized
    try:
        if ipaddress.ip_address(normalized).is_loopback:
            return normalized
    except ValueError:
        pass
    raise ValueError("host must be localhost/loopback for this prototype")
 def _roots_within_configured(requested_roots: list[Any], configured_roots: list[Path]) -> list[Path]:
    """Return request roots only when they narrow the startup allowlist."""
    narrowed: list[Path] = []
@@ -163,13 +177,17 @@ class Handler(BaseHTTPRequestHandler):
 def main() -> int:
    parser = argparse.ArgumentParser(description="Local-only doc/image triage HTTP server")
    parser.add_argument("--host", default=os.environ.get("DOC_IMAGE_TRIAGE_HOST", "127.0.0.1"))
-    parser.add_argument("--port", type=int, default=int(os.environ.get("DOC_IMAGE_TRIAGE_PORT", "18820")))
+    parser.add_argument("--port", type=int, default=int(os.environ.get("DOC_IMAGE_TRIAGE_PORT", "18829")))
    parser.add_argument("--allowed-root", action="append", default=[], help="allowed local root; may repeat")
    args = parser.parse_args()
    try:
        host = _validate_loopback_host(args.host)
    except ValueError as exc:
        parser.error(str(exc))
    roots = [Path(p).expanduser().resolve() for p in args.allowed_root] or [Path.cwd().resolve()]
-    httpd = ThreadingHTTPServer((args.host, args.port), Handler)
+    httpd = ThreadingHTTPServer((host, args.port), Handler)
    httpd.allowed_roots = roots  # type: ignore[attr-defined]
-    print(json.dumps({"service": "openvino-doc-image-triage-npu", "host": args.host, "port": args.port, "allowed_roots": [str(p) for p in roots]}), flush=True)
+    print(json.dumps({"service": "openvino-doc-image-triage-npu", "host": host, "port": args.port, "allowed_roots": [str(p) for p in roots]}), flush=True)
    httpd.serve_forever()
    return 0
@@ -2,6 +2,7 @@
 from __future__ import annotations
 import json
 import socket
 import subprocess
 import sys
 import tempfile
@@ -42,6 +43,29 @@ def busy() -> int | None:
        return None
 def choose_free_loopback_port() -> int:
    """Pick an OS-assigned port on 127.0.0.1 and confirm nothing is listening on it.

    A throwaway IPv4 TCP socket is bound to port 0 so the kernel chooses the
    port, then closed. A second socket probes the port with ``connect_ex``;
    a successful connect means a listener already exists and the assertion fails.
    The port is released before returning, so the caller binds it afterwards.
    """
    reserve = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        reserve.bind(("127.0.0.1", 0))
        candidate = int(reserve.getsockname()[1])
    finally:
        reserve.close()

    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.settimeout(0.25)
        connect_result = probe.connect_ex(("127.0.0.1", candidate))
    finally:
        probe.close()

    # connect_ex returns 0 only when something accepted the connection.
    assert connect_result != 0, f"selected port already has a listener: {candidate}"
    return candidate
 def assert_loopback_bind_policy() -> None:
    """Check that ``server.py`` refuses to start when asked to bind ``0.0.0.0``.

    Runs the server as a child process from ``ROOT`` with a wildcard host and
    asserts that it exits non-zero and that its stderr mentions "loopback".
    """
    command = [sys.executable, "server.py", "--host", "0.0.0.0", "--port", "0", "--allowed-root", str(ROOT)]
    outcome = subprocess.run(command, cwd=ROOT, capture_output=True, text=True)
    stderr_text = outcome.stderr
    # Include the captured output in the failure message to aid debugging.
    assert outcome.returncode != 0, outcome.stdout + stderr_text
    assert "loopback" in stderr_text.lower(), stderr_text
 def main() -> int:
    run([sys.executable, "make_samples.py"])
    invoice = SAMPLES / "synthetic_invoice.png"
@@ -69,20 +93,23 @@ def main() -> int:
            assert (emb.get("npu_busy_delta_us") or 0) > 0, emb
            assert after > before, {"before": before, "after": after, "embedding": emb}
-    # HTTP smoke on an ephemeral localhost port so we do not collide with 18820 during tests.
+    # HTTP smoke on a preflighted free localhost port so we do not collide with live/prototype ports.
-    proc = subprocess.Popen([sys.executable, "server.py", "--host", "127.0.0.1", "--port", "18828", "--allowed-root", str(ROOT)], cwd=ROOT, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    assert_loopback_bind_policy()
    smoke_port = choose_free_loopback_port()
    base_url = f"http://127.0.0.1:{smoke_port}"
    proc = subprocess.Popen([sys.executable, "server.py", "--host", "127.0.0.1", "--port", str(smoke_port), "--allowed-root", str(ROOT)], cwd=ROOT, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    try:
        deadline = time.time() + 5
        while time.time() < deadline:
            try:
-                health = urllib.request.urlopen("http://127.0.0.1:18828/healthz", timeout=1).read()
+                health = urllib.request.urlopen(f"{base_url}/healthz", timeout=1).read()
                assert b"openvino-doc-image-triage-npu" in health
                break
            except Exception:
                time.sleep(0.1)
        else:
            raise AssertionError("server did not become ready")
-        resp = post_json("http://127.0.0.1:18828/triage", {"path": str(invoice), "options": {"allowed_roots": [str(ROOT)]}})
+        resp = post_json(f"{base_url}/triage", {"path": str(invoice), "options": {"allowed_roots": [str(ROOT)]}})
        assert resp["ok"] is True, resp
        assert resp["result"]["source_path_basename"] == "synthetic_invoice.png"
        assert "source_path" not in resp["result"]
@@ -92,7 +119,7 @@ def main() -> int:
            outside.write(b"sensitive text outside configured artifact root")
            outside.flush()
            status, blocked = post_json_status(
-                "http://127.0.0.1:18828/triage",
+                f"{base_url}/triage",
                {"path": outside.name, "options": {"allowed_roots": ["/tmp"], "dry_run": True, "use_embeddings": False}},
            )
        assert status == 400, blocked
@@ -101,7 +128,7 @@ def main() -> int:
        # Request bodies must not redirect extracted text to caller-supplied endpoints.
        status, blocked = post_json_status(
-            "http://127.0.0.1:18828/triage",
+            f"{base_url}/triage",
            {"path": str(invoice), "options": {"embedding_url": "http://198.51.100.1:9/v1/embeddings"}},
        )
        assert status == 400, blocked
@@ -0,0 +1,306 @@
 # Bounded OpenVINO GenAI NPU worker contract
 Status: prototype contract implemented locally; not a live Atlas/Hermes routing dependency.
 Default address: `http://127.0.0.1:18820`.
 ## Purpose and hard boundary
 This worker is a local-only sidecar for small, bounded generation jobs that are useful around the assistant stack but are not primary chat: title drafting, short summaries, notification condensation, and memory-candidate extraction. It must not be used as Atlas/Hermes primary model routing, gateway fallback routing, autonomous tool-calling, or an unbounded chat endpoint without a separate approval gate.
 Hard boundaries:
 - Bind to `127.0.0.1` by default; non-local bind is a code/ops review item, not a runtime flag to casually change.
 - Do not enable a persistent systemd/Docker service as part of smoke testing.
 - Do not restart or reconfigure Atlas, Hermes, gateway, LiteLLM, RAG, or n8n routing to call this worker without explicit approval from Will.
 - Do not write memory, mutate Chroma/vector collections, trigger RAG reindexing, or process private document/image directories.
 - Do not log raw prompts or raw request bodies by default.
 - Treat HTTP success as insufficient for NPU claims; require positive `/sys/class/accel/accel0/device/npu_busy_time_us` delta for generation.
 ## Recommended model/runtime
 Recommended first model:
 - Model id: `OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov`
 - Local path: `/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov`
 - Runtime: `/home/will/.venvs/npu` with `openvino-genai==2026.2.0.0`
 - Device: OpenVINO GenAI `NPU`
 - Compile cache: `/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4`
 Why this model/runtime:
 - It is already staged in the repo prototype and has a local smoke observation with positive NPU busy-time delta.
 - It is an OpenVINO IR model with INT4-compressed weights, which keeps memory/compile pressure low enough for a sidecar on the shared NPU.
 - Qwen2.5-1.5B-Instruct is large enough for formatting/summarization/notification jobs but small enough to keep latency bounded. It should not be marketed as a high-quality general assistant model.
 - The Hugging Face model card identifies it as Qwen2.5-1.5B-Instruct converted to OpenVINO IR with INT4_SYM NNCF weight compression and states compatibility with OpenVINO 2025.1.0+; the local runtime is newer than that baseline.
 - OpenVINO GenAI `LLMPipeline` is the right first runtime because the existing local NPU stack already uses OpenVINO GenAI successfully for Whisper, and it exposes a simple bounded generate call with cache controls.
 Deferred alternatives:
 - Larger 3B/7B local LLMs: defer until the 1.5B contract proves stable; larger models increase compile time, memory pressure, and NPU contention.
 - CPU/GPU fallback inside this service: defer; fallback would blur the NPU verification contract. If fallback is later approved, return `device_actual` and keep NPU-only health separate.
 - Manual `EXPORT_BLOB`/`BLOB_PATH`: defer until compile latency is proven to dominate despite `CACHE_DIR`. If used later, record OpenVINO version, NPU compiler/driver versions, model id, quantization flags, and source model path; invalidate after OpenVINO/NPU driver upgrades.
 ## Runtime bounds
 Pipeline configuration for the first milestone:
 ```text
 CACHE_DIR=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
 MAX_PROMPT_LEN=1024
 MIN_RESPONSE_LEN=64
 PREFILL_HINT=DYNAMIC
 GENERATE_HINT=FAST_COMPILE
 ```
 Request bounds:
 - `input`: required non-empty string; max `6000` characters before prompt templating.
 - `job`: one of `title`, `summary`, `notification`, `memory_candidate`.
 - `max_new_tokens`: optional; default by job; hard max `256`.
 - Concurrency: generation must be serialized inside the process with a lock because the NPU is shared with Whisper/embeddings/prototype sidecars.
 - Logging: log method/path/status and timing only; never log raw `input` or generated text by default.
 Expected latency target:
 - Cold-ish first generation with cache available: acceptable if roughly 15 seconds or less for a short prompt on the staged model.
 - Warm short jobs: target under 5 seconds for `title`/`notification` and under 10 seconds for `summary`/`memory_candidate`.
 - Defer promotion if p95 warm latency exceeds 15 seconds for 24-96 generated tokens, or if cold compile regularly blocks the NPU long enough to degrade live Whisper/embeddings.
 These are prototype acceptance targets, not SLOs for live Atlas routing.
 ## CLI contract
 Command shape:
 ```bash
 cd /home/will/lab/swarm/openvino-genai-npu-worker
 /home/will/.venvs/npu/bin/python worker.py \
  --job title \
  --input 'Synthetic non-private text to title.' \
  --max-new-tokens 32
 ```
 CLI stdout is JSON with the same response shape as HTTP generation. Exit code must be:
 - `0` when the job succeeds and `npu_busy_delta_us > 0`.
 - non-zero when input validation fails, model load/generation fails, or NPU busy-time delta is not positive.
 The CLI must not write memory, change service routing, or start persistent services.
 ## HTTP contract
 Start temporary local server only:
 ```bash
 cd /home/will/lab/swarm/openvino-genai-npu-worker
 /home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820
 ```
 Endpoints:
 ```text
 GET  /healthz
 GET  /models
 POST /v1/worker/generate
 POST /v1/worker/extract-memory-candidates
 POST /v1/worker/condense-notification
 ```
 `GET /healthz` response fields:
 ```json
 {
  "ok": true,
  "model": "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov",
  "model_path": "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov",
  "device": "NPU",
  "cache_dir": "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4",
  "cache_exists": true,
  "loaded": false,
  "initial_load_ms": null,
  "busy_time_us": 0,
  "max_input_chars": 6000,
  "jobs": ["memory_candidate", "notification", "summary", "title"],
  "bind": "127.0.0.1:18820"
 }
 ```
 `POST /v1/worker/generate` request:
 ```json
 {
  "job": "summary",
  "input": "Synthetic non-private text to summarize.",
  "max_new_tokens": 80
 }
 ```
 Specialized aliases:
 - `POST /v1/worker/extract-memory-candidates` implies `job=memory_candidate`.
 - `POST /v1/worker/condense-notification` implies `job=notification`.
 - Backward-compatible request `job=memory` may map to `memory_candidate`, but new clients should use `memory_candidate`.
 Successful generation response:
 ```json
 {
  "model": "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov",
  "device": "NPU",
  "job": "summary",
  "text": "...",
  "json": null,
  "timing_ms": {
    "load": 0.0,
    "initial_load": 10989.08,
    "generate": 3157.94,
    "total": 3157.94
  },
  "npu_busy_delta_us": 2650724,
  "npu_busy_before_us": 123,
  "npu_busy_after_us": 2650847,
  "cache_dir": "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
 }
 ```
 Validation/error behavior:
 - Unsupported path: `404` JSON `{"error":"not found"}`.
 - Unsupported job, empty input, too-long input, invalid token bound, missing model, or generation failure: JSON `{"error":"..."}`. The current stdlib prototype returns `400` for these errors; future implementations should likewise use a non-2xx status.
 - If `npu_busy_delta_us <= 0`, the response should be treated as failed by smoke tests even if an HTTP handler emitted `200`; the refreshed prototype returns `503` with the generation payload plus an `error` field.
 ## Prompt/job contract
 `title`:
 - Input: short task/log/message excerpt.
 - Output: one title, 8 words or fewer, no markdown required.
 - Default `max_new_tokens`: 32.
 `summary`:
 - Input: synthetic/non-private text excerpt.
 - Output: one short paragraph or up to 4 bullets.
 - Default `max_new_tokens`: 160.
 `notification`:
 - Input: synthetic/non-private alert/log excerpt.
 - Output target: JSON object with `severity`, `category`, `summary`, `action_needed`.
 - Default `max_new_tokens`: 96.
 - Client must tolerate `json: null` and parse/validate before using output.
 `memory_candidate`:
 - Input: synthetic/non-private conversation excerpt.
 - Output target: JSON object with `candidates` and `notes`; candidates are proposals only.
 - Default `max_new_tokens`: 192.
 - This worker must never call Hermes memory tools or write durable memory directly.
 ## Smoke-test plan using non-private data
 Do not use private vault notes, screenshots, email, chat logs, or document/image directories. Use synthetic text like this:
 ```text
 Atlas received a kanban notification that an OpenVINO NPU prototype finished smoke testing. The reviewer needs a concise status and next action. No live gateway routing changed.
 ```
 Direct NPU smoke:
 ```bash
 cd /home/will/lab/swarm/openvino-genai-npu-worker
 before=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
 /home/will/.venvs/npu/bin/python smoke_llm_npu.py \
  --prompt 'Write a concise title for: synthetic NPU worker contract smoke.' \
  --max-new-tokens 24
 status=$?
 after=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
 printf 'external_busy_delta_us=%s\n' "$((after-before))"
 test "$status" -eq 0
 test "$((after-before))" -gt 0
 ```
 Temporary HTTP smoke:
 ```bash
 cd /home/will/lab/swarm/openvino-genai-npu-worker
 /home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820 &
 pid=$!
 # Always stop the temporary server, even if a later step fails.
 trap 'kill "$pid" 2>/dev/null || true' EXIT
 # The server was started in the background, so wait (up to ~10 s) for the
 # listener before the first real request; an immediate curl can race startup.
 for _ in $(seq 1 50); do
  curl -fs -o /dev/null http://127.0.0.1:18820/healthz && break
  sleep 0.2
 done
 curl -fsS http://127.0.0.1:18820/healthz | python -m json.tool
 # External busy-time reading taken immediately before the generation call.
 before=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
 curl -fsS http://127.0.0.1:18820/v1/worker/generate \
  -H 'Content-Type: application/json' \
  -d '{"job":"title","input":"Synthetic NPU worker smoke with no routing changes.","max_new_tokens":24}' \
  | tee /tmp/openvino-genai-worker-smoke.json \
  | python -m json.tool
 after=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
 # Check the worker-reported delta and device from the saved response.
 python - <<'PY'
 import json
 p=json.load(open('/tmp/openvino-genai-worker-smoke.json'))
 assert p['npu_busy_delta_us'] > 0, p
 assert p['device'] == 'NPU', p
 PY
 # The external sysfs counter must also have increased.
 test "$((after-before))" -gt 0
 kill "$pid"
 trap - EXIT
 ```
 Also verify the temporary listener is gone:
 ```bash
 ss -ltnp | grep ':18820' && { echo 'temporary smoke server still running'; exit 1; } || true
 ```
 Unit tests that do not load the model or require private data:
 ```bash
 cd /home/will/lab/swarm/openvino-genai-npu-worker
 python -m pytest -q
 ```
 ## NPU busy-time verification plan
 Acceptance for any NPU claim requires all of the following:
 1. Confirm the sysfs counter exists and is readable:
   `test -r /sys/class/accel/accel0/device/npu_busy_time_us`.
 2. Read `busy_before` immediately before the generation call.
 3. Run exactly one bounded generation against the candidate worker.
 4. Read `busy_after` immediately after generation completes.
 5. Require `busy_after > busy_before` and response `npu_busy_delta_us > 0`.
 6. Record model id, runtime version, prompt chars, max tokens, load/generate timings, and busy delta in the review handoff.
 7. If the counter is unchanged, mark the smoke as failed even if HTTP returned `200` and text was generated.
 Because the NPU is shared, a positive external delta proves NPU activity during the window but not exclusive attribution. Prefer a quiet window with no concurrent Whisper/embedding jobs for review-grade measurements; otherwise repeat and compare worker-reported internal delta with the external counter.
 ## Docs/diagram implications
 If this worker is kept as a prototype, docs and diagrams should show:
 - Live baseline remains RAG `:18810`, Whisper NPU `:18816`, embeddings `:18817`.
 - GenAI worker `:18820` is proposed/prototype/not-live unless explicitly approved and enabled.
 - No arrow from Hermes/Atlas gateway or LiteLLM primary routing to `:18820` unless a later approved integration actually exists.
 - Runbooks should include the CLI/HTTP smoke commands, `ss` listener checks, and NPU busy-time counter checks.
 - Service maps should label this as "bounded background generation" rather than "chat" or "assistant model".
 ## Explicit no-go / defer criteria
 No-go for implementation or promotion:
 - Model path missing, OpenVINO GenAI import fails, or NPU device is unavailable.
 - `/sys/class/accel/accel0/device/npu_busy_time_us` is unreadable or does not increase during generation.
 - Warm bounded jobs exceed the prototype latency target or starve live Whisper/embedding services.
 - The worker needs private documents/images/chat logs for smoke testing.
 - The worker requires Atlas/Hermes/gateway/LiteLLM/RAG routing changes to demonstrate value.
 - The API starts accepting arbitrary chat history, tool-call instructions, unbounded prompts, or large outputs.
 - The service logs raw prompt bodies by default.
 - Persistent service enablement is requested without an explicit Will approval gate and a reviewer smoke handoff.
 Defer, do not solve in this lane:
 - Primary assistant routing, LiteLLM model registration, gateway fallback, or tool-calling integration.
 - RAG query rewriting, RAG answer generation, or collection mutation.
 - Private document/image triage.
 - Multi-model selection, CPU/GPU fallback policy, batching, streaming, or auth exposure beyond localhost.
@@ -15,9 +15,11 @@ The worker does not write memory, does not restart Atlas/Hermes, does not change
 ## Files
 - `CONTRACT.md` — bounded-worker service contract, endpoint/CLI API, smoke plan, NPU verification, docs implications, and no-go criteria.
 - `worker.py` — stdlib HTTP API plus CLI wrapper.
 - `smoke_llm_npu.py` — direct GenAI smoke test with NPU busy-time verification.
- `systemd/openvino-genai-npu-worker.service` — optional user-service template; not installed by this prototype.
+- `tests/test_worker.py` — unit tests with a fake GenAI pipeline and synthetic busy-time counter.
 - `systemd/openvino-genai-npu-worker.service` — reviewed local-only user-service template for `127.0.0.1:18820`.
 ## Model/cache
@@ -72,15 +74,20 @@ Observed cold-ish smoke after download/cache setup:
  --input 'Kanban task asks for a small OpenVINO GenAI NPU worker prototype.'
 ```
 Exit code is non-zero if validation fails, generation fails, or the worker-reported `npu_busy_delta_us` is not positive.
 ## HTTP usage
 Start locally only:
 ```bash
 cd /home/will/lab/swarm/openvino-genai-npu-worker
 ss -ltnp | grep ':18820' && { echo 'port 18820 already in use'; exit 1; } || true
 /home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820
 ```
 The server also refuses startup if a listener is already accepting connections on `127.0.0.1:18820`.
 Endpoints:
 ```text
@@ -102,6 +109,39 @@ curl -s http://127.0.0.1:18820/v1/worker/generate \
 Response includes `npu_busy_delta_us`; treat a zero or negative value as failure even if HTTP status is 200.
 ## Unit tests
 These tests use only synthetic strings and a fake GenAI pipeline, so they do not load the model or touch private data:
 ```bash
 cd /home/will/lab/swarm/openvino-genai-npu-worker
 python -m pytest -q
 ```
 ## Environment variables
 ```text
 OV_GENAI_NPU_MODEL=/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov
 OV_GENAI_NPU_CACHE=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
 OV_GENAI_NPU_HOST=127.0.0.1
 OV_GENAI_NPU_PORT=18820
 ```
 Only `127.0.0.1` is accepted by the current prototype; wider binds require an explicit code change and approval.
 ## Systemd user service
 A reviewed local-only unit exists at `systemd/openvino-genai-npu-worker.service` for persistent background use after foreground smoke succeeds with a positive NPU busy-time delta:
 ```bash
 install -m 0644 systemd/openvino-genai-npu-worker.service ~/.config/systemd/user/openvino-genai-npu-worker.service
 systemctl --user daemon-reload
 systemctl --user enable --now openvino-genai-npu-worker.service
 systemctl --user status openvino-genai-npu-worker.service --no-pager
 ```
 The service remains isolated: do not route primary Atlas/Hermes chat, gateway output, or automatic memory writes to it without a separate approved integration.
 ## Safety boundaries
 - Binds only to `127.0.0.1` by default; non-local bind is refused in code.
@@ -0,0 +1,2 @@
 [pytest]
 testpaths = tests
@@ -10,31 +10,42 @@ import argparse
 import json
 import time
 from pathlib import Path
-
+from typing import Any
 import openvino_genai as ov_genai
 DEFAULT_MODEL = "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov"
 DEFAULT_CACHE = "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
 BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
-def read_busy() -> int:
+def import_openvino_genai() -> Any:
-    return int(BUSY_PATH.read_text().strip())
+    import openvino_genai as ov_genai  # type: ignore[import-not-found]
    return ov_genai
 def read_busy(path: Path = BUSY_PATH) -> int:
    """Read the counter file at `path` and return its contents as an int.

    Surrounding whitespace (such as a trailing newline) is stripped before parsing.
    """
    raw_value = path.read_text()
    return int(raw_value.strip())
 def main() -> int:
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default=DEFAULT_MODEL)
    parser.add_argument("--cache-dir", default=DEFAULT_CACHE)
-    parser.add_argument("--prompt", default="Write a concise title for: User asked Atlas to summarize NPU worker options.")
+    parser.add_argument("--busy-path", default=str(BUSY_PATH))
    parser.add_argument("--prompt", default="Write a concise title for: Synthetic NPU worker contract smoke with no routing changes.")
    parser.add_argument("--max-new-tokens", type=int, default=24)
    args = parser.parse_args()
    model_path = Path(args.model)
    cache_dir = Path(args.cache_dir)
    busy_path = Path(args.busy_path)
    cache_dir.mkdir(parents=True, exist_ok=True)
    if not model_path.exists():
        raise SystemExit(f"model path does not exist: {model_path}")
    if not busy_path.exists():
        raise SystemExit(f"NPU busy-time counter does not exist: {busy_path}")
    if args.max_new_tokens < 1 or args.max_new_tokens > 256:
        raise SystemExit("max-new-tokens must be between 1 and 256")
    config = {
        "CACHE_DIR": str(cache_dir),
@@ -44,15 +55,16 @@ def main() -> int:
        "GENERATE_HINT": "FAST_COMPILE",
    }
-    before = read_busy()
+    ov_genai = import_openvino_genai()
    before = read_busy(busy_path)
    load_start = time.monotonic()
-    pipe = ov_genai.LLMPipeline(str(model_path), "NPU", config)
+    pipe = ov_genai.LLMPipeline(str(model_path), "NPU", **config)
    load_ms = round((time.monotonic() - load_start) * 1000, 2)
    gen_start = time.monotonic()
    output = pipe.generate(args.prompt, max_new_tokens=args.max_new_tokens)
    gen_ms = round((time.monotonic() - gen_start) * 1000, 2)
-    after = read_busy()
+    after = read_busy(busy_path)
    result = {
        "model": str(model_path),
        "device": "NPU",
@@ -7,6 +7,7 @@ Type=simple
 WorkingDirectory=/home/will/lab/swarm/openvino-genai-npu-worker
 Environment=OV_GENAI_NPU_MODEL=/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov
 Environment=OV_GENAI_NPU_CACHE=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
 Environment=OV_GENAI_NPU_HOST=127.0.0.1
 Environment=OV_GENAI_NPU_PORT=18820
 ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-genai-npu-worker/worker.py --host 127.0.0.1 --port 18820
 Restart=on-failure
@@ -0,0 +1,131 @@
 from __future__ import annotations
 import json
 from pathlib import Path
 import pytest
 import worker
 class FakePipeline:
    """Test double for a GenAI pipeline: records calls and bumps a counter file on each generate."""
    def __init__(self, model_path: str, device: str, config: dict[str, object], busy_path: Path, output: str = "Synthetic title"):
        # Keep every constructor argument so tests can inspect what was passed in.
        self.model_path, self.device, self.config = model_path, device, config
        self.busy_path, self.output = busy_path, output
        # (prompt, max_new_tokens) pairs in call order.
        self.calls: list[tuple[str, int]] = []
    def generate(self, prompt: str, *, max_new_tokens: int):
        """Record the call, add 1234 to the counter file, and return the canned output."""
        self.calls.append((prompt, max_new_tokens))
        bumped = int(self.busy_path.read_text().strip()) + 1234
        self.busy_path.write_text(str(bumped))
        return self.output
 class FakeGenAI:
    """Stand-in for the GenAI module object; remembers the last pipeline it built."""
    def __init__(self, busy_path: Path, output: str = "Synthetic title"):
        self.busy_path, self.output = busy_path, output
        # Populated by LLMPipeline so tests can inspect the constructed pipeline.
        self.pipeline: FakePipeline | None = None
    def LLMPipeline(self, model_path: str, device: str, *args: object, **kwargs: object):  # noqa: N802 - mirrors OpenVINO API
        """Build a FakePipeline from either a positional config dict or keyword options."""
        positional_config = args[0] if args else None
        if isinstance(positional_config, dict):
            # Positional dict form: normalise keys to str.
            config: dict[str, object] = {str(key): value for key, value in positional_config.items()}
        else:
            # Keyword form: the keyword arguments are the config.
            config = dict(kwargs)
        built = FakePipeline(model_path, device, config, self.busy_path, self.output)
        self.pipeline = built
        return built
@pytest.fixture()
 def worker_paths(tmp_path: Path):
    model_path = tmp_path / "model"
    cache_dir = tmp_path / "cache"
    busy_path = tmp_path / "npu_busy_time_us"
    model_path.mkdir()
    busy_path.write_text("100")
    return model_path, cache_dir, busy_path
 def test_generate_uses_npu_config_and_reports_busy_delta(monkeypatch: pytest.MonkeyPatch, worker_paths):
    """A title job targets the NPU with the expected config and reports the counter delta."""
    model_dir, cache_root, counter_file = worker_paths
    stub_genai = FakeGenAI(counter_file)
    monkeypatch.setattr(worker, "import_openvino_genai", lambda: stub_genai)
    subject = worker.NpuWorker(str(model_dir), str(cache_root), busy_path=counter_file, bind_port=18820)
    outcome = subject.generate("title", "Synthetic non-private kanban notification.", max_new_tokens=24)
    # The fixture seeds the counter at 100 and the fake pipeline adds 1234 per generate call.
    assert (outcome.npu_busy_before_us, outcome.npu_busy_after_us, outcome.npu_busy_delta_us) == (100, 1334, 1234)
    assert outcome.text == "Synthetic title"
    built = stub_genai.pipeline
    assert built is not None
    assert built.device == "NPU"
    assert built.config["CACHE_DIR"] == str(cache_root)
    assert built.config["MAX_PROMPT_LEN"] == 1024
    _prompt, requested_tokens = built.calls[0]
    assert requested_tokens == 24
 def test_memory_alias_json_wrapping(monkeypatch: pytest.MonkeyPatch, worker_paths):
    """A top-level JSON array from the model is wrapped under "candidates" with a note."""
    model_dir, cache_root, counter_file = worker_paths
    array_output = '[{"fact":"synthetic stable preference","confidence":0.8}]'
    stub_genai = FakeGenAI(counter_file, output=array_output)
    monkeypatch.setattr(worker, "import_openvino_genai", lambda: stub_genai)
    subject = worker.NpuWorker(str(model_dir), str(cache_root), busy_path=counter_file)
    outcome = subject.generate("memory_candidate", "Synthetic user says they prefer concise answers.")
    wrapped = outcome.parsed_json
    assert wrapped is not None
    first_candidate = wrapped["candidates"][0]
    assert first_candidate["fact"] == "synthetic stable preference"
    assert "wrapped" in wrapped["notes"]
@pytest.mark.parametrize(
    ("job", "user_input", "max_new_tokens", "message"),
    [
        ("bad", "hello", 1, "unsupported job"),
        ("title", "", 1, "non-empty"),
        ("title", "x" * (worker.MAX_INPUT_CHARS + 1), 1, "input too long"),
        ("title", "hello", worker.MAX_NEW_TOKENS + 1, "max_new_tokens"),
    ],
 )
 def test_validation_errors(monkeypatch: pytest.MonkeyPatch, worker_paths, job: str, user_input: str, max_new_tokens: int, message: str):
    model_path, cache_dir, busy_path = worker_paths
    monkeypatch.setattr(worker, "import_openvino_genai", lambda: FakeGenAI(busy_path))
    npu_worker = worker.NpuWorker(str(model_path), str(cache_dir), busy_path=busy_path)
    with pytest.raises(ValueError, match=message):
        npu_worker.generate(job, user_input, max_new_tokens=max_new_tokens)
 def test_health_reports_actual_bind_and_limits(worker_paths):
    """health() reports the configured bind address, the input/token limits, and the current counter."""
    model_dir, cache_root, counter_file = worker_paths
    subject = worker.NpuWorker(str(model_dir), str(cache_root), busy_path=counter_file, bind_host="127.0.0.1", bind_port=18821)
    report = subject.health()
    expected = {
        "bind": "127.0.0.1:18821",
        "max_input_chars": 6000,
        "max_new_tokens": 256,
        "busy_time_us": 100,
    }
    # Compare only the keys under test, in the same order as the expectations.
    observed = {key: report[key] for key in expected}
    assert observed == expected
 def test_response_payload_shape(worker_paths):
    """response_payload() output is JSON-serialisable and carries device, job, and parsed JSON."""
    model_dir, cache_root, counter_file = worker_paths
    subject = worker.NpuWorker(str(model_dir), str(cache_root), busy_path=counter_file)
    timings = {"load": 1.0, "initial_load": 1.0, "generate": 2.0, "total": 3.0}
    canned = worker.GenerationResult(
        text="ok",
        parsed_json={"severity": "info"},
        timing_ms=timings,
        npu_busy_delta_us=5,
        npu_busy_before_us=10,
        npu_busy_after_us=15,
    )
    body = worker.response_payload(subject, "notification", canned)
    # json.dumps raises on non-serialisable values and returns a non-empty string otherwise.
    assert json.dumps(body)
    expected_fields = {"device": "NPU", "job": "notification", "json": {"severity": "info"}}
    for field, wanted in expected_fields.items():
        assert body[field] == wanted
@@ -10,6 +10,7 @@ import argparse
 import json
 import os
 import re
 import socket
 import threading
 import time
 from dataclasses import dataclass
@@ -18,8 +19,6 @@ from pathlib import Path
 from typing import Any, cast
 from urllib.parse import urlparse
 import openvino_genai as ov_genai  # type: ignore[import-not-found]
 MODEL_ID = "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov"
 DEFAULT_MODEL_PATH = "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov"
 DEFAULT_CACHE_DIR = "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
@@ -27,6 +26,14 @@ BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
 HOST = "127.0.0.1"
 PORT = 18820
 MAX_INPUT_CHARS = 6000
 MAX_NEW_TOKENS = 256
 GENAI_CONFIG = {
    "CACHE_DIR": DEFAULT_CACHE_DIR,
    "MAX_PROMPT_LEN": 1024,
    "MIN_RESPONSE_LEN": 64,
    "PREFILL_HINT": "DYNAMIC",
    "GENERATE_HINT": "FAST_COMPILE",
 }
 DEFAULTS = {
    "title": 32,
    "summary": 160,
@@ -48,8 +55,20 @@ PROMPTS = {
 }
-def read_busy() -> int:
+def import_openvino_genai() -> Any:
-    return int(BUSY_PATH.read_text().strip())
+    """Import OpenVINO GenAI lazily so unit tests do not require the NPU venv."""
    import openvino_genai as ov_genai  # type: ignore[import-not-found]
    return ov_genai
 def listener_exists(host: str, port: int) -> bool:
    """Report whether a TCP connection to (host, port) is accepted.

    Makes one IPv4 connect attempt with a 0.2 second timeout and returns True
    only when connect_ex reports success (0).
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.settimeout(0.2)
        status = probe.connect_ex((host, port))
    finally:
        # Always release the probe socket, even if the connect attempt raised.
        probe.close()
    return status == 0
 def coerce_json(text: str) -> Any | None:
@@ -79,9 +98,20 @@ class GenerationResult:
 class NpuWorker:
-    def __init__(self, model_path: str, cache_dir: str):
+    def __init__(
        self,
        model_path: str,
        cache_dir: str,
        *,
        busy_path: Path = BUSY_PATH,
        bind_host: str = HOST,
        bind_port: int = PORT,
    ):
        self.model_path = Path(model_path)
        self.cache_dir = Path(cache_dir)
        self.busy_path = Path(busy_path)
        self.bind_host = bind_host
        self.bind_port = bind_port
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self._pipe = None
        self._load_ms: float | None = None
@@ -89,21 +119,20 @@ class NpuWorker:
        self._loaded_at: float | None = None
        if not self.model_path.exists():
            raise FileNotFoundError(f"model path does not exist: {self.model_path}")
        if not self.busy_path.exists():
            raise FileNotFoundError(f"NPU busy-time counter does not exist: {self.busy_path}")
    def read_busy(self) -> int:
        return int(self.busy_path.read_text().strip())
    def load(self) -> None:
        if self._pipe is not None:
            return
        start = time.monotonic()
        # NPU GenAI requires bounded prompt/response shapes; CACHE_DIR enables compiled blob caching.
-        self._pipe = ov_genai.LLMPipeline(
+        ov_genai = import_openvino_genai()
-            str(self.model_path),
+        config = GENAI_CONFIG | {"CACHE_DIR": str(self.cache_dir)}
-            "NPU",
+        self._pipe = ov_genai.LLMPipeline(str(self.model_path), "NPU", **config)
            CACHE_DIR=str(self.cache_dir),
            MAX_PROMPT_LEN=1024,
            MIN_RESPONSE_LEN=64,
            PREFILL_HINT="DYNAMIC",
            GENERATE_HINT="FAST_COMPILE",
        )
        self._load_ms = round((time.monotonic() - start) * 1000, 2)
        self._loaded_at = time.time()
@@ -115,19 +144,19 @@ class NpuWorker:
        if len(user_input) > MAX_INPUT_CHARS:
            raise ValueError(f"input too long: {len(user_input)} chars > {MAX_INPUT_CHARS}")
        max_new_tokens = int(max_new_tokens or DEFAULTS[job])
-        if max_new_tokens < 1 or max_new_tokens > 256:
+        if max_new_tokens < 1 or max_new_tokens > MAX_NEW_TOKENS:
-            raise ValueError("max_new_tokens must be between 1 and 256")
+            raise ValueError(f"max_new_tokens must be between 1 and {MAX_NEW_TOKENS}")
        prompt = PROMPTS[job].format(input=user_input.strip())
        with self._lock:
            load_start = time.monotonic()
            self.load()
            load_ms = round((time.monotonic() - load_start) * 1000, 2)
-            before = read_busy()
+            before = self.read_busy()
            gen_start = time.monotonic()
            pipe = cast(Any, self._pipe)
            text = str(pipe.generate(prompt, max_new_tokens=max_new_tokens)).strip()
            generate_ms = round((time.monotonic() - gen_start) * 1000, 2)
-            after = read_busy()
+            after = self.read_busy()
        parsed = coerce_json(text) if job in {"memory_candidate", "notification"} else None
        if job == "memory_candidate" and isinstance(parsed, list):
            parsed = {"candidates": parsed, "notes": "model returned a top-level array; worker wrapped it to preserve the API contract"}
@@ -151,10 +180,11 @@ class NpuWorker:
            "loaded": self._pipe is not None,
            "initial_load_ms": self._load_ms,
            "loaded_at": self._loaded_at,
-            "busy_time_us": read_busy(),
+            "busy_time_us": self.read_busy(),
            "max_input_chars": MAX_INPUT_CHARS,
            "max_new_tokens": MAX_NEW_TOKENS,
            "jobs": sorted(PROMPTS),
-            "bind": f"{HOST}:{PORT}",
+            "bind": f"{self.bind_host}:{self.bind_port}",
        }
@@ -175,7 +205,7 @@ def response_payload(worker: NpuWorker, job: str, result: GenerationResult) -> d
 def make_handler(worker: NpuWorker):
    class Handler(BaseHTTPRequestHandler):
-        server_version = "openvino-genai-npu-worker/0.1"
+        server_version = "openvino-genai-npu-worker/0.2"
        def log_message(self, format: str, *args: Any) -> None:
            # Log only method/path/status metadata, not raw request bodies.
@@ -215,7 +245,12 @@ def make_handler(worker: NpuWorker):
                if job == "memory":
                    job = "memory_candidate"
                result = worker.generate(job, str(payload.get("input", "")), payload.get("max_new_tokens"))
-                self.send_json(200, response_payload(worker, job, result))
+                body = response_payload(worker, job, result)
                if result.npu_busy_delta_us <= 0:
                    body["error"] = "NPU busy-time counter did not increase during generation"
                    self.send_json(503, body)
                    return
                self.send_json(200, body)
            except Exception as exc:
                self.send_json(400, {"error": str(exc)})
@@ -226,21 +261,24 @@ def cli(argv: list[str] | None = None) -> int:
    parser = argparse.ArgumentParser(description="OpenVINO GenAI NPU worker")
    parser.add_argument("--model-path", default=os.environ.get("OV_GENAI_NPU_MODEL", DEFAULT_MODEL_PATH))
    parser.add_argument("--cache-dir", default=os.environ.get("OV_GENAI_NPU_CACHE", DEFAULT_CACHE_DIR))
-    parser.add_argument("--host", default=HOST)
+    parser.add_argument("--host", default=os.environ.get("OV_GENAI_NPU_HOST", HOST))
    parser.add_argument("--port", type=int, default=int(os.environ.get("OV_GENAI_NPU_PORT", PORT)))
    parser.add_argument("--job", choices=sorted(PROMPTS), help="Run one CLI job instead of serving HTTP")
    parser.add_argument("--input", help="Input text for --job")
    parser.add_argument("--max-new-tokens", type=int)
    args = parser.parse_args(argv)
-    worker = NpuWorker(args.model_path, args.cache_dir)
+    if args.host != "127.0.0.1":
        raise SystemExit("Refusing non-local bind without code change/explicit approval")
    worker = NpuWorker(args.model_path, args.cache_dir, bind_host=args.host, bind_port=args.port)
    if args.job:
        result = worker.generate(args.job, args.input or "", args.max_new_tokens)
        print(json.dumps(response_payload(worker, args.job, result), indent=2))
        return 0 if result.npu_busy_delta_us > 0 else 2
-    if args.host != "127.0.0.1":
+    if listener_exists(args.host, args.port):
-        raise SystemExit("Refusing non-local bind without code change/explicit approval")
+        raise SystemExit(f"Refusing to start: listener already exists on {args.host}:{args.port}")
    server = ThreadingHTTPServer((args.host, args.port), make_handler(worker))
    print(f"serving {MODEL_ID} on http://{args.host}:{args.port}; raw prompts are not logged")
    server.serve_forever()
@@ -12,8 +12,10 @@ This service is intentionally not wired into live RAG by default.
 ## Files
- `server.py` — stdlib HTTP OpenVINO Runtime service.
+- `SPEC.md` — endpoint/CLI contract, model/runtime recommendation, smoke/NPU proof plan, RAG integration plan, docs implications, and no-go criteria.
 - `server.py` — stdlib HTTP OpenVINO Runtime service with fail-fast localhost listener conflict checks and request validation.
 - `smoke.py` — non-private API/ranking/NPU busy-time smoke test.
 - `tests/test_server_validation.py` — stdlib unit checks for request validation and listener conflict detection.
 - `openvino-reranker.service` — optional user-systemd unit.
 ## One-time setup
@@ -61,7 +63,7 @@ OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco
 python /home/will/lab/swarm/openvino-reranker-npu/server.py
 ```
-Startup performs a non-private smoke inference and fails closed when `OPENVINO_RERANKER_DEVICE=NPU` but `npu_busy_time_us` does not increase.
+Startup performs a non-private smoke inference and fails closed when `OPENVINO_RERANKER_DEVICE=NPU` but `npu_busy_time_us` does not increase. It also checks whether the requested listener can bind before compiling the OpenVINO model, so obvious port conflicts fail fast; the real server bind still happens immediately after model load.
 ## API
@@ -109,6 +111,16 @@ Expected:
 - The top result matches the non-private fixture expectation.
 - Response and sysfs `npu_busy_delta_us` are positive.
 ## Validation checks
 ```bash
 source /home/will/.venvs/openvino-reranker/bin/activate
 PYTHONPATH=/home/will/lab/swarm/openvino-reranker-npu \
  python -m unittest discover -s /home/will/lab/swarm/openvino-reranker-npu/tests
 ```
 These checks do not compile the OpenVINO model; they cover request validation and fail-fast listener conflict detection.
 ## Optional systemd user service
 Install the unit only after the foreground command and smoke test pass:
@@ -0,0 +1,243 @@
 # OpenVINO NPU reranker service spec
 Status: proposed localhost prototype; not live RAG integration.
 Target port: `127.0.0.1:18818`.
 Safety posture: foreground smoke first, no persistent enablement, no Atlas/Hermes/RAG routing changes without Will's explicit approval.
 ## Recommendation
 Use `cross-encoder/ms-marco-MiniLM-L6-v2`, exported to OpenVINO IR as INT8, served by the local stdlib HTTP service in `server.py` on OpenVINO Runtime `NPU`.
 Why this choice:
 - It is a small BERT-family cross-encoder reranker intended for MS MARCO-style passage ranking, matching the second-stage RAG use case better than another embedding-only similarity pass.
 - The model shape is simple pairwise text classification/scoring: `(query, document) -> score`, which maps cleanly to OpenVINO Runtime and avoids introducing a heavier LLM worker for reranking.
 - INT8 OpenVINO IR keeps memory and compile/runtime cost low enough for a localhost sidecar and is already represented in the repo defaults:
  `/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov`.
 - The service can fail closed on startup when `OPENVINO_RERANKER_DEVICE=NPU` but `/sys/class/accel/accel0/device/npu_busy_time_us` does not increase, preventing false "NPU-backed" claims.
 Runtime default:
 ```text
 OPENVINO_RERANKER_HOST=127.0.0.1
 OPENVINO_RERANKER_PORT=18818
 OPENVINO_RERANKER_DEVICE=NPU
 OPENVINO_RERANKER_MODEL=cross-encoder/ms-marco-MiniLM-L6-v2
 OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov
 OPENVINO_RERANKER_MAX_LENGTH=512
 OPENVINO_RERANKER_MAX_DOCUMENTS=100
 OPENVINO_RERANKER_MAX_BODY_BYTES=5242880
 ```
 ## Endpoint contract
 ### Health and readiness
 `GET /healthz` and `GET /readyz` return JSON.
 `/readyz` must return HTTP 200 only when the model is loaded and startup smoke passed. For NPU mode, startup smoke must include a positive `npu_busy_delta_us`.
 Representative ready response:
 ```json
 {
  "status": "ok",
  "ok": true,
  "service": "openvino-reranker",
  "model": "cross-encoder/ms-marco-MiniLM-L6-v2",
  "model_dir": "/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov",
  "device": "NPU",
  "available_devices": ["CPU", "NPU"],
  "max_length": 512,
  "startup_smoke": {"ok": true, "duration_ms": 12.3, "npu_busy_delta_us": 1234},
  "last_inference": null,
  "ready_error": null
 }
 ```
 ### Rerank
 `POST /rerank` and compatibility alias `POST /v1/rerank` accept:
 ```json
 {
  "query": "how do I verify OpenVINO NPU usage?",
  "documents": [
    {"id": "good", "text": "Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference.", "metadata": {"source": "synthetic"}},
    {"id": "bad", "text": "This note is about making sourdough starter."}
  ],
  "top_k": 2,
  "return_documents": false
 }
 ```
 Compatibility notes:
 - `documents` may be strings or objects with `id`, `text`, and optional object `metadata`.
 - `top_k` is preferred; `top_n` is accepted for common reranker-client compatibility.
 - `return_documents=false` is recommended for RAG integration to avoid echoing private source text into logs or intermediate traces.
 - The optional `model` field may be sent by clients but is not used for routing; this sidecar serves one configured model.
 Successful response:
 ```json
 {
  "ok": true,
  "model": "cross-encoder/ms-marco-MiniLM-L6-v2",
  "device": "NPU",
  "query": "how do I verify OpenVINO NPU usage?",
  "input_count": 2,
  "top_k": 2,
  "duration_ms": 10.5,
  "npu_busy_delta_us": 1234,
  "results": [
    {"index": 0, "id": "good", "score": 8.1, "raw_score": 8.1, "probability": 0.9997},
    {"index": 1, "id": "bad", "score": -4.2, "raw_score": -4.2, "probability": 0.0148}
  ]
 }
 ```
 Error response shape:
 ```json
 {"ok": false, "error": "human-readable error", "results": []}
 ```
 Status behavior:
 - 400: invalid JSON schema, empty query, missing/empty documents, invalid document text, or non-positive/non-integer `top_k`/`top_n`.
 - 413: request body above `OPENVINO_RERANKER_MAX_BODY_BYTES`.
 - 503: model not ready.
 - 500: unexpected inference/runtime failure.
 ## CLI contract
 Foreground-only review start:
 ```bash
 ss -ltnp | grep ':18818\b' || true
 cat /sys/class/accel/accel0/device/npu_busy_time_us
 source /home/will/.venvs/openvino-reranker/bin/activate
 OPENVINO_RERANKER_HOST=127.0.0.1 \
 OPENVINO_RERANKER_PORT=18818 \
 OPENVINO_RERANKER_DEVICE=NPU \
 OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov \
 python /home/will/lab/swarm/openvino-reranker-npu/server.py
 ```
 Client smoke:
 ```bash
 source /home/will/.venvs/openvino-reranker/bin/activate
 python /home/will/lab/swarm/openvino-reranker-npu/smoke.py --url http://127.0.0.1:18818
 ```
 Optional user-systemd unit exists as `openvino-reranker.service`, but this spec does not approve copying, starting, enabling, or wiring it into live paths.
 ## Non-private smoke payload
 Use only synthetic public-text fixtures. Do not query the Obsidian vault, private document directories, image folders, or live Chroma documents during smoke.
 Minimum cases:
 1. Query: `how do I verify OpenVINO NPU usage?`
   - Expected top document: `Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference.`
   - Distractor: `This note is about making sourdough starter.`
 2. Query: `what port does the reranker service use?`
   - Expected top document: `The OpenVINO reranker prototype listens locally on port 18818.`
   - Distractor: `Whisper transcription accepts audio uploads.`
 3. Query: `why should reranking not mutate vector collections?`
   - Expected top document: `Reranking is a read-only second-stage transformation after vector search.`
   - Distractor: `Boil pasta in salted water until al dente.`
 Pass criteria:
 - `/readyz` is HTTP 200 and reports `device=NPU`.
 - Every case returns `ok=true` and a sorted `results` list with the expected top `id`.
 - Response-level `npu_busy_delta_us` is positive for each case.
 - External sysfs `after - before` is positive for each case or at least for the full smoke batch.
 - Smoke script exits 0 and prints JSON with `ok: true`.
 ## NPU busy-time verification plan
 HTTP 200 is not proof. Verification must capture both endpoint-reported and sysfs-observed deltas.
 Procedure:
 ```bash
 BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
 before=$(cat "$BUSY")
 curl -fsS http://127.0.0.1:18818/rerank \
  -H 'Content-Type: application/json' \
  -d '{"query":"how do I verify OpenVINO NPU usage?","documents":[{"id":"good","text":"Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference."},{"id":"bad","text":"This note is about making sourdough starter."}],"top_k":2,"return_documents":false}' \
  | jq '{ok, device, npu_busy_delta_us, top_id:.results[0].id}'
 after=$(cat "$BUSY")
 echo "sysfs_npu_busy_delta_us=$((after-before))"
 ```
 Acceptance:
 - `device == "NPU"`.
 - Response `npu_busy_delta_us > 0`.
 - Shell-computed `sysfs_npu_busy_delta_us > 0`.
 - If any value is zero/negative/missing, call the result CPU/unknown and do not claim NPU-backed reranking.
 ## Optional RAG second-stage integration plan (deferred)
 This is a plan only. Do not enable it in live RAG without explicit approval.
 Design:
 1. Keep existing vector search and Chroma collection `obsidian_bge_npu` unchanged.
 2. Retrieve more candidates from current vector search, e.g. `initial_k=20`.
 3. Send only request-time candidate snippets/ids to `http://127.0.0.1:18818/rerank`.
 4. Use reranker order to choose final `top_k`, e.g. `5`.
 5. On timeout, connection error, invalid response, or non-positive NPU proof when proof is required, fall back to vector order and attach metadata like `rerank_error`; do not fail the whole RAG request unless explicitly configured.
 6. Log counters and latency, but avoid logging raw private document text.
 Disabled-by-default knobs:
 ```text
 RAG_RERANK_ENABLED=false
 RAG_RERANK_URL=http://127.0.0.1:18818/rerank
 RAG_RERANK_INITIAL_K=20
 RAG_RERANK_TOP_K=5
 RAG_RERANK_TIMEOUT_MS=3000
 RAG_RERANK_REQUIRE_NPU_PROOF=true
 RAG_RERANK_RETURN_DOCUMENTS=false
 ```
 Integration tests should use synthetic in-memory candidates first. Live-vault evaluation requires a separate approval and must not mutate or rebuild the vector collection.
 ## Docs and diagram implications
 If this prototype advances beyond spec/review, update these surfaces while keeping live/prototype labels clear:
 - `openvino-reranker-npu/README.md`: keep model/runtime, endpoint contract, smoke command, and approval gates synchronized with code.
 - `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md`: list `:18818` as prototype/not enabled, with foreground smoke and NPU sysfs proof.
 - Service catalog / architecture notes: show live baseline `:18810`, `:18816`, `:18817`; show `:18818` as optional second-stage RAG prototype, not live routing.
 - Diagrams: render `RAG :18810 -> optional reranker :18818` as dashed/disabled or "proposed"; do not imply Atlas/Hermes/gateway traffic is using it.
 - Optional systemd unit: document as installable after approval, not enabled by default.
 ## No-go / defer criteria
 Do not ship, enable, or integrate the reranker if any of these hold:
 - Port `18818` is already owned by another live service.
 - `NPU` is unavailable in `ov.Core().available_devices` or `/sys/class/accel/accel0/device/npu_busy_time_us` is missing.
 - Foreground startup smoke fails or has non-positive NPU busy-time delta while configured for NPU.
 - Synthetic smoke top-1 ranking fails or latency is unacceptable for the intended RAG timeout budget.
 - Model export requires overwriting the existing model directory or touching Chroma/vector collections.
 - The service must bind beyond `127.0.0.1` to be useful.
 - Live RAG integration would require reindexing, collection mutation, private-doc smoke, or Atlas/Hermes/gateway routing changes without explicit approval.
 - Logs or responses would persist raw private document text outside the existing RAG request path.
 ## Current local preflight observed during this spec pass
 - `/sys/class/accel/accel0/device/npu_busy_time_us` is readable.
 - `/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov` is present.
 - `/home/will/.venvs/openvino-reranker/bin/python` is present.
 - `:18818` was not listening during preflight.
 - `server.py` and `smoke.py` pass `python -m py_compile`.
 These observations are preflight only; they are not a live service/NPU smoke result.
@@ -16,6 +16,7 @@ import argparse
 import json
 import math
 import os
 import socket
 import sys
 import threading
 import time
@@ -251,6 +252,27 @@ def normalize_documents(value: Any, max_documents: int) -> list[dict[str, Any]]:
    return docs
 def parse_top_k(value: Any, document_count: int) -> int:
    """Resolve a client-supplied top_k/top_n into a usable result count.

    Returns document_count when value is None, otherwise value capped at
    document_count. Raises ValueError for anything that is not an int of at
    least 1; bools are rejected even though bool subclasses int.
    """
    if value is None:
        return document_count
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int or value < 1:
        raise ValueError("top_k/top_n must be a positive integer")
    return value if value <= document_count else document_count
 def assert_port_available(host: str, port: int) -> None:
    """Probe-bind (host, port) and raise RuntimeError if the bind fails.

    The probe socket sets SO_REUSEADDR before binding and is closed again
    before returning, so it never holds the port itself.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        try:
            probe.bind((host, port))
        except OSError as exc:
            raise RuntimeError(f"cannot bind {host}:{port}; listener conflict or invalid bind: {exc}") from exc
    finally:
        probe.close()
 class Handler(BaseHTTPRequestHandler):
    server_version = "OpenVINOReranker/0.1"
@@ -293,6 +315,7 @@ class Handler(BaseHTTPRequestHandler):
                raise ValueError("query is required")
            top_k = payload.get("top_k", payload.get("top_n"))
            documents = normalize_documents(payload.get("documents"), self.max_documents)
            top_k = parse_top_k(top_k, len(documents))
            return_documents = bool(payload.get("return_documents", True))
            response = self.svc.rerank(query.strip(), documents, top_k=top_k, return_documents=return_documents)
            self.write_json(response)
@@ -342,6 +365,7 @@ def main() -> int:
    parser.add_argument("--skip-startup-smoke", action="store_true", default=os.environ.get("OPENVINO_RERANKER_SKIP_STARTUP_SMOKE", "").lower() in {"1", "true", "yes"})
    args = parser.parse_args()
    assert_port_available(args.host, args.port)
    service = RerankerService(
        Path(args.model_dir).expanduser(),
        args.model,
@@ -0,0 +1,55 @@
 #!/usr/bin/env python3
 """Unit checks for reranker request validation helpers.
 These tests intentionally avoid loading an OpenVINO model; they only cover the
 stdlib validation helpers used before inference.
 """
 from __future__ import annotations
 import socket
 import unittest
 from server import assert_port_available, normalize_documents, parse_top_k
 class ValidationTests(unittest.TestCase):
    """Checks for the stdlib-only validation helpers; no model is loaded."""
    def test_normalize_accepts_strings_and_objects(self) -> None:
        raw_documents = [
            "plain text document",
            {"id": "obj", "text": "object document", "metadata": {"source": "synthetic"}},
        ]
        normalized = normalize_documents(raw_documents, max_documents=2)
        # Bare strings become {"text": ...}; mapping inputs keep id and metadata.
        self.assertEqual(normalized[0], {"text": "plain text document"})
        self.assertEqual(normalized[1]["id"], "obj")
        self.assertEqual(normalized[1]["metadata"], {"source": "synthetic"})
    def test_normalize_rejects_empty_or_too_many_documents(self) -> None:
        rejected_inputs = (
            ("non-empty", []),
            ("max_documents", ["a", "b", "c"]),
            ("non-empty string", [{"id": "empty", "text": ""}]),
        )
        for expected_message, documents in rejected_inputs:
            with self.assertRaisesRegex(ValueError, expected_message):
                normalize_documents(documents, max_documents=2)
    def test_parse_top_k_defaults_clamps_and_rejects_invalid_values(self) -> None:
        # (requested, expected) pairs: default, in-range, clamped.
        accepted = ((None, 3), (2, 2), (99, 3))
        for requested, expected in accepted:
            self.assertEqual(parse_top_k(requested, document_count=3), expected)
        invalid_values = (0, -1, True, False, 1.5, "2", "nope")
        for value in invalid_values:
            with self.subTest(value=value), self.assertRaisesRegex(ValueError, "positive integer"):
                parse_top_k(value, document_count=3)
    def test_assert_port_available_detects_listener_conflict(self) -> None:
        listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.addCleanup(listener.close)
        # Port 0 lets the OS pick a free port, which this listener then occupies.
        listener.bind(("127.0.0.1", 0))
        listener.listen(1)
        occupied_port = listener.getsockname()[1]
        with self.assertRaisesRegex(RuntimeError, "cannot bind"):
            assert_port_available("127.0.0.1", occupied_port)
 if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,89 @@
 # OpenVINO Context Gate
 Local-only Atlas/Hermes context-gate advisory prototype.
 This first slice is CLI-only and dry-run by design. It takes a non-private query,
 asks the localhost classifier on `127.0.0.1:18819` for advisory labels (or uses
 built-in heuristic labels when `--offline` is given), and emits a compact, typed
 context bundle plan. It does not retrieve private content or change live
 Atlas/Hermes behavior.
 ## Safety invariants
 Closed in v1:
 - live Atlas/Hermes routing changes
 - memory writes
 - outbound sends
 - tool execution by the sidecar
 - service restarts
 - vector DB mutation or reindexing
 - private root broadening
 - live config changes
 The CLI only plans which source classes an authoritative Atlas/Hermes agent might
 use later: `durable_memory`, `session_search`, `rag_search`, `repo_files`,
 `live_system`, `web`, or `no_retrieval`.
 NPU proof is strict: `npu_verified=true` is only emitted when a live classifier
 request reports a positive endpoint NPU busy delta together with a positive sysfs
 busy delta (either the sysfs delta reported by the endpoint or the one the CLI
 measures around the request). HTTP 200 alone is never treated as proof. Offline
 and fallback modes set `npu_verified=false` and include a warning.
 ## Usage
 Live classifier path, with compact terminal output:
 ```bash
 python scripts/context-gate-advisory.py \
  --query "How do I check whether the RAG reranker is using the NPU?" \
  --format compact
 ```
 Deterministic offline smoke, safe for unit-test hosts without NPU services:
 ```bash
 python scripts/context-gate-advisory.py \
  --offline \
  --query "Write a haiku about Seattle rain." \
  --format compact-json
 ```
 Fallback plan if the classifier is down:
 ```bash
 python scripts/context-gate-advisory.py \
  --allow-offline-fallback \
  --query "Where did we leave the NPU context gate implementation plan?" \
  --context platform=kanban \
  --context repo_path=/home/will/lab/swarm \
  --format compact-json
 ```
 ## Output shape
 Full JSON includes:
 - `schema=atlas_context_gate_plan_v1`
 - `dry_run=true`
 - `query_class`
 - `source_plan`
 - `bundle_plan`
 - `npu_proof`
 - closed `authority`
 - closed approval `gates`
 - compact `warnings`
 Compact output intentionally avoids raw private snippets and raw JSON dumps:
 ```text
 ok=true schema=atlas_context_gate_plan_v1 bundle=OpsDebugBundle sources=live_system,repo_files,rag_search source_count=3 npu_verified=false classifier_delta_us=None outer_sysfs_delta_us=None gates=closed:route,memory,send,tools,restart,vector,private_roots,config warnings=offline_heuristic_classifier_no_npu_claim,npu_proof_inconclusive
 ```
 ## Notes for reviewers
 - No HTTP service or systemd unit is added in this slice.
 - The prototype does not call RAG, memory, session search, web, filesystem tools,
  or the advisory gateway. It only emits a plan.
 - Unit tests use fake/offline classifier results and do not require live NPU.
 - An optional live smoke test may call only the local classifier endpoint and read
  `/sys/class/accel/accel0/device/npu_busy_time_us` to prove a positive busy-time delta.
@@ -0,0 +1,5 @@
 """Atlas/Hermes local advisory context-gate prototype."""
 from .context_gate import SCHEMA, ContextGateError, build_plan, compact_json, compact_line, validate_plan
 __all__ = ["SCHEMA", "ContextGateError", "build_plan", "compact_json", "compact_line", "validate_plan"]
@@ -0,0 +1,90 @@
 from __future__ import annotations
 import argparse
 import json
 import sys
 from typing import Any
 from .context_gate import (
    DEFAULT_CLASSIFIER_URL,
    ContextGateError,
    build_plan,
    classify_live,
    classify_offline,
    compact_json,
    compact_line,
 )
 def _parse_context(raw_items: list[str]) -> dict[str, Any]:
    context: dict[str, Any] = {}
    for item in raw_items:
        if "=" not in item:
            raise ContextGateError(f"invalid_context_item:{item}")
        key, value = item.split("=", 1)
        if not key:
            raise ContextGateError("invalid_context_key")
        if value.lower() == "true":
            parsed: Any = True
        elif value.lower() == "false":
            parsed = False
        else:
            parsed = value
        context[key] = parsed
    return context
 def build_arg_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the advisory context-gate CLI."""
    cli = argparse.ArgumentParser(
        description="Emit a local-only Atlas/Hermes advisory context bundle plan. No routing, retrieval, memory writes, sends, restarts, or vector mutations are performed.",
    )
    add = cli.add_argument

    # Request inputs.
    add("--query", required=True, help="Non-private query to plan for")
    add("--format", choices=["compact", "compact-json", "json"], default="compact")
    add(
        "--context",
        action="append",
        default=[],
        metavar="KEY=VALUE",
        help="Optional compact request context, e.g. platform=kanban repo_path=/path",
    )
    add("--max-sources", type=int, default=4)
    add("--trace-id")

    # Classifier endpoint settings.
    add("--classifier-url", default=DEFAULT_CLASSIFIER_URL)
    add("--classifier-timeout", type=float, default=8.0)

    # Offline / fallback behaviour.
    add(
        "--offline",
        action="store_true",
        help="Use deterministic heuristic labels; makes no NPU claim",
    )
    add(
        "--allow-offline-fallback",
        action="store_true",
        help="If live classifier is unavailable, emit an advisory fallback plan with npu_verified=false",
    )
    add(
        "--no-require-npu-proof",
        action="store_true",
        help="Do not add npu_proof_inconclusive warning when running offline/fallback",
    )
    return cli
 def main(argv: list[str] | None = None) -> int:
    """Run the advisory CLI and return a process exit code.

    Labels come from the heuristic classifier when ``--offline`` is set and
    from the live classifier otherwise. A live failure is re-raised unless
    ``--allow-offline-fallback`` is set, in which case the heuristic labels are
    used and the failure text becomes the classifier warning.

    Returns:
        ``0`` after printing the plan to stdout, or ``2`` after printing
        ``error=...`` to stderr when a ``ContextGateError`` occurs.
    """
    args = build_arg_parser().parse_args(argv)
    try:
        context = _parse_context(args.context)
        if args.offline:
            classifier = classify_offline(args.query, context)
        else:
            try:
                classifier = classify_live(
                    args.query,
                    context,
                    classifier_url=args.classifier_url,
                    timeout=args.classifier_timeout,
                )
            except ContextGateError as live_failure:
                if not args.allow_offline_fallback:
                    raise
                classifier = classify_offline(args.query, context, warning=str(live_failure))
        plan = build_plan(
            args.query,
            context=context,
            options={
                "dry_run": True,
                "max_sources": args.max_sources,
                "include_private_text": False,
                "require_npu_proof": not args.no_require_npu_proof,
                "trace_id": args.trace_id,
            },
            classifier=classifier,
        )
    except ContextGateError as failure:
        print(f"error={failure}", file=sys.stderr)
        return 2

    # "compact" is the parser default, so it doubles as the fallback renderer.
    renderers = {
        "json": lambda rendered_plan: json.dumps(rendered_plan, indent=2, sort_keys=True),
        "compact-json": compact_json,
    }
    render = renderers.get(args.format, compact_line)
    print(render(plan))
    return 0
 if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
@@ -0,0 +1,482 @@
 """Local-only advisory context bundle planner for Atlas/Hermes.
 This module intentionally emits a retrieval/authority plan only. It does not call
 Hermes memory/session/RAG/web tools, mutate vector stores, broaden private roots,
 or change live routing.
 """
 from __future__ import annotations
 import json
 import ipaddress
 import re
 import time
 import urllib.error
 import urllib.parse
 import urllib.request
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Mapping, Sequence
 SCHEMA = "atlas_context_gate_plan_v1"
 NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
 DEFAULT_CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
 AUTHORITY = {
    "may_route": False,
    "may_write_memory": False,
    "may_send_external": False,
    "may_process_private_dirs": False,
    "may_execute_tools": False,
    "may_restart_services": False,
    "may_mutate_vector_db": False,
    "may_change_live_config": False,
 }
 GATES = {
    "live_routing_change": "closed_requires_explicit_approval",
    "memory_write": "closed_requires_explicit_approval",
    "outbound_send": "closed_requires_explicit_approval",
    "tool_execution": "closed_requires_explicit_approval",
    "service_restart": "closed_requires_explicit_approval",
    "vector_mutation": "closed_requires_explicit_approval",
    "private_root_broadening": "closed_requires_explicit_approval",
 }
 _ALLOWED_SOURCES = {
    "durable_memory",
    "session_search",
    "rag_search",
    "repo_files",
    "live_system",
    "web",
    "no_retrieval",
 }
 class ContextGateError(ValueError):
    """Raised for invalid requests or unavailable required local stages.

    The message is a compact machine-readable code such as ``query_required``
    or ``classifier_unavailable: ...``.
    """
@dataclass(frozen=True)
 class ClassifierResult:
    """Immutable record of one classification attempt and its NPU evidence."""
    # Label mapping; entries are either plain values or mappings with
    # "value"/"confidence" keys (see _label_value and _label_confidence).
    labels: Mapping[str, Any]
    # "npu_busy_delta_us" as reported in the classifier response; None when offline.
    npu_busy_delta_us: int | None
    # "sysfs_npu_busy_delta_us" as reported in the classifier response; None when offline.
    sysfs_npu_busy_delta_us: int | None
    # Difference of the sysfs counter read before and after the live request;
    # None when either read failed or the result is offline.
    outer_sysfs_delta_us: int | None
    # True for results built by classify_live, False for classify_offline.
    live: bool
    # Optional warning code carried into the plan's "warnings" list.
    warning: str | None = None
 def read_npu_busy_time_us(path: Path = NPU_BUSY_PATH) -> int | None:
    """Read the integer counter stored at ``path``.

    Returns:
        The parsed integer, or ``None`` when the file cannot be read or its
        content is not an integer. Best-effort: this function never raises for
        those cases.
    """
    try:
        counter_text = path.read_text(encoding="utf-8")
        return int(counter_text.strip())
    except (OSError, ValueError):
        # OSError covers missing files and permission errors; ValueError covers
        # undecodable or non-numeric content.
        return None
 def _label_value(labels: Mapping[str, Any], name: str, default: Any) -> Any:
    value = labels.get(name, default)
    if isinstance(value, Mapping) and "value" in value:
        return value.get("value", default)
    return value
 def _label_confidence(labels: Mapping[str, Any], name: str, default: float = 0.5) -> float:
    value = labels.get(name)
    if isinstance(value, Mapping):
        try:
            return float(value.get("confidence", default))
        except (TypeError, ValueError):
            return default
    return default
 def heuristic_labels(query: str, context: Mapping[str, Any] | None = None) -> dict[str, Any]:
    """Derive advisory labels from keyword matches; used by tests and offline mode.

    Matching is a case-insensitive substring test on the query. The only
    context key consulted is ``platform``. Each label is returned as a
    ``{"value": ..., "confidence": ...}`` mapping with fixed confidences.
    """
    lowered = query.lower()
    platform = str((context or {}).get("platform", "unknown")).lower()

    def mentions(*needles: str) -> bool:
        return any(needle in lowered for needle in needles)

    current_words = ("current", "now", "health", "port", "process", "systemd", "status", "npu", "listening", "logs")
    prior_words = ("where did we leave", "what did we decide", "previous", "earlier", "handoff", "plan")
    coding_words = ("implement", "code", "repo", "test", "pytest", "diff", "branch", "hermes")
    research_words = ("research", "compare", "summarize", "explain", "what is", "how do i")
    unsafe_words = ("change live routing", "live routing", "restart", "send", "write memory", "reindex", "mutate", "delete")

    about_current_state = mentions(*current_words)
    about_code = mentions(*coding_words)
    safety = mentions(*unsafe_words)
    tool_needed = about_current_state or about_code or safety

    # First matching rule wins: coding, then devops, then research, else chat.
    if platform == "kanban" or "kanban" in lowered or about_code:
        category = "coding"
    elif about_current_state:
        category = "devops"
    elif mentions(*research_words, *prior_words):
        category = "research"
    else:
        category = "chat"

    if mentions("remember", "preference"):
        memory_candidate = "durable_user_fact"
    elif mentions("convention", "workflow"):
        memory_candidate = "workflow_convention"
    else:
        memory_candidate = "none"

    urgency = "high" if mentions("urgent", "critical", "down", "broken") else "normal"

    def label(value: Any, confidence: float) -> dict[str, Any]:
        return {"value": value, "confidence": confidence}

    return {
        "tool_needed": label(tool_needed, 0.76 if tool_needed else 0.68),
        "memory_candidate": label(memory_candidate, 0.8 if memory_candidate != "none" else 0.35),
        "urgency": label(urgency, 0.8 if urgency == "high" else 0.65),
        "workflow_category": label(category, 0.78 if category != "chat" else 0.7),
        "safety_confirmation_required": label(safety, 0.9 if safety else 0.2),
    }
 class _NoClassifierRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Fail closed instead of following redirects away from a validated local URL."""
    def redirect_request(self, req, fp, code, msg, headers, newurl):  # type: ignore[no-untyped-def]
        return None
 _CLASSIFIER_OPENER = urllib.request.build_opener(_NoClassifierRedirectHandler)
 def classify_live(
    query: str,
    context: Mapping[str, Any] | None = None,
    classifier_url: str = DEFAULT_CLASSIFIER_URL,
    timeout: float = 8.0,
 ) -> ClassifierResult:
    """POST ``query`` to the local classifier and collect NPU busy-time evidence.

    The sysfs busy counter is sampled immediately before and after the request
    so the caller gets an "outer" delta that does not depend on what the
    endpoint reports about itself.

    Args:
        query: Text to classify.
        context: Optional request context; only ``platform`` is forwarded.
        classifier_url: Endpoint URL; must pass ``validate_classifier_url``.
        timeout: Request timeout in seconds.

    Returns:
        A ``ClassifierResult`` with ``live=True``.

    Raises:
        ContextGateError: if the URL is rejected, the endpoint cannot be
            reached or violates HTTP framing (``classifier_unavailable``), the
            body is not UTF-8 JSON (``classifier_invalid_json``), or the JSON
            lacks a ``labels`` object (``classifier_missing_labels``).
    """
    import http.client  # local import: only the exception type is needed here

    classifier_url = validate_classifier_url(classifier_url)
    before = read_npu_busy_time_us()
    payload = {
        "id": f"context-gate-{int(time.time())}",
        "text": query,
        "context": {"platform": (context or {}).get("platform", "cli"), "source": "context_gate"},
        "options": {"include_evidence": False, "include_embedding_debug": False, "dry_run": True},
    }
    req = urllib.request.Request(
        classifier_url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with _CLASSIFIER_OPENER.open(req, timeout=timeout) as resp:  # noqa: S310 - local configured endpoint only
            # Bounded read so an oversized response cannot exhaust memory.
            raw = resp.read(256_000)
    except (urllib.error.URLError, http.client.HTTPException, TimeoutError, OSError) as exc:
        # HTTPException (bad status line, incomplete read) is not an OSError,
        # so it must be listed explicitly to become a ContextGateError.
        raise ContextGateError(f"classifier_unavailable: {exc}") from exc
    after = read_npu_busy_time_us()
    try:
        data = json.loads(raw.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        # Non-UTF-8 bytes are reported the same way as malformed JSON.
        raise ContextGateError("classifier_invalid_json") from exc
    # Valid JSON may still be a list or scalar; only an object can hold labels.
    labels = data.get("labels") if isinstance(data, Mapping) else None
    if not isinstance(labels, Mapping):
        raise ContextGateError("classifier_missing_labels")
    outer = after - before if before is not None and after is not None else None
    return ClassifierResult(
        labels=labels,
        npu_busy_delta_us=_as_int_or_none(data.get("npu_busy_delta_us")),
        sysfs_npu_busy_delta_us=_as_int_or_none(data.get("sysfs_npu_busy_delta_us")),
        outer_sysfs_delta_us=outer,
        live=True,
    )
 def validate_classifier_url(classifier_url: str) -> str:
    """Validate the local-only classifier endpoint before any POST is attempted.

    Accepted URLs use ``http`` or ``https`` and name either ``localhost``
    (case-insensitive, optional trailing dot) or a literal loopback IP address.

    Returns:
        ``classifier_url`` unchanged when it is acceptable.

    Raises:
        ContextGateError: if the URL is malformed, uses another scheme, has no
            host, or names a host that is not loopback.
    """
    try:
        parsed = urllib.parse.urlparse(classifier_url)
        host = parsed.hostname
    except ValueError as exc:
        # urlparse raises a bare ValueError for malformed netlocs such as an
        # unbalanced "[". Convert it so callers that only handle
        # ContextGateError still see a clean validation failure.
        raise ContextGateError("invalid_classifier_url:malformed_url") from exc
    if parsed.scheme not in {"http", "https"}:
        raise ContextGateError("invalid_classifier_url:scheme_must_be_http_or_https")
    if not host:
        raise ContextGateError("invalid_classifier_url:missing_host")
    host_normalized = host.lower().rstrip(".")
    if host_normalized == "localhost":
        return classifier_url
    try:
        address = ipaddress.ip_address(host_normalized)
    except ValueError as exc:
        # Any other hostname is rejected without resolving it.
        raise ContextGateError("invalid_classifier_url:host_must_be_loopback") from exc
    if not address.is_loopback:
        raise ContextGateError("invalid_classifier_url:host_must_be_loopback")
    return classifier_url
 def _as_int_or_none(value: Any) -> int | None:
    try:
        return int(value)
    except (TypeError, ValueError):
        return None
 def classify_offline(query: str, context: Mapping[str, Any] | None = None, warning: str | None = None) -> ClassifierResult:
    """Build a non-live result from the heuristic labels.

    All NPU deltas are ``None`` and ``live`` is ``False``. When ``warning`` is
    empty or omitted, the standard offline warning code is used.
    """
    effective_warning = warning if warning else "offline_heuristic_classifier_no_npu_claim"
    offline_labels = heuristic_labels(query, context)
    return ClassifierResult(
        labels=offline_labels,
        npu_busy_delta_us=None,
        sysfs_npu_busy_delta_us=None,
        outer_sysfs_delta_us=None,
        live=False,
        warning=effective_warning,
    )
 def _has_any(text: str, needles: list[str]) -> bool:
    return any(n in text for n in needles)
 def _source(source: str, action: str, reason: str, priority: int, freshness: str, confidence: float) -> dict[str, Any]:
    assert source in _ALLOWED_SOURCES
    return {
        "source": source,
        "action": action,
        "reason": reason,
        "priority": priority,
        "freshness": freshness,
        "permission": "tool_required_by_authoritative_agent" if source != "no_retrieval" else "none",
        "missing_behavior": "retrieve_or_mark_missing" if source != "no_retrieval" else "skip_retrieval",
        "confidence": round(confidence, 2),
    }
 def select_sources(query: str, labels: Mapping[str, Any], context: Mapping[str, Any], max_sources: int) -> list[dict[str, Any]]:
    """Choose the source classes to consult, ordered by priority.

    Each rule below adds at most one source. When no rule fires, a single
    ``no_retrieval`` entry is planned. The result is sorted by priority,
    de-duplicated by source name, and cut to ``max_sources`` entries.
    """
    lowered = query.lower()
    category = str(_label_value(labels, "workflow_category", "unknown"))
    memory_candidate = str(_label_value(labels, "memory_candidate", "none"))
    tool_needed = bool(_label_value(labels, "tool_needed", False))
    planned: list[dict[str, Any]] = []

    wants_live_state = tool_needed or _has_any(
        lowered,
        ["current", "now", "health", "port", "process", "systemd", "status", "npu", "listening", "logs", "time", "date"],
    )
    if wants_live_state:
        planned.append(_source("live_system", "inspect_with_terminal_or_domain_tool", "current service/system state requested", 1, "live_required", 0.9))

    wants_repo = (
        context.get("repo_path")
        or category == "coding"
        or _has_any(lowered, ["repo", "code", "file", "test", "pytest", "diff", "implementation", "hermes", "atlas"])
    )
    if wants_repo:
        planned.append(_source("repo_files", "inspect_explicit_repo_paths", "repo-specific implementation or config context", 2, "current_filesystem", 0.84))

    if _has_any(lowered, ["where did we leave", "what did we decide", "previous", "earlier", "handoff", "prior", "last time"]):
        planned.append(_source("session_search", "search_prior_sessions_or_kanban_handoffs", "prior decision or handoff requested", 3, "session-era", 0.82))

    if _has_any(lowered, ["runbook", "note", "obsidian", "rag", "docs", "knowledge", "plan"]):
        planned.append(_source("rag_search", "query_local_index_read_only", "local docs or indexed knowledge likely useful", 4, "cached_index", 0.76))

    wants_memory = memory_candidate != "none" or _has_any(lowered, ["preference", "remember", "profile", "durable fact"])
    if wants_memory:
        planned.append(_source("durable_memory", "read_stable_facts_only", "stable preference/environment facts may be relevant", 5, "static", 0.72))

    if _has_any(lowered, ["latest", "news", "version", "release", "public", "web"]):
        planned.append(_source("web", "search_public_current_sources", "current external public fact requested", 6, "live_external", 0.7))

    if not planned:
        planned.append(_source("no_retrieval", "answer_directly", "no factual retrieval dependency detected", 1, "none", 0.78))

    # Stable priority order; setdefault keeps the first entry seen per source.
    first_per_source: dict[str, dict[str, Any]] = {}
    for entry in sorted(planned, key=lambda candidate: candidate["priority"]):
        first_per_source.setdefault(entry["source"], entry)
    return list(first_per_source.values())[:max_sources]
 def select_bundle_name(query: str, labels: Mapping[str, Any], context: Mapping[str, Any]) -> str:
    """Pick the bundle type for a query; the first matching rule wins.

    Order: coding task, ops/debug, personal assistant, simple response (when
    the top-priority planned source is ``no_retrieval``), then research as the
    fallback.
    """
    lowered = query.lower()
    category = str(_label_value(labels, "workflow_category", "unknown"))

    is_coding_task = context.get("platform") == "kanban" or context.get("task_id") or category == "coding"
    if is_coding_task:
        return "CodingTaskBundle"

    is_ops = category in {"devops", "debugging"} or _has_any(lowered, ["health", "port", "systemd", "npu", "service", "logs"])
    if is_ops:
        return "OpsDebugBundle"

    is_personal = category in {"note_taking", "productivity"} or _has_any(lowered, ["preference", "remember", "profile"])
    if is_personal:
        return "PersonalAssistantBundle"

    # Limiting the plan to one entry exposes only the top-priority source.
    top_sources = select_sources(query, labels, context, 1)
    if any(entry["source"] == "no_retrieval" for entry in top_sources):
        return "SimpleResponseBundle"
    return "ResearchBundle"
 def _field(field: str, shape: str, source: str, freshness: str, missing: str, privacy: str, confidence: float = 0.8) -> dict[str, Any]:
    return {
        "field": field,
        "shape": shape,
        "source_of_truth": source,
        "freshness": freshness,
        "provenance_required": True,
        "missing_behavior": missing,
        "privacy": privacy,
        "confidence": round(confidence, 2),
    }
 def build_bundle_plan(bundle_name: str, sources: Sequence[Mapping[str, Any]], query: str, labels: Mapping[str, Any]) -> dict[str, Any]:
    """Describe the fields an agent would need to fill for ``bundle_name``.

    Args:
        bundle_name: Bundle type; unrecognised names fall through to the
            research field set in the final ``else`` branch.
        sources: Planned source entries; only their ``"source"`` names are read.
        query: Original query text, scanned for side-effect keywords.
        labels: Classifier labels; only ``safety_confirmation_required`` is read.

    Returns:
        A dict with ``bundle_name``, ``required_fields``, an always-empty
        ``optional_fields`` list, and ``blocked_fields``.
    """
    safety_required = bool(_label_value(labels, "safety_confirmation_required", False))
    source_names = {s["source"] for s in sources}
    if bundle_name == "OpsDebugBundle":
        required = [
            _field("problem_statement", "compact_text", "user", "request", "mark_missing", "query_text_only"),
            _field("target_scope", "service_repo_or_host", "query_or_classifier", "request", "ask_or_infer_low_confidence", "no_private_paths_beyond_explicit"),
            _field("live_state", "status_table", "live_system", "live_required", "retrieve_or_fail_closed", "no_raw_logs_by_default"),
            _field("safety_gates", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
            _field("provenance", "tool_names_and_paths", "executing_agent", "run", "mark_missing", "paths_only"),
        ]
    elif bundle_name == "CodingTaskBundle":
        required = [
            _field("repo_root", "absolute_path", "task_or_context", "current", "ask_or_fail", "explicit_path_only"),
            _field("git_state", "branch_dirty_counts", "live_system", "live_required", "retrieve_or_fail_closed", "no_diff_dump_by_default"),
            _field("requirements", "bullet_summary", "user_kanban_files", "current", "retrieve_or_mark_missing", "no_private_snippets"),
            _field("relevant_paths", "path_list", "repo_files", "current_filesystem", "search_narrowly", "paths_only"),
            _field("tests_or_smokes", "command_list", "repo_files", "current_filesystem", "mark_missing", "commands_only"),
            _field("review_gates", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
        ]
    elif bundle_name == "PersonalAssistantBundle":
        required = [
            _field("user_intent", "compact_text", "user", "request", "mark_missing", "query_text_only"),
            _field("durable_facts_needed", "fact_keys", "durable_memory", "static", "retrieve_or_mark_missing", "no_raw_memory_dump"),
            _field("prior_decisions_needed", "session_refs", "session_search", "session-era", "retrieve_or_mark_missing", "summaries_only"),
            _field("privacy_boundary", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
            _field("action_authority", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
        ]
    elif bundle_name == "SimpleResponseBundle":
        # A direct answer needs no retrieved fields.
        required = []
    else:
        # Fallback field set, used for "ResearchBundle" and any other name.
        required = [
            _field("research_question", "compact_text", "user", "request", "mark_missing", "query_text_only"),
            _field("source_plan", "ordered_source_list", "context_gate", "run", "mark_missing", "no_private_snippets"),
            _field("evidence_requirements", "provenance_rules", "policy", "static", "fail_closed", "no_private_data"),
            _field("freshness_cutoff", "freshness_policy", "classifier_query", "request", "mark_missing", "no_private_data"),
            _field("missing_data_behavior", "policy_enum", "policy", "static", "fail_closed", "no_private_data"),
        ]
    blocked = []
    # Block side effects when the classifier asks for confirmation or the query
    # contains one of the side-effect keywords as a whole word.
    if safety_required or re.search(r"\b(route|routing|restart|send|write memory|reindex|delete|mutate)\b", query.lower()):
        blocked.append(_field("authority_side_effect", "approval_required", "policy", "static", "fail_closed", "no_side_effects_in_v1", 0.95))
    # A plan that includes rag_search also records that vector DB mutation is not allowed.
    if "rag_search" in source_names:
        blocked.append(_field("vector_db_mutation", "not_allowed", "policy", "static", "fail_closed", "read_only_query_plan", 0.95))
    return {"bundle_name": bundle_name, "required_fields": required, "optional_fields": [], "blocked_fields": blocked}
 def summarize_query_class(labels: Mapping[str, Any]) -> dict[str, Any]:
    """Flatten classifier labels into the plan's ``query_class`` block.

    ``confidence`` is the highest confidence among the workflow category,
    tool-needed and safety labels, rounded to two decimals. A label without a
    usable confidence counts as 0.5.
    """
    confidence_labels = ("workflow_category", "tool_needed", "safety_confirmation_required")
    best_confidence = max(_label_confidence(labels, label_name, 0.5) for label_name in confidence_labels)

    summary: dict[str, Any] = {}
    summary["workflow_category"] = _label_value(labels, "workflow_category", "unknown")
    summary["urgency"] = _label_value(labels, "urgency", "normal")
    summary["tool_needed"] = bool(_label_value(labels, "tool_needed", False))
    summary["memory_candidate"] = _label_value(labels, "memory_candidate", "none")
    summary["safety_confirmation_required"] = bool(_label_value(labels, "safety_confirmation_required", False))
    summary["confidence"] = round(best_confidence, 2)
    return summary
 def npu_proof_from_classifier(result: ClassifierResult, require_npu_proof: bool) -> tuple[dict[str, Any], list[str]]:
    """Build the plan's ``npu_proof`` block and its warnings.

    Proof is strict. ``verified`` is True only when all of these hold:

    * the result came from a live classifier request,
    * the endpoint reported a positive NPU busy delta, and
    * a positive sysfs busy delta was seen, either reported by the endpoint
      or measured around the request.

    Args:
        result: Classifier outcome carrying the deltas and optional warning.
        require_npu_proof: When True, an unverified result adds the
            ``npu_proof_inconclusive`` warning.

    Returns:
        ``(npu_proof, warnings)``. ``rerank_delta_us`` is always ``None``.
    """
    endpoint_delta = result.npu_busy_delta_us
    endpoint_sysfs_delta = result.sysfs_npu_busy_delta_us
    outer_delta = result.outer_sysfs_delta_us
    # The endpoint's own delta is required: a sysfs delta alone does not show
    # that this request's classifier ran on the NPU.
    positive_endpoint = endpoint_delta is not None and endpoint_delta > 0
    positive_endpoint_sysfs = endpoint_sysfs_delta is not None and endpoint_sysfs_delta > 0
    positive_outer = outer_delta is not None and outer_delta > 0
    verified = bool(result.live and positive_endpoint and (positive_endpoint_sysfs or positive_outer))
    warnings: list[str] = []
    if result.warning:
        warnings.append(result.warning)
    if require_npu_proof and not verified:
        warnings.append("npu_proof_inconclusive")
    return {
        "classifier_delta_us": endpoint_delta,
        "classifier_sysfs_delta_us": endpoint_sysfs_delta,
        "outer_sysfs_delta_us": outer_delta,
        "rerank_delta_us": None,
        "verified": verified,
        "required": require_npu_proof,
        "classifier_live": result.live,
    }, warnings
 def build_plan(
    query: str,
    *,
    context: Mapping[str, Any] | None = None,
    options: Mapping[str, Any] | None = None,
    classifier: ClassifierResult | None = None,
 ) -> dict[str, Any]:
    """Assemble and validate the advisory context-gate plan for ``query``.

    Args:
        query: Non-empty query text.
        context: Optional request context (copied, never mutated).
        options: Optional settings. ``dry_run`` must be exactly ``True`` when
            given, ``include_private_text`` must be falsy, ``max_sources`` is
            clamped to 1..6 (default 4), ``require_npu_proof`` defaults to
            True, and ``trace_id`` is optional.
        classifier: Classification result; the offline heuristic is used when
            omitted.

    Returns:
        The plan dict, already checked by ``validate_plan``.

    Raises:
        ContextGateError: for an empty query, a forbidden option, or a plan
            that fails validation.
    """
    if not query or not query.strip():
        raise ContextGateError("query_required")

    request_context = dict(context or {})
    settings = dict(options or {})

    # v1 is advisory only: refuse anything that would relax that.
    if settings.get("dry_run", True) is not True:
        raise ContextGateError("dry_run_must_remain_true_in_v1")
    if settings.get("include_private_text", False):
        raise ContextGateError("include_private_text_not_allowed_in_v1")

    requested_sources = int(settings.get("max_sources", 4))
    max_sources = min(max(requested_sources, 1), 6)
    require_npu = bool(settings.get("require_npu_proof", True))

    result = classifier if classifier is not None else classify_offline(query, request_context)
    labels = result.labels

    source_plan = select_sources(query, labels, request_context, max_sources)
    bundle_name = select_bundle_name(query, labels, request_context)
    npu_proof, warnings = npu_proof_from_classifier(result, require_npu)

    plan: dict[str, Any] = {
        "schema": SCHEMA,
        "trace_id": settings.get("trace_id") or request_context.get("trace_id"),
        "dry_run": True,
        "ok": True,
        "query_class": summarize_query_class(labels),
        "source_plan": source_plan,
        "bundle_plan": build_bundle_plan(bundle_name, source_plan, query, labels),
        "npu_proof": npu_proof,
        # Copies keep the module-level maps safe from caller mutation.
        "authority": dict(AUTHORITY),
        "gates": dict(GATES),
        "warnings": warnings,
    }
    validate_plan(plan)
    return plan
 def validate_plan(plan: Mapping[str, Any]) -> None:
    """Check the structural and safety invariants of a plan.

    Checks run in this order: schema id, ``dry_run`` is exactly ``True``, the
    authority map equals the closed ``AUTHORITY`` map, ``source_plan`` is a
    non-empty list of allowed sources, and the required top-level blocks exist.

    Raises:
        ContextGateError: naming the first failed check.
    """
    if plan.get("schema") != SCHEMA:
        raise ContextGateError("invalid_schema")
    if plan.get("dry_run") is not True:
        raise ContextGateError("dry_run_missing")
    if plan.get("authority") != AUTHORITY:
        raise ContextGateError("authority_not_closed")

    source_plan = plan.get("source_plan")
    if not (isinstance(source_plan, list) and source_plan):
        raise ContextGateError("source_plan_required")
    for entry in source_plan:
        source_name = entry.get("source")
        if source_name not in _ALLOWED_SOURCES:
            raise ContextGateError(f"invalid_source:{source_name}")

    missing_blocks = [
        block
        for block in ("query_class", "bundle_plan", "npu_proof", "gates")
        if block not in plan
    ]
    if missing_blocks:
        raise ContextGateError(f"missing_block:{missing_blocks[0]}")
 def compact_line(plan: Mapping[str, Any]) -> str:
    """Render a plan as one line of space-separated ``key=value`` pairs.

    Booleans are printed lower-case, source names are comma-joined, and an
    empty or missing warnings list prints as ``none``.
    """
    source_names = ",".join(str(entry["source"]) for entry in plan["source_plan"])
    warning_text = ",".join(plan.get("warnings") or []) or "none"
    segments = [
        f"ok={str(plan['ok']).lower()}",
        f"schema={plan['schema']}",
        f"bundle={plan['bundle_plan']['bundle_name']}",
        f"sources={source_names}",
        f"source_count={len(plan['source_plan'])}",
        f"npu_verified={str(plan['npu_proof']['verified']).lower()}",
        f"classifier_delta_us={plan['npu_proof'].get('classifier_delta_us')}",
        f"outer_sysfs_delta_us={plan['npu_proof'].get('outer_sysfs_delta_us')}",
        # The closed-gate summary is a fixed string, not read from the plan.
        "gates=closed:route,memory,send,tools,restart,vector,private_roots,config",
        f"warnings={warning_text}",
    ]
    return " ".join(segments)
 def compact_json(plan: Mapping[str, Any]) -> str:
    """Serialise a reduced view of the plan as compact JSON with sorted keys.

    The bundle plan is reduced to its name, the source plan to its source
    names plus a count, and the gates to their key names. ``warnings``
    defaults to an empty list.
    """
    summary: dict[str, Any] = {key: plan[key] for key in ("schema", "ok", "dry_run")}
    summary["bundle_name"] = plan["bundle_plan"]["bundle_name"]
    source_names = [entry["source"] for entry in plan["source_plan"]]
    summary["sources"] = source_names
    summary["source_count"] = len(source_names)
    for passthrough in ("query_class", "npu_proof", "authority"):
        summary[passthrough] = plan[passthrough]
    summary["gates_closed"] = list(plan["gates"].keys())
    summary["warnings"] = plan.get("warnings", [])
    return json.dumps(summary, sort_keys=True, separators=(",", ":"))
@@ -0,0 +1,16 @@
 #!/usr/bin/env python3
 """Thin repo-local wrapper for the Atlas/Hermes context-gate advisory CLI."""
 from __future__ import annotations
 import sys
 from pathlib import Path
 # Directory one level above the directory containing this script.
 REPO_ROOT = Path(__file__).resolve().parents[1]
 # Put that directory on sys.path so the openvino_context_gate package below
 # can be imported when this script is run directly.
 if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))
 # Must follow the sys.path change above, hence the E402 suppression.
 from openvino_context_gate.cli import main  # noqa: E402
 if __name__ == "__main__":
    # Use the CLI's return value as the process exit status.
    raise SystemExit(main())
@@ -0,0 +1,526 @@
 #!/usr/bin/env python3
 """Dry-run Kanban hygiene advisory classifier.
 Reads compact board/task summaries and emits bounded labels/next gates without
 mutating any Hermes Kanban state. Phase 1 is deterministic rules only; it does
 not call kanban tools, restart services, write memory, or send outbound data.
 """
 from __future__ import annotations
 import argparse
 import json
 import re
 import sys
 import time
 from pathlib import Path
 from typing import Any
 # Schema identifier emitted under "schema" in every advisory report.
 SCHEMA = "kanban_hygiene_advisory_v1"
 # Authority flags emitted under "authority" in the report; every capability
 # is declared False.
 AUTHORITY = {
    "may_mutate_board": False,
    "may_assign": False,
    "may_block_or_unblock": False,
    "may_complete_or_archive": False,
    "may_create_tasks": False,
    "may_write_memory": False,
    "may_send_external": False,
    "may_restart_services": False,
    "may_execute_tools": False,
 }
 # NPU proof stanza emitted under "npu_proof" in the report; it records that
 # no proof was attempted by this script.
 NPU_PROOF = {
    "required_for_npu_claims": True,
    "attempted": False,
    "ok": None,
    "npu_busy_delta_us": None,
 }
 # Fields validate_task() requires on every task; load_input() also uses this
 # set to recognise a bare single-task JSON object.
 REQUIRED_TASK_FIELDS = {"id", "title", "status", "assignee", "created_at", "updated_at"}
 # Status values accepted by validate_task().
 SUPPORTED_STATUSES = {
    "triage",
    "todo",
    "ready",
    "running",
    "blocked",
    "done",
    "archived",
    "failed",
    "cancelled",
 }
 # Task-type labels that classify_task_type() accepts for a title prefix.
 TASK_TYPES = {
    "charter",
    "discovery",
    "spec",
    "implement",
    "test",
    "review",
    "docs",
    "ops",
    "integration",
    "final",
    "unknown",
 }
 # Lane label vocabulary.
 # NOTE(review): not referenced by the functions visible in this chunk;
 # presumably documents the values classify_lane() may return - confirm.
 LANES = {
    "observability_utilization",
    "cron_n8n_classifier",
    "rag_context_gate",
    "doc_image_audio_triage",
    "voice_audio_pipeline",
    "kanban_hygiene",
    "docs_runbook_service_map",
    "ops_integration",
    "final_closeout",
    "general",
    "unknown",
 }
 # Title prefixes (the text before the first ":") that classify_task_type()
 # treats as an explicit task type; "doc" is mapped to "docs" there.
 LIFECYCLE_PREFIXES = {
    "charter",
    "discovery",
    "spec",
    "implement",
    "test",
    "review",
    "docs",
    "doc",
    "ops",
    "integration",
    "final",
 }
 def compact_text(task: dict[str, Any]) -> str:
    """Join the task's free-text fields into one lower-cased search string.

    Reads ``title``, ``body_excerpt``, ``last_run_summary_excerpt`` and
    ``last_comment_excerpt`` in that order; fields whose string form is empty
    are skipped.
    """
    field_names = ("title", "body_excerpt", "last_run_summary_excerpt", "last_comment_excerpt")
    rendered = [str(task.get(name, "")) for name in field_names]
    non_empty = [piece for piece in rendered if piece]
    return " ".join(non_empty).lower()
 def load_jsonl(raw: str) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Parse JSON Lines text into task objects.

    Blank lines are ignored. Returns ``(tasks, {})``; JSONL input carries no
    board-level metadata.

    Raises:
        ValueError: a line is not valid JSON or is not a JSON object. The
            message includes the 1-based line number.
    """
    rows: list[dict[str, Any]] = []
    line_no = 0
    for line in raw.splitlines():
        line_no += 1
        if line.strip():
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError as exc:
                raise ValueError(f"invalid JSONL on line {line_no}: {exc.msg}") from exc
            if not isinstance(parsed, dict):
                raise ValueError(f"JSONL line {line_no} is not an object")
            rows.append(parsed)
    return rows, {}
 def load_input(path: str | None, fmt: str) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Read task summaries from a file or stdin and return ``(tasks, metadata)``.

    Args:
        path: Input file path; ``None``, empty, or ``"-"`` reads stdin.
        fmt: ``"json"``, ``"jsonl"`` or ``"auto"``. In auto mode, multi-line
            input that does not start with ``{`` or ``[`` is parsed as JSONL,
            and multi-line input that fails whole-document JSON parsing is
            retried as JSONL.

    Returns:
        The task objects plus every top-level key other than ``tasks`` when
        the input was a JSON object (otherwise an empty dict).

    Raises:
        ValueError: empty, malformed, or wrongly shaped input.
    """
    if path and path != "-":
        raw = Path(path).read_text(encoding="utf-8")
    else:
        raw = sys.stdin.read()
    stripped = raw.strip()
    if not stripped:
        raise ValueError("input is empty")
    multiline = "\n" in stripped
    starts_like_json = raw.lstrip().startswith(("{", "["))
    if fmt == "jsonl" or (fmt == "auto" and multiline and not starts_like_json):
        return load_jsonl(raw)
    try:
        data = json.loads(raw)
    except json.JSONDecodeError as exc:
        # Objects on separate lines look like JSON but fail as one document.
        if fmt == "auto" and multiline:
            return load_jsonl(raw)
        raise ValueError(f"invalid JSON input: {exc.msg}") from exc
    if isinstance(data, list):
        if any(not isinstance(item, dict) for item in data):
            raise ValueError("JSON list must contain task objects")
        return data, {}
    if not isinstance(data, dict):
        raise ValueError("input must be JSON object, JSON list, or JSON Lines")
    tasks = data.get("tasks")
    if tasks is None:
        # Treat a single object with required task fields as a one-task summary.
        if REQUIRED_TASK_FIELDS.issubset(data):
            return [data], {}
        raise ValueError("JSON object must contain a 'tasks' list")
    if not isinstance(tasks, list) or any(not isinstance(item, dict) for item in tasks):
        raise ValueError("'tasks' must be a list of objects")
    metadata = {key: value for key, value in data.items() if key != "tasks"}
    return tasks, metadata
 def validate_task(task: dict[str, Any]) -> None:
    """Check that a task has the required fields, a known status and numeric timestamps.

    Raises:
        ValueError: a required field is missing, ``status`` is not in
            ``SUPPORTED_STATUSES``, or ``created_at`` / ``updated_at`` is not
            a finite number of epoch seconds.
    """
    import math

    missing = sorted(REQUIRED_TASK_FIELDS - set(task))
    if missing:
        task_id = task.get("id", "<unknown>")
        raise ValueError(f"task {task_id} missing required fields: {', '.join(missing)}")
    status = str(task.get("status"))
    if status not in SUPPORTED_STATUSES:
        raise ValueError(f"task {task.get('id')} has unsupported status: {status}")
    for field in ("created_at", "updated_at"):
        value = task.get(field)
        # bool is a subclass of int, so JSON true/false would otherwise pass as
        # epoch 1/0. NaN and Infinity (which json.loads accepts) would give
        # meaningless ages in the staleness check.
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if not is_number or not math.isfinite(value):
            raise ValueError(f"task {task.get('id')} field {field} must be epoch seconds")
 def confidence(value: float) -> float:
    """Clamp ``value`` into the range [0.0, 1.0] and round it to two decimals."""
    capped = min(1.0, value)
    clamped = max(0.0, capped)
    return round(clamped, 2)
 def classify_task_type(task: dict[str, Any]) -> dict[str, Any]:
    """Label the task's lifecycle type from its title prefix or from keywords.

    A recognised ``prefix:`` at the start of the title wins (confidence 0.95).
    Otherwise the first keyword rule with a substring hit in the compact text
    applies (0.78). With no signal the type is ``unknown`` (0.2).
    """
    title = str(task.get("title", "")).strip().lower()
    haystack = compact_text(task)
    head, colon, _rest = title.partition(":")
    prefix = head.strip() if colon else ""
    if prefix in LIFECYCLE_PREFIXES:
        # "doc" is accepted as a spelling of "docs".
        task_type = "docs" if prefix == "doc" else prefix
        if task_type in TASK_TYPES:
            return {"value": task_type, "confidence": 0.95, "reason_codes": [f"title_prefix_{task_type}"]}
    # Rule order matters: the first rule with a matching needle decides.
    keyword_table = (
        ("discovery", ("discover", "inventory", "repo map", "read-only")),
        ("spec", ("spec", "define", "contract", "schema")),
        ("implement", ("implement", "engineer", "script", "code", "build")),
        ("review", ("review", "approve", "findings")),
        ("docs", ("docs", "runbook", "readme")),
        ("ops", ("ops", "health", "monitor", "deploy", "cleanup")),
        ("integration", ("integration", "merge", "cherry-pick", "fan-in")),
        ("final", ("final", "closeout", "synthesis")),
        ("test", ("test", "smoke", "validate")),
        ("charter", ("charter", "program framing")),
    )
    hit = next(
        (label for label, needles in keyword_table if any(needle in haystack for needle in needles)),
        None,
    )
    if hit is None:
        return {"value": "unknown", "confidence": 0.2, "reason_codes": ["insufficient_signal"]}
    return {"value": hit, "confidence": 0.78, "reason_codes": [f"keyword_{hit}"]}
 def classify_lane(task: dict[str, Any]) -> dict[str, Any]:
    """Assign the task to a lane by substring match on its compact text.

    Rules are tried in order and the first one with a matching needle wins
    (confidence 0.9); its reason code names the first matching needle. Text
    without a lane-specific needle is ``general`` (0.45), and empty text is
    ``unknown`` (0.1).
    """
    text = compact_text(task)
    lane_rules = (
        ("kanban_hygiene", ("kanban", "task hygiene", "board summaries", "review-needed", "next gate")),
        ("cron_n8n_classifier", ("cron", "n8n", "alert", "event classifier")),
        ("rag_context_gate", ("rag", "context gate", "retrieval", "bundle")),
        ("doc_image_audio_triage", ("document", "image", "audio triage", "ocr", "attachments")),
        ("voice_audio_pipeline", ("voice", "whisper", "memo", "transcribe")),
        ("docs_runbook_service_map", ("service map", "runbook", "readme")),
        ("observability_utilization", ("health", "utilization", "metrics", "digest")),
        ("ops_integration", ("merge", "integration", "cherry-pick", "fan-in")),
        ("final_closeout", ("final", "closeout", "synthesis")),
    )
    for lane, needles in lane_rules:
        first_hit = next((needle for needle in needles if needle in text), None)
        if first_hit is not None:
            reason = "mentions_" + first_hit.replace(" ", "_")
            return {"value": lane, "confidence": 0.9, "reason_codes": [reason]}
    if not text:
        return {"value": "unknown", "confidence": 0.1, "reason_codes": ["insufficient_signal"]}
    return {"value": "general", "confidence": 0.45, "reason_codes": ["no_lane_specific_signal"]}
 def classify_project(task: dict[str, Any], board: str | None, input_metadata: dict[str, Any]) -> dict[str, Any]:
    """Determine the project label and where it came from.

    Precedence: an explicit ``project`` on the task or in the input metadata,
    then the board name (argument or metadata), then an NPU/OpenVINO mention
    in the task text, else ``unknown``.
    """

    def label(value: str, score: float, source: str) -> dict[str, Any]:
        return {"value": value, "confidence": score, "source": source}

    declared = task.get("project") or input_metadata.get("project")
    if declared:
        return label(str(declared), 0.9, "input")
    board_name = board or input_metadata.get("board")
    if board_name:
        return label(str(board_name), 0.98, "board_name")
    text = compact_text(task)
    if any(marker in text for marker in ("npu", "openvino")):
        return label("npu-maximization", 0.72, "body")
    return label("unknown", 0.1, "unknown")
 def classify_blocker(task: dict[str, Any]) -> dict[str, Any]:
    """Classify what, if anything, is holding the task up.

    Only ``status == "blocked"`` sets ``blocked`` to True; the blocker kind is
    then chosen by the first keyword group found in the compact text. A
    ``todo`` task with parents is reported as ``missing_parent``, and a
    crashed, timed-out or failed last run as ``failed_tests``.
    """
    status = str(task.get("status"))
    text = compact_text(task)
    last_outcome = str(task.get("last_run_outcome") or "").lower()

    def verdict(value: str, blocked: bool, score: float, codes: list[str]) -> dict[str, Any]:
        return {"value": value, "blocked": blocked, "confidence": confidence(score), "reason_codes": codes}

    if status == "blocked":
        # First keyword group with a hit decides the blocker kind.
        blocked_rules = (
            (("review-required", "changes requested"), "review_changes_requested", "blocked_review_required_or_changes"),
            (("credential", "token", "path", "spawn_failed"), "missing_credentials", "blocked_missing_credentials_or_path"),
            (("human", "approval", "decision", "confirm"), "human_decision", "blocked_human_decision"),
        )
        for needles, kind, code in blocked_rules:
            if any(needle in text for needle in needles):
                return verdict(kind, True, 0.85, [code])
        return verdict("unknown", True, 0.85, ["status_blocked"])
    if status == "todo" and task.get("parents"):
        return verdict("missing_parent", False, 0.75, ["todo_with_parents"])
    if last_outcome in {"crashed", "timed_out", "failed"}:
        return verdict("failed_tests", False, 0.65, [f"last_run_{last_outcome}"])
    return verdict("none", False, 0.0, [])
 def age_hours(now: float, timestamp: Any) -> float | None:
    """Return hours elapsed between ``timestamp`` and ``now``, rounded to 2 decimals.

    Timestamps in the future yield 0.0; a non-numeric timestamp yields None.
    """
    if isinstance(timestamp, (int, float)):
        elapsed_seconds = max(0.0, now - float(timestamp))
        return round(elapsed_seconds / 3600.0, 2)
    return None
 def classify_staleness(task: dict[str, Any], now: float) -> dict[str, Any]:
    """Label how stale a task is from its status and last-activity age.

    Args:
        task: Task summary; ``created_at`` must be present and numeric.
        now: Reference time in epoch seconds.

    Returns:
        A dict with ``value`` (``fresh``, ``aging``, ``stale``, ``stale_lock``
        or ``orphaned``), ``age_hours``, ``last_activity_hours``, the
        ``threshold_hours`` that applied, and ``reason_codes``.
    """
    status = str(task.get("status"))
    created = float(task["created_at"])
    # Most specific activity signal first; falsy values (missing, None, 0) fall through.
    activity_ts = float(task.get("heartbeat_at") or task.get("last_activity_at") or task.get("updated_at") or created)
    age = age_hours(now, created)
    last_activity = age_hours(now, activity_ts)
    threshold = 24
    value = "fresh"
    reason_codes: list[str] = []
    if status == "running":
        # A running task with no activity for more than an hour is a stale lock.
        threshold = 1
        if last_activity is not None and last_activity > 1:
            value = "stale_lock"
            reason_codes.append("running_no_recent_heartbeat")
    elif status == "ready":
        threshold = 24
        if last_activity is not None and last_activity >= 72:
            value = "stale"
            reason_codes.append("ready_over_72h")
        elif last_activity is not None and last_activity >= 24:
            value = "aging"
            reason_codes.append("ready_over_24h")
    elif status == "blocked":
        # Review-required blockers age faster: 24h threshold instead of 48h.
        review_required = "review-required" in compact_text(task)
        threshold = 24 if review_required else 48
        if last_activity is not None and last_activity >= 168:
            value = "stale"
            reason_codes.append("blocked_over_7d")
        elif review_required and last_activity is not None and last_activity >= 72:
            value = "stale"
            reason_codes.append("review_required_over_72h")
        elif last_activity is not None and last_activity >= threshold:
            value = "aging"
            reason_codes.append("blocked_or_review_aging")
    elif status == "todo" and not task.get("parents") and last_activity is not None and last_activity >= 72:
        # A parentless todo idle for 72h or more is reported as orphaned.
        value = "orphaned"
        threshold = 72
        reason_codes.append("todo_without_parents_over_72h")
    return {
        "value": value,
        "age_hours": age,
        "last_activity_hours": last_activity,
        "threshold_hours": threshold,
        "reason_codes": reason_codes,
    }
 def normalize_title(title: str) -> str:
    """Reduce a title to a comparison key.

    Lower-cases the title, drops one leading lifecycle ``prefix:`` and turns
    every run of characters outside ``a-z0-9`` into a single space.
    """
    lowered = title.lower().strip()
    without_prefix = re.sub(
        r"^(charter|discovery|spec|implement|test|review|docs?|ops|integration|final)\s*:\s*",
        "",
        lowered,
    )
    alnum_only = re.sub(r"[^a-z0-9]+", " ", without_prefix)
    collapsed = re.sub(r"\s+", " ", alnum_only)
    return collapsed.strip()
 def find_duplicates(tasks: list[dict[str, Any]], labels: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Flag active tasks that share a normalized title, lane and task type.

    Tasks whose status is done, archived or cancelled are never grouped. In
    each group of two or more, the member with the smallest id string is the
    canonical task and the others are marked as duplicates of it.

    Returns:
        A mapping from task id to its duplicate record; every input task has
        an entry.
    """
    active_statuses = SUPPORTED_STATUSES - {"done", "archived", "cancelled"}
    buckets: dict[tuple[str, str, str], list[str]] = {}
    for task in tasks:
        if str(task.get("status")) in active_statuses:
            tid = str(task["id"])
            signature = (
                normalize_title(str(task.get("title", ""))),
                labels[tid]["lane"]["value"],
                labels[tid]["task_type"]["value"],
            )
            # Titles that normalize to nothing cannot be compared.
            if signature[0]:
                buckets.setdefault(signature, []).append(tid)
    report: dict[str, dict[str, Any]] = {}
    for task in tasks:
        report[str(task["id"])] = {
            "is_duplicate": False,
            "canonical_task_id": None,
            "candidate_ids": [],
            "confidence": 0.0,
            "reason_codes": [],
        }
    for members in buckets.values():
        if len(members) >= 2:
            canonical = min(members)
            for tid in members:
                duplicate = tid != canonical
                report[tid] = {
                    "is_duplicate": duplicate,
                    "canonical_task_id": canonical if duplicate else None,
                    "candidate_ids": [other for other in members if other != tid],
                    "confidence": 0.86,
                    "reason_codes": ["same_normalized_title_lane_and_task_type"],
                }
    return report
 def has_non_positive_npu_busy_delta(text: str) -> bool:
    """Report whether the text states a busy/delta counter value that is <= 0.

    Text mentioning neither "npu" nor "busy" is never flagged. Otherwise any
    ``busy...=N`` or ``delta...=N`` assignment (``=`` or ``:``) with a
    non-positive number returns True.
    """
    if not ("npu" in text or "busy" in text):
        return False

    def non_positive(raw: str) -> bool:
        try:
            return float(raw) <= 0
        except ValueError:
            return False

    counter_patterns = (
        r"\b(?:npu_)?busy(?:_time)?(?:_delta)?(?:_us)?\s*[=:]\s*([+-]?\d+(?:\.\d+)?)\b",
        r"\b(?:npu_)?delta(?:_us)?\s*[=:]\s*([+-]?\d+(?:\.\d+)?)\b",
    )
    return any(
        non_positive(found.group(1))
        for pattern in counter_patterns
        for found in re.finditer(pattern, text)
    )
 def classify_review_needed(task: dict[str, Any], task_type: str) -> dict[str, Any]:
    """Decide whether the task needs a review and of which kind.

    Checks run in priority order: NPU claims with a non-positive or missing
    busy delta, an explicit ``review-required`` marker, reported changes on
    implement/ops/docs tasks, then mentions of authority-changing actions.
    """
    text = compact_text(task)
    reported_change = task.get("changed_files") or task.get("diff_path") or task.get("tests_run")

    def flagged(kind: str, score: float, code: str) -> dict[str, Any]:
        return {"value": True, "kind": kind, "confidence": score, "reason_codes": [code]}

    if has_non_positive_npu_busy_delta(text):
        return flagged("npu_proof_gate", 0.84, "npu_claim_non_positive_busy_delta")
    weak_proof_markers = ("http 200", "no busy", "missing busy")
    if "npu" in text and any(marker in text for marker in weak_proof_markers):
        return flagged("npu_proof_gate", 0.8, "npu_claim_needs_busy_delta")
    if "review-required" in text:
        review_kind = "code_change" if task_type == "implement" else "spec_review"
        return flagged(review_kind, 0.92, "review_required_marker")
    if reported_change and task_type in {"implement", "ops", "docs"}:
        return flagged("code_change", 0.86, "reported_changed_files_or_tests")
    authority_phrases = (
        "routing authority",
        "restart service",
        "write memory",
        "send outbound",
        "private root",
        "wildcard bind",
        "vector db mutation",
    )
    if any(phrase in text for phrase in authority_phrases):
        return flagged("human_approval", 0.84, "authority_change_requires_approval")
    return {"value": False, "kind": "none", "confidence": 0.2, "reason_codes": []}
 def classify_next_gate(task: dict[str, Any], labels: dict[str, Any]) -> dict[str, Any]:
    """Pick the next advisory gate for a task from its already-computed labels.

    Earlier checks win: duplicate, stale lock, blocked on a human, waiting on
    parents, implementation without test evidence, NPU proof, review needed.
    If none applies, the gate is derived from the task type.
    """

    def gate(value: str, score: float, codes: list[str]) -> dict[str, Any]:
        return {"value": value, "confidence": score, "reason_codes": codes}

    task_type = labels["task_type"]["value"]
    status = str(task.get("status"))
    if labels["duplicate"]["is_duplicate"]:
        return gate("dedupe_review", 0.86, ["duplicate_candidate"])
    if labels["staleness"]["value"] == "stale_lock":
        return gate("investigate_stale_lock", 0.88, ["running_stale_lock"])
    blocker = labels["blocker"]
    needs_human = blocker["value"] in {"human_decision", "missing_credentials", "unknown"}
    if needs_human and blocker["blocked"]:
        return gate("needs_human_decision", 0.85, blocker["reason_codes"] or ["blocked"])
    if blocker["value"] == "missing_parent":
        return gate("wait_for_parents", 0.82, ["unfinished_parents"])
    has_test_evidence = bool(task.get("tests_run") or task.get("test_evidence"))
    if task_type == "implement" and not has_test_evidence and status in {"blocked", "done"}:
        return gate("needs_test_evidence", 0.78, ["implementation_without_test_evidence"])
    review = labels["review_needed"]
    if review["kind"] == "npu_proof_gate":
        return gate("needs_npu_proof", 0.8, review["reason_codes"])
    if review["value"]:
        return gate("ready_for_review", 0.86, review["reason_codes"])
    default_gates = {
        "spec": "ready_for_implementation",
        "implement": "ready_for_review",
        "review": "ready_for_integration",
        "docs": "ready_for_integration",
        "ops": "ready_for_ops_validation",
        "integration": "ready_for_closeout",
        "final": "safe_to_complete",
        "discovery": "safe_to_complete",
        "charter": "ready_for_spec",
        "test": "ready_for_review",
    }
    if task_type in default_gates:
        return gate(default_gates[task_type], 0.74, [f"task_type_{task_type}"])
    return gate("unknown", 0.2, [])
 def advisory(tasks: list[dict[str, Any]], *, board: str | None, now: float, input_metadata: dict[str, Any], include_evidence: bool) -> dict[str, Any]:
    """Build the dry-run hygiene report for a list of task summaries.

    Args:
        tasks: Task objects; each is validated before any labelling.
        board: Board name override; falls back to ``input_metadata["board"]``.
        now: Reference time in epoch seconds for staleness.
        input_metadata: Top-level input keys other than ``tasks``.
        include_evidence: Add a short derived ``evidence`` stanza per item.

    Returns:
        The report dict: schema, counts, authority flags, NPU proof stanza and
        one labelled item per task.

    Raises:
        ValueError: a task fails ``validate_task``.
    """
    for task in tasks:
        validate_task(task)
    # Duplicate detection needs lane and task type for every task, so the
    # per-task labels are computed in a first pass.
    prelim: dict[str, dict[str, Any]] = {}
    for task in tasks:
        task_id = str(task["id"])
        prelim[task_id] = {
            "task_type": classify_task_type(task),
            "project": classify_project(task, board, input_metadata),
            "lane": classify_lane(task),
            "blocker": classify_blocker(task),
            "staleness": classify_staleness(task, now),
        }
    duplicates = find_duplicates(tasks, prelim)
    items = []
    for task in tasks:
        task_id = str(task["id"])
        labels = dict(prelim[task_id])
        labels["duplicate"] = duplicates[task_id]
        labels["review_needed"] = classify_review_needed(task, labels["task_type"]["value"])
        labels["next_gate"] = classify_next_gate(task, labels)
        item = {
            "task_id": task_id,
            **labels,
            "warnings": [],
        }
        if include_evidence:
            item["evidence"] = {
                "normalized_title": normalize_title(str(task.get("title", ""))),
                "status": task.get("status"),
                "parents_count": len(task.get("parents") or []),
                "children_count": len(task.get("children") or []),
            }
        items.append(item)
    stale_values = {"stale", "stale_lock", "orphaned"}
    counts = {
        "tasks": len(items),
        "duplicates": sum(1 for item in items if item["duplicate"]["is_duplicate"]),
        "review_needed": sum(1 for item in items if item["review_needed"]["value"]),
        "stale": sum(1 for item in items if item["staleness"]["value"] in stale_values),
        "blocked": sum(1 for item in items if item["blocker"]["blocked"]),
    }
    return {
        "schema": SCHEMA,
        "dry_run": True,
        "created": int(now),
        "board": board or input_metadata.get("board") or None,
        "counts": counts,
        # Copies, so a caller mutating one report cannot alter the module-level
        # constants and thereby every later report.
        "authority": dict(AUTHORITY),
        "npu_proof": dict(NPU_PROOF),
        "items": items,
    }
 def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the hygiene advisory CLI."""
    parser = argparse.ArgumentParser(
        description="Dry-run Kanban hygiene advisory classifier",
        epilog="Input: JSON object with tasks[] or JSONL task objects. Required task fields: id,title,status,assignee,created_at,updated_at. Optional compact fields such as body_excerpt, parents, children, changed_files, tests_run, last_run_outcome, and last_comment_excerpt improve labels.",
    )
    option_table = [
        (("--input", "-i"), {"help": "Input JSON/JSONL file; omit or '-' for stdin"}),
        (("--format",), {"choices": ["auto", "json", "jsonl"], "default": "auto", "help": "Input format"}),
        (("--board",), {"help": "Board/project name to include in output"}),
        (("--now",), {"type": float, "default": None, "help": "Epoch seconds for deterministic staleness tests"}),
        (("--compact",), {"action": "store_true", "help": "Accepted for compatibility; output is compact JSON by default"}),
        (("--include-evidence",), {"action": "store_true", "help": "Include short derived evidence fields"}),
    ]
    for flags, options in option_table:
        parser.add_argument(*flags, **options)
    return parser
 def main(argv: list[str] | None = None) -> int:
    """Run the CLI: read tasks, print the advisory report as compact JSON.

    Returns:
        0 on success; 2 when the input cannot be read or is invalid, in which
        case the error is written to stderr.
    """
    args = build_parser().parse_args(argv)
    try:
        tasks, metadata = load_input(args.input, args.format)
        # Sample the clock only after the input is read, and only when --now
        # was not supplied.
        reference_time = time.time() if args.now is None else args.now
        report = advisory(
            tasks,
            board=args.board,
            now=reference_time,
            input_metadata=metadata,
            include_evidence=args.include_evidence,
        )
    except (OSError, ValueError) as exc:
        print(f"kanban-hygiene-advisory: {exc}", file=sys.stderr)
        return 2
    rendered = json.dumps(report, sort_keys=True, separators=(",", ":"))
    print(rendered)
    return 0
 # Script entry point: use main()'s return value as the process exit status.
 if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,567 @@
 #!/usr/bin/env python3
 """Dry-run comparison harness for advisory-only NPU lanes.
 The harness evaluates synthetic/non-private fixtures against deterministic lane
 adapters and emits compact npu_advisory_decision_v1 records plus JSON/markdown
 summaries. It intentionally performs no live routing, memory writes, tool
 execution, service restarts, outbound sends, broad private scans, or vector-store
 mutation.
 """
 from __future__ import annotations
 import argparse
 import datetime as dt
 import hashlib
 import uuid
 import importlib.util
 import json
 import re
 import sys
 import time
 from collections import Counter, defaultdict
 from pathlib import Path
 from typing import Any, Mapping
 # Parent of the directory that contains this file; lane modules and fixtures
 # are located relative to it.
 REPO_ROOT = Path(__file__).resolve().parents[1]
 # Fixture file path under the repository root.
 DEFAULT_FIXTURES = REPO_ROOT / "fixtures" / "npu_advisory_dry_run" / "fixtures.json"
 # Schema identifiers named in the module docstring: per-decision records and
 # the harness summary.
 SCHEMA = "npu_advisory_decision_v1"
 HARNESS_SCHEMA = "npu_advisory_dry_run_summary_v1"
 # Baseline authority flags: every can_* capability is False. This is the
 # starting point for closed_authority_flags().
 AUTHORITY_FLAGS_CLOSED = {
    "can_route_atlas": False,
    "can_write_memory": False,
    "can_execute_tools": False,
    "can_restart_services": False,
    "can_send_outbound": False,
    "can_scan_private_roots": False,
    "can_mutate_vector_store": False,
    "can_post_advisory_event": False,
    "can_change_gateway_config": False,
    "requires_human_approval": True,
    "advisory_only": True,
 }
 # Translates may_* authority keys to the can_* names used above; applied by
 # closed_authority_flags().
 MAY_TO_CAN = {
    "may_route": "can_route_atlas",
    "may_write_memory": "can_write_memory",
    "may_execute_tools": "can_execute_tools",
    "may_restart_services": "can_restart_services",
    "may_send_external": "can_send_outbound",
    "may_process_private_dirs": "can_scan_private_roots",
    "may_mutate_vector_db": "can_mutate_vector_store",
    "may_change_live_config": "can_change_gateway_config",
 }
 # Mutation kinds, all False.
 # NOTE(review): not referenced in the visible part of this file; presumably
 # emitted in the summary to assert the harness performed none - confirm.
 MUTATION_FLAGS_FALSE = {
    "live_routing": False,
    "memory_writes": False,
    "tool_execution": False,
    "service_restarts": False,
    "outbound_sends": False,
    "broad_private_scans": False,
    "vector_store_mutation": False,
    "gateway_restart": False,
 }
 # NOTE(review): usage is outside the visible chunk; presumably the allowed
 # actions and the "nothing was performed" record attached to each decision.
 ALLOWED_ACTIONS = ["record_metric", "compare_with_expected_label", "include_in_digest", "recommend_human_review"]
 NO_ACTUAL_ACTION = {"kind": "dry_run_reported", "performed": False, "performed_by": "harness", "side_effects": []}
 # Case-insensitive patterns that evaluate_batch_triage() searches for in the
 # document text; the names of matching patterns become its reason codes.
 ACTION_PATTERNS = {
    "follow_up": re.compile(r"\b(follow up|follow-up|circle back|reply|respond)\b", re.I),
    "date_or_deadline": re.compile(r"\b(deadline|due|by (?:mon|tue|wed|thu|fri|sat|sun)|20\d{2}[-/]\d{1,2}[-/]\d{1,2})\b", re.I),
    "decision": re.compile(r"\b(decided|decision|approved|rejected|go with|choose)\b", re.I),
    "task": re.compile(r"\b(todo|to-do|action item|assign|need to|please|reminder|review|ask)\b", re.I),
 }
 class HarnessError(ValueError):
    """Harness-specific failure; a ``ValueError`` subclass.

    Raised, for example, when a lane module cannot be loaded from its path.
    """
 def load_module(name: str, path: Path):
    """Import the Python file at ``path`` under the module name ``name``.

    A module already registered under ``name`` that was loaded from the same
    file is returned as-is, so repeated calls do not re-execute the file. The
    module is registered in ``sys.modules`` before execution and removed again
    if execution fails, so a half-initialised module is never left behind.

    Args:
        name: Name to register the module under.
        path: Filesystem path of the module source.

    Returns:
        The loaded (or previously loaded) module object.

    Raises:
        HarnessError: no import spec or loader could be created for ``path``.
    """
    cached = sys.modules.get(name)
    if cached is not None:
        cached_file = getattr(cached, "__file__", None)
        if cached_file and Path(cached_file).resolve() == Path(path).resolve():
            return cached
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise HarnessError(f"module_import_failed:{path}")
    module = importlib.util.module_from_spec(spec)
    # An unrelated module already registered under this name is left alone;
    # the freshly loaded module is then returned without being registered.
    registered_here = name not in sys.modules
    if registered_here:
        sys.modules[name] = module
    try:
        spec.loader.exec_module(module)  # type: ignore[union-attr]
    except BaseException:
        if registered_here and sys.modules.get(name) is module:
            del sys.modules[name]
        raise
    return module
 def confidence_bucket(value: float | int | None) -> str:
    """Map a numeric confidence to a named bucket; ``None`` maps to ``unknown``."""
    if value is None:
        return "unknown"
    score = float(value)
    # Lower bounds, checked from highest to lowest.
    floors = (
        (0.95, "very_high"),
        (0.80, "high"),
        (0.60, "medium"),
        (0.40, "low"),
    )
    for floor, bucket in floors:
        if score >= floor:
            return bucket
    return "very_low"
 def lane_confidence(output: Mapping[str, Any], fallback: float = 0.7) -> float:
    """Extract one confidence number from a lane output.

    Uses the top-level ``confidence`` or ``score`` when it converts to float;
    otherwise the highest ``confidence`` among the mapping-valued entries of
    ``labels``; otherwise ``fallback``.
    """
    for key in ("confidence", "score"):
        try:
            direct = float(output[key])
        except (KeyError, TypeError, ValueError):
            continue
        return direct
    labels = output.get("labels")
    if not isinstance(labels, Mapping):
        return fallback
    collected: list[float] = []
    for entry in labels.values():
        if not (isinstance(entry, Mapping) and "confidence" in entry):
            continue
        try:
            collected.append(float(entry["confidence"]))
        except (TypeError, ValueError):
            pass
    return max(collected) if collected else fallback
 def closed_authority_flags(extra: Mapping[str, Any] | None = None) -> dict[str, bool]:
    """Return the baseline authority flags overlaid with values from ``extra``.

    ``may_*`` keys are translated to their ``can_*`` names. Keys unknown to
    the baseline are ignored, and ``requires_human_approval`` and
    ``advisory_only`` can never be overridden.
    """
    flags = dict(AUTHORITY_FLAGS_CLOSED)
    locked = {"requires_human_approval", "advisory_only"}
    overrides = extra or {}
    for key, value in overrides.items():
        target = MAY_TO_CAN.get(key, key)
        if target not in flags or target in locked:
            continue
        flags[target] = bool(value)
    return flags
 def authority_violations(flags: Mapping[str, Any]) -> list[str]:
    """List, in sorted order, every ``can_*`` flag whose value is truthy."""
    opened = [name for name, value in flags.items() if name.startswith("can_") and bool(value)]
    opened.sort()
    return opened
 def severity_for(label: str) -> str:
    """Translate a recommendation label into a severity; unknown labels map to ``none``."""
    severity_by_label = {
        "escalate": "critical",
        "block_authority_violation": "critical",
        "require_human_review": "medium",
        "review_item": "medium",
        "ready_for_review": "medium",
        "prepare_context_bundle": "medium",
        "summarize": "info",
        "log": "info",
    }
    return severity_by_label.get(label, "none")
 def _first_reported(proof: Mapping[str, Any], *keys: str) -> Any:
    """Return the first value among ``keys`` that is present and not empty.

    Only ``None`` and ``""`` count as missing, so a reported ``0`` is kept.
    """
    for key in keys:
        value = proof.get(key)
        if value is not None and value != "":
            return value
    return None


 def _delta_as_int(value: Any) -> int | None:
    """Coerce an int or a decimal-digit string to ``int``; anything else is ``None``."""
    if isinstance(value, int):
        return int(value)
    # isdecimal() rather than isdigit(): the latter accepts characters such as
    # superscript digits that int() rejects.
    if isinstance(value, str) and value.isdecimal():
        return int(value)
    return None


 def npu_proof_v1(proof: Mapping[str, Any]) -> dict[str, Any]:
    """Normalize a lane's NPU proof stanza into the v1 proof record.

    Args:
        proof: Raw proof mapping. The busy delta is read from
            ``npu_busy_delta_us`` or ``busy_delta_us``, the service delta from
            ``service_reported_delta_us`` or ``npu_busy_delta_us``.

    Returns:
        A dict with ``proof_mode``, ``busy_delta_us``,
        ``service_reported_delta_us``, ``inference_ran``, ``proof_ok`` and
        ``counter_path`` (always None). When ``ok`` is absent, ``proof_ok`` is
        derived from whether the busy delta is positive; a reported delta of 0
        therefore yields ``proof_ok=False`` rather than "unknown".
    """
    busy = _first_reported(proof, "npu_busy_delta_us", "busy_delta_us")
    service_delta = _first_reported(proof, "service_reported_delta_us", "npu_busy_delta_us")
    proof_ok = proof.get("ok")
    if proof_ok is None and busy is not None:
        try:
            proof_ok = int(busy) > 0
        except (TypeError, ValueError, OverflowError):
            proof_ok = None
    fixture_only = bool(proof.get("fixture_only", True))
    return {
        "proof_mode": "offline_fixture" if fixture_only else "service_reported_delta",
        "busy_delta_us": _delta_as_int(busy),
        "service_reported_delta_us": _delta_as_int(service_delta),
        "inference_ran": bool(proof_ok) if proof_ok is not None else False,
        "proof_ok": bool(proof_ok) if proof_ok is not None else None,
        "counter_path": None,
    }
 def compare_outcome(recommendation: str, expected: str, human: str) -> str:
    """Classify how a lane recommendation compares with the human label.

    Returns ``agree`` only when recommendation, expected and human labels are
    all equal. Otherwise: ``false_positive`` (lane acted, human did not),
    ``false_negative`` (the reverse), ``uncertain`` (lane deferred), or
    ``disagree``.
    """
    acting = {"escalate", "summarize", "review_item", "require_human_review", "prepare_context_bundle"}
    quiet = {"log", "suppress", "none"}
    if recommendation == human and human == expected:
        return "agree"
    if recommendation in acting and human in quiet:
        return "false_positive"
    if recommendation in quiet and human in acting:
        return "false_negative"
    return "uncertain" if recommendation in {"uncertain", "defer"} else "disagree"
 def evaluate_context_gate(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Run the context-gate planner on a fixture query and derive a recommendation.

    Any blocked field requires human review. Otherwise coding, ops-debug and
    research bundles recommend preparing a context bundle, and every other
    bundle recommends answering directly. The plan is built with
    ``require_npu_proof`` turned off.
    """
    gate_module = load_module(
        "openvino_context_gate.context_gate",
        REPO_ROOT / "openvino_context_gate" / "context_gate.py",
    )
    plan = gate_module.build_plan(
        str(fixture["query"]),
        context=fixture.get("context") or {},
        options={"require_npu_proof": False},
    )
    bundle_plan = plan["bundle_plan"]
    blocked = bundle_plan.get("blocked_fields") or []
    bundle_lanes = {"CodingTaskBundle", "OpsDebugBundle", "ResearchBundle"}
    if blocked:
        recommendation = "require_human_review"
    elif bundle_plan["bundle_name"] in bundle_lanes:
        recommendation = "prepare_context_bundle"
    else:
        recommendation = "answer_directly"
    confidence = plan["query_class"].get("confidence", 0.7)
    proof = plan["npu_proof"]
    bundle_name = bundle_plan["bundle_name"]
    sources = [entry["source"] for entry in plan["source_plan"]]
    notes = [f"bundle={bundle_name}", f"sources={','.join(sources)}"]
    return {
        "recommendation": recommendation,
        "confidence": confidence,
        "npu_proof": proof,
        "notes": notes,
        "raw_compact": {
            "bundle_name": bundle_name,
            "sources": sources,
            "blocked_fields": [entry["field"] for entry in blocked],
        },
    }
 def cron_recommendation(envelope: Mapping[str, Any], event: Mapping[str, Any]) -> str:
    """Recommend ``escalate``, ``summarize`` or ``log`` for a cron/n8n event.

    Without a verified NPU proof (``ok is True`` and a positive
    ``npu_busy_delta_us``) the answer is always ``log``. A malformed proof or
    label section counts as "not proven" instead of raising, so the function
    fails closed.

    Args:
        envelope: Gateway envelope; reads ``result.labels.urgency`` and
            ``npu_proof``.
        event: Source event; reads ``severity``.
    """
    result = envelope.get("result")
    labels = (result.get("labels") or {}) if isinstance(result, Mapping) else {}
    if not isinstance(labels, Mapping):
        labels = {}
    raw_urgency = labels.get("urgency")
    # Urgency may be a bare value or a {"value": ...} label object.
    if isinstance(raw_urgency, Mapping):
        raw_urgency = raw_urgency.get("value")
    urgency = raw_urgency or "normal"
    npu = envelope.get("npu_proof") or {}
    if not isinstance(npu, Mapping):
        npu = {}
    try:
        busy_delta = int(npu.get("npu_busy_delta_us") or 0)
    except (TypeError, ValueError, OverflowError):
        # An unparseable counter cannot prove that inference ran.
        busy_delta = 0
    npu_ok = npu.get("ok") is True and busy_delta > 0
    severity = str(event.get("severity") or "normal")
    if not npu_ok:
        return "log"
    if severity == "critical":
        return "escalate"
    if severity == "warning" or urgency in {"high", "critical"}:
        return "summarize"
    return "log"
 def evaluate_cron_n8n(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a cron/n8n fixture from its recorded gateway envelope and event.

    The recommendation comes from ``cron_recommendation``; the confidence is
    the highest label confidence in the envelope (0.6 when there is none).
    """
    envelope = fixture.get("gateway_envelope") or {}
    event = fixture.get("event") or {}
    result = envelope.get("result")
    if isinstance(result, Mapping):
        labels = (result or {}).get("labels") or {}
    else:
        labels = {}
    score = lane_confidence({"labels": labels}, 0.6)
    recommendation = cron_recommendation(envelope, event)
    workflow_note = f"workflow={event.get('workflow')}"
    severity_note = f"severity={event.get('severity')}"
    return {
        "recommendation": recommendation,
        "confidence": score,
        "npu_proof": envelope.get("npu_proof") or {},
        "authority_from_envelope": envelope.get("authority") or {},
        "notes": [workflow_note, severity_note],
    }
 def evaluate_batch_triage(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Triage a fixture document by action-pattern matches.

    Any pattern hit recommends ``review_item``. With no hit, text shorter than
    20 characters after stripping is ``uncertain`` and anything longer is
    ``suppress``. The output carries only the names of matching patterns,
    never the document text.
    """
    document = str(fixture.get("document_text") or "")
    matched = [label for label, pattern in ACTION_PATTERNS.items() if pattern.search(document)]
    matched.sort()
    if matched:
        recommendation, score = "review_item", 0.82
    elif len(document.strip()) < 20:
        recommendation, score = "uncertain", 0.35
    else:
        recommendation, score = "suppress", 0.64
    lane_note = f"lane={fixture.get('triage_lane')}"
    reason_note = f"reason_codes={','.join(matched) or 'none'}"
    return {
        "recommendation": recommendation,
        "confidence": score,
        "npu_proof": {"verified": False, "required": False, "note": "fixture_rules_no_npu_claim"},
        "notes": [lane_note, reason_note],
        "raw_compact": {"reasons": matched, "raw_text_redacted": True, "full_path_included": False},
    }
 def evaluate_voice_audio(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a voice/audio fixture through the pipeline's ``decide_gate``.

    A gate whose name starts with ``blocked`` requires human review; otherwise
    an action-worthy transcript becomes ``review_item`` and everything else
    ``suppress``. The NPU proof is verified only when both the whisper and
    classifier proofs are truthy.
    """
    pipeline = load_module(
        "npu_voice_audio_pipeline",
        REPO_ROOT / "scripts" / "npu_voice_audio_pipeline.py",
    )
    proof = fixture.get("npu_proof") or {}
    whisper_ok = bool(proof.get("whisper"))
    classifier_ok = bool(proof.get("classifier"))
    action_worthy, atlas_gate, next_gate = pipeline.decide_gate(
        str(fixture.get("transcript") or ""),
        dict(fixture.get("labels") or {}),
        whisper_proven=whisper_ok,
        classifier_proven=classifier_ok,
    )
    if atlas_gate.startswith("blocked"):
        recommendation = "require_human_review"
    else:
        recommendation = "review_item" if action_worthy else "suppress"
    return {
        "recommendation": recommendation,
        "confidence": 0.86 if action_worthy else 0.66,
        "npu_proof": {
            "whisper": whisper_ok,
            "classifier": classifier_ok,
            "verified": whisper_ok and classifier_ok,
        },
        "notes": [f"atlas_gate={atlas_gate}", f"next_gate={next_gate}", "transcript_redacted=true"],
        "raw_compact": {"action_worthy": action_worthy, "atlas_gate": atlas_gate, "next_gate": next_gate},
    }
 def evaluate_kanban_hygiene(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a kanban-hygiene fixture using the hygiene advisory script.

    Loads ``scripts/kanban-hygiene-advisory.py``, runs ``advisory`` over the
    fixture's ``tasks`` on the ``synthetic-npu`` board, and reports the first
    returned item's ``next_gate`` as the recommendation.

    Raises:
        HarnessError: if the advisory returns no items. Without this check the
            ``[0]`` lookup raised an IndexError that ``main`` does not catch.
    """
    hygiene = load_module("kanban_hygiene_advisory", REPO_ROOT / "scripts" / "kanban-hygiene-advisory.py")
    out = hygiene.advisory(list(fixture.get("tasks") or []), board="synthetic-npu", now=float(fixture.get("now") or time.time()), input_metadata={}, include_evidence=False)
    items = out["items"]
    if not items:
        raise HarnessError(f"kanban_hygiene_no_items:{fixture.get('id')}")
    # Only the first advisory item is compared against the fixture expectation.
    item = items[0]
    next_gate = item["next_gate"]["value"]
    return {
        "recommendation": next_gate,
        "confidence": item["next_gate"].get("confidence", 0.7),
        "npu_proof": out["npu_proof"],
        "notes": [f"task_id={item['task_id']}", f"review_needed={item['review_needed']['value']}"],
        "raw_compact": {"counts": out["counts"], "next_gate": item["next_gate"]},
    }
 def evaluate_gateway_envelope(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a gateway-envelope fixture, blocking on authority violations.

    If ``authority_violations`` reports any violation for the envelope's
    authority flags, the recommendation is ``block_authority_violation``.
    Otherwise it is whatever ``cron_recommendation`` returns for a synthetic
    critical-severity event.
    """
    envelope = fixture.get("gateway_envelope") or {}
    violations = authority_violations(closed_authority_flags(envelope.get("authority") or {}))
    recommendation = "block_authority_violation" if violations else cron_recommendation(envelope, {"severity": "critical"})
    result = envelope.get("result")
    # Labels are only read when the envelope result is a mapping.
    labels = (result.get("labels") or {}) if isinstance(result, Mapping) else {}
    notes = [f"violations={','.join(violations) or 'none'}", f"trace_id={envelope.get('trace_id')}"]
    return {
        "recommendation": recommendation,
        "confidence": lane_confidence({"labels": labels}, 0.8),
        "npu_proof": envelope.get("npu_proof") or {},
        "authority_from_envelope": envelope.get("authority") or {},
        "notes": notes,
    }
 # Dispatch table from a fixture's "lane" value to the evaluator for that lane.
 # run() looks lanes up here and raises HarnessError for any lane not listed.
 EVALUATORS = {
    "context_gate": evaluate_context_gate,
    "cron_n8n_advisory": evaluate_cron_n8n,
    "batch_triage": evaluate_batch_triage,
    "voice_audio": evaluate_voice_audio,
    "kanban_hygiene": evaluate_kanban_hygiene,
    "advisory_gateway_envelope": evaluate_gateway_envelope,
 }
 def build_decision(fixture: Mapping[str, Any], evaluated: Mapping[str, Any]) -> dict[str, Any]:
    """Assemble the per-fixture decision record from a fixture and its evaluator output.

    Args:
        fixture: Fixture mapping. ``human_or_atlas_decision`` and
            ``expected_recommendation`` are required (KeyError if missing).
            ``id``, ``lane``, ``service``, ``input_class``, ``source`` and
            ``expected_outcome`` are optional.
        evaluated: Evaluator result. ``recommendation`` is required;
            ``confidence``, ``npu_proof``, ``notes`` and
            ``authority_from_envelope`` are optional.

    Returns:
        A decision dict with recommendation, confidence, authority flags,
        outcome comparison, NPU proof, and fixed offline/fallback/privacy
        metadata. ``latency.total_ms`` is 0 here; ``run`` overwrites it.
    """
    envelope_authority = evaluated.get("authority_from_envelope")
    extra_authority = envelope_authority if isinstance(envelope_authority, Mapping) else None
    authority_flags = closed_authority_flags(extra_authority)
    violations = authority_violations(authority_flags)
    recommendation = str(evaluated["recommendation"])
    human = str(fixture["human_or_atlas_decision"])
    expected = str(fixture["expected_recommendation"])
    outcome_label = compare_outcome(recommendation, expected, human)
    # When the recommendation matches, the fixture's declared outcome wins over
    # the computed one. A missing or null expected_outcome means "no override";
    # a null used to overwrite the label with the literal string "None".
    expected_outcome = fixture.get("expected_outcome")
    if recommendation == expected and expected_outcome is not None and outcome_label != str(expected_outcome):
        outcome_label = str(expected_outcome)
    confidence_score = float(evaluated.get("confidence") or 0.0)
    npu_raw = dict(evaluated.get("npu_proof") or {})
    npu_raw.setdefault("fixture_only", True)
    fixture_id = str(fixture.get("id"))
    input_class = str(fixture.get("input_class") or fixture.get("lane") or "unknown")
    service_name = str(fixture.get("service") or fixture.get("lane") or "unknown")
    source_kind = str(fixture.get("source") or "fixture")
    comparison = "agree" if outcome_label == "agree" else ("uncertain" if outcome_label == "uncertain" else "disagree")
    error_type = outcome_label if outcome_label in {"false_positive", "false_negative", "severity_overcall", "severity_undercall"} else None
    # An authority violation overrides any other error classification.
    if violations:
        error_type = "unsafe_authority"
    return {
        "schema_version": SCHEMA,
        # uuid5 keeps decision ids stable across runs for the same fixture id.
        "decision_id": str(uuid.uuid5(uuid.NAMESPACE_URL, f"{SCHEMA}:{fixture_id}")),
        "timestamp": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"),
        "source": {
            "kind": "fixture",
            "fixture_id": fixture_id,
            "fixture_set": "npu_advisory_eval_v1",
            "artifact_ref": None,
            "content_hash": "sha256:" + hashlib.sha256(json.dumps(fixture, sort_keys=True, default=str).encode()).hexdigest(),
            "privacy_class": "synthetic" if source_kind.startswith("synthetic") else "non_private",
        },
        "service": {
            "name": service_name,
            "endpoint": service_name,
            "mode": "offline_fixture",
            "model": "openvino-local-fixture",
        },
        "input_class": input_class,
        "recommendation": {
            "label": recommendation,
            "severity": severity_for(recommendation),
            "reasons": list(evaluated.get("notes") or []),
            "evidence_refs": [f"fixture:{fixture_id}", f"lane:{fixture.get('lane')}"],
            "raw_output_ref": None,
        },
        "expected_recommendation": expected,
        "confidence": {
            "score": round(confidence_score, 3),
            "bucket": confidence_bucket(confidence_score),
            "bucket_rule": "v1_default",
            "calibrated": False,
        },
        "authority_flags": authority_flags,
        "allowed_actions": ALLOWED_ACTIONS,
        "actual_action": dict(NO_ACTUAL_ACTION),
        "human_or_atlas_decision": {
            "source": "fixture_expected",
            "label": human,
            "severity": severity_for(human),
            "confidence": None,
            "decision_ref": fixture_id,
            "timestamp": None,
        },
        "outcome": {
            "comparison": comparison,
            "label": outcome_label,
            "error_type": error_type,
            "human_review_required": bool(violations or recommendation in {"require_human_review", "block_authority_violation"}),
            "promotion_blocker": bool(violations or error_type in {"false_negative", "unsafe_authority", "privacy_violation"}),
        },
        "expected_outcome": expected_outcome,
        "npu_proof": npu_proof_v1(npu_raw),
        "latency": {"total_ms": 0, "service_ms": None, "queue_ms": None, "timeout": False},
        "fallback": {"occurred": True, "kind": "offline", "reason": "synthetic_fixture_deterministic_adapter_no_live_service_call", "expected": True},
        "privacy": {"payload_logged": False, "redaction": "metadata_only", "retention": "local_audit", "contains_private_payload": False},
        "notes": list(evaluated.get("notes") or []),
        "authority_safe_flag_violations": violations,
        # Compatibility fields for compact summaries/tests.
        "fixture_id": fixture_id,
        "lane": fixture.get("lane"),
    }
 def run(fixtures_path: Path) -> dict[str, Any]:
    """Evaluate every fixture in a JSON fixture file and build the run summary.

    Args:
        fixtures_path: Path to a JSON file whose top-level object has a
            non-empty ``fixtures`` list of fixture objects.

    Returns:
        Summary dict with totals, per-lane counts, confidence buckets,
        recommendation counts, minimum metrics, violations, mismatches, and
        the full ``decisions`` list.

    Raises:
        HarnessError: if the JSON root or a fixture entry is not an object, the
            fixture list is missing or empty, or a lane has no evaluator.
        OSError, json.JSONDecodeError: if the file cannot be read or parsed.
    """
    data = json.loads(fixtures_path.read_text(encoding="utf-8"))
    # json.loads may return a list or scalar; report that as a harness error
    # rather than letting .get() raise an AttributeError past main()'s handler.
    if not isinstance(data, dict):
        raise HarnessError("fixture_set_invalid:not_mapping")
    fixtures = data.get("fixtures")
    if not isinstance(fixtures, list) or not fixtures:
        raise HarnessError("fixture_set_empty")
    decisions = []
    started = time.perf_counter()
    for fixture in fixtures:
        if not isinstance(fixture, dict):
            raise HarnessError("fixture_invalid:not_mapping")
        lane = fixture.get("lane")
        evaluator = EVALUATORS.get(str(lane))
        if evaluator is None:
            raise HarnessError(f"unsupported_lane:{lane}")
        t0 = time.perf_counter()
        evaluated = evaluator(fixture)
        decision = build_decision(fixture, evaluated)
        # Replace the placeholder latency with the measured evaluate+build time.
        decision["latency"]["total_ms"] = round((time.perf_counter() - t0) * 1000, 3)
        decisions.append(decision)
    counts = Counter(d["outcome"]["label"] for d in decisions)
    confidence = Counter(d["confidence"]["bucket"] for d in decisions)
    recommendations = Counter(d["recommendation"]["label"] for d in decisions)
    violations = [d for d in decisions if d["authority_safe_flag_violations"]]
    mismatches = [d for d in decisions if d["outcome"]["label"] != d.get("expected_outcome")]
    return {
        "schema": HARNESS_SCHEMA,
        "fixture_file": str(fixtures_path),
        "dry_run": True,
        "mutations": dict(MUTATION_FLAGS_FALSE),
        "totals": {
            "fixtures": len(decisions),
            "agree": counts.get("agree", 0),
            "disagree": counts.get("disagree", 0),
            "uncertain": counts.get("uncertain", 0),
            "false_positive": counts.get("false_positive", 0),
            "false_negative": counts.get("false_negative", 0),
            "authority_safe_flag_violations": len(violations),
            "expected_outcome_mismatches": len(mismatches),
            "wall_ms": round((time.perf_counter() - started) * 1000, 3),
        },
        "by_lane": lane_summary(decisions),
        "confidence_buckets": dict(sorted(confidence.items())),
        "recommendations": dict(sorted(recommendations.items())),
        "minimum_metrics": minimum_metrics(decisions),
        "violations": [{"fixture_id": d["fixture_id"], "flags": d["authority_safe_flag_violations"]} for d in violations],
        "mismatches": [{"fixture_id": d["fixture_id"], "outcome": d["outcome"]["label"], "expected_outcome": d.get("expected_outcome")} for d in mismatches],
        "decisions": decisions,
    }
 def percentile(values: list[float], pct: float) -> float | None:
    """Return the nearest-rank percentile of ``values``, or None when empty.

    The rank is ``round(pct / 100 * (n - 1))`` on the sorted values, clamped to
    the valid index range, so ``pct`` outside 0..100 maps to the min or max.
    """
    if not values:
        return None
    ranked = sorted(values)
    last = len(ranked) - 1
    position = round((pct / 100) * last)
    if position < 0:
        position = 0
    elif position > last:
        position = last
    return ranked[position]
 def minimum_metrics(decisions: list[dict[str, Any]]) -> dict[str, Any]:
    """Aggregate the minimum metric set over a list of decision records.

    Produces record counts by input class and service, privacy and side-effect
    counts, fallback counts, NPU proof tallies, p50/p95 latency per service and
    per input class (via ``percentile``), and a timeout count.
    """
    def tally(predicate) -> int:
        # Number of decisions for which the predicate is truthy.
        return sum(1 for record in decisions if predicate(record))

    def latency_profile(groups, key_of):
        # p50/p95 of total_ms for each group, in the order the groups are given.
        profile = {}
        for group in groups:
            samples = [float(record["latency"]["total_ms"]) for record in decisions if key_of(record) == group]
            profile[group] = {"p50_ms": percentile(samples, 50), "p95_ms": percentile(samples, 95)}
        return profile

    input_counts = Counter(record["input_class"] for record in decisions)
    service_counts = Counter(record["service"]["name"] for record in decisions)
    fallback_kinds = Counter(record["fallback"]["kind"] for record in decisions if record["fallback"]["occurred"])
    outcome_counts = Counter(record["outcome"]["label"] for record in decisions)
    return {
        "total_records": len(decisions),
        "records_by_input_class": dict(sorted(input_counts.items())),
        "records_by_service": dict(sorted(service_counts.items())),
        "privacy_violation_count": tally(lambda r: r["privacy"]["contains_private_payload"] or r["privacy"]["payload_logged"]),
        "actual_side_effect_count": tally(lambda r: r["actual_action"]["performed"] or r["actual_action"]["side_effects"]),
        "missing_reference_count": outcome_counts.get("missing_reference", 0),
        "fallback_count": sum(fallback_kinds.values()),
        "fallback_counts_by_kind": dict(sorted(fallback_kinds.items())),
        "expected_fallback_count": tally(lambda r: r["fallback"]["occurred"] and r["fallback"]["expected"]),
        "unexpected_fallback_count": tally(lambda r: r["fallback"]["occurred"] and not r["fallback"]["expected"]),
        # proof_ok is tri-state: True (ok), False (missing), None (not applicable).
        "npu_proof_ok_count": tally(lambda r: r["npu_proof"]["proof_ok"] is True),
        "npu_proof_missing_count": tally(lambda r: r["npu_proof"]["proof_ok"] is False),
        "npu_proof_not_applicable_count": tally(lambda r: r["npu_proof"]["proof_ok"] is None),
        "latency_by_service": latency_profile(service_counts, lambda r: r["service"]["name"]),
        "latency_by_input_class": latency_profile(input_counts, lambda r: r["input_class"]),
        "timeout_count": tally(lambda r: r["latency"].get("timeout")),
    }
 def lane_summary(decisions: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Summarize outcome counts per lane, with lanes in sorted order.

    Lanes are keyed by ``str(decision["lane"])``. Each entry reports the number
    of fixtures, the count of each outcome label, and how many decisions carry
    authority-safe flag violations.
    """
    grouped: dict[str, list[dict[str, Any]]] = {}
    for record in decisions:
        grouped.setdefault(str(record["lane"]), []).append(record)
    summary = {}
    for lane_name in sorted(grouped):
        members = grouped[lane_name]
        labels = Counter(member["outcome"]["label"] for member in members)
        flagged = [member for member in members if member["authority_safe_flag_violations"]]
        summary[lane_name] = {
            "fixtures": len(members),
            "agree": labels.get("agree", 0),
            "disagree": labels.get("disagree", 0),
            "false_positive": labels.get("false_positive", 0),
            "false_negative": labels.get("false_negative", 0),
            "uncertain": labels.get("uncertain", 0),
            "authority_safe_flag_violations": len(flagged),
        }
    return summary
 def markdown_summary(summary: Mapping[str, Any]) -> str:
    """Render a run summary as a Markdown report ending in a newline.

    Emits a title, two totals lines, a per-lane table, and, when the summary
    lists violations, a section naming each offending fixture and its flags.
    """
    totals = summary["totals"]
    headline_keys = ("fixtures", "agree", "disagree", "false_positive", "false_negative", "uncertain")
    headline = " | ".join(f"{key}: {totals[key]}" for key in headline_keys)
    report = [
        "# NPU advisory dry-run comparison",
        "",
        headline,
        f"authority_safe_flag_violations: {totals['authority_safe_flag_violations']} | mutations: all_false",
        "",
        "| lane | fixtures | agree | false_positive | false_negative | violations |",
        "| --- | ---: | ---: | ---: | ---: | ---: |",
    ]
    row_keys = ("fixtures", "agree", "false_positive", "false_negative", "authority_safe_flag_violations")
    for lane, row in summary["by_lane"].items():
        cells = [f"{lane}"] + [f"{row[key]}" for key in row_keys]
        report.append("| " + " | ".join(cells) + " |")
    violations = summary.get("violations")
    if violations:
        report.append("")
        report.append("## Authority-safe flag violations")
        report.extend(f"- {entry['fixture_id']}: {', '.join(entry['flags'])}" for entry in violations)
    return "\n".join(report) + "\n"
 def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the dry-run fixture comparison harness."""
    parser = argparse.ArgumentParser(description="Run synthetic advisory-only NPU dry-run fixture comparisons.")
    parser.add_argument("--fixtures", default=str(DEFAULT_FIXTURES), help="Synthetic fixture JSON file")
    parser.add_argument("--format", choices=["json", "markdown"], default="json")
    # The remaining options are plain boolean switches.
    switches = (
        ("--include-decisions", "Include per-fixture decision records in JSON output"),
        ("--fail-on-mismatch", "Return non-zero if observed outcome differs from fixture expected_outcome"),
        ("--fail-on-authority-violation", "Return non-zero if any fixture exposes may_* authority flags set true"),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)
    return parser
 def main(argv: list[str] | None = None) -> int:
    """Run the harness from the command line and return a process exit code.

    Returns 2 (with a JSON error on stderr) when the fixture file cannot be
    read, parsed, or evaluated; 1 when a requested ``--fail-on-*`` condition is
    met; 0 otherwise.
    """
    args = build_parser().parse_args(argv)
    fixtures_path = Path(args.fixtures).expanduser().resolve()
    try:
        summary = run(fixtures_path)
    except (OSError, json.JSONDecodeError, HarnessError) as exc:
        failure = {"ok": False, "error": str(exc), "dry_run": True, "mutations": MUTATION_FLAGS_FALSE}
        print(json.dumps(failure, sort_keys=True), file=sys.stderr)
        return 2
    if args.format == "markdown":
        print(markdown_summary(summary), end="")
    else:
        # Per-fixture decisions are omitted from JSON output unless requested.
        payload = {key: value for key, value in summary.items() if args.include_decisions or key != "decisions"}
        print(json.dumps(payload, sort_keys=True, separators=(",", ":")))
    totals = summary["totals"]
    mismatch_failure = args.fail_on_mismatch and totals["expected_outcome_mismatches"]
    authority_failure = args.fail_on_authority_violation and totals["authority_safe_flag_violations"]
    return 1 if (mismatch_failure or authority_failure) else 0
 # Script entry point: exit the process with the status code returned by main().
 if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,523 @@
 #!/usr/bin/env python3
 """Explicit-root dry-run batch triage for local documents, images, and audio.
 This wrapper is intentionally report-only. It requires a lane-scoped approved
 root in a manifest, rejects request roots that broaden that approval, redacts raw
 text/transcripts by default, and never mutates Obsidian, RAG/vector DBs, files,
 routing, memory, services, or sends.
 """
 from __future__ import annotations
 import argparse
 import datetime as dt
 import hashlib
 import ipaddress
 import importlib.util
 import json
 import mimetypes
 import os
 import re
 import sys
 import time
 import urllib.error
 import urllib.parse
 import urllib.request
 from pathlib import Path
 from typing import Any
 try:
    import yaml  # type: ignore
 except Exception as exc:  # pragma: no cover
    raise SystemExit("PyYAML is required to read triage root manifests") from exc
 # Lane names defined by this script. AUDIO_LANES and DOC_IMAGE_LANES below
 # split them into the audio path and the document/image path.
 LANES = (
    "screenshots",
    "receipts",
    "downloads",
    "obsidian_attachments",
    "voice_memos",
    "meeting_snippets",
 )
 AUDIO_LANES = {"voice_memos", "meeting_snippets"}
 DOC_IMAGE_LANES = {"screenshots", "receipts", "downloads", "obsidian_attachments"}
 # Directory names pruned from the walk in iter_files (dot-directories are pruned there too).
 SKIP_DIR_NAMES = {".git", ".obsidian", "__pycache__", ".cache", "cache", "chroma", "chromadb", "vector_db", "vectors"}
 # Counter file read by read_busy(); presumably cumulative NPU busy time in microseconds — confirm.
 NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
 # Loopback endpoint on port 18816, the only port validate_local_whisper_url accepts.
 DEFAULT_WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"
 # Every mutation category is declared False.
 # NOTE(review): presumably embedded in report output — usage is not visible here.
 MUTATIONS_FALSE = {
    "obsidian": False,
    "rag": False,
    "vector_db": False,
    "sends": False,
    "file_moves": False,
    "routing": False,
    "memory": False,
    "service_restarts": False,
 }
 # Case-insensitive patterns keyed by reason code; a match marks text as action-worthy
 # in fallback_doc_item and classify_transcript.
 ACTION_PATTERNS = {
    "follow_up": re.compile(r"\b(follow up|follow-up|circle back|reply|respond)\b", re.I),
    "date_or_deadline": re.compile(r"\b(deadline|due|by (?:mon|tue|wed|thu|fri|sat|sun)|20\d{2}[-/]\d{1,2}[-/]\d{1,2})\b", re.I),
    "decision": re.compile(r"\b(decided|decision|approved|rejected|go with|choose)\b", re.I),
    "task": re.compile(r"\b(todo|to-do|action item|assign|need to|please)\b", re.I),
 }
 class FailClosed(Exception):
    """Raised when a manifest, policy, root, or Whisper URL validation check is not satisfied."""
 def sha256_text(text: str) -> str:
    """Return ``sha256:<hex>`` for the UTF-8 encoding of ``text``.

    Characters that cannot be encoded (such as lone surrogates) are replaced
    rather than raising.
    """
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8", errors="replace"))
    return f"sha256:{digest.hexdigest()}"
 def sha256_file(path: Path) -> str:
    """Return ``sha256:<hex>`` for the file's bytes, read in 1 MiB chunks."""
    digest = hashlib.sha256()
    block_size = 1024 * 1024
    with path.open("rb") as stream:
        while True:
            block = stream.read(block_size)
            if not block:
                break
            digest.update(block)
    return "sha256:" + digest.hexdigest()
 def read_busy(path: Path = NPU_BUSY_PATH) -> int | None:
    """Read an integer counter from ``path``; return None on any failure.

    Best-effort by design: a missing file, unreadable file, or non-integer
    content all yield None instead of raising.
    """
    try:
        raw = path.read_text()
        value = int(raw.strip())
    except Exception:
        value = None
    return value
 def validate_local_whisper_url(whisper_url: str) -> str:
    """Fail closed unless Whisper transcription stays on the approved loopback service.

    Accepts only ``http`` URLs without credentials, on port 18816, whose host
    is ``localhost`` or a loopback IP address. Returns the URL unchanged on
    success and raises ``FailClosed`` with a reason code otherwise.
    """
    try:
        parts = urllib.parse.urlparse(whisper_url)
        # Reading .port can itself raise ValueError for a malformed port.
        port = parts.port
    except ValueError as exc:
        raise FailClosed("whisper_url_invalid") from exc
    if parts.scheme != "http":
        raise FailClosed("whisper_url_scheme_not_http")
    if parts.username or parts.password:
        raise FailClosed("whisper_url_credentials_not_allowed")
    if port != 18816:
        raise FailClosed("whisper_url_port_not_approved")
    host = (parts.hostname or "").strip().lower()
    loopback = host == "localhost"
    if not loopback:
        try:
            loopback = ipaddress.ip_address(host).is_loopback
        except ValueError:
            # Not an IP literal (or empty): treat as non-loopback.
            loopback = False
    if not loopback:
        raise FailClosed("whisper_url_not_loopback")
    return whisper_url
 def is_under(path: Path, root: Path) -> bool:
    """Return True when ``path`` resolves to ``root`` itself or a location inside it."""
    try:
        target = path.resolve()
        base = root.resolve()
        target.relative_to(base)
    except ValueError:
        return False
    return True
 def load_manifest(path: Path) -> dict[str, Any]:
    """Load a triage root manifest and enforce its dry-run policy.

    The manifest must be a YAML mapping with ``version: 1``, a ``roots``
    mapping, and a ``policy`` whose ``default_mode`` is ``dry_run`` (or absent)
    and whose safety switches are exactly: ``require_explicit_root`` true, and
    ``allow_external_uploads``, ``allow_mutations``, ``log_raw_text`` false.

    Args:
        path: Location of the manifest file.

    Returns:
        The parsed manifest mapping.

    Raises:
        FailClosed: if the file is missing, is not valid YAML, or fails any
            structural or policy check.
    """
    if not path.exists():
        raise FailClosed(f"manifest_missing:{path}")
    try:
        # Pass bytes so PyYAML detects the encoding itself instead of relying
        # on the process locale, and so undecodable input is a YAML error.
        data = yaml.safe_load(path.read_bytes())
    except yaml.YAMLError as exc:
        raise FailClosed("manifest_invalid:yaml_parse_error") from exc
    if not isinstance(data, dict):
        raise FailClosed("manifest_invalid:not_mapping")
    if data.get("version") != 1:
        raise FailClosed("manifest_invalid:version_must_be_1")
    policy = data.get("policy") or {}
    # A list or scalar policy would otherwise raise AttributeError on .get().
    if not isinstance(policy, dict):
        raise FailClosed("policy_invalid:not_mapping")
    if policy.get("default_mode", "dry_run") != "dry_run":
        raise FailClosed("policy_invalid:default_mode_not_dry_run")
    required_switches = {
        "require_explicit_root": True,
        "allow_external_uploads": False,
        "allow_mutations": False,
        "log_raw_text": False,
    }
    for key, expected in required_switches.items():
        # Identity check: the value must be the exact boolean, not merely
        # truthy/falsy, and a missing key is rejected.
        if policy.get(key) is not expected:
            raise FailClosed(f"policy_invalid:{key}")
    if not isinstance(data.get("roots"), dict):
        raise FailClosed("manifest_invalid:roots_missing")
    return data
 def resolve_lane_root(manifest: dict[str, Any], manifest_path: Path, lane: str, requested_root: str | None) -> tuple[dict[str, Any], Path, Path]:
    """Resolve the approved root for ``lane`` and the root selected for this run.

    A relative manifest root is resolved against the manifest's directory. The
    requested root, when given, must be an existing directory at or below the
    approved root; otherwise the approved root is used.

    Returns:
        ``(lane_cfg, approved_root, selected_root)``.

    Raises:
        FailClosed: if the lane is missing or unapproved, has no root, either
            directory is unavailable, or the request broadens the approval.
    """
    roots = manifest.get("roots") or {}
    lane_cfg = roots.get(lane)
    if not isinstance(lane_cfg, dict):
        raise FailClosed(f"lane_missing:{lane}")
    if lane_cfg.get("approved") is not True:
        raise FailClosed(f"lane_unapproved:{lane}")
    root_value = lane_cfg.get("root")
    if not root_value:
        raise FailClosed(f"root_missing:{lane}")
    approved_root = Path(str(root_value)).expanduser()
    if not approved_root.is_absolute():
        approved_root = manifest_path.parent / approved_root
    approved_root = approved_root.resolve()
    if not (approved_root.exists() and approved_root.is_dir()):
        raise FailClosed(f"approved_root_unavailable:{lane}")
    if requested_root:
        selected_root = Path(requested_root).expanduser().resolve()
    else:
        selected_root = approved_root.resolve()
    if not (selected_root.exists() and selected_root.is_dir()):
        raise FailClosed(f"request_root_unavailable:{lane}")
    # The request may narrow the approved root but never widen it.
    if not is_under(selected_root, approved_root):
        raise FailClosed(f"request_root_broadens_approval:{lane}")
    return lane_cfg, approved_root, selected_root
 def allowed_exts(lane_cfg: dict[str, Any]) -> set[str]:
    """Return the lane's allowed extensions, lower-cased with a leading dot.

    A missing or null ``allowed_extensions`` yields an empty set (a YAML key
    with no value parses as null and used to raise TypeError). A single string
    is treated as one extension rather than being iterated per character.
    """
    raw = lane_cfg.get("allowed_extensions")
    if raw is None:
        return set()
    if isinstance(raw, str):
        raw = [raw]
    normalized: set[str] = set()
    for entry in raw:
        ext = str(entry).lower()
        normalized.add(ext if ext.startswith(".") else "." + ext)
    return normalized
 def iter_files(root: Path, approved_root: Path, exts: set[str], max_file_mb: float, max_age_days: float | None) -> tuple[list[Path], dict[str, int], int]:
    """Collect candidate files under ``root`` that satisfy the lane limits.

    Walks ``root`` without following directory symlinks, pruning dot-directories
    and names in ``SKIP_DIR_NAMES``. A file is accepted when it is not a
    dot-file, resolves inside ``approved_root``, is a regular file with an
    allowed extension, is at most ``max_file_mb`` MiB, and (when
    ``max_age_days`` is set) was modified within that many days.

    Returns:
        ``(accepted, skipped, files_seen)``: accepted resolved paths sorted
        newest first, per-reason skip counters, and the number of non-dot
        filenames examined.
    """
    skipped = {"extension": 0, "size": 0, "symlink_escape": 0, "not_regular_file": 0, "too_old": 0, "policy": 0}
    # Keep each accepted file's mtime from the stat already taken, so the final
    # sort does not stat again (a file removed mid-scan used to raise OSError).
    candidates: list[tuple[float, Path]] = []
    files_seen = 0
    now = time.time()
    max_bytes = int(max_file_mb * 1024 * 1024)
    for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
        # In-place slice assignment is what prunes os.walk's descent.
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIR_NAMES and not d.startswith(".")]
        current = Path(dirpath)
        if not is_under(current, approved_root):
            skipped["symlink_escape"] += 1
            dirnames[:] = []
            continue
        for name in filenames:
            path = current / name
            if name.startswith("."):
                skipped["policy"] += 1
                continue
            files_seen += 1
            try:
                resolved = path.resolve()
            except Exception:
                skipped["symlink_escape"] += 1
                continue
            # A file symlink may resolve outside the approved root.
            if not is_under(resolved, approved_root):
                skipped["symlink_escape"] += 1
                continue
            if not resolved.is_file():
                skipped["not_regular_file"] += 1
                continue
            if resolved.suffix.lower() not in exts:
                skipped["extension"] += 1
                continue
            try:
                st = resolved.stat()
            except OSError:
                skipped["not_regular_file"] += 1
                continue
            if st.st_size > max_bytes:
                skipped["size"] += 1
                continue
            if max_age_days is not None and now - st.st_mtime > max_age_days * 86400:
                skipped["too_old"] += 1
                continue
            candidates.append((st.st_mtime, resolved))
    # Stable sort, newest first; ties keep walk order.
    candidates.sort(key=lambda pair: pair[0], reverse=True)
    accepted = [resolved for _, resolved in candidates]
    return accepted, skipped, files_seen
 def load_doc_triage_module(repo_root: Path):
    """Import ``openvino-doc-image-triage-npu/triage.py`` as module ``doc_image_triage``.

    The directory name contains hyphens, so the module is loaded by file
    location rather than with a normal import statement.

    Raises:
        RuntimeError: if no import spec or loader can be created for the file.
    """
    module_name = "doc_image_triage"
    source_file = repo_root / "openvino-doc-image-triage-npu" / "triage.py"
    spec = importlib.util.spec_from_file_location(module_name, source_file)
    if spec is None or spec.loader is None:
        raise RuntimeError("doc_image_triage_import_failed")
    loaded = importlib.util.module_from_spec(spec)
    # Register before executing; an already-registered module is left in place.
    sys.modules.setdefault(module_name, loaded)
    spec.loader.exec_module(loaded)  # type: ignore[union-attr]
    return loaded
 def fallback_doc_item(path: Path, root: Path, lane: str) -> dict[str, Any]:
    """Build a redacted report item using only keyword rules on a ``.txt`` sidecar.

    The sidecar is ``<file name>.txt`` next to the file; at most the first
    12000 characters are considered. Only hashes, counts, and category labels
    are returned, never the text itself.
    """
    sidecar = path.with_suffix(path.suffix + ".txt")
    text = sidecar.read_text(errors="replace")[:12000] if sidecar.exists() and sidecar.is_file() else ""
    lowered = text.lower()

    def mentions(*words: str) -> bool:
        return any(word in lowered for word in words)

    # Receipt keywords win over invoice keywords; the lane is only a fallback.
    if mentions("receipt", "subtotal", "store"):
        category = "receipt"
    elif mentions("invoice", "amount due", "payment due"):
        category = "bill_or_invoice"
    elif lane == "screenshots":
        category = "screenshot_web_or_app"
    else:
        category = "unknown_or_low_confidence"
    reasons = sorted(name for name, rx in ACTION_PATTERNS.items() if rx.search(text))
    distinct_dates = set(re.findall(r"\b20\d{2}[-/]\d{1,2}[-/]\d{1,2}\b", text))
    distinct_amounts = set(re.findall(r"\$\s?\d+(?:\.\d{2})?", text))
    return {
        "basename": path.name,
        "relative_path_hash": sha256_text(path.relative_to(root).as_posix()),
        "file_id": sha256_file(path),
        "media_type": infer_media_type(path),
        "category": category,
        "needs_attention": bool(reasons),
        "reasons": reasons,
        "raw_text_redacted": True,
        "full_path_included": False,
        "metadata": {"dates_count": len(distinct_dates), "amounts_count": len(distinct_amounts), "raw_values_redacted": True},
        "processing": {"doc_image_triage": "fallback_cpu_sidecar_rules", "npu_verified": False},
    }
 def infer_media_type(path: Path) -> str:
    """Classify a file as ``pdf``, ``image``, ``audio``, or ``unknown`` from its name."""
    if path.suffix.lower() == ".pdf":
        return "pdf"
    guessed = mimetypes.guess_type(path.name)[0] or ""
    for family in ("image", "audio"):
        if guessed.startswith(family + "/"):
            return family
    return "unknown"
 def compact_doc_item(path: Path, root: Path, lane: str, triage_result: dict[str, Any]) -> dict[str, Any]:
    """Reduce a doc/image triage result to a redacted, report-safe item.

    Only the first page of ``triage_result["pages"]`` is consulted. Missing
    fields fall back to defaults, locally computed hashes, or an inferred media
    type. The ``receipts`` lane additionally gets ``receipt_fields``.
    """
    pages = triage_result.get("pages") or []
    first_page = pages[0] if pages else {}
    classification = first_page.get("classification") or {}
    attention = first_page.get("needs_attention") or {}
    page_meta = first_page.get("metadata") or {}
    device_summary = triage_result.get("processing_device_summary") or {}
    counts = {
        "dates_count": page_meta.get("dates_count", 0),
        "amounts_count": page_meta.get("amounts_count", 0),
        "raw_values_redacted": True,
    }
    processing = {
        "doc_image_triage": "openvino-doc-image-triage-npu",
        "image_category_device": (classification.get("device") or "CPU"),
        "needs_attention_device": attention.get("device") or "CPU",
        "npu_verified": bool(device_summary.get("npu_verified")),
        "npu_busy_delta_us": device_summary.get("npu_busy_delta_us"),
    }
    item = {
        "basename": path.name,
        "relative_path_hash": sha256_text(path.relative_to(root).as_posix()),
        # Hash the file locally only when the triage result has no file_id.
        "file_id": triage_result.get("file_id") or sha256_file(path),
        "media_type": triage_result.get("media_type") or infer_media_type(path),
        "category": classification.get("label") or "unknown_or_low_confidence",
        "needs_attention": bool(attention.get("value")),
        "reasons": attention.get("reasons") or [],
        "raw_text_redacted": True,
        "full_path_included": False,
        "metadata": counts,
        "processing": processing,
    }
    if lane == "receipts":
        entities = page_meta.get("detected_entities") or {}
        item["receipt_fields"] = {
            "vendor_present": bool(entities.get("org_present")),
            "amounts_count": counts["amounts_count"],
            "dates_count": counts["dates_count"],
        }
    return item
 def classify_transcript(text: str, lane: str) -> dict[str, Any]:
    """Summarize a transcript with the ``ACTION_PATTERNS`` rules.

    The counts are presence flags, not occurrence counts:
    ``action_items_count`` is how many of the ``follow_up`` and ``task``
    patterns matched (0-2); the decision and follow-up counts are 0 or 1.
    """
    hits = {name: bool(rx.search(text)) for name, rx in ACTION_PATTERNS.items()}
    reasons = sorted(name for name, matched in hits.items() if matched)
    follow_up_hit = hits["follow_up"]
    task_hit = hits["task"]
    decision_hit = hits["decision"]
    return {
        "category": "meeting_snippet" if lane == "meeting_snippets" else "voice_memo",
        "action_worthy": bool(reasons),
        "reasons": reasons,
        "action_items_count": int(follow_up_hit) + int(task_hit),
        "decisions_count": 1 if decision_hit else 0,
        "followups_count": 1 if follow_up_hit else 0,
    }
 def multipart_transcribe(path: Path, whisper_url: str, timeout: float) -> dict[str, Any]:
    """POST an audio file to the local Whisper endpoint and summarize the reply.

    Args:
        path: Audio file to upload as the multipart ``file`` field.
        whisper_url: Endpoint URL; re-validated with
            ``validate_local_whisper_url`` before any request is made.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        Dict with ``ok``, ``text``, ``transcript_chars``, ``duration_seconds``,
        ``language``, ``npu_busy_delta_us``, ``verified_npu`` and ``wall_ms``.
        The busy delta prefers an integer reported by the service and falls
        back to the difference of two local counter reads.

    Raises:
        FailClosed: if the URL is not the approved loopback endpoint.
        urllib.error.URLError, OSError, json.JSONDecodeError: on transport or
            response parsing failures.
    """
    whisper_url = validate_local_whisper_url(whisper_url)
    # File names decoded with surrogateescape cannot be strictly UTF-8 encoded,
    # so encode with errors="replace" (as sha256_text does) instead of raising.
    boundary = "----NpuBatchTriage" + hashlib.sha256(path.name.encode("utf-8", errors="replace")).hexdigest()[:12]
    # Escape characters that would end the quoted filename or inject a header
    # line into the multipart part (file names may contain quotes and newlines).
    safe_name = path.name.replace('"', "%22").replace("\r", "%0D").replace("\n", "%0A")
    data = path.read_bytes()
    body = (
        f"--{boundary}\r\n"
        f'Content-Disposition: form-data; name="file"; filename="{safe_name}"\r\n'
        "Content-Type: application/octet-stream\r\n\r\n"
    ).encode("utf-8", errors="replace") + data + (
        f"\r\n--{boundary}\r\n"
        'Content-Disposition: form-data; name="model"\r\n\r\n'
        "whisper-1\r\n"
        f"--{boundary}--\r\n"
    ).encode()
    before = read_busy()
    req = urllib.request.Request(whisper_url, data=body, headers={"Content-Type": f"multipart/form-data; boundary={boundary}"})
    t0 = time.perf_counter()
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        # Cap the response at 1 MiB; a truncated body fails JSON parsing below.
        raw = resp.read(1024 * 1024)
        status = resp.status
    # Invalid UTF-8 is replaced so a bad body surfaces as JSONDecodeError (which
    # callers handle) rather than UnicodeDecodeError.
    parsed = json.loads(raw.decode("utf-8", errors="replace"))
    if not isinstance(parsed, dict):
        # A JSON list or scalar carries no transcript fields; treat as empty.
        parsed = {}
    after = read_busy()
    text = str(parsed.get("text") or parsed.get("transcription") or "").strip()
    service_delta = parsed.get("npu_busy_delta_us")
    sysfs_delta = None if before is None or after is None else after - before
    # bool is a subclass of int; a JSON true must not count as a busy delta.
    service_delta_is_int = isinstance(service_delta, int) and not isinstance(service_delta, bool)
    proof_delta = service_delta if service_delta_is_int else sysfs_delta
    return {
        "ok": status == 200 and bool(text),
        "text": text,
        "transcript_chars": len(text),
        "duration_seconds": parsed.get("duration_seconds"),
        "language": parsed.get("language"),
        "npu_busy_delta_us": proof_delta,
        "verified_npu": bool(proof_delta and proof_delta > 0),
        "wall_ms": round((time.perf_counter() - t0) * 1000, 2),
    }
 def compact_audio_item(path: Path, root: Path, lane: str, no_npu: bool, whisper_url: str, timeout: float) -> dict[str, Any]:
    """Build a redacted report item for one audio file.

    Unless ``no_npu`` is set, the file is transcribed via
    ``multipart_transcribe``. The transcript is used only for rule-based
    classification and its length; the text itself is never put in the item.
    Transport or response-parsing failures are recorded under ``error`` and do
    not raise.

    Returns:
        Item dict with file hashes, transcription status, the fields from
        ``classify_transcript``, and the NPU busy delta.
    """
    transcript = ""
    transcribed = False
    npu_delta = 0
    duration = None
    language = None
    error = None
    if not no_npu:
        try:
            result = multipart_transcribe(path, whisper_url, timeout)
            transcript = result["text"]
            transcribed = result["ok"]
            npu_delta = result.get("npu_busy_delta_us") or 0
            duration = result.get("duration_seconds")
            language = result.get("language")
        # ValueError covers json.JSONDecodeError and Unicode encode/decode
        # errors, so one malformed response becomes a per-item error.
        except (urllib.error.URLError, TimeoutError, OSError, ValueError) as exc:
            error = f"whisper_error:{type(exc).__name__}"
    summary = classify_transcript(transcript, lane)
    item = {
        "basename": path.name,
        "relative_path_hash": sha256_text(path.relative_to(root).as_posix()),
        "file_id": sha256_file(path),
        "media_type": "audio",
        "duration_seconds": duration,
        "transcribed": transcribed,
        "transcript_chars": len(transcript),
        "language": language,
        **summary,
        "npu_busy_delta_us": npu_delta,
        "raw_transcript_logged": False,
        "full_path_included": False,
    }
    if error:
        item["error"] = error
    return item
 def process(args: argparse.Namespace) -> dict[str, Any]:
    """Run one dry-run triage pass over a manifest-approved lane root.

    Loads the manifest, resolves the lane root, selects at most ``limit``
    files, processes each one through the audio or doc/image path, and returns
    a report dict. The NPU busy counter is sampled once before and once after
    the whole batch.

    Args:
        args: Parsed CLI namespace as produced by ``build_parser``.

    Returns:
        The report dict. ``ok`` is true only when no error was recorded.

    Raises:
        FailClosed: When the lane has no allowed extensions, or when a helper
            or per-file step raises it (it is re-raised, never recorded).
    """
    repo_root = Path(__file__).resolve().parents[1]
    manifest_path = Path(args.manifest).expanduser().resolve()
    manifest = load_manifest(manifest_path)
    lane_cfg, approved_root, root = resolve_lane_root(manifest, manifest_path, args.lane, args.root)
    exts = allowed_exts(lane_cfg)
    if not exts:
        raise FailClosed(f"extensions_missing:{args.lane}")
    # The manifest's max_files (default 50) is an upper bound; --limit can only lower it.
    manifest_limit = int(lane_cfg.get("max_files", 50))
    limit = min(args.limit if args.limit is not None else manifest_limit, manifest_limit)
    files, skipped, files_seen = iter_files(root, approved_root, exts, float(lane_cfg.get("max_file_mb", 25)), args.max_age_days)
    selected = files[:limit]
    # Counter sample taken before any per-file work; paired with npu_after below.
    npu_before = read_busy()
    items: list[dict[str, Any]] = []
    errors: list[str] = []
    doc_module = None
    if args.lane in AUDIO_LANES and not args.no_npu:
        validate_local_whisper_url(args.whisper_url)
    if args.lane in DOC_IMAGE_LANES and not args.no_npu:
        try:
            doc_module = load_doc_triage_module(repo_root)
        except Exception as exc:
            # Import failure is recorded; files then go through fallback_doc_item.
            errors.append(f"doc_triage_import_error:{type(exc).__name__}")
    for path in selected:
        try:
            if args.lane in AUDIO_LANES:
                item = compact_audio_item(path, root, args.lane, args.no_npu, args.whisper_url, args.timeout_seconds)
            elif doc_module is not None:
                opts = doc_module.TriageOptions(
                    dry_run=False,
                    include_ocr_text=False,
                    include_full_path=False,
                    use_embeddings=not args.no_npu,
                    allowed_roots=[approved_root],
                    timeout_seconds=args.timeout_seconds,
                )
                item = compact_doc_item(path, root, args.lane, doc_module.triage_file(path, opts))
            else:
                item = fallback_doc_item(path, root, args.lane)
            if args.include_full_path:
                item["full_path"] = str(path)
                item["full_path_included"] = True
            if args.include_raw_text:
                # The flag is acknowledged but raw text is still never included.
                item["raw_text_included"] = False
                item["raw_text_note"] = "unsupported_by_batch_wrapper"
            items.append(item)
        except FailClosed:
            raise
        except Exception as exc:
            # Any other per-file failure is recorded by exception type and the batch continues.
            errors.append(f"{path.name}:{type(exc).__name__}")
            items.append({"basename": path.name, "ok": False, "error": type(exc).__name__, "raw_text_redacted": True, "full_path_included": False})
    npu_after = read_busy()
    sysfs_delta = None if npu_before is None or npu_after is None else npu_after - npu_before
    item_deltas = [i.get("npu_busy_delta_us") for i in items if isinstance(i.get("npu_busy_delta_us"), int)]
    # NPU use is claimed only when NPU mode is on and some per-item or batch delta is positive.
    claimed = not args.no_npu and any((d or 0) > 0 for d in item_deltas + ([sysfs_delta] if isinstance(sysfs_delta, int) else []))
    # NOTE(review): whenever claimed is true a positive delta already exists, so this
    # expression always evaluates to the same value as claimed — confirm that is intended.
    proof_ok = claimed and bool(sysfs_delta is None or sysfs_delta > 0 or any((d or 0) > 0 for d in item_deltas))
    return {
        "ok": not errors,
        "lane": args.lane,
        "dry_run": True,
        "approved_root": True,
        "root_basename": root.name,
        "files_seen": files_seen,
        "files_processed": len(items),
        "skipped": skipped,
        "npu": {"claimed": claimed, "busy_delta_us": sysfs_delta, "proof_ok": proof_ok},
        "mutations": MUTATIONS_FALSE.copy(),
        "items": items,
        # NOTE(review): this reports False when --include-raw-text is passed even though
        # each item above is still marked raw_text_included=False — confirm intended.
        "raw_content_redacted": not args.include_raw_text,
        "full_paths_included": bool(args.include_full_path),
        "errors": errors,
        "gates": {
            "external_uploads": False,
            "private_root_broadening": False,
            "obsidian_mutation": False,
            "vector_db_mutation": False,
            "outbound_sends": False,
            "routing_changes": False,
        },
    }
 def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the dry-run batch triage wrapper.

    Options are registered from a table, in the order they appear in --help.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(description="Explicit-root dry-run batch triage wrapper")
    option_table = [
        ("--manifest", {"required": True, "help": "lane approval manifest; missing/unapproved fails closed"}),
        ("--lane", {"required": True, "choices": LANES}),
        ("--root", {"help": "optional narrower root under the manifest-approved lane root"}),
        ("--dry-run", {"action": "store_true", "help": "required; mutation modes are not implemented"}),
        ("--limit", {"type": int, "default": None}),
        ("--max-age-days", {"type": float, "default": None}),
        ("--include-raw-text", {"action": "store_true", "help": "kept redacted by this wrapper; present only for explicit operator attempts"}),
        ("--include-full-path", {"action": "store_true", "help": "operator-only local debugging"}),
        ("--no-npu", {"action": "store_true", "help": "CPU-only smoke; never claims NPU"}),
        ("--json", {"action": "store_true", "help": "emit compact JSON"}),
        ("--pretty", {"action": "store_true", "help": "pretty JSON for local debugging"}),
        ("--whisper-url", {"default": DEFAULT_WHISPER_URL}),
        ("--timeout-seconds", {"type": float, "default": 20.0}),
    ]
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli
 def main(argv: list[str] | None = None) -> int:
    """CLI entry point: validate arguments, run ``process``, print the report.

    Argument rejections are printed as JSON on stderr. The report, including a
    fail-closed report, is printed as JSON on stdout.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 when the report's ``ok`` is truthy, otherwise 2.
    """
    args = build_parser().parse_args(argv)
    rejection = None
    if not args.dry_run:
        rejection = {"ok": False, "error": "dry_run_required", "mutations": MUTATIONS_FALSE}
    elif args.limit is not None and args.limit < 1:
        rejection = {"ok": False, "error": "limit_must_be_positive"}
    if rejection is not None:
        print(json.dumps(rejection), file=sys.stderr)
        return 2
    try:
        report = process(args)
    except FailClosed as exc:
        # A fail-closed condition becomes a normal (non-ok) report, not a traceback.
        report = {"ok": False, "error": "fail_closed", "reason": str(exc), "dry_run": True, "mutations": MUTATIONS_FALSE.copy()}
    indent = 2 if args.pretty else None
    print(json.dumps(report, indent=indent, sort_keys=True))
    if report.get("ok"):
        return 0
    return 2
 if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
@@ -45,7 +45,11 @@ printf 'busy_path=%s\n' "$BUSY_PATH"
 printf 'busy_time_us=%s\n' "$(busy_value)"
 section "Listeners"
-ss -ltnp | grep -E ':(18810|18814|18816|18817|18818|18819|18820|18828|18829)\b' || true
+# Required OpenVINO/NPU program ports: live baseline 18810/18816/18817,
 # reranker 18818, local-only specialists 18819/18820/18829, and advisory gateway 18830.
 # 18814 is the existing RAG/embedding health wrapper; 18828 is a review-only
 # alternate used to avoid collisions during prior smoke tests.
 ss -ltnp | grep -E ':(18810|18814|18816|18817|18818|18819|18820|18828|18829|18830)\b' || true
 section "User service states"
 for unit in \
@@ -73,6 +77,7 @@ http_json "OpenVINO embeddings" "http://127.0.0.1:18817/healthz" || true
 http_json "NPU reranker prototype" "http://127.0.0.1:18818/readyz" || true
 http_json "NPU router classifier prototype" "http://127.0.0.1:18819/healthz" || true
 http_json "NPU GenAI worker prototype" "http://127.0.0.1:18820/healthz" || true
 http_json "NPU doc/image triage prototype" "http://127.0.0.1:18829/healthz" || true
 section "Embeddings NPU busy-time proof"
 if [[ ! -r "$BUSY_PATH" ]]; then
@@ -0,0 +1,712 @@
 #!/usr/bin/env python3
 """Compact, read-only NPU/OpenVINO utilization digest.
 Default behavior is safe for on-demand or scheduled runs: health checks plus
 bounded synthetic probes, one compact JSONL artifact, and no service restarts,
 routing changes, advisory POSTs, vector mutations, outbound sends, or private
 root broadening.
 """
 from __future__ import annotations
 import argparse
 import base64
 import datetime as dt
 import json
 import math
 import os
 import tempfile
 import time
 import urllib.error
 import urllib.parse
 import urllib.request
 import uuid
 import wave
 from dataclasses import asdict, dataclass, field
 from pathlib import Path
 from typing import Any, Callable
 # sysfs counter file; the default path for read_busy() and the probe helpers.
 BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
 # NOTE(review): absolute path under one user's home directory; its use is not
 # visible in this part of the file — confirm it is only a default.
 DEFAULT_OUT_DIR = Path("/home/will/.local/state/npu-utilization/digests")
 # Loopback endpoints, grouped per service: the request URL used by a probe
 # (where one exists) and the health URL checked before it.
 EMBED_URL = "http://127.0.0.1:18817/v1/embeddings"
 EMBED_HEALTH_URL = "http://127.0.0.1:18817/healthz"
 RERANK_URL = "http://127.0.0.1:18818/rerank"
 RERANK_HEALTH_URL = "http://127.0.0.1:18818/readyz"
 WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"
 WHISPER_HEALTH_URL = "http://127.0.0.1:18816/health"
 CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
 CLASSIFIER_HEALTH_URL = "http://127.0.0.1:18819/healthz"
 GENAI_HEALTH_URL = "http://127.0.0.1:18820/healthz"
 GENAI_GENERATE_URL = "http://127.0.0.1:18820/v1/generate"
 DOC_TRIAGE_URL = "http://127.0.0.1:18829/triage"
 DOC_TRIAGE_HEALTH_URL = "http://127.0.0.1:18829/healthz"
 RAG_ENDPOINT_HEALTH_URL = "http://127.0.0.1:18810/healthz"
 RAG_HEALTH_URL = "http://127.0.0.1:18814/healthz"
 # The only non-loopback address in this list.
 # NOTE(review): 172.19.0.1 looks like a container bridge address — confirm.
 ADVISORY_HEALTH_URL = "http://172.19.0.1:18830/healthz"
@dataclass
 class ServiceRow:
    """One digest row: the health/probe outcome for a single service.

    Every field has a default, so a row can be created with just ``service``
    and filled in by the probe that owns it. Fields left as ``None`` (and an
    empty ``warnings`` list) are omitted when the row goes through
    ``compact_dict``.
    """
    # Row discriminator and service name.
    type: str = "service"
    service: str = ""
    # Health/probe outcome. proof_ok and npu_delta_us are written by apply_proof().
    reachable: bool = False
    probe_ran: bool = False
    proof_ok: bool | None = None
    calls: int = 0
    items: int = 0
    avg_ms: float | None = None
    npu_delta_us: int | None = None
    # Delta reported inside the service's own response, when it is an int.
    response_delta_us: int | None = None
    mode: str = "unavailable"
    # Incremented by health_row(), apply_proof() and mark_skipped_fallback().
    fallbacks: int = 0
    warnings: list[str] = field(default_factory=list)
    # build_summary() counts rows whose gate starts with "closed:".
    gate: str = "none"
    # Per-probe counters; each probe sets only the ones that apply to it.
    jobs: int | None = None
    events: int | None = None
    files: int | None = None
    docs: int | None = None
    # Set by the whisper probe.
    text_len: int | None = None
    sample_rate: int | None = None
    # Set by the embeddings probe.
    embedding_count: int | None = None
    embedding_dim: int | None = None
    # Set by the classifier probe.
    dry_run: bool | None = None
    suppress: int | None = None
    escalate: int | None = None
    recommendation: str | None = None
    confidence: float | None = None
    confidence_bucket: str | None = None
    authority_violations: int | None = None
    # Set by the genai probe from the health payload's "loaded" key.
    loaded: bool | None = None
    # Set by the doc-triage probe.
    allowed_roots_count: int | None = None
    # Why a smoke probe was skipped, and a short error string (probes truncate it to 80 chars).
    reason: str | None = None
    error: str | None = None
 def compact_dict(obj: Any) -> dict[str, Any]:
    """Convert a dataclass or mapping to a dict without empty entries.

    Args:
        obj: An object with ``__dataclass_fields__`` (converted with
            ``asdict``) or anything accepted by ``dict()``.

    Returns:
        A new dict, in the original key order, without entries whose value is
        ``None`` or compares equal to ``[]``. Other falsy values such as ``0``
        and ``""`` are kept.
    """
    if hasattr(obj, "__dataclass_fields__"):
        source = asdict(obj)
    else:
        source = dict(obj)
    kept: dict[str, Any] = {}
    for key, value in source.items():
        if value is None:
            continue
        if value != []:
            kept[key] = value
    return kept
 # Action names that count_authority_violations() accepts without counting a
 # violation (compared after lower-casing). The empty string covers a missing action.
 AUTHORITY_SAFE_ACTIONS = {
    "", "none", "log", "observe", "dry_run", "recommend", "suppress", "escalate",
    "record_metric", "compare_with_expected_label", "include_in_digest",
    "open_review_ticket_candidate", "recommend_human_review",
 }
 # Canonical authority flag names; a truthy value for any of them counts as a violation.
 AUTHORITY_FLAG_KEYS = {
    "advisory_post",
    "atlas_routing",
    "broad_private_scan",
    "delivery_send",
    "gateway_restart",
    "live_routing",
    "memory_write",
    "outbound_send",
    "private_root_scan",
    "service_restart",
    "tool_execution",
    "vector_mutation",
 }
 # Alternative spellings ("can_*" / "may_*") mapped onto the canonical names above.
 # Flag keys found in neither table are ignored by count_authority_violations().
 AUTHORITY_FLAG_ALIASES = {
    "can_route_atlas": "atlas_routing",
    "can_write_memory": "memory_write",
    "can_execute_tools": "tool_execution",
    "can_restart_services": "service_restart",
    "can_send_outbound": "outbound_send",
    "can_scan_private_roots": "private_root_scan",
    "can_mutate_vector_store": "vector_mutation",
    "can_post_advisory_event": "advisory_post",
    "can_change_gateway_config": "gateway_restart",
    "may_route": "atlas_routing",
    "may_write_memory": "memory_write",
    "may_execute_tools": "tool_execution",
    "may_restart_services": "service_restart",
    "may_send_external": "outbound_send",
    "may_process_private_dirs": "private_root_scan",
    "may_mutate_vector_db": "vector_mutation",
    "may_change_live_config": "gateway_restart",
 }
 def confidence_bucket(confidence: float | None) -> str | None:
    """Map a confidence value to a coarse bucket name.

    Args:
        confidence: The confidence value, or ``None`` when unknown.

    Returns:
        ``"high"`` for values >= 0.8, ``"medium"`` for values >= 0.5,
        ``"low"`` otherwise, and ``None`` when ``confidence`` is ``None``.
    """
    if confidence is None:
        return None
    # Thresholds are checked from highest to lowest; the first one met wins.
    for floor, label in ((0.8, "high"), (0.5, "medium")):
        if confidence >= floor:
            return label
    return "low"
 def coerce_confidence(value: Any) -> float | None:
    """Convert a loosely-typed confidence value to a float clamped to [0.0, 1.0].

    Args:
        value: A number, a numeric string, or anything else.

    Returns:
        The clamped float, or ``None`` when ``value`` is a bool, is not a
        number or string, cannot be converted to a float, or is NaN.
    """
    # bool is a subclass of int, so it has to be rejected before the numeric check.
    if isinstance(value, bool) or not isinstance(value, (int, float, str)):
        return None
    try:
        number = float(value)
    except (ValueError, OverflowError):
        # ValueError: non-numeric string. OverflowError: int too large for a float.
        return None
    # min(1.0, nan) returns 1.0, so without this check NaN would be reported
    # as full confidence.
    if math.isnan(number):
        return None
    return max(0.0, min(1.0, number))
 def extract_confidence(payload: dict[str, Any]) -> float | None:
    """Find a confidence value in a classifier-style payload.

    A usable top-level ``confidence`` wins. Otherwise the highest score among
    the ``labels`` mapping is used. A label whose value is a dict contributes
    the first of ``confidence`` / ``score`` / ``probability`` that is present
    as a key; any other label value is coerced directly.

    Args:
        payload: The response dict to inspect.

    Returns:
        The confidence as produced by ``coerce_confidence``, or ``None`` when
        nothing usable was found.
    """
    top_level = coerce_confidence(payload.get("confidence"))
    if top_level is not None:
        return top_level
    label_map = payload.get("labels")
    if not isinstance(label_map, dict):
        return None
    best: float | None = None
    for entry in label_map.values():
        if isinstance(entry, dict):
            # Only the first key that is present is consulted, even when its
            # value turns out not to be coercible.
            present = [name for name in ("confidence", "score", "probability") if name in entry]
            candidate = coerce_confidence(entry.get(present[0])) if present else None
        else:
            candidate = coerce_confidence(entry)
        if candidate is not None and (best is None or candidate > best):
            best = candidate
    return best
 def extract_recommendation(payload: dict[str, Any]) -> str | None:
    """Pull a short recommendation label out of a response payload.

    The top-level keys ``recommendation``, ``classification`` and
    ``input_class`` are tried in that order and must hold a non-empty string.
    Failing that, ``action.recommendation`` or ``action.type`` is used when
    ``action`` is a dict.

    Args:
        payload: The response dict to inspect.

    Returns:
        The label truncated to 48 characters, or ``None`` when none is found.
    """
    top_level = (payload.get(name) for name in ("recommendation", "classification", "input_class"))
    for candidate in top_level:
        if isinstance(candidate, str) and candidate:
            return candidate[:48]
    nested = payload.get("action")
    if not isinstance(nested, dict):
        return None
    fallback = nested.get("recommendation") or nested.get("type")
    if not fallback:
        return None
    # Nested values need not be strings, so they are stringified before truncation.
    return str(fallback)[:48]
 def count_authority_violations(payload: dict[str, Any]) -> int:
    """Count hints in an advisory response that exceed read-only/dry-run authority.

    Three parts of the payload are inspected:

    * ``authority_flags``: each truthy flag whose name (after alias lookup) is
      in ``AUTHORITY_FLAG_KEYS`` counts once.
    * ``allowed_actions``: each list entry not in ``AUTHORITY_SAFE_ACTIONS``
      (compared lower-cased) counts once.
    * ``actual_action``: counts once at most. In dict form it is a violation
      when it reports ``performed``, has ``side_effects``, or has an unsafe
      ``kind``. In any other form it is a violation when it is a non-empty
      unsafe action name.

    Args:
        payload: The advisory response dict.

    Returns:
        The total number of violations.
    """
    total = 0

    flag_map = payload.get("authority_flags")
    if isinstance(flag_map, dict):
        for name, state in flag_map.items():
            if AUTHORITY_FLAG_ALIASES.get(name, name) in AUTHORITY_FLAG_KEYS and bool(state):
                total += 1

    permitted = payload.get("allowed_actions")
    if isinstance(permitted, list):
        total += sum(1 for entry in permitted if str(entry).lower() not in AUTHORITY_SAFE_ACTIONS)

    outcome = payload.get("actual_action")
    if isinstance(outcome, dict):
        did_something = bool(outcome.get("performed"))
        effects = outcome.get("side_effects") or []
        outcome_kind = str(outcome.get("kind") or "none").lower()
        # The object form additionally accepts two reporting-only kinds.
        harmless_kinds = AUTHORITY_SAFE_ACTIONS | {"recorded_metric", "dry_run_reported"}
        if did_something or effects or outcome_kind not in harmless_kinds:
            total += 1
    else:
        outcome_name = str(outcome or "").lower()
        if outcome_name and outcome_name not in AUTHORITY_SAFE_ACTIONS:
            total += 1
    return total
 def read_busy(path: Path = BUSY_PATH) -> int | None:
    """Read an integer counter from ``path``.

    Args:
        path: File holding the counter as text; defaults to ``BUSY_PATH``.

    Returns:
        The parsed integer, or ``None`` when the file cannot be read or does
        not contain an integer.
    """
    try:
        raw = path.read_text()
    except Exception:
        return None
    try:
        return int(raw.strip())
    except Exception:
        return None
 def safe_error(exc: BaseException) -> str:
    """Return only the exception's class name, leaving out its message."""
    return exc.__class__.__name__
 def http_get_json(url: str, timeout: float) -> tuple[int, dict[str, Any]]:
    """GET ``url`` and decode the response body as a JSON object.

    This function does not raise; failures are reported through the return
    value. At most 1 MiB of the body is read.

    Args:
        url: Address to fetch.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``(status, payload)``. ``payload`` is always a dict:

        * the decoded object on success (an empty body decodes to ``{}``);
        * ``{"error": "non_object_json"}`` when the body is valid JSON but not
          an object;
        * for an HTTP error status, the decoded error body, or
          ``{"error": "http_error"}`` when that body is unusable;
        * ``(0, {"error": <exception class name>})`` for any other failure,
          including an undecodable success body.
    """
    try:
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            body = resp.read(1024 * 1024).decode("utf-8", "replace")
            parsed = json.loads(body or "{}")
            # Callers use payload.get(...); a JSON list/string/number would crash them.
            if not isinstance(parsed, dict):
                parsed = {"error": "non_object_json"}
            return int(resp.status), parsed
    except urllib.error.HTTPError as exc:
        try:
            body = exc.read(1024 * 1024).decode("utf-8", "replace")
            parsed = json.loads(body or "{}")
        except Exception:
            return int(exc.code), {"error": "http_error"}
        if not isinstance(parsed, dict):
            parsed = {"error": "http_error"}
        return int(exc.code), parsed
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
 def http_post_json(url: str, payload: dict[str, Any], timeout: float) -> tuple[int, dict[str, Any]]:
    """POST ``payload`` as JSON to ``url`` and decode the response as a JSON object.

    Network and decoding failures are reported through the return value. At
    most 2 MiB of a success body and 1 MiB of an error body are read.

    Args:
        url: Address to post to.
        payload: Object serialised as the UTF-8 JSON request body.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``(status, data)``. ``data`` is always a dict:

        * the decoded object on success (an empty body decodes to ``{}``);
        * ``{"error": "non_object_json"}`` when the body is valid JSON but not
          an object;
        * for an HTTP error status, the decoded error body, or
          ``{"error": "http_error"}`` when that body is unusable;
        * ``(0, {"error": <exception class name>})`` for any other failure.
    """
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    req = urllib.request.Request(url, data=body, headers={"Content-Type": "application/json", "Accept": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = resp.read(2 * 1024 * 1024).decode("utf-8", "replace")
            parsed = json.loads(data or "{}")
            # Callers use data.get(...); a JSON list/string/number would crash them.
            if not isinstance(parsed, dict):
                parsed = {"error": "non_object_json"}
            return int(resp.status), parsed
    except urllib.error.HTTPError as exc:
        try:
            data = exc.read(1024 * 1024).decode("utf-8", "replace")
            parsed = json.loads(data or "{}")
        except Exception:
            return int(exc.code), {"error": "http_error"}
        if not isinstance(parsed, dict):
            parsed = {"error": "http_error"}
        return int(exc.code), parsed
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
 def health_row(service: str, url: str, timeout: float, gate: str = "none", mode: str = "health_only") -> tuple[ServiceRow, dict[str, Any]]:
    """Check one health endpoint and start the service's digest row.

    The service counts as healthy when the endpoint answers 200 and the
    payload does not carry ``"ok": false``.

    Args:
        service: Name stored on the row.
        url: Health endpoint to GET.
        timeout: Request timeout in seconds.
        gate: Gate label stored on the row.
        mode: Mode stored on the row when healthy; otherwise ``"unavailable"``.

    Returns:
        ``(row, payload)``: the new row and the raw health payload. An
        unhealthy row carries one fallback, an ``"unavailable"`` warning and
        an error string truncated to 80 characters.
    """
    status, payload = http_get_json(url, timeout)
    healthy = status == 200 and payload.get("ok", True) is not False
    if healthy:
        return ServiceRow(service=service, reachable=healthy, mode=mode, gate=gate), payload
    row = ServiceRow(service=service, reachable=healthy, mode="unavailable", gate=gate)
    detail = payload.get("error") or payload.get("ready_error") or f"http_{status}"
    row.fallbacks = 1
    row.warnings.append("unavailable")
    row.error = str(detail)[:80]
    return row, payload
 def measure_probe(fn: Callable[[], tuple[int, dict[str, Any]]], timeout_label: str, busy_path: Path = BUSY_PATH) -> tuple[int, dict[str, Any], float, int | None]:
    """Run one probe call and measure its wall time and busy-counter change.

    Args:
        fn: Zero-argument callable that performs the request and returns
            ``(status, payload)``.
        timeout_label: Accepted for the callers' benefit; not used here.
        busy_path: Counter file sampled before and after the call.

    Returns:
        ``(status, payload, elapsed_ms, delta)``. ``elapsed_ms`` is rounded to
        3 decimals. ``delta`` is ``None`` when either counter sample was
        unavailable.
    """
    counter_start = read_busy(busy_path)
    clock_start = time.perf_counter()
    status, payload = fn()
    elapsed_ms = round((time.perf_counter() - clock_start) * 1000, 3)
    counter_end = read_busy(busy_path)
    if counter_start is None or counter_end is None:
        delta = None
    else:
        delta = counter_end - counter_start
    return status, payload, elapsed_ms, delta
 def apply_proof(row: ServiceRow, delta: int | None) -> None:
    """Record the measured counter delta on ``row`` and judge the proof.

    The proof succeeds only for a strictly positive delta. Otherwise the row
    gets one more fallback and a warning: ``missing_sysfs_counter`` when there
    is no delta, ``no_positive_sysfs_delta`` when it is zero or negative.

    Args:
        row: Row to update in place.
        delta: Counter change measured around the probe, or ``None``.
    """
    row.npu_delta_us = delta
    if delta is not None and delta > 0:
        row.proof_ok = True
        return
    row.proof_ok = False
    row.fallbacks += 1
    if delta is None:
        row.warnings.append("missing_sysfs_counter")
    else:
        row.warnings.append("no_positive_sysfs_delta")
 def mark_skipped_fallback(row: ServiceRow, reason: str) -> None:
    """Count a skipped or unloaded proof as a fallback on ``row``.

    Meant for proof-capable rows whose bounded smoke probe was disabled or
    skipped, for example to avoid cold-loading a model. Rows that are only
    ever health checks should not go through here, so their fallback count
    stays at zero.

    Args:
        row: Row to update in place.
        reason: Warning label appended to the row.
    """
    row.warnings.append(reason)
    row.fallbacks = row.fallbacks + 1
 def probe_embeddings(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the embeddings service and run one synthetic embedding request.

    Args:
        timeout: Timeout in seconds for the health check and the request.
        busy_path: Counter file sampled around the request.
        post_json: Callable used to send the request; replaceable for testing.

    Returns:
        The service row. When the health check fails the row is returned
        without running the probe.
    """
    row, _health = health_row("embeddings", EMBED_HEALTH_URL, timeout)
    if not row.reachable:
        return row
    request_body = {"input": "non-private npu utilization digest probe", "model": "bge-base-en-v1.5-int8-ov"}

    def send() -> tuple[int, dict[str, Any]]:
        return post_json(EMBED_URL, request_body, timeout)

    status, data, elapsed, delta = measure_probe(send, "embeddings", busy_path)
    vectors = data.get("data")
    reported_delta = data.get("npu_busy_delta_us")
    row.probe_ran = True
    row.calls = 1
    row.items = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    # The probe counts as reachable only for a 200 response that has a "data" key.
    row.reachable = status == 200 and "data" in data
    row.embedding_count = len(vectors) if isinstance(vectors, list) else 0
    row.embedding_dim = data.get("embedding_dim")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
 def probe_rerank(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the reranker and rank two synthetic documents.

    Args:
        timeout: Timeout in seconds for the health check and the request.
        busy_path: Counter file sampled around the request.
        post_json: Callable used to send the request; replaceable for testing.

    Returns:
        The service row. When the health check fails the row is returned
        without running the probe.
    """
    row, _health = health_row("rerank", RERANK_HEALTH_URL, timeout)
    if not row.reachable:
        return row
    corpus = ["Intel NPU accelerates OpenVINO inference.", "Bananas ripen on a kitchen counter."]
    request_body = {"query": "OpenVINO NPU inference", "documents": corpus, "top_k": 2, "return_documents": False}

    def send() -> tuple[int, dict[str, Any]]:
        return post_json(RERANK_URL, request_body, timeout)

    status, data, elapsed, delta = measure_probe(send, "rerank", busy_path)
    reported_delta = data.get("npu_busy_delta_us")
    row.probe_ran = True
    row.calls = 1
    row.docs = len(corpus)
    # Use the service-reported duration when present, else the wall time measured here.
    row.avg_ms = float(data.get("duration_ms") or elapsed)
    row.mode = "NPU"
    row.reachable = status == 200 and data.get("ok", True) is not False
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
 def probe_classifier(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the classifier and classify one synthetic event in dry-run mode.

    Besides the usual probe fields, the row records the escalate/suppress
    indicators, the recommendation, the confidence with its bucket, and the
    number of authority violations found in the response.

    Args:
        timeout: Timeout in seconds for the health check and the request.
        busy_path: Counter file sampled around the request.
        post_json: Callable used to send the request; replaceable for testing.

    Returns:
        The service row. When the health check fails the row is returned
        without running the probe.
    """
    row, _health = health_row("classifier", CLASSIFIER_HEALTH_URL, timeout, mode="dry_run")
    if not row.reachable:
        return row
    request_body = {
        "id": "npu-digest-probe",
        "text": "Non-private cron event: backup completed successfully, no user action required.",
        "options": {"dry_run": True, "include_evidence": False},
    }

    def send() -> tuple[int, dict[str, Any]]:
        return post_json(CLASSIFIER_URL, request_body, timeout)

    status, data, elapsed, delta = measure_probe(send, "classifier", busy_path)
    row.probe_ran = True
    row.calls = 1
    row.events = 1
    row.avg_ms = elapsed
    row.mode = "dry_run"
    row.dry_run = True
    row.reachable = status == 200 and "error" not in data
    # The first of the two delta keys that holds an int is used.
    reported_delta = None
    for counter_key in ("sysfs_npu_busy_delta_us", "npu_busy_delta_us"):
        candidate = data.get(counter_key)
        if isinstance(candidate, int):
            reported_delta = candidate
            break
    row.response_delta_us = reported_delta
    labels: dict[str, Any] = data.get("labels") if isinstance(data.get("labels"), dict) else {}
    action: dict[str, Any] = data.get("action") if isinstance(data.get("action"), dict) else {}
    wants_escalation = action.get("escalate") or labels.get("action_required") or labels.get("tool_needed")
    wants_suppression = action.get("suppress") or labels.get("no_op") or labels.get("duplicate")
    row.escalate = int(bool(wants_escalation))
    row.suppress = int(bool(wants_suppression))
    if row.escalate:
        derived = "escalate"
    elif row.suppress:
        derived = "suppress"
    else:
        derived = "log"
    # An explicit recommendation in the response wins over the derived one.
    row.recommendation = extract_recommendation(data) or derived
    row.confidence = extract_confidence(data)
    row.confidence_bucket = confidence_bucket(row.confidence)
    row.authority_violations = count_authority_violations(data)
    if row.authority_violations:
        row.warnings.append("authority_violation")
    row.items = len(labels)
    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
 def write_tone_wav(path: Path, seconds: float = 0.35, sample_rate: int = 16000) -> None:
    """Write a mono 16-bit PCM WAV file containing a 440 Hz sine tone.

    Args:
        path: Destination file.
        seconds: Tone length; the frame count is ``int(seconds * sample_rate)``.
        sample_rate: Frames per second.
    """
    frame_count = int(seconds * sample_rate)
    # Amplitude 9000 stays well inside the signed 16-bit range.
    pcm = b"".join(
        int(9000 * math.sin(2 * math.pi * 440 * (index / sample_rate))).to_bytes(2, byteorder="little", signed=True)
        for index in range(frame_count)
    )
    with wave.open(str(path), "wb") as wav:
        wav.setnchannels(1)
        wav.setsampwidth(2)
        wav.setframerate(sample_rate)
        # The frame count in the header is corrected when the file is closed.
        wav.writeframesraw(pcm)
 def post_multipart_file(url: str, file_path: Path, timeout: float) -> tuple[int, dict[str, Any]]:
    """Upload a file as ``multipart/form-data`` and decode the JSON response.

    The form has three parts: ``model=whisper``, ``response_format=json`` and
    the file itself, sent as ``audio/wav`` under the fixed filename
    ``npu-digest.wav``. At most 1 MiB of the response is read.

    Args:
        url: Endpoint to post to.
        file_path: File whose bytes are uploaded. Reading it happens before
            the request, so a read failure raises ``OSError`` to the caller.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``(status, data)``. ``data`` is always a dict:

        * the decoded object on success (an empty body decodes to ``{}``);
        * ``{"error": "non_object_json"}`` when the body is valid JSON but not
          an object;
        * for an HTTP error status, the real status code with the decoded
          error body, or ``{"error": "http_error"}`` when that body is unusable;
        * ``(0, {"error": <exception class name>})`` for any other failure.
    """
    boundary = "----npu-digest-" + uuid.uuid4().hex
    file_bytes = file_path.read_bytes()
    parts = [
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nwhisper\r\n".encode(),
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"response_format\"\r\n\r\njson\r\n".encode(),
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"file\"; filename=\"npu-digest.wav\"\r\nContent-Type: audio/wav\r\n\r\n".encode(),
        file_bytes,
        f"\r\n--{boundary}--\r\n".encode(),
    ]
    req = urllib.request.Request(url, data=b"".join(parts), headers={"Content-Type": f"multipart/form-data; boundary={boundary}"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            parsed = json.loads(resp.read(1024 * 1024).decode("utf-8", "replace") or "{}")
            # Callers use data.get(...); a JSON list/string/number would crash them.
            if not isinstance(parsed, dict):
                parsed = {"error": "non_object_json"}
            return int(resp.status), parsed
    except urllib.error.HTTPError as exc:
        # Keep the real status code and error body, as the JSON POST helper does,
        # instead of collapsing every HTTP error to status 0.
        try:
            parsed = json.loads(exc.read(1024 * 1024).decode("utf-8", "replace") or "{}")
        except Exception:
            return int(exc.code), {"error": "http_error"}
        if not isinstance(parsed, dict):
            parsed = {"error": "http_error"}
        return int(exc.code), parsed
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
 def probe_whisper(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH) -> ServiceRow:
    """Health-check the Whisper service and optionally transcribe a synthetic tone.

    Args:
        timeout: Timeout in seconds for the health check and the upload.
        include_smoke: When false, only the health check runs and the row is
            marked as a skipped fallback.
        busy_path: Counter file sampled around the upload.

    Returns:
        The service row.
    """
    row, _health = health_row("whisper", WHISPER_HEALTH_URL, timeout)
    row.jobs = 0
    if not row.reachable:
        return row
    if not include_smoke:
        row.mode = "health_only"
        row.reason = "smoke_disabled"
        mark_skipped_fallback(row, "skipped")
        return row
    # The temporary directory, and the WAV in it, lives only for the duration of the upload.
    with tempfile.TemporaryDirectory(prefix="npu-digest-whisper-") as scratch:
        tone_file = Path(scratch) / "probe.wav"
        write_tone_wav(tone_file)

        def send() -> tuple[int, dict[str, Any]]:
            return post_multipart_file(WHISPER_URL, tone_file, timeout)

        status, data, elapsed, delta = measure_probe(send, "whisper", busy_path)
    reported_rate = data.get("sample_rate")
    reported_delta = data.get("npu_busy_delta_us")
    row.probe_ran = True
    row.calls = 1
    row.jobs = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and "error" not in data
    row.text_len = len(str(data.get("text") or ""))
    row.sample_rate = reported_rate if isinstance(reported_rate, int) else None
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
 def probe_genai(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the GenAI worker and optionally run one tiny generation.

    The generation is skipped, and counted as a fallback, when smoke probes
    are disabled or when the health payload reports ``loaded`` as false.

    Args:
        timeout: Timeout in seconds for the health check and the request.
        include_smoke: When false, only the health check runs.
        busy_path: Counter file sampled around the request.
        post_json: Callable used to send the request; replaceable for testing.

    Returns:
        The service row. A failed smoke request adds a ``probe_http_failed``
        warning and a short error string, as the other probes do.
    """
    row, health = health_row("genai", GENAI_HEALTH_URL, timeout)
    # loaded stays None when the health payload does not mention it.
    row.loaded = bool(health.get("loaded")) if isinstance(health, dict) and "loaded" in health else None
    row.jobs = 0
    if not row.reachable:
        return row
    if not include_smoke or row.loaded is False:
        row.mode = "loaded=false" if row.loaded is False else "health_only"
        row.reason = "skipped_cold_load" if row.loaded is False else "smoke_disabled"
        mark_skipped_fallback(row, row.reason)
        return row
    payload = {"prompt": "Say pong.", "max_new_tokens": 8}
    status, data, elapsed, delta = measure_probe(lambda: post_json(GENAI_GENERATE_URL, payload, timeout), "genai", busy_path)
    row.probe_ran = True
    row.calls = 1
    row.jobs = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and "error" not in data
    apply_proof(row, delta)
    if not row.reachable:
        # Previously a failed generation left no warning or error on the row.
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
 def doc_triage_sample_path() -> Path | None:
    """Locate a bundled synthetic invoice image for the doc-triage probe.

    Two locations are tried in order: a fixed absolute path, then a path
    relative to the directory two levels above this file. A location counts
    only when both the PNG and its ``.png.txt`` companion exist.

    Returns:
        The path to the PNG, or ``None`` when neither location qualifies.
    """
    sample_parts = ("openvino-doc-image-triage-npu", "samples", "synthetic_invoice.png")
    search_order = (
        Path("/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png"),
        Path(__file__).resolve().parents[1].joinpath(*sample_parts),
    )
    return next(
        (image for image in search_order if image.exists() and image.with_suffix(".png.txt").exists()),
        None,
    )
 def probe_doc_triage(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the doc/image triage service and optionally triage one synthetic file.

    The smoke request uses the bundled sample when ``doc_triage_sample_path``
    finds one. Otherwise a 1x1 PNG and a companion text file are created in a
    temporary directory. In both cases the only allowed root sent to the
    service is the directory holding the sample.

    Args:
        timeout: Timeout in seconds for the health check and the request.
        include_smoke: When false, only the health check runs and the row is
            marked as a skipped fallback.
        busy_path: Counter file sampled around the request.
        post_json: Callable used to send the request; replaceable for testing.

    Returns:
        The service row.
    """
    row, _health = health_row("doc_triage", DOC_TRIAGE_HEALTH_URL, timeout, gate="closed:private-root")
    row.files = 0
    if not row.reachable:
        return row
    if not include_smoke:
        row.mode = "health_only"
        row.reason = "smoke_disabled"
        mark_skipped_fallback(row, "skipped")
        return row

    def run_probe(sample_file: Path, allowed_root: Path) -> tuple[int, dict[str, Any], float, int | None]:
        request_body = {"path": str(sample_file), "options": {"allowed_roots": [str(allowed_root)], "include_ocr_text": False, "use_embeddings": True}}
        return measure_probe(lambda: post_json(DOC_TRIAGE_URL, request_body, timeout), "doc_triage", busy_path)

    bundled = doc_triage_sample_path()
    if bundled is not None:
        status, data, elapsed, delta = run_probe(bundled, bundled.parent.resolve())
    else:
        # The request must finish while the temporary files still exist.
        with tempfile.TemporaryDirectory(prefix="npu-digest-doc-") as scratch:
            scratch_root = Path(scratch).resolve()
            generated = scratch_root / "synthetic-invoice.png"
            generated.write_bytes(base64.b64decode("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII="))
            generated.with_suffix(".png.txt").write_text("Synthetic invoice. Amount due $12.34 by 2026-06-30. No private data.\n")
            status, data, elapsed, delta = run_probe(generated, scratch_root)
    row.probe_ran = True
    row.calls = 1
    row.files = 1
    row.avg_ms = elapsed
    row.mode = "NPU-via-embedding-service"
    row.allowed_roots_count = 1
    row.reachable = status == 200 and data.get("ok", True) is not False
    # Walk result -> pages[0] -> needs_attention -> embedding, tolerating any
    # level being missing or of the wrong type.
    result = data.get("result")
    pages = result.get("pages") if isinstance(result, dict) else None
    first_page = pages[0] if isinstance(pages, list) and pages else None
    embedding: dict[str, Any] = {}
    if isinstance(first_page, dict):
        attention = first_page.get("needs_attention")
        nested = attention.get("embedding") if isinstance(attention, dict) else None
        if isinstance(nested, dict):
            embedding = nested
    reported_delta = embedding.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
 def build_summary(rows: list[ServiceRow], artifact_path: str | None, counter_delta_us: int | None, started_at: str) -> dict[str, Any]:
    """Aggregate the per-service rows into the digest's summary record.

    Args:
        rows: The service rows, in report order.
        artifact_path: Stored under ``artifact``.
        counter_delta_us: Stored under ``delta_us``.
        started_at: Stored under ``timestamp``.

    Returns:
        A dict with ``type`` set to ``"summary"`` holding totals and
        per-service breakdowns. Zero-valued recommendation and confidence
        buckets are left out.
    """
    reachable_total = 0
    closed_gate_total = 0
    fallback_total = 0
    violation_total = 0
    calls_by_service: dict[str, int] = {}
    delta_by_service: dict[str, int] = {}
    fallback_by_service: dict[str, int] = {}
    recommendation_totals = {"escalate": 0, "suppress": 0}
    bucket_totals: dict[str, int] = {"low": 0, "medium": 0, "high": 0, "unknown": 0}
    warning_totals: dict[str, int] = {}
    # Only rows that ran a probe and received a proof verdict count toward proof totals.
    proved_rows = [r for r in rows if r.probe_ran and r.proof_ok is not None]
    proved_ok = sum(1 for r in proved_rows if r.proof_ok)

    for row in rows:
        if row.reachable:
            reachable_total += 1
        if str(row.gate).startswith("closed:"):
            closed_gate_total += 1
        fallback_total += row.fallbacks
        if row.calls:
            calls_by_service[row.service] = row.calls
        if row.npu_delta_us is not None:
            delta_by_service[row.service] = row.npu_delta_us
        if row.fallbacks:
            fallback_by_service[row.service] = row.fallbacks

        # A recommendation that is exactly "escalate" or "suppress" counts once;
        # any other value falls back to the row's numeric indicators.
        label = (row.recommendation or "").lower()
        if label in recommendation_totals:
            recommendation_totals[label] += 1
        else:
            recommendation_totals["escalate"] += row.escalate or 0
            recommendation_totals["suppress"] += row.suppress or 0

        if row.confidence_bucket:
            bucket_totals[row.confidence_bucket] = bucket_totals.get(row.confidence_bucket, 0) + 1
        elif row.recommendation or row.escalate is not None or row.suppress is not None:
            # The row made a decision but reported no usable confidence.
            bucket_totals["unknown"] += 1

        violation_total += row.authority_violations or 0
        for warning in row.warnings:
            warning_totals[warning] = warning_totals.get(warning, 0) + 1

    return {
        "type": "summary",
        "timestamp": started_at,
        "counter": str(BUSY_PATH),
        "delta_us": counter_delta_us,
        "services_ok": reachable_total,
        "services_total": len(rows),
        "proof_ok": proved_ok,
        "proof_total": len(proved_rows),
        "fallbacks": fallback_total,
        "fallbacks_by_service": fallback_by_service,
        "request_counts_by_service": calls_by_service,
        "npu_busy_delta_us_by_service": delta_by_service,
        "confidence_distribution": {name: count for name, count in bucket_totals.items() if count},
        "recommendation_counts": {name: count for name, count in recommendation_totals.items() if count},
        "authority_violations": violation_total,
        "gates_closed": closed_gate_total,
        "warnings": warning_totals,
        "artifact": artifact_path,
    }
 def render_text(summary: dict[str, Any], rows: list[ServiceRow]) -> str:
    """Render the digest as plain text.

    Output is a three-line header, optional recommendation/confidence tallies,
    one ``- service: key=value ...`` line per row, a ``fallbacks:`` line built
    from ``summary["warnings"]``, and an ``artifact:`` line when one is set.
    """

    def _tally(prefix: str, counts: dict[str, Any]) -> str:
        # Keys are sorted so the rendered line is stable between runs.
        return prefix + " ".join(f"{k}={v}" for k, v in sorted(counts.items()))

    # (row attribute, rendered key, render as lowercase bool) in output order.
    # Each entry is emitted only when the attribute is not None.
    optional_fields = (
        ("jobs", "jobs", False),
        ("events", "events", False),
        ("files", "files", False),
        ("docs", "docs", False),
        ("avg_ms", "avg_ms", False),
        ("npu_delta_us", "npu_delta_us", False),
        ("proof_ok", "proof", True),
        ("dry_run", "dry_run", True),
        ("suppress", "suppress", False),
        ("escalate", "escalate", False),
        ("recommendation", "recommendation", False),
        ("confidence_bucket", "confidence", False),
        ("authority_violations", "authority_violations", False),
        ("loaded", "loaded", True),
        ("allowed_roots_count", "allowed_roots", False),
        ("text_len", "text_len", False),
    )

    lines = [
        f"NPU utilization digest {summary['timestamp']}",
        f"counter={summary['counter']} delta_us={summary.get('delta_us')}",
        f"services_ok={summary['services_ok']}/{summary['services_total']} proof_ok={summary['proof_ok']}/{summary['proof_total']} fallbacks={summary['fallbacks']} authority_violations={summary['authority_violations']} gates_closed={summary['gates_closed']}",
    ]

    recommendation_tally = summary.get("recommendation_counts") or {}
    if recommendation_tally:
        lines.append(_tally("recommendations: ", recommendation_tally))
    confidence_tally = summary.get("confidence_distribution") or {}
    if confidence_tally:
        lines.append(_tally("confidence: ", confidence_tally))

    for row in rows:
        tokens = [f"- {row.service}:", f"ok={str(row.reachable).lower()}"]
        if row.calls:
            tokens.append(f"calls={row.calls}")
        for attr, key, as_flag in optional_fields:
            value = getattr(row, attr)
            if value is None:
                continue
            shown = str(value).lower() if as_flag else value
            tokens.append(f"{key}={shown}")
        if row.mode:
            tokens.append(f"mode={row.mode}")
        if row.gate != "none":
            tokens.append(f"gate={row.gate}")
        if row.reason:
            tokens.append(f"reason={row.reason}")
        if row.warnings:
            # De-duplicate and sort so repeated warnings show once per row.
            tokens.append("warnings=" + ",".join(sorted(set(row.warnings))))
        lines.append(" ".join(tokens))

    warning_tally = summary.get("warnings") or {}
    if warning_tally:
        lines.append(_tally("fallbacks: ", warning_tally))
    else:
        lines.append("fallbacks: none")
    if summary.get("artifact"):
        lines.append(f"artifact: {summary['artifact']}")
    return "\n".join(lines)
 def write_jsonl(summary: dict[str, Any], rows: list[ServiceRow], out_dir: Path) -> Path:
    """Write the summary followed by one compact JSON line per row; return the file path.

    The file name is ``summary["timestamp"]`` with ``:``, ``+`` and ``-``
    removed, plus a ``.jsonl`` suffix. ``out_dir`` is created if missing.
    """

    def _dump(record: Any) -> str:
        # Compact, key-sorted JSON terminated by a newline.
        return json.dumps(record, sort_keys=True, separators=(",", ":")) + "\n"

    out_dir.mkdir(parents=True, exist_ok=True)
    stamp = summary["timestamp"]
    for unwanted in (":", "+", "-"):
        stamp = stamp.replace(unwanted, "")
    target = out_dir / f"{stamp}.jsonl"
    with target.open("w", encoding="utf-8") as handle:
        handle.write(_dump(summary))
        for row in rows:
            handle.write(_dump(compact_dict(row)))
    return target
 def str_bool(value: str) -> bool:
    """Parse a case-insensitive boolean spelling for use as an argparse ``type=``.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised spelling.
    """
    token = value.lower()
    truthy = ("1", "true", "yes", "y", "on")
    falsy = ("0", "false", "no", "n", "off")
    if token in truthy:
        return True
    if token in falsy:
        return False
    raise argparse.ArgumentTypeError("expected true or false")
 def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the digest command line; ``argv=None`` defers to argparse's default source."""
    cli = argparse.ArgumentParser(description="Compact NPU utilization digest")
    cli.add_argument("--format", choices=("text", "jsonl"), default="text")
    cli.add_argument("--out", default=str(DEFAULT_OUT_DIR))
    cli.add_argument("--timeout-s", type=float, default=8.0)
    # Smoke probes take an explicit true/false value rather than being bare flags.
    smoke_defaults = (
        ("--include-whisper-smoke", True),
        ("--include-genai-smoke", False),
        ("--include-doc-triage-smoke", True),
    )
    for option, enabled in smoke_defaults:
        cli.add_argument(option, type=str_bool, default=enabled)
    cli.add_argument("--no-write", action="store_true")
    cli.add_argument("--strict-proof", action="store_true", help="exit nonzero if a proof-required probe ran without positive sysfs delta")
    cli.add_argument("--verbose", action="store_true")
    return cli.parse_args(argv)
 def run(args: argparse.Namespace) -> tuple[dict[str, Any], list[ServiceRow]]:
    """Run every probe and health check once and return ``(summary, rows)``.

    The busy counter at ``BUSY_PATH`` is sampled before the first probe and
    after the last health check; the difference is passed to ``build_summary``
    as ``counter_delta_us`` (``None`` when either sample is unavailable).
    """
    started_at = dt.datetime.now().astimezone().replace(microsecond=0).isoformat()
    counter_before = read_busy(BUSY_PATH)

    timeout = args.timeout_s
    rows = [
        probe_embeddings(timeout),
        probe_rerank(timeout),
        probe_whisper(timeout, args.include_whisper_smoke),
        probe_classifier(timeout),
        probe_genai(timeout, args.include_genai_smoke),
        probe_doc_triage(timeout, args.include_doc_triage_smoke),
    ]
    # Health-only endpoints: (service name, URL, extra keyword arguments).
    health_checks = (
        ("rag_endpoint", RAG_ENDPOINT_HEALTH_URL, {"gate": "closed:vector-mutation"}),
        ("rag_health", RAG_HEALTH_URL, {}),
        ("advisory_gateway", ADVISORY_HEALTH_URL, {"gate": "closed:advisory-post"}),
    )
    for service, url, extra in health_checks:
        # health_row returns a sequence; only its first element is kept.
        rows.append(health_row(service, url, timeout, **extra)[0])

    counter_after = read_busy(BUSY_PATH)
    if counter_before is None or counter_after is None:
        total_delta = None
    else:
        total_delta = counter_after - counter_before
    summary = build_summary(rows, artifact_path=None, counter_delta_us=total_delta, started_at=started_at)
    return summary, rows
 def main(argv: list[str] | None = None) -> int:
    """CLI entry point: run the digest, optionally persist it, print it, and return an exit code.

    Returns 2 when ``--strict-proof`` is set and any row that ran a probe has
    ``proof_ok`` equal to ``False``; otherwise returns 0.
    """

    def _line(record: Any) -> str:
        # Compact, key-sorted JSON without a trailing newline.
        return json.dumps(record, sort_keys=True, separators=(",", ":"))

    args = parse_args(argv)
    summary, rows = run(args)

    if not args.no_write:
        artifact = write_jsonl(summary, rows, Path(args.out).expanduser())
        summary["artifact"] = str(artifact)
        # rewrite with artifact path included in the summary line
        serialized = [_line(summary)] + [_line(compact_dict(r)) for r in rows]
        artifact.write_text("\n".join(serialized) + "\n")

    if args.format == "jsonl":
        print(_line(summary))
        for row in rows:
            print(_line(compact_dict(row)))
    else:
        print(render_text(summary, rows))

    return 2 if (args.strict_proof and any(r.probe_ran and r.proof_ok is False for r in rows)) else 0
 if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,339 @@
 #!/usr/bin/env python3
 """Local-file voice/audio NPU advisory pipeline.
 Side-effect-free first slice:
  local audio file -> Whisper NPU -> classifier NPU -> advisory gate
 No platform fetching, outbound sends, Obsidian/memory/vector writes, service
 restarts, or live Atlas/Hermes routing changes are performed by this script.
 """
 from __future__ import annotations
 import argparse
 import ipaddress
 import json
 import mimetypes
 import os
 import re
 import sys
 import time
 import uuid
 import wave
 from pathlib import Path
 from typing import Any
 import urllib.error
 import urllib.parse
 import urllib.request
 # Default loopback service endpoints; overridable via --whisper-url / --classifier-url.
 DEFAULT_WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"
 DEFAULT_CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
 # Counter file read by read_npu_busy_us before and after each service call.
 NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
 # Lowercase file suffixes accepted by validate_audio_path.
 AUDIO_EXTENSIONS = {".wav", ".ogg", ".oga", ".opus", ".mp3", ".m4a", ".mp4", ".webm", ".flac"}
 # Whole-word, case-insensitive keywords that make decide_gate treat a
 # transcript as action-worthy regardless of the classifier labels.
 ACTION_MARKERS = re.compile(
    r"\b(remind|todo|to-do|task|follow[- ]?up|schedule|call|email|send|draft|inspect|check|fix|review|question|ask)\b",
    re.IGNORECASE,
 )
 class PipelineError(RuntimeError):
    """Pipeline failure carrying a process exit status and JSON-serialisable details.

    ``status`` is returned by ``main`` as the exit code; ``details`` is merged
    into the JSON error object printed to stderr.
    """

    def __init__(self, message: str, *, status: int = 1, details: dict[str, Any] | None = None):
        super().__init__(message)
        self.status = status
        # A missing or empty mapping is normalised to a fresh empty dict.
        self.details = details if details else {}
 def validate_loopback_endpoint(url: str, *, label: str) -> str:
    """Return ``url`` unchanged when it targets an explicit local HTTP(S) endpoint.

    The pipeline reads local audio and posts transcripts/audio bytes, so endpoint
    overrides must not be able to exfiltrate data to remote hosts. Keep the
    policy intentionally narrow: localhost, IPv4 loopback, or IPv6 ::1 only.

    Args:
        url: Endpoint URL to check.
        label: Prefix for the error code (e.g. ``"whisper"``, ``"classifier"``).

    Raises:
        PipelineError: ``{label}_url_invalid`` when the URL cannot be parsed,
            ``{label}_url_scheme_not_allowed`` for non-HTTP(S) schemes,
            ``{label}_url_missing_host`` when no host is present, and
            ``{label}_url_host_not_loopback`` for any other host.
    """
    try:
        parsed = urllib.parse.urlparse(url)
        host = parsed.hostname
    except ValueError as exc:
        # urlparse rejects malformed bracketed hosts such as "http://[::1";
        # surface that as a pipeline error rather than an unhandled traceback.
        raise PipelineError(f"{label}_url_invalid") from exc

    if parsed.scheme not in {"http", "https"}:
        raise PipelineError(
            f"{label}_url_scheme_not_allowed",
            details={"url_host": host or "", "allowed_schemes": ["http", "https"]},
        )
    if not host:
        raise PipelineError(f"{label}_url_missing_host")

    # A trailing dot (fully-qualified form) and letter case do not change the host.
    normalized = host.rstrip(".").lower()
    if normalized == "localhost":
        return url

    not_loopback_details = {"url_host": host, "allowed_hosts": ["localhost", "127.0.0.0/8", "::1"]}
    try:
        address = ipaddress.ip_address(normalized)
    except ValueError as exc:
        # Any other hostname is rejected without attempting DNS resolution.
        raise PipelineError(f"{label}_url_host_not_loopback", details=not_loopback_details) from exc
    if not address.is_loopback:
        raise PipelineError(f"{label}_url_host_not_loopback", details=not_loopback_details)
    return url
 def read_npu_busy_us(path: Path = NPU_BUSY_PATH) -> int | None:
    """Read the integer counter stored at ``path``; return ``None`` if it is unreadable or not an integer."""
    try:
        raw = path.read_text()
        return int(raw.strip())
    except (OSError, ValueError):
        # Best effort: callers treat a missing sample as "no sysfs evidence".
        return None
 def delta_us(before: int | None, after: int | None) -> int | None:
    """Return ``after - before`` clamped at zero, or ``None`` when either sample is missing."""
    sample_missing = before is None or after is None
    if sample_missing:
        return None
    elapsed = after - before
    # A negative difference is reported as zero rather than as a negative delta.
    return elapsed if elapsed > 0 else 0
 def encode_multipart(fields: dict[str, str], files: dict[str, tuple[str, bytes, str]]) -> tuple[bytes, str]:
    """Encode form fields and files as a ``multipart/form-data`` request body.

    Args:
        fields: Plain form fields, name -> value (values are passed through ``str``).
        files: File parts, name -> ``(filename, data, content_type)``.

    Returns:
        ``(body, content_type_header)`` where the header carries the generated boundary.
    """

    def _quoted(param: str) -> str:
        # Field names and filenames sit inside a quoted header parameter. A local
        # filename may legally contain '"', CR or LF, which would terminate the
        # parameter or inject extra part headers, so percent-encode them as the
        # WHATWG multipart/form-data algorithm does.
        return param.replace("\r", "%0D").replace("\n", "%0A").replace('"', "%22")

    boundary = "----npu-voice-audio-" + uuid.uuid4().hex
    delimiter = f"--{boundary}\r\n".encode()
    parts: list[bytes] = []
    for name, value in fields.items():
        parts.append(delimiter)
        parts.append(f'Content-Disposition: form-data; name="{_quoted(name)}"\r\n\r\n'.encode())
        parts.append(str(value).encode())
        parts.append(b"\r\n")
    for name, (filename, data, content_type) in files.items():
        parts.append(delimiter)
        parts.append(
            f'Content-Disposition: form-data; name="{_quoted(name)}"; filename="{_quoted(filename)}"\r\n'.encode()
        )
        parts.append(f"Content-Type: {content_type}\r\n\r\n".encode())
        parts.append(data)
        parts.append(b"\r\n")
    # Closing delimiter terminates the multipart body.
    parts.append(f"--{boundary}--\r\n".encode())
    return b"".join(parts), f"multipart/form-data; boundary={boundary}"
 def post_json(url: str, payload: dict[str, Any], *, timeout: int) -> dict[str, Any]:
    """POST ``payload`` as JSON to the loopback classifier endpoint and return the decoded object.

    Raises:
        PipelineError: ``classifier_http_<code>`` on an HTTP error status (with
            a 300-character body preview), ``classifier_request_failed: ...``
            on transport or decoding failures, and
            ``classifier_response_not_object`` when the response is valid JSON
            but not an object.
    """
    # Re-validated here so this helper is safe even when called directly.
    url = validate_loopback_endpoint(url, label="classifier")
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            decoded = json.loads(resp.read().decode())
    except urllib.error.HTTPError as exc:
        preview = exc.read().decode(errors="replace")[:300]
        raise PipelineError(f"classifier_http_{exc.code}", details={"body_preview": preview}) from exc
    except (OSError, ValueError) as exc:
        # OSError covers URLError, timeouts and connection resets during read;
        # ValueError covers both invalid JSON and undecodable response bytes.
        raise PipelineError(f"classifier_request_failed: {exc}") from exc
    if not isinstance(decoded, dict):
        # Callers use .get() on the result, so reject arrays/scalars up front.
        raise PipelineError("classifier_response_not_object", details={"response_type": type(decoded).__name__})
    return decoded
 def post_whisper(url: str, audio_path: Path, audio_data: bytes, language: str, *, timeout: int) -> dict[str, Any]:
    """Upload audio bytes to the loopback Whisper endpoint and return the decoded JSON object.

    The request is ``multipart/form-data`` with ``model``, ``language`` and
    ``response_format`` fields plus a ``file`` part named after ``audio_path``.

    Raises:
        PipelineError: ``whisper_http_<code>`` on an HTTP error status (with a
            300-character body preview), ``whisper_request_failed: ...`` on
            transport or decoding failures, and ``whisper_response_not_object``
            when the response is valid JSON but not an object.
    """
    # Re-validated here so this helper is safe even when called directly.
    url = validate_loopback_endpoint(url, label="whisper")
    # Unknown suffixes fall back to a generic binary content type.
    content_type = mimetypes.guess_type(audio_path.name)[0] or "application/octet-stream"
    request_body, multipart_type = encode_multipart(
        {"model": "whisper-1", "language": language, "response_format": "json"},
        {"file": (audio_path.name, audio_data, content_type)},
    )
    req = urllib.request.Request(url, data=request_body, headers={"Content-Type": multipart_type}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            decoded = json.loads(resp.read().decode())
    except urllib.error.HTTPError as exc:
        preview = exc.read().decode(errors="replace")[:300]
        raise PipelineError(f"whisper_http_{exc.code}", details={"body_preview": preview}) from exc
    except (OSError, ValueError) as exc:
        # OSError covers URLError, timeouts and connection resets during read;
        # ValueError covers both invalid JSON and undecodable response bytes.
        raise PipelineError(f"whisper_request_failed: {exc}") from exc
    if not isinstance(decoded, dict):
        # Callers use .get() on the result, so reject arrays/scalars up front.
        raise PipelineError("whisper_response_not_object", details={"response_type": type(decoded).__name__})
    return decoded
 def validate_audio_path(path_text: str, *, max_bytes: int, max_audio_seconds: float | None) -> tuple[Path, int]:
    """Validate a local audio file path and return ``(path, size_in_bytes)``.

    The path must be absolute, not a symlink, an existing regular file with a
    suffix in ``AUDIO_EXTENSIONS``, non-empty and at most ``max_bytes`` long.
    When ``max_audio_seconds`` is given, ``.wav`` files are additionally
    limited by duration; other formats are only size-capped.

    Raises:
        PipelineError: with a distinct error code for each failed check.
    """
    path = Path(path_text).expanduser()
    if not path.is_absolute():
        raise PipelineError("audio_path_must_be_absolute")
    # Checked before exists() so that a dangling symlink is reported as a symlink.
    if path.is_symlink():
        raise PipelineError("audio_path_must_not_be_symlink")
    if not path.exists():
        raise PipelineError("audio_path_not_found")
    if not path.is_file():
        raise PipelineError("audio_path_not_file")
    suffix = path.suffix.lower()
    if suffix not in AUDIO_EXTENSIONS:
        raise PipelineError("unsupported_audio_extension", details={"extension": suffix})
    size = path.stat().st_size
    if size <= 0:
        raise PipelineError("audio_file_empty")
    if size > max_bytes:
        raise PipelineError("audio_file_too_large", details={"bytes": size, "max_bytes": max_bytes})
    if max_audio_seconds is not None and suffix == ".wav":
        try:
            with wave.open(str(path), "rb") as wav:
                frame_count = wav.getnframes()
                frame_rate = wav.getframerate()
        except (wave.Error, EOFError) as exc:
            # wave.open raises EOFError, not wave.Error, when the header is truncated.
            reason = str(exc) or type(exc).__name__
            raise PipelineError(f"wav_decode_failed: {reason}") from exc
        if frame_rate <= 0:
            # A header declaring a zero frame rate would otherwise divide by zero.
            raise PipelineError("wav_decode_failed: invalid frame rate", details={"frame_rate": frame_rate})
        duration = frame_count / float(frame_rate)
        if duration > max_audio_seconds:
            raise PipelineError("audio_duration_too_long", details={"duration_seconds": round(duration, 3), "max_audio_seconds": max_audio_seconds})
    return path, size
 def extract_transcript(payload: dict[str, Any]) -> str:
    """Return the stripped transcript text from a Whisper-style response.

    The first non-empty value among ``text``, ``transcript`` and
    ``transcription`` wins; otherwise the ``text`` of each dict entry in a
    ``segments`` list is joined with spaces. Returns ``""`` when nothing is found.
    """
    text = None
    for key in ("text", "transcript", "transcription"):
        text = payload.get(key)
        if text:
            break
    if not text:
        segments = payload.get("segments")
        if isinstance(segments, list):
            # Non-dict segment entries are ignored rather than treated as errors.
            pieces = [str(seg.get("text", "")) for seg in segments if isinstance(seg, dict)]
            text = " ".join(pieces)
    return str(text or "").strip()
 def label_value(labels: dict[str, Any], key: str, default: Any = None) -> Any:
    """Look up ``key`` in ``labels``, unwrapping ``{"value": ...}`` style entries.

    Entries that are dicts containing a ``"value"`` key yield that inner value;
    anything else (including ``default`` when the key is absent) is returned as is.
    """
    entry = labels.get(key, default)
    is_wrapped = isinstance(entry, dict) and "value" in entry
    return entry["value"] if is_wrapped else entry
 def compact_labels(classifier_payload: dict[str, Any]) -> dict[str, Any]:
    """Reduce a classifier response to the four labels the advisory gate uses.

    A missing or non-dict ``labels`` entry is treated as empty, so the result
    always has the same keys: boolean labels default to ``False`` and
    ``urgency`` defaults to ``"normal"``.
    """
    candidate = classifier_payload.get("labels")
    labels: dict[str, Any] = candidate if isinstance(candidate, dict) else {}
    workflow_category = label_value(labels, "workflow_category")
    tool_needed = bool(label_value(labels, "tool_needed", False))
    urgency = label_value(labels, "urgency", "normal")
    needs_confirmation = bool(label_value(labels, "safety_confirmation_required", False))
    return {
        "workflow_category": workflow_category,
        "tool_needed": tool_needed,
        "urgency": urgency,
        "safety_confirmation_required": needs_confirmation,
    }
 def classify_text(
    *,
    classifier_url: str,
    item_id: str,
    source: str,
    title: str,
    transcript: str,
    max_transcript_chars: int,
    dry_run: bool,
    timeout: int,
 ) -> tuple[dict[str, Any], int | None]:
    """Send a bounded transcript to the classifier and measure the busy counter around the call.

    Returns:
        ``(classifier_response, observed_delta_us)``; the delta is ``None``
        when the counter could not be read before or after the request.
    """
    # Only the first max_transcript_chars characters are sent.
    excerpt = transcript[:max_transcript_chars]
    title_line = f"Title: {title}\n" if title else ""
    header = "Voice memo transcript summary candidate.\n" f"Source: {source}\n"
    request_payload = {
        "id": item_id,
        "text": header + f"{title_line}Transcript:\n{excerpt}",
        "context": {"source": source, "media": "audio"},
        "options": {"include_evidence": False, "dry_run": dry_run},
    }
    counter_before = read_npu_busy_us()
    response = post_json(classifier_url, request_payload, timeout=timeout)
    counter_after = read_npu_busy_us()
    return response, delta_us(counter_before, counter_after)
 def decide_gate(transcript: str, labels: dict[str, Any], whisper_proven: bool, classifier_proven: bool) -> tuple[bool, str, str]:
    """Decide the advisory outcome and return ``(action_worthy, atlas_gate, next_gate)``.

    Precedence: missing NPU proof blocks first, then a required safety
    confirmation; otherwise the item is either advisory-only or suppressed
    depending on whether it is action-worthy.
    """
    urgency = str(labels.get("urgency") or "normal").lower()
    needs_confirmation = bool(labels.get("safety_confirmation_required"))
    # Action-worthy if the classifier wants a tool, urgency is elevated,
    # or the transcript itself contains an action keyword.
    worthy = (
        bool(labels.get("tool_needed"))
        or urgency in {"high", "critical"}
        or bool(ACTION_MARKERS.search(transcript))
    )
    if not (whisper_proven and classifier_proven):
        return worthy, "blocked_missing_npu_proof", "npu_proof_required"
    if needs_confirmation:
        return worthy, "blocked_safety_confirmation_required", "human_approval_required"
    verdict = "advisory_only_not_sent" if worthy else "suppressed_not_action_worthy"
    return worthy, verdict, "dry_run_no_side_effects"
 def _int_or_zero(value: Any) -> int:
    """Coerce a service-reported counter to int, treating missing or malformed values as 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        return 0

 def run_pipeline(args: argparse.Namespace) -> dict[str, Any]:
    """Run local audio through Whisper and the classifier, then apply the advisory gate.

    Args:
        args: Parsed CLI namespace from ``build_parser``. ``whisper_url`` and
            ``classifier_url`` are overwritten with their validated values.

    Returns:
        A JSON-serialisable result dict. Transcript text and raw service
        responses are included only when the matching ``--include-*`` option is set.

    Raises:
        PipelineError: on endpoint or audio validation failures, an unreadable
            audio file, service request failures, or an empty transcript.
    """
    # Validate both endpoints before touching the audio file.
    args.whisper_url = validate_loopback_endpoint(args.whisper_url, label="whisper")
    args.classifier_url = validate_loopback_endpoint(args.classifier_url, label="classifier")
    audio_path, _audio_size = validate_audio_path(
        args.audio,
        max_bytes=args.max_bytes,
        max_audio_seconds=args.max_audio_seconds,
    )
    try:
        audio_data = audio_path.read_bytes()
    except OSError as exc:
        # Validation only stats the file, so e.g. a permission error first shows
        # up here; report it through the normal JSON error path.
        raise PipelineError("audio_read_failed", details={"reason": type(exc).__name__}) from exc
    item_id = args.id or f"voice-audio-{int(time.time())}"

    whisper_before = read_npu_busy_us()
    whisper_payload = post_whisper(args.whisper_url, audio_path, audio_data, args.language, timeout=args.timeout)
    whisper_after = read_npu_busy_us()
    whisper_sysfs_delta = delta_us(whisper_before, whisper_after)
    transcript = extract_transcript(whisper_payload)
    if not transcript:
        raise PipelineError("whisper_empty_transcript")
    # A malformed counter counts as zero, i.e. "not proven" (fails closed).
    whisper_response_delta = _int_or_zero(whisper_payload.get("npu_busy_delta_us"))
    # Proof needs a positive service-reported delta; the locally observed
    # counter must agree whenever it could be read.
    whisper_proven = whisper_response_delta > 0 and (whisper_sysfs_delta is None or whisper_sysfs_delta > 0)

    classifier_payload, classifier_sysfs_observed = classify_text(
        classifier_url=args.classifier_url,
        item_id=item_id,
        source=args.source,
        title=args.title or "",
        transcript=transcript,
        max_transcript_chars=args.max_transcript_chars,
        dry_run=args.dry_run,
        timeout=args.timeout,
    )
    labels = compact_labels(classifier_payload)
    classifier_response_delta = _int_or_zero(classifier_payload.get("npu_busy_delta_us"))
    classifier_response_sysfs_delta = _int_or_zero(classifier_payload.get("sysfs_npu_busy_delta_us"))
    classifier_proven = (
        classifier_response_delta > 0
        and classifier_response_sysfs_delta > 0
        and (classifier_sysfs_observed is None or classifier_sysfs_observed > 0)
    )

    action_worthy, atlas_gate, next_gate = decide_gate(transcript, labels, whisper_proven, classifier_proven)
    output: dict[str, Any] = {
        "ok": True,
        "id": item_id,
        "source": args.source,
        "transcript_chars": len(transcript),
        "action_worthy": action_worthy,
        "atlas_gate": atlas_gate,
        "next_gate": next_gate,
        "whisper_npu_delta_us": whisper_response_delta,
        "whisper_sysfs_delta_us": whisper_sysfs_delta,
        "classifier_npu_delta_us": classifier_response_delta,
        "classifier_sysfs_delta_us": classifier_response_sysfs_delta,
        "classifier_observed_sysfs_delta_us": classifier_sysfs_observed,
        "labels": labels,
        "external_sends": 0,
        "writes": 0,
    }
    # Transcript content is opt-in so the default output carries only metadata.
    if args.include_transcript:
        output["transcript"] = transcript
    if args.include_transcript_preview_chars > 0:
        output["transcript_preview"] = transcript[: args.include_transcript_preview_chars]
    if args.include_raw:
        output["raw"] = {"whisper": whisper_payload, "classifier": classifier_payload}
    return output
 def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the local-file audio advisory pipeline."""
    source_labels = ["local_file", "manual_smoke", "local_voice_memo", "meeting_snippet", "staged_telegram", "staged_discord"]
    default_max_bytes = 25 * 1024 * 1024

    cli = argparse.ArgumentParser(description="Run local-file audio through NPU Whisper and NPU classifier in dry-run advisory mode.")
    add = cli.add_argument

    # Input identity and classifier context.
    add("--audio", required=True, help="Absolute path to a local audio file; no URL/platform fetching is performed.")
    add("--id", default="", help="Optional stable item id for classifier correlation.")
    add("--source", default="local_file", choices=source_labels, help="Local/staged source label only.")
    add("--title", default="", help="Optional short local title for classifier context.")
    add("--language", default="en")

    # Service endpoints.
    add("--whisper-url", default=DEFAULT_WHISPER_URL)
    add("--classifier-url", default=DEFAULT_CLASSIFIER_URL)

    # --dry-run / --no-dry-run share one destination; dry-run is the default.
    add("--dry-run", dest="dry_run", action="store_true", default=True, help="Keep classifier in dry-run advisory mode (default).")
    add("--no-dry-run", dest="dry_run", action="store_false", help="Send dry_run=false to classifier; this script still performs no side effects.")

    # Output shaping.
    add("--json", action="store_true", help="Emit compact JSON; default is JSON for machine-safe handoff.")
    add("--include-transcript", action="store_true", help="Include full transcript in output; off by default.")
    add("--include-transcript-preview-chars", type=int, default=0, help="Include a bounded transcript preview; default 0.")
    add("--include-raw", action="store_true", help="Include raw service responses for one-off local debugging; off by default.")

    # Resource limits.
    add("--max-bytes", type=int, default=default_max_bytes)
    add("--max-audio-seconds", type=float, default=300.0, help="Enforced for WAV inputs; other codecs remain size-capped.")
    add("--max-transcript-chars", type=int, default=6000)
    add("--timeout", type=int, default=300)
    return cli
 def main(argv: list[str] | None = None) -> int:
    """CLI entry point: print the pipeline result as JSON and return an exit code.

    Success prints to stdout and returns 0. A ``PipelineError`` prints a JSON
    error object (including the error's details) to stderr and returns its status.
    """
    args = build_parser().parse_args(argv)
    try:
        report = run_pipeline(args)
    except PipelineError as exc:
        failure = {"ok": False, "error": str(exc), "external_sends": 0, "writes": 0, **exc.details}
        print(json.dumps(failure, ensure_ascii=False, sort_keys=True), file=sys.stderr)
        return exc.status
    print(json.dumps(report, ensure_ascii=False, sort_keys=True))
    return 0
 if __name__ == "__main__":
    raise SystemExit(main())
@@ -21,14 +21,32 @@ import os
 import subprocess
 import sys
 import threading
 import time
 from pathlib import Path
 from urllib.parse import parse_qs, urlparse
 from urllib import request, error
 # All settings below are read from the environment once, at import time.
 # A non-numeric value in any of the int(...) settings raises ValueError on import.
 PORT = int(os.environ.get("PORT", 18810))
 # NOTE(review): presumably seconds — confirm against the reindex call site.
 REINDEX_TIMEOUT = int(os.environ.get("REINDEX_TIMEOUT", "1800"))
 # Blank or whitespace-only values fall back to the defaults.
 RAG_COLLECTION = os.environ.get("RAG_COLLECTION", "obsidian").strip() or "obsidian"
 RAG_EMBED_MODEL = os.environ.get("RAG_EMBED_MODEL", "nomic-embed-text").strip() or "nomic-embed-text"
 # Trailing slashes are stripped from the base URL.
 OLLAMA_BASE_URL = (os.environ.get("OLLAMA_BASE_URL") or "http://127.0.0.1:18807").rstrip("/")
 # Reranking is opt-in: only 1/true/yes/on (case-insensitive) enable it.
 RAG_RERANK_ENABLED = (os.environ.get("RAG_RERANK_ENABLED") or "false").strip().lower() in {
    "1",
    "true",
    "yes",
    "on",
 }
 RAG_RERANK_URL = (os.environ.get("RAG_RERANK_URL") or "http://127.0.0.1:18818/rerank").strip()
 # Candidate count requested from vector search when reranking (clamped to >= 1).
 RAG_RERANK_INITIAL_K = max(1, int(os.environ.get("RAG_RERANK_INITIAL_K") or "20"))
 # Upper bound on results returned after reranking (clamped to >= 1).
 RAG_RERANK_TOP_K = max(1, int(os.environ.get("RAG_RERANK_TOP_K") or "5"))
 # Reranker request timeout in milliseconds; divided by 1000 for urlopen.
 RAG_RERANK_TIMEOUT_MS = max(1, int(os.environ.get("RAG_RERANK_TIMEOUT_MS") or "3000"))
 # Defaults to on: a reranker response without a positive npu_busy_delta_us
 # is discarded and the vector order is used instead.
 RAG_RERANK_REQUIRE_NPU_PROOF = (os.environ.get("RAG_RERANK_REQUIRE_NPU_PROOF") or "true").strip().lower() in {
    "1",
    "true",
    "yes",
    "on",
 }
 REINDEX_SCRIPT = str(
    Path.home()
@@ -102,12 +120,125 @@ def get_status() -> dict:
        return {"error": str(e)}
 def _result_text(result: dict) -> str:
    """Return the text field sent to the reranker without changing response shape."""
    return str(result.get("text") or result.get("content") or "")
 def _apply_rerank(query: str, results: list[dict], final_k: int) -> tuple[list[dict], dict]:
    """Optionally rerank semantic results, falling back to vector order on any error.

    Args:
        query: The user query sent to the reranker.
        results: Vector-search hits in their original order.
        final_k: Maximum number of results to return.

    Returns:
        ``(results, metadata)``. On success the results are copies of the
        matching hits with ``rerank_score`` and ``rerank_rank`` added. On any
        failure the first ``final_k`` original hits are returned and
        ``metadata["error"]`` says why.
    """
    metadata = {
        "enabled": RAG_RERANK_ENABLED,
        "attempted": False,
        "ok": False,
        "url": RAG_RERANK_URL,
        "initial_k": len(results),
        "top_k": final_k,
    }
    if not RAG_RERANK_ENABLED:
        metadata["ok"] = True
        metadata["reason"] = "disabled"
        return results[:final_k], metadata
    if not results:
        metadata["ok"] = True
        metadata["reason"] = "no_results"
        return [], metadata

    metadata["attempted"] = True
    documents = []
    for idx, item in enumerate(results):
        text = _result_text(item)
        if not text:
            # Hits without text cannot be scored; they stay eligible for fallback.
            continue
        documents.append(
            {
                "id": str(item.get("id") or idx),
                "text": text,
                "metadata": {
                    "index": idx,
                    "path": item.get("path"),
                    "source": item.get("source"),
                    "chunk": item.get("chunk"),
                },
            }
        )
    if not documents:
        metadata["ok"] = True
        metadata["reason"] = "no_text_documents"
        return results[:final_k], metadata

    started = time.monotonic()
    try:
        body = json.dumps(
            {
                "query": query,
                "documents": documents,
                "top_k": final_k,
                "return_documents": False,
            }
        ).encode("utf-8")
        req = request.Request(
            RAG_RERANK_URL,
            data=body,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with request.urlopen(req, timeout=RAG_RERANK_TIMEOUT_MS / 1000.0) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except (OSError, ValueError) as exc:
        # OSError covers URLError/HTTPError, timeouts and socket failures;
        # ValueError covers invalid JSON and undecodable response bytes.
        metadata["duration_ms"] = round((time.monotonic() - started) * 1000, 2)
        metadata["error"] = f"{type(exc).__name__}: {exc}"
        return results[:final_k], metadata
    metadata["duration_ms"] = round((time.monotonic() - started) * 1000, 2)

    if not isinstance(payload, dict):
        # Valid JSON that is not an object (array, string, ...) cannot be used.
        metadata["error"] = "reranker returned a non-object response"
        return results[:final_k], metadata

    try:
        npu_delta = int(payload.get("npu_busy_delta_us") or 0)
    except (TypeError, ValueError, OverflowError):
        # A malformed counter counts as "no proof" instead of crashing the search.
        npu_delta = 0

    metadata["ok"] = bool(payload.get("ok", True))
    metadata["model"] = payload.get("model")
    metadata["device"] = payload.get("device")
    metadata["npu_busy_delta_us"] = payload.get("npu_busy_delta_us")
    metadata["require_npu_proof"] = RAG_RERANK_REQUIRE_NPU_PROOF
    metadata["input_count"] = payload.get("input_count")
    ranked = payload.get("results") or []
    if not isinstance(ranked, list):
        ranked = []

    if RAG_RERANK_REQUIRE_NPU_PROOF and npu_delta <= 0:
        metadata["ok"] = False
        metadata["error"] = "reranker response lacked positive npu_busy_delta_us"
        return results[:final_k], metadata
    if not metadata["ok"] or not ranked:
        metadata["error"] = payload.get("error") or "reranker returned no ranked results"
        return results[:final_k], metadata

    # Same id scheme as the request documents: explicit id, else list position.
    by_id = {str(item.get("id") or idx): item for idx, item in enumerate(results)}
    reranked = []
    for rank, ranked_item in enumerate(ranked):
        if not isinstance(ranked_item, dict):
            continue
        source_item = None
        if "id" in ranked_item:
            source_item = by_id.get(str(ranked_item.get("id")))
        if source_item is None and isinstance(ranked_item.get("index"), int):
            idx = ranked_item["index"]
            if 0 <= idx < len(results):
                source_item = results[idx]
        if source_item is None:
            continue
        # Copy so the caller's original hit dicts are not mutated.
        merged = dict(source_item)
        merged["rerank_score"] = ranked_item.get("score")
        merged["rerank_rank"] = rank + 1
        reranked.append(merged)
        if len(reranked) >= final_k:
            break
    if not reranked:
        metadata["ok"] = False
        metadata["error"] = "reranker result IDs did not match search results"
        return results[:final_k], metadata
    return reranked, metadata
 def run_semantic_search(query: str, top_k: int = 5) -> dict:
    """Query the local Obsidian Chroma index via the rag-search script."""
    query = (query or "").strip()
    if not query:
        return {"ok": False, "error": "query is required", "results": []}
    top_k = max(1, min(int(top_k or 5), 20))
    search_k = max(top_k, min(RAG_RERANK_INITIAL_K, 100)) if RAG_RERANK_ENABLED else top_k
    final_k = min(top_k, RAG_RERANK_TOP_K) if RAG_RERANK_ENABLED else top_k
    env = os.environ.copy()
    env.setdefault("RAG_COLLECTION", RAG_COLLECTION)
    env.setdefault("RAG_EMBED_MODEL", RAG_EMBED_MODEL)
@@ -119,7 +250,7 @@ def run_semantic_search(query: str, top_k: int = 5) -> dict:
            "--index",
            RAG_COLLECTION,
            "--top-k",
-            str(top_k),
+            str(search_k),
            "--raw",
            query,
        ],
@@ -133,17 +264,27 @@ def run_semantic_search(query: str, top_k: int = 5) -> dict:
            "ok": False,
            "query": query,
            "top_k": top_k,
            "search_k": search_k,
            "error": result.stderr.strip()[-2000:] or result.stdout.strip()[-2000:],
            "results": [],
            "rerank": {
                "enabled": RAG_RERANK_ENABLED,
                "attempted": False,
                "ok": False,
                "error": "vector search failed before rerank",
            },
        }
    payload = json.loads(result.stdout)
    results = payload.get("results") or []
    results, rerank_meta = _apply_rerank(query, results, final_k)
    return {
        "ok": True,
        "query": query,
        "index": payload.get("index", RAG_COLLECTION),
        "top_k": top_k,
        "search_k": search_k,
        "result_count": len(results),
        "rerank": rerank_meta,
        "results": results,
    }
@@ -1,5 +1,5 @@
 {
-  "updatedAt": "2026-05-14T00:04:59.343Z",
+  "updatedAt": "2026-06-05T19:59:54.879Z",
  "createdAt": "2026-05-13T21:40:33.847Z",
  "id": "PlZywwqL8MRNEAN6",
  "name": "Evening Digest",
@@ -56,7 +56,9 @@
          "id": "UPAHgUJVRqZQceL4",
          "name": "n8n Public API (Failure Digest)"
        }
-      }
+      },
      "continueOnFail": true,
      "alwaysOutputData": true
    },
    {
      "parameters": {
@@ -86,7 +88,9 @@
          "id": "UPAHgUJVRqZQceL4",
          "name": "n8n Public API (Failure Digest)"
        }
-      }
+      },
      "continueOnFail": true,
      "alwaysOutputData": true
    },
    {
      "parameters": {
@@ -138,7 +142,9 @@
          "id": "465Swz2b71O2KRAK",
          "name": "Obsidian Local REST API"
        }
-      }
+      },
      "continueOnFail": true,
      "alwaysOutputData": true
    },
    {
      "parameters": {
@@ -275,7 +281,9 @@
          "id": "465Swz2b71O2KRAK",
          "name": "Obsidian Local REST API"
        }
-      }
+      },
      "continueOnFail": true,
      "alwaysOutputData": true
    }
  ],
  "connections": {
@@ -1 +1 @@
-[{"updatedAt":"2026-05-14T21:36:33.045Z","createdAt":"2026-05-14T21:36:33.045Z","id":"PCtD3PuQjzKLyEEE","name":"Obsidian Health + Reindex","description":null,"active":true,"isArchived":false,"nodes":[{"parameters":{},"id":"f9152036-4ee6-48cf-9f71-fd59ce617c52","name":"Manual Trigger","type":"n8n-nodes-base.manualTrigger","typeVersion":1,"position":[0,0]},{"parameters":{"rule":{"interval":[{"field":"hours","hoursInterval":1}]}},"id":"7845e784-c35b-4912-9d72-2463a06d95d2","name":"Hourly Health Schedule","type":"n8n-nodes-base.scheduleTrigger","typeVersion":1.2,"position":[0,180]},{"parameters":{"url":"http://172.19.0.1:27123/","options":{"timeout":10000}},"id":"4976f00c-3539-4d3a-a87d-f7f3ac1adf19","name":"Check Obsidian REST","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[280,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18810/reindex","options":{"timeout":300000}},"id":"8abf0596-3af6-4d56-b4d0-5284f13998ae","name":"Trigger Obsidian Reindex","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[560,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18814/check","options":{"timeout":240000}},"id":"248b4109-2d60-43bc-b598-cb766edde11f","name":"Run RAG Embedding Check","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[840,80],"continueOnFail":true},{"parameters":{"jsCode":"\nconst now = new Date().toISOString();\nconst reindex = $('Trigger Obsidian Reindex').first().json;\nconst rag = $('Run RAG Embedding Check').first().json;\nconst rest = $('Check Obsidian REST').first().json;\nconst ok = Boolean(rest.status === 'OK' || rest.manifest || rest.statusCode) && Boolean(rag.ok !== false) && Boolean(reindex.ok !== false);\nconst body = `# Obsidian Automation Health\n\nUpdated: ${now}\n\n## Status\n\n- Overall: ${ok ? 
'OK' : 'Needs attention'}\n- Obsidian REST: ${rest.status || rest.statusCode || 'responded'}\n- Reindex trigger: ${JSON.stringify(reindex).slice(0, 500)}\n- RAG/embedding check: ${JSON.stringify(rag).slice(0, 1000)}\n\nThis note is automatically overwritten by n8n.\n`;\nreturn [{ json: { ok, path: 'Resources/Obsidian Automation Health.md', body } }];\n"},"id":"e67008ad-0d9e-4546-a180-3d4223b8d05c","name":"Build Health Note","type":"n8n-nodes-base.code","typeVersion":2,"position":[1120,80]},{"parameters":{"method":"PUT","url":"={{'http://172.19.0.1:27123/vault/' + encodeURIComponent($json.path).replace(/%2F/g, '/')}}","sendHeaders":true,"headerParameters":{"parameters":[{"name":"Content-Type","value":"text/markdown"}]},"sendBody":true,"contentType":"raw","rawContentType":"text/markdown","body":"={{$json.body}}","options":{"timeout":30000},"authentication":"genericCredentialType","genericAuthType":"httpHeaderAuth"},"id":"d86d8942-966a-48fd-ad99-cf23408f2ae4","name":"Write Health Note","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[1400,80],"credentials":{"httpHeaderAuth":{"id":"465Swz2b71O2KRAK","name":"Obsidian Local REST API"}}}],"connections":{"Manual Trigger":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Hourly Health Schedule":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Check Obsidian REST":{"main":[[{"node":"Trigger Obsidian Reindex","type":"main","index":0}]]},"Trigger Obsidian Reindex":{"main":[[{"node":"Run RAG Embedding Check","type":"main","index":0}]]},"Run RAG Embedding Check":{"main":[[{"node":"Build Health Note","type":"main","index":0}]]},"Build Health Note":{"main":[[{"node":"Write Health Note","type":"main","index":0}]]}},"settings":{"executionOrder":"v1","callerPolicy":"workflowsFromSameOwner","availableInMCP":false},"staticData":{"node:Hourly Health 
Schedule":{"recurrenceRules":[]}},"meta":null,"pinData":null,"versionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","activeVersionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","versionCounter":4,"triggerCount":1,"tags":[],"shared":[{"updatedAt":"2026-05-14T21:36:33.056Z","createdAt":"2026-05-14T21:36:33.056Z","role":"workflow:owner","workflowId":"PCtD3PuQjzKLyEEE","projectId":"WGdp8QunI1tHpjXa","project":{"updatedAt":"2026-03-11T21:08:10.005Z","createdAt":"2026-03-11T21:05:11.541Z","id":"WGdp8QunI1tHpjXa","name":"will will <will@wills-portal.com>","type":"personal","icon":null,"description":null,"creatorId":"5ad50ead-6e6a-4d12-ab5b-e5db15835bb5"}}],"versionMetadata":{"name":null,"description":null}}]
+[{"updatedAt":"2026-06-05T20:17:39.529Z","createdAt":"2026-05-14T21:36:33.045Z","id":"PCtD3PuQjzKLyEEE","name":"Obsidian Health + Reindex","description":null,"active":true,"isArchived":false,"nodes":[{"parameters":{},"id":"f9152036-4ee6-48cf-9f71-fd59ce617c52","name":"Manual Trigger","type":"n8n-nodes-base.manualTrigger","typeVersion":1,"position":[0,0]},{"parameters":{"rule":{"interval":[{"field":"hours","hoursInterval":1}]}},"id":"7845e784-c35b-4912-9d72-2463a06d95d2","name":"Hourly Health Schedule","type":"n8n-nodes-base.scheduleTrigger","typeVersion":1.2,"position":[0,180]},{"parameters":{"url":"http://172.19.0.1:27123/","options":{"timeout":10000}},"id":"4976f00c-3539-4d3a-a87d-f7f3ac1adf19","name":"Check Obsidian REST","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[280,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18810/reindex","options":{"timeout":300000}},"id":"8abf0596-3af6-4d56-b4d0-5284f13998ae","name":"Trigger Obsidian Reindex","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[560,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18814/check","options":{"timeout":240000}},"id":"248b4109-2d60-43bc-b598-cb766edde11f","name":"Run RAG Embedding Check","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[840,80],"continueOnFail":true},{"parameters":{"jsCode":"\nconst now = new Date().toISOString();\nconst reindex = $('Trigger Obsidian Reindex').first().json;\nconst rag = $('Run RAG Embedding Check').first().json;\nconst rest = $('Check Obsidian REST').first().json;\nconst ok = Boolean(rest.status === 'OK' || rest.manifest || rest.statusCode) && Boolean(rag.ok !== false) && Boolean(reindex.ok !== false);\nconst body = `# Obsidian Automation Health\n\nUpdated: ${now}\n\n## Status\n\n- Overall: ${ok ? 
'OK' : 'Needs attention'}\n- Obsidian REST: ${rest.status || rest.statusCode || 'responded'}\n- Reindex trigger: ${JSON.stringify(reindex).slice(0, 500)}\n- RAG/embedding check: ${JSON.stringify(rag).slice(0, 1000)}\n\nThis note is automatically overwritten by n8n.\n`;\nreturn [{ json: { ok, path: 'Resources/Obsidian Automation Health.md', body } }];\n"},"id":"e67008ad-0d9e-4546-a180-3d4223b8d05c","name":"Build Health Note","type":"n8n-nodes-base.code","typeVersion":2,"position":[1120,80]},{"parameters":{"method":"PUT","url":"={{'http://172.19.0.1:27123/vault/' + encodeURIComponent($json.path).replace(/%2F/g, '/')}}","sendHeaders":true,"headerParameters":{"parameters":[{"name":"Content-Type","value":"text/markdown"}]},"sendBody":true,"contentType":"raw","rawContentType":"text/markdown","body":"={{$json.body}}","options":{"timeout":30000},"authentication":"genericCredentialType","genericAuthType":"httpHeaderAuth"},"id":"d86d8942-966a-48fd-ad99-cf23408f2ae4","name":"Write Health Note","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[1400,80],"credentials":{"httpHeaderAuth":{"id":"465Swz2b71O2KRAK","name":"Obsidian Local REST API"}},"continueOnFail":true,"alwaysOutputData":true}],"connections":{"Manual Trigger":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Hourly Health Schedule":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Check Obsidian REST":{"main":[[{"node":"Trigger Obsidian Reindex","type":"main","index":0}]]},"Trigger Obsidian Reindex":{"main":[[{"node":"Run RAG Embedding Check","type":"main","index":0}]]},"Run RAG Embedding Check":{"main":[[{"node":"Build Health Note","type":"main","index":0}]]},"Build Health Note":{"main":[[{"node":"Write Health Note","type":"main","index":0}]]}},"settings":{"executionOrder":"v1","callerPolicy":"workflowsFromSameOwner","availableInMCP":false},"staticData":{"node:Hourly Health 
Schedule":{"recurrenceRules":[]}},"meta":null,"pinData":null,"versionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","activeVersionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","versionCounter":4,"triggerCount":1,"tags":[],"shared":[{"updatedAt":"2026-05-14T21:36:33.056Z","createdAt":"2026-05-14T21:36:33.056Z","role":"workflow:owner","workflowId":"PCtD3PuQjzKLyEEE","projectId":"WGdp8QunI1tHpjXa","project":{"updatedAt":"2026-03-11T21:08:10.005Z","createdAt":"2026-03-11T21:05:11.541Z","id":"WGdp8QunI1tHpjXa","name":"will will <will@wills-portal.com>","type":"personal","icon":null,"description":null,"creatorId":"5ad50ead-6e6a-4d12-ab5b-e5db15835bb5"}}],"versionMetadata":{"name":null,"description":null}}]
@@ -11,6 +11,14 @@ Environment=PORT=18810
 Environment=RAG_COLLECTION=obsidian_bge_npu
 Environment=RAG_EMBED_MODEL=bge-base-en-v1.5-int8-ov
 Environment=OLLAMA_BASE_URL=http://127.0.0.1:18817
 # Request-time second-stage reranking. The :18810 handler falls back to plain
 # vector order if the reranker times out, errors, or positive NPU proof is missing.
 Environment=RAG_RERANK_ENABLED=true
 Environment=RAG_RERANK_URL=http://127.0.0.1:18818/rerank
 Environment=RAG_RERANK_INITIAL_K=20
 Environment=RAG_RERANK_TOP_K=5
 Environment=RAG_RERANK_TIMEOUT_MS=1500
 Environment=RAG_RERANK_REQUIRE_NPU_PROOF=true
 [Install]
 WantedBy=default.target
@@ -0,0 +1,22 @@
 {
  "collapse-filter": true,
  "search": "",
  "showTags": false,
  "showAttachments": false,
  "hideUnresolved": false,
  "showOrphans": true,
  "collapse-color-groups": true,
  "colorGroups": [],
  "collapse-display": true,
  "showArrow": false,
  "textFadeMultiplier": 0,
  "nodeSizeMultiplier": 1,
  "lineSizeMultiplier": 1,
  "collapse-forces": true,
  "centerStrength": 0.518713248970312,
  "repelStrength": 10,
  "linkStrength": 1,
  "linkDistance": 250,
  "scale": 0.9999999999999999,
  "close": true
 }
@@ -0,0 +1,13 @@
 {
  "port": 27124,
  "insecurePort": 27123,
  "enableInsecureServer": true,
  "apiKey": "698cfc8b00b93c41480e7e1cb84d77b75176be87507256a5fae9a5b53b5a20cb",
  "crypto": {
    "cert": "-----BEGIN CERTIFICATE-----\r\nMIIDRTCCAi2gAwIBAgIBATANBgkqhkiG9w0BAQsFADAiMSAwHgYDVQQDExdPYnNp\r\nZGlhbiBMb2NhbCBSRVNUIEFQSTAeFw0yNjAzMTcxOTU5MjJaFw0yNzAzMTcxOTU5\r\nMjJaMCIxIDAeBgNVBAMTF09ic2lkaWFuIExvY2FsIFJFU1QgQVBJMIIBIjANBgkq\r\nhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAokD5oRVV46TXbRgzEQ1zIrOnu29eKL4Q\r\nyqpNV6Lx6mPyuJyMhcvaPhIf1AFmgOwVFqwae7BSLEqIPKJKLq4Z64WwJXIRdNVF\r\nXAX+r5OrumQObCxXIivBr5T4RHnUpkN9V9LNUzKNbHL2KNX/fooEKy5IhMI9Dh07\r\nV08zFrV0IU5JUjScWSSoaZheOXhnigRBYDz8phvS0PpF8hsCL9tdTqUpMh/weRTi\r\nr71wovgn1ijmF6mJM61gAlK53zG+DWdjEpEUZVEjvsA/5LnEjWPeR0y7NRYqqxg2\r\nQ/VqjJ6PC5aR/dRf2u8Z2rRKaW91dmpTGqRxaIRmnhVmp4FcyTuRJwIDAQABo4GF\r\nMIGCMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgLEMDsGA1UdJQQ0MDIG\r\nCCsGAQUFBwMBBggrBgEFBQcDAgYIKwYBBQUHAwMGCCsGAQUFBwMEBggrBgEFBQcD\r\nCDARBglghkgBhvhCAQEEBAMCAPcwDwYDVR0RBAgwBocEfwAAATANBgkqhkiG9w0B\r\nAQsFAAOCAQEAbUWN+bPEI3k/CPZ6u6blFT1bs+siP1hysTlvRv4vN7CydZLwla3x\r\nocH4uIqwoPIb1Rpi3jPHpqSMiWBNvymK5TPGWmqS2/O6ivF/8AbTvA0YSpMVzIMb\r\n9caOm+wJtz1vsgdf1vy+USOnUtpWm9Sep/6S08Af3J7wS+sOJIWHHD4hlwEk1mpq\r\nxKLpXCm+vH8VuqQ3vSkVBbb4jOVishmO8Yxc+u+xWVpWXxJhaxIFO8MZbC4bbEDR\r\nN6ujylUI6+EF6nFb0SnaD0JDwPNw7ljTP8cB2loAXz2W7rhAiuZU1kjCiQBwWXc0\r\nkJqeYb+EhkCkDCKKCz0zv5xIas04MrxeDg==\r\n-----END CERTIFICATE-----\r\n",
    "privateKey": "-----BEGIN RSA PRIVATE KEY-----\r\nMIIEogIBAAKCAQEAokD5oRVV46TXbRgzEQ1zIrOnu29eKL4QyqpNV6Lx6mPyuJyM\r\nhcvaPhIf1AFmgOwVFqwae7BSLEqIPKJKLq4Z64WwJXIRdNVFXAX+r5OrumQObCxX\r\nIivBr5T4RHnUpkN9V9LNUzKNbHL2KNX/fooEKy5IhMI9Dh07V08zFrV0IU5JUjSc\r\nWSSoaZheOXhnigRBYDz8phvS0PpF8hsCL9tdTqUpMh/weRTir71wovgn1ijmF6mJ\r\nM61gAlK53zG+DWdjEpEUZVEjvsA/5LnEjWPeR0y7NRYqqxg2Q/VqjJ6PC5aR/dRf\r\n2u8Z2rRKaW91dmpTGqRxaIRmnhVmp4FcyTuRJwIDAQABAoIBACf8umjUIMRHMl5t\r\nGdzIg0kYnKxpcHu7B9liqkgAXP2Gn2GXF1y8Fi+4+MYfiDsas8HQLYCxPjczMSs8\r\nVer2NmYgnv5DhADWtM7OnWt5CdgYY6OOM/U0cnoKzTrXCazmMiRsS/UGnusM1BTR\r\nVLPDYO6ha/boBfMOCjtkxfMBSjsQszS3GVtNsv+LCeY4fYH/tj7LnC9KSaAEDyLB\r\n+Fl6RMp/h11yWC2RLrWMUE/2tRVmz60VJgOphjTLkLtJXsHTOaWKOzg/ZFQaxtLX\r\n5oVGSAnGe0CD5QP8ImSVxnZoErHbA8B7AtsL028pQxy3zrr+5eXbKA7ZQKmjb4yM\r\nVx08I9kCgYEAzSL/9uqxvFinZfAB78g0lFZvO78jjfEWm8/upv+9L2nsrcnSM79T\r\n+M0hOndR8S1Dy0DmavYov2atuXRV3JIlWiNFUi1EM7OCe15GGqgVb3ADpZziJkil\r\nsKrb40rCLEePbp4nmLE7LhYh8TOclXRL8HySuEm74v46uUh2xJJ1m38CgYEAynv+\r\nYgPtuv+4QoAHImO7BQVmVUZg+56NksYLPnf+0ukhOHMoVFTc9KbM12Q5qiHzTBYX\r\n49AErlQ2apMZqcAD40dwdH3Dv9w2gGqLYp2o8mkGc2sPb12SNTIizsbzvTLW24zF\r\njdMs2y5d5ZHNVhfi8yAFmWuaV9lC1P8OVywwflkCgYB/x5h3vxO9hd+oQMuECEqw\r\nR/L73YERLqbtoaVAAzdeLHYQfxHfyANPjL9xAthZCeAb4K5m3DTfnN8EEXJWdfas\r\nYiIRIT9FkUDrBftXKXJIuxaad9HrFP+Yv6U+vNec62pt9jgmBegeOg0kiQi1k/6l\r\nq4NdJhjSOZhsx7WrlquJkwKBgEdaBjwX0lARCKc2Yk02A5MzYeou0MIaDx1neFLd\r\nCgjcaf5wZgfBl9MGbCyCfud66zcmmeiHRv7/YeWQTHzK0xPl+rSyFKapPPNnmBJ6\r\nKCyz4bgOQ/Qkbv8b2bQv23gSUDAxnPPrNGVQI3pgNJFf/XNbF14G0u6d+rT/49fI\r\nFJaRAoGAT7QjSLPBbK+jm1n295LObZjLGEuuiIH9PBKDx1mbcSJkwx4QUpqgr6tT\r\nOchHvloOEBXKf0P5UWOGFJL2UcXnKL5st8D8vQrX8WFZ+ER1dMDyTl/0ly4mhQEH\r\nhN3sxn/PMztYMMCQm94cwQxZQqvLZa7dL/1x8vhm7jSRIfLmZE0=\r\n-----END RSA PRIVATE KEY-----\r\n",
    "publicKey": "-----BEGIN PUBLIC KEY-----\r\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAokD5oRVV46TXbRgzEQ1z\r\nIrOnu29eKL4QyqpNV6Lx6mPyuJyMhcvaPhIf1AFmgOwVFqwae7BSLEqIPKJKLq4Z\r\n64WwJXIRdNVFXAX+r5OrumQObCxXIivBr5T4RHnUpkN9V9LNUzKNbHL2KNX/fooE\r\nKy5IhMI9Dh07V08zFrV0IU5JUjScWSSoaZheOXhnigRBYDz8phvS0PpF8hsCL9td\r\nTqUpMh/weRTir71wovgn1ijmF6mJM61gAlK53zG+DWdjEpEUZVEjvsA/5LnEjWPe\r\nR0y7NRYqqxg2Q/VqjJ6PC5aR/dRf2u8Z2rRKaW91dmpTGqRxaIRmnhVmp4FcyTuR\r\nJwIDAQAB\r\n-----END PUBLIC KEY-----\r\n"
  },
  "enableSecureServer": true,
  "bindingHost": "0.0.0.0"
 }
@@ -0,0 +1,10 @@
 {
 	"id": "obsidian-local-rest-api",
 	"name": "Local REST API",
 	"version": "3.4.6",
 	"minAppVersion": "0.12.0",
 	"description": "Get, change or otherwise interact with your notes in Obsidian via a REST API.",
 	"author": "Adam Coddington",
 	"authorUrl": "https://coddingtonbear.net/",
 	"isDesktopOnly": true
 }
@@ -0,0 +1,47 @@
 /* Styles scoped to elements under div.obsidian-local-rest-api-settings. */
 /* API key display block: adds spacing below it. */
 div.obsidian-local-rest-api-settings div.api-key-display {
  margin-bottom: 20px;
 }
 /* The <pre> inside the API key block: small monospace text; user-select: all
    makes a single click select the entire contents. */
 div.obsidian-local-rest-api-settings div.api-key-display pre {
  font-size: 0.8em;
  padding: 10px 20px;
  background-color: var(--background-modifier-cover);
  font-family: monospace;
  user-select: all;
 }
 /* Setting controls take at least half of the row width. */
 div.obsidian-local-rest-api-settings div.setting-item-control {
  min-width: 50%;
 }
 /* Textareas span the full width of their container. */
 div.obsidian-local-rest-api-settings textarea {
  width: 100%;
 }
 /* Certificate notices: red border for expired, yellow border for the
    expiring-soon and regeneration-recommended states. */
 div.obsidian-local-rest-api-settings div.certificate-expired {
  padding: 10px 20px;
  border: 2px solid #ff0000;
 }
 div.obsidian-local-rest-api-settings div.certificate-expiring-soon {
  padding: 10px 20px;
  border: 2px solid #ffff00;
 }
 div.obsidian-local-rest-api-settings div.certificate-regeneration-recommended {
  padding: 10px 20px;
  border: 2px solid #ffff00;
 }
 /* API URL table: full-width rows with padded cells. */
 div.obsidian-local-rest-api-settings table.api-urls tr {
  width: 100%;
 }
 div.obsidian-local-rest-api-settings table.api-urls th, div.obsidian-local-rest-api-settings table.api-urls td {
  padding: 5px 25px;
 }
 /* Rows marked .disabled show their name and URL cells struck through. */
 div.obsidian-local-rest-api-settings table.api-urls tr.disabled td.name, div.obsidian-local-rest-api-settings table.api-urls tr.disabled td.url {
  text-decoration: line-through;
 }
@@ -0,0 +1,238 @@
 {
  "main": {
    "id": "3deecfae849ca8d4",
    "type": "split",
    "children": [
      {
        "id": "bfbbaa82fdc8e552",
        "type": "tabs",
        "children": [
          {
            "id": "91d4ead9052f8b83",
            "type": "leaf",
            "state": {
              "type": "empty",
              "state": {},
              "icon": "lucide-file",
              "title": "New tab"
            }
          }
        ]
      }
    ],
    "direction": "vertical"
  },
  "left": {
    "id": "28c8862873c84ac7",
    "type": "split",
    "children": [
      {
        "id": "db366f44e3369007",
        "type": "tabs",
        "children": [
          {
            "id": "83702dd4b091f767",
            "type": "leaf",
            "state": {
              "type": "file-explorer",
              "state": {
                "sortOrder": "alphabetical",
                "autoReveal": true
              },
              "icon": "lucide-folder-closed",
              "title": "Files"
            }
          },
          {
            "id": "16fe402f7461b5c4",
            "type": "leaf",
            "state": {
              "type": "search",
              "state": {
                "query": "",
                "matchingCase": false,
                "explainSearch": false,
                "collapseAll": false,
                "extraContext": false,
                "sortOrder": "alphabetical"
              },
              "icon": "lucide-search",
              "title": "Search"
            }
          },
          {
            "id": "9517f62d1aba2d93",
            "type": "leaf",
            "state": {
              "type": "bookmarks",
              "state": {},
              "icon": "lucide-bookmark",
              "title": "Bookmarks"
            }
          },
          {
            "id": "1c968d6bfe211541",
            "type": "leaf",
            "state": {
              "type": "notebook-navigator",
              "state": {},
              "icon": "notebook-navigator",
              "title": "Notebook Navigator"
            }
          }
        ]
      }
    ],
    "direction": "horizontal",
    "width": 321.5
  },
  "right": {
    "id": "c2bbb286ef2dc629",
    "type": "split",
    "children": [
      {
        "id": "f48263853996d79f",
        "type": "tabs",
        "children": [
          {
            "id": "16df20b009c624f4",
            "type": "leaf",
            "state": {
              "type": "backlink",
              "state": {
                "file": "Welcome.md",
                "collapseAll": false,
                "extraContext": false,
                "sortOrder": "alphabetical",
                "showSearch": false,
                "searchQuery": "",
                "backlinkCollapsed": false,
                "unlinkedCollapsed": true
              },
              "icon": "links-coming-in",
              "title": "Backlinks for Welcome"
            }
          },
          {
            "id": "5f7e30b0fc7fe373",
            "type": "leaf",
            "state": {
              "type": "outgoing-link",
              "state": {
                "file": "Welcome.md",
                "linksCollapsed": false,
                "unlinkedCollapsed": true
              },
              "icon": "links-going-out",
              "title": "Outgoing links from Welcome"
            }
          },
          {
            "id": "0de07aca9c62fd2b",
            "type": "leaf",
            "state": {
              "type": "tag",
              "state": {
                "sortOrder": "frequency",
                "useHierarchy": true,
                "showSearch": false,
                "searchQuery": ""
              },
              "icon": "lucide-tags",
              "title": "Tags"
            }
          },
          {
            "id": "92a53d80f80daaef",
            "type": "leaf",
            "state": {
              "type": "all-properties",
              "state": {
                "sortOrder": "frequency",
                "showSearch": false,
                "searchQuery": ""
              },
              "icon": "lucide-archive",
              "title": "All properties"
            }
          },
          {
            "id": "eefa8a89837d21b5",
            "type": "leaf",
            "state": {
              "type": "outline",
              "state": {
                "file": "Welcome.md",
                "followCursor": false,
                "showSearch": false,
                "searchQuery": ""
              },
              "icon": "lucide-list",
              "title": "Outline of Welcome"
            }
          }
        ]
      }
    ],
    "direction": "horizontal",
    "width": 300,
    "collapsed": true
  },
  "left-ribbon": {
    "hiddenItems": {
      "switcher:Open quick switcher": false,
      "graph:Open graph view": false,
      "canvas:Create new canvas": false,
      "daily-notes:Open today's daily note": false,
      "templates:Insert template": false,
      "command-palette:Open command palette": false,
      "bases:Create new base": false,
      "table-editor-obsidian:Advanced Tables Toolbar": false,
      "notebook-navigator:Notebook Navigator": false
    }
  },
  "active": "83702dd4b091f767",
  "lastOpenFiles": [
    "Weekend Activity Ideas.md",
    "Vault Conventions.md",
    "Templates/Atlas Artifacts/test-report.md",
    "Templates/Atlas Artifacts/status-report.md",
    "Templates/Atlas Artifacts/runbook.md",
    "Templates/Atlas Artifacts/reviewer-checklist.md",
    "Templates/Atlas Artifacts/postmortem.md",
    "Templates/Atlas Artifacts/implementation-plan.md",
    "Templates/Atlas Artifacts/diagram.md",
    "Templates/Atlas Artifacts/decision-log.md",
    "Templates/Atlas Artifacts/agent-audit-event.md",
    "Templates/Atlas Artifacts/README.md",
    "Templates/Runbook.md",
    "Templates/Project.md",
    "Templates/Person.md",
    "Templates/Meeting.md",
    "Templates/Kanban Task Graph Templates.md",
    "Templates/Diary Weekly Review.md",
    "Templates/Diary Daily.md",
    "Templates/Decision.md",
    "Templates/Daily Note.md",
    "Templates/Context Pack.md",
    "Templates/Atlas Artifacts",
    "Templates",
    "Runbooks/Runbooks Home.md",
    "Runbooks/Promote Session Output to Notes.md",
    "Runbooks/Atlas Kanban Durable Project Workflow.md",
    "Runbooks/Atlas Event-Driven Automation.md",
    "Projects/Atlas Capability Upgrade Program/Reports/Status",
    "Projects/Atlas Capability Upgrade Program/Reports",
    "Projects/Atlas Capability Upgrade Program/Plans",
    "Projects/Atlas Capability Upgrade Program",
    "Projects/Atlas",
    "Projects",
    "People",
    "Meetings",
    "Infrastructure/Architecture - Service Topology.canvas",
    "Infrastructure/Architecture - Overview.canvas",
    "Infrastructure/Architecture - Master.canvas",
    "Infrastructure/Architecture - Automation Flow.canvas",
    "Untitled.canvas"
  ]
 }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
William Valentin	22e6ee90d2	docs(npu): document advisory observability gates Add operator runbook and link integrated health docs for advisory-only observability, dry-run metrics, and future promotion criteria.	2026-06-06 15:30:31 -07:00
William Valentin	72434c8bc3	feat(npu): add advisory metrics to utilization digest Roll up confidence, recommendation, authority, fallback, and service-level metrics, including v1 authority-flag handling.	2026-06-06 15:30:31 -07:00
William Valentin	dae2a57124	feat(npu): add advisory dry-run comparison harness Add npu_advisory_decision_v1 schema, synthetic fixture set, comparison harness, docs, and focused tests for advisory-only NPU evaluation.	2026-06-06 15:30:31 -07:00
William Valentin	08fb9ca686	docs(npu): update integrated health runbooks	2026-06-05 15:52:51 -07:00
William Valentin	9e5ffa0fd0	feat(npu): add kanban hygiene advisory	2026-06-05 15:52:43 -07:00
William Valentin	d2bad88596	feat(npu): add voice audio advisory pipeline	2026-06-05 15:52:43 -07:00
William Valentin	6906c2079b	feat(npu): add explicit-root batch triage wrapper	2026-06-05 15:52:43 -07:00
William Valentin	6155b54ab5	feat(npu): add cron and n8n advisory examples	2026-06-05 15:52:43 -07:00
William Valentin	5a14adaf58	feat(npu): add utilization digest tooling	2026-06-05 15:52:43 -07:00
William Valentin	b7b4edf0f5	feat(npu): add local context gate advisory	2026-06-05 15:52:42 -07:00
William Valentin	24d620e9c9	fix(n8n): tolerate missing Obsidian REST health note Keep the Obsidian Health + Reindex workflow successful when the optional Obsidian Local REST note write is unavailable. The RAG/reindex checks remain active and verified separately.	2026-06-05 13:19:40 -07:00
William Valentin	ac3590df47	fix(n8n): harden evening digest workflow Allow optional n8n execution, Obsidian note listing, and Obsidian save steps to continue on failure so the digest can still send when one local source is unavailable.	2026-06-05 13:06:52 -07:00
William Valentin	cefd8789cd	fix(n8n): monitor advisory gateway health	2026-06-04 16:26:05 -07:00
William Valentin	aeb3c9f8fb	fix(npu): expose advisory gateway on docker bridge	2026-06-04 16:19:22 -07:00
William Valentin	59c5fd3e57	feat(npu): add advisory gateway wrapper	2026-06-04 16:03:52 -07:00
William Valentin	401321a6d5	Document live OpenVINO NPU sidecars	2026-06-04 15:32:32 -07:00
William Valentin	85c496a59e	docs(obsidian): update automation health status	2026-06-04 15:06:48 -07:00
William Valentin	06cd49247a	chore(rag): enable NPU reranker by default	2026-06-04 15:01:26 -07:00
William Valentin	71f3c05587	feat(rag): add optional NPU reranker fallback	2026-06-04 14:50:41 -07:00
William Valentin	06f235d26b	chore(openclaw): restore clobbered state backups	2026-06-04 13:29:47 -07:00
William Valentin	d2f4dd7cef	fix(openclaw): restore active runtime state	2026-06-04 13:29:47 -07:00
William Valentin	dad13e7648	fix(obsidian): restore vault template settings	2026-06-04 13:27:13 -07:00
William Valentin	137a2c28d2	feat(voice): restore CUDA Whisper fallback image	2026-06-04 13:26:50 -07:00
William Valentin	1772e5a1f3	chore(scripts): restore swarm helper utilities	2026-06-04 13:26:50 -07:00
William Valentin	b88331be42	chore(swarm): restore shared compose and health endpoint	2026-06-04 13:26:50 -07:00
William Valentin	4815750011	chore(n8n): restore workflow exports	2026-06-04 13:26:50 -07:00
William Valentin	99a4f93ce7	test(agent-evals): restore Atlas quality eval suite	2026-06-04 13:26:50 -07:00
William Valentin	6536320774	fix(obsidian): restore shared zap vault after develop rebuild	2026-06-04 13:26:50 -07:00
William Valentin	420df812c0	docs(npu): update service maps and runbooks	2026-06-04 13:08:18 -07:00
William Valentin	703c1df860	docs(npu): document VLM audio wake-word feasibility	2026-06-04 13:07:51 -07:00
William Valentin	2ef9e3dfd2	feat(npu): add bounded OpenVINO GenAI worker	2026-06-04 13:07:51 -07:00
William Valentin	d3373e7234	feat(npu): add document image triage prototype	2026-06-04 13:07:51 -07:00
William Valentin	ea452886f3	feat(npu): add dry-run classifier router prototype	2026-06-04 13:07:51 -07:00
William Valentin	0683253157	feat(npu): add OpenVINO reranker prototype	2026-06-04 13:07:51 -07:00
William Valentin	0a6f84fbf3	feat(rag): add OpenVINO NPU embedding services	2026-06-04 13:07:51 -07:00
William Valentin	83d0ced08c	feat(voice): add OpenVINO NPU Whisper service	2026-06-04 13:07:51 -07:00
		`@@ -1 +1 @@`
			{"agent_mode_auto_approval": true, "annotations_enabled": true, "azure_only": false, "blackbird_clientside_indexing": false, "chat_enabled": true, "chat_jetbrains_enabled": true, "code_quote_enabled": true, "code_review_enabled": true, "codesearch": true, "copilotignore_enabled": false, "endpoints": {"api": "https://api.individual.githubcopilot.com", "origin-tracker": "https://origin-tracker.individual.githubcopilot.com", "proxy": "https://proxy.individual.githubcopilot.com", "telemetry": "https://telemetry.individual.githubcopilot.com"}, "expires_at": 1776916468, "individual": true, "limited_user_quotas": null, "limited_user_reset_date": null, "prompt_8k": true, "public_suggestions": "disabled", "refresh_in": 1500, "sku": "plus_monthly_subscriber_quota", "snippy_load_test_enabled": false, "telemetry": "disabled", "token": "tid=ded1d75350f66adcb3d0ab36e8e78c47;exp=1776916468;sku=plus_monthly_subscriber_quota;proxy-ep=proxy.individual.githubcopilot.com;st=dotcom;chat=1;cit=1;malfil=1;editor_preview_features=1;agent_mode=1;agent_mode_auto_approval=1;mcp=1;client_byok=0;ccr=1;8kp=1;ip=71.231.248.128;asn=AS7922:fda910fb829d6585876da7e06e037cf7e75745e2b4d41b49de4911d85794adcc", "tracking_id": "ded1d75350f66adcb3d0ab36e8e78c47", "vsc_electron_fetcher_v2": false, "xcode": true, "xcode_chat": false}				{"agent_mode_auto_approval": true, "annotations_enabled": true, "azure_only": false, "blackbird_clientside_indexing": false, "chat_enabled": true, "chat_jetbrains_enabled": true, "code_quote_enabled": true, "code_review_enabled": true, "codesearch": true, "copilotignore_enabled": false, "endpoints": {"api": "https://api.individual.githubcopilot.com", "origin-tracker": "https://origin-tracker.individual.githubcopilot.com", "proxy": "https://proxy.individual.githubcopilot.com", "telemetry": "https://telemetry.individual.githubcopilot.com"}, "expires_at": 1774543278, "individual": true, "limited_user_quotas": null, "limited_user_reset_date": null, "prompt_8k": true, 
"public_suggestions": "disabled", "refresh_in": 1500, "sku": "plus_monthly_subscriber_quota", "snippy_load_test_enabled": false, "telemetry": "disabled", "token": "tid=ded1d75350f66adcb3d0ab36e8e78c47;exp=1774543278;sku=plus_monthly_subscriber_quota;proxy-ep=proxy.individual.githubcopilot.com;st=dotcom;chat=1;cit=1;malfil=1;editor_preview_features=1;agent_mode=1;agent_mode_auto_approval=1;mcp=1;ccr=1;8kp=1;ip=24.143.97.87;asn=AS11404:7f079a450cf1a45b238724eb0795e12bf36218ab99ffc6c4b84089e6e7e674b1", "tracking_id": "ded1d75350f66adcb3d0ab36e8e78c47", "vsc_electron_fetcher_v2": false, "xcode": true, "xcode_chat": false}
		`@@ -0,0 +1,2 @@`
							`AGENTMON_INGEST_URL=http://192.168.122.1:8080`
							`AGENTMON_VM_NAME=zap`
`@@ -1 +1 @@`
	[{"updatedAt":"2026-05-14T21:36:33.045Z","createdAt":"2026-05-14T21:36:33.045Z","id":"PCtD3PuQjzKLyEEE","name":"Obsidian Health + Reindex","description":null,"active":true,"isArchived":false,"nodes":[{"parameters":{},"id":"f9152036-4ee6-48cf-9f71-fd59ce617c52","name":"Manual Trigger","type":"n8n-nodes-base.manualTrigger","typeVersion":1,"position":[0,0]},{"parameters":{"rule":{"interval":[{"field":"hours","hoursInterval":1}]}},"id":"7845e784-c35b-4912-9d72-2463a06d95d2","name":"Hourly Health Schedule","type":"n8n-nodes-base.scheduleTrigger","typeVersion":1.2,"position":[0,180]},{"parameters":{"url":"http://172.19.0.1:27123/","options":{"timeout":10000}},"id":"4976f00c-3539-4d3a-a87d-f7f3ac1adf19","name":"Check Obsidian REST","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[280,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18810/reindex","options":{"timeout":300000}},"id":"8abf0596-3af6-4d56-b4d0-5284f13998ae","name":"Trigger Obsidian Reindex","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[560,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18814/check","options":{"timeout":240000}},"id":"248b4109-2d60-43bc-b598-cb766edde11f","name":"Run RAG Embedding Check","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[840,80],"continueOnFail":true},{"parameters":{"jsCode":"\nconst now = new Date().toISOString();\nconst reindex = $('Trigger Obsidian Reindex').first().json;\nconst rag = $('Run RAG Embedding Check').first().json;\nconst rest = $('Check Obsidian REST').first().json;\nconst ok = Boolean(rest.status === 'OK' \|\| rest.manifest \|\| rest.statusCode) && Boolean(rag.ok !== false) && Boolean(reindex.ok !== false);\nconst body = `# Obsidian Automation Health\n\nUpdated: ${now}\n\n## Status\n\n- Overall: ${ok ? 
'OK' : 'Needs attention'}\n- Obsidian REST: ${rest.status \|\| rest.statusCode \|\| 'responded'}\n- Reindex trigger: ${JSON.stringify(reindex).slice(0, 500)}\n- RAG/embedding check: ${JSON.stringify(rag).slice(0, 1000)}\n\nThis note is automatically overwritten by n8n.\n`;\nreturn [{ json: { ok, path: 'Resources/Obsidian Automation Health.md', body } }];\n"},"id":"e67008ad-0d9e-4546-a180-3d4223b8d05c","name":"Build Health Note","type":"n8n-nodes-base.code","typeVersion":2,"position":[1120,80]},{"parameters":{"method":"PUT","url":"={{'http://172.19.0.1:27123/vault/' + encodeURIComponent($json.path).replace(/%2F/g, '/')}}","sendHeaders":true,"headerParameters":{"parameters":[{"name":"Content-Type","value":"text/markdown"}]},"sendBody":true,"contentType":"raw","rawContentType":"text/markdown","body":"={{$json.body}}","options":{"timeout":30000},"authentication":"genericCredentialType","genericAuthType":"httpHeaderAuth"},"id":"d86d8942-966a-48fd-ad99-cf23408f2ae4","name":"Write Health Note","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[1400,80],"credentials":{"httpHeaderAuth":{"id":"465Swz2b71O2KRAK","name":"Obsidian Local REST API"}}}],"connections":{"Manual Trigger":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Hourly Health Schedule":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Check Obsidian REST":{"main":[[{"node":"Trigger Obsidian Reindex","type":"main","index":0}]]},"Trigger Obsidian Reindex":{"main":[[{"node":"Run RAG Embedding Check","type":"main","index":0}]]},"Run RAG Embedding Check":{"main":[[{"node":"Build Health Note","type":"main","index":0}]]},"Build Health Note":{"main":[[{"node":"Write Health Note","type":"main","index":0}]]}},"settings":{"executionOrder":"v1","callerPolicy":"workflowsFromSameOwner","availableInMCP":false},"staticData":{"node:Hourly Health 
Schedule":{"recurrenceRules":[]}},"meta":null,"pinData":null,"versionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","activeVersionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","versionCounter":4,"triggerCount":1,"tags":[],"shared":[{"updatedAt":"2026-05-14T21:36:33.056Z","createdAt":"2026-05-14T21:36:33.056Z","role":"workflow:owner","workflowId":"PCtD3PuQjzKLyEEE","projectId":"WGdp8QunI1tHpjXa","project":{"updatedAt":"2026-03-11T21:08:10.005Z","createdAt":"2026-03-11T21:05:11.541Z","id":"WGdp8QunI1tHpjXa","name":"will will <will@wills-portal.com>","type":"personal","icon":null,"description":null,"creatorId":"5ad50ead-6e6a-4d12-ab5b-e5db15835bb5"}}],"versionMetadata":{"name":null,"description":null}}]	[{"updatedAt":"2026-06-05T20:17:39.529Z","createdAt":"2026-05-14T21:36:33.045Z","id":"PCtD3PuQjzKLyEEE","name":"Obsidian Health + Reindex","description":null,"active":true,"isArchived":false,"nodes":[{"parameters":{},"id":"f9152036-4ee6-48cf-9f71-fd59ce617c52","name":"Manual Trigger","type":"n8n-nodes-base.manualTrigger","typeVersion":1,"position":[0,0]},{"parameters":{"rule":{"interval":[{"field":"hours","hoursInterval":1}]}},"id":"7845e784-c35b-4912-9d72-2463a06d95d2","name":"Hourly Health Schedule","type":"n8n-nodes-base.scheduleTrigger","typeVersion":1.2,"position":[0,180]},{"parameters":{"url":"http://172.19.0.1:27123/","options":{"timeout":10000}},"id":"4976f00c-3539-4d3a-a87d-f7f3ac1adf19","name":"Check Obsidian REST","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[280,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18810/reindex","options":{"timeout":300000}},"id":"8abf0596-3af6-4d56-b4d0-5284f13998ae","name":"Trigger Obsidian Reindex","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[560,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18814/check","options":{"timeout":240000}},"id":"248b4109-2d60-43bc-b598-cb766edde11f","name":"Run RAG Embedding 
Check","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[840,80],"continueOnFail":true},{"parameters":{"jsCode":"\nconst now = new Date().toISOString();\nconst reindex = $('Trigger Obsidian Reindex').first().json;\nconst rag = $('Run RAG Embedding Check').first().json;\nconst rest = $('Check Obsidian REST').first().json;\nconst ok = Boolean(rest.status === 'OK' \|\| rest.manifest \|\| rest.statusCode) && Boolean(rag.ok !== false) && Boolean(reindex.ok !== false);\nconst body = `# Obsidian Automation Health\n\nUpdated: ${now}\n\n## Status\n\n- Overall: ${ok ? 'OK' : 'Needs attention'}\n- Obsidian REST: ${rest.status \|\| rest.statusCode \|\| 'responded'}\n- Reindex trigger: ${JSON.stringify(reindex).slice(0, 500)}\n- RAG/embedding check: ${JSON.stringify(rag).slice(0, 1000)}\n\nThis note is automatically overwritten by n8n.\n`;\nreturn [{ json: { ok, path: 'Resources/Obsidian Automation Health.md', body } }];\n"},"id":"e67008ad-0d9e-4546-a180-3d4223b8d05c","name":"Build Health Note","type":"n8n-nodes-base.code","typeVersion":2,"position":[1120,80]},{"parameters":{"method":"PUT","url":"={{'http://172.19.0.1:27123/vault/' + encodeURIComponent($json.path).replace(/%2F/g, '/')}}","sendHeaders":true,"headerParameters":{"parameters":[{"name":"Content-Type","value":"text/markdown"}]},"sendBody":true,"contentType":"raw","rawContentType":"text/markdown","body":"={{$json.body}}","options":{"timeout":30000},"authentication":"genericCredentialType","genericAuthType":"httpHeaderAuth"},"id":"d86d8942-966a-48fd-ad99-cf23408f2ae4","name":"Write Health Note","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[1400,80],"credentials":{"httpHeaderAuth":{"id":"465Swz2b71O2KRAK","name":"Obsidian Local REST API"}},"continueOnFail":true,"alwaysOutputData":true}],"connections":{"Manual Trigger":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Hourly Health Schedule":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Check 
Obsidian REST":{"main":[[{"node":"Trigger Obsidian Reindex","type":"main","index":0}]]},"Trigger Obsidian Reindex":{"main":[[{"node":"Run RAG Embedding Check","type":"main","index":0}]]},"Run RAG Embedding Check":{"main":[[{"node":"Build Health Note","type":"main","index":0}]]},"Build Health Note":{"main":[[{"node":"Write Health Note","type":"main","index":0}]]}},"settings":{"executionOrder":"v1","callerPolicy":"workflowsFromSameOwner","availableInMCP":false},"staticData":{"node:Hourly Health Schedule":{"recurrenceRules":[]}},"meta":null,"pinData":null,"versionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","activeVersionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","versionCounter":4,"triggerCount":1,"tags":[],"shared":[{"updatedAt":"2026-05-14T21:36:33.056Z","createdAt":"2026-05-14T21:36:33.056Z","role":"workflow:owner","workflowId":"PCtD3PuQjzKLyEEE","projectId":"WGdp8QunI1tHpjXa","project":{"updatedAt":"2026-03-11T21:08:10.005Z","createdAt":"2026-03-11T21:05:11.541Z","id":"WGdp8QunI1tHpjXa","name":"will will <will@wills-portal.com>","type":"personal","icon":null,"description":null,"creatorId":"5ad50ead-6e6a-4d12-ab5b-e5db15835bb5"}}],"versionMetadata":{"name":null,"description":null}}]