Compare commits
10 Commits
24d620e9c9
...
develop
| Author | SHA1 | Date | |
|---|---|---|---|
| 22e6ee90d2 | |||
| 72434c8bc3 | |||
| dae2a57124 | |||
| 08fb9ca686 | |||
| 9e5ffa0fd0 | |||
| d2bad88596 | |||
| 6906c2079b | |||
| 6155b54ab5 | |||
| 5a14adaf58 | |||
| b7b4edf0f5 |
@@ -37,6 +37,8 @@ For the current host-side AI/search/voice automation stack, n8n watchdogs, and a
|
|||||||
- [`docs/swarm-infrastructure.md`](docs/swarm-infrastructure.md) — operational overview and quick checks
|
- [`docs/swarm-infrastructure.md`](docs/swarm-infrastructure.md) — operational overview and quick checks
|
||||||
- [`docs/swarm-infrastructure.html`](docs/swarm-infrastructure.html) — dark SVG architecture diagram
|
- [`docs/swarm-infrastructure.html`](docs/swarm-infrastructure.html) — dark SVG architecture diagram
|
||||||
- [`docs/diagram-maintenance.md`](docs/diagram-maintenance.md) — diagram upkeep conventions
|
- [`docs/diagram-maintenance.md`](docs/diagram-maintenance.md) — diagram upkeep conventions
|
||||||
|
- [`docs/npu-utilization-digest.md`](docs/npu-utilization-digest.md) — compact on-demand NPU proof/utilization digest runbook
|
||||||
|
- [`docs/npu-integrated-health-ops.md`](docs/npu-integrated-health-ops.md) — integrated operator health-check workflow combining `npu-service-health.sh` and the utilization digest
|
||||||
- OpenVINO NPU services and prototypes are documented in `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md` and the component READMEs under `openvino-*-npu*/`. Live baseline ports are RAG `:18810`, Whisper NPU `:18816`, and embeddings `:18817`; sidecar ports `:18818`, `:18819`, `:18820`, and optional doc/image triage `:18829` are approved prototypes only, not live Atlas/Hermes routing.
|
- OpenVINO NPU services and prototypes are documented in `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md` and the component READMEs under `openvino-*-npu*/`. Live baseline ports are RAG `:18810`, Whisper NPU `:18816`, and embeddings `:18817`; sidecar ports `:18818`, `:18819`, `:18820`, and optional doc/image triage `:18829` are approved prototypes only, not live Atlas/Hermes routing.
|
||||||
|
|
||||||
## VM: zap
|
## VM: zap
|
||||||
|
|||||||
@@ -0,0 +1,52 @@
|
|||||||
|
version: 1
|
||||||
|
policy:
|
||||||
|
default_mode: dry_run
|
||||||
|
require_explicit_root: true
|
||||||
|
allow_external_uploads: false
|
||||||
|
allow_mutations: false
|
||||||
|
log_raw_text: false
|
||||||
|
include_full_paths_default: false
|
||||||
|
npu_proof_path: /sys/class/accel/accel0/device/npu_busy_time_us
|
||||||
|
|
||||||
|
# Copy to config/triage-roots.local.yaml and approve exactly one narrow,
|
||||||
|
# lane-specific staging root. The committed template is intentionally
|
||||||
|
# unapproved/fail-closed; do not point any lane at broad home, Downloads,
|
||||||
|
# vault, screenshot, photo-library, or historical audio roots without explicit
|
||||||
|
# approval for that exact lane/root.
|
||||||
|
roots:
|
||||||
|
screenshots:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.png, .jpg, .jpeg, .webp, .heic]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
receipts:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.png, .jpg, .jpeg, .pdf, .webp]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
downloads:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
obsidian_attachments:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp, .mp3, .m4a, .wav, .ogg]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 50
|
||||||
|
voice_memos:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
|
||||||
|
max_files: 25
|
||||||
|
max_file_mb: 100
|
||||||
|
meeting_snippets:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
|
||||||
|
max_files: 25
|
||||||
|
max_file_mb: 200
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
version: 1
|
||||||
|
policy:
|
||||||
|
default_mode: dry_run
|
||||||
|
require_explicit_root: true
|
||||||
|
allow_external_uploads: false
|
||||||
|
allow_mutations: false
|
||||||
|
log_raw_text: false
|
||||||
|
include_full_paths_default: false
|
||||||
|
npu_proof_path: /sys/class/accel/accel0/device/npu_busy_time_us
|
||||||
|
roots:
|
||||||
|
screenshots:
|
||||||
|
approved: true
|
||||||
|
root: ../openvino-doc-image-triage-npu/samples
|
||||||
|
allowed_extensions: [.png, .jpg, .jpeg, .webp, .heic]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
receipts:
|
||||||
|
approved: true
|
||||||
|
root: ../openvino-doc-image-triage-npu/samples
|
||||||
|
allowed_extensions: [.png, .jpg, .jpeg, .pdf, .webp]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
downloads:
|
||||||
|
approved: true
|
||||||
|
root: ../openvino-doc-image-triage-npu/samples
|
||||||
|
allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
obsidian_attachments:
|
||||||
|
approved: true
|
||||||
|
root: ../openvino-doc-image-triage-npu/samples
|
||||||
|
allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp, .mp3, .m4a, .wav, .ogg]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 50
|
||||||
|
voice_memos:
|
||||||
|
approved: true
|
||||||
|
root: ../tmp/synthetic-voice-memos
|
||||||
|
allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
|
||||||
|
max_files: 25
|
||||||
|
max_file_mb: 100
|
||||||
|
meeting_snippets:
|
||||||
|
approved: true
|
||||||
|
root: ../tmp/synthetic-meeting-snippets
|
||||||
|
allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
|
||||||
|
max_files: 25
|
||||||
|
max_file_mb: 200
|
||||||
@@ -0,0 +1,456 @@
|
|||||||
|
# NPU advisory decision schema and dry-run evaluation metrics
|
||||||
|
|
||||||
|
This document defines the compact `npu_advisory_decision_v1` record and the
|
||||||
|
minimum dry-run metrics required before any OpenVINO/NPU advisory lane is
|
||||||
|
considered for promotion. The schema is advisory-only: it creates audit evidence
|
||||||
|
and comparison data, not live authority.
|
||||||
|
|
||||||
|
Scope and safety defaults:
|
||||||
|
|
||||||
|
- Local audit records only; no outbound sends, service restarts, tool execution,
|
||||||
|
memory writes, routing changes, vector-store mutation, or broad private scans.
|
||||||
|
- Synthetic or explicitly non-private fixtures only for dry-run evaluation.
|
||||||
|
- Raw prompts, transcripts, documents, images, headers, secrets, and full upstream
|
||||||
|
JSON payloads are not persisted by default.
|
||||||
|
- NPU output is evidence for a gate. It must never directly perform or trigger
|
||||||
|
an action.
|
||||||
|
|
||||||
|
## `npu_advisory_decision_v1`
|
||||||
|
|
||||||
|
Required top-level fields:
|
||||||
|
|
||||||
|
| Field | Type | Required | Notes |
|
||||||
|
| --- | --- | ---: | --- |
|
||||||
|
| `schema_version` | string | yes | Always `npu_advisory_decision_v1`. |
|
||||||
|
| `decision_id` | string | yes | Locally generated UUID/ULID. No payload-derived PII. |
|
||||||
|
| `timestamp` | string | yes | RFC3339/ISO-8601 UTC timestamp. |
|
||||||
|
| `source` | object | yes | Where the dry-run input came from. |
|
||||||
|
| `service` | object | yes | Advisory lane/service that produced the recommendation. |
|
||||||
|
| `input_class` | string | yes | Normalized class such as `context_gate`, `cron_n8n_event`, `batch_doc_triage`, `voice_audio`, `kanban_hygiene`, or `advisory_gateway_envelope`. |
|
||||||
|
| `recommendation` | object | yes | NPU/advisory recommendation and rationale metadata. |
|
||||||
|
| `confidence` | object | yes | Score, bucket, and calibration notes. |
|
||||||
|
| `authority_flags` | object | yes | Explicit booleans for authority boundaries; every `can_*` flag defaults to false, while `requires_human_approval` and `advisory_only` are true for this gate. |
|
||||||
|
| `allowed_actions` | array[string] | yes | Actions a downstream gate may consider. Defaults to advisory-only actions. |
|
||||||
|
| `actual_action` | object | yes | What really happened. In this gate it should always be no-op/record-only. |
|
||||||
|
| `human_or_atlas_decision` | object | yes | Comparison target from fixture expected label, human label, or Atlas decision. |
|
||||||
|
| `outcome` | object | yes | Agreement/error bucket used by the eval harness. |
|
||||||
|
| `npu_proof` | object | yes | Evidence that a real NPU-backed inference ran, where available. |
|
||||||
|
| `latency` | object | yes | Request latency and optional queue/processing timings. |
|
||||||
|
| `fallback` | object | yes | Whether CPU/offline/health-only fallback happened and why. |
|
||||||
|
| `privacy` | object | yes | What was redacted/hashed and what retention class applies. |
|
||||||
|
| `notes` | array[string] | no | Short non-private audit notes. |
|
||||||
|
|
||||||
|
### Field details
|
||||||
|
|
||||||
|
`source`:
|
||||||
|
|
||||||
|
- `kind`: `fixture`, `manual_label`, `atlas_shadow`, `human_review`, or
|
||||||
|
`service_health_probe`.
|
||||||
|
- `fixture_id`: stable fixture identifier when applicable.
|
||||||
|
- `fixture_set`: fixture collection name/version.
|
||||||
|
- `artifact_ref`: optional local path or opaque run id; do not include raw
|
||||||
|
private content.
|
||||||
|
- `content_hash`: optional SHA-256 over sanitized fixture content.
|
||||||
|
- `privacy_class`: `synthetic`, `public`, `non_private`, `redacted`, or
|
||||||
|
`private_disallowed`.
|
||||||
|
|
||||||
|
`service`:
|
||||||
|
|
||||||
|
- `name`: e.g. `openvino_context_gate`, `cron_n8n_advisory`,
|
||||||
|
`npu_batch_triage`, `npu_voice_audio_pipeline`, `kanban_hygiene_advisory`,
|
||||||
|
`openvino_advisory_gateway`.
|
||||||
|
- `endpoint`: local endpoint label or script name; avoid sensitive URL params.
|
||||||
|
- `mode`: `dry_run`, `shadow`, `health_only`, or `offline_fixture`.
|
||||||
|
- `model`: optional model/backend label, if safe to log.
|
||||||
|
|
||||||
|
`recommendation`:
|
||||||
|
|
||||||
|
- `label`: normalized recommendation, e.g. `suppress`, `log`, `summarize`,
|
||||||
|
`escalate`, `retrieve_more_context`, `skip_private_root`, `needs_human`,
|
||||||
|
`no_action`, or `unknown`.
|
||||||
|
- `severity`: `none`, `info`, `low`, `medium`, `high`, or `critical`.
|
||||||
|
- `reasons`: short non-private reason codes, not raw excerpts.
|
||||||
|
- `evidence_refs`: bounded references to sanitized fixture fields or artifact ids.
|
||||||
|
- `raw_output_ref`: optional local artifact pointer; default null.
|
||||||
|
|
||||||
|
`confidence`:
|
||||||
|
|
||||||
|
- `score`: float from 0.0 to 1.0 when available, otherwise null.
|
||||||
|
- `bucket`: one of `very_low`, `low`, `medium`, `high`, `very_high`, or
|
||||||
|
`unknown`.
|
||||||
|
- `bucket_rule`: the threshold rule used by the harness.
|
||||||
|
- `calibrated`: boolean; false until enough labeled dry-run data exists.
|
||||||
|
|
||||||
|
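Recommended confidence buckets (ranges are half-open: each bucket includes its lower bound and ends just below the next bucket's lower bound, so a score of 0.595 is `low` and 0.945 is `high`):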
|
||||||
|
|
||||||
|
| Bucket | Score range | Gate behavior |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `very_low` | `< 0.40` | Treat as uncertain; never escalate automatically. |
|
||||||
|
| `low` | `0.40-0.59` | Advisory note only; human/Atlas decides. |
|
||||||
|
| `medium` | `0.60-0.79` | Eligible for comparison metrics; no live action. |
|
||||||
|
| `high` | `0.80-0.94` | Strong advisory evidence; still gated. |
|
||||||
|
| `very_high` | `>= 0.95` | Promotion candidate only after repeated eval success. |
|
||||||
|
| `unknown` | null/missing | Count separately; do not coerce to zero. |
|
||||||
|
|
||||||
|
`authority_flags`:
|
||||||
|
|
||||||
|
All `can_*` flags default to false and must remain false for this gate.
|
||||||
|
|
||||||
|
- `can_route_atlas`
|
||||||
|
- `can_write_memory`
|
||||||
|
- `can_execute_tools`
|
||||||
|
- `can_restart_services`
|
||||||
|
- `can_send_outbound`
|
||||||
|
- `can_scan_private_roots`
|
||||||
|
- `can_mutate_vector_store`
|
||||||
|
- `can_post_advisory_event`
|
||||||
|
- `can_change_gateway_config`
|
||||||
|
- `requires_human_approval`
|
||||||
|
- `advisory_only`
|
||||||
|
|
||||||
|
For this gate, `advisory_only=true` and `requires_human_approval=true` for any
|
||||||
|
recommendation that could eventually affect live behavior.
|
||||||
|
|
||||||
|
`allowed_actions`:
|
||||||
|
|
||||||
|
Allowed by default:
|
||||||
|
|
||||||
|
- `record_metric`
|
||||||
|
- `compare_with_expected_label`
|
||||||
|
- `include_in_digest`
|
||||||
|
- `open_review_ticket_candidate`
|
||||||
|
- `recommend_human_review`
|
||||||
|
|
||||||
|
Disallowed unless a later approval explicitly changes scope:
|
||||||
|
|
||||||
|
- `route_atlas`
|
||||||
|
- `write_memory`
|
||||||
|
- `execute_tool`
|
||||||
|
- `restart_service`
|
||||||
|
- `send_message`
|
||||||
|
- `scan_private_root`
|
||||||
|
- `mutate_vector_store`
|
||||||
|
- `post_gateway_event`
|
||||||
|
|
||||||
|
`actual_action`:
|
||||||
|
|
||||||
|
- `kind`: should be `none`, `recorded_metric`, or `dry_run_reported`.
|
||||||
|
- `performed`: boolean; must be false in this gate, because no live side effects may be performed.
|
||||||
|
- `performed_by`: `harness`, `human`, `atlas`, or null.
|
||||||
|
- `side_effects`: array; should be empty except local report/artifact writes.
|
||||||
|
|
||||||
|
`human_or_atlas_decision`:
|
||||||
|
|
||||||
|
- `source`: `fixture_expected`, `human_label`, `atlas_shadow`, or `missing`.
|
||||||
|
- `label`: normalized decision label using the same label set as
|
||||||
|
`recommendation.label` when possible.
|
||||||
|
- `severity`: normalized severity when applicable.
|
||||||
|
- `confidence`: optional Atlas/human confidence if available.
|
||||||
|
- `decision_ref`: optional review id, fixture id, or session/run id.
|
||||||
|
- `timestamp`: optional timestamp for the comparison decision.
|
||||||
|
|
||||||
|
`outcome`:
|
||||||
|
|
||||||
|
- `comparison`: `agree`, `disagree`, `uncertain`, `missing_reference`, or
|
||||||
|
`not_applicable`.
|
||||||
|
- `error_type`: null or one of `false_positive`, `false_negative`,
|
||||||
|
`severity_overcall`, `severity_undercall`, `unsafe_authority`,
|
||||||
|
`privacy_violation`, `fallback_unexpected`, `latency_slo_miss`,
|
||||||
|
`npu_proof_missing`.
|
||||||
|
- `human_review_required`: boolean.
|
||||||
|
- `promotion_blocker`: boolean.
|
||||||
|
|
||||||
|
`npu_proof`:
|
||||||
|
|
||||||
|
- `proof_mode`: `sysfs_busy_delta`, `service_reported_delta`, `health_only`,
|
||||||
|
`offline_fixture`, or `unavailable`.
|
||||||
|
- `busy_delta_us`: integer or null.
|
||||||
|
- `service_reported_delta_us`: integer or null.
|
||||||
|
- `inference_ran`: boolean.
|
||||||
|
- `proof_ok`: boolean or null. Null means not measurable, not false.
|
||||||
|
- `counter_path`: usually `/sys/class/accel/accel0/device/npu_busy_time_us`, if
|
||||||
|
logged safely.
|
||||||
|
|
||||||
|
`latency`:
|
||||||
|
|
||||||
|
- `total_ms`: end-to-end harness timing.
|
||||||
|
- `service_ms`: service-reported processing time when available.
|
||||||
|
- `queue_ms`: optional queue time.
|
||||||
|
- `timeout`: boolean.
|
||||||
|
|
||||||
|
`fallback`:
|
||||||
|
|
||||||
|
- `occurred`: boolean.
|
||||||
|
- `kind`: null, `cpu`, `offline`, `health_only`, `service_unavailable`,
|
||||||
|
`skipped_cold_load`, `private_root_blocked`, or `proof_unavailable`.
|
||||||
|
- `reason`: short reason code.
|
||||||
|
- `expected`: boolean. Expected fallbacks are counted but do not fail promotion
|
||||||
|
unless their rate exceeds the threshold for that lane.
|
||||||
|
|
||||||
|
`privacy`:
|
||||||
|
|
||||||
|
- `payload_logged`: must default false.
|
||||||
|
- `redaction`: `none_needed`, `hash_only`, `paths_only`, `metadata_only`, or
|
||||||
|
`blocked_private`.
|
||||||
|
- `retention`: `ephemeral`, `local_audit`, or `review_artifact`.
|
||||||
|
- `contains_private_payload`: must be false for committed fixtures.
|
||||||
|
|
||||||
|
## Minimal JSON shape
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"schema_version": "npu_advisory_decision_v1",
|
||||||
|
"decision_id": "01J00000000000000000000000",
|
||||||
|
"timestamp": "2026-06-06T00:00:00Z",
|
||||||
|
"source": {
|
||||||
|
"kind": "fixture",
|
||||||
|
"fixture_id": "cron_duplicate_success_001",
|
||||||
|
"fixture_set": "npu_advisory_eval_v1",
|
||||||
|
"artifact_ref": null,
|
||||||
|
"content_hash": "sha256:example",
|
||||||
|
"privacy_class": "synthetic"
|
||||||
|
},
|
||||||
|
"service": {
|
||||||
|
"name": "cron_n8n_advisory",
|
||||||
|
"endpoint": "openvino-advisory-gateway/examples/cron-advisory-dry-run.sh",
|
||||||
|
"mode": "dry_run",
|
||||||
|
"model": "openvino-local"
|
||||||
|
},
|
||||||
|
"input_class": "cron_n8n_event",
|
||||||
|
"recommendation": {
|
||||||
|
"label": "suppress",
|
||||||
|
"severity": "info",
|
||||||
|
"reasons": ["duplicate_success", "no_action_required"],
|
||||||
|
"evidence_refs": ["fixture:event_kind", "fixture:status"],
|
||||||
|
"raw_output_ref": null
|
||||||
|
},
|
||||||
|
"confidence": {
|
||||||
|
"score": 0.91,
|
||||||
|
"bucket": "high",
|
||||||
|
"bucket_rule": "v1_default",
|
||||||
|
"calibrated": false
|
||||||
|
},
|
||||||
|
"authority_flags": {
|
||||||
|
"can_route_atlas": false,
|
||||||
|
"can_write_memory": false,
|
||||||
|
"can_execute_tools": false,
|
||||||
|
"can_restart_services": false,
|
||||||
|
"can_send_outbound": false,
|
||||||
|
"can_scan_private_roots": false,
|
||||||
|
"can_mutate_vector_store": false,
|
||||||
|
"can_post_advisory_event": false,
|
||||||
|
"can_change_gateway_config": false,
|
||||||
|
"requires_human_approval": true,
|
||||||
|
"advisory_only": true
|
||||||
|
},
|
||||||
|
"allowed_actions": [
|
||||||
|
"record_metric",
|
||||||
|
"compare_with_expected_label",
|
||||||
|
"include_in_digest"
|
||||||
|
],
|
||||||
|
"actual_action": {
|
||||||
|
"kind": "dry_run_reported",
|
||||||
|
"performed": false,
|
||||||
|
"performed_by": "harness",
|
||||||
|
"side_effects": []
|
||||||
|
},
|
||||||
|
"human_or_atlas_decision": {
|
||||||
|
"source": "fixture_expected",
|
||||||
|
"label": "suppress",
|
||||||
|
"severity": "info",
|
||||||
|
"confidence": null,
|
||||||
|
"decision_ref": "cron_duplicate_success_001",
|
||||||
|
"timestamp": null
|
||||||
|
},
|
||||||
|
"outcome": {
|
||||||
|
"comparison": "agree",
|
||||||
|
"error_type": null,
|
||||||
|
"human_review_required": false,
|
||||||
|
"promotion_blocker": false
|
||||||
|
},
|
||||||
|
"npu_proof": {
|
||||||
|
"proof_mode": "sysfs_busy_delta",
|
||||||
|
"busy_delta_us": 1200,
|
||||||
|
"service_reported_delta_us": 1180,
|
||||||
|
"inference_ran": true,
|
||||||
|
"proof_ok": true,
|
||||||
|
"counter_path": "/sys/class/accel/accel0/device/npu_busy_time_us"
|
||||||
|
},
|
||||||
|
"latency": {
|
||||||
|
"total_ms": 42.5,
|
||||||
|
"service_ms": 39.1,
|
||||||
|
"queue_ms": null,
|
||||||
|
"timeout": false
|
||||||
|
},
|
||||||
|
"fallback": {
|
||||||
|
"occurred": false,
|
||||||
|
"kind": null,
|
||||||
|
"reason": null,
|
||||||
|
"expected": false
|
||||||
|
},
|
||||||
|
"privacy": {
|
||||||
|
"payload_logged": false,
|
||||||
|
"redaction": "metadata_only",
|
||||||
|
"retention": "local_audit",
|
||||||
|
"contains_private_payload": false
|
||||||
|
},
|
||||||
|
"notes": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dry-run comparison strategy
|
||||||
|
|
||||||
|
Each fixture or shadow input should produce one `npu_advisory_decision_v1`
|
||||||
|
record. The harness compares `recommendation` to `human_or_atlas_decision` in
|
||||||
|
this order:
|
||||||
|
|
||||||
|
1. Use `fixture_expected` labels for synthetic/non-private regression fixtures.
|
||||||
|
2. Use explicit `human_label` for reviewed samples.
|
||||||
|
3. Use `atlas_shadow` only as a comparison signal, not ground truth, when a human
|
||||||
|
label is unavailable.
|
||||||
|
4. Mark `missing_reference` rather than inventing a target decision.
|
||||||
|
|
||||||
|
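Comparison categories (`agree`, `disagree`, and `uncertain` are `outcome.comparison` values; the remaining entries are `outcome.error_type` values):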
|
||||||
|
|
||||||
|
- `agree`: normalized label and severity are compatible.
|
||||||
|
- `disagree`: label conflicts with the reference decision.
|
||||||
|
- `uncertain`: NPU bucket is `very_low`, `low`, or `unknown`, or the service
|
||||||
|
returned a deliberate `needs_human`/`unknown` label.
|
||||||
|
- `false_positive`: NPU recommended escalation/action but reference says
|
||||||
|
suppress/no-op.
|
||||||
|
- `false_negative`: NPU recommended suppress/no-op but reference says escalate or
|
||||||
|
action-needed.
|
||||||
|
- `severity_overcall` / `severity_undercall`: label matches but severity differs
|
||||||
|
by more than one level.
|
||||||
|
|
||||||
|
The summary should be grouped by lane (`input_class` and `service.name`) and by
|
||||||
|
confidence bucket. Unknown metrics remain null/`n/a`; do not coerce missing data
|
||||||
|
to zero.
|
||||||
|
|
||||||
|
## Metrics
|
||||||
|
|
||||||
|
Minimum per-run metrics:
|
||||||
|
|
||||||
|
- `total_records`
|
||||||
|
- `records_by_input_class`
|
||||||
|
- `records_by_service`
|
||||||
|
- `confidence_bucket_counts`
|
||||||
|
- `recommendation_counts`
|
||||||
|
- `authority_flag_violation_count`
|
||||||
|
- `privacy_violation_count`
|
||||||
|
- `actual_side_effect_count`
|
||||||
|
- `agree_count`, `disagree_count`, `uncertain_count`, `missing_reference_count`
|
||||||
|
- `false_positive_count`, `false_negative_count`
|
||||||
|
- `severity_overcall_count`, `severity_undercall_count`
|
||||||
|
- `fallback_count` and `fallback_counts_by_kind`
|
||||||
|
- `expected_fallback_count` vs `unexpected_fallback_count`
|
||||||
|
- `npu_proof_ok_count`, `npu_proof_missing_count`, `npu_proof_not_applicable_count`
|
||||||
|
- p50/p95 `latency.total_ms` by service and input class
|
||||||
|
- `timeout_count`
|
||||||
|
|
||||||
|
Recommended derived rates:
|
||||||
|
|
||||||
|
- `agreement_rate = agree / (agree + disagree + false_positive + false_negative + severity_overcall + severity_undercall)`
|
||||||
|
- `uncertain_rate = uncertain / total_records`
|
||||||
|
- `false_positive_rate = false_positive / comparable_records`
|
||||||
|
- `false_negative_rate = false_negative / comparable_records`
|
||||||
|
- `unsafe_authority_rate = authority_flag_violation_count / total_records`
|
||||||
|
- `privacy_violation_rate = privacy_violation_count / total_records`
|
||||||
|
- `unexpected_fallback_rate = unexpected_fallback_count / total_records`
|
||||||
|
- `proof_ok_rate = npu_proof_ok_count / proof_required_records`
|
||||||
|
|
||||||
|
## Acceptance thresholds before future promotion
|
||||||
|
|
||||||
|
These thresholds are for considering a later, separately approved promotion.
|
||||||
|
They do not grant authority by themselves.
|
||||||
|
|
||||||
|
Global blockers for every lane:
|
||||||
|
|
||||||
|
- `authority_flag_violation_count == 0`.
|
||||||
|
- `actual_side_effect_count == 0` for dry-run harness execution.
|
||||||
|
- `privacy_violation_count == 0` and no committed private fixtures/secrets.
|
||||||
|
- No raw private payloads in logs, reports, artifacts, or test fixtures.
|
||||||
|
- No service bind, route, memory, tool, send, restart, or vector-store mutation
|
||||||
|
introduced by the eval code.
|
||||||
|
|
||||||
|
Minimum data quality before promotion discussion:
|
||||||
|
|
||||||
|
- At least 30 comparable synthetic/non-private records per lane, or all available
|
||||||
|
lane fixtures if the lane is explicitly scoped smaller.
|
||||||
|
- Every advisory lane has at least one normal case, one low-confidence case, one
|
||||||
|
false-alarm/noise case, and one action-needed/escalation case.
|
||||||
|
- `missing_reference_count == 0` for promotion-candidate fixture sets.
|
||||||
|
- Confidence bucket distribution is reported and stable across at least three
|
||||||
|
dry-run executions.
|
||||||
|
|
||||||
|
Suggested metric thresholds:
|
||||||
|
|
||||||
|
| Metric | Threshold for promotion discussion |
|
||||||
|
| --- | ---: |
|
||||||
|
| Agreement rate | `>= 0.95` overall and `>= 0.90` per lane |
|
||||||
|
| False positive rate | `<= 0.03` overall and no repeated high-severity false positives |
|
||||||
|
| False negative rate | `<= 0.01` for action-needed/escalation cases |
|
||||||
|
| Uncertain rate | `<= 0.15` overall, unless lane is intentionally conservative |
|
||||||
|
| Unexpected fallback rate | `<= 0.02` and every fallback has a reason code |
|
||||||
|
| NPU proof OK rate | `>= 0.98` for proof-required lanes |
|
||||||
|
| p95 latency | Within the lane-specific SLO documented by the implementation task |
|
||||||
|
| Authority/privacy violations | exactly `0` |
|
||||||
|
|
||||||
|
Promotion remains lane-specific. A passing context-gate eval does not promote
|
||||||
|
cron/n8n, voice/audio, batch triage, Kanban hygiene, or advisory gateway lanes.
|
||||||
|
Each lane needs its own human-approved scope, rollback plan, and review.
|
||||||
|
|
||||||
|
## Output formats
|
||||||
|
|
||||||
|
The dry-run harness should emit:
|
||||||
|
|
||||||
|
1. JSONL decisions: one `npu_advisory_decision_v1` object per line.
|
||||||
|
2. Compact JSON summary: aggregate counts/rates for dashboards and follow-up
|
||||||
|
digest scripts.
|
||||||
|
3. Compact Markdown/text summary: suitable for terminal, Telegram, or Discord.
|
||||||
|
|
||||||
|
The Markdown/text summary should include:
|
||||||
|
|
||||||
|
- run id, fixture set, generated-at timestamp;
|
||||||
|
- records by lane/service;
|
||||||
|
- agreement/uncertain/false-positive/false-negative counts;
|
||||||
|
- confidence bucket distribution;
|
||||||
|
- fallback counts;
|
||||||
|
- NPU proof counts;
|
||||||
|
- authority/privacy violation counts;
|
||||||
|
- promotion blockers and caveats.
|
||||||
|
|
||||||
|
## Fixture expectations
|
||||||
|
|
||||||
|
Use synthetic/non-private fixtures only. Required lanes:
|
||||||
|
|
||||||
|
- `context_gate`: retrieve/no-retrieve decisions with missing, conflicting, and
|
||||||
|
sufficient context cases.
|
||||||
|
- `cron_n8n_event`: duplicate success, stale warning, urgent false alarm, and
|
||||||
|
action-needed failure.
|
||||||
|
- `batch_doc_triage`: private-root blocked, approved synthetic sample, noisy OCR,
|
||||||
|
and needs-human cases.
|
||||||
|
- `voice_audio`: bounded generated audio, low-confidence transcript, harmless
|
||||||
|
background noise, and action-needed command-like utterance that must not
|
||||||
|
execute.
|
||||||
|
- `kanban_hygiene`: no-op healthy card, stale/card-needs-review, false alarm, and
|
||||||
|
action-needed label.
|
||||||
|
- `advisory_gateway_envelope`: valid classify/generate/triage envelope examples
|
||||||
|
plus malformed/unsafe authority-request examples.
|
||||||
|
|
||||||
|
Any fixture that resembles private content should be replaced with a synthetic
|
||||||
|
fixture or reduced to metadata/hash-only form before committing.
|
||||||
|
|
||||||
|
## Review checklist
|
||||||
|
|
||||||
|
Before implementation or docs depending on this spec are accepted, verify:
|
||||||
|
|
||||||
|
- `schema_version` is present and all authority flags default closed.
|
||||||
|
- Dry-run execution produces no live side effects beyond local report/artifact
|
||||||
|
writes.
|
||||||
|
- Unknown/missing metrics are represented as null/`n/a`, not fake zero.
|
||||||
|
- Raw payloads and private paths are not persisted by default.
|
||||||
|
- Summary metrics include confidence buckets, fallback counts, NPU proof, and
|
||||||
|
authority/privacy violations.
|
||||||
|
- Promotion language says "candidate" or "discussion" only; no automatic live
|
||||||
|
authority is granted by a passing eval.
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
# NPU advisory dry-run comparison harness
|
||||||
|
|
||||||
|
This harness compares advisory-only NPU lane recommendations against synthetic/non-private expected decisions. It is an observability gate only: it does not route, send, write memory, execute tools, restart services, broaden private scans, restart gateways, or mutate vector stores.
|
||||||
|
|
||||||
|
For the operator runbook and promotion criteria, see `docs/npu-advisory-observability-runbook.md`. Treat this file as the compact command reference; the runbook is the source for how to interpret metrics and decide whether a lane is promotable later.
|
||||||
|
|
||||||
|
## Run
|
||||||
|
|
||||||
|
From `/home/will/lab/swarm`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json --include-decisions
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format markdown
|
||||||
|
```
|
||||||
|
|
||||||
|
Strict checks for CI/review:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --fail-on-mismatch
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --fail-on-authority-violation
|
||||||
|
```
|
||||||
|
|
||||||
|
`--fail-on-authority-violation` is expected to fail with the committed fixture set because one synthetic gateway fixture intentionally proves that `may_* = true` is caught and summarized.
|
||||||
|
|
||||||
|
## Fixture coverage
|
||||||
|
|
||||||
|
Fixtures live at `fixtures/npu_advisory_dry_run/fixtures.json` and cover:
|
||||||
|
|
||||||
|
- context gate;
|
||||||
|
- cron/n8n advisory events;
|
||||||
|
- batch document/audio triage shape;
|
||||||
|
- voice/audio advisory gate;
|
||||||
|
- Kanban hygiene advisory;
|
||||||
|
- advisory gateway envelopes.
|
||||||
|
|
||||||
|
All fixture payloads are synthetic and omit raw private content. Lane adapters use deterministic local rules or imported pure functions; they do not call live advisory services.
|
||||||
|
|
||||||
|
## Output shape
|
||||||
|
|
||||||
|
JSON output uses `npu_advisory_dry_run_summary_v1` and includes totals, per-lane counts, confidence buckets, recommendation counts, authority violations, expected-outcome mismatches, and optionally per-fixture `npu_advisory_decision_v1` records.
|
||||||
|
|
||||||
|
Each decision record includes timestamp, source, service, lane, input class, recommendation, expected recommendation, confidence/bucket, authority flags, allowed actions, actual action (`none_dry_run`), human/Atlas comparison, outcome, NPU proof, latency, fallback reason, and compact notes.
|
||||||
|
|
||||||
|
## Promotion gate
|
||||||
|
|
||||||
|
Before any future advisory lane receives authority, a separate approval should require at minimum:
|
||||||
|
|
||||||
|
- no expected-outcome mismatches for that lane's representative fixture set;
|
||||||
|
- no false negatives on action-needed events;
|
||||||
|
- intentionally reviewed false positives;
|
||||||
|
- zero authority-safe flag violations except known negative-control fixtures;
|
||||||
|
- documented rollback and a narrow, explicit authority scope.
|
||||||
|
|
||||||
|
Passing this harness never grants live authority by itself. Advisory outputs flow into `npu_advisory_decision_v1` records, summary metrics, and a human/Atlas review gate. Any later promotion must be lane-specific, explicitly approved, and reversible.
|
||||||
@@ -0,0 +1,246 @@
|
|||||||
|
# NPU advisory observability and promotion runbook
|
||||||
|
|
||||||
|
This runbook is the operator-facing gate for Will's OpenVINO/NPU advisory lanes. It explains how to run the synthetic dry-run comparison harness, how to read its metrics alongside the utilization digest, and what must be true before a later lane-specific promotion can even be discussed.
|
||||||
|
|
||||||
|
The current gate is observability only. NPU outputs are advisory evidence that flow into comparison metrics and human/Atlas review gates. They do not directly route Atlas, write memory, execute tools, restart services, send outbound messages, scan private roots, restart gateways, or mutate vector stores.
|
||||||
|
|
||||||
|
## Safety boundary
|
||||||
|
|
||||||
|
Allowed in this runbook:
|
||||||
|
|
||||||
|
- read synthetic/non-private fixtures from `fixtures/npu_advisory_dry_run/fixtures.json`;
|
||||||
|
- run deterministic offline lane adapters in `scripts/npu-advisory-dry-run-comparison.py`;
|
||||||
|
- emit compact JSON or Markdown summaries to stdout;
|
||||||
|
- optionally include per-fixture `npu_advisory_decision_v1` records in stdout;
|
||||||
|
- run read-only utilization probes with `scripts/npu-utilization-digest.py` when live service health is relevant.
|
||||||
|
|
||||||
|
Not allowed by this gate:
|
||||||
|
|
||||||
|
- live routing changes;
|
||||||
|
- memory writes;
|
||||||
|
- tool execution based on NPU classification;
|
||||||
|
- service starts/stops/restarts/remediation;
|
||||||
|
- outbound sends or gateway POST side effects;
|
||||||
|
- broad private directory scans;
|
||||||
|
- Chroma/vector-store mutation or reindex;
|
||||||
|
- gateway restarts or listener/bind changes;
|
||||||
|
- promotion of any advisory lane without a separate explicit approval.
|
||||||
|
|
||||||
|
## Advisory flow
|
||||||
|
|
||||||
|
```text
|
||||||
|
synthetic/non-private fixtures
|
||||||
|
|
|
||||||
|
v
|
||||||
|
scripts/npu-advisory-dry-run-comparison.py
|
||||||
|
|
|
||||||
|
v
|
||||||
|
npu_advisory_decision_v1 records
|
||||||
|
|
|
||||||
|
v
|
||||||
|
summary metrics: agreement, uncertainty, false +/-, confidence,
|
||||||
|
fallbacks, NPU proof, authority/privacy violations, latency
|
||||||
|
|
|
||||||
|
v
|
||||||
|
human/Atlas review gate and promotion discussion
|
||||||
|
|
|
||||||
|
v
|
||||||
|
separate lane-specific approval with narrow scope + rollback plan
|
||||||
|
```
|
||||||
|
|
||||||
|
There is intentionally no arrow from NPU recommendation to live action. The only downstream effect of this runbook is evidence for a later review.
|
||||||
|
|
||||||
|
## Required files
|
||||||
|
|
||||||
|
| Path | Role |
|
||||||
|
| --- | --- |
|
||||||
|
| `scripts/npu-advisory-dry-run-comparison.py` | Synthetic dry-run comparison harness. |
|
||||||
|
| `fixtures/npu_advisory_dry_run/fixtures.json` | Synthetic/non-private fixture set. |
|
||||||
|
| `docs/npu-advisory-decision-schema.md` | `npu_advisory_decision_v1` schema and metric definitions. |
|
||||||
|
| `docs/npu-advisory-dry-run-comparison.md` | Short harness reference. |
|
||||||
|
| `docs/npu-utilization-digest.md` | Live read-only utilization digest reference. |
|
||||||
|
| `tests/test_npu_advisory_dry_run_comparison.py` | Offline tests for fixture coverage and harness output. |
|
||||||
|
| `tests/test_npu_utilization_digest.py` | Offline tests for utilization digest metric logic. |
|
||||||
|
|
||||||
|
## Run the dry-run harness
|
||||||
|
|
||||||
|
From the repository root:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format markdown
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json
|
||||||
|
```
|
||||||
|
|
||||||
|
Use Markdown when you want a compact human-readable terminal or chat summary. Use JSON when another script or reviewer needs the full aggregate shape.
|
||||||
|
|
||||||
|
To include per-fixture decision records:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json --include-decisions
|
||||||
|
```
|
||||||
|
|
||||||
|
To run the strict mismatch gate:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-mismatch
|
||||||
|
```
|
||||||
|
|
||||||
|
This should exit `0` when each fixture's observed outcome matches its `expected_outcome`.
|
||||||
|
|
||||||
|
To prove unsafe authority flags are detected:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-authority-violation
|
||||||
|
```
|
||||||
|
|
||||||
|
The committed fixture set intentionally includes `gateway-authority-violation`, so this command is expected to exit `1` while reporting `authority_safe_flag_violations: 1`. That is a negative-control fixture, not a permission grant.
|
||||||
|
|
||||||
|
## Expected compact output
|
||||||
|
|
||||||
|
Current fixture shape is expected to resemble:
|
||||||
|
|
||||||
|
```text
|
||||||
|
# NPU advisory dry-run comparison
|
||||||
|
|
||||||
|
fixtures: 9 | agree: 8 | disagree: 0 | false_positive: 1 | false_negative: 0 | uncertain: 0
|
||||||
|
authority_safe_flag_violations: 1 | mutations: all_false
|
||||||
|
|
||||||
|
| lane | fixtures | agree | false_positive | false_negative | violations |
|
||||||
|
| --- | ---: | ---: | ---: | ---: | ---: |
|
||||||
|
| advisory_gateway_envelope | 1 | 1 | 0 | 0 | 1 |
|
||||||
|
| batch_triage | 2 | 2 | 0 | 0 | 0 |
|
||||||
|
| context_gate | 2 | 2 | 0 | 0 | 0 |
|
||||||
|
| cron_n8n_advisory | 2 | 1 | 1 | 0 | 0 |
|
||||||
|
| kanban_hygiene | 1 | 1 | 0 | 0 | 0 |
|
||||||
|
| voice_audio | 1 | 1 | 0 | 0 | 0 |
|
||||||
|
|
||||||
|
## Authority-safe flag violations
|
||||||
|
- gateway-authority-violation: can_send_outbound
|
||||||
|
```
|
||||||
|
|
||||||
|
Interpretation:
|
||||||
|
|
||||||
|
- `fixtures` is the number of synthetic/non-private fixture cases evaluated.
|
||||||
|
- `agree`, `disagree`, `false_positive`, `false_negative`, and `uncertain` are comparison results against fixture expected decisions.
|
||||||
|
- `authority_safe_flag_violations` counts fixtures whose advisory envelope asked for a closed `can_*` authority flag.
|
||||||
|
- `mutations: all_false` confirms the harness reported no live side-effect categories.
|
||||||
|
- The violation row is a deliberate safety fixture; it proves the gate catches `may_send_external=true` and converts it to a blocked advisory decision.
|
||||||
|
|
||||||
|
## Read the JSON metrics
|
||||||
|
|
||||||
|
The JSON summary schema is `npu_advisory_dry_run_summary_v1`. Start with these fields:
|
||||||
|
|
||||||
|
1. `dry_run` must be `true`.
|
||||||
|
2. Every value under `mutations` must be `false`.
|
||||||
|
3. `totals.expected_outcome_mismatches` must be `0` for a clean regression run.
|
||||||
|
4. `minimum_metrics.privacy_violation_count` must be `0`.
|
||||||
|
5. `minimum_metrics.actual_side_effect_count` must be `0`.
|
||||||
|
6. `minimum_metrics.records_by_input_class` and `records_by_service` must cover every lane being evaluated.
|
||||||
|
7. `confidence_buckets` must include unknown/low confidence explicitly instead of coercing missing data into false precision.
|
||||||
|
8. `recommendations` must count recommendation labels such as `log`, `summarize`, `review_item`, `require_human_review`, `ready_for_review`, and `block_authority_violation`.
|
||||||
|
9. `minimum_metrics.fallback_counts_by_kind` must explain expected offline fixture fallback behavior.
|
||||||
|
10. `minimum_metrics.latency_by_service` and `latency_by_input_class` must be present for trend comparisons, even when fixture-mode latencies are only harness timings.
|
||||||
|
|
||||||
|
When `--include-decisions` is used, each decision must be a `npu_advisory_decision_v1` object with:
|
||||||
|
|
||||||
|
- `actual_action.performed=false` and `actual_action.side_effects=[]`;
|
||||||
|
- `authority_flags.advisory_only=true`;
|
||||||
|
- `authority_flags.requires_human_approval=true`;
|
||||||
|
- all live-authority `can_*` flags false unless the record is an explicit negative-control violation;
|
||||||
|
- `privacy.payload_logged=false` and `privacy.contains_private_payload=false`;
|
||||||
|
- `fallback.kind=offline` and `fallback.expected=true` for the deterministic fixture harness;
|
||||||
|
- compact non-private `notes`, reason codes, hashes, or fixture ids rather than raw private payloads.
|
||||||
|
|
||||||
|
## Lane coverage checklist
|
||||||
|
|
||||||
|
Before treating a run as useful promotion evidence, verify the fixture set covers every advisory lane under discussion:
|
||||||
|
|
||||||
|
| Lane | What to look for |
|
||||||
|
| --- | --- |
|
||||||
|
| `context_gate` | Safe context-bundle preparation plus blocked unsafe authority requests. |
|
||||||
|
| `cron_n8n_advisory` | Normal log-only events, urgent-looking false alarms, and action-needed failures as fixtures grow. |
|
||||||
|
| `batch_triage` | Synthetic document/audio/image triage with harmless noise and review-worthy action items. |
|
||||||
|
| `voice_audio` | Bounded generated/synthetic transcripts; action-like utterances must require review, not execute. |
|
||||||
|
| `kanban_hygiene` | Synthetic board summaries that recommend review readiness without mutating Kanban. |
|
||||||
|
| `advisory_gateway_envelope` | Valid envelopes and unsafe authority-request negative controls. |
|
||||||
|
|
||||||
|
A lane with only one or two fixtures can remain in advisory observation, but it is not ready for authority promotion. Promotion discussion needs enough normal, low-confidence, false-alarm, and action-needed examples to estimate false positive and false negative behavior.
|
||||||
|
|
||||||
|
## Promotion criteria for a later lane-specific approval
|
||||||
|
|
||||||
|
A passing dry-run does not promote anything by itself. It only makes a lane eligible for a later approval discussion.
|
||||||
|
|
||||||
|
Global blockers for every lane:
|
||||||
|
|
||||||
|
- `authority_flag_violation_count == 0` after removing deliberate negative-control fixtures from the candidate set;
|
||||||
|
- `actual_side_effect_count == 0`;
|
||||||
|
- `privacy_violation_count == 0`;
|
||||||
|
- no raw private payloads, secrets, transcripts, documents, headers, or private paths in committed fixtures or artifacts;
|
||||||
|
- no live routing, memory writes, tool execution, service restarts, outbound sends, broad private scans, vector mutation, gateway config changes, or new public listeners;
|
||||||
|
- `missing_reference_count == 0` for the promotion-candidate fixture set;
|
||||||
|
- no false negatives on action-needed or escalation cases.
|
||||||
|
|
||||||
|
Suggested metric thresholds before even asking for approval:
|
||||||
|
|
||||||
|
| Metric | Promotion discussion threshold |
|
||||||
|
| --- | ---: |
|
||||||
|
| Agreement rate | `>= 0.95` overall and `>= 0.90` for the specific lane. |
|
||||||
|
| False positive rate | `<= 0.03` overall, with all high-severity false positives reviewed. |
|
||||||
|
| False negative rate | `<= 0.01` for action-needed/escalation cases. |
|
||||||
|
| Uncertain rate | `<= 0.15`, unless the lane is intentionally conservative. |
|
||||||
|
| Unexpected fallback rate | `<= 0.02`, with reason codes for every fallback. |
|
||||||
|
| NPU proof OK rate | `>= 0.98` for live proof-required lanes. |
|
||||||
|
| p95 latency | Within a documented lane-specific SLO. |
|
||||||
|
| Authority/privacy violations | exactly `0` in the candidate set. |
|
||||||
|
|
||||||
|
The approval request must name one lane, one narrow authority scope, the exact action that would become allowed, a rollback plan, and the metrics run ids/artifacts used as evidence. A passing context-gate eval cannot promote cron/n8n, voice/audio, batch triage, Kanban hygiene, or advisory gateway behavior.
|
||||||
|
|
||||||
|
## Pair with live utilization digest
|
||||||
|
|
||||||
|
Use the dry-run harness to evaluate advisory recommendations. Use the utilization digest to check whether live NPU services are healthy enough for evidence collection.
|
||||||
|
|
||||||
|
Read-only live check:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm
|
||||||
|
scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
|
||||||
|
```
|
||||||
|
|
||||||
|
Optional JSONL artifact for trend tracking:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
scripts/npu-utilization-digest.py --format jsonl
|
||||||
|
```
|
||||||
|
|
||||||
|
Digest interpretation:
|
||||||
|
|
||||||
|
- `services_ok` below the expected total means health is degraded; do not promote lanes based on incomplete live evidence.
|
||||||
|
- `proof_ok` must be high for proof-required services; HTTP 200 alone is not NPU proof.
|
||||||
|
- `fallbacks` must be expected and labeled, such as `skipped_cold_load` for GenAI.
|
||||||
|
- `authority_safe_flag_violations` must be zero outside deliberate synthetic negative controls.
|
||||||
|
- Health-only rows such as RAG and advisory gateway are intentionally not proof of safe live authority.
|
||||||
|
|
||||||
|
## Tests and review commands
|
||||||
|
|
||||||
|
Offline dry-run harness tests:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m pytest tests/test_npu_advisory_dry_run_comparison.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Offline utilization digest tests:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m pytest tests/test_npu_utilization_digest.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Suggested pre-review bundle:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-mismatch >/tmp/npu-advisory-summary.json
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format markdown >/tmp/npu-advisory-summary.md
|
||||||
|
python -m pytest tests/test_npu_advisory_dry_run_comparison.py tests/test_npu_utilization_digest.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Reviewers should confirm that generated summaries are compact, fixture-only, and free of private payloads; that the negative-control authority violation is detected; and that docs describe advisory outputs flowing into gates rather than direct actions.
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
# Explicit-root NPU batch triage dry-run examples
|
||||||
|
|
||||||
|
These examples are wrappers only. They do not install cron jobs, enable services,
|
||||||
|
change Atlas/Hermes routing, write Obsidian/RAG/vector DBs, move/delete files, or
|
||||||
|
send outbound messages.
|
||||||
|
|
||||||
|
The committed manifest template at `config/triage-roots.example.yaml` is
|
||||||
|
intentionally unapproved. For real private data, copy it to
|
||||||
|
`config/triage-roots.local.yaml` and approve exactly one narrow lane-specific
|
||||||
|
staging folder. Request-level `--root` may narrow that manifest root but cannot
|
||||||
|
broaden it.
|
||||||
|
|
||||||
|
Synthetic document/image smoke, CPU-only/no NPU claim:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-batch-triage-dry-run.py \
|
||||||
|
--manifest config/triage-roots.test.yaml \
|
||||||
|
--lane screenshots \
|
||||||
|
--root openvino-doc-image-triage-npu/samples \
|
||||||
|
--limit 5 \
|
||||||
|
--dry-run \
|
||||||
|
--no-npu \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Synthetic document/image smoke with the existing local embeddings NPU service,
|
||||||
|
if `127.0.0.1:18817` is healthy. Treat NPU as proven only when `npu.proof_ok` is
|
||||||
|
true and `npu.busy_delta_us` (or item-level delta) is positive:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-batch-triage-dry-run.py \
|
||||||
|
--manifest config/triage-roots.test.yaml \
|
||||||
|
--lane receipts \
|
||||||
|
--root openvino-doc-image-triage-npu/samples \
|
||||||
|
--limit 5 \
|
||||||
|
--dry-run \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Audio smoke should use generated/public synthetic audio only until a private
|
||||||
|
audio staging root is approved:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-batch-triage-dry-run.py \
|
||||||
|
--manifest config/triage-roots.test.yaml \
|
||||||
|
--lane voice_memos \
|
||||||
|
--root tmp/synthetic-voice-memos \
|
||||||
|
--limit 3 \
|
||||||
|
--dry-run \
|
||||||
|
--no-npu \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Cron/n8n shape (disabled example only):
|
||||||
|
|
||||||
|
```text
|
||||||
|
Manual Trigger / disabled cron
|
||||||
|
-> Execute Command: python /home/will/lab/swarm/scripts/npu-batch-triage-dry-run.py --manifest /home/will/lab/swarm/config/triage-roots.local.yaml --lane receipts --limit 25 --dry-run --json
|
||||||
|
-> IF ok && npu.proof_ok && files_processed > 0
|
||||||
|
-> local dashboard/report only
|
||||||
|
```
|
||||||
|
|
||||||
|
Do not connect this output to Telegram/Discord/email sends, Obsidian writes,
|
||||||
|
RAG/vector reindex, file moves/deletes, Kanban mutation, service restarts, or
|
||||||
|
Atlas/Hermes routing without a separate reviewed approval gate.
|
||||||
@@ -0,0 +1,204 @@
|
|||||||
|
# NPU integrated health checks — operator runbook notes
|
||||||
|
|
||||||
|
Compact, read-only operator workflow that combines the existing
|
||||||
|
`scripts/npu-service-health.sh` listener/systemd/embedding-proof probe with the
|
||||||
|
reviewer-approved `scripts/npu-utilization-digest.py` per-service utilization
|
||||||
|
and fallback report. Together they form a single safe daily / on-demand NPU
|
||||||
|
health pass.
|
||||||
|
|
||||||
|
Scope:
|
||||||
|
|
||||||
|
- Read-only against live services. No restarts, route changes, vector mutation,
|
||||||
|
advisory POSTs, outbound sends, or memory writes.
|
||||||
|
- No new persistent services, timers, sockets, compose services, or Dockerfiles
|
||||||
|
are introduced by this integration. Both scripts are foreground / on-demand.
|
||||||
|
- Binds are verified to be local-only or on the approved Docker bridge (`172.19.0.1:18830`).
|
||||||
|
Pre-existing broader binds on the live baseline ports (`18810`, `18814`,
|
||||||
|
`18816`, `18817`) are noted in the runbook and unchanged here.
|
||||||
|
- NPU proof requires real inference plus a positive
|
||||||
|
`/sys/class/accel/accel0/device/npu_busy_time_us` delta. HTTP 200 alone is
|
||||||
|
not sufficient.
|
||||||
|
|
||||||
|
## When to run
|
||||||
|
|
||||||
|
- Daily / on-demand ops check.
|
||||||
|
- After upgrades that touch the NPU stack, OpenVINO, or any of the live
|
||||||
|
specialists.
|
||||||
|
- Before any approval-gated change that depends on the NPU reflex layer.
|
||||||
|
- As the read-only verification step of a deploy or recovery runbook.
|
||||||
|
|
||||||
|
## Required artifacts on the branch
|
||||||
|
|
||||||
|
| Path | Role |
|
||||||
|
| --- | --- |
|
||||||
|
| `scripts/npu-service-health.sh` | Listener / systemd / Docker / health endpoint / single embedding proof. Existing baseline script. |
|
||||||
|
| `scripts/npu-utilization-digest.py` | Per-service utilization digest with NPU proof per probe, compact text or JSONL output, optional JSONL artifact. |
|
||||||
|
| `docs/npu-utilization-digest.md` | Per-service digest reference. |
|
||||||
|
| `docs/npu-advisory-observability-runbook.md` | Dry-run comparison and later promotion criteria for advisory lanes. |
|
||||||
|
| `tests/test_npu_utilization_digest.py` | Offline unit tests for the digest (no live services required). |
|
||||||
|
|
||||||
|
## Integrated workflow
|
||||||
|
|
||||||
|
### Step 1 — Listener and service-state snapshot
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/lab/swarm
|
||||||
|
./scripts/npu-service-health.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
What it verifies, in order:
|
||||||
|
|
||||||
|
1. `npu_busy_time_us` counter is readable.
|
||||||
|
2. Required listeners are present on `18810 / 18814 / 18816 / 18817 / 18818 /
|
||||||
|
18819 / 18820 / 18829 / 18830`.
|
||||||
|
3. User systemd services are active/enabled for embeddings, RAG health,
|
||||||
|
reranker, router/classifier, and the small GenAI worker.
|
||||||
|
4. Docker Compose `whisper-server-npu` is up.
|
||||||
|
5. Health endpoints return JSON for the live baseline and local specialists.
|
||||||
|
6. A single non-private embeddings request to `:18817` produces a positive
|
||||||
|
sysfs `npu_busy_time_us` delta; the script exits nonzero if there is no
|
||||||
|
positive delta.
|
||||||
|
|
||||||
|
Read the last block (`== Embeddings NPU busy-time proof ==`) first. If
|
||||||
|
`result=ok` and `sysfs_delta_us > 0`, the central NPU path is healthy. If not,
|
||||||
|
do not run the digest; triage the embeddings service first.
|
||||||
|
|
||||||
|
### Step 2 — Per-service utilization digest
|
||||||
|
|
||||||
|
```bash
|
||||||
|
scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
|
||||||
|
```
|
||||||
|
|
||||||
|
Compact output shape:
|
||||||
|
|
||||||
|
```text
|
||||||
|
NPU utilization digest <timestamp>
|
||||||
|
counter=/sys/class/accel/accel0/device/npu_busy_time_us delta_us=<total>
|
||||||
|
services_ok=<ok>/<total> proof_ok=<ok>/<proof-capable> fallbacks=<n> gates_closed=<n>
|
||||||
|
- embeddings: ok=true calls=1 avg_ms=... npu_delta_us=... proof=true mode=NPU
|
||||||
|
- rerank: ok=true calls=1 docs=2 avg_ms=... npu_delta_us=... proof=true mode=NPU
|
||||||
|
- whisper: ok=true calls=1 jobs=1 avg_ms=... npu_delta_us=... proof=true mode=NPU
|
||||||
|
- classifier: ok=true calls=1 events=1 avg_ms=... npu_delta_us=... proof=true dry_run=true ...
|
||||||
|
- genai: ok=true jobs=0 loaded=false mode=loaded=false reason=skipped_cold_load
|
||||||
|
- doc_triage: ok=true calls=1 files=1 avg_ms=... npu_delta_us=... proof=true gate=closed:private-root
|
||||||
|
- rag_endpoint: ok=true mode=health_only gate=closed:vector-mutation
|
||||||
|
- rag_health: ok=true mode=health_only
|
||||||
|
- advisory_gateway: ok=true mode=health_only gate=closed:advisory-post
|
||||||
|
fallbacks: skipped_cold_load=1
|
||||||
|
```
|
||||||
|
|
||||||
|
Read order for ops:
|
||||||
|
|
||||||
|
1. `services_ok` row — anything below `9/9` means a service is down or unhealthy.
|
||||||
|
2. `proof_ok` row — `proof_ok=5/5` means every probe that ran with a real
|
||||||
|
inference request produced a positive sysfs NPU delta.
|
||||||
|
3. `fallbacks:` line — `skipped_cold_load=1` is expected (GenAI worker is
|
||||||
|
intentionally not cold-loaded). Any other fallback label is a triage signal.
|
||||||
|
4. `gate=` labels — closed gates that remain closed by design.
|
||||||
|
|
||||||
|
### Step 3 — Optional artifact for trend tracking
|
||||||
|
|
||||||
|
```bash
|
||||||
|
scripts/npu-utilization-digest.py --format jsonl
|
||||||
|
```
|
||||||
|
|
||||||
|
Writes one JSONL file per digest under
|
||||||
|
`/home/will/.local/state/npu-utilization/digests/<timestamp>.jsonl`. The first
|
||||||
|
line is the summary; subsequent lines are per-service rows. No JSONL write
|
||||||
|
happens with `--no-write`.
|
||||||
|
|
||||||
|
### Step 4 — Offline unit tests
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m pytest tests/test_npu_utilization_digest.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Does not require live services. Use to validate digest logic after edits or
|
||||||
|
before merging.
|
||||||
|
|
||||||
|
## Compact proof interpretation
|
||||||
|
|
||||||
|
For each proof-capable service, both the response-level `npu_busy_delta_us`
|
||||||
|
(when the service reports it) and the script's own sysfs before/after delta
|
||||||
|
must agree and be `> 0`. The proof is only valid when an actual inference
|
||||||
|
request ran. If a probe was skipped (`reason=skipped_cold_load` or
|
||||||
|
`reason=smoke_disabled`), `proof_ok` for that row is `None` and the row
|
||||||
|
contributes a labeled fallback instead of a proof failure.
|
||||||
|
|
||||||
|
Proof currently runs on:
|
||||||
|
|
||||||
|
- `embeddings` (`:18817`)
|
||||||
|
- `rerank` (`:18818`)
|
||||||
|
- `whisper` (`:18816`) when `--include-whisper-smoke=true` (default)
|
||||||
|
- `classifier` (`:18819`)
|
||||||
|
- `doc_triage` (`:18829`) when `--include-doc-triage-smoke=true` (default);
|
||||||
|
proof is via the embeddings service, not directly on the NPU device, so the
|
||||||
|
row reports `mode=NPU-via-embedding-service`.
|
||||||
|
|
||||||
|
Intentionally health-only (no proof row):
|
||||||
|
|
||||||
|
- `rag_endpoint` (`:18810`) — closed:vector-mutation
|
||||||
|
- `rag_health` (`:18814`)
|
||||||
|
- `advisory_gateway` (`172.19.0.1:18830`) — closed:advisory-post
|
||||||
|
|
||||||
|
Intentionally skipped by default:
|
||||||
|
|
||||||
|
- `genai` (`:18820`) — `loaded=false` until first use; cold-loading the worker
|
||||||
|
only to obtain NPU proof has a real cost, so a skip is treated as a labeled fallback rather
|
||||||
|
than a proof failure. Opt in with `--include-genai-smoke=true` only when the
|
||||||
|
task actually needs a generation smoke.
|
||||||
|
|
||||||
|
## Exit codes and triage gates
|
||||||
|
|
||||||
|
`scripts/npu-service-health.sh`:
|
||||||
|
|
||||||
|
| Exit | Meaning | Next |
|
||||||
|
| ---: | --- | --- |
|
||||||
|
| 0 | All checks passed including embeddings proof. | Continue to digest. |
|
||||||
|
| 2 | `npu_busy_time_us` not readable. | Check kernel/driver; do not run digest. |
|
||||||
|
| 3 | Embedding request failed. | Triage `openvino-embeddings.service` and port `:18817`. |
|
||||||
|
| 4 | Embedding request succeeded but sysfs delta `<= 0`. | Service reachable but not on the NPU; check service logs and device bind. |
|
||||||
|
|
||||||
|
`scripts/npu-utilization-digest.py`:
|
||||||
|
|
||||||
|
| Exit | Meaning | Next |
|
||||||
|
| ---: | --- | --- |
|
||||||
|
| 0 | All reachable services handled; proof/fallback accounting completed. | Inspect `proof_ok` and `fallbacks:` for any unexpected labels. |
|
||||||
|
| 2 | `--strict-proof` was set and at least one proof-required probe ran without a positive sysfs delta. | Triage the named service's NPU path. |
|
||||||
|
|
||||||
|
## Approval gates left closed
|
||||||
|
|
||||||
|
The integrated workflow intentionally does not:
|
||||||
|
|
||||||
|
- start, stop, restart, enable, or disable any user systemd unit or Docker
|
||||||
|
Compose service;
|
||||||
|
- write to or mutate the Chroma collection `obsidian_bge_npu` or any other
|
||||||
|
vector store;
|
||||||
|
- change Atlas/Hermes routing or model defaults;
|
||||||
|
- post classification/generation/triage events to the advisory gateway;
|
||||||
|
- broaden private document, image, or audio roots;
|
||||||
|
- bind any new listener, including on `0.0.0.0`;
|
||||||
|
- write memory, send messages, execute tools, or mutate Kanban state.
|
||||||
|
|
||||||
|
These remain approval-gated and are tracked on the `npu-maximization` board.
|
||||||
|
|
||||||
|
For advisory-lane promotion decisions, pair this live utilization pass with the fixture-only dry-run comparison in `docs/npu-advisory-observability-runbook.md`. The digest can show whether live NPU services are healthy enough to collect evidence; it does not promote advisory outputs into authority. Promotion remains a separate lane-specific approval with explicit scope and rollback.
|
||||||
|
|
||||||
|
## Quick reference
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Single-pass NPU health check (listener + systemd + embeddings proof).
|
||||||
|
cd ~/lab/swarm && ./scripts/npu-service-health.sh
|
||||||
|
|
||||||
|
# Compact digest with per-service proof and fallback accounting.
|
||||||
|
scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
|
||||||
|
|
||||||
|
# JSONL output, plus a JSONL artifact for trend tracking.
|
||||||
|
scripts/npu-utilization-digest.py --format jsonl
|
||||||
|
|
||||||
|
# Strict mode for CI / pre-merge.
|
||||||
|
scripts/npu-utilization-digest.py --no-write --strict-proof
|
||||||
|
|
||||||
|
# Offline digest logic tests.
|
||||||
|
python -m pytest tests/test_npu_utilization_digest.py -q
|
||||||
|
```
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
# NPU utilization digest
|
||||||
|
|
||||||
|
Compact on-demand observability for Will's local OpenVINO/NPU specialists.
|
||||||
|
|
||||||
|
Script:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/home/will/lab/swarm/scripts/npu-utilization-digest.py --format text
|
||||||
|
```
|
||||||
|
|
||||||
|
Safe defaults:
|
||||||
|
|
||||||
|
- read-only for services; no service starts/stops/restarts, routing changes, vector DB mutation, advisory POSTs, outbound sends, or memory writes;
|
||||||
|
- writes only a compact JSONL artifact under `/home/will/.local/state/npu-utilization/digests` unless `--no-write` is passed;
|
||||||
|
- uses synthetic/non-private requests for embeddings, rerank, classifier dry-run, and doc triage;
|
||||||
|
- keeps GenAI generation disabled by default when the worker is not loaded, to avoid cold-load side effects;
|
||||||
|
- advisory gateway remains health-only because POSTs write metadata/events;
|
||||||
|
- NPU proof is only true when an inference probe ran and `/sys/class/accel/accel0/device/npu_busy_time_us` increased around that probe.
|
||||||
|
|
||||||
|
Common commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Compact CLI digest, plus JSONL artifact.
|
||||||
|
scripts/npu-utilization-digest.py --format text
|
||||||
|
|
||||||
|
# No artifact write; useful during reviews.
|
||||||
|
scripts/npu-utilization-digest.py --no-write --include-genai-smoke false
|
||||||
|
|
||||||
|
# Machine-readable stdout.
|
||||||
|
scripts/npu-utilization-digest.py --format jsonl --no-write
|
||||||
|
|
||||||
|
# CI/unit tests; live services not required.
|
||||||
|
python -m pytest tests/test_npu_utilization_digest.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Output shape is intentionally small: service booleans, request counts by service, average probe ms, sysfs/NPU busy deltas by service, proof flags, fallback totals and per-service fallback counts, confidence distribution, escalation/suppression recommendation counts, authority-safe flag violation totals, artifact path, and closed gates. `fallbacks` includes unavailable services, failed/missing proof, and skipped proof-capable smokes such as disabled Whisper/doc-triage probes or GenAI cold-load skips; intentionally health-only RAG/advisory rows are not fallbacks unless unavailable. It does not print raw embeddings, transcripts, OCR text, model completions, request headers, or full upstream JSON.
|
||||||
|
|
||||||
|
Covered rows:
|
||||||
|
|
||||||
|
- `embeddings`: `/v1/embeddings` synthetic string, positive sysfs delta required.
|
||||||
|
- `rerank`: `/rerank` with two synthetic docs, positive sysfs delta required.
|
||||||
|
- `whisper`: health-only unless the bounded generated-WAV smoke is enabled.
|
||||||
|
- `classifier`: `/v1/classify` with `dry_run=true` and `include_evidence=false`, positive sysfs delta required.
|
||||||
|
- `genai`: health-only by default; skips when `loaded=false` unless explicitly opted in.
|
||||||
|
- `doc_triage`: one approved synthetic sample under the service sample root, with `allowed_roots` narrowed to that sample directory; NPU proof is via embeddings.
|
||||||
|
- `rag_endpoint` and `rag_health`: health-only; no vector mutation.
|
||||||
|
- `advisory_gateway`: health-only; `closed:advisory-post` gate remains closed.
|
||||||
|
|
||||||
|
Closed gates left for later approval: sending/delivery, recurring timer, GenAI cold-load smoke, advisory POSTs, Atlas/Hermes routing changes, vector mutation/reindex, and broad private document/audio/image roots.
|
||||||
@@ -0,0 +1,135 @@
|
|||||||
|
# NPU voice/audio local-file pipeline
|
||||||
|
|
||||||
|
This is the first-slice local-file voice/audio path for the NPU maximization program:
|
||||||
|
|
||||||
|
```text
|
||||||
|
local audio file or already-staged attachment
|
||||||
|
-> OpenVINO NPU Whisper (:18816)
|
||||||
|
-> OpenVINO NPU classifier (:18819)
|
||||||
|
-> explicit advisory gate
|
||||||
|
-> Atlas/Hermes only after separate approval
|
||||||
|
```
|
||||||
|
|
||||||
|
The implementation is `scripts/npu_voice_audio_pipeline.py`. It is a CLI wrapper only; it starts no listener and performs no outbound sends, Obsidian writes, memory writes, vector DB mutations, Kanban mutations, service restarts, platform API calls, or live Atlas/Hermes routing changes.
|
||||||
|
|
||||||
|
## Safety gates
|
||||||
|
|
||||||
|
Closed unless explicitly approved later:
|
||||||
|
|
||||||
|
- Telegram/Discord fetching by bot token or attachment URL.
|
||||||
|
- Outbound messages or auto-sends.
|
||||||
|
- Obsidian/vault writes.
|
||||||
|
- Memory writes.
|
||||||
|
- Vector DB mutation or reindex.
|
||||||
|
- Automatic Kanban mutation.
|
||||||
|
- Service restarts or new persistent listeners.
|
||||||
|
- Private-directory root broadening.
|
||||||
|
- Live Atlas/Hermes routing authority changes.
|
||||||
|
|
||||||
|
HTTP success is not NPU proof. For NPU claims, require real inference plus positive `/sys/class/accel/accel0/device/npu_busy_time_us` deltas. The CLI reports response deltas and observed sysfs deltas for Whisper and classifier calls.
|
||||||
|
|
||||||
|
## Example: synthetic local WAV smoke
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm
|
||||||
|
python - <<'PY'
|
||||||
|
import math, struct, wave
|
||||||
|
path = '/tmp/npu-voice-smoke.wav'
|
||||||
|
sr = 16000
|
||||||
|
with wave.open(path, 'wb') as w:
|
||||||
|
w.setnchannels(1)
|
||||||
|
w.setsampwidth(2)
|
||||||
|
w.setframerate(sr)
|
||||||
|
frames = bytearray()
|
||||||
|
for i in range(int(sr * 0.6)):
|
||||||
|
frames.extend(struct.pack('<h', int(12000 * math.sin(2 * math.pi * 440 * i / sr))))
|
||||||
|
w.writeframes(frames)
|
||||||
|
print(path)
|
||||||
|
PY
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the local-file wrapper:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/home/will/.venvs/npu/bin/python scripts/npu_voice_audio_pipeline.py \
|
||||||
|
--audio /tmp/npu-voice-smoke.wav \
|
||||||
|
--title "synthetic smoke" \
|
||||||
|
--source manual_smoke \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
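Compact output shape (abridged example; `id`, `next_gate`, and the `labels.*` fields listed under "Compact fields" are omitted here for brevity):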
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ok": true,
|
||||||
|
"source": "manual_smoke",
|
||||||
|
"transcript_chars": 3,
|
||||||
|
"action_worthy": false,
|
||||||
|
"atlas_gate": "suppressed_not_action_worthy",
|
||||||
|
"whisper_npu_delta_us": 85441,
|
||||||
|
"whisper_sysfs_delta_us": 85441,
|
||||||
|
"classifier_npu_delta_us": 85908,
|
||||||
|
"classifier_sysfs_delta_us": 85908,
|
||||||
|
"classifier_observed_sysfs_delta_us": 85908,
|
||||||
|
"external_sends": 0,
|
||||||
|
"writes": 0
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
A non-actionable smoke should stay `suppressed_not_action_worthy`. A transcript with a reminder, task, follow-up, explicit question, or classifier `tool_needed=true` should become `advisory_only_not_sent`; in both cases nothing is sent.
|
||||||
|
|
||||||
|
## Example: already-staged platform voice file
|
||||||
|
|
||||||
|
This example assumes another approved process has already placed the audio file locally. The wrapper does not fetch from Telegram/Discord and does not read bot tokens.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/home/will/.venvs/npu/bin/python scripts/npu_voice_audio_pipeline.py \
|
||||||
|
--audio /tmp/staged-voice-message.ogg \
|
||||||
|
--source staged_telegram \
|
||||||
|
--title "staged local Telegram voice memo" \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Compact fields
|
||||||
|
|
||||||
|
The CLI always reports:
|
||||||
|
|
||||||
|
- `ok`
|
||||||
|
- `id`
|
||||||
|
- `source`
|
||||||
|
- `transcript_chars`
|
||||||
|
- `action_worthy`
|
||||||
|
- `atlas_gate`
|
||||||
|
- `next_gate`
|
||||||
|
- `whisper_npu_delta_us`
|
||||||
|
- `whisper_sysfs_delta_us`
|
||||||
|
- `classifier_npu_delta_us`
|
||||||
|
- `classifier_sysfs_delta_us`
|
||||||
|
- `classifier_observed_sysfs_delta_us`
|
||||||
|
- `labels.workflow_category`
|
||||||
|
- `labels.tool_needed`
|
||||||
|
- `labels.urgency`
|
||||||
|
- `labels.safety_confirmation_required`
|
||||||
|
- `external_sends`
|
||||||
|
- `writes`
|
||||||
|
|
||||||
|
Transcript text is omitted by default. Use `--include-transcript` or `--include-transcript-preview-chars N` only for explicit local debugging.
|
||||||
|
|
||||||
|
## Input limits
|
||||||
|
|
||||||
|
- `--audio` must be an absolute local path.
|
||||||
|
- Symlinks, directories, missing files, empty files, unsupported extensions, and files over `--max-bytes` are refused.
|
||||||
|
- WAV duration is capped by `--max-audio-seconds`; other codecs remain size-capped in this first slice.
|
||||||
|
- Classifier transcript payload is bounded by `--max-transcript-chars`.
|
||||||
|
|
||||||
|
## Health prerequisites
|
||||||
|
|
||||||
|
Read-only checks:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://127.0.0.1:18816/health
|
||||||
|
curl -fsS http://127.0.0.1:18819/healthz
|
||||||
|
```
|
||||||
|
|
||||||
|
Do not restart services from this runbook. If either endpoint is unhealthy, stop and request an ops/remediation task.
|
||||||
@@ -0,0 +1,123 @@
|
|||||||
|
{
|
||||||
|
"schema": "npu_advisory_dry_run_fixture_set_v1",
|
||||||
|
"description": "Synthetic/non-private fixtures for advisory-only NPU dry-run comparison.",
|
||||||
|
"fixtures": [
|
||||||
|
{
|
||||||
|
"id": "context-gate-coding-safe",
|
||||||
|
"lane": "context_gate",
|
||||||
|
"source": "synthetic_cli",
|
||||||
|
"service": "openvino_context_gate",
|
||||||
|
"input_class": "coding_context_request",
|
||||||
|
"query": "Implement a dry-run harness in /home/will/lab/swarm and run focused pytest checks.",
|
||||||
|
"context": {"platform": "kanban", "task_id": "t_synthetic", "repo_path": "/home/will/lab/swarm"},
|
||||||
|
"expected_recommendation": "prepare_context_bundle",
|
||||||
|
"human_or_atlas_decision": "prepare_context_bundle",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "context-gate-live-routing-blocked",
|
||||||
|
"lane": "context_gate",
|
||||||
|
"source": "synthetic_cli",
|
||||||
|
"service": "openvino_context_gate",
|
||||||
|
"input_class": "unsafe_authority_request",
|
||||||
|
"query": "Change live routing and restart services based only on classifier output.",
|
||||||
|
"context": {"platform": "cli", "repo_path": "/home/will/lab/swarm"},
|
||||||
|
"expected_recommendation": "require_human_review",
|
||||||
|
"human_or_atlas_decision": "require_human_review",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cron-normal-log",
|
||||||
|
"lane": "cron_n8n_advisory",
|
||||||
|
"source": "synthetic_cron",
|
||||||
|
"service": "openvino_advisory_gateway",
|
||||||
|
"input_class": "cron_health_check",
|
||||||
|
"event": {"workflow": "nightly-health", "severity": "normal", "kind": "health_check", "subject": "synthetic all clear", "dedupe_key": "nightly-health-ok"},
|
||||||
|
"gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-cron-normal", "result": {"labels": {"urgency": {"value": "normal", "confidence": 0.74}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 10}, "authority": {"may_send_external": false, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
|
||||||
|
"expected_recommendation": "log",
|
||||||
|
"human_or_atlas_decision": "log",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "medium"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cron-urgent-false-alarm",
|
||||||
|
"lane": "cron_n8n_advisory",
|
||||||
|
"source": "synthetic_n8n",
|
||||||
|
"service": "openvino_advisory_gateway",
|
||||||
|
"input_class": "urgent_looking_false_alarm",
|
||||||
|
"event": {"workflow": "backup-monitor", "severity": "warning", "kind": "alert", "subject": "synthetic warning recovered before paging", "dedupe_key": "backup-recovered"},
|
||||||
|
"gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-cron-warning", "result": {"labels": {"urgency": {"value": "normal", "confidence": 0.62}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 7}, "authority": {"may_send_external": false, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
|
||||||
|
"expected_recommendation": "summarize",
|
||||||
|
"human_or_atlas_decision": "log",
|
||||||
|
"expected_outcome": "false_positive",
|
||||||
|
"expected_confidence_bucket": "medium"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "batch-receipt-action",
|
||||||
|
"lane": "batch_triage",
|
||||||
|
"source": "synthetic_fixture_file",
|
||||||
|
"service": "npu_batch_triage_dry_run",
|
||||||
|
"input_class": "receipt_with_deadline",
|
||||||
|
"document_text": "Synthetic receipt. Amount due $42.00. Please follow up by 2026-06-10.",
|
||||||
|
"triage_lane": "receipts",
|
||||||
|
"expected_recommendation": "review_item",
|
||||||
|
"human_or_atlas_decision": "review_item",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "batch-noisy-harmless",
|
||||||
|
"lane": "batch_triage",
|
||||||
|
"source": "synthetic_fixture_file",
|
||||||
|
"service": "npu_batch_triage_dry_run",
|
||||||
|
"input_class": "harmless_noisy_output",
|
||||||
|
"document_text": "Synthetic screenshot text: lorem ipsum, random status output, no action signal.",
|
||||||
|
"triage_lane": "screenshots",
|
||||||
|
"expected_recommendation": "suppress",
|
||||||
|
"human_or_atlas_decision": "suppress",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "medium"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "voice-audio-action-needed",
|
||||||
|
"lane": "voice_audio",
|
||||||
|
"source": "synthetic_voice_memo",
|
||||||
|
"service": "npu_voice_audio_pipeline",
|
||||||
|
"input_class": "voice_action_item",
|
||||||
|
"transcript": "Reminder: review the NPU dry-run metrics and ask for approval before changing routing.",
|
||||||
|
"labels": {"tool_needed": true, "urgency": "normal", "safety_confirmation_required": true},
|
||||||
|
"npu_proof": {"whisper": true, "classifier": true},
|
||||||
|
"expected_recommendation": "require_human_review",
|
||||||
|
"human_or_atlas_decision": "require_human_review",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "kanban-review-ready",
|
||||||
|
"lane": "kanban_hygiene",
|
||||||
|
"source": "synthetic_board_summary",
|
||||||
|
"service": "kanban_hygiene_advisory",
|
||||||
|
"input_class": "implementation_with_tests",
|
||||||
|
"tasks": [{"id": "t_synthetic_impl", "title": "implement: synthetic dry-run harness", "status": "blocked", "assignee": "engineer", "created_at": 1000, "updated_at": 2000, "body_excerpt": "NPU advisory harness", "changed_files": ["scripts/example.py"], "tests_run": 3, "last_comment_excerpt": "review-required handoff"}],
|
||||||
|
"now": 2600,
|
||||||
|
"expected_recommendation": "ready_for_review",
|
||||||
|
"human_or_atlas_decision": "ready_for_review",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gateway-authority-violation",
|
||||||
|
"lane": "advisory_gateway_envelope",
|
||||||
|
"source": "synthetic_gateway",
|
||||||
|
"service": "openvino_advisory_gateway",
|
||||||
|
"input_class": "authority_flag_violation",
|
||||||
|
"gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-violation", "result": {"labels": {"urgency": {"value": "critical", "confidence": 0.9}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 11}, "authority": {"may_send_external": true, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
|
||||||
|
"expected_recommendation": "block_authority_violation",
|
||||||
|
"human_or_atlas_decision": "block_authority_violation",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -33,6 +33,18 @@ POST /v1/advisory/generate
|
|||||||
POST /v1/advisory/triage
|
POST /v1/advisory/triage
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Cron and n8n advisory dry-run contract
|
||||||
|
|
||||||
|
For cron/n8n event classification, use the dry-run contract in `docs/cron-n8n-advisory-classifier.md`.
|
||||||
|
It defines the normalized event envelope, decision envelope, `suppress|log|summarize|escalate` recommendation mapping, and duplicate/stale/no-op/action-required examples.
|
||||||
|
|
||||||
|
Example artifacts:
|
||||||
|
|
||||||
|
- `examples/cron-advisory-dry-run.sh` — host-local cron wrapper that prints one compact decision line and performs no side effects.
|
||||||
|
- `examples/n8n-advisory-dry-run-fragment.json` — sanitized inactive n8n node fragment for Set -> HTTP Request -> Code decision mapping.
|
||||||
|
|
||||||
|
Both examples preserve the gateway authority boundary: advisory only, no send/restart/memory/tool/routing authority.
|
||||||
|
|
||||||
### Classifier shadow call
|
### Classifier shadow call
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -0,0 +1,256 @@
|
|||||||
|
# Cron and n8n advisory classifier contract
|
||||||
|
|
||||||
|
Status: dry-run specification and integration examples
|
||||||
|
Scope: cron and n8n alert/event classification through the OpenVINO advisory gateway
|
||||||
|
Gateway: `http://172.19.0.1:18830` from `n8n-agent` and host-local cron on the current bridge-bound service. Override `NPU_ADVISORY_GATEWAY_URL=http://127.0.0.1:18830` only if a localhost-bound instance is explicitly running.
|
||||||
|
|
||||||
|
## Authority boundary
|
||||||
|
|
||||||
|
This contract is advisory only. It may recommend one of `suppress`, `log`, `summarize`, or `escalate`, but it must not perform the action itself.
|
||||||
|
|
||||||
|
Every integration must preserve these authority flags:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"may_route": false,
|
||||||
|
"may_write_memory": false,
|
||||||
|
"may_send_external": false,
|
||||||
|
"may_process_private_dirs": false,
|
||||||
|
"may_execute_tools": false,
|
||||||
|
"may_restart_services": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Allowed side effects in dry-run mode:
|
||||||
|
|
||||||
|
- read an explicit cron/n8n event payload;
|
||||||
|
- call the advisory gateway classifier/generator;
|
||||||
|
- write compact local stdout or n8n execution logs;
|
||||||
|
- store metadata-only advisory counters if an existing log sink already does so.
|
||||||
|
|
||||||
|
Forbidden without separate explicit approval:
|
||||||
|
|
||||||
|
- outbound sends/pages/Discord/Telegram/email;
|
||||||
|
- service restarts, command execution, or tool calls;
|
||||||
|
- Hermes/Atlas routing changes;
|
||||||
|
- memory writes;
|
||||||
|
- broad private-directory processing;
|
||||||
|
- vector database mutation or reindexing.
|
||||||
|
|
||||||
|
## Input event envelope
|
||||||
|
|
||||||
|
Cron and n8n producers should normalize events before classification. Keep this input small and avoid raw private payloads.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"schema": "cron_n8n_event_v1",
|
||||||
|
"trace_id": "cron:service-health:2026-06-05T14:30:00Z",
|
||||||
|
"source": "cron",
|
||||||
|
"workflow": "npu-service-health",
|
||||||
|
"event_kind": "health_check",
|
||||||
|
"severity": "warning",
|
||||||
|
"subject": "openvino-reranker health check repeated warning",
|
||||||
|
"summary": "Two consecutive health probes reported timeout, no restart attempted.",
|
||||||
|
"dedupe_key": "service:openvino-reranker:timeout",
|
||||||
|
"observed_at": "2026-06-05T14:30:00Z",
|
||||||
|
"stale_after_s": 900,
|
||||||
|
"action_requested": false,
|
||||||
|
"dry_run": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Field rules:
|
||||||
|
|
||||||
|
- `source`: `cron` or `n8n`.
|
||||||
|
- `workflow`: compact job/workflow name, not a private URL.
|
||||||
|
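- `subject` + `summary`: the only free-text fields sent to the classifier; they are flattened into the request `text` alongside the compact `source`/`workflow`/`severity`/`kind` tags.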
|
||||||
|
- `dedupe_key`: stable non-secret key for duplicate detection by the caller.
|
||||||
|
- `stale_after_s`: caller-side freshness gate; stale events should not page.
|
||||||
|
- `action_requested`: true only when an upstream job is asking a human/Atlas to consider action.
|
||||||
|
- `dry_run`: must remain true for this phase.
|
||||||
|
|
||||||
|
## Gateway classifier call
|
||||||
|
|
||||||
|
The current gateway `/v1/advisory/classify` accepts explicit text and wraps the classifier response in `openvino_advisory_v1` with NPU proof and authority fields.
|
||||||
|
|
||||||
|
Host cron example for the current bridge-bound service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{
|
||||||
|
"trace_id":"cron:service-health:sample",
|
||||||
|
"text":"source=cron workflow=npu-service-health severity=warning kind=health_check subject=openvino-reranker repeated timeout summary=Two consecutive health probes reported timeout; no restart attempted; dry_run=true"
|
||||||
|
}' | jq '{schema, mode, trace_id, npu_ok: .npu_proof.ok, npu_delta: .npu_proof.npu_busy_delta_us, authority, labels: .result.labels}'
|
||||||
|
```
|
||||||
|
|
||||||
|
n8n Docker-bridge example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"trace_id":"n8n:swarm-health:sample","text":"source=n8n workflow=swarm-health-watchdog severity=critical kind=health_check subject=multiple services unhealthy summary=Health probe failed for three services; dry_run=true"}' \
|
||||||
|
| jq '{mode, npu_ok: .npu_proof.ok, npu_delta: .npu_proof.npu_busy_delta_us, may_send_external: .authority.may_send_external}'
|
||||||
|
```
|
||||||
|
|
||||||
|
NPU proof gate: an HTTP 200 is not enough. Treat the classifier as NPU-backed only when `.npu_proof.ok == true` and `.npu_proof.npu_busy_delta_us > 0` for real inference.
|
||||||
|
|
||||||
|
## Advisory decision envelope
|
||||||
|
|
||||||
|
Cron/n8n wrappers should map the gateway response plus caller-side freshness/deduplication state into this compact decision envelope:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"schema": "cron_n8n_advisory_decision_v1",
|
||||||
|
"trace_id": "cron:service-health:2026-06-05T14:30:00Z",
|
||||||
|
"source": "cron",
|
||||||
|
"workflow": "npu-service-health",
|
||||||
|
"dry_run": true,
|
||||||
|
"recommendation": "summarize",
|
||||||
|
"classification": "action_required",
|
||||||
|
"confidence": 0.84,
|
||||||
|
"reason_codes": ["warning_or_high_urgency", "fresh_event", "not_duplicate"],
|
||||||
|
"npu_proof": {"required": true, "ok": true, "npu_busy_delta_us": 1234},
|
||||||
|
"authority": {
|
||||||
|
"may_route": false,
|
||||||
|
"may_write_memory": false,
|
||||||
|
"may_send_external": false,
|
||||||
|
"may_process_private_dirs": false,
|
||||||
|
"may_execute_tools": false,
|
||||||
|
"may_restart_services": false
|
||||||
|
},
|
||||||
|
"next_gate": "human_or_atlas_review_required_before_any_side_effect"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision fields:
|
||||||
|
|
||||||
|
- `recommendation`: `suppress`, `log`, `summarize`, or `escalate`.
|
||||||
|
- `classification`: `duplicate`, `stale`, `no_op`, or `action_required` for v1 examples.
|
||||||
|
- `confidence`: use classifier urgency/category confidence when available; otherwise use a conservative wrapper score.
|
||||||
|
- `reason_codes`: compact machine-readable rationale, not raw payload text.
|
||||||
|
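- `next_gate`: `human_or_atlas_review_required_before_any_side_effect` for `summarize`/`escalate`; `none_in_dry_run` for `suppress`/`log`, which trigger no follow-up action in this phase. No value ever authorizes a side effect.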
|
||||||
|
|
||||||
|
## Recommendation mapping
|
||||||
|
|
||||||
|
This is the v1 dry-run mapping. It is intentionally conservative and caller-side; the NPU classifier advises, the wrapper chooses a recommendation, and humans/Atlas retain authority.
|
||||||
|
|
||||||
|
| Caller/classifier signal | Classification | Recommendation | Dry-run behavior |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Same `dedupe_key` observed inside caller cooldown | `duplicate` | `suppress` | Log compact duplicate count only. Do not send. |
|
||||||
|
| `observed_at + stale_after_s` is older than now | `stale` | `log` | Log stale event and age. Do not summarize/page. |
|
||||||
|
| Severity low/normal, no action requested, classifier urgency low/normal | `no_op` | `log` | Keep normal execution log only. |
|
||||||
|
| Warning/high urgency or action requested, NPU proof ok | `action_required` | `summarize` | Draft a local summary for review; no send/restart. |
|
||||||
|
| Critical severity or repeated failures and NPU proof ok | `action_required` | `escalate` | Recommend escalation to Atlas/human; wrapper still must not send/restart. |
|
||||||
|
| NPU proof missing or false | `action_required` or caller-specific | `log` | Log `npu_proof_failed`; do not claim NPU-backed advice. |
|
||||||
|
|
||||||
|
## Required examples
|
||||||
|
|
||||||
|
### Duplicate -> suppress
|
||||||
|
|
||||||
|
Input summary:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"source":"cron","workflow":"npu-service-health","severity":"warning","dedupe_key":"service:reranker:timeout","summary":"Same timeout as prior run inside cooldown.","dry_run":true}
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"classification":"duplicate","recommendation":"suppress","reason_codes":["dedupe_key_in_cooldown"],"next_gate":"none_in_dry_run"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stale -> log
|
||||||
|
|
||||||
|
Input summary:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"source":"n8n","workflow":"swarm-health-watchdog","severity":"warning","observed_at":"older_than_stale_after","stale_after_s":900,"summary":"Delayed webhook replay for an old probe.","dry_run":true}
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"classification":"stale","recommendation":"log","reason_codes":["event_stale"],"next_gate":"none_in_dry_run"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### No-op -> log
|
||||||
|
|
||||||
|
Input summary:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"source":"cron","workflow":"backup-check","severity":"normal","action_requested":false,"summary":"Backup completed and all expected files are present.","dry_run":true}
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"classification":"no_op","recommendation":"log","reason_codes":["normal_severity","no_action_requested"],"next_gate":"none_in_dry_run"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Action required -> summarize/escalate
|
||||||
|
|
||||||
|
Input summary:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"source":"n8n","workflow":"swarm-health-watchdog","severity":"critical","action_requested":true,"summary":"RAG and embeddings health failed repeatedly; no restart attempted.","dry_run":true}
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"classification":"action_required","recommendation":"escalate","reason_codes":["critical_severity","action_requested","fresh_event"],"next_gate":"human_or_atlas_review_required_before_any_side_effect"}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Optional local summary draft
|
||||||
|
|
||||||
|
If the decision is `summarize` or `escalate`, a wrapper may request a bounded draft from `/v1/advisory/generate`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://172.19.0.1:18830/v1/advisory/generate \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"trace_id":"cron:service-health:sample","job":"summary","input":"Health check warning: openvino-reranker timed out twice; no restart attempted.","max_new_tokens":48}' \
|
||||||
|
| jq '{mode, trace_id, npu_ok: .npu_proof.ok, authority, draft: .result.draft_text, final_authority: .result.final_authority}'
|
||||||
|
```
|
||||||
|
|
||||||
|
The draft remains non-authoritative. It must not be automatically sent externally or written to memory.
|
||||||
|
|
||||||
|
## n8n integration pattern
|
||||||
|
|
||||||
|
Recommended node chain for dry-run workflows:
|
||||||
|
|
||||||
|
```text
|
||||||
|
Schedule/Webhook/Failure Trigger
|
||||||
|
-> Set normalized event envelope
|
||||||
|
-> HTTP Request POST /v1/advisory/classify
|
||||||
|
-> Code node maps decision envelope
|
||||||
|
-> IF node on recommendation
|
||||||
|
suppress/log: execution log only
|
||||||
|
summarize/escalate: optional local summary draft, then execution log only
|
||||||
|
```
|
||||||
|
|
||||||
|
The IF node must not connect to outbound messaging, service restart, memory write, or Hermes routing nodes until a separate approval changes the authority boundary.
|
||||||
|
|
||||||
|
See `../examples/n8n-advisory-dry-run-fragment.json` for a sanitized node fragment.
|
||||||
|
|
||||||
|
## Cron integration pattern
|
||||||
|
|
||||||
|
Cron jobs should call a wrapper script that prints one compact line and exits successfully unless the wrapper itself fails. The wrapper should not page or restart.
|
||||||
|
|
||||||
|
Example crontab shape:
|
||||||
|
|
||||||
|
```text
|
||||||
|
*/15 * * * * /home/will/lab/swarm/openvino-advisory-gateway/examples/cron-advisory-dry-run.sh npu-service-health warning health_check "openvino-reranker timeout twice" "service:openvino-reranker:timeout" >> /home/will/.local/state/npu-advisory/cron.log 2>&1
|
||||||
|
```
|
||||||
|
|
||||||
|
See `../examples/cron-advisory-dry-run.sh`.
|
||||||
|
|
||||||
|
## Verification checklist
|
||||||
|
|
||||||
|
- Gateway health is reachable on the intended interface.
|
||||||
|
- Classifier response includes `schema=openvino_advisory_v1`.
|
||||||
|
- All `.authority.*` flags are `false`; the response grants no side-effect authority.
|
||||||
|
- `.npu_proof.ok` is true and `npu_busy_delta_us > 0` before claiming NPU-backed advice.
|
||||||
|
- Decision envelope is compact and contains only enum labels, reason codes, booleans, counts, deltas, and gates; it carries no raw payload text.
|
||||||
|
- Duplicate/stale/no-op/action-required examples remain dry-run only.
|
||||||
|
- No n8n workflow activation, outbound send, service restart, memory write, routing change, private-dir broadening, or vector DB mutation occurred.
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Dry-run cron/n8n advisory wrapper.
|
||||||
|
# It calls the advisory classifier and prints one compact decision line.
|
||||||
|
# It does not send, restart, route, execute tools, or write memory.
|
||||||
|
|
||||||
|
GATEWAY_URL="${NPU_ADVISORY_GATEWAY_URL:-http://172.19.0.1:18830}"
|
||||||
|
WORKFLOW="${1:-cron-advisory-sample}"
|
||||||
|
SEVERITY="${2:-normal}"
|
||||||
|
EVENT_KIND="${3:-health_check}"
|
||||||
|
SUBJECT="${4:-sample advisory event}"
|
||||||
|
DEDUPE_KEY="${5:-sample}"
|
||||||
|
TRACE_ID="${NPU_ADVISORY_TRACE_ID:-cron:${WORKFLOW}:$(date -u +%Y%m%dT%H%M%SZ)}"
|
||||||
|
|
||||||
|
TEXT="source=cron workflow=${WORKFLOW} severity=${SEVERITY} kind=${EVENT_KIND} subject=${SUBJECT} dedupe_key=${DEDUPE_KEY} dry_run=true authority=no-send,no-restart,no-memory"
|
||||||
|
|
||||||
|
payload=$(jq -nc --arg trace_id "$TRACE_ID" --arg text "$TEXT" '{trace_id:$trace_id,text:$text}')
|
||||||
|
response=$(curl -fsS "${GATEWAY_URL%/}/v1/advisory/classify" -H 'Content-Type: application/json' -d "$payload")
|
||||||
|
|
||||||
|
printf '%s\n' "$response" | jq -c --arg source cron --arg workflow "$WORKFLOW" --arg severity "$SEVERITY" --arg dedupe_key "$DEDUPE_KEY" '
|
||||||
|
. as $env
|
||||||
|
| ($env.result.labels.urgency.value // "normal") as $urgency
|
||||||
|
| ($env.result.labels.urgency.confidence // 0) as $confidence
|
||||||
|
| ($env.npu_proof.ok == true and (($env.npu_proof.npu_busy_delta_us // 0) > 0)) as $npu_ok
|
||||||
|
| (if ($npu_ok | not) then "log"
|
||||||
|
elif ($severity == "critical") then "escalate"
|
||||||
|
elif ($severity == "warning" or $urgency == "high" or $urgency == "critical") then "summarize"
|
||||||
|
else "log" end) as $recommendation
|
||||||
|
| (if ($recommendation == "log" and $severity == "normal") then "no_op" else "action_required" end) as $classification
|
||||||
|
| {
|
||||||
|
schema: "cron_n8n_advisory_decision_v1",
|
||||||
|
trace_id: $env.trace_id,
|
||||||
|
source: $source,
|
||||||
|
workflow: $workflow,
|
||||||
|
dry_run: true,
|
||||||
|
recommendation: $recommendation,
|
||||||
|
classification: $classification,
|
||||||
|
confidence: $confidence,
|
||||||
|
reason_codes: ([
|
||||||
|
(if $npu_ok then "npu_proof_ok" else "npu_proof_failed" end),
|
||||||
|
("severity_" + $severity),
|
||||||
|
("urgency_" + $urgency)
|
||||||
|
]),
|
||||||
|
npu_proof: $env.npu_proof,
|
||||||
|
authority: $env.authority,
|
||||||
|
next_gate: (if $recommendation == "escalate" or $recommendation == "summarize" then "human_or_atlas_review_required_before_any_side_effect" else "none_in_dry_run" end)
|
||||||
|
}'
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
{
|
||||||
|
"name": "OpenVINO Advisory Dry-Run Fragment",
|
||||||
|
"active": false,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"values": {
|
||||||
|
"string": [
|
||||||
|
{"name": "schema", "value": "cron_n8n_event_v1"},
|
||||||
|
{"name": "source", "value": "n8n"},
|
||||||
|
{"name": "workflow", "value": "swarm-health-watchdog"},
|
||||||
|
{"name": "event_kind", "value": "health_check"},
|
||||||
|
{"name": "severity", "value": "warning"},
|
||||||
|
{"name": "subject", "value": "OpenVINO service health warning"},
|
||||||
|
{"name": "summary", "value": "Health probe reported a warning; no restart or send is authorized."},
|
||||||
|
{"name": "dedupe_key", "value": "service:openvino:warning"},
|
||||||
|
{"name": "dry_run", "value": "true"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"options": {}
|
||||||
|
},
|
||||||
|
"id": "set-normalized-event",
|
||||||
|
"name": "Set normalized advisory event",
|
||||||
|
"type": "n8n-nodes-base.set",
|
||||||
|
"typeVersion": 2,
|
||||||
|
"position": [260, 300]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://172.19.0.1:18830/v1/advisory/classify",
|
||||||
|
"sendBody": true,
|
||||||
|
"contentType": "json",
|
||||||
|
"jsonBody": "={{ JSON.stringify({ trace_id: 'n8n:' + $json.workflow + ':' + $now.toISO(), text: 'source=n8n workflow=' + $json.workflow + ' severity=' + $json.severity + ' kind=' + $json.event_kind + ' subject=' + $json.subject + ' summary=' + $json.summary + ' dedupe_key=' + $json.dedupe_key + ' dry_run=true authority=no-send,no-restart,no-memory' }) }}",
|
||||||
|
"options": {
|
||||||
|
"timeout": 20000
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "http-advisory-classify",
|
||||||
|
"name": "HTTP advisory classify dry-run",
|
||||||
|
"type": "n8n-nodes-base.httpRequest",
|
||||||
|
"typeVersion": 4,
|
||||||
|
"position": [520, 300]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"jsCode": "const env = $json;\nconst labels = env.result?.labels || {};\nconst urgency = labels.urgency?.value || 'normal';\nconst severity = $('Set normalized advisory event').first().json.severity || 'normal';\nconst npuOk = env.npu_proof?.ok === true && (env.npu_proof?.npu_busy_delta_us || 0) > 0;\nlet recommendation = 'log';\nlet classification = 'no_op';\nconst reason_codes = [npuOk ? 'npu_proof_ok' : 'npu_proof_failed', `severity_${severity}`, `urgency_${urgency}`];\nif (npuOk && severity === 'critical') { recommendation = 'escalate'; classification = 'action_required'; }\nelse if (npuOk && (severity === 'warning' || urgency === 'high' || urgency === 'critical')) { recommendation = 'summarize'; classification = 'action_required'; }\nif (!npuOk) reason_codes.push('log_only_no_npu_claim');\nreturn [{ json: { schema: 'cron_n8n_advisory_decision_v1', trace_id: env.trace_id, source: 'n8n', workflow: $('Set normalized advisory event').first().json.workflow, dry_run: true, recommendation, classification, confidence: labels.urgency?.confidence || 0, reason_codes, npu_proof: env.npu_proof, authority: env.authority, next_gate: (recommendation === 'summarize' || recommendation === 'escalate') ? 'human_or_atlas_review_required_before_any_side_effect' : 'none_in_dry_run' } } }];"
|
||||||
|
},
|
||||||
|
"id": "map-dry-run-decision",
|
||||||
|
"name": "Map dry-run decision (no side effects)",
|
||||||
|
"type": "n8n-nodes-base.code",
|
||||||
|
"typeVersion": 2,
|
||||||
|
"position": [780, 300]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"connections": {
|
||||||
|
"Set normalized advisory event": {
|
||||||
|
"main": [[{"node": "HTTP advisory classify dry-run", "type": "main", "index": 0}]]
|
||||||
|
},
|
||||||
|
"HTTP advisory classify dry-run": {
|
||||||
|
"main": [[{"node": "Map dry-run decision (no side effects)", "type": "main", "index": 0}]]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"settings": {
|
||||||
|
"executionOrder": "v1"
|
||||||
|
},
|
||||||
|
"pinData": {},
|
||||||
|
"staticData": null,
|
||||||
|
"tags": ["dry-run", "openvino", "advisory"]
|
||||||
|
}
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
# OpenVINO Context Gate
|
||||||
|
|
||||||
|
Local-only Atlas/Hermes context-gate advisory prototype.
|
||||||
|
|
||||||
|
This first slice is CLI-only and dry-run by design. It takes a non-private query,
|
||||||
|
optionally asks the localhost classifier on `127.0.0.1:18819` for advisory labels,
|
||||||
|
and emits a compact typed context bundle plan. It does not retrieve private
|
||||||
|
content or change live Atlas/Hermes behavior.
|
||||||
|
|
||||||
|
## Safety invariants
|
||||||
|
|
||||||
|
Closed in v1:
|
||||||
|
|
||||||
|
- live Atlas/Hermes routing changes
|
||||||
|
- memory writes
|
||||||
|
- outbound sends
|
||||||
|
- tool execution by the sidecar
|
||||||
|
- service restarts
|
||||||
|
- vector DB mutation or reindexing
|
||||||
|
- private root broadening
|
||||||
|
- live config changes
|
||||||
|
|
||||||
|
The CLI only plans which source classes an authoritative Atlas/Hermes agent might
|
||||||
|
use later: `durable_memory`, `session_search`, `rag_search`, `repo_files`,
|
||||||
|
`live_system`, `web`, or `no_retrieval`.
|
||||||
|
|
||||||
|
NPU proof is strict: `npu_verified=true` is only emitted when a live classifier
|
||||||
|
request reports both a positive endpoint NPU delta and a positive sysfs (endpoint-reported or outer)
|
||||||
|
busy delta. HTTP 200 alone is never treated as proof. Offline and fallback modes
|
||||||
|
set `npu_verified=false` and include a warning.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Live classifier path, with compact terminal output:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/context-gate-advisory.py \
|
||||||
|
--query "How do I check whether the RAG reranker is using the NPU?" \
|
||||||
|
--format compact
|
||||||
|
```
|
||||||
|
|
||||||
|
Deterministic offline smoke, safe for unit-test hosts without NPU services:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/context-gate-advisory.py \
|
||||||
|
--offline \
|
||||||
|
--query "Write a haiku about Seattle rain." \
|
||||||
|
--format compact-json
|
||||||
|
```
|
||||||
|
|
||||||
|
Fallback plan if the classifier is down:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/context-gate-advisory.py \
|
||||||
|
--allow-offline-fallback \
|
||||||
|
--query "Where did we leave the NPU context gate implementation plan?" \
|
||||||
|
--context platform=kanban \
|
||||||
|
--context repo_path=/home/will/lab/swarm \
|
||||||
|
--format compact-json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output shape
|
||||||
|
|
||||||
|
Full JSON includes:
|
||||||
|
|
||||||
|
- `schema=atlas_context_gate_plan_v1`
|
||||||
|
- `dry_run=true`
|
||||||
|
- `query_class`
|
||||||
|
- `source_plan`
|
||||||
|
- `bundle_plan`
|
||||||
|
- `npu_proof`
|
||||||
|
- closed `authority`
|
||||||
|
- closed approval `gates`
|
||||||
|
- compact `warnings`
|
||||||
|
|
||||||
|
Compact output intentionally avoids raw private snippets and raw JSON dumps:
|
||||||
|
|
||||||
|
```text
|
||||||
|
ok=true schema=atlas_context_gate_plan_v1 bundle=OpsDebugBundle sources=live_system,repo_files,rag_search source_count=3 npu_verified=false classifier_delta_us=None outer_sysfs_delta_us=None gates=closed:route,memory,send,tools,restart,vector,private_roots,config warnings=offline_heuristic_classifier_no_npu_claim,npu_proof_inconclusive
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notes for reviewers
|
||||||
|
|
||||||
|
- No HTTP service or systemd unit is added in this slice.
|
||||||
|
- The prototype does not call RAG, memory, session search, web, filesystem tools,
|
||||||
|
or the advisory gateway. It only emits a plan.
|
||||||
|
- Unit tests use fake/offline classifier results and do not require live NPU.
|
||||||
|
- Optional live smoke may call only the local classifier endpoint and read
|
||||||
|
`/sys/class/accel/accel0/device/npu_busy_time_us` for positive delta proof.
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
"""Atlas/Hermes local advisory context-gate prototype."""
|
||||||
|
|
||||||
|
from .context_gate import SCHEMA, ContextGateError, build_plan, compact_json, compact_line, validate_plan
|
||||||
|
|
||||||
|
__all__ = ["SCHEMA", "ContextGateError", "build_plan", "compact_json", "compact_line", "validate_plan"]
|
||||||
@@ -0,0 +1,90 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .context_gate import (
|
||||||
|
DEFAULT_CLASSIFIER_URL,
|
||||||
|
ContextGateError,
|
||||||
|
build_plan,
|
||||||
|
classify_live,
|
||||||
|
classify_offline,
|
||||||
|
compact_json,
|
||||||
|
compact_line,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_context(raw_items: list[str]) -> dict[str, Any]:
    """Turn repeated ``KEY=VALUE`` CLI items into a context dictionary.

    Values spelled ``true``/``false`` (any letter case) become booleans; every
    other value is kept as the raw string. Only the first ``=`` separates the
    key from the value, and later duplicates of a key overwrite earlier ones.

    Raises:
        ContextGateError: if an item has no ``=`` or an empty key.
    """
    booleans = {"true": True, "false": False}
    parsed_context: dict[str, Any] = {}
    for entry in raw_items:
        name, separator, raw_value = entry.partition("=")
        if not separator:
            raise ContextGateError(f"invalid_context_item:{entry}")
        if not name:
            raise ContextGateError("invalid_context_key")
        parsed_context[name] = booleans.get(raw_value.lower(), raw_value)
    return parsed_context
|
||||||
|
|
||||||
|
|
||||||
|
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the context-gate advisory CLI.

    ``--query`` is the only required option. The remaining options select the
    output format, add ``KEY=VALUE`` context items, bound the source plan, and
    choose between the live classifier and the offline heuristic.
    """
    parser = argparse.ArgumentParser(
        description="Emit a local-only Atlas/Hermes advisory context bundle plan. No routing, retrieval, memory writes, sends, restarts, or vector mutations are performed.",
    )
    add = parser.add_argument

    # Request description.
    add("--query", required=True, help="Non-private query to plan for")
    add("--format", choices=["compact", "compact-json", "json"], default="compact")
    add(
        "--context",
        action="append",
        default=[],
        metavar="KEY=VALUE",
        help="Optional compact request context, e.g. platform=kanban repo_path=/path",
    )
    add("--max-sources", type=int, default=4)
    add("--trace-id")

    # Classifier selection and NPU-proof behaviour.
    add("--classifier-url", default=DEFAULT_CLASSIFIER_URL)
    add("--classifier-timeout", type=float, default=8.0)
    add(
        "--offline",
        action="store_true",
        help="Use deterministic heuristic labels; makes no NPU claim",
    )
    add(
        "--allow-offline-fallback",
        action="store_true",
        help="If live classifier is unavailable, emit an advisory fallback plan with npu_verified=false",
    )
    add(
        "--no-require-npu-proof",
        action="store_true",
        help="Do not add npu_proof_inconclusive warning when running offline/fallback",
    )
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Run the advisory CLI and return a process exit status.

    Parses ``argv``, obtains classifier labels (offline heuristic, live
    classifier, or live with offline fallback), builds the dry-run plan and
    prints it in the requested format.

    Returns:
        0 after printing a plan, or 2 after printing ``error=...`` to stderr
        when a ContextGateError is raised along the way.
    """
    args = build_arg_parser().parse_args(argv)
    try:
        context = _parse_context(args.context)
        if args.offline:
            classifier = classify_offline(args.query, context)
        else:
            try:
                classifier = classify_live(
                    args.query,
                    context,
                    classifier_url=args.classifier_url,
                    timeout=args.classifier_timeout,
                )
            except ContextGateError as exc:
                if not args.allow_offline_fallback:
                    raise
                # The live failure reason becomes the warning carried by the fallback plan.
                classifier = classify_offline(args.query, context, warning=str(exc))
        plan = build_plan(
            args.query,
            context=context,
            options={
                "dry_run": True,
                "max_sources": args.max_sources,
                "include_private_text": False,
                "require_npu_proof": not args.no_require_npu_proof,
                "trace_id": args.trace_id,
            },
            classifier=classifier,
        )
    except ContextGateError as exc:
        print(f"error={exc}", file=sys.stderr)
        return 2

    renderers = {
        "json": lambda rendered_plan: json.dumps(rendered_plan, indent=2, sort_keys=True),
        "compact-json": compact_json,
    }
    # Any other format value (argparse only allows "compact") renders the one-line summary.
    print(renderers.get(args.format, compact_line)(plan))
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: main()'s integer return value is used as the process exit status.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
|
||||||
@@ -0,0 +1,482 @@
|
|||||||
|
"""Local-only advisory context bundle planner for Atlas/Hermes.
|
||||||
|
|
||||||
|
This module intentionally emits a retrieval/authority plan only. It does not call
|
||||||
|
Hermes memory/session/RAG/web tools, mutate vector stores, broaden private roots,
|
||||||
|
or change live routing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import ipaddress
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Mapping, Sequence
|
||||||
|
|
||||||
|
# Schema identifier stamped on every plan; validate_plan() rejects any other value.
SCHEMA = "atlas_context_gate_plan_v1"
# sysfs file read before and after a live classifier call to compute the outer busy delta.
NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
# Default endpoint for classify_live(); it must pass validate_classifier_url().
DEFAULT_CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
|
||||||
|
|
||||||
|
# Authority flags copied into every plan. validate_plan() rejects a plan whose
# "authority" block differs from this mapping, so every flag stays False.
AUTHORITY = {
    "may_route": False,
    "may_write_memory": False,
    "may_send_external": False,
    "may_process_private_dirs": False,
    "may_execute_tools": False,
    "may_restart_services": False,
    "may_mutate_vector_db": False,
    "may_change_live_config": False,
}
|
||||||
|
|
||||||
|
# Approval gates copied into every plan; each one is reported as closed.
# There is one gate per AUTHORITY flag, in the same order as the
# "route,memory,send,tools,restart,vector,private_roots,config" summary
# printed by compact_line().
GATES = {
    "live_routing_change": "closed_requires_explicit_approval",
    "memory_write": "closed_requires_explicit_approval",
    "outbound_send": "closed_requires_explicit_approval",
    "tool_execution": "closed_requires_explicit_approval",
    "service_restart": "closed_requires_explicit_approval",
    "vector_mutation": "closed_requires_explicit_approval",
    "private_root_broadening": "closed_requires_explicit_approval",
    # Previously missing although live config changes are documented as closed in v1.
    "live_config_change": "closed_requires_explicit_approval",
}
|
||||||
|
|
||||||
|
# Closed set of source classes a plan may name. _source() and validate_plan()
# both check membership against it.
_ALLOWED_SOURCES = {
    "durable_memory",
    "session_search",
    "rag_search",
    "repo_files",
    "live_system",
    "web",
    "no_retrieval",
}
|
||||||
|
|
||||||
|
|
||||||
|
class ContextGateError(ValueError):
    """Raised for invalid requests or unavailable required local stages.

    The message is a compact machine-readable code such as ``query_required``
    or ``classifier_unavailable: ...``. Because this subclasses ``ValueError``,
    callers that catch ``ValueError`` also catch it.
    """
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ClassifierResult:
    """Immutable classifier outcome consumed by build_plan()."""

    # Label name -> bare value, or a {"value": ..., "confidence": ...} mapping.
    labels: Mapping[str, Any]
    # "npu_busy_delta_us" as reported in the classifier response; None when absent or offline.
    npu_busy_delta_us: int | None
    # "sysfs_npu_busy_delta_us" as reported in the classifier response; None when absent or offline.
    sysfs_npu_busy_delta_us: int | None
    # Difference of the local sysfs reads taken around the request; None if either read failed.
    outer_sysfs_delta_us: int | None
    # True for classify_live() results, False for classify_offline() results.
    live: bool
    # Optional warning code carried into the plan's "warnings" list.
    warning: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def read_npu_busy_time_us(path: Path = NPU_BUSY_PATH) -> int | None:
    """Read the integer stored in ``path``.

    Returns:
        The parsed integer, or ``None`` when the file cannot be read or its
        content is not an integer.
    """
    try:
        raw = path.read_text(encoding="utf-8")
        return int(raw.strip())
    except (OSError, ValueError):
        # FileNotFoundError and PermissionError are OSError subclasses.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _label_value(labels: Mapping[str, Any], name: str, default: Any) -> Any:
    """Return the value of label ``name``, unwrapping ``{"value": ...}`` entries.

    A label stored as a mapping that has a ``"value"`` key yields that value.
    Any other stored entry is returned unchanged, and a missing label yields
    ``default``.
    """
    entry = labels.get(name, default)
    is_wrapped = isinstance(entry, Mapping) and "value" in entry
    return entry.get("value", default) if is_wrapped else entry
|
||||||
|
|
||||||
|
|
||||||
|
def _label_confidence(labels: Mapping[str, Any], name: str, default: float = 0.5) -> float:
    """Return the ``confidence`` of label ``name`` as a float.

    ``default`` is returned when the label is missing, is not a mapping, or
    carries a confidence that cannot be converted to ``float``.
    """
    entry = labels.get(name)
    if not isinstance(entry, Mapping):
        return default
    try:
        return float(entry.get("confidence", default))
    except (TypeError, ValueError):
        return default
|
||||||
|
|
||||||
|
|
||||||
|
def heuristic_labels(query: str, context: Mapping[str, Any] | None = None) -> dict[str, Any]:
    """Derive classifier-shaped labels from plain substring matching.

    This is the transparent fallback used by tests and explicit offline smoke
    mode. Matching is case-insensitive and substring-based, so a keyword also
    matches inside longer words.

    Returns:
        A dict with ``tool_needed``, ``memory_candidate``, ``urgency``,
        ``workflow_category`` and ``safety_confirmation_required`` entries,
        each shaped as ``{"value": ..., "confidence": ...}``.
    """
    lowered = query.lower()
    platform = str((context or {}).get("platform", "unknown")).lower()

    def mentions(*needles: str) -> bool:
        return any(needle in lowered for needle in needles)

    current_state = ("current", "now", "health", "port", "process", "systemd", "status", "npu", "listening", "logs")
    prior_work = ("where did we leave", "what did we decide", "previous", "earlier", "handoff", "plan")
    coding = ("implement", "code", "repo", "test", "pytest", "diff", "branch", "hermes")
    research = ("research", "compare", "summarize", "explain", "what is", "how do i")
    side_effects = ("change live routing", "live routing", "restart", "send", "write memory", "reindex", "mutate", "delete")

    needs_confirmation = mentions(*side_effects)
    needs_tool = mentions(*current_state, *coding) or needs_confirmation

    # Category precedence: coding, then devops, then research, else chat.
    if platform == "kanban" or "kanban" in lowered or mentions(*coding):
        category = "coding"
    elif mentions(*current_state):
        category = "devops"
    elif mentions(*research, *prior_work):
        category = "research"
    else:
        category = "chat"

    if mentions("remember", "preference"):
        memory_candidate = "durable_user_fact"
    elif mentions("convention", "workflow"):
        memory_candidate = "workflow_convention"
    else:
        memory_candidate = "none"

    urgency = "high" if mentions("urgent", "critical", "down", "broken") else "normal"

    def labelled(value: Any, confidence: float) -> dict[str, Any]:
        return {"value": value, "confidence": confidence}

    return {
        "tool_needed": labelled(needs_tool, 0.76 if needs_tool else 0.68),
        "memory_candidate": labelled(memory_candidate, 0.8 if memory_candidate != "none" else 0.35),
        "urgency": labelled(urgency, 0.8 if urgency == "high" else 0.65),
        "workflow_category": labelled(category, 0.78 if category != "chat" else 0.7),
        "safety_confirmation_required": labelled(needs_confirmation, 0.9 if needs_confirmation else 0.2),
    }
|
||||||
|
|
||||||
|
|
||||||
|
class _NoClassifierRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Fail closed instead of following redirects away from a validated local URL."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):  # type: ignore[no-untyped-def]
        """Refuse every redirect by returning ``None`` instead of a new request."""
        return None


# Opener used for every classifier POST.
# ProxyHandler({}) disables proxy auto-detection: the default opener honours
# http_proxy/https_proxy, which would relay the loopback request (and the query
# text) through an external proxy and defeat the local-only URL validation.
_CLASSIFIER_OPENER = urllib.request.build_opener(
    urllib.request.ProxyHandler({}),
    _NoClassifierRedirectHandler,
)
|
||||||
|
|
||||||
|
|
||||||
|
def classify_live(
    query: str,
    context: Mapping[str, Any] | None = None,
    classifier_url: str = DEFAULT_CLASSIFIER_URL,
    timeout: float = 8.0,
) -> ClassifierResult:
    """POST ``query`` to the local classifier and return its labels with NPU deltas.

    The sysfs busy-time file is read immediately before and after the request
    so an outer delta can be reported next to the endpoint-reported deltas.

    Args:
        query: Text to classify.
        context: Optional request context; only ``platform`` is forwarded.
        classifier_url: Endpoint URL, validated as loopback before any POST.
        timeout: Timeout in seconds passed to the opener.

    Raises:
        ContextGateError: if the URL is rejected, the request fails, the body
            is not UTF-8 JSON, or the response has no ``labels`` mapping.
    """
    classifier_url = validate_classifier_url(classifier_url)
    before = read_npu_busy_time_us()
    payload = {
        "id": f"context-gate-{int(time.time())}",
        "text": query,
        "context": {"platform": (context or {}).get("platform", "cli"), "source": "context_gate"},
        "options": {"include_evidence": False, "include_embedding_debug": False, "dry_run": True},
    }
    req = urllib.request.Request(
        classifier_url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with _CLASSIFIER_OPENER.open(req, timeout=timeout) as resp:  # noqa: S310 - local configured endpoint only
            # Bounded read: a misbehaving endpoint cannot make us buffer an unbounded body.
            raw = resp.read(256_000)
    except (urllib.error.URLError, TimeoutError, OSError) as exc:
        raise ContextGateError(f"classifier_unavailable: {exc}") from exc
    after = read_npu_busy_time_us()
    try:
        data = json.loads(raw.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        # A non-UTF-8 body is reported like malformed JSON instead of escaping
        # as a bare UnicodeDecodeError that bypasses the offline fallback.
        raise ContextGateError("classifier_invalid_json") from exc
    # Valid JSON that is not an object (list, string, number) cannot carry labels.
    labels = data.get("labels") if isinstance(data, Mapping) else None
    if not isinstance(labels, Mapping):
        raise ContextGateError("classifier_missing_labels")
    outer = after - before if before is not None and after is not None else None
    return ClassifierResult(
        labels=labels,
        npu_busy_delta_us=_as_int_or_none(data.get("npu_busy_delta_us")),
        sysfs_npu_busy_delta_us=_as_int_or_none(data.get("sysfs_npu_busy_delta_us")),
        outer_sysfs_delta_us=outer,
        live=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def validate_classifier_url(classifier_url: str) -> str:
    """Validate the local-only classifier endpoint before any POST is attempted.

    The URL must use ``http`` or ``https`` and its host must be ``localhost``
    (case-insensitive, trailing dot allowed) or a loopback IP literal.

    Returns:
        ``classifier_url`` unchanged when it is acceptable.

    Raises:
        ContextGateError: for an unparseable URL, an unsupported scheme, a
            missing host, or a host that is not loopback.
    """
    try:
        parsed = urllib.parse.urlparse(classifier_url)
        host = parsed.hostname
    except ValueError as exc:
        # urlparse raises a plain ValueError for malformed netlocs such as an
        # unbalanced IPv6 bracket; report it with the module's own error type.
        raise ContextGateError("invalid_classifier_url:unparseable") from exc
    if parsed.scheme not in {"http", "https"}:
        raise ContextGateError("invalid_classifier_url:scheme_must_be_http_or_https")
    if not host:
        raise ContextGateError("invalid_classifier_url:missing_host")
    host_normalized = host.lower().rstrip(".")
    if host_normalized == "localhost":
        return classifier_url
    try:
        address = ipaddress.ip_address(host_normalized)
    except ValueError as exc:
        # Any other hostname would need DNS resolution, so it is rejected outright.
        raise ContextGateError("invalid_classifier_url:host_must_be_loopback") from exc
    if not address.is_loopback:
        raise ContextGateError("invalid_classifier_url:host_must_be_loopback")
    return classifier_url
|
||||||
|
|
||||||
|
|
||||||
|
def _as_int_or_none(value: Any) -> int | None:
    """Convert ``value`` to ``int``, returning ``None`` when that is impossible.

    ``OverflowError`` is handled alongside ``TypeError``/``ValueError`` because
    ``json.loads`` accepts ``Infinity`` and ``int(float("inf"))`` overflows.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def classify_offline(query: str, context: Mapping[str, Any] | None = None, warning: str | None = None) -> ClassifierResult:
    """Build a non-live classifier result from the heuristic labels.

    All NPU deltas are ``None`` and ``live`` is ``False``. ``warning`` is kept
    when it is non-empty; otherwise the default no-NPU-claim warning is used.
    """
    labels = heuristic_labels(query, context)
    note = warning if warning else "offline_heuristic_classifier_no_npu_claim"
    return ClassifierResult(
        labels=labels,
        npu_busy_delta_us=None,
        sysfs_npu_busy_delta_us=None,
        outer_sysfs_delta_us=None,
        live=False,
        warning=note,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _has_any(text: str, needles: list[str]) -> bool:
    """Return True when at least one of ``needles`` occurs as a substring of ``text``."""
    for needle in needles:
        if needle in text:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _source(source: str, action: str, reason: str, priority: int, freshness: str, confidence: float) -> dict[str, Any]:
    """Build one ``source_plan`` entry for an allowed source class.

    ``no_retrieval`` entries need no permission and skip retrieval; every other
    source is marked as requiring a tool run by the authoritative agent.

    Raises:
        ContextGateError: if ``source`` is not in the allowed source set. This
            is an explicit raise rather than ``assert`` so the allow-list is
            still enforced when Python runs with ``-O``.
    """
    if source not in _ALLOWED_SOURCES:
        raise ContextGateError(f"invalid_source:{source}")
    retrieves = source != "no_retrieval"
    return {
        "source": source,
        "action": action,
        "reason": reason,
        "priority": priority,
        "freshness": freshness,
        "permission": "tool_required_by_authoritative_agent" if retrieves else "none",
        "missing_behavior": "retrieve_or_mark_missing" if retrieves else "skip_retrieval",
        "confidence": round(confidence, 2),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def select_sources(query: str, labels: Mapping[str, Any], context: Mapping[str, Any], max_sources: int) -> list[dict[str, Any]]:
    """Choose which source classes the plan should recommend, in priority order.

    Each rule pairs a trigger (classifier labels, request context, or keyword
    substrings in the lower-cased query) with the source entry it adds. When no
    rule fires, a single ``no_retrieval`` entry is returned. The result is
    sorted by priority, de-duplicated by source name, and cut to ``max_sources``.
    """
    text = query.lower()
    category = str(_label_value(labels, "workflow_category", "unknown"))
    memory_candidate = str(_label_value(labels, "memory_candidate", "none"))
    tool_needed = bool(_label_value(labels, "tool_needed", False))

    rules = [
        (
            tool_needed or _has_any(text, ["current", "now", "health", "port", "process", "systemd", "status", "npu", "listening", "logs", "time", "date"]),
            ("live_system", "inspect_with_terminal_or_domain_tool", "current service/system state requested", 1, "live_required", 0.9),
        ),
        (
            context.get("repo_path") or category == "coding" or _has_any(text, ["repo", "code", "file", "test", "pytest", "diff", "implementation", "hermes", "atlas"]),
            ("repo_files", "inspect_explicit_repo_paths", "repo-specific implementation or config context", 2, "current_filesystem", 0.84),
        ),
        (
            _has_any(text, ["where did we leave", "what did we decide", "previous", "earlier", "handoff", "prior", "last time"]),
            ("session_search", "search_prior_sessions_or_kanban_handoffs", "prior decision or handoff requested", 3, "session-era", 0.82),
        ),
        (
            _has_any(text, ["runbook", "note", "obsidian", "rag", "docs", "knowledge", "plan"]),
            ("rag_search", "query_local_index_read_only", "local docs or indexed knowledge likely useful", 4, "cached_index", 0.76),
        ),
        (
            memory_candidate != "none" or _has_any(text, ["preference", "remember", "profile", "durable fact"]),
            ("durable_memory", "read_stable_facts_only", "stable preference/environment facts may be relevant", 5, "static", 0.72),
        ),
        (
            _has_any(text, ["latest", "news", "version", "release", "public", "web"]),
            ("web", "search_public_current_sources", "current external public fact requested", 6, "live_external", 0.7),
        ),
    ]
    candidates = [_source(*spec) for triggered, spec in rules if triggered]
    if not candidates:
        candidates.append(_source("no_retrieval", "answer_directly", "no factual retrieval dependency detected", 1, "none", 0.78))

    # Stable priority order and bounded compact plan; the first entry per source wins.
    unique: dict[str, dict[str, Any]] = {}
    for entry in sorted(candidates, key=lambda candidate: candidate["priority"]):
        unique.setdefault(entry["source"], entry)
    return list(unique.values())[:max_sources]
|
||||||
|
|
||||||
|
|
||||||
|
def select_bundle_name(query: str, labels: Mapping[str, Any], context: Mapping[str, Any]) -> str:
    """Pick the bundle type for a request.

    Checks run in order and the first match wins: coding, ops/debug, personal
    assistant, simple response (when the top source is ``no_retrieval``), and
    finally research as the default.
    """
    lowered = query.lower()
    category = str(_label_value(labels, "workflow_category", "unknown"))

    is_coding = context.get("platform") == "kanban" or context.get("task_id") or category == "coding"
    if is_coding:
        return "CodingTaskBundle"

    is_ops = category in {"devops", "debugging"} or _has_any(lowered, ["health", "port", "systemd", "npu", "service", "logs"])
    if is_ops:
        return "OpsDebugBundle"

    is_personal = category in {"note_taking", "productivity"} or _has_any(lowered, ["preference", "remember", "profile"])
    if is_personal:
        return "PersonalAssistantBundle"

    # Only the single highest-priority source is inspected here.
    top_sources = select_sources(query, labels, context, 1)
    if any(entry["source"] == "no_retrieval" for entry in top_sources):
        return "SimpleResponseBundle"
    return "ResearchBundle"
|
||||||
|
|
||||||
|
|
||||||
|
def _field(field: str, shape: str, source: str, freshness: str, missing: str, privacy: str, confidence: float = 0.8) -> dict[str, Any]:
    """Describe one bundle field as a plain dict.

    ``provenance_required`` is always ``True`` and ``confidence`` is rounded to
    two decimal places.
    """
    return dict(
        field=field,
        shape=shape,
        source_of_truth=source,
        freshness=freshness,
        provenance_required=True,
        missing_behavior=missing,
        privacy=privacy,
        confidence=round(confidence, 2),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def build_bundle_plan(bundle_name: str, sources: Sequence[Mapping[str, Any]], query: str, labels: Mapping[str, Any]) -> dict[str, Any]:
    """Assemble the typed field plan for ``bundle_name``.

    Required fields come from a per-bundle table; any bundle name not in the
    table gets the research field set. Blocked fields are added when the
    classifier or the query text signals a side effect, and when ``rag_search``
    is among the planned sources.

    Returns:
        A dict with ``bundle_name``, ``required_fields``, an empty
        ``optional_fields`` list, and ``blocked_fields``.
    """
    safety_required = bool(_label_value(labels, "safety_confirmation_required", False))
    source_names = {entry["source"] for entry in sources}

    # Positional _field() arguments: field, shape, source, freshness, missing, privacy.
    catalog = {
        "OpsDebugBundle": (
            ("problem_statement", "compact_text", "user", "request", "mark_missing", "query_text_only"),
            ("target_scope", "service_repo_or_host", "query_or_classifier", "request", "ask_or_infer_low_confidence", "no_private_paths_beyond_explicit"),
            ("live_state", "status_table", "live_system", "live_required", "retrieve_or_fail_closed", "no_raw_logs_by_default"),
            ("safety_gates", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
            ("provenance", "tool_names_and_paths", "executing_agent", "run", "mark_missing", "paths_only"),
        ),
        "CodingTaskBundle": (
            ("repo_root", "absolute_path", "task_or_context", "current", "ask_or_fail", "explicit_path_only"),
            ("git_state", "branch_dirty_counts", "live_system", "live_required", "retrieve_or_fail_closed", "no_diff_dump_by_default"),
            ("requirements", "bullet_summary", "user_kanban_files", "current", "retrieve_or_mark_missing", "no_private_snippets"),
            ("relevant_paths", "path_list", "repo_files", "current_filesystem", "search_narrowly", "paths_only"),
            ("tests_or_smokes", "command_list", "repo_files", "current_filesystem", "mark_missing", "commands_only"),
            ("review_gates", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
        ),
        "PersonalAssistantBundle": (
            ("user_intent", "compact_text", "user", "request", "mark_missing", "query_text_only"),
            ("durable_facts_needed", "fact_keys", "durable_memory", "static", "retrieve_or_mark_missing", "no_raw_memory_dump"),
            ("prior_decisions_needed", "session_refs", "session_search", "session-era", "retrieve_or_mark_missing", "summaries_only"),
            ("privacy_boundary", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
            ("action_authority", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
        ),
        "SimpleResponseBundle": (),
    }
    research_specs = (
        ("research_question", "compact_text", "user", "request", "mark_missing", "query_text_only"),
        ("source_plan", "ordered_source_list", "context_gate", "run", "mark_missing", "no_private_snippets"),
        ("evidence_requirements", "provenance_rules", "policy", "static", "fail_closed", "no_private_data"),
        ("freshness_cutoff", "freshness_policy", "classifier_query", "request", "mark_missing", "no_private_data"),
        ("missing_data_behavior", "policy_enum", "policy", "static", "fail_closed", "no_private_data"),
    )
    required = [_field(*spec) for spec in catalog.get(bundle_name, research_specs)]

    blocked = []
    side_effect_words = re.search(r"\b(route|routing|restart|send|write memory|reindex|delete|mutate)\b", query.lower())
    if safety_required or side_effect_words:
        blocked.append(_field("authority_side_effect", "approval_required", "policy", "static", "fail_closed", "no_side_effects_in_v1", 0.95))
    if "rag_search" in source_names:
        blocked.append(_field("vector_db_mutation", "not_allowed", "policy", "static", "fail_closed", "read_only_query_plan", 0.95))

    return {
        "bundle_name": bundle_name,
        "required_fields": required,
        "optional_fields": [],
        "blocked_fields": blocked,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_query_class(labels: Mapping[str, Any]) -> dict[str, Any]:
    """Flatten classifier labels into the plan's ``query_class`` block.

    ``confidence`` is the highest confidence among the workflow category,
    tool-needed and safety-confirmation labels, rounded to two decimals.
    """
    confidence_labels = ("workflow_category", "tool_needed", "safety_confirmation_required")
    top_confidence = max(_label_confidence(labels, name, 0.5) for name in confidence_labels)

    summary: dict[str, Any] = {}
    summary["workflow_category"] = _label_value(labels, "workflow_category", "unknown")
    summary["urgency"] = _label_value(labels, "urgency", "normal")
    summary["tool_needed"] = bool(_label_value(labels, "tool_needed", False))
    summary["memory_candidate"] = _label_value(labels, "memory_candidate", "none")
    summary["safety_confirmation_required"] = bool(_label_value(labels, "safety_confirmation_required", False))
    summary["confidence"] = round(top_confidence, 2)
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def npu_proof_from_classifier(result: ClassifierResult, require_npu_proof: bool) -> tuple[dict[str, Any], list[str]]:
    """Derive the plan's ``npu_proof`` block and warnings from a classifier result.

    Proof is strict. ``verified`` is True only when the result is live, the
    endpoint reports a positive NPU busy delta, AND at least one sysfs delta
    (endpoint-reported or measured around the request) is positive. A positive
    sysfs delta alone is not enough, since other work on the same device could
    produce it.

    Args:
        result: Classifier outcome to inspect.
        require_npu_proof: When True, an unverified result adds the
            ``npu_proof_inconclusive`` warning.

    Returns:
        ``(npu_proof, warnings)``; the classifier's own warning, if any, comes
        first in ``warnings``.
    """
    endpoint_delta = result.npu_busy_delta_us
    endpoint_sysfs_delta = result.sysfs_npu_busy_delta_us
    outer_delta = result.outer_sysfs_delta_us

    def positive(delta: int | None) -> bool:
        return delta is not None and delta > 0

    positive_sysfs = positive(endpoint_sysfs_delta) or positive(outer_delta)
    verified = bool(result.live and positive(endpoint_delta) and positive_sysfs)

    warnings: list[str] = []
    if result.warning:
        warnings.append(result.warning)
    if require_npu_proof and not verified:
        warnings.append("npu_proof_inconclusive")
    return {
        "classifier_delta_us": endpoint_delta,
        "classifier_sysfs_delta_us": endpoint_sysfs_delta,
        "outer_sysfs_delta_us": outer_delta,
        # No rerank stage is measured here, so this is always None.
        "rerank_delta_us": None,
        "verified": verified,
        "required": require_npu_proof,
        "classifier_live": result.live,
    }, warnings
|
||||||
|
|
||||||
|
|
||||||
|
def build_plan(
    query: str,
    *,
    context: Mapping[str, Any] | None = None,
    options: Mapping[str, Any] | None = None,
    classifier: ClassifierResult | None = None,
) -> dict[str, Any]:
    """Build and validate the dry-run context bundle plan for ``query``.

    Args:
        query: Non-empty query text.
        context: Optional request context (for example ``platform``,
            ``repo_path``, ``task_id``, ``trace_id``).
        options: Optional settings: ``dry_run`` (must stay True),
            ``include_private_text`` (must stay falsy), ``max_sources``
            (clamped to 1..6, default 4), ``require_npu_proof`` (default True)
            and ``trace_id``.
        classifier: Pre-computed classifier result; the offline heuristic is
            used when omitted.

    Returns:
        The plan dict, already checked by ``validate_plan``.

    Raises:
        ContextGateError: for an empty query, a disallowed option value, a
            ``max_sources`` that is not an integer, or a plan that fails
            validation.
    """
    if not query or not query.strip():
        raise ContextGateError("query_required")
    context = dict(context or {})
    options = dict(options or {})
    if options.get("dry_run", True) is not True:
        raise ContextGateError("dry_run_must_remain_true_in_v1")
    if options.get("include_private_text", False):
        raise ContextGateError("include_private_text_not_allowed_in_v1")
    try:
        requested_sources = int(options.get("max_sources", 4))
    except (TypeError, ValueError, OverflowError) as exc:
        # Report a malformed option with the module's own error type instead
        # of leaking a bare TypeError/ValueError to library callers.
        raise ContextGateError("invalid_max_sources") from exc
    max_sources = max(1, min(6, requested_sources))
    require_npu = bool(options.get("require_npu_proof", True))
    if classifier is None:
        classifier = classify_offline(query, context)
    labels = classifier.labels
    source_plan = select_sources(query, labels, context, max_sources)
    bundle_name = select_bundle_name(query, labels, context)
    npu_proof, warnings = npu_proof_from_classifier(classifier, require_npu)
    plan = {
        "schema": SCHEMA,
        # An explicit option wins over a trace id carried in the context.
        "trace_id": options.get("trace_id") or context.get("trace_id"),
        "dry_run": True,
        "ok": True,
        "query_class": summarize_query_class(labels),
        "source_plan": source_plan,
        "bundle_plan": build_bundle_plan(bundle_name, source_plan, query, labels),
        "npu_proof": npu_proof,
        # Copies, so callers mutating the plan cannot alter the module constants.
        "authority": dict(AUTHORITY),
        "gates": dict(GATES),
        "warnings": warnings,
    }
    validate_plan(plan)
    return plan
|
||||||
|
|
||||||
|
|
||||||
|
def validate_plan(plan: Mapping[str, Any]) -> None:
    """Check that ``plan`` keeps the v1 invariants.

    Verifies the schema id, ``dry_run is True``, an authority block equal to
    ``AUTHORITY``, a non-empty ``source_plan`` list naming only allowed
    sources, and the presence of the ``query_class``, ``bundle_plan``,
    ``npu_proof`` and ``gates`` blocks.

    Raises:
        ContextGateError: on the first violated invariant.
    """
    if plan.get("schema") != SCHEMA:
        raise ContextGateError("invalid_schema")
    if plan.get("dry_run") is not True:
        raise ContextGateError("dry_run_missing")
    if plan.get("authority") != AUTHORITY:
        raise ContextGateError("authority_not_closed")
    sources = plan.get("source_plan")
    if not isinstance(sources, list) or not sources:
        raise ContextGateError("source_plan_required")
    for item in sources:
        # A non-mapping entry has no source name; fail validation instead of
        # crashing with AttributeError on item.get().
        source_name = item.get("source") if isinstance(item, Mapping) else None
        if source_name not in _ALLOWED_SOURCES:
            raise ContextGateError(f"invalid_source:{source_name}")
    required_blocks = ["query_class", "bundle_plan", "npu_proof", "gates"]
    for block in required_blocks:
        if block not in plan:
            raise ContextGateError(f"missing_block:{block}")
|
||||||
|
|
||||||
|
|
||||||
|
def compact_line(plan: Mapping[str, Any]) -> str:
    """Render ``plan`` as a single space-separated ``key=value`` status line.

    The line reports ok/schema, the bundle name, the comma-joined source names
    and their count, the NPU proof fields, a fixed list of closed gates, and
    the comma-joined warnings (``none`` when there are no warnings).
    """
    source_entries = plan["source_plan"]
    source_names = ",".join([str(entry["source"]) for entry in source_entries])
    closed = "route,memory,send,tools,restart,vector,private_roots,config"
    warning_text = ",".join(plan.get("warnings") or [])
    if not warning_text:
        warning_text = "none"

    # Fields are built in output order and joined with single spaces.
    fields = [
        f"ok={str(plan['ok']).lower()}",
        f"schema={plan['schema']}",
        f"bundle={plan['bundle_plan']['bundle_name']}",
        f"sources={source_names}",
        f"source_count={len(plan['source_plan'])}",
        f"npu_verified={str(plan['npu_proof']['verified']).lower()}",
        f"classifier_delta_us={plan['npu_proof'].get('classifier_delta_us')}",
        f"outer_sysfs_delta_us={plan['npu_proof'].get('outer_sysfs_delta_us')}",
        f"gates=closed:{closed}",
        f"warnings={warning_text}",
    ]
    return " ".join(fields)
|
||||||
|
|
||||||
|
|
||||||
|
def compact_json(plan: Mapping[str, Any]) -> str:
    """Serialize a reduced view of ``plan`` as compact, key-sorted JSON.

    The reduced view keeps the schema/ok/dry_run flags, the bundle name, the
    source names and their count, the query class, the NPU proof, the
    authority block, the names of the gates, and any warnings.
    """
    summary: dict[str, Any] = {}
    summary["schema"] = plan["schema"]
    summary["ok"] = plan["ok"]
    summary["dry_run"] = plan["dry_run"]
    summary["bundle_name"] = plan["bundle_plan"]["bundle_name"]
    summary["sources"] = [entry["source"] for entry in plan["source_plan"]]
    summary["source_count"] = len(plan["source_plan"])
    summary["query_class"] = plan["query_class"]
    summary["npu_proof"] = plan["npu_proof"]
    summary["authority"] = plan["authority"]
    # Only the gate names are reported, not their values.
    summary["gates_closed"] = list(plan["gates"].keys())
    summary["warnings"] = plan.get("warnings", [])
    return json.dumps(summary, sort_keys=True, separators=(",", ":"))
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Thin repo-local wrapper for the Atlas/Hermes context-gate advisory CLI."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# REPO_ROOT is the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Put the repo root on sys.path (once) so the package import below resolves
# when this file is executed directly.
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

# Must come after the sys.path adjustment above, hence the E402 suppression.
from openvino_context_gate.cli import main  # noqa: E402

# Exit the process with whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Executable
+526
@@ -0,0 +1,526 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Dry-run Kanban hygiene advisory classifier.
|
||||||
|
|
||||||
|
Reads compact board/task summaries and emits bounded labels/next gates without
|
||||||
|
mutating any Hermes Kanban state. Phase 1 is deterministic rules only; it does
|
||||||
|
not call kanban tools, restart services, write memory, or send outbound data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
# Schema tag written into the "schema" field of every advisory output.
SCHEMA = "kanban_hygiene_advisory_v1"
# Authority block emitted with every advisory; every capability is False.
AUTHORITY = {
    "may_mutate_board": False,
    "may_assign": False,
    "may_block_or_unblock": False,
    "may_complete_or_archive": False,
    "may_create_tasks": False,
    "may_write_memory": False,
    "may_send_external": False,
    "may_restart_services": False,
    "may_execute_tools": False,
}
# NPU proof block emitted with every advisory; this script never attempts a
# proof, so "attempted" is False and the result fields are None.
NPU_PROOF = {
    "required_for_npu_claims": True,
    "attempted": False,
    "ok": None,
    "npu_busy_delta_us": None,
}

# Keys every task object must carry (checked by validate_task / load_input).
REQUIRED_TASK_FIELDS = {"id", "title", "status", "assignee", "created_at", "updated_at"}
# Status values accepted by validate_task.
SUPPORTED_STATUSES = {
    "triage",
    "todo",
    "ready",
    "running",
    "blocked",
    "done",
    "archived",
    "failed",
    "cancelled",
}
# Closed set of task-type label values.
TASK_TYPES = {
    "charter",
    "discovery",
    "spec",
    "implement",
    "test",
    "review",
    "docs",
    "ops",
    "integration",
    "final",
    "unknown",
}
# Closed set of lane label values.
LANES = {
    "observability_utilization",
    "cron_n8n_classifier",
    "rag_context_gate",
    "doc_image_audio_triage",
    "voice_audio_pipeline",
    "kanban_hygiene",
    "docs_runbook_service_map",
    "ops_integration",
    "final_closeout",
    "general",
    "unknown",
}
# Title prefixes (the text before the first ":") recognized as lifecycle
# markers; "doc" is accepted as an alias and mapped to "docs" by the classifier.
LIFECYCLE_PREFIXES = {
    "charter",
    "discovery",
    "spec",
    "implement",
    "test",
    "review",
    "docs",
    "doc",
    "ops",
    "integration",
    "final",
}
|
||||||
|
|
||||||
|
|
||||||
|
def compact_text(task: dict[str, Any]) -> str:
    """Return the task's free-text fields joined by spaces and lowercased.

    The title, body excerpt, last-run summary excerpt and last comment
    excerpt are concatenated in that order. Fields that are missing, ``None``
    (e.g. JSON ``null``) or empty are skipped, so an absent value never
    contributes the literal text ``"none"`` to the result.
    """
    field_names = (
        "title",
        "body_excerpt",
        "last_run_summary_excerpt",
        "last_comment_excerpt",
    )
    values = (task.get(name) for name in field_names)
    # Drop None before stringifying: str(None) would inject "None" into the
    # text that the keyword classifiers search.
    parts = [str(value) for value in values if value is not None]
    return " ".join(part for part in parts if part).lower()
|
||||||
|
|
||||||
|
|
||||||
|
def load_jsonl(raw: str) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Parse JSON Lines text into a list of task objects.

    Blank lines are skipped. Returns ``(tasks, {})``; the empty dict is the
    metadata slot, which JSON Lines input never populates.

    Raises:
        ValueError: if a line is not valid JSON or does not decode to an
            object; the message carries the 1-based line number.
    """
    rows: list[dict[str, Any]] = []
    for number, text in enumerate(raw.splitlines(), 1):
        if text.strip():
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError as exc:
                raise ValueError(f"invalid JSONL on line {number}: {exc.msg}") from exc
            if isinstance(parsed, dict):
                rows.append(parsed)
            else:
                raise ValueError(f"JSONL line {number} is not an object")
    return rows, {}
|
||||||
|
|
||||||
|
|
||||||
|
def load_input(path: str | None, fmt: str) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Read task summaries from a file or stdin and return ``(tasks, metadata)``.

    Args:
        path: file to read; ``None``, empty or ``"-"`` means stdin.
        fmt: ``"json"``, ``"jsonl"`` or ``"auto"``. In auto mode, multi-line
            input that does not start with ``{`` or ``[`` is read as JSON
            Lines, and multi-line input that fails to parse as one JSON
            document is retried as JSON Lines.

    Accepted JSON shapes: a list of task objects, an object with a ``tasks``
    list (its other keys become the metadata), or a single object that has
    all required task fields.

    Raises:
        ValueError: for empty input, invalid JSON, or an unsupported shape.
        OSError: propagated from reading ``path``.
    """
    if path and path != "-":
        raw = Path(path).read_text(encoding="utf-8")
    else:
        raw = sys.stdin.read()

    stripped = raw.strip()
    if not stripped:
        raise ValueError("input is empty")

    multiline_auto = fmt == "auto" and "\n" in stripped
    if fmt == "jsonl" or (multiline_auto and not stripped.startswith(("{", "["))):
        return load_jsonl(raw)

    try:
        data = json.loads(raw)
    except json.JSONDecodeError as exc:
        # Typical JSON Lines starts with "{" too, so it only reveals itself
        # when the whole-document parse fails.
        if multiline_auto:
            return load_jsonl(raw)
        raise ValueError(f"invalid JSON input: {exc.msg}") from exc

    if isinstance(data, list):
        for entry in data:
            if not isinstance(entry, dict):
                raise ValueError("JSON list must contain task objects")
        return data, {}

    if not isinstance(data, dict):
        raise ValueError("input must be JSON object, JSON list, or JSON Lines")

    tasks = data.get("tasks")
    if tasks is None:
        # Treat a single object with required task fields as a one-task summary.
        if REQUIRED_TASK_FIELDS.issubset(data):
            return [data], {}
        raise ValueError("JSON object must contain a 'tasks' list")
    if not isinstance(tasks, list) or any(not isinstance(entry, dict) for entry in tasks):
        raise ValueError("'tasks' must be a list of objects")
    metadata = {key: data[key] for key in data if key != "tasks"}
    return tasks, metadata
|
||||||
|
|
||||||
|
|
||||||
|
def validate_task(task: dict[str, Any]) -> None:
    """Reject a task summary that cannot be classified safely.

    A task must contain every required field, carry a supported status, and
    give ``created_at`` / ``updated_at`` as numeric epoch seconds.

    Raises:
        ValueError: naming the task id and the first problem found.
    """
    missing = sorted(REQUIRED_TASK_FIELDS - set(task))
    if missing:
        task_id = task.get("id", "<unknown>")
        raise ValueError(f"task {task_id} missing required fields: {', '.join(missing)}")

    status = str(task.get("status"))
    if status not in SUPPORTED_STATUSES:
        raise ValueError(f"task {task.get('id')} has unsupported status: {status}")

    for field in ("created_at", "updated_at"):
        value = task.get(field)
        # bool is a subclass of int, so JSON true/false would otherwise pass
        # as the timestamps 1 and 0.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise ValueError(f"task {task.get('id')} field {field} must be epoch seconds")
|
||||||
|
|
||||||
|
|
||||||
|
def confidence(value: float) -> float:
    """Clamp ``value`` into the range 0.0..1.0 and round it to two decimals."""
    capped = value if value < 1.0 else 1.0
    bounded = capped if capped > 0.0 else 0.0
    return round(bounded, 2)
|
||||||
|
|
||||||
|
|
||||||
|
def classify_task_type(task: dict[str, Any]) -> dict[str, Any]:
    """Label the task's lifecycle type from its title prefix or keywords.

    A recognized ``prefix:`` at the start of the title wins (confidence
    0.95). Otherwise the first keyword group with a substring hit in the
    task's compact text decides (confidence 0.78). With no signal the type is
    ``unknown`` (confidence 0.2).
    """
    title = str(task.get("title", "")).strip().lower()
    searchable = compact_text(task)

    head, colon, _rest = title.partition(":")
    prefix = head.strip() if colon else ""
    if prefix in LIFECYCLE_PREFIXES:
        # "doc" is an accepted alias for the "docs" type.
        task_type = "docs" if prefix == "doc" else prefix
        if task_type in TASK_TYPES:
            return {"value": task_type, "confidence": 0.95, "reason_codes": [f"title_prefix_{task_type}"]}

    # Order matters: the first group with any hit is returned.
    keyword_table = (
        ("discovery", ("discover", "inventory", "repo map", "read-only")),
        ("spec", ("spec", "define", "contract", "schema")),
        ("implement", ("implement", "engineer", "script", "code", "build")),
        ("review", ("review", "approve", "findings")),
        ("docs", ("docs", "runbook", "readme")),
        ("ops", ("ops", "health", "monitor", "deploy", "cleanup")),
        ("integration", ("integration", "merge", "cherry-pick", "fan-in")),
        ("final", ("final", "closeout", "synthesis")),
        ("test", ("test", "smoke", "validate")),
        ("charter", ("charter", "program framing")),
    )
    hit = next(
        (name for name, needles in keyword_table if any(needle in searchable for needle in needles)),
        None,
    )
    if hit is not None:
        return {"value": hit, "confidence": 0.78, "reason_codes": [f"keyword_{hit}"]}
    return {"value": "unknown", "confidence": 0.2, "reason_codes": ["insufficient_signal"]}
|
||||||
|
|
||||||
|
|
||||||
|
def classify_lane(task: dict[str, Any]) -> dict[str, Any]:
    """Assign the task to a work lane by substring match on its compact text.

    Lanes are tried in a fixed order; the first lane with a matching phrase
    wins (confidence 0.9) and the reason code names the first phrase that
    matched. Non-empty text without a match is ``general`` (0.45); empty text
    is ``unknown`` (0.1).
    """
    text = compact_text(task)
    lane_table = (
        ("kanban_hygiene", ("kanban", "task hygiene", "board summaries", "review-needed", "next gate")),
        ("cron_n8n_classifier", ("cron", "n8n", "alert", "event classifier")),
        ("rag_context_gate", ("rag", "context gate", "retrieval", "bundle")),
        ("doc_image_audio_triage", ("document", "image", "audio triage", "ocr", "attachments")),
        ("voice_audio_pipeline", ("voice", "whisper", "memo", "transcribe")),
        ("docs_runbook_service_map", ("service map", "runbook", "readme")),
        ("observability_utilization", ("health", "utilization", "metrics", "digest")),
        ("ops_integration", ("merge", "integration", "cherry-pick", "fan-in")),
        ("final_closeout", ("final", "closeout", "synthesis")),
    )
    for lane, needles in lane_table:
        first_hit = next((needle for needle in needles if needle in text), None)
        if first_hit is not None:
            reason = "mentions_" + first_hit.replace(" ", "_")
            return {"value": lane, "confidence": 0.9, "reason_codes": [reason]}

    if not text:
        return {"value": "unknown", "confidence": 0.1, "reason_codes": ["insufficient_signal"]}
    return {"value": "general", "confidence": 0.45, "reason_codes": ["no_lane_specific_signal"]}
|
||||||
|
|
||||||
|
|
||||||
|
def classify_project(task: dict[str, Any], board: str | None, input_metadata: dict[str, Any]) -> dict[str, Any]:
    """Work out which project the task belongs to.

    Precedence: an explicit ``project`` on the task or in the input metadata,
    then the board name (argument first, metadata second), then an
    ``npu``/``openvino`` mention in the task text, else ``unknown``. Each
    result records where the value came from in ``source``.
    """
    declared = task.get("project") or input_metadata.get("project")
    if declared:
        return {"value": str(declared), "confidence": 0.9, "source": "input"}

    board_label = board or input_metadata.get("board")
    if board_label:
        return {"value": str(board_label), "confidence": 0.98, "source": "board_name"}

    text = compact_text(task)
    if any(marker in text for marker in ("npu", "openvino")):
        return {"value": "npu-maximization", "confidence": 0.72, "source": "body"}
    return {"value": "unknown", "confidence": 0.1, "source": "unknown"}
|
||||||
|
|
||||||
|
|
||||||
|
def classify_blocker(task: dict[str, Any]) -> dict[str, Any]:
    """Describe what, if anything, is holding the task up.

    Only a task whose status is ``blocked`` is reported with
    ``blocked=True``; its blocker kind is picked from phrases in the compact
    text. A ``todo`` task that lists parents is labelled ``missing_parent``,
    and a crashed / timed-out / failed last run is labelled ``failed_tests``.
    Anything else yields ``none`` with confidence 0.
    """
    status = str(task.get("status"))
    text = compact_text(task)
    outcome = str(task.get("last_run_outcome") or "").lower()

    def verdict(value: str, blocked: bool, score: float, reasons: list[str]) -> dict[str, Any]:
        return {"value": value, "blocked": blocked, "confidence": confidence(score), "reason_codes": reasons}

    if status == "blocked":
        # Phrase checks run in priority order; the first match wins.
        if "review-required" in text or "changes requested" in text:
            return verdict("review_changes_requested", True, 0.85, ["blocked_review_required_or_changes"])
        if any(word in text for word in ("credential", "token", "path", "spawn_failed")):
            return verdict("missing_credentials", True, 0.85, ["blocked_missing_credentials_or_path"])
        if any(word in text for word in ("human", "approval", "decision", "confirm")):
            return verdict("human_decision", True, 0.85, ["blocked_human_decision"])
        return verdict("unknown", True, 0.85, ["status_blocked"])

    if status == "todo" and task.get("parents"):
        return verdict("missing_parent", False, 0.75, ["todo_with_parents"])

    if outcome in {"crashed", "timed_out", "failed"}:
        return verdict("failed_tests", False, 0.65, [f"last_run_{outcome}"])

    return verdict("none", False, 0.0, [])
|
||||||
|
|
||||||
|
|
||||||
|
def age_hours(now: float, timestamp: Any) -> float | None:
    """Return hours elapsed from ``timestamp`` to ``now``, rounded to 2 places.

    A timestamp in the future counts as zero hours. Returns ``None`` when
    ``timestamp`` is not an int or float.
    """
    if isinstance(timestamp, (int, float)):
        elapsed_seconds = max(0.0, now - float(timestamp))
        return round(elapsed_seconds / 3600.0, 2)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def classify_staleness(task: dict[str, Any], now: float) -> dict[str, Any]:
    """Grade how stale the task is for its status.

    The last-activity time is the first truthy value among ``heartbeat_at``,
    ``last_activity_at`` and ``updated_at``, falling back to ``created_at``.
    Rules by status:

    * ``running``: idle for more than 1h -> ``stale_lock``.
    * ``ready``: idle >= 72h -> ``stale``; idle >= 24h -> ``aging``.
    * ``blocked``: idle >= 168h -> ``stale``; review-required and idle >= 72h
      -> ``stale``; idle >= threshold (24h when review-required, else 48h)
      -> ``aging``.
    * ``todo`` without parents: idle >= 72h -> ``orphaned``.

    Everything else is ``fresh``. The result also reports the task age, the
    idle time and the threshold that applied, all in hours.
    """
    status = str(task.get("status"))
    created = float(task["created_at"])
    latest = float(
        task.get("heartbeat_at") or task.get("last_activity_at") or task.get("updated_at") or created
    )
    age = age_hours(now, created)
    idle = age_hours(now, latest)

    def idle_at_least(hours: float) -> bool:
        return idle is not None and idle >= hours

    verdict = "fresh"
    threshold = 24
    reasons: list[str] = []

    if status == "running":
        threshold = 1
        # Strictly more than one hour without a heartbeat or other activity.
        if idle is not None and idle > 1:
            verdict = "stale_lock"
            reasons.append("running_no_recent_heartbeat")
    elif status == "ready":
        threshold = 24
        if idle_at_least(72):
            verdict = "stale"
            reasons.append("ready_over_72h")
        elif idle_at_least(24):
            verdict = "aging"
            reasons.append("ready_over_24h")
    elif status == "blocked":
        needs_review = "review-required" in compact_text(task)
        threshold = 24 if needs_review else 48
        if idle_at_least(168):
            verdict = "stale"
            reasons.append("blocked_over_7d")
        elif needs_review and idle_at_least(72):
            verdict = "stale"
            reasons.append("review_required_over_72h")
        elif idle_at_least(threshold):
            verdict = "aging"
            reasons.append("blocked_or_review_aging")
    elif status == "todo" and not task.get("parents") and idle_at_least(72):
        verdict = "orphaned"
        threshold = 72
        reasons.append("todo_without_parents_over_72h")

    return {
        "value": verdict,
        "age_hours": age,
        "last_activity_hours": idle,
        "threshold_hours": threshold,
        "reason_codes": reasons,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_title(title: str) -> str:
    """Reduce a title to a comparison key.

    The title is lowercased and trimmed, a leading lifecycle prefix such as
    ``spec:`` is removed, every run of characters outside ``a-z0-9`` becomes
    one space, and surrounding whitespace is stripped.
    """
    # Applied in order: strip lifecycle prefix, squash punctuation, squash spaces.
    substitutions = (
        (r"^(charter|discovery|spec|implement|test|review|docs?|ops|integration|final)\s*:\s*", ""),
        (r"[^a-z0-9]+", " "),
        (r"\s+", " "),
    )
    key = title.lower().strip()
    for pattern, replacement in substitutions:
        key = re.sub(pattern, replacement, key)
    return key.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def find_duplicates(tasks: list[dict[str, Any]], labels: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Flag active tasks that share a normalized title, lane and task type.

    Tasks whose status is done, archived or cancelled are not grouped. Within
    a group of two or more task ids, the id that sorts first is the canonical
    task and every other member is marked as its duplicate. Returns one entry
    per task id; tasks outside any such group get the "not a duplicate"
    defaults.
    """
    active_statuses = SUPPORTED_STATUSES - {"done", "archived", "cancelled"}
    buckets: dict[tuple[str, str, str], list[str]] = {}
    for task in tasks:
        if str(task.get("status")) in active_statuses:
            tid = str(task["id"])
            signature = (
                normalize_title(str(task.get("title", ""))),
                labels[tid]["lane"]["value"],
                labels[tid]["task_type"]["value"],
            )
            # Titles that normalize to nothing carry no signal and are not grouped.
            if signature[0]:
                buckets.setdefault(signature, []).append(tid)

    verdicts: dict[str, dict[str, Any]] = {}
    for task in tasks:
        verdicts[str(task["id"])] = {
            "is_duplicate": False,
            "canonical_task_id": None,
            "candidate_ids": [],
            "confidence": 0.0,
            "reason_codes": [],
        }

    for members in buckets.values():
        if len(members) >= 2:
            canonical = min(members)
            for tid in members:
                is_dup = tid != canonical
                verdicts[tid] = {
                    "is_duplicate": is_dup,
                    "canonical_task_id": canonical if is_dup else None,
                    "candidate_ids": [other for other in members if other != tid],
                    "confidence": 0.86,
                    "reason_codes": ["same_normalized_title_lane_and_task_type"],
                }
    return verdicts
|
||||||
|
|
||||||
|
|
||||||
|
def has_non_positive_npu_busy_delta(text: str) -> bool:
    """Report whether ``text`` states a busy/delta counter that is <= 0.

    Text mentioning neither ``npu`` nor ``busy`` is never flagged. Otherwise
    the text is searched for ``busy...=N`` / ``delta...=N`` style assignments
    (``=`` or ``:``) and the result is True as soon as one value N is zero or
    negative.
    """
    if not ("npu" in text or "busy" in text):
        return False

    counter_patterns = (
        r"\b(?:npu_)?busy(?:_time)?(?:_delta)?(?:_us)?\s*[=:]\s*([+-]?\d+(?:\.\d+)?)\b",
        r"\b(?:npu_)?delta(?:_us)?\s*[=:]\s*([+-]?\d+(?:\.\d+)?)\b",
    )
    for pattern in counter_patterns:
        for found in re.finditer(pattern, text):
            try:
                reading = float(found.group(1))
            except ValueError:
                continue
            if reading <= 0:
                return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def classify_review_needed(task: dict[str, Any], task_type: str) -> dict[str, Any]:
    """Decide whether the task needs a review and of which kind.

    Checks run in priority order: a non-positive NPU busy delta, an NPU
    mention accompanied only by ``http 200`` / ``no busy`` / ``missing busy``,
    an explicit ``review-required`` marker, reported changed files / diff /
    tests on an implement, ops or docs task, and finally phrases that
    describe an authority change. With no hit the result is
    ``value=False, kind="none"``.
    """
    text = compact_text(task)
    change_evidence = task.get("changed_files") or task.get("diff_path") or task.get("tests_run")

    def needed(kind: str, score: float, reason: str) -> dict[str, Any]:
        return {"value": True, "kind": kind, "confidence": score, "reason_codes": [reason]}

    if has_non_positive_npu_busy_delta(text):
        return needed("npu_proof_gate", 0.84, "npu_claim_non_positive_busy_delta")

    if "npu" in text and any(marker in text for marker in ("http 200", "no busy", "missing busy")):
        return needed("npu_proof_gate", 0.8, "npu_claim_needs_busy_delta")

    if "review-required" in text:
        kind = "code_change" if task_type == "implement" else "spec_review"
        return needed(kind, 0.92, "review_required_marker")

    if change_evidence and task_type in {"implement", "ops", "docs"}:
        return needed("code_change", 0.86, "reported_changed_files_or_tests")

    authority_phrases = (
        "routing authority",
        "restart service",
        "write memory",
        "send outbound",
        "private root",
        "wildcard bind",
        "vector db mutation",
    )
    if any(phrase in text for phrase in authority_phrases):
        return needed("human_approval", 0.84, "authority_change_requires_approval")

    return {"value": False, "kind": "none", "confidence": 0.2, "reason_codes": []}
|
||||||
|
|
||||||
|
|
||||||
|
def classify_next_gate(task: dict[str, Any], labels: dict[str, Any]) -> dict[str, Any]:
    """Pick the next advisory gate for a task from its already-computed labels.

    ``labels`` must contain the ``task_type``, ``duplicate``, ``staleness``,
    ``blocker`` and ``review_needed`` entries. Exceptional conditions are
    checked first, in priority order (duplicate, stale lock, human-decision
    blockers, waiting on parents, missing test evidence, NPU proof, review);
    otherwise the gate follows from the task type, or is ``unknown``.
    """
    task_type = labels["task_type"]["value"]
    status = str(task.get("status"))

    def gate(value: str, score: float, reasons: list[str]) -> dict[str, Any]:
        return {"value": value, "confidence": score, "reason_codes": reasons}

    if labels["duplicate"]["is_duplicate"]:
        return gate("dedupe_review", 0.86, ["duplicate_candidate"])
    if labels["staleness"]["value"] == "stale_lock":
        return gate("investigate_stale_lock", 0.88, ["running_stale_lock"])

    blocker = labels["blocker"]
    if blocker["value"] in {"human_decision", "missing_credentials", "unknown"} and blocker["blocked"]:
        return gate("needs_human_decision", 0.85, blocker["reason_codes"] or ["blocked"])
    if blocker["value"] == "missing_parent":
        return gate("wait_for_parents", 0.82, ["unfinished_parents"])

    if task_type == "implement" and status in {"blocked", "done"}:
        if not (task.get("tests_run") or task.get("test_evidence")):
            return gate("needs_test_evidence", 0.78, ["implementation_without_test_evidence"])

    review = labels["review_needed"]
    if review["kind"] == "npu_proof_gate":
        return gate("needs_npu_proof", 0.8, review["reason_codes"])
    if review["value"]:
        return gate("ready_for_review", 0.86, review["reason_codes"])

    # Default progression for a task with nothing exceptional about it.
    next_by_type = {
        "spec": "ready_for_implementation",
        "implement": "ready_for_review",
        "review": "ready_for_integration",
        "docs": "ready_for_integration",
        "ops": "ready_for_ops_validation",
        "integration": "ready_for_closeout",
        "final": "safe_to_complete",
        "discovery": "safe_to_complete",
        "charter": "ready_for_spec",
        "test": "ready_for_review",
    }
    if task_type in next_by_type:
        return gate(next_by_type[task_type], 0.74, [f"task_type_{task_type}"])
    return gate("unknown", 0.2, [])
|
||||||
|
|
||||||
|
|
||||||
|
def advisory(tasks: list[dict[str, Any]], *, board: str | None, now: float, input_metadata: dict[str, Any], include_evidence: bool) -> dict[str, Any]:
    """Build the dry-run hygiene advisory for a set of task summaries.

    Args:
        tasks: task objects; each is validated before any labelling happens.
        board: board/project name override, or ``None`` to use the metadata.
        now: reference time in epoch seconds for staleness.
        input_metadata: non-task keys from the input document.
        include_evidence: when true, each item gets a short ``evidence`` block.

    Returns:
        The advisory document: schema tag, dry-run flag, creation time, board,
        aggregate counts, the authority and NPU-proof blocks, and one labelled
        item per task.

    Raises:
        ValueError: from ``validate_task`` for a malformed task.
    """
    for task in tasks:
        validate_task(task)

    # First pass: labels that depend on one task only.
    prelim: dict[str, dict[str, Any]] = {}
    for task in tasks:
        task_id = str(task["id"])
        prelim[task_id] = {
            "task_type": classify_task_type(task),
            "project": classify_project(task, board, input_metadata),
            "lane": classify_lane(task),
            "blocker": classify_blocker(task),
            "staleness": classify_staleness(task, now),
        }
    # Duplicate detection needs the lane/type labels of every task.
    duplicates = find_duplicates(tasks, prelim)

    items = []
    for task in tasks:
        task_id = str(task["id"])
        labels = dict(prelim[task_id])
        labels["duplicate"] = duplicates[task_id]
        labels["review_needed"] = classify_review_needed(task, labels["task_type"]["value"])
        labels["next_gate"] = classify_next_gate(task, labels)
        item = {
            "task_id": task_id,
            **labels,
            "warnings": [],
        }
        if include_evidence:
            item["evidence"] = {
                "normalized_title": normalize_title(str(task.get("title", ""))),
                "status": task.get("status"),
                "parents_count": len(task.get("parents") or []),
                "children_count": len(task.get("children") or []),
            }
        items.append(item)

    counts = {
        "tasks": len(items),
        "duplicates": sum(1 for item in items if item["duplicate"]["is_duplicate"]),
        "review_needed": sum(1 for item in items if item["review_needed"]["value"]),
        "stale": sum(1 for item in items if item["staleness"]["value"] in {"stale", "stale_lock", "orphaned"}),
        "blocked": sum(1 for item in items if item["blocker"]["blocked"]),
    }
    return {
        "schema": SCHEMA,
        "dry_run": True,
        "created": int(now),
        "board": board or input_metadata.get("board") or None,
        "counts": counts,
        # Hand out copies so a caller editing the result cannot alter the
        # module-level constants that every later advisory is built from.
        "authority": dict(AUTHORITY),
        "npu_proof": dict(NPU_PROOF),
        "items": items,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the hygiene advisory CLI."""
    description = "Dry-run Kanban hygiene advisory classifier"
    epilog = "Input: JSON object with tasks[] or JSONL task objects. Required task fields: id,title,status,assignee,created_at,updated_at. Optional compact fields such as body_excerpt, parents, children, changed_files, tests_run, last_run_outcome, and last_comment_excerpt improve labels."
    parser = argparse.ArgumentParser(description=description, epilog=epilog)

    add = parser.add_argument
    add("--input", "-i", help="Input JSON/JSONL file; omit or '-' for stdin")
    add("--format", choices=["auto", "json", "jsonl"], default="auto", help="Input format")
    add("--board", help="Board/project name to include in output")
    add("--now", type=float, default=None, help="Epoch seconds for deterministic staleness tests")
    # --compact changes nothing; it exists so older invocations keep working.
    add("--compact", action="store_true", help="Accepted for compatibility; output is compact JSON by default")
    add("--include-evidence", action="store_true", help="Include short derived evidence fields")
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Run the CLI: read input, print the advisory as compact JSON.

    Returns 0 on success. Input problems (``OSError`` / ``ValueError``) are
    reported on stderr with a ``kanban-hygiene-advisory:`` prefix and produce
    exit code 2.
    """
    args = build_parser().parse_args(argv)
    try:
        tasks, metadata = load_input(args.input, args.format)
        # Taken after the input is read so time spent waiting on stdin is not
        # counted against the tasks.
        moment = time.time() if args.now is None else args.now
        report = advisory(
            tasks,
            board=args.board,
            now=moment,
            input_metadata=metadata,
            include_evidence=args.include_evidence,
        )
    except (OSError, ValueError) as exc:
        print(f"kanban-hygiene-advisory: {exc}", file=sys.stderr)
        return 2

    print(json.dumps(report, sort_keys=True, separators=(",", ":")))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Executable
+567
@@ -0,0 +1,567 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Dry-run comparison harness for advisory-only NPU lanes.
|
||||||
|
|
||||||
|
The harness evaluates synthetic/non-private fixtures against deterministic lane
|
||||||
|
adapters and emits compact npu_advisory_decision_v1 records plus JSON/markdown
|
||||||
|
summaries. It intentionally performs no live routing, memory writes, tool
|
||||||
|
execution, service restarts, outbound sends, broad private scans, or vector-store
|
||||||
|
mutation.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import datetime as dt
|
||||||
|
import hashlib
|
||||||
|
import uuid
|
||||||
|
import importlib.util
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from collections import Counter, defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Mapping
|
||||||
|
|
||||||
|
# REPO_ROOT is the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Default fixture file, resolved relative to the repo root.
DEFAULT_FIXTURES = REPO_ROOT / "fixtures" / "npu_advisory_dry_run" / "fixtures.json"
# Schema tags for the decision records and for the harness summary.
SCHEMA = "npu_advisory_decision_v1"
HARNESS_SCHEMA = "npu_advisory_dry_run_summary_v1"

# Baseline authority flags: every "can_*" capability is False, and the two
# guard flags (human approval required, advisory only) are True.
AUTHORITY_FLAGS_CLOSED = {
    "can_route_atlas": False,
    "can_write_memory": False,
    "can_execute_tools": False,
    "can_restart_services": False,
    "can_send_outbound": False,
    "can_scan_private_roots": False,
    "can_mutate_vector_store": False,
    "can_post_advisory_event": False,
    "can_change_gateway_config": False,
    "requires_human_approval": True,
    "advisory_only": True,
}
# Translates "may_*" authority keys into the matching "can_*" flag names
# used in AUTHORITY_FLAGS_CLOSED.
MAY_TO_CAN = {
    "may_route": "can_route_atlas",
    "may_write_memory": "can_write_memory",
    "may_execute_tools": "can_execute_tools",
    "may_restart_services": "can_restart_services",
    "may_send_external": "can_send_outbound",
    "may_process_private_dirs": "can_scan_private_roots",
    "may_mutate_vector_db": "can_mutate_vector_store",
    "may_change_live_config": "can_change_gateway_config",
}
# Mutation categories, all False.
MUTATION_FLAGS_FALSE = {
    "live_routing": False,
    "memory_writes": False,
    "tool_execution": False,
    "service_restarts": False,
    "outbound_sends": False,
    "broad_private_scans": False,
    "vector_store_mutation": False,
    "gateway_restart": False,
}
# Action names listed as allowed.
ALLOWED_ACTIONS = ["record_metric", "compare_with_expected_label", "include_in_digest", "recommend_human_review"]
# "Actual action" record stating that nothing was performed and there were no side effects.
NO_ACTUAL_ACTION = {"kind": "dry_run_reported", "performed": False, "performed_by": "harness", "side_effects": []}
# Case-insensitive phrase detectors, keyed by the kind of action item they indicate.
ACTION_PATTERNS = {
    "follow_up": re.compile(r"\b(follow up|follow-up|circle back|reply|respond)\b", re.I),
    "date_or_deadline": re.compile(r"\b(deadline|due|by (?:mon|tue|wed|thu|fri|sat|sun)|20\d{2}[-/]\d{1,2}[-/]\d{1,2})\b", re.I),
    "decision": re.compile(r"\b(decided|decision|approved|rejected|go with|choose)\b", re.I),
    "task": re.compile(r"\b(todo|to-do|action item|assign|need to|please|reminder|review|ask)\b", re.I),
}
|
||||||
|
|
||||||
|
class HarnessError(ValueError):
    """Error raised by the dry-run harness, e.g. when a module cannot be loaded."""
|
||||||
|
|
||||||
|
|
||||||
|
def load_module(name: str, path: Path):
    """Import the Python source file at ``path`` as a module called ``name``.

    The new module is registered in ``sys.modules`` under ``name`` unless
    that name is already taken, then executed and returned.

    Raises:
        HarnessError: if no import spec or loader can be built for ``path``.
    """
    spec = importlib.util.spec_from_file_location(name, path)
    loader = None if spec is None else spec.loader
    if loader is None:
        raise HarnessError(f"module_import_failed:{path}")

    module = importlib.util.module_from_spec(spec)
    # An existing sys.modules entry under this name is left in place.
    if name not in sys.modules:
        sys.modules[name] = module
    loader.exec_module(module)
    return module
|
||||||
|
|
||||||
|
|
||||||
|
def confidence_bucket(value: float | int | None) -> str:
    """Map a numeric confidence to a named bucket.

    ``None`` maps to ``unknown``. Otherwise the highest bucket whose lower
    bound is met wins: >= 0.95 very_high, >= 0.80 high, >= 0.60 medium,
    >= 0.40 low, and anything below that very_low.
    """
    if value is None:
        return "unknown"
    score = float(value)
    # Lower bounds, highest first.
    floors = (
        (0.95, "very_high"),
        (0.80, "high"),
        (0.60, "medium"),
        (0.40, "low"),
    )
    for floor, bucket in floors:
        if score >= floor:
            return bucket
    return "very_low"
|
||||||
|
|
||||||
|
|
||||||
|
def lane_confidence(output: Mapping[str, Any], fallback: float = 0.7) -> float:
    """Extract one confidence number from a lane adapter's output.

    Lookup order: a top-level ``confidence`` then ``score`` that converts to
    float; otherwise the largest convertible ``confidence`` among the mapping
    values of ``output["labels"]``; otherwise ``fallback``.
    """
    for key in ("confidence", "score"):
        try:
            direct = float(output[key])
        except (KeyError, TypeError, ValueError):
            continue
        return direct

    nested = output.get("labels")
    if not isinstance(nested, Mapping):
        return fallback

    best: float | None = None
    for entry in nested.values():
        if not (isinstance(entry, Mapping) and "confidence" in entry):
            continue
        try:
            candidate = float(entry["confidence"])
        except (TypeError, ValueError):
            continue
        if best is None or candidate > best:
            best = candidate
    return fallback if best is None else best
|
||||||
|
|
||||||
|
|
||||||
|
def closed_authority_flags(extra: Mapping[str, Any] | None = None) -> dict[str, bool]:
    """Return a copy of ``AUTHORITY_FLAGS_CLOSED`` with overrides from *extra* applied.

    Each key of *extra* is translated through ``MAY_TO_CAN`` (unknown keys are
    used as-is). An override is applied, coerced to bool, only when the
    translated key already exists in the baseline; ``requires_human_approval``
    and ``advisory_only`` can never be overridden.
    """
    locked = {"requires_human_approval", "advisory_only"}
    flags = dict(AUTHORITY_FLAGS_CLOSED)
    overrides = extra or {}
    for raw_key, raw_value in overrides.items():
        target = MAY_TO_CAN.get(raw_key, raw_key)
        if target not in flags or target in locked:
            continue
        flags[target] = bool(raw_value)
    return flags
|
||||||
|
|
||||||
|
|
||||||
|
def authority_violations(flags: Mapping[str, Any]) -> list[str]:
    """Return the sorted names of all ``can_*`` flags whose value is truthy."""
    raised = [
        name
        for name, enabled in flags.items()
        if name.startswith("can_") and bool(enabled)
    ]
    raised.sort()
    return raised
|
||||||
|
|
||||||
|
|
||||||
|
def severity_for(label: str) -> str:
    """Translate a recommendation/decision label into a severity string.

    Labels not listed in the table map to ``"none"``.
    """
    severity_by_label = {
        "escalate": "critical",
        "block_authority_violation": "critical",
        "require_human_review": "medium",
        "review_item": "medium",
        "ready_for_review": "medium",
        "prepare_context_bundle": "medium",
        "summarize": "info",
        "log": "info",
    }
    return severity_by_label.get(label, "none")
|
||||||
|
|
||||||
|
|
||||||
|
def npu_proof_v1(proof: Mapping[str, Any]) -> dict[str, Any]:
|
||||||
|
busy = proof.get("npu_busy_delta_us") or proof.get("busy_delta_us")
|
||||||
|
service_delta = proof.get("service_reported_delta_us") or proof.get("npu_busy_delta_us")
|
||||||
|
proof_ok = proof.get("ok")
|
||||||
|
if proof_ok is None and busy is not None:
|
||||||
|
try:
|
||||||
|
proof_ok = int(busy) > 0
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
proof_ok = None
|
||||||
|
fixture_only = bool(proof.get("fixture_only", True))
|
||||||
|
return {
|
||||||
|
"proof_mode": "offline_fixture" if fixture_only else "service_reported_delta",
|
||||||
|
"busy_delta_us": int(busy) if isinstance(busy, int) or (isinstance(busy, str) and busy.isdigit()) else None,
|
||||||
|
"service_reported_delta_us": int(service_delta) if isinstance(service_delta, int) or (isinstance(service_delta, str) and service_delta.isdigit()) else None,
|
||||||
|
"inference_ran": bool(proof_ok) if proof_ok is not None else False,
|
||||||
|
"proof_ok": bool(proof_ok) if proof_ok is not None else None,
|
||||||
|
"counter_path": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def compare_outcome(recommendation: str, expected: str, human: str) -> str:
    """Classify how a recommendation compares with the reference decision.

    Returns, in priority order:
      * ``"agree"`` when recommendation, human and expected are all equal;
      * ``"false_positive"`` when an action-type recommendation meets a
        quiet human label;
      * ``"false_negative"`` for the opposite combination;
      * ``"uncertain"`` when the recommendation is ``uncertain``/``defer``;
      * ``"disagree"`` otherwise.
    """
    actionable = frozenset(
        {"escalate", "summarize", "review_item", "require_human_review", "prepare_context_bundle"}
    )
    quiet = frozenset({"log", "suppress", "none"})

    if recommendation == human and human == expected:
        return "agree"
    if recommendation in actionable and human in quiet:
        return "false_positive"
    if recommendation in quiet and human in actionable:
        return "false_negative"
    return "uncertain" if recommendation in {"uncertain", "defer"} else "disagree"
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_context_gate(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a ``context_gate`` fixture through the context-gate planner.

    Loads ``openvino_context_gate/context_gate.py`` and calls its
    ``build_plan`` with the fixture query/context and NPU proof not required.
    The recommendation is ``require_human_review`` when the plan reports
    blocked fields, ``prepare_context_bundle`` for the coding/ops/research
    bundles, and ``answer_directly`` otherwise.
    """
    gate_module = load_module(
        "openvino_context_gate.context_gate",
        REPO_ROOT / "openvino_context_gate" / "context_gate.py",
    )
    plan = gate_module.build_plan(
        str(fixture["query"]),
        context=fixture.get("context") or {},
        options={"require_npu_proof": False},
    )
    blocked = plan["bundle_plan"].get("blocked_fields") or []
    bundle_name = plan["bundle_plan"]["bundle_name"]

    recommendation = "answer_directly"
    if blocked:
        recommendation = "require_human_review"
    elif bundle_name in {"CodingTaskBundle", "OpsDebugBundle", "ResearchBundle"}:
        recommendation = "prepare_context_bundle"

    confidence = plan["query_class"].get("confidence", 0.7)
    npu_proof = plan["npu_proof"]
    source_names = [step["source"] for step in plan["source_plan"]]
    return {
        "recommendation": recommendation,
        "confidence": confidence,
        "npu_proof": npu_proof,
        "notes": [f"bundle={bundle_name}", f"sources={','.join(source_names)}"],
        "raw_compact": {
            "bundle_name": bundle_name,
            "sources": source_names,
            "blocked_fields": [entry["field"] for entry in blocked],
        },
    }
|
||||||
|
|
||||||
|
|
||||||
|
def cron_recommendation(envelope: Mapping[str, Any], event: Mapping[str, Any]) -> str:
    """Derive the advisory recommendation for a cron/n8n gateway envelope.

    The envelope must carry a verified NPU proof (``ok is True`` and a positive
    ``npu_busy_delta_us``); otherwise the result is always ``"log"``. With a
    valid proof, a ``critical`` event severity yields ``"escalate"``, a
    ``warning`` severity or a ``high``/``critical`` urgency label yields
    ``"summarize"``, and anything else ``"log"``.

    Malformed input (non-mapping ``labels``/``npu_proof``, or a busy delta
    that is not a number) is treated as "no proof"/"no label" rather than
    raising, so a bad envelope degrades to the most conservative answer.
    """
    result = envelope.get("result")
    labels = (result.get("labels") or {}) if isinstance(result, Mapping) else {}
    if not isinstance(labels, Mapping):
        labels = {}

    urgency_label = labels.get("urgency")
    if isinstance(urgency_label, Mapping):
        urgency = urgency_label.get("value") or "normal"
    else:
        urgency = urgency_label or "normal"

    npu = envelope.get("npu_proof") or {}
    if not isinstance(npu, Mapping):
        npu = {}
    try:
        busy_delta = int(npu.get("npu_busy_delta_us") or 0)
    except (TypeError, ValueError, OverflowError):
        # Unparseable counter: treat as unproven instead of crashing.
        busy_delta = 0
    npu_ok = npu.get("ok") is True and busy_delta > 0

    severity = str(event.get("severity") or "normal")
    if not npu_ok:
        return "log"
    if severity == "critical":
        return "escalate"
    if severity == "warning" or urgency in {"high", "critical"}:
        return "summarize"
    return "log"
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_cron_n8n(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a ``cron_n8n_advisory`` fixture from its gateway envelope and event.

    The recommendation comes from ``cron_recommendation``; confidence is taken
    from the envelope's result labels with a 0.6 fallback. The envelope's
    ``npu_proof`` and ``authority`` sections are passed through (empty dict
    when missing).
    """
    envelope = fixture.get("gateway_envelope") or {}
    event = fixture.get("event") or {}

    result = envelope.get("result")
    if isinstance(result, Mapping):
        labels = (result or {}).get("labels") or {}
    else:
        labels = {}

    notes = [f"workflow={event.get('workflow')}", f"severity={event.get('severity')}"]
    return {
        "recommendation": cron_recommendation(envelope, event),
        "confidence": lane_confidence({"labels": labels}, 0.6),
        "npu_proof": envelope.get("npu_proof") or {},
        "authority_from_envelope": envelope.get("authority") or {},
        "notes": notes,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_batch_triage(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a ``batch_triage`` fixture with the regex rules in ``ACTION_PATTERNS``.

    Any matching pattern yields ``review_item`` (0.82); otherwise text shorter
    than 20 stripped characters yields ``uncertain`` (0.35) and longer text
    ``suppress`` (0.64). No NPU proof is claimed and only the matched reason
    names are reported, not the document text.
    """
    text = str(fixture.get("document_text") or "")

    reasons = []
    for reason_name, pattern in ACTION_PATTERNS.items():
        if pattern.search(text):
            reasons.append(reason_name)
    reasons.sort()

    if reasons:
        recommendation, conf = "review_item", 0.82
    elif len(text.strip()) < 20:
        recommendation, conf = "uncertain", 0.35
    else:
        recommendation, conf = "suppress", 0.64

    reason_codes = ",".join(reasons) or "none"
    return {
        "recommendation": recommendation,
        "confidence": conf,
        "npu_proof": {"verified": False, "required": False, "note": "fixture_rules_no_npu_claim"},
        "notes": [f"lane={fixture.get('triage_lane')}", f"reason_codes={reason_codes}"],
        "raw_compact": {"reasons": reasons, "raw_text_redacted": True, "full_path_included": False},
    }
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_voice_audio(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a ``voice_audio`` fixture via ``scripts/npu_voice_audio_pipeline.py``.

    Calls the pipeline's ``decide_gate`` with the transcript, labels and the
    fixture's whisper/classifier proof flags. A gate starting with
    ``"blocked"`` yields ``require_human_review``; otherwise an action-worthy
    result yields ``review_item`` and anything else ``suppress``. The
    transcript itself is not included in the output.
    """
    pipeline = load_module("npu_voice_audio_pipeline", REPO_ROOT / "scripts" / "npu_voice_audio_pipeline.py")
    proof = fixture.get("npu_proof") or {}
    whisper_proven = bool(proof.get("whisper"))
    classifier_proven = bool(proof.get("classifier"))

    transcript = str(fixture.get("transcript") or "")
    labels = dict(fixture.get("labels") or {})
    action_worthy, atlas_gate, next_gate = pipeline.decide_gate(
        transcript,
        labels,
        whisper_proven=whisper_proven,
        classifier_proven=classifier_proven,
    )

    recommendation = "suppress"
    if atlas_gate.startswith("blocked"):
        recommendation = "require_human_review"
    elif action_worthy:
        recommendation = "review_item"

    return {
        "recommendation": recommendation,
        "confidence": 0.86 if action_worthy else 0.66,
        "npu_proof": {
            "whisper": whisper_proven,
            "classifier": classifier_proven,
            "verified": whisper_proven and classifier_proven,
        },
        "notes": [f"atlas_gate={atlas_gate}", f"next_gate={next_gate}", "transcript_redacted=true"],
        "raw_compact": {"action_worthy": action_worthy, "atlas_gate": atlas_gate, "next_gate": next_gate},
    }
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_kanban_hygiene(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a ``kanban_hygiene`` fixture via ``scripts/kanban-hygiene-advisory.py``.

    Runs the script's ``advisory`` over the fixture tasks on the
    ``synthetic-npu`` board (using the fixture's ``now`` or the current time)
    and reports the first returned item's ``next_gate`` as the recommendation.
    """
    hygiene = load_module("kanban_hygiene_advisory", REPO_ROOT / "scripts" / "kanban-hygiene-advisory.py")
    tasks = list(fixture.get("tasks") or [])
    reference_time = float(fixture.get("now") or time.time())
    report = hygiene.advisory(
        tasks,
        board="synthetic-npu",
        now=reference_time,
        input_metadata={},
        include_evidence=False,
    )

    # Only the first advisory item is compared against the fixture.
    first_item = report["items"][0]
    gate = first_item["next_gate"]
    return {
        "recommendation": gate["value"],
        "confidence": gate.get("confidence", 0.7),
        "npu_proof": report["npu_proof"],
        "notes": [
            f"task_id={first_item['task_id']}",
            f"review_needed={first_item['review_needed']['value']}",
        ],
        "raw_compact": {"counts": report["counts"], "next_gate": gate},
    }
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_gateway_envelope(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate an ``advisory_gateway_envelope`` fixture.

    If the envelope's authority section opens any ``can_*`` flag the
    recommendation is ``block_authority_violation``; otherwise it is whatever
    ``cron_recommendation`` returns for the envelope with a fixed ``critical``
    severity. Confidence comes from the result labels with a 0.8 fallback.
    """
    envelope = fixture.get("gateway_envelope") or {}
    envelope_authority = envelope.get("authority") or {}
    violations = authority_violations(closed_authority_flags(envelope_authority))

    recommendation = (
        "block_authority_violation"
        if violations
        else cron_recommendation(envelope, {"severity": "critical"})
    )

    result = envelope.get("result")
    if isinstance(result, Mapping):
        labels = (result or {}).get("labels") or {}
    else:
        labels = {}

    violation_text = ",".join(violations) or "none"
    return {
        "recommendation": recommendation,
        "confidence": lane_confidence({"labels": labels}, 0.8),
        "npu_proof": envelope.get("npu_proof") or {},
        "authority_from_envelope": envelope.get("authority") or {},
        "notes": [f"violations={violation_text}", f"trace_id={envelope.get('trace_id')}"],
    }
|
||||||
|
|
||||||
|
|
||||||
|
# Dispatch table: fixture "lane" value -> evaluator function. run() looks the
# lane up here and raises HarnessError("unsupported_lane:...") for anything else.
EVALUATORS = {
    "context_gate": evaluate_context_gate,
    "cron_n8n_advisory": evaluate_cron_n8n,
    "batch_triage": evaluate_batch_triage,
    "voice_audio": evaluate_voice_audio,
    "kanban_hygiene": evaluate_kanban_hygiene,
    "advisory_gateway_envelope": evaluate_gateway_envelope,
}
|
||||||
|
|
||||||
|
|
||||||
|
def build_decision(fixture: Mapping[str, Any], evaluated: Mapping[str, Any]) -> dict[str, Any]:
    """Assemble the full decision record for one fixture.

    Args:
        fixture: the fixture entry; must contain ``human_or_atlas_decision``
            and ``expected_recommendation`` (``KeyError`` otherwise).
        evaluated: the lane evaluator's result; must contain
            ``recommendation``. ``confidence``, ``npu_proof``, ``notes`` and
            ``authority_from_envelope`` are optional.

    Returns:
        A dict following ``SCHEMA`` with source, service, recommendation,
        confidence, authority flags, outcome, normalised NPU proof, latency
        placeholder, fallback and privacy sections, plus ``fixture_id`` and
        ``lane`` compatibility fields.

    The computed outcome label may be replaced by the fixture's
    ``expected_outcome`` when the recommendation equals the expected one; a
    missing or ``None`` ``expected_outcome`` never overrides the computed label.
    """
    envelope_authority = evaluated.get("authority_from_envelope")
    extra_authority = envelope_authority if isinstance(envelope_authority, Mapping) else None
    authority_flags = closed_authority_flags(extra_authority)
    violations = authority_violations(authority_flags)

    recommendation = str(evaluated["recommendation"])
    human = str(fixture["human_or_atlas_decision"])
    expected = str(fixture["expected_recommendation"])
    expected_outcome = fixture.get("expected_outcome")

    outcome_label = compare_outcome(recommendation, expected, human)
    # The fixture may refine the label (e.g. severity_overcall) only when the
    # recommendation itself matched; an absent/null value must not turn the
    # label into the string "None".
    if recommendation == expected and expected_outcome is not None and outcome_label != str(expected_outcome):
        outcome_label = str(expected_outcome)

    confidence_score = float(evaluated.get("confidence") or 0.0)
    npu_raw = dict(evaluated.get("npu_proof") or {})
    # Harness runs are fixture-only unless the evaluator says otherwise.
    npu_raw.setdefault("fixture_only", True)

    fixture_id = str(fixture.get("id"))
    input_class = str(fixture.get("input_class") or fixture.get("lane") or "unknown")
    service_name = str(fixture.get("service") or fixture.get("lane") or "unknown")
    source_kind = str(fixture.get("source") or "fixture")

    comparison = "agree" if outcome_label == "agree" else ("uncertain" if outcome_label == "uncertain" else "disagree")
    error_type = outcome_label if outcome_label in {"false_positive", "false_negative", "severity_overcall", "severity_undercall"} else None
    if violations:
        # An opened authority flag outranks any comparison error.
        error_type = "unsafe_authority"

    notes = list(evaluated.get("notes") or [])
    return {
        "schema_version": SCHEMA,
        # Deterministic id: same schema + fixture id always gives the same UUID.
        "decision_id": str(uuid.uuid5(uuid.NAMESPACE_URL, f"{SCHEMA}:{fixture_id}")),
        "timestamp": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"),
        "source": {
            "kind": "fixture",
            "fixture_id": fixture_id,
            "fixture_set": "npu_advisory_eval_v1",
            "artifact_ref": None,
            "content_hash": "sha256:" + hashlib.sha256(json.dumps(fixture, sort_keys=True, default=str).encode()).hexdigest(),
            "privacy_class": "synthetic" if source_kind.startswith("synthetic") else "non_private",
        },
        "service": {
            "name": service_name,
            "endpoint": service_name,
            "mode": "offline_fixture",
            "model": "openvino-local-fixture",
        },
        "input_class": input_class,
        "recommendation": {
            "label": recommendation,
            "severity": severity_for(recommendation),
            "reasons": list(notes),
            "evidence_refs": [f"fixture:{fixture_id}", f"lane:{fixture.get('lane')}"],
            "raw_output_ref": None,
        },
        "expected_recommendation": expected,
        "confidence": {
            "score": round(confidence_score, 3),
            "bucket": confidence_bucket(confidence_score),
            "bucket_rule": "v1_default",
            "calibrated": False,
        },
        "authority_flags": authority_flags,
        "allowed_actions": ALLOWED_ACTIONS,
        "actual_action": dict(NO_ACTUAL_ACTION),
        "human_or_atlas_decision": {
            "source": "fixture_expected",
            "label": human,
            "severity": severity_for(human),
            "confidence": None,
            "decision_ref": fixture_id,
            "timestamp": None,
        },
        "outcome": {
            "comparison": comparison,
            "label": outcome_label,
            "error_type": error_type,
            "human_review_required": bool(violations or recommendation in {"require_human_review", "block_authority_violation"}),
            "promotion_blocker": bool(violations or error_type in {"false_negative", "unsafe_authority", "privacy_violation"}),
        },
        "expected_outcome": expected_outcome,
        "npu_proof": npu_proof_v1(npu_raw),
        # total_ms is filled in by run() after the evaluator has been timed.
        "latency": {"total_ms": 0, "service_ms": None, "queue_ms": None, "timeout": False},
        "fallback": {"occurred": True, "kind": "offline", "reason": "synthetic_fixture_deterministic_adapter_no_live_service_call", "expected": True},
        "privacy": {"payload_logged": False, "redaction": "metadata_only", "retention": "local_audit", "contains_private_payload": False},
        "notes": notes,
        "authority_safe_flag_violations": violations,
        # Compatibility fields for compact summaries/tests.
        "fixture_id": fixture_id,
        "lane": fixture.get("lane"),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def run(fixtures_path: Path) -> dict[str, Any]:
    """Evaluate every fixture in *fixtures_path* and build the run summary.

    The file must be a JSON object with a non-empty ``fixtures`` list of
    objects. Each fixture is dispatched to the evaluator registered for its
    ``lane``, turned into a decision record, and timed.

    Returns:
        The summary dict: schema, totals, per-lane and minimum metrics,
        confidence/recommendation histograms, violations, mismatches and the
        full list of decisions.

    Raises:
        HarnessError: if the file is not a JSON object, the fixture list is
            missing/empty, a fixture is not an object, or its lane has no
            evaluator.
        OSError, json.JSONDecodeError: if the file cannot be read or parsed.
    """
    data = json.loads(fixtures_path.read_text(encoding="utf-8"))
    if not isinstance(data, Mapping):
        raise HarnessError("fixture_set_invalid:not_mapping")
    fixtures = data.get("fixtures")
    if not isinstance(fixtures, list) or not fixtures:
        raise HarnessError("fixture_set_empty")

    decisions = []
    started = time.perf_counter()
    for fixture in fixtures:
        if not isinstance(fixture, Mapping):
            raise HarnessError("fixture_invalid:not_mapping")
        lane = fixture.get("lane")
        evaluator = EVALUATORS.get(str(lane))
        if evaluator is None:
            raise HarnessError(f"unsupported_lane:{lane}")
        t0 = time.perf_counter()
        evaluated = evaluator(fixture)
        decision = build_decision(fixture, evaluated)
        # Covers evaluation plus record assembly for this fixture.
        decision["latency"]["total_ms"] = round((time.perf_counter() - t0) * 1000, 3)
        decisions.append(decision)

    counts = Counter(d["outcome"]["label"] for d in decisions)
    confidence = Counter(d["confidence"]["bucket"] for d in decisions)
    recommendations = Counter(d["recommendation"]["label"] for d in decisions)
    violations = [d for d in decisions if d["authority_safe_flag_violations"]]
    mismatches = [d for d in decisions if d["outcome"]["label"] != d.get("expected_outcome")]
    return {
        "schema": HARNESS_SCHEMA,
        "fixture_file": str(fixtures_path),
        "dry_run": True,
        "mutations": dict(MUTATION_FLAGS_FALSE),
        "totals": {
            "fixtures": len(decisions),
            "agree": counts.get("agree", 0),
            "disagree": counts.get("disagree", 0),
            "uncertain": counts.get("uncertain", 0),
            "false_positive": counts.get("false_positive", 0),
            "false_negative": counts.get("false_negative", 0),
            "authority_safe_flag_violations": len(violations),
            "expected_outcome_mismatches": len(mismatches),
            "wall_ms": round((time.perf_counter() - started) * 1000, 3),
        },
        "by_lane": lane_summary(decisions),
        "confidence_buckets": dict(sorted(confidence.items())),
        "recommendations": dict(sorted(recommendations.items())),
        "minimum_metrics": minimum_metrics(decisions),
        "violations": [{"fixture_id": d["fixture_id"], "flags": d["authority_safe_flag_violations"]} for d in violations],
        "mismatches": [{"fixture_id": d["fixture_id"], "outcome": d["outcome"]["label"], "expected_outcome": d.get("expected_outcome")} for d in mismatches],
        "decisions": decisions,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def percentile(values: list[float], pct: float) -> float | None:
|
||||||
|
if not values:
|
||||||
|
return None
|
||||||
|
ordered = sorted(values)
|
||||||
|
idx = min(len(ordered) - 1, max(0, round((pct / 100) * (len(ordered) - 1))))
|
||||||
|
return ordered[idx]
|
||||||
|
|
||||||
|
|
||||||
|
def minimum_metrics(decisions: list[dict[str, Any]]) -> dict[str, Any]:
    """Aggregate the minimum metric set over all decision records.

    Produces record counts per input class and service, privacy/side-effect/
    timeout counters, fallback statistics, NPU proof tallies (ok / missing /
    not applicable) and p50/p95 latency per service and per input class.
    """

    def _spread(samples: list[float]) -> dict[str, float | None]:
        # p50/p95 pair for one latency group.
        return {"p50_ms": percentile(samples, 50), "p95_ms": percentile(samples, 95)}

    by_input = Counter(record["input_class"] for record in decisions)
    by_service = Counter(record["service"]["name"] for record in decisions)
    outcomes = Counter(record["outcome"]["label"] for record in decisions)

    fallbacks = [record["fallback"] for record in decisions if record["fallback"]["occurred"]]
    fallback_kinds = Counter(entry["kind"] for entry in fallbacks)

    proof_states = [record["npu_proof"]["proof_ok"] for record in decisions]
    proof_ok = sum(1 for state in proof_states if state is True)
    proof_missing = sum(1 for state in proof_states if state is False)
    proof_na = sum(1 for state in proof_states if state is None)

    privacy_violations = 0
    side_effects = 0
    timeouts = 0
    service_latencies: dict[str, list[float]] = {name: [] for name in by_service}
    input_latencies: dict[str, list[float]] = {name: [] for name in by_input}
    for record in decisions:
        privacy = record["privacy"]
        if privacy["contains_private_payload"] or privacy["payload_logged"]:
            privacy_violations += 1
        action = record["actual_action"]
        if action["performed"] or action["side_effects"]:
            side_effects += 1
        if record["latency"].get("timeout"):
            timeouts += 1
        elapsed = float(record["latency"]["total_ms"])
        service_latencies[record["service"]["name"]].append(elapsed)
        input_latencies[record["input_class"]].append(elapsed)

    lat_by_service = {name: _spread(samples) for name, samples in service_latencies.items()}
    lat_by_input = {name: _spread(samples) for name, samples in input_latencies.items()}

    return {
        "total_records": len(decisions),
        "records_by_input_class": dict(sorted(by_input.items())),
        "records_by_service": dict(sorted(by_service.items())),
        "privacy_violation_count": privacy_violations,
        "actual_side_effect_count": side_effects,
        "missing_reference_count": outcomes.get("missing_reference", 0),
        "fallback_count": sum(fallback_kinds.values()),
        "fallback_counts_by_kind": dict(sorted(fallback_kinds.items())),
        "expected_fallback_count": sum(1 for entry in fallbacks if entry["expected"]),
        "unexpected_fallback_count": sum(1 for entry in fallbacks if not entry["expected"]),
        "npu_proof_ok_count": proof_ok,
        "npu_proof_missing_count": proof_missing,
        "npu_proof_not_applicable_count": proof_na,
        "latency_by_service": lat_by_service,
        "latency_by_input_class": lat_by_input,
        "timeout_count": timeouts,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def lane_summary(decisions: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Summarise decisions per lane, with lanes in sorted order.

    Each row holds the fixture count, the tally of the five tracked outcome
    labels, and how many decisions carry authority-safe flag violations.
    """
    tracked_labels = ("agree", "disagree", "false_positive", "false_negative", "uncertain")

    grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for decision in decisions:
        grouped[str(decision["lane"])].append(decision)

    summary: dict[str, dict[str, Any]] = {}
    for lane in sorted(grouped):
        members = grouped[lane]
        tally = Counter(member["outcome"]["label"] for member in members)
        row: dict[str, Any] = {"fixtures": len(members)}
        for label in tracked_labels:
            row[label] = tally.get(label, 0)
        row["authority_safe_flag_violations"] = sum(
            1 for member in members if member["authority_safe_flag_violations"]
        )
        summary[lane] = row
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def markdown_summary(summary: Mapping[str, Any]) -> str:
    """Render the run summary as a short Markdown report.

    Contains a totals line, the authority-violation count, a per-lane table
    and, when present, a list of authority-safe flag violations. The result
    always ends with a single newline.
    """
    totals = summary["totals"]
    headline = (
        f"fixtures: {totals['fixtures']} | agree: {totals['agree']} | disagree: {totals['disagree']} | "
        f"false_positive: {totals['false_positive']} | false_negative: {totals['false_negative']} | "
        f"uncertain: {totals['uncertain']}"
    )
    lines = [
        "# NPU advisory dry-run comparison",
        "",
        headline,
        f"authority_safe_flag_violations: {totals['authority_safe_flag_violations']} | mutations: all_false",
        "",
        "| lane | fixtures | agree | false_positive | false_negative | violations |",
        "| --- | ---: | ---: | ---: | ---: | ---: |",
    ]

    columns = ("fixtures", "agree", "false_positive", "false_negative", "authority_safe_flag_violations")
    for lane, row in summary["by_lane"].items():
        cells = " | ".join(str(row[column]) for column in columns)
        lines.append(f"| {lane} | {cells} |")

    if summary.get("violations"):
        lines.append("")
        lines.append("## Authority-safe flag violations")
        lines.extend(
            f"- {violation['fixture_id']}: {', '.join(violation['flags'])}"
            for violation in summary["violations"]
        )
    return "\n".join(lines) + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the dry-run harness."""
    parser = argparse.ArgumentParser(description="Run synthetic advisory-only NPU dry-run fixture comparisons.")
    parser.add_argument("--fixtures", help="Synthetic fixture JSON file", default=str(DEFAULT_FIXTURES))
    parser.add_argument("--format", default="json", choices=["json", "markdown"])

    # Plain on/off switches, registered in their original order.
    switches = (
        ("--include-decisions", "Include per-fixture decision records in JSON output"),
        ("--fail-on-mismatch", "Return non-zero if observed outcome differs from fixture expected_outcome"),
        ("--fail-on-authority-violation", "Return non-zero if any fixture exposes may_* authority flags set true"),
    )
    for flag, description in switches:
        parser.add_argument(flag, action="store_true", help=description)
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Run the harness from the command line and return the process exit code.

    Exit codes: ``2`` when the fixture file cannot be read, parsed or
    evaluated (a JSON error object is written to stderr); ``1`` when a
    requested ``--fail-on-*`` check trips; ``0`` otherwise.
    """
    args = build_parser().parse_args(argv)
    try:
        summary = run(Path(args.fixtures).expanduser().resolve())
    except (OSError, json.JSONDecodeError, HarnessError) as exc:
        failure = {"ok": False, "error": str(exc), "dry_run": True, "mutations": MUTATION_FLAGS_FALSE}
        print(json.dumps(failure, sort_keys=True), file=sys.stderr)
        return 2

    if args.format == "markdown":
        # markdown_summary already ends with a newline.
        print(markdown_summary(summary), end="")
    else:
        payload = dict(summary)
        if not args.include_decisions:
            payload.pop("decisions", None)
        print(json.dumps(payload, sort_keys=True, separators=(",", ":")))

    totals = summary["totals"]
    mismatch_failure = args.fail_on_mismatch and totals["expected_outcome_mismatches"]
    authority_failure = args.fail_on_authority_violation and totals["authority_safe_flag_violations"]
    return 1 if (mismatch_failure or authority_failure) else 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
Executable
+523
@@ -0,0 +1,523 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Explicit-root dry-run batch triage for local documents, images, and audio.
|
||||||
|
|
||||||
|
This wrapper is intentionally report-only. It requires a lane-scoped approved
|
||||||
|
root in a manifest, rejects request roots that broaden that approval, redacts raw
|
||||||
|
text/transcripts by default, and never mutates Obsidian, RAG/vector DBs, files,
|
||||||
|
routing, memory, services, or sends.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import datetime as dt
|
||||||
|
import hashlib
|
||||||
|
import ipaddress
|
||||||
|
import importlib.util
|
||||||
|
import json
|
||||||
|
import mimetypes
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yaml # type: ignore
|
||||||
|
except Exception as exc: # pragma: no cover
|
||||||
|
raise SystemExit("PyYAML is required to read triage root manifests") from exc
|
||||||
|
|
||||||
|
# Names of the triage lanes known to this wrapper.
LANES = (
    "screenshots",
    "receipts",
    "downloads",
    "obsidian_attachments",
    "voice_memos",
    "meeting_snippets",
)
# Partition of LANES into audio lanes and document/image lanes.
AUDIO_LANES = {"voice_memos", "meeting_snippets"}
DOC_IMAGE_LANES = {"screenshots", "receipts", "downloads", "obsidian_attachments"}
# Directory names pruned during the file walk (VCS, Obsidian config, caches, vector stores).
SKIP_DIR_NAMES = {".git", ".obsidian", "__pycache__", ".cache", "cache", "chroma", "chromadb", "vector_db", "vectors"}
# Default counter file read by read_busy().
NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
# Loopback Whisper endpoint; port 18816 is the only port validate_local_whisper_url() accepts.
DEFAULT_WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"
# All mutation categories, every one fixed to False.
MUTATIONS_FALSE = {
    "obsidian": False,
    "rag": False,
    "vector_db": False,
    "sends": False,
    "file_moves": False,
    "routing": False,
    "memory": False,
    "service_restarts": False,
}
|
||||||
|
# Case-insensitive keyword rules, keyed by reason name.
# The weekday alternative accepts both abbreviations ("by fri") and full
# names ("by Friday"); with only the three-letter stems followed by \b a full
# weekday name could never match.
ACTION_PATTERNS = {
    "follow_up": re.compile(r"\b(follow up|follow-up|circle back|reply|respond)\b", re.I),
    "date_or_deadline": re.compile(
        r"\b(deadline|due|by (?:mon|tues?|wed(?:nes)?|thu(?:rs?)?|fri|sat(?:ur)?|sun)(?:day)?|20\d{2}[-/]\d{1,2}[-/]\d{1,2})\b",
        re.I,
    ),
    "decision": re.compile(r"\b(decided|decision|approved|rejected|go with|choose)\b", re.I),
    "task": re.compile(r"\b(todo|to-do|action item|assign|need to|please)\b", re.I),
}
|
||||||
|
|
||||||
|
|
||||||
|
class FailClosed(Exception):
    """Raised with a short reason code when a manifest, root or URL check is not satisfied."""
|
||||||
|
|
||||||
|
|
||||||
|
def sha256_text(text: str) -> str:
    """Return ``"sha256:<hex>"`` for the UTF-8 encoding of *text*.

    Characters that cannot be encoded are replaced instead of raising.
    """
    encoded = text.encode("utf-8", errors="replace")
    digest = hashlib.sha256(encoded).hexdigest()
    return f"sha256:{digest}"
|
||||||
|
|
||||||
|
|
||||||
|
def sha256_file(path: Path) -> str:
    """Return ``"sha256:<hex>"`` for the file at *path*, read in 1 MiB chunks."""
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b"":
                break
            hasher.update(block)
    return "sha256:" + hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def read_busy(path: Path = NPU_BUSY_PATH) -> int | None:
    """Read the integer stored in *path*; return ``None`` on any failure.

    Best-effort by design: a missing, unreadable or non-numeric file is
    reported as ``None`` rather than raising.
    """
    try:
        raw = path.read_text()
        value = int(raw.strip())
    except Exception:
        return None
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def validate_local_whisper_url(whisper_url: str) -> str:
    """Return *whisper_url* unchanged if it targets the approved loopback Whisper service.

    Checks, in order: the URL parses, scheme is ``http``, no embedded
    credentials, port is 18816, and the host is ``localhost`` or a loopback IP
    address. Any failed check raises ``FailClosed`` with a reason code.
    """
    try:
        parts = urllib.parse.urlparse(whisper_url)
        port = parts.port  # accessing .port can itself raise ValueError
    except ValueError as exc:
        raise FailClosed("whisper_url_invalid") from exc

    checks = (
        (parts.scheme != "http", "whisper_url_scheme_not_http"),
        (bool(parts.username or parts.password), "whisper_url_credentials_not_allowed"),
        (port != 18816, "whisper_url_port_not_approved"),
    )
    for rejected, reason in checks:
        if rejected:
            raise FailClosed(reason)

    host = (parts.hostname or "").strip().lower()
    if host != "localhost":
        try:
            loopback = ipaddress.ip_address(host).is_loopback
        except ValueError:
            # Not an IP literal (any other hostname) -> not approved.
            loopback = False
        if not loopback:
            raise FailClosed("whisper_url_not_loopback")
    return whisper_url
|
||||||
|
|
||||||
|
|
||||||
|
def is_under(path: Path, root: Path) -> bool:
    """Return True when the resolved *path* equals or lies below the resolved *root*."""
    candidate = path.resolve()
    anchor = root.resolve()
    return candidate == anchor or anchor in candidate.parents
|
||||||
|
|
||||||
|
|
||||||
|
def load_manifest(path: Path) -> dict[str, Any]:
    """Load and validate the triage root manifest at *path*.

    The manifest must be a YAML mapping with ``version: 1``, a ``roots``
    mapping, and a ``policy`` mapping that keeps the wrapper report-only:
    ``default_mode`` (if given) is ``dry_run``, ``require_explicit_root`` is
    true, and ``allow_external_uploads``, ``allow_mutations`` and
    ``log_raw_text`` are false.

    Raises:
        FailClosed: if the file is missing, cannot be decoded/parsed, or
            violates any of the rules above. Parse problems are reported as
            ``FailClosed`` as well so every manifest defect surfaces through
            the same fail-closed path.
    """
    if not path.exists():
        raise FailClosed(f"manifest_missing:{path}")
    try:
        # YAML streams are Unicode; do not depend on the locale encoding.
        data = yaml.safe_load(path.read_text(encoding="utf-8"))
    except (yaml.YAMLError, UnicodeDecodeError) as exc:
        raise FailClosed("manifest_invalid:parse_error") from exc
    if not isinstance(data, dict):
        raise FailClosed("manifest_invalid:not_mapping")
    if data.get("version") != 1:
        raise FailClosed("manifest_invalid:version_must_be_1")

    policy = data.get("policy") or {}
    if not isinstance(policy, dict):
        raise FailClosed("policy_invalid:not_mapping")
    if policy.get("default_mode", "dry_run") != "dry_run":
        raise FailClosed("policy_invalid:default_mode_not_dry_run")
    required_policy = {
        "require_explicit_root": True,
        "allow_external_uploads": False,
        "allow_mutations": False,
        "log_raw_text": False,
    }
    for key, expected in required_policy.items():
        # Identity check: the value must be the literal boolean, not merely
        # truthy/falsy, and a missing key is rejected as well.
        if policy.get(key) is not expected:
            raise FailClosed(f"policy_invalid:{key}")

    if not isinstance(data.get("roots"), dict):
        raise FailClosed("manifest_invalid:roots_missing")
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_lane_root(manifest: dict[str, Any], manifest_path: Path, lane: str, requested_root: str | None) -> tuple[dict[str, Any], Path, Path]:
    """Resolve the approved and the selected root directory for *lane*.

    The lane must exist in the manifest, be ``approved: true`` and name a
    ``root``; a relative root is taken relative to the manifest's directory.
    *requested_root*, when given, must be an existing directory at or below
    the approved root; otherwise the approved root itself is selected.

    Returns:
        ``(lane_cfg, approved_root, selected_root)`` with both paths resolved.

    Raises:
        FailClosed: for a missing/unapproved lane, a missing or unavailable
            root, or a requested root outside the approved one.
    """
    lane_cfg = (manifest.get("roots") or {}).get(lane)
    if not isinstance(lane_cfg, dict):
        raise FailClosed(f"lane_missing:{lane}")
    if lane_cfg.get("approved") is not True:
        raise FailClosed(f"lane_unapproved:{lane}")

    root_value = lane_cfg.get("root")
    if not root_value:
        raise FailClosed(f"root_missing:{lane}")

    configured = Path(str(root_value)).expanduser()
    if not configured.is_absolute():
        # Relative roots are anchored at the manifest's own directory.
        configured = manifest_path.parent / configured
    approved_root = configured.resolve()
    if not (approved_root.exists() and approved_root.is_dir()):
        raise FailClosed(f"approved_root_unavailable:{lane}")

    if requested_root:
        selected_root = Path(requested_root).expanduser().resolve()
    else:
        selected_root = approved_root.resolve()
    if not (selected_root.exists() and selected_root.is_dir()):
        raise FailClosed(f"request_root_unavailable:{lane}")
    if not is_under(selected_root, approved_root):
        raise FailClosed(f"request_root_broadens_approval:{lane}")
    return lane_cfg, approved_root, selected_root
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_exts(lane_cfg: dict[str, Any]) -> set[str]:
    """Return the lane's allowed file extensions, normalised to ``.ext`` form.

    Entries are lower-cased, stripped of surrounding whitespace and given a
    leading dot when missing. A missing or ``None`` value yields an empty set,
    and a bare string is treated as a single entry rather than iterated
    character by character. ``None`` entries and entries that are blank or
    consist only of dots are dropped, so a manifest such as ``[""]`` cannot
    produce a non-empty set that matches nothing.
    """
    raw = lane_cfg.get("allowed_extensions")
    if raw is None:
        return set()
    if isinstance(raw, str):
        raw = [raw]
    normalised: set[str] = set()
    for entry in raw:
        if entry is None:
            continue
        ext = str(entry).strip().lower()
        if not ext.strip("."):
            # Blank or dot-only entries can never match a real suffix.
            continue
        normalised.add(ext if ext.startswith(".") else "." + ext)
    return normalised
|
||||||
|
|
||||||
|
|
||||||
|
def iter_files(root: Path, approved_root: Path, exts: set[str], max_file_mb: float, max_age_days: float | None) -> tuple[list[Path], dict[str, int], int]:
    """Walk *root* and collect files that pass the lane's filters.

    Returns ``(accepted, skipped, files_seen)``: the accepted resolved paths
    sorted newest first by modification time, a per-reason skip counter, and
    the number of non-hidden file names examined.

    Directories named in ``SKIP_DIR_NAMES`` or starting with ``.`` are pruned,
    directory symlinks are not followed, and anything that is not
    ``is_under`` *approved_root* is counted as ``symlink_escape``.

    The modification time captured by the filtering ``stat`` call is reused
    for the final sort, so a file removed while the scan is running cannot
    raise during sorting.
    """
    skipped = {"extension": 0, "size": 0, "symlink_escape": 0, "not_regular_file": 0, "too_old": 0, "policy": 0}
    dated: list[tuple[float, Path]] = []
    files_seen = 0
    now = time.time()
    max_bytes = int(max_file_mb * 1024 * 1024)
    for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
        # Prune in place so os.walk does not descend into skipped directories.
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIR_NAMES and not d.startswith(".")]
        current = Path(dirpath)
        if not is_under(current, approved_root):
            skipped["symlink_escape"] += 1
            dirnames[:] = []
            continue
        for name in filenames:
            path = current / name
            if name.startswith("."):
                skipped["policy"] += 1
                continue
            files_seen += 1
            try:
                resolved = path.resolve()
            except Exception:
                # Deliberately broad: any resolution failure is treated as an escape.
                skipped["symlink_escape"] += 1
                continue
            if not is_under(resolved, approved_root):
                skipped["symlink_escape"] += 1
                continue
            if not resolved.is_file():
                skipped["not_regular_file"] += 1
                continue
            if resolved.suffix.lower() not in exts:
                skipped["extension"] += 1
                continue
            try:
                st = resolved.stat()
            except OSError:
                skipped["not_regular_file"] += 1
                continue
            if st.st_size > max_bytes:
                skipped["size"] += 1
                continue
            if max_age_days is not None and now - st.st_mtime > max_age_days * 86400:
                skipped["too_old"] += 1
                continue
            dated.append((st.st_mtime, resolved))
    # Sort on the cached mtime only; the stable sort keeps walk order for ties.
    dated.sort(key=lambda pair: pair[0], reverse=True)
    accepted = [path for _, path in dated]
    return accepted, skipped, files_seen
|
||||||
|
|
||||||
|
|
||||||
|
def load_doc_triage_module(repo_root: Path):
    """Import ``openvino-doc-image-triage-npu/triage.py`` from *repo_root*.

    The module is registered in ``sys.modules`` as ``doc_image_triage`` before
    it is executed (unless that name is already registered). If execution
    fails, the entry this call registered is removed again so a
    half-initialised module is not left behind.

    Raises:
        RuntimeError: no import spec or loader could be created for the file.
        Exception: whatever executing the module raises is re-raised.
    """
    module_name = "doc_image_triage"
    module_path = repo_root / "openvino-doc-image-triage-npu" / "triage.py"
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    if spec is None or spec.loader is None:
        raise RuntimeError("doc_image_triage_import_failed")
    module = importlib.util.module_from_spec(spec)
    registered = sys.modules.setdefault(module_name, module)
    try:
        spec.loader.exec_module(module)  # type: ignore[union-attr]
    except BaseException:
        # Only undo our own registration; never evict a pre-existing module.
        if registered is module and sys.modules.get(module_name) is module:
            del sys.modules[module_name]
        raise
    return module
|
||||||
|
|
||||||
|
|
||||||
|
def fallback_doc_item(path: Path, root: Path, lane: str) -> dict[str, Any]:
    """Build a compact, redacted item for *path* using CPU-side rules only.

    Text comes from an optional ``<name><suffix>.txt`` sidecar next to the
    file; at most the first 12000 characters are read, so a very large
    sidecar is never loaded whole. The category is chosen by keyword match on
    that text, falling back to the screenshots lane default. Only counts and
    hashes derived from the text are returned, never the text itself.
    """
    sidecar = path.with_suffix(path.suffix + ".txt")
    text = ""
    if sidecar.exists() and sidecar.is_file():
        # Bounded read: same characters as read_text()[:12000] without
        # materialising the rest of the file.
        with sidecar.open(errors="replace") as handle:
            text = handle.read(12000)
    lower = text.lower()
    category = "unknown_or_low_confidence"
    if any(w in lower for w in ("receipt", "subtotal", "store")):
        category = "receipt"
    elif any(w in lower for w in ("invoice", "amount due", "payment due")):
        category = "bill_or_invoice"
    elif lane == "screenshots":
        category = "screenshot_web_or_app"
    reasons = [name for name, rx in ACTION_PATTERNS.items() if rx.search(text)]
    # Distinct matches only; the matched values themselves are not returned.
    dates_count = len(set(re.findall(r"\b20\d{2}[-/]\d{1,2}[-/]\d{1,2}\b", text)))
    amounts_count = len(set(re.findall(r"\$\s?\d+(?:\.\d{2})?", text)))
    return {
        "basename": path.name,
        "relative_path_hash": sha256_text(path.relative_to(root).as_posix()),
        "file_id": sha256_file(path),
        "media_type": infer_media_type(path),
        "category": category,
        "needs_attention": bool(reasons),
        "reasons": sorted(reasons),
        "raw_text_redacted": True,
        "full_path_included": False,
        "metadata": {"dates_count": dates_count, "amounts_count": amounts_count, "raw_values_redacted": True},
        "processing": {"doc_image_triage": "fallback_cpu_sidecar_rules", "npu_verified": False},
    }
|
||||||
|
|
||||||
|
|
||||||
|
def infer_media_type(path: Path) -> str:
    """Classify *path* as ``"pdf"``, ``"image"``, ``"audio"`` or ``"unknown"``.

    PDFs are recognised by suffix (case-insensitive); images and audio by the
    MIME type that ``mimetypes`` guesses from the file name.
    """
    if path.suffix.lower() == ".pdf":
        return "pdf"
    guessed = mimetypes.guess_type(path.name)[0]
    if not guessed:
        return "unknown"
    for family in ("image", "audio"):
        if guessed.startswith(family + "/"):
            return family
    return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def compact_doc_item(path: Path, root: Path, lane: str, triage_result: dict[str, Any]) -> dict[str, Any]:
    """Reduce a doc/image triage result to the compact, redacted item shape.

    Only the first entry of ``triage_result["pages"]`` is consulted for the
    classification, attention flags and metadata counts. Missing fields fall
    back to neutral defaults (``"unknown_or_low_confidence"``, ``"CPU"``, 0).
    For the ``receipts`` lane a ``receipt_fields`` summary is added.
    """
    pages = triage_result.get("pages") or []
    first_page = pages[0] if pages else {}
    classification = first_page.get("classification") or {}
    attention = first_page.get("needs_attention") or {}
    page_meta = first_page.get("metadata") or {}
    device_summary = triage_result.get("processing_device_summary") or {}

    counts = {
        "dates_count": page_meta.get("dates_count", 0),
        "amounts_count": page_meta.get("amounts_count", 0),
        "raw_values_redacted": True,
    }
    processing = {
        "doc_image_triage": "openvino-doc-image-triage-npu",
        "image_category_device": classification.get("device") or "CPU",
        "needs_attention_device": attention.get("device") or "CPU",
        "npu_verified": bool(device_summary.get("npu_verified")),
        "npu_busy_delta_us": device_summary.get("npu_busy_delta_us"),
    }
    item = {
        "basename": path.name,
        "relative_path_hash": sha256_text(path.relative_to(root).as_posix()),
        # Prefer identifiers the triage module already computed.
        "file_id": triage_result.get("file_id") or sha256_file(path),
        "media_type": triage_result.get("media_type") or infer_media_type(path),
        "category": classification.get("label") or "unknown_or_low_confidence",
        "needs_attention": bool(attention.get("value")),
        "reasons": attention.get("reasons") or [],
        "raw_text_redacted": True,
        "full_path_included": False,
        "metadata": counts,
        "processing": processing,
    }
    if lane == "receipts":
        entities = page_meta.get("detected_entities") or {}
        item["receipt_fields"] = {
            "vendor_present": bool(entities.get("org_present")),
            "amounts_count": counts["amounts_count"],
            "dates_count": counts["dates_count"],
        }
    return item
|
||||||
|
|
||||||
|
|
||||||
|
def classify_transcript(text: str, lane: str) -> dict[str, Any]:
    """Summarise a transcript as category, action reasons and simple counts.

    ``reasons`` lists every ``ACTION_PATTERNS`` name whose pattern matches
    *text*, sorted. The counts are presence flags (0 or 1 per pattern), not
    occurrence counts: ``action_items_count`` adds the ``follow_up`` and
    ``task`` flags together.
    """
    matched = [name for name, rx in ACTION_PATTERNS.items() if rx.search(text)]
    has_follow_up = bool(ACTION_PATTERNS["follow_up"].search(text))
    has_task = bool(ACTION_PATTERNS["task"].search(text))
    has_decision = bool(ACTION_PATTERNS["decision"].search(text))

    if lane == "meeting_snippets":
        category = "meeting_snippet"
    else:
        category = "voice_memo"
    return {
        "category": category,
        "action_worthy": bool(matched),
        "reasons": sorted(matched),
        "action_items_count": int(has_follow_up) + int(has_task),
        "decisions_count": 1 if has_decision else 0,
        "followups_count": 1 if has_follow_up else 0,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def multipart_transcribe(path: Path, whisper_url: str, timeout: float) -> dict[str, Any]:
    """POST *path* to the Whisper endpoint as multipart form data.

    The URL is first passed through ``validate_local_whisper_url``. The NPU
    busy counter is sampled before and after the request; a service-reported
    integer ``npu_busy_delta_us`` takes precedence over the sampled delta.

    The file name is percent-escaped (``"``, CR, LF) before being placed in
    the ``Content-Disposition`` header so an unusual name cannot terminate
    the quoted value or inject header lines. Names are encoded with
    ``surrogateescape`` so undecodable file names do not fail.

    Returns a dict with ``ok``, ``text``, ``transcript_chars``,
    ``duration_seconds``, ``language``, ``npu_busy_delta_us``,
    ``verified_npu`` and ``wall_ms``. Network and JSON errors propagate to
    the caller.
    """
    whisper_url = validate_local_whisper_url(whisper_url)
    name_bytes = path.name.encode("utf-8", "surrogateescape")
    boundary = "----NpuBatchTriage" + hashlib.sha256(name_bytes).hexdigest()[:12]
    safe_name = path.name.replace('"', "%22").replace("\r", "%0D").replace("\n", "%0A")
    data = path.read_bytes()
    head = (
        f"--{boundary}\r\n"
        f'Content-Disposition: form-data; name="file"; filename="{safe_name}"\r\n'
        "Content-Type: application/octet-stream\r\n\r\n"
    ).encode("utf-8", "surrogateescape")
    tail = (
        f"\r\n--{boundary}\r\n"
        'Content-Disposition: form-data; name="model"\r\n\r\n'
        "whisper-1\r\n"
        f"--{boundary}--\r\n"
    ).encode()
    body = head + data + tail
    before = read_busy()
    req = urllib.request.Request(whisper_url, data=body, headers={"Content-Type": f"multipart/form-data; boundary={boundary}"})
    t0 = time.perf_counter()
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        raw = resp.read(1024 * 1024)  # cap the response at 1 MiB
        status = resp.status
    parsed = json.loads(raw.decode())
    after = read_busy()
    text = str(parsed.get("text") or parsed.get("transcription") or "").strip()
    service_delta = parsed.get("npu_busy_delta_us")
    sysfs_delta = None if before is None or after is None else after - before
    # bool is a subclass of int; a JSON true/false must not count as a delta.
    service_is_int = isinstance(service_delta, int) and not isinstance(service_delta, bool)
    proof_delta = service_delta if service_is_int else sysfs_delta
    return {
        "ok": status == 200 and bool(text),
        "text": text,
        "transcript_chars": len(text),
        "duration_seconds": parsed.get("duration_seconds"),
        "language": parsed.get("language"),
        "npu_busy_delta_us": proof_delta,
        "verified_npu": bool(proof_delta and proof_delta > 0),
        "wall_ms": round((time.perf_counter() - t0) * 1000, 2),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def compact_audio_item(path: Path, root: Path, lane: str, no_npu: bool, whisper_url: str, timeout: float) -> dict[str, Any]:
    """Build the compact, redacted item for one audio file.

    Unless *no_npu* is set, the file is transcribed via
    ``multipart_transcribe``. URL, timeout, OS and JSON-decoding errors are
    recorded on the item as ``error`` instead of being raised. The transcript
    is used only for classification and its length; the text is never put in
    the item.
    """
    transcript = ""
    transcribed = False
    npu_delta = 0
    duration = None
    language = None
    error = None
    if not no_npu:
        try:
            result = multipart_transcribe(path, whisper_url, timeout)
            transcript = result["text"]
            transcribed = result["ok"]
            npu_delta = result.get("npu_busy_delta_us") or 0
            duration = result.get("duration_seconds")
            language = result.get("language")
        except (urllib.error.URLError, TimeoutError, OSError, json.JSONDecodeError) as exc:
            # Only the exception type is recorded, not its message.
            error = f"whisper_error:{type(exc).__name__}"
    summary = classify_transcript(transcript, lane)
    item = {
        "basename": path.name,
        "relative_path_hash": sha256_text(path.relative_to(root).as_posix()),
        "file_id": sha256_file(path),
        "media_type": "audio",
        "duration_seconds": duration,
        "transcribed": transcribed,
        "transcript_chars": len(transcript),
        "language": language,
        **summary,
        "npu_busy_delta_us": npu_delta,
        "raw_transcript_logged": False,
        "full_path_included": False,
    }
    if error:
        item["error"] = error
    return item
|
||||||
|
|
||||||
|
|
||||||
|
def process(args: argparse.Namespace) -> dict[str, Any]:
    """Run one dry-run batch triage pass for ``args.lane`` and build the report.

    Loads and validates the manifest, resolves the approved/selected roots,
    collects candidate files, processes up to the effective limit, and returns
    a JSON-serialisable report dict. Per-file failures are recorded in
    ``errors`` and as minimal error items; ``FailClosed`` always propagates.
    """
    repo_root = Path(__file__).resolve().parents[1]
    manifest_path = Path(args.manifest).expanduser().resolve()
    manifest = load_manifest(manifest_path)
    lane_cfg, approved_root, root = resolve_lane_root(manifest, manifest_path, args.lane, args.root)
    exts = allowed_exts(lane_cfg)
    if not exts:
        raise FailClosed(f"extensions_missing:{args.lane}")
    manifest_limit = int(lane_cfg.get("max_files", 50))
    # The command-line limit can only narrow the manifest limit, never raise it.
    limit = min(args.limit if args.limit is not None else manifest_limit, manifest_limit)
    files, skipped, files_seen = iter_files(root, approved_root, exts, float(lane_cfg.get("max_file_mb", 25)), args.max_age_days)
    selected = files[:limit]
    npu_before = read_busy()

    items: list[dict[str, Any]] = []
    errors: list[str] = []
    doc_module = None
    if args.lane in AUDIO_LANES and not args.no_npu:
        # Validate once up front so a bad URL fails before any file is touched.
        validate_local_whisper_url(args.whisper_url)
    if args.lane in DOC_IMAGE_LANES and not args.no_npu:
        try:
            doc_module = load_doc_triage_module(repo_root)
        except Exception as exc:
            # Import failure is recorded; doc lanes then use fallback_doc_item.
            errors.append(f"doc_triage_import_error:{type(exc).__name__}")

    for path in selected:
        try:
            if args.lane in AUDIO_LANES:
                item = compact_audio_item(path, root, args.lane, args.no_npu, args.whisper_url, args.timeout_seconds)
            elif doc_module is not None:
                opts = doc_module.TriageOptions(
                    dry_run=False,
                    include_ocr_text=False,
                    include_full_path=False,
                    use_embeddings=not args.no_npu,
                    allowed_roots=[approved_root],
                    timeout_seconds=args.timeout_seconds,
                )
                item = compact_doc_item(path, root, args.lane, doc_module.triage_file(path, opts))
            else:
                item = fallback_doc_item(path, root, args.lane)
            if args.include_full_path:
                item["full_path"] = str(path)
                item["full_path_included"] = True
            if args.include_raw_text:
                # The flag is acknowledged, but raw text is never attached here.
                item["raw_text_included"] = False
                item["raw_text_note"] = "unsupported_by_batch_wrapper"
            items.append(item)
        except FailClosed:
            raise
        except Exception as exc:
            # Only the exception type is recorded for a failed file.
            errors.append(f"{path.name}:{type(exc).__name__}")
            items.append({"basename": path.name, "ok": False, "error": type(exc).__name__, "raw_text_redacted": True, "full_path_included": False})

    npu_after = read_busy()
    sysfs_delta = None if npu_before is None or npu_after is None else npu_after - npu_before
    # Only top-level integer item deltas are considered here.
    item_deltas = [i.get("npu_busy_delta_us") for i in items if isinstance(i.get("npu_busy_delta_us"), int)]
    claimed = not args.no_npu and any((d or 0) > 0 for d in item_deltas + ([sysfs_delta] if isinstance(sysfs_delta, int) else []))
    proof_ok = claimed and bool(sysfs_delta is None or sysfs_delta > 0 or any((d or 0) > 0 for d in item_deltas))
    return {
        "ok": not errors,
        "lane": args.lane,
        "dry_run": True,
        "approved_root": True,
        "root_basename": root.name,
        "files_seen": files_seen,
        "files_processed": len(items),
        "skipped": skipped,
        "npu": {"claimed": claimed, "busy_delta_us": sysfs_delta, "proof_ok": proof_ok},
        "mutations": MUTATIONS_FALSE.copy(),
        "items": items,
        # NOTE(review): this mirrors the flag even though items never carry
        # raw text (see raw_text_included above) — confirm this is intended.
        "raw_content_redacted": not args.include_raw_text,
        "full_paths_included": bool(args.include_full_path),
        "errors": errors,
        "gates": {
            "external_uploads": False,
            "private_root_broadening": False,
            "obsidian_mutation": False,
            "vector_db_mutation": False,
            "outbound_sends": False,
            "routing_changes": False,
        },
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the batch triage wrapper.

    ``--manifest`` and ``--lane`` are required by argparse. ``--dry-run`` is
    only declared here as a flag; its presence is enforced by ``main``.
    """
    parser = argparse.ArgumentParser(description="Explicit-root dry-run batch triage wrapper")
    parser.add_argument("--manifest", required=True, help="lane approval manifest; missing/unapproved fails closed")
    parser.add_argument("--lane", required=True, choices=LANES)
    parser.add_argument("--root", help="optional narrower root under the manifest-approved lane root")
    parser.add_argument("--dry-run", action="store_true", help="required; mutation modes are not implemented")
    # No default: a missing --limit falls back to the manifest's max_files in process().
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument("--max-age-days", type=float, default=None)
    parser.add_argument("--include-raw-text", action="store_true", help="kept redacted by this wrapper; present only for explicit operator attempts")
    parser.add_argument("--include-full-path", action="store_true", help="operator-only local debugging")
    parser.add_argument("--no-npu", action="store_true", help="CPU-only smoke; never claims NPU")
    parser.add_argument("--json", action="store_true", help="emit compact JSON")
    parser.add_argument("--pretty", action="store_true", help="pretty JSON for local debugging")
    parser.add_argument("--whisper-url", default=DEFAULT_WHISPER_URL)
    parser.add_argument("--timeout-seconds", type=float, default=20.0)
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point; returns the process exit code.

    Exits with 2 (and a JSON error on stderr) when ``--dry-run`` is absent or
    ``--limit`` is below 1. Otherwise prints the report, or a ``fail_closed``
    error object, as JSON on stdout and returns 0 only when the report's
    ``ok`` is truthy.
    """
    args = build_parser().parse_args(argv)

    if not args.dry_run:
        refusal = {"ok": False, "error": "dry_run_required", "mutations": MUTATIONS_FALSE}
        print(json.dumps(refusal), file=sys.stderr)
        return 2

    if args.limit is not None and args.limit < 1:
        refusal = {"ok": False, "error": "limit_must_be_positive"}
        print(json.dumps(refusal), file=sys.stderr)
        return 2

    try:
        out = process(args)
    except FailClosed as exc:
        out = {
            "ok": False,
            "error": "fail_closed",
            "reason": str(exc),
            "dry_run": True,
            "mutations": MUTATIONS_FALSE.copy(),
        }

    indent = 2 if args.pretty else None
    print(json.dumps(out, indent=indent, sort_keys=True))
    if out.get("ok"):
        return 0
    return 2
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
Executable
+712
@@ -0,0 +1,712 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Compact, read-only NPU/OpenVINO utilization digest.
|
||||||
|
|
||||||
|
Default behavior is safe for on-demand or scheduled runs: health checks plus
|
||||||
|
bounded synthetic probes, one compact JSONL artifact, and no service restarts,
|
||||||
|
routing changes, advisory POSTs, vector mutations, outbound sends, or private
|
||||||
|
root broadening.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import base64
|
||||||
|
import datetime as dt
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
import uuid
|
||||||
|
import wave
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Callable
|
||||||
|
|
||||||
|
# Sysfs counter read before/after each probe to compute the NPU busy-time delta.
BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
# Default output directory; presumably where the digest JSONL artifact is
# written — the writer is outside this section, TODO confirm.
DEFAULT_OUT_DIR = Path("/home/will/.local/state/npu-utilization/digests")

# Service endpoints. Each service has a health URL; services that receive a
# synthetic probe in this section also have a request URL.
EMBED_URL = "http://127.0.0.1:18817/v1/embeddings"
EMBED_HEALTH_URL = "http://127.0.0.1:18817/healthz"
RERANK_URL = "http://127.0.0.1:18818/rerank"
RERANK_HEALTH_URL = "http://127.0.0.1:18818/readyz"
WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"
WHISPER_HEALTH_URL = "http://127.0.0.1:18816/health"
CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
CLASSIFIER_HEALTH_URL = "http://127.0.0.1:18819/healthz"
GENAI_HEALTH_URL = "http://127.0.0.1:18820/healthz"
GENAI_GENERATE_URL = "http://127.0.0.1:18820/v1/generate"
DOC_TRIAGE_URL = "http://127.0.0.1:18829/triage"
DOC_TRIAGE_HEALTH_URL = "http://127.0.0.1:18829/healthz"
RAG_ENDPOINT_HEALTH_URL = "http://127.0.0.1:18810/healthz"
RAG_HEALTH_URL = "http://127.0.0.1:18814/healthz"
# NOTE(review): the only non-loopback address in this list — confirm it is the
# intended host for the advisory service.
ADVISORY_HEALTH_URL = "http://172.19.0.1:18830/healthz"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ServiceRow:
    """One digest row describing a single service's health and probe outcome.

    Every field has a default, so a row can be created from just a service
    name and filled in progressively. ``compact_dict`` drops fields left as
    ``None`` or ``[]`` when a row is serialised.
    """

    # Row discriminator and service name.
    type: str = "service"
    service: str = ""
    # Health/probe status; proof_ok stays None until apply_proof sets it.
    reachable: bool = False
    probe_ran: bool = False
    proof_ok: bool | None = None
    # Probe volume and timing.
    calls: int = 0
    items: int = 0
    avg_ms: float | None = None
    # npu_delta_us is the sampled counter delta set by apply_proof;
    # response_delta_us is the delta a service reports in its response.
    npu_delta_us: int | None = None
    response_delta_us: int | None = None
    # "unavailable" until a health check or probe sets another mode.
    mode: str = "unavailable"
    # Incremented for unavailable services, missing proof, or skipped probes.
    fallbacks: int = 0
    warnings: list[str] = field(default_factory=list)
    gate: str = "none"
    # Optional per-service details; which probe fills each one is only partly
    # visible in this section (events, docs, embedding_* are set by probes).
    jobs: int | None = None
    events: int | None = None
    files: int | None = None
    docs: int | None = None
    text_len: int | None = None
    sample_rate: int | None = None
    embedding_count: int | None = None
    embedding_dim: int | None = None
    dry_run: bool | None = None
    suppress: int | None = None
    escalate: int | None = None
    recommendation: str | None = None
    confidence: float | None = None
    confidence_bucket: str | None = None
    authority_violations: int | None = None
    loaded: bool | None = None
    allowed_roots_count: int | None = None
    reason: str | None = None
    # Short error label (callers truncate to 80 characters).
    error: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def compact_dict(obj: Any) -> dict[str, Any]:
    """Return *obj* as a dict with ``None`` and empty-list values removed.

    Dataclass instances are converted with ``asdict``; anything else is passed
    to ``dict``. Other falsy values (``0``, ``""``, ``False``) are kept.
    """
    if hasattr(obj, "__dataclass_fields__"):
        raw = asdict(obj)
    else:
        raw = dict(obj)
    compact: dict[str, Any] = {}
    for key, value in raw.items():
        if value is not None and value != []:
            compact[key] = value
    return compact
|
||||||
|
|
||||||
|
|
||||||
|
# Lower-cased action names that count_authority_violations accepts without
# counting a violation. The empty string covers an absent/blank action.
AUTHORITY_SAFE_ACTIONS = {
    "", "none", "log", "observe", "dry_run", "recommend", "suppress", "escalate",
    "record_metric", "compare_with_expected_label", "include_in_digest",
    "open_review_ticket_candidate", "recommend_human_review",
}
# Canonical authority flag names; a truthy value for any of them counts as a
# violation in count_authority_violations.
AUTHORITY_FLAG_KEYS = {
    "advisory_post",
    "atlas_routing",
    "broad_private_scan",
    "delivery_send",
    "gateway_restart",
    "live_routing",
    "memory_write",
    "outbound_send",
    "private_root_scan",
    "service_restart",
    "tool_execution",
    "vector_mutation",
}
# Alternative flag spellings ("can_*" / "may_*") mapped to canonical names.
AUTHORITY_FLAG_ALIASES = {
    "can_route_atlas": "atlas_routing",
    "can_write_memory": "memory_write",
    "can_execute_tools": "tool_execution",
    "can_restart_services": "service_restart",
    "can_send_outbound": "outbound_send",
    "can_scan_private_roots": "private_root_scan",
    "can_mutate_vector_store": "vector_mutation",
    "can_post_advisory_event": "advisory_post",
    "can_change_gateway_config": "gateway_restart",
    "may_route": "atlas_routing",
    "may_write_memory": "memory_write",
    "may_execute_tools": "tool_execution",
    "may_restart_services": "service_restart",
    "may_send_external": "outbound_send",
    "may_process_private_dirs": "private_root_scan",
    "may_mutate_vector_db": "vector_mutation",
    "may_change_live_config": "gateway_restart",
}
|
||||||
|
|
||||||
|
|
||||||
|
def confidence_bucket(confidence: float | None) -> str | None:
    """Map a confidence score to ``"high"``, ``"medium"`` or ``"low"``.

    ``None`` passes through unchanged. Scores of at least 0.8 are high and
    scores of at least 0.5 are medium; everything else is low.
    """
    if confidence is None:
        return None
    for floor, label in ((0.8, "high"), (0.5, "medium")):
        if confidence >= floor:
            return label
    return "low"
|
||||||
|
|
||||||
|
|
||||||
|
def coerce_confidence(value: Any) -> float | None:
    """Convert *value* to a confidence in ``[0.0, 1.0]``, or ``None``.

    Ints, floats and numeric strings are converted and clamped to the unit
    interval. Booleans, other types, unparseable strings and NaN yield
    ``None``. NaN needs an explicit check: ``min(1.0, nan)`` returns ``1.0``,
    so clamping alone would report a missing score as full confidence.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float, str)):
        return None
    try:
        number = float(value)
    except ValueError:
        return None
    except OverflowError:
        # Only an int too large for a float reaches here; clamp by sign.
        return 1.0 if value > 0 else 0.0
    if math.isnan(number):
        return None
    return max(0.0, min(1.0, number))
|
||||||
|
|
||||||
|
|
||||||
|
def extract_confidence(payload: dict[str, Any]) -> float | None:
    """Return the payload's confidence, or the best per-label score.

    A usable top-level ``confidence`` wins. Otherwise each value in the
    ``labels`` dict contributes one score: a dict label uses the first of
    ``confidence``/``score``/``probability`` that is present, any other value
    is coerced directly. The highest usable score is returned, or ``None``
    when there is none.
    """
    top_level = coerce_confidence(payload.get("confidence"))
    if top_level is not None:
        return top_level

    labels = payload.get("labels")
    if not isinstance(labels, dict):
        return None

    best: float | None = None
    for entry in labels.values():
        if isinstance(entry, dict):
            # Only the first key that is present is consulted, even if its
            # value turns out to be unusable.
            present = next((k for k in ("confidence", "score", "probability") if k in entry), None)
            score = None if present is None else coerce_confidence(entry.get(present))
        else:
            score = coerce_confidence(entry)
        if score is not None and (best is None or score > best):
            best = score
    return best
|
||||||
|
|
||||||
|
|
||||||
|
def extract_recommendation(payload: dict[str, Any]) -> str | None:
    """Return a short recommendation label from *payload*, if any.

    The first non-empty string among ``recommendation``, ``classification``
    and ``input_class`` is used; failing that, ``action["recommendation"]`` or
    ``action["type"]`` when ``action`` is a dict. The result is truncated to
    48 characters.
    """
    top_level = (payload.get(key) for key in ("recommendation", "classification", "input_class"))
    for candidate in top_level:
        if isinstance(candidate, str) and candidate:
            return candidate[:48]

    action = payload.get("action")
    if not isinstance(action, dict):
        return None
    nested = action.get("recommendation") or action.get("type")
    if not nested:
        return None
    return str(nested)[:48]
|
||||||
|
|
||||||
|
|
||||||
|
def count_authority_violations(payload: dict[str, Any]) -> int:
    """Count advisory response hints that would exceed read-only/dry-run authority.

    Supports both legacy compact payloads and `npu_advisory_decision_v1`.
    Three parts of the payload are inspected:

    * ``authority_flags``: each truthy flag whose (alias-resolved) name is a
      known authority flag counts once.
    * ``allowed_actions``: each entry not in the safe-action set counts once.
    * ``actual_action``: counts once if an object-shaped action was performed,
      lists side effects, or has an unsafe kind; or if a plain value is
      non-empty and not a safe action.
    """
    flags = payload.get("authority_flags")
    flag_hits = 0
    if isinstance(flags, dict):
        flag_hits = sum(
            1
            for name, enabled in flags.items()
            if AUTHORITY_FLAG_ALIASES.get(name, name) in AUTHORITY_FLAG_KEYS and bool(enabled)
        )

    allowed = payload.get("allowed_actions")
    allowed_hits = 0
    if isinstance(allowed, list):
        allowed_hits = sum(1 for entry in allowed if str(entry).lower() not in AUTHORITY_SAFE_ACTIONS)

    actual = payload.get("actual_action")
    if isinstance(actual, dict):
        # Object-shaped actions additionally accept two reporting-only kinds.
        safe_kinds = AUTHORITY_SAFE_ACTIONS | {"recorded_metric", "dry_run_reported"}
        kind = str(actual.get("kind") or "none").lower()
        unsafe = bool(actual.get("performed")) or bool(actual.get("side_effects") or []) or kind not in safe_kinds
    else:
        label = str(actual or "").lower()
        unsafe = bool(label) and label not in AUTHORITY_SAFE_ACTIONS

    return flag_hits + allowed_hits + (1 if unsafe else 0)
|
||||||
|
|
||||||
|
|
||||||
|
def read_busy(path: Path = BUSY_PATH) -> int | None:
    """Read the integer counter stored in *path*.

    Returns ``None`` on any failure (missing file, permissions, non-integer
    content), so callers can treat the counter as optional.
    """
    try:
        raw = path.read_text()
        return int(raw.strip())
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def safe_error(exc: BaseException) -> str:
    """Return only the exception's class name, never its message."""
    error_type = type(exc)
    return error_type.__name__
|
||||||
|
|
||||||
|
|
||||||
|
def http_get_json(url: str, timeout: float) -> tuple[int, dict[str, Any]]:
    """GET *url* and return ``(status, json_object)`` without raising.

    At most 1 MiB of the body is read. HTTP error responses return their
    status code with the parsed body, or ``{"error": "http_error"}`` when the
    body cannot be read or parsed. Any other failure returns status ``0`` and
    the exception's class name as the error.

    A body that parses to something other than a JSON object (a list or
    scalar) is replaced by ``{"error": "non_object_json"}``, so callers can
    always use ``.get`` on the payload.
    """
    try:
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            body = resp.read(1024 * 1024).decode("utf-8", "replace")
            parsed = json.loads(body or "{}")
            if not isinstance(parsed, dict):
                parsed = {"error": "non_object_json"}
            return int(resp.status), parsed
    except urllib.error.HTTPError as exc:
        try:
            body = exc.read(1024 * 1024).decode("utf-8", "replace")
            parsed = json.loads(body or "{}")
            if not isinstance(parsed, dict):
                parsed = {"error": "non_object_json"}
            return int(exc.code), parsed
        except Exception:
            return int(exc.code), {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
|
||||||
|
|
||||||
|
|
||||||
|
def http_post_json(url: str, payload: dict[str, Any], timeout: float) -> tuple[int, dict[str, Any]]:
    """POST *payload* as JSON to *url* and return ``(status, json_object)``.

    At most 2 MiB of a successful response (1 MiB of an error response) is
    read. HTTP error responses return their status code with the parsed body,
    or ``{"error": "http_error"}`` when the body cannot be read or parsed.
    Any other failure while sending returns status ``0`` and the exception's
    class name as the error.

    A body that parses to something other than a JSON object (a list or
    scalar) is replaced by ``{"error": "non_object_json"}``, so callers can
    always use ``.get`` on the payload.
    """
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    req = urllib.request.Request(url, data=body, headers={"Content-Type": "application/json", "Accept": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = resp.read(2 * 1024 * 1024).decode("utf-8", "replace")
            parsed = json.loads(data or "{}")
            if not isinstance(parsed, dict):
                parsed = {"error": "non_object_json"}
            return int(resp.status), parsed
    except urllib.error.HTTPError as exc:
        try:
            data = exc.read(1024 * 1024).decode("utf-8", "replace")
            parsed = json.loads(data or "{}")
            if not isinstance(parsed, dict):
                parsed = {"error": "non_object_json"}
            return int(exc.code), parsed
        except Exception:
            return int(exc.code), {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
|
||||||
|
|
||||||
|
|
||||||
|
def health_row(service: str, url: str, timeout: float, gate: str = "none", mode: str = "health_only") -> tuple[ServiceRow, dict[str, Any]]:
    """Check a service's health URL and return ``(row, health_payload)``.

    The service is healthy when the response status is 200 and the payload
    does not set ``ok`` to ``False``. An unhealthy row is marked
    ``unavailable`` with one fallback, an ``unavailable`` warning and a short
    error label (at most 80 characters).
    """
    status, payload = http_get_json(url, timeout)
    healthy = status == 200 and payload.get("ok", True) is not False
    if healthy:
        return ServiceRow(service=service, reachable=True, mode=mode, gate=gate), payload

    row = ServiceRow(service=service, reachable=False, mode="unavailable", gate=gate)
    row.fallbacks = 1
    row.warnings.append("unavailable")
    detail = payload.get("error") or payload.get("ready_error") or f"http_{status}"
    row.error = str(detail)[:80]
    return row, payload
|
||||||
|
|
||||||
|
|
||||||
|
def measure_probe(fn: Callable[[], tuple[int, dict[str, Any]]], timeout_label: str, busy_path: Path = BUSY_PATH) -> tuple[int, dict[str, Any], float, int | None]:
    """Run *fn* while timing it and sampling the NPU busy counter.

    Returns ``(status, payload, elapsed_ms, delta)``: *fn*'s own result, the
    wall time in milliseconds rounded to three decimals, and the counter
    difference, or ``None`` when either counter read failed.
    *timeout_label* is accepted but not used by this function.
    """
    busy_start = read_busy(busy_path)
    clock_start = time.perf_counter()
    status, payload = fn()
    elapsed_ms = round((time.perf_counter() - clock_start) * 1000, 3)
    busy_end = read_busy(busy_path)
    if busy_start is None or busy_end is None:
        delta = None
    else:
        delta = busy_end - busy_start
    return status, payload, elapsed_ms, delta
|
||||||
|
|
||||||
|
|
||||||
|
def apply_proof(row: ServiceRow, delta: int | None) -> None:
    """Record the sampled NPU delta on *row* and decide whether it is proof.

    Only a strictly positive delta counts as proof. Otherwise one fallback is
    added, with a warning that says whether the counter was unreadable or
    simply did not advance.
    """
    verified = delta is not None and delta > 0
    row.npu_delta_us = delta
    row.proof_ok = bool(verified)
    if verified:
        return
    row.fallbacks += 1
    if delta is None:
        row.warnings.append("missing_sysfs_counter")
    else:
        row.warnings.append("no_positive_sysfs_delta")
|
||||||
|
|
||||||
|
|
||||||
|
def mark_skipped_fallback(row: ServiceRow, reason: str) -> None:
    """Count a skipped or unloaded proof probe on *row* as a fallback.

    Intended for proof-capable rows whose bounded smoke probe was disabled or
    skipped to avoid side effects such as cold-loading. Rows that are
    health-only by design should not be passed here, so their fallback count
    stays at zero.
    """
    row.fallbacks = row.fallbacks + 1
    row.warnings.append(reason)
|
||||||
|
|
||||||
|
|
||||||
|
def probe_embeddings(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the embeddings service, then send one synthetic probe.

    Returns the health row unchanged when the service is unreachable.
    Otherwise a single fixed, non-private input is embedded while the NPU
    busy counter is sampled; the row records timing, the returned embedding
    count/dimension, the service-reported delta and the proof outcome.
    """
    row, _ = health_row("embeddings", EMBED_HEALTH_URL, timeout)
    if not row.reachable:
        return row

    payload = {"input": "non-private npu utilization digest probe", "model": "bge-base-en-v1.5-int8-ov"}

    def send() -> tuple[int, dict[str, Any]]:
        return post_json(EMBED_URL, payload, timeout)

    status, data, elapsed, delta = measure_probe(send, "embeddings", busy_path)
    vectors = data.get("data")
    reported = data.get("npu_busy_delta_us")

    row.probe_ran = True
    row.calls = 1
    row.items = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    # The probe result overrides the earlier health-check reachability.
    row.reachable = status == 200 and "data" in data
    row.embedding_count = len(data.get("data", [])) if isinstance(vectors, list) else 0
    row.embedding_dim = data.get("embedding_dim")
    row.response_delta_us = reported if isinstance(reported, int) else None
    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def probe_rerank(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the rerank service, then rerank two fixed synthetic documents.

    Args:
        timeout: Passed to the health check and to the probe POST.
        busy_path: Forwarded to ``measure_probe``.
        post_json: Callable used for the POST; must return ``(status, data)``.

    Returns:
        The row from ``health_row``, returned untouched when the service is not
        reachable, otherwise filled in with the probe results.
    """
    row, _ = health_row("rerank", RERANK_HEALTH_URL, timeout)
    if not row.reachable:
        return row
    docs = ["Intel NPU accelerates OpenVINO inference.", "Bananas ripen on a kitchen counter."]
    payload = {"query": "OpenVINO NPU inference", "documents": docs, "top_k": 2, "return_documents": False}
    status, data, elapsed, delta = measure_probe(lambda: post_json(RERANK_URL, payload, timeout), "rerank", busy_path)
    row.probe_ran = True
    row.calls = 1
    row.docs = len(docs)
    # Prefer the service-reported duration, but never let a malformed value
    # (e.g. a dict or a non-numeric string) abort the whole digest run.
    try:
        row.avg_ms = float(data.get("duration_ms") or elapsed)
    except (TypeError, ValueError):
        row.avg_ms = elapsed
    row.mode = "NPU"
    # The probe result replaces the health-check verdict.
    row.reachable = status == 200 and data.get("ok", True) is not False
    reported_delta = data.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def probe_classifier(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the classifier, then classify one synthetic event in dry-run mode.

    Args:
        timeout: Passed to the health check and to the probe POST.
        busy_path: Forwarded to ``measure_probe``.
        post_json: Callable used for the POST; must return ``(status, data)``.

    Returns:
        The row from ``health_row``, returned untouched when the service is not
        reachable, otherwise filled in with the probe results, the derived
        escalate/suppress flags, recommendation, confidence and authority
        violation count.
    """
    row, _ = health_row("classifier", CLASSIFIER_HEALTH_URL, timeout, mode="dry_run")
    if not row.reachable:
        return row

    request_body = {
        "id": "npu-digest-probe",
        "text": "Non-private cron event: backup completed successfully, no user action required.",
        "options": {"dry_run": True, "include_evidence": False},
    }

    def send_probe() -> tuple[int, dict[str, Any]]:
        return post_json(CLASSIFIER_URL, request_body, timeout)

    status, data, elapsed, delta = measure_probe(send_probe, "classifier", busy_path)

    row.probe_ran = True
    row.calls = 1
    row.events = 1
    row.avg_ms = elapsed
    row.mode = "dry_run"
    row.dry_run = True
    # The probe result replaces the health-check verdict.
    row.reachable = status == 200 and "error" not in data

    # First integer-valued counter key wins; the sysfs-prefixed key has priority.
    reported_delta = None
    for key in ("sysfs_npu_busy_delta_us", "npu_busy_delta_us"):
        candidate = data.get(key)
        if isinstance(candidate, int):
            reported_delta = candidate
            break
    row.response_delta_us = reported_delta

    # Tolerate missing or non-dict sections by treating them as empty.
    labels: dict[str, Any] = data.get("labels") if isinstance(data.get("labels"), dict) else {}
    action: dict[str, Any] = data.get("action") if isinstance(data.get("action"), dict) else {}

    wants_escalation = action.get("escalate") or labels.get("action_required") or labels.get("tool_needed")
    wants_suppression = action.get("suppress") or labels.get("no_op") or labels.get("duplicate")
    row.escalate = int(bool(wants_escalation))
    row.suppress = int(bool(wants_suppression))

    if row.escalate:
        derived = "escalate"
    elif row.suppress:
        derived = "suppress"
    else:
        derived = "log"
    row.recommendation = extract_recommendation(data) or derived

    row.confidence = extract_confidence(data)
    row.confidence_bucket = confidence_bucket(row.confidence)
    row.authority_violations = count_authority_violations(data)
    if row.authority_violations:
        row.warnings.append("authority_violation")
    row.items = len(labels)
    apply_proof(row, delta)

    if row.reachable:
        return row
    row.warnings.append("probe_http_failed")
    row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def write_tone_wav(path: Path, seconds: float = 0.35, sample_rate: int = 16000) -> None:
    """Write a mono, 16-bit PCM WAV file containing a 440 Hz sine tone.

    Args:
        path: Destination file; opened for writing through ``wave.open``.
        seconds: Tone length; ``int(seconds * sample_rate)`` frames are written.
        sample_rate: Frame rate stored in the WAV header and used for synthesis.
    """
    frames = int(seconds * sample_rate)
    # Build the whole PCM payload once and hand it to the writer in a single
    # call instead of issuing one tiny write per sample.
    pcm = b"".join(
        int(9000 * math.sin(2 * math.pi * 440 * (i / sample_rate))).to_bytes(2, byteorder="little", signed=True)
        for i in range(frames)
    )
    with wave.open(str(path), "wb") as wav:
        wav.setnchannels(1)
        wav.setsampwidth(2)
        wav.setframerate(sample_rate)
        wav.writeframes(pcm)
|
||||||
|
|
||||||
|
|
||||||
|
def post_multipart_file(url: str, file_path: Path, timeout: float) -> tuple[int, dict[str, Any]]:
    """POST ``file_path`` as a multipart/form-data upload and return the JSON reply.

    The form carries ``model=whisper``, ``response_format=json`` and the file
    bytes as a part named ``file`` (filename ``npu-digest.wav``, audio/wav).

    Args:
        url: Endpoint to POST to.
        file_path: File whose bytes are uploaded; read errors propagate.
        timeout: Passed to ``urllib.request.urlopen``.

    Returns:
        ``(status, payload)``. At most 1 MiB of the response body is read.
        Any request or decoding failure yields ``(0, {"error": ...})``. A
        response that is valid JSON but not an object yields the HTTP status
        with an ``{"error": ...}`` payload, so callers can rely on a dict.
    """
    boundary = "----npu-digest-" + uuid.uuid4().hex
    file_bytes = file_path.read_bytes()
    parts = [
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nwhisper\r\n".encode(),
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"response_format\"\r\n\r\njson\r\n".encode(),
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"file\"; filename=\"npu-digest.wav\"\r\nContent-Type: audio/wav\r\n\r\n".encode(),
        file_bytes,
        f"\r\n--{boundary}--\r\n".encode(),
    ]
    req = urllib.request.Request(
        url,
        data=b"".join(parts),
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            status = int(resp.status)
            decoded = json.loads(resp.read(1024 * 1024).decode("utf-8", "replace") or "{}")
    except Exception as exc:
        # Deliberate best-effort: a digest probe must never raise on transport errors.
        return 0, {"error": safe_error(exc)}
    if not isinstance(decoded, dict):
        # A JSON list/scalar would break callers that use ``.get`` on the payload.
        return status, {"error": "non_object_json_response"}
    return status, decoded
|
||||||
|
|
||||||
|
|
||||||
|
def probe_whisper(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH) -> ServiceRow:
    """Health-check Whisper and, when enabled, transcribe a short synthetic tone.

    Args:
        timeout: Passed to the health check and to the multipart upload.
        include_smoke: When false, a reachable service is recorded as
            ``health_only`` with one skipped fallback and no upload is made.
        busy_path: Forwarded to ``measure_probe``.

    Returns:
        The row from ``health_row`` with ``jobs`` set, plus the probe results
        when the smoke ran.
    """
    row, _ = health_row("whisper", WHISPER_HEALTH_URL, timeout)
    row.jobs = 0
    if not row.reachable:
        return row
    if not include_smoke:
        row.mode = "health_only"
        row.reason = "smoke_disabled"
        mark_skipped_fallback(row, "skipped")
        return row

    # The tone file only needs to exist for the duration of the upload.
    with tempfile.TemporaryDirectory(prefix="npu-digest-whisper-") as scratch:
        wav_path = Path(scratch) / "probe.wav"
        write_tone_wav(wav_path)

        def upload() -> tuple[int, dict[str, Any]]:
            return post_multipart_file(WHISPER_URL, wav_path, timeout)

        status, data, elapsed, delta = measure_probe(upload, "whisper", busy_path)

    reported_rate = data.get("sample_rate")
    reported_delta = data.get("npu_busy_delta_us")

    row.probe_ran = True
    row.calls = 1
    row.jobs = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    # The probe result replaces the health-check verdict.
    row.reachable = status == 200 and "error" not in data
    row.text_len = len(str(data.get("text") or ""))
    row.sample_rate = reported_rate if isinstance(reported_rate, int) else None
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)

    if row.reachable:
        return row
    row.warnings.append("probe_http_failed")
    row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def probe_genai(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the GenAI service and, when allowed, run a tiny generation.

    The generation is skipped, and recorded as one fallback, when
    ``include_smoke`` is false or the health payload reports ``loaded`` as
    false, so the digest never triggers a cold model load.

    Args:
        timeout: Passed to the health check and to the probe POST.
        include_smoke: Enables the generation probe.
        busy_path: Forwarded to ``measure_probe``.
        post_json: Callable used for the POST; must return ``(status, data)``.

    Returns:
        The row from ``health_row`` with ``loaded`` and ``jobs`` set, plus the
        probe results when the smoke ran. A failed probe is reported with the
        ``probe_http_failed`` warning and ``row.error``, as the other probes do.
    """
    row, health = health_row("genai", GENAI_HEALTH_URL, timeout)
    # ``loaded`` stays None when the health payload does not report it.
    row.loaded = bool(health.get("loaded")) if isinstance(health, dict) and "loaded" in health else None
    row.jobs = 0
    if not row.reachable:
        return row
    if not include_smoke or row.loaded is False:
        row.mode = "loaded=false" if row.loaded is False else "health_only"
        row.reason = "skipped_cold_load" if row.loaded is False else "smoke_disabled"
        mark_skipped_fallback(row, row.reason)
        return row
    payload = {"prompt": "Say pong.", "max_new_tokens": 8}
    status, data, elapsed, delta = measure_probe(lambda: post_json(GENAI_GENERATE_URL, payload, timeout), "genai", busy_path)
    row.probe_ran = True
    row.calls = 1
    row.jobs = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    # The probe result replaces the health-check verdict.
    row.reachable = status == 200 and "error" not in data
    apply_proof(row, delta)
    if not row.reachable:
        # Match the sibling probes: surface why the probe request failed.
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def doc_triage_sample_path() -> Path | None:
    """Locate the checked-in synthetic invoice image used by the doc-triage probe.

    Two locations are tried in order: a fixed absolute checkout path, then the
    same relative location under the directory two levels above this file. A
    candidate qualifies only if both the PNG and its ``.png.txt`` sidecar exist.

    Returns:
        The first qualifying image path, or ``None`` when neither qualifies.
    """
    search_order = (
        Path("/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png"),
        Path(__file__).resolve().parents[1] / "openvino-doc-image-triage-npu" / "samples" / "synthetic_invoice.png",
    )
    return next(
        (image for image in search_order if image.exists() and image.with_suffix(".png.txt").exists()),
        None,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def probe_doc_triage(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check doc/image triage and, when enabled, triage one synthetic invoice.

    The probe uses the checked-in sample when ``doc_triage_sample_path`` finds
    one. Otherwise it writes a 1x1 PNG plus a text sidecar into a temporary
    directory and points the service at that directory as its only allowed root.

    Args:
        timeout: Passed to the health check and to the probe POST.
        include_smoke: When false, a reachable service is recorded as
            ``health_only`` with one skipped fallback and no request is made.
        busy_path: Forwarded to ``measure_probe``.
        post_json: Callable used for the POST; must return ``(status, data)``.

    Returns:
        The row from ``health_row`` with ``files`` set, plus the probe results
        when the smoke ran.
    """
    row, _ = health_row("doc_triage", DOC_TRIAGE_HEALTH_URL, timeout, gate="closed:private-root")
    row.files = 0
    if not row.reachable:
        return row
    if not include_smoke:
        row.mode = "health_only"
        row.reason = "smoke_disabled"
        mark_skipped_fallback(row, "skipped")
        return row

    def triage(image: Path, allowed_root: Path):
        """Send one triage request restricted to ``allowed_root``."""
        payload = {"path": str(image), "options": {"allowed_roots": [str(allowed_root)], "include_ocr_text": False, "use_embeddings": True}}
        return measure_probe(lambda: post_json(DOC_TRIAGE_URL, payload, timeout), "doc_triage", busy_path)

    sample = doc_triage_sample_path()
    if sample is None:
        # No checked-in sample: synthesize one that lives only for this request.
        with tempfile.TemporaryDirectory(prefix="npu-digest-doc-") as scratch:
            root = Path(scratch).resolve()
            sample = root / "synthetic-invoice.png"
            sample.write_bytes(base64.b64decode("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII="))
            sample.with_suffix(".png.txt").write_text("Synthetic invoice. Amount due $12.34 by 2026-06-30. No private data.\n")
            status, data, elapsed, delta = triage(sample, root)
    else:
        status, data, elapsed, delta = triage(sample, sample.parent.resolve())

    row.probe_ran = True
    row.calls = 1
    row.files = 1
    row.avg_ms = elapsed
    row.mode = "NPU-via-embedding-service"
    row.allowed_roots_count = 1
    # The probe result replaces the health-check verdict.
    row.reachable = status == 200 and data.get("ok", True) is not False

    # Walk result -> pages[0] -> needs_attention -> embedding, treating any
    # missing or mistyped level as empty.
    result: dict[str, Any] = data.get("result") if isinstance(data.get("result"), dict) else {}
    pages: list[Any] = result.get("pages") if isinstance(result.get("pages"), list) else []
    embedding: dict[str, Any] = {}
    if pages and isinstance(pages[0], dict):
        attention = pages[0].get("needs_attention")
        if isinstance(attention, dict) and isinstance(attention.get("embedding"), dict):
            embedding = attention["embedding"]
    reported_delta = embedding.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)

    if row.reachable:
        return row
    row.warnings.append("probe_http_failed")
    row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def build_summary(rows: list[ServiceRow], artifact_path: str | None, counter_delta_us: int | None, started_at: str) -> dict[str, Any]:
    """Aggregate per-service rows into the single ``summary`` record of the digest.

    Args:
        rows: Service rows, in the order they should be reported.
        artifact_path: Stored under ``artifact``; ``None`` when nothing was written.
        counter_delta_us: Stored under ``delta_us``.
        started_at: Stored under ``timestamp``.

    Returns:
        A dict with totals (reachable services, proofs, fallbacks, closed
        gates, authority violations), per-service maps, and non-zero
        recommendation, confidence and warning tallies.
    """
    services_ok = 0
    proof_total = 0
    proof_ok = 0
    gates_closed = 0
    fallbacks = 0
    authority_violations = 0
    request_counts_by_service = {}
    npu_busy_delta_us_by_service = {}
    fallbacks_by_service = {}
    recommendation_counts = {"escalate": 0, "suppress": 0}
    confidence_distribution: dict[str, int] = {"low": 0, "medium": 0, "high": 0, "unknown": 0}
    warnings: dict[str, int] = {}

    for row in rows:
        if row.reachable:
            services_ok += 1
        # Only probes that actually ran and produced a verdict count toward proof totals.
        if row.probe_ran and row.proof_ok is not None:
            proof_total += 1
            if row.proof_ok:
                proof_ok += 1
        if str(row.gate).startswith("closed:"):
            gates_closed += 1

        fallbacks += row.fallbacks
        if row.fallbacks:
            fallbacks_by_service[row.service] = row.fallbacks
        if row.calls:
            request_counts_by_service[row.service] = row.calls
        if row.npu_delta_us is not None:
            npu_busy_delta_us_by_service[row.service] = row.npu_delta_us

        # A recognised recommendation string counts once; anything else falls
        # back to the row's numeric escalate/suppress flags.
        verdict = (row.recommendation or "").lower()
        if verdict in recommendation_counts:
            recommendation_counts[verdict] += 1
        else:
            recommendation_counts["escalate"] += row.escalate or 0
            recommendation_counts["suppress"] += row.suppress or 0

        bucket = row.confidence_bucket
        if bucket:
            confidence_distribution[bucket] = confidence_distribution.get(bucket, 0) + 1
        elif row.recommendation or row.escalate is not None or row.suppress is not None:
            confidence_distribution["unknown"] += 1

        authority_violations += row.authority_violations or 0
        for label in row.warnings:
            warnings[label] = warnings.get(label, 0) + 1

    summary: dict[str, Any] = {"type": "summary", "timestamp": started_at}
    summary["counter"] = str(BUSY_PATH)
    summary["delta_us"] = counter_delta_us
    summary["services_ok"] = services_ok
    summary["services_total"] = len(rows)
    summary["proof_ok"] = proof_ok
    summary["proof_total"] = proof_total
    summary["fallbacks"] = fallbacks
    summary["fallbacks_by_service"] = fallbacks_by_service
    summary["request_counts_by_service"] = request_counts_by_service
    summary["npu_busy_delta_us_by_service"] = npu_busy_delta_us_by_service
    # Zero-valued buckets are dropped from both tallies.
    summary["confidence_distribution"] = {k: v for k, v in confidence_distribution.items() if v}
    summary["recommendation_counts"] = {k: v for k, v in recommendation_counts.items() if v}
    summary["authority_violations"] = authority_violations
    summary["gates_closed"] = gates_closed
    summary["warnings"] = warnings
    summary["artifact"] = artifact_path
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def render_text(summary: dict[str, Any], rows: list[ServiceRow]) -> str:
    """Render the summary record and the service rows as a plain-text digest.

    Layout: three header lines, optional ``recommendations:`` and
    ``confidence:`` lines, one ``- service: key=value ...`` line per row, a
    ``fallbacks:`` line built from the summary's warning tallies, and an
    optional ``artifact:`` line.

    Args:
        summary: Record produced for the run; must hold the header keys.
        rows: Service rows, rendered in the given order.

    Returns:
        The lines joined with newlines, without a trailing newline.
    """
    out = [
        f"NPU utilization digest {summary['timestamp']}",
        f"counter={summary['counter']} delta_us={summary.get('delta_us')}",
        f"services_ok={summary['services_ok']}/{summary['services_total']} proof_ok={summary['proof_ok']}/{summary['proof_total']} fallbacks={summary['fallbacks']} authority_violations={summary['authority_violations']} gates_closed={summary['gates_closed']}",
    ]

    for heading, key in (("recommendations", "recommendation_counts"), ("confidence", "confidence_distribution")):
        tallies = summary.get(key) or {}
        if tallies:
            out.append(f"{heading}: " + " ".join(f"{k}={v}" for k, v in sorted(tallies.items())))

    # (label, attribute, render-as-lowercase-bool); emitted only when not None.
    optional_fields = (
        ("jobs", "jobs", False),
        ("events", "events", False),
        ("files", "files", False),
        ("docs", "docs", False),
        ("avg_ms", "avg_ms", False),
        ("npu_delta_us", "npu_delta_us", False),
        ("proof", "proof_ok", True),
        ("dry_run", "dry_run", True),
        ("suppress", "suppress", False),
        ("escalate", "escalate", False),
        ("recommendation", "recommendation", False),
        ("confidence", "confidence_bucket", False),
        ("authority_violations", "authority_violations", False),
        ("loaded", "loaded", True),
        ("allowed_roots", "allowed_roots_count", False),
        ("text_len", "text_len", False),
    )

    for row in rows:
        cells = [f"- {row.service}:", f"ok={str(row.reachable).lower()}"]
        if row.calls:
            cells.append(f"calls={row.calls}")
        for label, attribute, as_flag in optional_fields:
            value = getattr(row, attribute)
            if value is None:
                continue
            cells.append(f"{label}={str(value).lower()}" if as_flag else f"{label}={value}")
        if row.mode:
            cells.append(f"mode={row.mode}")
        if row.gate != "none":
            cells.append(f"gate={row.gate}")
        if row.reason:
            cells.append(f"reason={row.reason}")
        if row.warnings:
            # Duplicates are collapsed and the names sorted for stable output.
            cells.append("warnings=" + ",".join(sorted(set(row.warnings))))
        out.append(" ".join(cells))

    warning_tallies = summary.get("warnings") or {}
    if warning_tallies:
        out.append("fallbacks: " + " ".join(f"{k}={v}" for k, v in sorted(warning_tallies.items())))
    else:
        out.append("fallbacks: none")

    if summary.get("artifact"):
        out.append(f"artifact: {summary['artifact']}")
    return "\n".join(out)
|
||||||
|
|
||||||
|
|
||||||
|
def write_jsonl(summary: dict[str, Any], rows: list[ServiceRow], out_dir: Path) -> Path:
    """Write the summary and every row as compact JSON lines under ``out_dir``.

    The file is named after ``summary["timestamp"]`` with ``:``, ``+`` and
    ``-`` removed, plus ``.jsonl``. An existing file of that name is
    overwritten. ``out_dir`` is created, with parents, when missing.

    Args:
        summary: First line of the file; must contain ``timestamp``.
        rows: Remaining lines, each passed through ``compact_dict``.
        out_dir: Target directory.

    Returns:
        Path of the written file.
    """

    def encode(record: Any) -> str:
        return json.dumps(record, sort_keys=True, separators=(",", ":")) + "\n"

    out_dir.mkdir(parents=True, exist_ok=True)
    file_stem = summary["timestamp"].translate(str.maketrans("", "", ":+-"))
    target = out_dir / f"{file_stem}.jsonl"
    with target.open("w", encoding="utf-8") as handle:
        handle.write(encode(summary))
        handle.writelines(encode(compact_dict(row)) for row in rows)
    return target
|
||||||
|
|
||||||
|
|
||||||
|
def str_bool(value: str) -> bool:
    """Parse a command-line boolean spelling, case-insensitively.

    Accepts ``1/true/yes/y/on`` as true and ``0/false/no/n/off`` as false.

    Raises:
        argparse.ArgumentTypeError: For any other spelling.
    """
    spellings = {
        "1": True, "true": True, "yes": True, "y": True, "on": True,
        "0": False, "false": False, "no": False, "n": False, "off": False,
    }
    token = value.lower()
    if token not in spellings:
        raise argparse.ArgumentTypeError("expected true or false")
    return spellings[token]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the digest command line.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Namespace with ``format``, ``out``, ``timeout_s``, the three
        ``include_*_smoke`` toggles, ``no_write``, ``strict_proof`` and
        ``verbose``.
    """
    parser = argparse.ArgumentParser(description="Compact NPU utilization digest")
    add = parser.add_argument

    add("--format", choices=("text", "jsonl"), default="text")
    add("--out", default=str(DEFAULT_OUT_DIR))
    add("--timeout-s", type=float, default=8.0)

    # Smoke toggles take an explicit true/false value; GenAI defaults to off.
    smoke_defaults = (
        ("--include-whisper-smoke", True),
        ("--include-genai-smoke", False),
        ("--include-doc-triage-smoke", True),
    )
    for flag, enabled in smoke_defaults:
        add(flag, type=str_bool, default=enabled)

    add("--no-write", action="store_true")
    add("--strict-proof", action="store_true", help="exit nonzero if a proof-required probe ran without positive sysfs delta")
    add("--verbose", action="store_true")
    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def run(args: argparse.Namespace) -> tuple[dict[str, Any], list[ServiceRow]]:
    """Run every probe and health check once and build the summary.

    The busy counter is read before the first probe and after the last health
    check; the difference is the summary's overall ``delta_us`` (``None`` when
    either read returned ``None``).

    Args:
        args: Parsed command line; uses ``timeout_s`` and the smoke toggles.

    Returns:
        ``(summary, rows)`` with rows in execution order. The summary's
        ``artifact`` is ``None`` at this point.
    """
    started_at = dt.datetime.now().astimezone().replace(microsecond=0).isoformat()
    timeout = args.timeout_s
    counter_start = read_busy(BUSY_PATH)

    # Probes run sequentially, in this fixed order.
    rows = []
    rows.append(probe_embeddings(timeout))
    rows.append(probe_rerank(timeout))
    rows.append(probe_whisper(timeout, args.include_whisper_smoke))
    rows.append(probe_classifier(timeout))
    rows.append(probe_genai(timeout, args.include_genai_smoke))
    rows.append(probe_doc_triage(timeout, args.include_doc_triage_smoke))

    # These services are only health-checked, never probed.
    health_only = (
        ("rag_endpoint", RAG_ENDPOINT_HEALTH_URL, {"gate": "closed:vector-mutation"}),
        ("rag_health", RAG_HEALTH_URL, {}),
        ("advisory_gateway", ADVISORY_HEALTH_URL, {"gate": "closed:advisory-post"}),
    )
    for service, url, extra in health_only:
        rows.append(health_row(service, url, timeout, **extra)[0])

    counter_end = read_busy(BUSY_PATH)
    if counter_start is None or counter_end is None:
        overall_delta = None
    else:
        overall_delta = counter_end - counter_start

    summary = build_summary(rows, artifact_path=None, counter_delta_us=overall_delta, started_at=started_at)
    return summary, rows
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: run the digest, persist it, and print it.

    Unless ``--no-write`` is given, the JSONL artifact is written twice: the
    first pass determines its path, the second stores that path in the summary
    line. Output goes to stdout as JSON lines or as text, per ``--format``.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``2`` when ``--strict-proof`` is set and a probe that ran has
        ``proof_ok`` equal to ``False``; otherwise ``0``.
    """
    args = parse_args(argv)
    summary, rows = run(args)

    if not args.no_write:
        out_dir = Path(args.out).expanduser()
        summary["artifact"] = str(write_jsonl(summary, rows, out_dir))
        # Same timestamp means same file: rewrite it so the summary line
        # carries the artifact path.
        write_jsonl(summary, rows, out_dir)

    if args.format != "jsonl":
        print(render_text(summary, rows))
    else:
        print(json.dumps(summary, sort_keys=True, separators=(",", ":")))
        for row in rows:
            print(json.dumps(compact_dict(row), sort_keys=True, separators=(",", ":")))

    return 2 if args.strict_proof and any(r.probe_ran and r.proof_ok is False for r in rows) else 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: when executed directly, exit the process with main()'s
# return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Executable
+339
@@ -0,0 +1,339 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Local-file voice/audio NPU advisory pipeline.
|
||||||
|
|
||||||
|
Side-effect-free first slice:
|
||||||
|
local audio file -> Whisper NPU -> classifier NPU -> advisory gate
|
||||||
|
|
||||||
|
No platform fetching, outbound sends, Obsidian/memory/vector writes, service
|
||||||
|
restarts, or live Atlas/Hermes routing changes are performed by this script.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import ipaddress
|
||||||
|
import json
|
||||||
|
import mimetypes
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
import wave
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
# Loopback endpoint URLs for the Whisper transcription and classifier services.
# NOTE(review): presumably the defaults behind args.whisper_url and
# args.classifier_url; the argument parser is not visible here, so confirm.
DEFAULT_WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"
DEFAULT_CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
# Counter file that read_npu_busy_us() parses as an integer by default.
NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
# Lower-case file suffixes that validate_audio_path() accepts.
AUDIO_EXTENSIONS = {".wav", ".ogg", ".oga", ".opus", ".mp3", ".m4a", ".mp4", ".webm", ".flac"}
# Whole-word, case-insensitive keywords; decide_gate() treats a transcript that
# matches any of them as action-worthy.
ACTION_MARKERS = re.compile(
    r"\b(remind|todo|to-do|task|follow[- ]?up|schedule|call|email|send|draft|inspect|check|fix|review|question|ask)\b",
    re.IGNORECASE,
)
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineError(RuntimeError):
    """Failure raised by the pipeline, carrying a status code and extra details.

    Attributes are set in ``__init__``:
        status: Integer code, ``1`` unless overridden. Presumably used as the
            process exit status; confirm against the caller.
        details: Dict of extra context; an empty dict when none (or an empty
            one) is supplied.
    """

    def __init__(self, message: str, *, status: int = 1, details: dict[str, Any] | None = None):
        super().__init__(message)
        self.details = details if details else {}
        self.status = status
|
||||||
|
|
||||||
|
|
||||||
|
def validate_loopback_endpoint(url: str, *, label: str) -> str:
    """Return ``url`` unchanged when it targets an explicit local HTTP(S) endpoint.

    The pipeline reads local audio and posts transcripts/audio bytes, so endpoint
    overrides must not be able to exfiltrate data to remote hosts. The policy is
    intentionally narrow: ``localhost`` (a trailing dot is tolerated), an IPv4
    loopback address, or IPv6 ``::1``. Host names are never resolved.

    Args:
        url: Endpoint URL to check.
        label: Prefix for the error code, e.g. ``whisper`` or ``classifier``.

    Returns:
        ``url`` exactly as given.

    Raises:
        PipelineError: ``<label>_url_invalid`` when the URL cannot be parsed,
            ``<label>_url_scheme_not_allowed``, ``<label>_url_missing_host``,
            or ``<label>_url_host_not_loopback``.
    """
    try:
        parsed = urllib.parse.urlparse(url)
        host = parsed.hostname
    except ValueError as exc:
        # urlparse rejects malformed bracketed hosts (e.g. "http://[::1") with a
        # bare ValueError; keep the single-exception contract callers rely on.
        raise PipelineError(f"{label}_url_invalid") from exc

    if parsed.scheme not in {"http", "https"}:
        raise PipelineError(
            f"{label}_url_scheme_not_allowed",
            details={"url_host": host or "", "allowed_schemes": ["http", "https"]},
        )
    if not host:
        raise PipelineError(f"{label}_url_missing_host")

    normalized = host.rstrip(".").lower()
    if normalized == "localhost":
        return url

    rejection = f"{label}_url_host_not_loopback"
    rejection_details = {"url_host": host, "allowed_hosts": ["localhost", "127.0.0.0/8", "::1"]}
    try:
        address = ipaddress.ip_address(normalized)
    except ValueError as exc:
        # Any other host name is refused, even if it would resolve locally.
        raise PipelineError(rejection, details=rejection_details) from exc
    if not address.is_loopback:
        raise PipelineError(rejection, details=rejection_details)
    return url
|
||||||
|
|
||||||
|
|
||||||
|
def read_npu_busy_us(path: Path = NPU_BUSY_PATH) -> int | None:
    """Read the integer stored in ``path``, ignoring surrounding whitespace.

    Returns:
        The parsed integer, or ``None`` when the file cannot be read or its
        content is not an integer.
    """
    try:
        counter_text = path.read_text()
        reading = int(counter_text.strip())
    except (ValueError, OSError):
        reading = None
    return reading
|
||||||
|
|
||||||
|
|
||||||
|
def delta_us(before: int | None, after: int | None) -> int | None:
    """Return ``after - before`` clamped at zero, or ``None`` if either is ``None``."""
    if before is not None and after is not None:
        elapsed = after - before
        return elapsed if elapsed > 0 else 0
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def encode_multipart(fields: dict[str, str], files: dict[str, tuple[str, bytes, str]]) -> tuple[bytes, str]:
    """Encode text fields and files as a ``multipart/form-data`` request body.

    Args:
        fields: Form field name -> value; values are stringified.
        files: Form field name -> ``(filename, data, content_type)``.

    Returns:
        ``(body, content_type_header)``; the header carries a freshly generated
        boundary. Text fields come first, then files, each in mapping order.
    """

    def quoted(text: str) -> str:
        # Field names and file names land inside a quoted header parameter. A
        # local file name may legally contain '"' or line breaks, which would
        # otherwise corrupt the header or inject new ones. Percent-escape them
        # the way browsers do for form submissions.
        return str(text).replace('"', "%22").replace("\r", "%0D").replace("\n", "%0A")

    boundary = "----npu-voice-audio-" + uuid.uuid4().hex
    delimiter = f"--{boundary}\r\n".encode()
    parts: list[bytes] = []
    for name, value in fields.items():
        parts.append(delimiter)
        parts.append(f'Content-Disposition: form-data; name="{quoted(name)}"\r\n\r\n'.encode())
        parts.append(str(value).encode())
        parts.append(b"\r\n")
    for name, (filename, data, content_type) in files.items():
        parts.append(delimiter)
        parts.append(f'Content-Disposition: form-data; name="{quoted(name)}"; filename="{quoted(filename)}"\r\n'.encode())
        parts.append(f"Content-Type: {content_type}\r\n\r\n".encode())
        parts.append(data)
        parts.append(b"\r\n")
    parts.append(f"--{boundary}--\r\n".encode())
    return b"".join(parts), f"multipart/form-data; boundary={boundary}"
|
||||||
|
|
||||||
|
|
||||||
|
def post_json(url: str, payload: dict[str, Any], *, timeout: int) -> dict[str, Any]:
    """POST ``payload`` as JSON to the classifier endpoint and return its JSON object.

    Args:
        url: Endpoint; must pass ``validate_loopback_endpoint``.
        payload: JSON-serialisable request body.
        timeout: Passed to ``urllib.request.urlopen``.

    Returns:
        The decoded response, guaranteed to be a dict.

    Raises:
        PipelineError: For a non-loopback URL, ``classifier_http_<code>`` on an
            HTTP error status (with a 300-character body preview in details),
            ``classifier_request_failed: ...`` on transport, decoding or JSON
            failures, and ``classifier_response_not_object`` when the response
            is valid JSON but not an object.
    """
    import http.client  # local import: only needed for the exception type

    url = validate_loopback_endpoint(url, label="classifier")
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            decoded = json.loads(resp.read().decode())
    except urllib.error.HTTPError as exc:
        preview = exc.read().decode(errors="replace")[:300]
        raise PipelineError(f"classifier_http_{exc.code}", details={"body_preview": preview}) from exc
    except (OSError, ValueError, http.client.HTTPException) as exc:
        # OSError covers URLError, timeouts and connection resets raised while
        # reading. ValueError covers JSONDecodeError and UnicodeDecodeError.
        # HTTPException covers truncated or garbled responses.
        raise PipelineError(f"classifier_request_failed: {exc}") from exc
    if not isinstance(decoded, dict):
        # Callers use ``.get`` on the result; refuse lists/scalars up front.
        raise PipelineError("classifier_response_not_object", details={"response_type": type(decoded).__name__})
    return decoded
|
||||||
|
|
||||||
|
|
||||||
|
def post_whisper(url: str, audio_path: Path, audio_data: bytes, language: str, *, timeout: int) -> dict[str, Any]:
    """Upload audio bytes to the Whisper endpoint and return its JSON object.

    The request is multipart/form-data with ``model=whisper-1``, the given
    ``language``, ``response_format=json`` and the audio as the ``file`` part.
    The part's content type is guessed from the file name and falls back to
    ``application/octet-stream``.

    Args:
        url: Endpoint; must pass ``validate_loopback_endpoint``.
        audio_path: Only its name is used (file name and content-type guess).
        audio_data: Bytes to upload.
        language: Value of the ``language`` form field.
        timeout: Passed to ``urllib.request.urlopen``.

    Returns:
        The decoded response, guaranteed to be a dict.

    Raises:
        PipelineError: For a non-loopback URL, ``whisper_http_<code>`` on an
            HTTP error status (with a 300-character body preview in details),
            ``whisper_request_failed: ...`` on transport, decoding or JSON
            failures, and ``whisper_response_not_object`` when the response is
            valid JSON but not an object.
    """
    import http.client  # local import: only needed for the exception type

    url = validate_loopback_endpoint(url, label="whisper")
    content_type = mimetypes.guess_type(audio_path.name)[0] or "application/octet-stream"
    request_body, multipart_type = encode_multipart(
        {"model": "whisper-1", "language": language, "response_format": "json"},
        {"file": (audio_path.name, audio_data, content_type)},
    )
    req = urllib.request.Request(url, data=request_body, headers={"Content-Type": multipart_type}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            decoded = json.loads(resp.read().decode())
    except urllib.error.HTTPError as exc:
        preview = exc.read().decode(errors="replace")[:300]
        raise PipelineError(f"whisper_http_{exc.code}", details={"body_preview": preview}) from exc
    except (OSError, ValueError, http.client.HTTPException) as exc:
        # OSError covers URLError, timeouts and connection resets raised while
        # reading. ValueError covers JSONDecodeError and UnicodeDecodeError.
        # HTTPException covers truncated or garbled responses.
        raise PipelineError(f"whisper_request_failed: {exc}") from exc
    if not isinstance(decoded, dict):
        # Callers use ``.get`` on the result; refuse lists/scalars up front.
        raise PipelineError("whisper_response_not_object", details={"response_type": type(decoded).__name__})
    return decoded
|
||||||
|
|
||||||
|
|
||||||
|
def validate_audio_path(path_text: str, *, max_bytes: int, max_audio_seconds: float | None) -> tuple[Path, int]:
    """Validate a user-supplied audio path before its contents are read.

    Checks, in order: absolute path, not a symlink, exists, is a regular file,
    has an allowed extension, is non-empty, and is within ``max_bytes``. For
    ``.wav`` files the duration is also checked when ``max_audio_seconds`` is
    given. Other formats are not duration-checked.

    Args:
        path_text: Path as typed; ``~`` is expanded.
        max_bytes: Largest accepted file size.
        max_audio_seconds: Longest accepted WAV duration, or ``None`` to skip.

    Returns:
        ``(path, size_in_bytes)``.

    Raises:
        PipelineError: With a code naming the failed check. An unreadable,
            truncated or zero-rate WAV header yields ``wav_decode_failed: ...``.
    """
    path = Path(path_text).expanduser()
    if not path.is_absolute():
        raise PipelineError("audio_path_must_be_absolute")
    if path.is_symlink():
        raise PipelineError("audio_path_must_not_be_symlink")
    if not path.exists():
        raise PipelineError("audio_path_not_found")
    if not path.is_file():
        raise PipelineError("audio_path_not_file")
    extension = path.suffix.lower()
    if extension not in AUDIO_EXTENSIONS:
        raise PipelineError("unsupported_audio_extension", details={"extension": extension})
    size = path.stat().st_size
    if size <= 0:
        raise PipelineError("audio_file_empty")
    if size > max_bytes:
        raise PipelineError("audio_file_too_large", details={"bytes": size, "max_bytes": max_bytes})
    if max_audio_seconds is not None and extension == ".wav":
        try:
            with wave.open(str(path), "rb") as wav:
                frame_count = wav.getnframes()
                frame_rate = wav.getframerate()
        except (wave.Error, EOFError) as exc:
            # Truncated files surface as EOFError rather than wave.Error.
            raise PipelineError(f"wav_decode_failed: {exc}") from exc
        if frame_rate <= 0:
            # A zero rate in the header would otherwise divide by zero below.
            raise PipelineError("wav_decode_failed: invalid frame rate")
        duration = frame_count / float(frame_rate)
        if duration > max_audio_seconds:
            raise PipelineError("audio_duration_too_long", details={"duration_seconds": round(duration, 3), "max_audio_seconds": max_audio_seconds})
    return path, size
|
||||||
|
|
||||||
|
|
||||||
|
def extract_transcript(payload: dict[str, Any]) -> str:
    """Pull the transcript text out of a transcription response.

    The first non-empty value among ``text``, ``transcript`` and
    ``transcription`` is used. If none is present and ``segments`` is a list,
    the ``text`` of each dict segment is joined with single spaces. Segments
    whose text is missing or ``None`` contribute an empty string.

    Returns:
        The transcript with surrounding whitespace stripped; ``""`` if none.
    """
    text = payload.get("text") or payload.get("transcript") or payload.get("transcription")
    segments = payload.get("segments")
    if not text and isinstance(segments, list):
        pieces = (seg.get("text") for seg in segments if isinstance(seg, dict))
        # A null segment text must not leak into the transcript as "None".
        text = " ".join("" if piece is None else str(piece) for piece in pieces)
    return str(text or "").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def label_value(labels: dict[str, Any], key: str, default: Any = None) -> Any:
    """Look up ``key`` in ``labels``, unwrapping ``{"value": ...}`` containers.

    Returns:
        ``default`` when the key is absent; the inner ``value`` when the entry
        is a dict containing that key; otherwise the entry itself.
    """
    entry = labels.get(key, default)
    is_wrapped = isinstance(entry, dict) and "value" in entry
    return entry["value"] if is_wrapped else entry
|
||||||
|
|
||||||
|
|
||||||
|
def compact_labels(classifier_payload: dict[str, Any]) -> dict[str, Any]:
    """Reduce a classifier response to the four labels the gate looks at.

    A missing or non-dict ``labels`` section is treated as empty.

    Returns:
        Dict with ``workflow_category`` (``None`` if absent), ``tool_needed``
        (bool), ``urgency`` (``"normal"`` if absent) and
        ``safety_confirmation_required`` (bool).
    """
    labels: dict[str, Any] = {}
    candidate = classifier_payload.get("labels")
    if isinstance(candidate, dict):
        labels = candidate

    compact: dict[str, Any] = {}
    compact["workflow_category"] = label_value(labels, "workflow_category")
    compact["tool_needed"] = bool(label_value(labels, "tool_needed", False))
    compact["urgency"] = label_value(labels, "urgency", "normal")
    compact["safety_confirmation_required"] = bool(label_value(labels, "safety_confirmation_required", False))
    return compact
|
||||||
|
|
||||||
|
|
||||||
|
def classify_text(
    *,
    classifier_url: str,
    item_id: str,
    source: str,
    title: str,
    transcript: str,
    max_transcript_chars: int,
    dry_run: bool,
    timeout: int,
) -> tuple[dict[str, Any], int | None]:
    """Send a bounded transcript to the classifier and measure the busy-counter delta.

    Args:
        classifier_url: Endpoint handed to ``post_json``.
        item_id: Sent as the request ``id``.
        source: Included in the prompt text and in the request context.
        title: Optional; adds a ``Title:`` line to the prompt when non-empty.
        transcript: Transcript text; cut to ``max_transcript_chars`` characters.
        max_transcript_chars: Upper bound on the transcript characters sent.
        dry_run: Forwarded in the request options.
        timeout: Forwarded to ``post_json``.

    Returns:
        ``(classifier_response, delta)`` where ``delta`` is the clamped
        difference between the counter readings taken immediately before and
        after the request, or ``None`` when a reading was unavailable.
    """
    clipped = transcript[:max_transcript_chars]
    title_line = f"Title: {title}\n" if title else ""
    prompt = (
        "Voice memo transcript summary candidate.\n"
        f"Source: {source}\n"
        f"{title_line}Transcript:\n{clipped}"
    )
    request = {
        "id": item_id,
        "text": prompt,
        "context": {"source": source, "media": "audio"},
        "options": {"include_evidence": False, "dry_run": dry_run},
    }
    # Sample the counter as tightly around the request as possible.
    counter_before = read_npu_busy_us()
    response = post_json(classifier_url, request, timeout=timeout)
    counter_after = read_npu_busy_us()
    return response, delta_us(counter_before, counter_after)
|
||||||
|
|
||||||
|
|
||||||
|
def decide_gate(transcript: str, labels: dict[str, Any], whisper_proven: bool, classifier_proven: bool) -> tuple[bool, str, str]:
    """Derive ``(action_worthy, atlas_gate, next_gate)`` for one transcript.

    Precedence: missing NPU proof blocks first, then a required safety
    confirmation, and only then is the item reported as advisory or
    suppressed depending on whether it is action-worthy.
    """
    urgency_level = str(labels.get("urgency") or "normal").lower()
    needs_confirmation = bool(labels.get("safety_confirmation_required"))
    # Action-worthy when the classifier asks for a tool, urgency is elevated,
    # or the transcript itself matches ACTION_MARKERS.
    worth_acting = (
        bool(labels.get("tool_needed"))
        or urgency_level in {"high", "critical"}
        or bool(ACTION_MARKERS.search(transcript))
    )

    if not (whisper_proven and classifier_proven):
        return worth_acting, "blocked_missing_npu_proof", "npu_proof_required"
    if needs_confirmation:
        return worth_acting, "blocked_safety_confirmation_required", "human_approval_required"

    gate = "advisory_only_not_sent" if worth_acting else "suppressed_not_action_worthy"
    return worth_acting, gate, "dry_run_no_side_effects"
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_delta_us(raw: Any) -> int:
    """Convert a service-reported busy delta to ``int``; unparseable values become 0."""
    try:
        return int(raw or 0)
    except (TypeError, ValueError, OverflowError):
        # Fail closed: a malformed delta counts as "no proof" rather than
        # crashing with a traceback outside the PipelineError JSON contract.
        return 0


def run_pipeline(args: argparse.Namespace) -> dict[str, Any]:
    """Run one local audio file through Whisper and the classifier.

    Steps, in order: validate both endpoint URLs and the audio path, POST the
    audio to Whisper, extract the transcript, classify it, derive the gate
    decision, and assemble a compact result dict.

    Args:
        args: Parsed CLI namespace from ``build_parser``. ``whisper_url`` and
            ``classifier_url`` are overwritten in place with their validated
            forms.

    Returns:
        A dict with ``ok=True``, ids, gate fields, busy-time deltas and
        compact labels. Transcript text and raw service payloads are only
        included when the corresponding ``--include-*`` option is set.

    Raises:
        PipelineError: ``"whisper_empty_transcript"`` when no transcript text
            is extracted; helper functions may presumably raise it for
            validation/transport failures as well (not visible here).
    """
    args.whisper_url = validate_loopback_endpoint(args.whisper_url, label="whisper")
    args.classifier_url = validate_loopback_endpoint(args.classifier_url, label="classifier")
    # The second tuple element is not needed here; the bytes are re-read below.
    audio_path, _audio_bytes = validate_audio_path(
        args.audio,
        max_bytes=args.max_bytes,
        max_audio_seconds=args.max_audio_seconds,
    )
    audio_data = audio_path.read_bytes()
    item_id = args.id or f"voice-audio-{int(time.time())}"

    # Sample the busy counter immediately around the Whisper request.
    whisper_before = read_npu_busy_us()
    whisper_payload = post_whisper(args.whisper_url, audio_path, audio_data, args.language, timeout=args.timeout)
    whisper_after = read_npu_busy_us()
    whisper_sysfs_delta = delta_us(whisper_before, whisper_after)
    transcript = extract_transcript(whisper_payload)
    if not transcript:
        raise PipelineError("whisper_empty_transcript")

    whisper_response_delta = _coerce_delta_us(whisper_payload.get("npu_busy_delta_us"))
    # A locally observed delta of None does not veto the proof; an observed
    # non-positive delta does.
    whisper_proven = whisper_response_delta > 0 and (whisper_sysfs_delta is None or whisper_sysfs_delta > 0)

    classifier_payload, classifier_sysfs_observed = classify_text(
        classifier_url=args.classifier_url,
        item_id=item_id,
        source=args.source,
        title=args.title or "",
        transcript=transcript,
        max_transcript_chars=args.max_transcript_chars,
        dry_run=args.dry_run,
        timeout=args.timeout,
    )
    labels = compact_labels(classifier_payload)
    classifier_response_delta = _coerce_delta_us(classifier_payload.get("npu_busy_delta_us"))
    classifier_response_sysfs_delta = _coerce_delta_us(classifier_payload.get("sysfs_npu_busy_delta_us"))
    classifier_proven = (
        classifier_response_delta > 0
        and classifier_response_sysfs_delta > 0
        and (classifier_sysfs_observed is None or classifier_sysfs_observed > 0)
    )

    action_worthy, atlas_gate, next_gate = decide_gate(transcript, labels, whisper_proven, classifier_proven)

    output: dict[str, Any] = {
        "ok": True,
        "id": item_id,
        "source": args.source,
        "transcript_chars": len(transcript),
        "action_worthy": action_worthy,
        "atlas_gate": atlas_gate,
        "next_gate": next_gate,
        "whisper_npu_delta_us": whisper_response_delta,
        "whisper_sysfs_delta_us": whisper_sysfs_delta,
        "classifier_npu_delta_us": classifier_response_delta,
        "classifier_sysfs_delta_us": classifier_response_sysfs_delta,
        "classifier_observed_sysfs_delta_us": classifier_sysfs_observed,
        "labels": labels,
        "external_sends": 0,
        "writes": 0,
    }
    # Transcript text and raw payloads are opt-in only.
    if args.include_transcript:
        output["transcript"] = transcript
    if args.include_transcript_preview_chars > 0:
        output["transcript_preview"] = transcript[: args.include_transcript_preview_chars]
    if args.include_raw:
        output["raw"] = {"whisper": whisper_payload, "classifier": classifier_payload}
    return output
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Construct the command-line parser for the voice/audio pipeline."""
    parser = argparse.ArgumentParser(
        description="Run local-file audio through NPU Whisper and NPU classifier in dry-run advisory mode."
    )
    add = parser.add_argument
    source_labels = [
        "local_file",
        "manual_smoke",
        "local_voice_memo",
        "meeting_snippet",
        "staged_telegram",
        "staged_discord",
    ]

    # Input selection and classifier context.
    add("--audio", required=True, help="Absolute path to a local audio file; no URL/platform fetching is performed.")
    add("--id", default="", help="Optional stable item id for classifier correlation.")
    add("--source", default="local_file", choices=source_labels, help="Local/staged source label only.")
    add("--title", default="", help="Optional short local title for classifier context.")
    add("--language", default="en")

    # Service endpoints.
    add("--whisper-url", default=DEFAULT_WHISPER_URL)
    add("--classifier-url", default=DEFAULT_CLASSIFIER_URL)

    # Both flags write to the same destination; dry-run stays on by default.
    add("--dry-run", dest="dry_run", action="store_true", default=True, help="Keep classifier in dry-run advisory mode (default).")
    add("--no-dry-run", dest="dry_run", action="store_false", help="Send dry_run=false to classifier; this script still performs no side effects.")

    # Output shaping.
    add("--json", action="store_true", help="Emit compact JSON; default is JSON for machine-safe handoff.")
    add("--include-transcript", action="store_true", help="Include full transcript in output; off by default.")
    add("--include-transcript-preview-chars", type=int, default=0, help="Include a bounded transcript preview; default 0.")
    add("--include-raw", action="store_true", help="Include raw service responses for one-off local debugging; off by default.")

    # Size, duration and time limits.
    add("--max-bytes", type=int, default=25 * 1024 * 1024)
    add("--max-audio-seconds", type=float, default=300.0, help="Enforced for WAV inputs; other codecs remain size-capped.")
    add("--max-transcript-chars", type=int, default=6000)
    add("--timeout", type=int, default=300)
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: run the pipeline and print one JSON document.

    On success the result goes to stdout and 0 is returned. On
    ``PipelineError`` a JSON error object (including the exception's
    ``details``) goes to stderr and the exception's ``status`` is returned.
    """
    options = build_parser().parse_args(argv)
    try:
        report = run_pipeline(options)
    except PipelineError as failure:
        error_report = {"ok": False, "error": str(failure), "external_sends": 0, "writes": 0, **failure.details}
        print(json.dumps(error_report, ensure_ascii=False, sort_keys=True), file=sys.stderr)
        return failure.status
    else:
        print(json.dumps(report, ensure_ascii=False, sort_keys=True))
        return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: exit the process with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
+122
-134
@@ -3,7 +3,7 @@ type: runbook
|
|||||||
system: openvino-npu-services
|
system: openvino-npu-services
|
||||||
status: draft
|
status: draft
|
||||||
created: 2026-06-04
|
created: 2026-06-04
|
||||||
updated: 2026-06-04
|
updated: 2026-06-05
|
||||||
tags:
|
tags:
|
||||||
- runbook
|
- runbook
|
||||||
- openvino
|
- openvino
|
||||||
@@ -18,33 +18,92 @@ related:
|
|||||||
|
|
||||||
# OpenVINO NPU Services Runbook
|
# OpenVINO NPU Services Runbook
|
||||||
|
|
||||||
This runbook is the integrated operations view for Will's local Intel NPU/OpenVINO services from the `npu-capability-expansion` board.
|
This runbook is the integrated operations view for Will's local Intel NPU/OpenVINO services after the first approved `npu-maximization` lanes. It treats the NPU as a local reflex layer: classify, embed, rerank, transcribe, triage, and draft compact advisory output while Atlas/Hermes keeps final authority unless a separate approval changes that.
|
||||||
|
|
||||||
Safety posture:
|
Safety posture:
|
||||||
- Do not restart the live Atlas/Hermes gateway from this runbook.
|
- Do not restart the live Atlas/Hermes gateway from this runbook.
|
||||||
- Do not change primary Atlas/Hermes routing without explicit Will approval.
|
- Do not change primary Atlas/Hermes routing without explicit Will approval.
|
||||||
- Do not delete, overwrite, or in-place reindex existing Chroma/vector collections.
|
- Do not delete, overwrite, or in-place reindex existing Chroma/vector collections.
|
||||||
- Treat HTTP 200 as necessary but not sufficient for NPU-backed services; verify `/sys/class/accel/accel0/device/npu_busy_time_us` before/after an inference.
|
- Treat HTTP 200 as necessary but not sufficient for NPU-backed services; verify `/sys/class/accel/accel0/device/npu_busy_time_us` before/after a real inference.
|
||||||
- Keep endpoints local-only unless Will explicitly approves broader exposure.
|
- Keep endpoints local-only or on the approved Docker bridge only; do not add wildcard binds.
|
||||||
- Keep raw prompts, private documents, OCR text, and secrets out of logs and durable handoffs.
|
- Keep raw prompts, private documents, OCR text, transcripts, and secrets out of logs and durable handoffs.
|
||||||
|
- Keep operational outputs compact: booleans, counts, paths, deltas, and gates rather than raw JSON dumps.
|
||||||
|
|
||||||
## Current service map
|
## Reflex-layer topology
|
||||||
|
|
||||||
| Capability | Port | Runtime / service | Path | State | Health endpoint | NPU proof |
|
```text
|
||||||
| --- | ---: | --- | --- | --- | --- | --- |
|
event / audio / doc / query / task
|
||||||
| Obsidian/RAG endpoint | 18810 | `obsidian-reindex-endpoint.service` / local Python endpoint | `~/lab/swarm/scripts/` | live baseline; uses collection `obsidian_bge_npu` | `http://127.0.0.1:18810/healthz` | indirect via embeddings `:18817`; do not mutate existing collection |
|
-> local OpenVINO/NPU specialists
|
||||||
| RAG/embedding health wrapper | 18814 | `rag-embedding-health.service` | `~/lab/swarm/swarm-common/rag-embedding-health.service` | live baseline | `http://127.0.0.1:18814/healthz` | should exercise embeddings path when configured |
|
embeddings :18817, rerank :18818, whisper :18816,
|
||||||
| Whisper transcription, OpenVINO NPU | 18816 | Docker Compose service/container `whisper-server-npu` | `~/lab/swarm/whisper-openvino-npu/` | live baseline | `http://127.0.0.1:18816/health` | transcription response includes `npu_busy_delta_us`; sysfs delta must increase |
|
classifier :18819, genai worker :18820, doc/image triage :18829,
|
||||||
| OpenVINO embeddings | 18817 | user systemd `openvino-embeddings.service` | `~/lab/swarm/scripts/openvino-embeddings-server.py`; unit in `~/lab/swarm/swarm-common/openvino-embeddings.service` | live baseline, enabled | `http://127.0.0.1:18817/healthz` | embedding response and sysfs delta must be positive |
|
advisory gateway 172.19.0.1:18830
|
||||||
| NPU reranker prototype | 18818 | optional user systemd `openvino-reranker.service` | `~/lab/swarm/openvino-reranker-npu/` | approved prototype; not installed/enabled | `http://127.0.0.1:18818/readyz` | `/readyz` reports `device=NPU`; `/v1/rerank` response and sysfs delta must be positive |
|
-> explicit policy and authority gates
|
||||||
| NPU router/classifier prototype | 18819 | optional user systemd `openvino-router-classifier.service` | `~/lab/swarm/openvino-classifier-npu/` | approved prototype; not installed/enabled | `http://127.0.0.1:18819/healthz` | `/v1/classify` response has positive `npu_busy_delta_us` and `sysfs_npu_busy_delta_us` |
|
-> Atlas/Hermes or human only when approved/useful
|
||||||
| Small OpenVINO GenAI NPU worker | 18820 | optional user systemd `openvino-genai-npu-worker.service` | `~/lab/swarm/openvino-genai-npu-worker/` | approved prototype; not installed/enabled | `http://127.0.0.1:18820/healthz`; `GET /models` | generation response includes positive `npu_busy_delta_us` |
|
```
|
||||||
| Document/image triage prototype | optional 18829 for review only; 18828 was an earlier smoke alternate | CLI-first; foreground local-only server if needed; no persistent unit yet | `~/lab/swarm/openvino-doc-image-triage-npu/` | approved prototype; not installed/enabled | `http://127.0.0.1:18829/healthz`; `GET /models` | v1 NPU stage is semantic embedding through `:18817`; image classification/OCR remain CPU/local |
|
|
||||||
|
Authority split:
|
||||||
|
- NPU services may advise, label, score, transcribe, embed, rerank, triage explicit roots/files, and draft bounded summaries.
|
||||||
|
- NPU services must not route Atlas/Hermes, write memory, send outbound messages, restart services, execute tools, mutate Kanban, or mutate vector DBs without separate approval.
|
||||||
|
|
||||||
|
## Live baseline services
|
||||||
|
|
||||||
|
These are part of the current live local baseline. Use read-only checks unless Will explicitly asks for remediation.
|
||||||
|
|
||||||
|
| Capability | Port / bind | Runtime / service | State | Health / proof | Notes |
|
||||||
|
| --- | ---: | --- | --- | --- | --- |
|
||||||
|
| Obsidian/RAG endpoint | `18810` | `obsidian-reindex-endpoint.service` / local Python endpoint | live baseline | `http://127.0.0.1:18810/healthz`; NPU proof is indirect through embeddings/rerank | Uses collection `obsidian_bge_npu`; do not mutate/reindex in place. Discovery observed `RAG_RERANK_ENABLED=true` and `RAG_RERANK_REQUIRE_NPU_PROOF=true`; do not change from this runbook. |
|
||||||
|
| RAG/embedding health wrapper | `18814` | `rag-embedding-health.service` | live baseline | `http://127.0.0.1:18814/healthz` | Health wrapper only; use compact summaries. |
|
||||||
|
| Whisper transcription | `18816` | Docker Compose service/container `whisper-server-npu` | live baseline | `http://127.0.0.1:18816/health`; transcription response plus sysfs busy delta must increase | Use small non-private WAV fixtures for proof. Do not restart from docs. |
|
||||||
|
| OpenVINO embeddings | `18817` | user systemd `openvino-embeddings.service` | live baseline, enabled | `http://127.0.0.1:18817/healthz`; embedding response and sysfs delta must be positive | Model `bge-base-en-v1.5-int8-ov`, dim 768. Existing bind is broader than new-service guidance; do not broaden anything else. |
|
||||||
|
|
||||||
|
## Live local-only advisory specialists
|
||||||
|
|
||||||
|
These services are available locally for advisory/reflex work, not for authority. Some were originally prototypes but discovery/review found them active/enabled; do not reinstall or enable again blindly.
|
||||||
|
|
||||||
|
| Capability | Port / bind | Runtime / service | State | Health / proof | Authority boundary |
|
||||||
|
| --- | ---: | --- | --- | --- | --- |
|
||||||
|
| NPU reranker | `18818` localhost | `openvino-reranker.service` / `openvino-reranker-npu/` | live local specialist | `/readyz`; `/rerank` response and positive sysfs delta | Rerank only; no vector mutation. |
|
||||||
|
| NPU router/classifier | `18819` localhost | `openvino-router-classifier.service` / `openvino-classifier-npu/` | live local specialist, dry-run/advisory | `/healthz`; `/v1/classify` response and positive sysfs delta | Labels/recommendations only; no routing, sends, memory writes, restarts, or tool execution. |
|
||||||
|
| Small OpenVINO GenAI worker | `18820` localhost | `openvino-genai-npu-worker.service` / `openvino-genai-npu-worker/` | live local specialist; may report `loaded=false` until used | `/healthz`, `/models`; generation proof requires positive sysfs delta | Bounded draft/title/summary jobs only; not primary Atlas chat. Avoid cold-load generation unless the task requires it. |
|
||||||
|
| Document/image triage | `18829` localhost | `openvino-doc-image-triage-npu/` | live local specialist with explicit roots | `/healthz`, `/models`; v1 NPU proof is semantic embedding through `:18817` | Request roots may narrow configured roots, never broaden. OCR/image classification are CPU/local fallbacks. |
|
||||||
|
| Advisory gateway | `172.19.0.1:18830` approved bridge | `openvino-advisory-gateway.service` / `openvino-advisory-gateway/` | live bridge-facing advisory wrapper | `/healthz`; classify/generate/triage responses include NPU proof | For `n8n-agent` and host cron. POSTs can write metadata events, so use health-only unless classification/draft is in scope. No wildcard bind. |
|
||||||
|
|
||||||
Port notes:
|
Port notes:
|
||||||
- `18818`, `18819`, and `18820` are reserved prototype ports from the program plan; check listeners before binding.
|
- Prefer localhost for host-only sidecars. The advisory gateway bridge bind is intentionally for Docker bridge consumers such as `n8n-agent`.
|
||||||
- `18820` is reserved for the GenAI worker prototype. Use optional `18829` for document/image triage foreground review until Will approves a final persistent port. `18828` was used in earlier review smoke only and should not be treated as the preferred documented port.
|
- `18828` was an earlier review alternate for doc/image triage and should not be treated as the preferred documented port.
|
||||||
- Existing `:18817` is currently bound on `0.0.0.0` by the user service; prototype services should still default to `127.0.0.1`.
|
- Check listeners before foreground smokes: `ss -ltnp | grep -E ':(18810|18814|18816|18817|18818|18819|18820|18829|18830)\b'`.
|
||||||
|
|
||||||
|
## Dry-run examples and approved lane artifacts
|
||||||
|
|
||||||
|
The first-slice lanes below are approved as dry-run/local advisory examples. They may be merged into the repo by the integration lane, but they do not grant authority to mutate live Atlas/Hermes behavior.
|
||||||
|
|
||||||
|
| Lane | Approved branch / commit | Artifact paths | Safe use |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| Observability/utilization digest | `feature/npu-max-observability` @ `d661dc299` | `docs/npu-utilization-digest.md`, `scripts/npu-utilization-digest.py` | Read-only compact digest; can write JSONL under `~/.local/state/npu-utilization/digests` unless `--no-write`. Reviewer verified services_ok=9/9, proof_ok=5/5 on live smoke. |
|
||||||
|
| Context-gate advisory CLI | `feature/npu-max-context-gate` @ `b4ef90aff` | `openvino_context_gate/`, `scripts/context-gate-advisory.py` | Plans typed context bundle sources; no retrieval, routing, memory write, or private content. Classifier URL is loopback-only and redirects fail closed. |
|
||||||
|
| Cron/n8n advisory classifier | `feature/npu-max-cron-n8n` @ `54d3bcb7` | `openvino-advisory-gateway/docs/cron-n8n-advisory-classifier.md`, `examples/cron-advisory-dry-run.sh`, `examples/n8n-advisory-dry-run-fragment.json` | Dry-run event classification: duplicate/stale/no-op/action-required -> suppress/log/summarize/escalate recommendation, then human/Atlas gate before side effects. |
|
||||||
|
| Explicit-root batch doc/image/audio triage | `feature/npu-max-doc-audio-triage` @ `bfa62cddb` | `docs/npu-batch-triage-dry-run.md`, `scripts/npu-batch-triage-dry-run.py`, `config/triage-roots*.yaml` | Reads only approved/narrow staging roots; reports compact counts/proof; no file moves, Obsidian/RAG writes, sends, or vector mutation. Whisper endpoint override is loopback `:18816` only. |
|
||||||
|
| Voice/audio local-file pipeline | `feature/npu-max-voice` @ `534816249` | `docs/npu-voice-audio-pipeline.md`, `scripts/npu_voice_audio_pipeline.py` | Local audio file -> Whisper NPU -> classifier NPU -> advisory gate. No platform fetching, sends, writes, memory writes, or routing changes. |
|
||||||
|
| Kanban/task hygiene advisory | `feature/npu-max-kanban-hygiene` @ `575a3cef6` | `scripts/kanban-hygiene-advisory.py` | Reads compact board summaries and suggests labels/next gates only. Does not call Kanban tools or mutate the board. NPU proof failures dominate generic review-required gates. |
|
||||||
|
|
||||||
|
Dry-run command patterns:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Compact service/proof digest; no artifact write during review.
|
||||||
|
scripts/npu-utilization-digest.py --no-write --include-genai-smoke false
|
||||||
|
|
||||||
|
# Local-only context-gate planning; does not retrieve private content.
|
||||||
|
python scripts/context-gate-advisory.py --query "How do I check NPU reranker proof?" --format compact
|
||||||
|
|
||||||
|
# Cron/n8n event advisory wrapper; dry-run only, one compact decision line.
|
||||||
|
openvino-advisory-gateway/examples/cron-advisory-dry-run.sh npu-service-health warning health_check "openvino-reranker timeout twice" "service:openvino-reranker:timeout"
|
||||||
|
|
||||||
|
# Explicit-root triage; manifest root may be narrowed by --root, never broadened.
|
||||||
|
python scripts/npu-batch-triage-dry-run.py --manifest config/triage-roots.test.yaml --lane receipts --root openvino-doc-image-triage-npu/samples --limit 5 --dry-run --json
|
||||||
|
|
||||||
|
# Local-file audio advisory; transcript omitted unless explicitly requested.
|
||||||
|
/home/will/.venvs/npu/bin/python scripts/npu_voice_audio_pipeline.py --audio /tmp/npu-voice-smoke.wav --title "synthetic smoke" --source manual_smoke --json
|
||||||
|
```
|
||||||
|
|
||||||
## Read-only unified health check
|
## Read-only unified health check
|
||||||
|
|
||||||
@@ -55,15 +114,15 @@ cd ~/lab/swarm
|
|||||||
./scripts/npu-service-health.sh
|
./scripts/npu-service-health.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
The script is read-only. It checks listeners for `18810`, `18816`, `18817`, `18818`, `18819`, `18820`, `18829` plus the existing `18814` wrapper and `18828` review alternate, user service state, Docker Compose state for `whisper-server-npu`, JSON health endpoints, and performs a non-private embeddings request while measuring `/sys/class/accel/accel0/device/npu_busy_time_us` before and after. A positive sysfs delta is required for the embeddings proof.
|
The script is read-only. It checks listeners for the live baseline and local specialists, user service state, Docker Compose state for `whisper-server-npu`, and JSON health endpoints, and performs a non-private embeddings request while measuring `/sys/class/accel/accel0/device/npu_busy_time_us` before and after. A positive sysfs delta is required for the embeddings proof.
|
||||||
|
|
||||||
Manual minimal checks:
|
Manual minimal checks:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
|
BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
|
||||||
cat "$BUSY"
|
cat "$BUSY"
|
||||||
ss -ltnp | grep -E ':(18810|18816|18817|18818|18819|18820|18829)\b' || true
|
ss -ltnp | grep -E ':(18810|18814|18816|18817|18818|18819|18820|18829|18830)\b' || true
|
||||||
systemctl --user is-active openvino-embeddings.service rag-embedding-health.service
|
systemctl --user is-active openvino-embeddings.service rag-embedding-health.service openvino-reranker.service openvino-router-classifier.service openvino-genai-npu-worker.service openvino-doc-image-triage.service openvino-advisory-gateway.service
|
||||||
cd ~/lab/swarm && docker compose ps whisper-server-npu
|
cd ~/lab/swarm && docker compose ps whisper-server-npu
|
||||||
curl -fsS http://127.0.0.1:18817/healthz | jq .
|
curl -fsS http://127.0.0.1:18817/healthz | jq .
|
||||||
```
|
```
|
||||||
@@ -87,23 +146,7 @@ A healthy NPU path has:
|
|||||||
|
|
||||||
## Service-specific smoke checks
|
## Service-specific smoke checks
|
||||||
|
|
||||||
For any foreground prototype server below, run it in a terminal you control or capture its PID and stop it at the end of the smoke. Do not use `systemctl --user enable`, Docker Compose `up -d`, `nohup`, or shell disowning for these review smokes unless Will explicitly approved persistent service enablement.
|
For any foreground prototype/server smoke, run it in a terminal you control or capture its PID and stop it at the end. Do not use `systemctl --user enable`, Docker Compose `up -d`, `nohup`, or shell disowning unless Will explicitly approved persistent service enablement. Several specialists are already live; do not start duplicate listeners.
|
||||||
|
|
||||||
Safe foreground-server pattern:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
server_pid=""
|
|
||||||
cleanup() {
|
|
||||||
if [[ -n "$server_pid" ]] && kill -0 "$server_pid" 2>/dev/null; then
|
|
||||||
kill "$server_pid"
|
|
||||||
wait "$server_pid" 2>/dev/null || true
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
trap cleanup EXIT
|
|
||||||
# start prototype server with --host 127.0.0.1 --port <port> &
|
|
||||||
# server_pid=$!
|
|
||||||
# run curl/smoke commands, then let trap stop it
|
|
||||||
```
|
|
||||||
|
|
||||||
### Whisper NPU (`:18816`)
|
### Whisper NPU (`:18816`)
|
||||||
|
|
||||||
@@ -115,7 +158,6 @@ curl -fsS http://127.0.0.1:18816/health | jq .
|
|||||||
|
|
||||||
Operational notes:
|
Operational notes:
|
||||||
- Managed as Docker Compose service/container `whisper-server-npu` in `~/lab/swarm`.
|
- Managed as Docker Compose service/container `whisper-server-npu` in `~/lab/swarm`.
|
||||||
- Consistent with existing swarm service patterns because it is a containerized service with Compose health.
|
|
||||||
- Do not restart it from this runbook unless Will asked for remediation.
|
- Do not restart it from this runbook unless Will asked for remediation.
|
||||||
|
|
||||||
### OpenVINO embeddings (`:18817`)
|
### OpenVINO embeddings (`:18817`)
|
||||||
@@ -127,26 +169,10 @@ curl -fsS http://127.0.0.1:18817/healthz | jq .
|
|||||||
|
|
||||||
Operational notes:
|
Operational notes:
|
||||||
- User systemd unit: `openvino-embeddings.service`.
|
- User systemd unit: `openvino-embeddings.service`.
|
||||||
- Model: `bge-base-en-v1.5-int8-ov`.
|
|
||||||
- Model directory: `/home/will/.cache/openvino-models/bge-base-en-v1.5-int8-ov`.
|
- Model directory: `/home/will/.cache/openvino-models/bge-base-en-v1.5-int8-ov`.
|
||||||
- Live RAG `:18810` uses Chroma collection `obsidian_bge_npu` through this service. Do not reindex or replace this collection in place.
|
- Live RAG `:18810` uses Chroma collection `obsidian_bge_npu` through this service. Do not reindex or replace this collection in place.
|
||||||
|
|
||||||
### Reranker prototype (`:18818`)
|
### Reranker (`:18818`)
|
||||||
|
|
||||||
Foreground review start only, after confirming port is free:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
ss -ltnp | grep ':18818\b' || true
|
|
||||||
cd ~/lab/swarm/openvino-reranker-npu
|
|
||||||
source /home/will/.venvs/openvino-reranker/bin/activate
|
|
||||||
OPENVINO_RERANKER_HOST=127.0.0.1 \
|
|
||||||
OPENVINO_RERANKER_PORT=18818 \
|
|
||||||
OPENVINO_RERANKER_DEVICE=NPU \
|
|
||||||
OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov \
|
|
||||||
python server.py
|
|
||||||
```
|
|
||||||
|
|
||||||
From another shell:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -fsS http://127.0.0.1:18818/readyz | jq .
|
curl -fsS http://127.0.0.1:18818/readyz | jq .
|
||||||
@@ -154,107 +180,78 @@ python ~/lab/swarm/openvino-reranker-npu/smoke.py --url http://127.0.0.1:18818
|
|||||||
```
|
```
|
||||||
|
|
||||||
Approval gate:
|
Approval gate:
|
||||||
- May be installed as `openvino-reranker.service` only after foreground smoke and Will approval.
|
- Rerank may score candidate passages only. Any change to RAG answer selection, rerank policy, or vector DB behavior requires separate approval and rollback notes.
|
||||||
- May be integrated into RAG only behind disabled-by-default knobs such as `RAG_RERANK_ENABLED=false`; request-time reranking must not mutate Chroma.
|
|
||||||
|
|
||||||
### Router/classifier prototype (`:18819`)
|
### Router/classifier (`:18819`)
|
||||||
|
|
||||||
Foreground review start only, after confirming port is free:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
ss -ltnp | grep ':18819\b' || true
|
|
||||||
cd ~/lab/swarm/openvino-classifier-npu
|
|
||||||
/home/will/.venvs/npu/bin/python router_classifier.py --host 127.0.0.1 --port 18819
|
|
||||||
```
|
|
||||||
|
|
||||||
Smoke:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -fsS http://127.0.0.1:18819/healthz | jq .
|
curl -fsS http://127.0.0.1:18819/healthz | jq .
|
||||||
curl -fsS http://127.0.0.1:18819/v1/classify \
|
curl -fsS http://127.0.0.1:18819/v1/classify \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
-d '{"id":"smoke","text":"Urgent: check whether port 18817 is listening and inspect systemd logs.","options":{"include_evidence":true,"dry_run":true}}' | jq .
|
-d '{"id":"smoke","text":"Urgent: check whether port 18817 is listening and inspect systemd logs.","options":{"include_evidence":false,"dry_run":true}}' | jq '{id, labels, npu_busy_delta_us, sysfs_npu_busy_delta_us}'
|
||||||
```
|
```
|
||||||
|
|
||||||
Approval gate:
|
Approval gate:
|
||||||
- May be installed as `openvino-router-classifier.service` only after Will approves live service enablement.
|
- Must remain dry-run/advisory and must not alter Hermes/Atlas routing, memory writes, safety confirmation flow, or outbound messages without a separate explicit approval.
|
||||||
- Must remain dry-run and must not alter Hermes/Atlas routing, memory writes, safety confirmation flow, or outbound messages without a separate explicit approval.
|
|
||||||
|
|
||||||
### Small GenAI NPU worker (`:18820`)
|
### Small GenAI NPU worker (`:18820`)
|
||||||
|
|
||||||
Foreground review start only, after confirming port is free:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
ss -ltnp | grep ':18820\b' || true
|
|
||||||
cd ~/lab/swarm/openvino-genai-npu-worker
|
|
||||||
/home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820
|
|
||||||
```
|
|
||||||
|
|
||||||
Smoke:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -fsS http://127.0.0.1:18820/healthz | jq .
|
curl -fsS http://127.0.0.1:18820/healthz | jq .
|
||||||
curl -fsS http://127.0.0.1:18820/models | jq .
|
curl -fsS http://127.0.0.1:18820/models | jq .
|
||||||
curl -fsS http://127.0.0.1:18820/v1/worker/condense-notification \
|
|
||||||
-H 'Content-Type: application/json' \
|
|
||||||
-d '{"input":"Non-private smoke notification for local NPU worker.","max_new_tokens":64}' | jq .
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Approval gate:
|
Approval gate:
|
||||||
- May be installed as `openvino-genai-npu-worker.service` only after Will approves persistent service enablement.
|
- Must not become primary Atlas/Hermes model routing. Use only for bounded local jobs such as title, summary, notification condensation, and memory-candidate drafting after the relevant job is approved.
|
||||||
- Must not become primary Atlas/Hermes model routing. Use only for bounded background jobs such as title, summary, notification condensation, and memory-candidate drafting.
|
- Avoid generation smokes that cold-load the model unless the task explicitly calls for it.
|
||||||
|
|
||||||
### Document/image triage prototype (`:18829` optional review port)
|
### Document/image triage (`:18829`)
|
||||||
|
|
||||||
Foreground review start only, after confirming the port is free:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
ss -ltnp | grep ':18829\b' || true
|
|
||||||
cd ~/lab/swarm/openvino-doc-image-triage-npu
|
|
||||||
/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18829 --allowed-root "$PWD"
|
|
||||||
```
|
|
||||||
|
|
||||||
Smoke:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -fsS http://127.0.0.1:18829/healthz | jq .
|
curl -fsS http://127.0.0.1:18829/healthz | jq .
|
||||||
curl -fsS http://127.0.0.1:18829/models | jq .
|
curl -fsS http://127.0.0.1:18829/models | jq .
|
||||||
/home/will/.venvs/npu/bin/python tests/smoke_test.py
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Approval gate:
|
Approval gate:
|
||||||
- Do not point it at arbitrary directories; allowed roots must be equal to or under configured roots.
|
- Do not point it at arbitrary directories; allowed roots must be equal to or under configured roots.
|
||||||
- Do not include raw OCR text or full source paths unless Will explicitly asks for a one-off response.
|
- Do not include raw OCR text or full source paths unless Will explicitly asks for one-off debugging.
|
||||||
- v1 only uses the NPU through `:18817` embeddings for needs-attention; image category classification and OCR are CPU/local fallbacks.
|
- v1 only uses the NPU through `:18817` embeddings for needs-attention; image category classification and OCR are CPU/local fallbacks.
|
||||||
|
|
||||||
## Systemd and Compose recommendations
|
### Advisory gateway (`172.19.0.1:18830`)
|
||||||
|
|
||||||
Recommended management split:
|
```bash
|
||||||
- Keep containerized services in Docker Compose when they already have Docker build/runtime shape and Compose health (`whisper-server-npu`).
|
curl -fsS http://172.19.0.1:18830/healthz | jq .
|
||||||
- Keep host-side OpenVINO Python prototypes as user systemd services when they depend on local venvs, sysfs NPU access, model caches, and localhost-only APIs (`openvino-embeddings`, optional reranker/classifier/GenAI worker).
|
docker exec n8n-agent wget -qO- -T 8 http://172.19.0.1:18830/healthz
|
||||||
- Do not add the prototypes to the live gateway or primary routing during installation. Installation and routing are separate approval gates.
|
```
|
||||||
|
|
||||||
User-systemd unit expectations for optional prototypes:
|
Approval gate:
|
||||||
- `WorkingDirectory` points at the service directory under `~/lab/swarm/`.
|
- Classification/generation/triage POSTs are advisory only and may write metadata counters. Do not wire outputs to sends, restarts, memory writes, tool execution, or Atlas/Hermes routing without a separate reviewed approval.
|
||||||
- `ExecStart` uses the existing venv path documented by the prototype.
|
|
||||||
- `Environment` pins host to `127.0.0.1`, port, model path, device `NPU`, and any upstream endpoint.
|
|
||||||
- `Restart=on-failure`, not aggressive restart loops.
|
|
||||||
- Logs go to user journal; do not log raw request bodies.
|
|
||||||
- Start manually for smoke; enable on boot only after Will approval.
|
|
||||||
|
|
||||||
Compose expectations for existing swarm services:
|
## Approval-gated / not-live integrations
|
||||||
- Prefer `cd ~/lab/swarm && make ps`, `make status`, and targeted `docker compose ps <service>` for read-only checks.
|
|
||||||
- Do not run `docker compose up -d`, restart containers, pull images, or prune volumes from this runbook without approval.
|
The following remain closed even though dry-run examples and local specialists exist:
|
||||||
|
|
||||||
|
| Integration | Current gate |
|
||||||
|
| --- | --- |
|
||||||
|
| Primary Atlas/Hermes routing changes | closed; no live routing authority changes from this program slice |
|
||||||
|
| Memory writes from NPU classifier/GenAI/advisory gateway | closed |
|
||||||
|
| Telegram/Discord/email/outbound sends from cron/n8n/voice/advisory output | closed |
|
||||||
|
| Service restarts or tool execution triggered by classifier/gateway output | closed |
|
||||||
|
| Automatic Kanban task mutation, assignment, block/unblock, completion, or task creation | closed |
|
||||||
|
| Broad private document/image/audio root processing | closed; only explicit approved/narrow roots |
|
||||||
|
| Vector DB mutation/reindex or Chroma collection replacement | closed |
|
||||||
|
| Wildcard binds or broader exposure for new services | closed |
|
||||||
|
| GenAI worker as primary chat model | closed; bounded local drafts only |
|
||||||
|
| Diffusion/image generation on the NPU | rejected/parked for this program |
|
||||||
|
|
||||||
## Monitoring and logging notes
|
## Monitoring and logging notes
|
||||||
|
|
||||||
Minimum recurring monitoring should include:
|
Minimum recurring monitoring should include:
|
||||||
- Listener presence for `18816`, `18817`, and any approved optional prototype ports.
|
- Listener presence for live baseline and any approved specialist ports.
|
||||||
- User service state for `openvino-embeddings.service` and any approved optional prototype unit.
|
- User service state for OpenVINO services and Docker Compose health for `whisper-server-npu`.
|
||||||
- Docker Compose health for `whisper-server-npu`.
|
|
||||||
- HTTP health endpoint success.
|
- HTTP health endpoint success.
|
||||||
- Positive sysfs NPU busy-time delta on at least one non-private inference probe, preferably embeddings `:18817` because it is already live and central.
|
- Positive sysfs NPU busy-time delta on at least one non-private inference probe, preferably embeddings `:18817` because it is already live and central.
|
||||||
- Journal/container logs only at summary level. Avoid raw prompts, raw OCR text, private document names, credentials, and API keys.
|
- Compact counts/deltas/gates only. Avoid raw prompts, transcripts, OCR text, private document names, credentials, and API keys.
|
||||||
|
|
||||||
Useful log commands:
|
Useful log commands:
|
||||||
|
|
||||||
@@ -264,23 +261,14 @@ journalctl --user -u rag-embedding-health.service -n 100 --no-pager
|
|||||||
journalctl --user -u openvino-reranker.service -n 100 --no-pager
|
journalctl --user -u openvino-reranker.service -n 100 --no-pager
|
||||||
journalctl --user -u openvino-router-classifier.service -n 100 --no-pager
|
journalctl --user -u openvino-router-classifier.service -n 100 --no-pager
|
||||||
journalctl --user -u openvino-genai-npu-worker.service -n 100 --no-pager
|
journalctl --user -u openvino-genai-npu-worker.service -n 100 --no-pager
|
||||||
|
journalctl --user -u openvino-advisory-gateway.service -n 100 --no-pager
|
||||||
cd ~/lab/swarm && docker compose logs --tail 100 whisper-server-npu
|
cd ~/lab/swarm && docker compose logs --tail 100 whisper-server-npu
|
||||||
```
|
```
|
||||||
|
|
||||||
## Approval gates
|
## Approved/parked outcomes
|
||||||
|
|
||||||
Requires explicit Will approval before proceeding:
|
- Live baseline retained: RAG endpoint (`:18810`), RAG health wrapper (`:18814`), Whisper NPU (`:18816`), OpenVINO embeddings (`:18817`).
|
||||||
- Installing, enabling, or autostarting `openvino-reranker.service`, `openvino-router-classifier.service`, or `openvino-genai-npu-worker.service`.
|
- Live local-only advisory/reflex specialists: reranker (`:18818`), router/classifier (`:18819`), GenAI worker (`:18820`), doc/image triage (`:18829`), advisory gateway bridge (`172.19.0.1:18830`).
|
||||||
- Assigning a final persistent port to document/image triage or enabling it as a persistent service.
|
- Approved dry-run examples: utilization digest, context gate plan, cron/n8n advisory classifier, explicit-root batch triage, local-file voice/audio pipeline, Kanban hygiene advisory.
|
||||||
- Enabling live RAG reranking or any request path that changes Atlas/RAG answers.
|
|
||||||
- Changing primary Atlas/Hermes routing or connecting router/classifier outputs to live decisions.
|
|
||||||
- Connecting the GenAI worker to primary Atlas chat, gateway routing, memory writes, or outbound notifications.
|
|
||||||
- Restarting the live Atlas/Hermes gateway.
|
|
||||||
- Deleting, overwriting, or in-place reindexing existing vector collections.
|
|
||||||
- Broadening bind addresses or exposure beyond local-only defaults.
|
|
||||||
|
|
||||||
Approved/parked outcomes:
|
|
||||||
- Built/approved prototypes: reranker (`:18818`), router/classifier (`:18819`), small GenAI worker (`:18820`), document/image triage (review ports `:18828`/`:18829`).
|
|
||||||
- Live baseline retained: Whisper NPU (`:18816`), OpenVINO embeddings (`:18817`), RAG endpoint (`:18810`) using `obsidian_bge_npu`.
|
|
||||||
- Parked: always-on wake-word/audio and conventional vision detection until Will wants a concrete use case.
|
- Parked: always-on wake-word/audio and conventional vision detection until Will wants a concrete use case.
|
||||||
- Rejected for this NPU program: diffusion/image generation.
|
- Rejected for this NPU program: diffusion/image generation.
|
||||||
|
|||||||
@@ -0,0 +1,200 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import socket
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Two directory levels up from this file; put first on sys.path so the
# ``openvino_context_gate`` import below resolves against the checkout.
REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))
|
||||||
|
|
||||||
|
from openvino_context_gate.context_gate import ( # noqa: E402
|
||||||
|
AUTHORITY,
|
||||||
|
ClassifierResult,
|
||||||
|
ContextGateError,
|
||||||
|
build_plan,
|
||||||
|
classify_live,
|
||||||
|
compact_json,
|
||||||
|
compact_line,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def fake_classifier(
    labels: dict,
    *,
    endpoint_delta: int | None = 120,
    sysfs_delta: int | None = 120,
    outer_delta: int | None = 80,
) -> ClassifierResult:
    """Build a ``ClassifierResult`` marked ``live=True`` without calling a classifier.

    Args:
        labels: Label mapping stored on the result as ``labels``.
        endpoint_delta: Value stored as ``npu_busy_delta_us``.
        sysfs_delta: Value stored as ``sysfs_npu_busy_delta_us``.
        outer_delta: Value stored as ``outer_sysfs_delta_us``.

    Returns:
        The populated ``ClassifierResult``.
    """
    return ClassifierResult(
        labels=labels,
        npu_busy_delta_us=endpoint_delta,
        sysfs_npu_busy_delta_us=sysfs_delta,
        outer_sysfs_delta_us=outer_delta,
        live=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def labels(category: str, *, tool: bool = False, safety: bool = False, memory: str = "none") -> dict:
    """Return a classifier-style label mapping for use in tests.

    Each entry is a ``{"value": ..., "confidence": ...}`` dict. Confidence is
    higher when the corresponding flag is set (or ``memory`` is not ``"none"``).

    Args:
        category: Value of the ``workflow_category`` label.
        tool: Value of the ``tool_needed`` label.
        safety: Value of the ``safety_confirmation_required`` label.
        memory: Value of the ``memory_candidate`` label.

    Returns:
        Mapping with the five label keys built below; ``urgency`` is always
        ``"normal"``.
    """
    return {
        "tool_needed": {"value": tool, "confidence": 0.8 if tool else 0.4},
        "memory_candidate": {"value": memory, "confidence": 0.8 if memory != "none" else 0.3},
        "urgency": {"value": "normal", "confidence": 0.6},
        "workflow_category": {"value": category, "confidence": 0.86},
        "safety_confirmation_required": {"value": safety, "confidence": 0.9 if safety else 0.1},
    }
|
||||||
|
|
||||||
|
|
||||||
|
def test_current_npu_debug_query_selects_ops_live_and_repo_sources() -> None:
    """A devops/tool-needed classification yields the ops bundle with live and repo sources first.

    Also checks that NPU proof is verified, the authority block is unchanged,
    and every gate value starts with ``closed_``.
    """
    plan = build_plan(
        "How do I check whether the RAG reranker is using the NPU?",
        context={"platform": "cli", "repo_path": "/home/will/lab/swarm"},
        classifier=fake_classifier(labels("devops", tool=True)),
    )
    assert plan["schema"] == "atlas_context_gate_plan_v1"
    assert plan["bundle_plan"]["bundle_name"] == "OpsDebugBundle"
    assert [s["source"] for s in plan["source_plan"]][:2] == ["live_system", "repo_files"]
    assert plan["npu_proof"]["verified"] is True
    assert plan["authority"] == AUTHORITY
    assert all(value.startswith("closed_") for value in plan["gates"].values())
|
||||||
|
|
||||||
|
|
||||||
|
def test_prior_plan_query_uses_session_or_rag_and_coding_for_kanban() -> None:
    """A coding classification in a kanban context selects the coding bundle.

    The source plan must include repo files, session search, and RAG search.
    """
    plan = build_plan(
        "Where did we leave the NPU context gate implementation plan?",
        context={"platform": "kanban", "task_id": "t_example", "repo_path": "/home/will/lab/swarm"},
        classifier=fake_classifier(labels("coding", tool=True)),
    )
    sources = [s["source"] for s in plan["source_plan"]]
    assert plan["bundle_plan"]["bundle_name"] == "CodingTaskBundle"
    assert "repo_files" in sources
    assert "session_search" in sources
    assert "rag_search" in sources
|
||||||
|
|
||||||
|
|
||||||
|
def test_simple_creative_query_no_retrieval_offline_no_npu_claim() -> None:
    """Without a classifier argument, a creative prompt plans no retrieval and claims no NPU proof."""
    plan = build_plan("Write a haiku about Seattle rain.")
    assert plan["bundle_plan"]["bundle_name"] == "SimpleResponseBundle"
    assert [s["source"] for s in plan["source_plan"]] == ["no_retrieval"]
    assert plan["npu_proof"]["verified"] is False
    assert "npu_proof_inconclusive" in plan["warnings"]
    assert "offline_heuristic_classifier_no_npu_claim" in plan["warnings"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_unsafe_live_routing_request_keeps_authority_closed_and_blocks_side_effect() -> None:
    """A request to change live routing leaves authority untouched and the routing gate closed.

    The bundle plan must also list ``authority_side_effect`` among its blocked fields.
    """
    plan = build_plan(
        "Change Hermes live routing to use the classifier automatically.",
        context={"repo_path": "/home/will/lab/swarm"},
        classifier=fake_classifier(labels("coding", tool=True, safety=True)),
    )
    assert plan["authority"] == AUTHORITY
    assert plan["authority"]["may_route"] is False
    assert any(field["field"] == "authority_side_effect" for field in plan["bundle_plan"]["blocked_fields"])
    assert plan["gates"]["live_routing_change"] == "closed_requires_explicit_approval"
|
||||||
|
|
||||||
|
|
||||||
|
def test_rejects_non_dry_run_and_private_text_options() -> None:
    """``build_plan`` raises ``ContextGateError`` for ``dry_run=False`` and ``include_private_text=True``."""
    with pytest.raises(ContextGateError, match="dry_run_must_remain_true"):
        build_plan("hello", options={"dry_run": False})
    with pytest.raises(ContextGateError, match="include_private_text"):
        build_plan("hello", options={"include_private_text": True})
|
||||||
|
|
||||||
|
|
||||||
|
def test_compact_outputs_are_small_and_parseable() -> None:
    """``compact_line`` carries the schema and gate markers; ``compact_json`` round-trips through JSON."""
    plan = build_plan("How do I check whether port 18819 is healthy?")
    line = compact_line(plan)
    assert "schema=atlas_context_gate_plan_v1" in line
    assert "gates=closed:" in line
    parsed = json.loads(compact_json(plan))
    assert parsed["schema"] == "atlas_context_gate_plan_v1"
    assert isinstance(parsed["sources"], list)
    assert "authority" in parsed
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_offline_compact_json_smoke() -> None:
    """Run the advisory CLI with ``--offline`` and check the compact-JSON payload on stdout."""
    script = REPO_ROOT / "scripts" / "context-gate-advisory.py"
    result = subprocess.run(
        [sys.executable, str(script), "--offline", "--query", "Write a haiku about Seattle rain.", "--format", "compact-json"],
        check=True,  # a non-zero exit fails the test via CalledProcessError
        text=True,
        capture_output=True,
        cwd=REPO_ROOT,
    )
    parsed = json.loads(result.stdout)
    assert parsed["ok"] is True
    assert parsed["bundle_name"] == "SimpleResponseBundle"
    assert parsed["sources"] == ["no_retrieval"]
    assert parsed["npu_proof"]["verified"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_npu_proof_requires_positive_sysfs_delta() -> None:
    """NPU proof is verified only when a sysfs delta is positive.

    Three cases, all with a positive endpoint delta:
    zero sysfs delta and no outer delta -> not verified;
    sysfs delta of 1 -> verified; outer delta of 1 -> verified.
    """
    classifier = fake_classifier(labels("devops", tool=True), endpoint_delta=120, sysfs_delta=0, outer_delta=None)
    plan = build_plan("How do I check whether the RAG reranker is using the NPU?", classifier=classifier)
    assert plan["npu_proof"]["verified"] is False
    assert "npu_proof_inconclusive" in plan["warnings"]

    endpoint_sysfs_plan = build_plan(
        "How do I check whether the RAG reranker is using the NPU?",
        classifier=fake_classifier(labels("devops", tool=True), endpoint_delta=120, sysfs_delta=1, outer_delta=None),
    )
    assert endpoint_sysfs_plan["npu_proof"]["verified"] is True

    outer_sysfs_plan = build_plan(
        "How do I check whether the RAG reranker is using the NPU?",
        classifier=fake_classifier(labels("devops", tool=True), endpoint_delta=120, sysfs_delta=0, outer_delta=1),
    )
    assert outer_sysfs_plan["npu_proof"]["verified"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_classifier_url_must_be_loopback_or_localhost() -> None:
    """``classify_live`` rejects each listed URL with ``invalid_classifier_url``.

    The list covers a public hostname, a private-network address, the
    wildcard address, and a non-HTTP scheme on a loopback address.
    """
    for url in [
        "http://example.com/v1/classify",
        "https://10.0.0.5/v1/classify",
        "http://0.0.0.0:18819/v1/classify",
        "ftp://127.0.0.1/v1/classify",
    ]:
        with pytest.raises(ContextGateError, match="invalid_classifier_url"):
            classify_live("hello", classifier_url=url, timeout=0.01)
|
||||||
|
|
||||||
|
|
||||||
|
def test_classifier_url_redirect_to_non_loopback_is_not_followed(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 302 from a loopback classifier to an external host must not be followed.

    A throwaway HTTP server on 127.0.0.1 answers every POST with a redirect to
    ``example.com``. ``socket.create_connection`` is patched to fail the test
    on any connection attempt to a host other than 127.0.0.1/localhost. The
    call must raise ``classifier_unavailable`` and the server must have seen
    exactly one request.
    """
    requests: list[str] = []

    class RedirectHandler(BaseHTTPRequestHandler):
        def do_POST(self) -> None:  # noqa: N802 - stdlib callback name
            requests.append(self.path)
            self.send_response(302)
            self.send_header("Location", "http://example.com/v1/classify")
            self.end_headers()

        def log_message(self, format: str, *args: object) -> None:
            # Silence the default per-request stderr logging.
            return

    original_create_connection = socket.create_connection

    def guarded_create_connection(address, *args, **kwargs):  # type: ignore[no-untyped-def]
        host = address[0]
        if host not in {"127.0.0.1", "localhost"}:
            raise AssertionError(f"attempted non-loopback redirect connection to {host}")
        return original_create_connection(address, *args, **kwargs)

    # Port 0 lets the OS pick a free port; it is read back via server.server_port.
    server = ThreadingHTTPServer(("127.0.0.1", 0), RedirectHandler)
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    monkeypatch.setattr(socket, "create_connection", guarded_create_connection)
    try:
        url = f"http://127.0.0.1:{server.server_port}/v1/classify"
        with pytest.raises(ContextGateError, match="classifier_unavailable"):
            classify_live("hello", classifier_url=url, timeout=1.0)
    finally:
        server.shutdown()
        server.server_close()
        thread.join(timeout=2)

    assert requests == ["/v1/classify"]
|
||||||
@@ -0,0 +1,198 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import importlib.util
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
|
# Two directory levels up from this file.
ROOT = Path(__file__).resolve().parents[1]
# Script under test; its hyphenated filename cannot be imported with a plain
# ``import`` statement, so it is loaded by path instead.
MODULE_PATH = ROOT / "scripts" / "kanban-hygiene-advisory.py"
|
||||||
|
|
||||||
|
|
||||||
|
def load_module():
    """Import the script at ``MODULE_PATH`` as module ``kanban_hygiene_advisory`` and return it.

    The module is registered in ``sys.modules`` before its code is executed.
    """
    spec = importlib.util.spec_from_file_location("kanban_hygiene_advisory", MODULE_PATH)
    assert spec and spec.loader
    module = importlib.util.module_from_spec(spec)
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    return module
|
||||||
|
|
||||||
|
|
||||||
|
def task(task_id: str, title: str, status: str = "ready", **extra: object) -> dict[str, object]:
    """Build a task row dict for the advisory tests.

    Args:
        task_id: Stored under ``"id"``.
        title: Stored under ``"title"``.
        status: Stored under ``"status"``.
        **extra: Additional keys merged last, so they override the defaults
            (for example ``updated_at``).

    Returns:
        The row with fixed ``assignee`` and timestamp defaults plus ``extra``.
    """
    row = {
        "id": task_id,
        "title": title,
        "status": status,
        "assignee": "engineer",
        "created_at": 1_780_000_000,
        "updated_at": 1_780_000_100,
    }
    row.update(extra)
    return row
|
||||||
|
|
||||||
|
|
||||||
|
class KanbanHygieneAdvisoryTests(unittest.TestCase):
    """Tests for the ``advisory`` function and CLI of the kanban hygiene script."""

    def setUp(self):
        # Load the script by path for every test.
        self.mod = load_module()

    def advisory(self, tasks, now=1_780_003_600):
        # Shared call into the module under test with a fixed board and clock.
        return self.mod.advisory(
            tasks,
            board="npu-maximization",
            now=now,
            input_metadata={},
            include_evidence=False,
        )

    def test_output_contract_and_authority_flags_are_all_false(self):
        # Top-level schema/dry-run fields are set and every authority flag is False.
        output = self.advisory([
            task("t_spec", "spec: Kanban/task hygiene classifier", body_excerpt="Define dry-run labels and next gate.")
        ])
        self.assertEqual(output["schema"], "kanban_hygiene_advisory_v1")
        self.assertTrue(output["dry_run"])
        self.assertEqual(output["counts"]["tasks"], 1)
        self.assertTrue(output["npu_proof"]["required_for_npu_claims"])
        self.assertFalse(output["npu_proof"]["attempted"])
        # Non-empty first, so the all() below cannot pass vacuously.
        self.assertTrue(output["authority"])
        self.assertTrue(all(value is False for value in output["authority"].values()))

    def test_required_labels_and_kanban_lane_gate(self):
        # Every item carries the full label set; a spec task lands in the kanban_hygiene lane.
        output = self.advisory([
            task("t1", "spec: Kanban/task hygiene classifier", body_excerpt="Read board summaries and suggest review-needed next gate labels.")
        ])
        item = output["items"][0]
        for key in ["task_type", "project", "lane", "blocker", "staleness", "duplicate", "review_needed", "next_gate"]:
            self.assertIn(key, item)
        self.assertEqual(item["task_type"]["value"], "spec")
        self.assertEqual(item["project"]["value"], "npu-maximization")
        self.assertEqual(item["lane"]["value"], "kanban_hygiene")
        self.assertEqual(item["next_gate"]["value"], "ready_for_implementation")

    def test_lifecycle_chain_is_not_duplicate_even_with_same_normalized_title(self):
        # spec -> implement -> review tasks linked by parents/children are not duplicates.
        rows = [
            task("t_spec", "spec: Kanban hygiene advisory", children=["t_impl"]),
            task("t_impl", "implement: Kanban hygiene advisory", parents=["t_spec"], children=["t_review"]),
            task("t_review", "review: Kanban hygiene advisory", parents=["t_impl"]),
        ]
        output = self.advisory(rows)
        self.assertEqual(output["counts"]["duplicates"], 0)
        self.assertTrue(all(not item["duplicate"]["is_duplicate"] for item in output["items"]))

    def test_duplicate_same_type_lane_and_normalized_title_is_flagged(self):
        # Titles differing only in case and hyphenation: exactly one item is flagged.
        rows = [
            task("t_a", "implement: dry-run Kanban hygiene advisory", body_excerpt="Kanban board summaries"),
            task("t_b", "implement: dry run kanban hygiene advisory", body_excerpt="Kanban board summaries"),
        ]
        output = self.advisory(rows)
        self.assertEqual(output["counts"]["duplicates"], 1)
        dupes = [item for item in output["items"] if item["duplicate"]["is_duplicate"]]
        self.assertEqual(len(dupes), 1)
        self.assertEqual(dupes[0]["next_gate"]["value"], "dedupe_review")

    def test_staleness_is_deterministic_with_now(self):
        # ``now`` is 7201 seconds after the task's last update/heartbeat.
        output = self.advisory([
            task("t_run", "implement: NPU service", status="running", updated_at=1_780_000_000, heartbeat_at=1_780_000_000)
        ], now=1_780_007_201)
        item = output["items"][0]
        self.assertEqual(item["staleness"]["value"], "stale_lock")
        self.assertEqual(item["next_gate"]["value"], "investigate_stale_lock")
        self.assertEqual(output["counts"]["stale"], 1)

    def test_review_required_marker_sets_ready_for_review(self):
        # A blocked task whose excerpt starts with "review-required:" is routed to review.
        output = self.advisory([
            task(
                "t_impl",
                "implement: dry-run Kanban hygiene advisory",
                status="blocked",
                body_excerpt="review-required: code change needs review",
                changed_files=["scripts/kanban-hygiene-advisory.py"],
                tests_run=8,
            )
        ])
        item = output["items"][0]
        self.assertTrue(item["review_needed"]["value"])
        self.assertEqual(item["review_needed"]["kind"], "code_change")
        self.assertEqual(item["next_gate"]["value"], "ready_for_review")

    def test_missing_parent_waits_without_marking_blocked(self):
        # The referenced parent is absent from the input rows.
        output = self.advisory([
            task("t_child", "implement: context gate", status="todo", parents=["t_parent"], body_excerpt="RAG context gate")
        ])
        item = output["items"][0]
        self.assertEqual(item["blocker"]["value"], "missing_parent")
        self.assertFalse(item["blocker"]["blocked"])
        self.assertEqual(item["next_gate"]["value"], "wait_for_parents")

    def test_npu_claim_without_busy_delta_routes_to_proof_gate(self):
        # Missing, zero, and negative busy deltas all route to the NPU proof gate.
        for excerpt in [
            "NPU classifier returned HTTP 200 but missing busy delta evidence",
            "NPU reranker reported npu_busy_delta_us=0",
            "NPU reranker reported npu_busy_delta_us=-5",
            "NPU reranker reported npu_busy_delta_us=-0.1",
        ]:
            with self.subTest(excerpt=excerpt):
                output = self.advisory([task("t_npu", "test: NPU classifier smoke", body_excerpt=excerpt)])
                item = output["items"][0]
                self.assertTrue(item["review_needed"]["value"])
                self.assertEqual(item["review_needed"]["kind"], "npu_proof_gate")
                self.assertEqual(item["next_gate"]["value"], "needs_npu_proof")

    def test_npu_proof_gate_dominates_review_required_marker(self):
        # With both a review-required marker and a failed NPU proof, the proof gate wins.
        for excerpt in [
            "review-required: NPU reranker reported npu_busy_delta_us=0 after smoke",
            "review-required: NPU classifier returned HTTP 200 but missing busy delta evidence",
        ]:
            with self.subTest(excerpt=excerpt):
                output = self.advisory([
                    task(
                        "t_npu_review",
                        "implement: NPU classifier smoke",
                        status="blocked",
                        body_excerpt=excerpt,
                        changed_files=["scripts/npu-classifier.py"],
                        tests_run=1,
                    )
                ])
                item = output["items"][0]
                self.assertTrue(item["review_needed"]["value"])
                self.assertEqual(item["review_needed"]["kind"], "npu_proof_gate")
                self.assertEqual(item["next_gate"]["value"], "needs_npu_proof")

    def test_cli_accepts_jsonl_auto_format_and_invalid_schema_exits_nonzero(self):
        # Part 1: a .jsonl file passed via --input is accepted without a format flag.
        good_rows = [
            json.dumps(task("t1", "docs: service map update", body_excerpt="runbook README")),
            json.dumps(task("t2", "ops: utilization digest", body_excerpt="health metrics digest")),
        ]
        # delete=False so the child process can open the file by name after it
        # is closed here; the ``finally`` below removes it. Explicit encoding
        # keeps the write independent of the platform default.
        with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False, encoding="utf-8") as handle:
            handle.write("\n".join(good_rows))
            good_path = handle.name
        try:
            result = subprocess.run(
                [sys.executable, str(MODULE_PATH), "--input", good_path, "--board", "npu-maximization", "--now", "1780003600"],
                capture_output=True,
                text=True,
                check=False,
            )
        finally:
            Path(good_path).unlink(missing_ok=True)
        self.assertEqual(result.returncode, 0, result.stderr)
        parsed = json.loads(result.stdout)
        self.assertEqual(parsed["counts"]["tasks"], 2)

        # Part 2: a task object missing required fields, fed on stdin, fails.
        bad = subprocess.run(
            [sys.executable, str(MODULE_PATH)],
            input=json.dumps({"tasks": [{"id": "missing-fields"}]}),
            capture_output=True,
            text=True,
            check=False,
        )
        self.assertNotEqual(bad.returncode, 0)
        self.assertIn("missing required fields", bad.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this file directly with the unittest runner.
if __name__ == "__main__":
    unittest.main()
|
||||||
@@ -0,0 +1,129 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import importlib.util
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Two directory levels up from this file.
ROOT = Path(__file__).resolve().parents[1]
# Harness script under test (hyphenated name, so it is loaded by path).
SCRIPT = ROOT / "scripts" / "npu-advisory-dry-run-comparison.py"
# Fixture file passed to the harness and to the CLI via --fixtures.
FIXTURES = ROOT / "fixtures" / "npu_advisory_dry_run" / "fixtures.json"
|
||||||
|
|
||||||
|
|
||||||
|
def load_harness():
    """Import the script at ``SCRIPT`` as module ``npu_advisory_dry_run_comparison`` and return it.

    The module is registered in ``sys.modules`` before its code is executed.
    """
    spec = importlib.util.spec_from_file_location("npu_advisory_dry_run_comparison", SCRIPT)
    assert spec and spec.loader
    module = importlib.util.module_from_spec(spec)
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    return module
|
||||||
|
|
||||||
|
|
||||||
|
def test_fixture_set_covers_all_required_advisory_lanes() -> None:
    """The fixture file covers all six lanes and every fixture has both expectation keys."""
    fixtures = json.loads(FIXTURES.read_text())["fixtures"]
    lanes = {fixture["lane"] for fixture in fixtures}
    assert {
        "context_gate",
        "cron_n8n_advisory",
        "batch_triage",
        "voice_audio",
        "kanban_hygiene",
        "advisory_gateway_envelope",
    }.issubset(lanes)
    assert all("expected_recommendation" in fixture for fixture in fixtures)
    assert all("human_or_atlas_decision" in fixture for fixture in fixtures)
|
||||||
|
|
||||||
|
|
||||||
|
def test_harness_outputs_compact_summary_and_decision_schema() -> None:
    """Check the harness summary totals, each decision record's shape, and the minimum metrics."""
    harness = load_harness()
    summary = harness.run(FIXTURES)
    assert summary["schema"] == "npu_advisory_dry_run_summary_v1"
    assert summary["dry_run"] is True
    assert all(value is False for value in summary["mutations"].values())
    assert summary["totals"]["fixtures"] >= 6
    assert summary["totals"]["agree"] >= 1
    assert summary["totals"]["false_positive"] >= 1
    assert summary["totals"]["authority_safe_flag_violations"] == 1

    for decision in summary["decisions"]:
        assert decision["schema_version"] == "npu_advisory_decision_v1"
        assert decision["decision_id"]
        assert isinstance(decision["source"], dict)
        assert isinstance(decision["service"], dict)
        assert isinstance(decision["recommendation"], dict)
        assert isinstance(decision["confidence"], dict)
        assert isinstance(decision["actual_action"], dict)
        # Dry run: no action was performed and no side effects were recorded.
        assert decision["actual_action"]["performed"] is False
        assert decision["actual_action"]["side_effects"] == []
        assert decision["allowed_actions"] == ["record_metric", "compare_with_expected_label", "include_in_digest", "recommend_human_review"]
        assert isinstance(decision["human_or_atlas_decision"], dict)
        assert isinstance(decision["outcome"], dict)
        assert isinstance(decision["npu_proof"], dict)
        assert isinstance(decision["latency"], dict)
        assert isinstance(decision["fallback"], dict)
        assert decision["privacy"]["payload_logged"] is False
        assert decision["privacy"]["contains_private_payload"] is False
        assert decision["authority_flags"]["advisory_only"] is True
        assert decision["authority_flags"]["requires_human_approval"] is True
        assert "notes" in decision
    metrics = summary["minimum_metrics"]
    assert metrics["privacy_violation_count"] == 0
    assert metrics["actual_side_effect_count"] == 0
    assert "records_by_input_class" in metrics
    assert "records_by_service" in metrics
    assert "fallback_counts_by_kind" in metrics
    assert "latency_by_service" in metrics
|
||||||
|
|
||||||
|
|
||||||
|
def test_each_lane_has_expected_recommendation() -> None:
    """Each named fixture produces the expected recommendation label."""
    harness = load_harness()
    summary = harness.run(FIXTURES)
    # Index decisions by fixture id for direct lookup.
    by_id = {decision["source"]["fixture_id"]: decision for decision in summary["decisions"]}
    assert by_id["context-gate-coding-safe"]["recommendation"]["label"] == "prepare_context_bundle"
    assert by_id["cron-normal-log"]["recommendation"]["label"] == "log"
    assert by_id["batch-receipt-action"]["recommendation"]["label"] == "review_item"
    assert by_id["voice-audio-action-needed"]["recommendation"]["label"] == "require_human_review"
    assert by_id["kanban-review-ready"]["recommendation"]["label"] == "ready_for_review"
    assert by_id["gateway-authority-violation"]["recommendation"]["label"] == "block_authority_violation"
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_json_and_markdown_are_parseable_and_no_mismatch() -> None:
    """Run the CLI once per output format and sanity-check both outputs.

    The JSON run (with ``--fail-on-mismatch``) must exit 0, report zero
    expected-outcome mismatches and carry no top-level ``decisions`` key.
    The Markdown run must exit 0 and contain the report title and a
    ``context_gate`` table cell.
    """
    base_cmd = [sys.executable, str(SCRIPT), "--fixtures", str(FIXTURES), "--format"]
    # Shared subprocess settings: capture text output, never raise on exit code.
    capture = {
        "cwd": ROOT,
        "text": True,
        "stdout": subprocess.PIPE,
        "stderr": subprocess.PIPE,
        "check": False,
    }

    as_json = subprocess.run([*base_cmd, "json", "--fail-on-mismatch"], **capture)
    assert as_json.returncode == 0, as_json.stderr
    payload = json.loads(as_json.stdout)
    assert payload["totals"]["expected_outcome_mismatches"] == 0
    assert "decisions" not in payload

    as_markdown = subprocess.run([*base_cmd, "markdown"], **capture)
    assert as_markdown.returncode == 0, as_markdown.stderr
    for fragment in ("# NPU advisory dry-run comparison", "| context_gate |"):
        assert fragment in as_markdown.stdout
|
||||||
|
|
||||||
|
|
||||||
|
def test_authority_violation_gate_can_fail_ci_when_requested() -> None:
    """With ``--fail-on-authority-violation`` the CLI must exit 1 and still print JSON.

    The parsed totals are expected to report exactly one authority-safe-flag
    violation for the fixture set.
    """
    command = [
        sys.executable,
        str(SCRIPT),
        "--fixtures",
        str(FIXTURES),
        "--fail-on-authority-violation",
    ]
    completed = subprocess.run(
        command,
        cwd=ROOT,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )
    assert completed.returncode == 1

    totals = json.loads(completed.stdout)["totals"]
    assert totals["authority_safe_flag_violations"] == 1
|
||||||
@@ -0,0 +1,202 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import importlib.util
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
# Directory one level above the folder that contains this test file.
ROOT = Path(__file__).resolve().parents[1]
# Path of the dry-run CLI script exercised by the tests below.
SCRIPT = ROOT.joinpath("scripts", "npu-batch-triage-dry-run.py")
|
||||||
|
|
||||||
|
|
||||||
|
def load_script_module():
    """Import the dry-run script from its file path and return the module object.

    The script name contains hyphens, so it cannot be imported with a normal
    ``import`` statement; it is loaded through ``importlib`` under the name
    ``npu_batch_triage_dry_run`` instead.

    The module is registered in ``sys.modules`` before it is executed, as the
    importlib "importing a source file directly" recipe does and as the other
    script loaders in this test suite do. Without the registration, code that
    looks up ``sys.modules[module.__name__]`` while the script is being
    executed would not find the module.

    Returns:
        The freshly executed module. Each call re-executes the script and
        replaces any previous ``sys.modules`` entry of the same name.
    """
    spec = importlib.util.spec_from_file_location("npu_batch_triage_dry_run", SCRIPT)
    assert spec and spec.loader
    module = importlib.util.module_from_spec(spec)
    # Register first so import-time lookups by module name succeed.
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    return module
|
||||||
|
|
||||||
|
|
||||||
|
def run_cli(*args: str) -> subprocess.CompletedProcess[str]:
    """Run the dry-run script in a child interpreter and return the completed process.

    The child runs with ``ROOT`` as its working directory; stdout and stderr
    are captured as text, and a non-zero exit status does not raise.
    """
    command = [sys.executable, str(SCRIPT)]
    command.extend(args)
    return subprocess.run(
        command,
        cwd=ROOT,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def manifest(tmp_path: Path, lane: str = "screenshots", approved: bool = True, root: Path | None = None, exts: list[str] | None = None) -> Path:
    """Write a single-lane triage manifest under ``tmp_path`` and return its path.

    Args:
        tmp_path: Directory that receives ``triage-roots.yaml``.
        lane: Name of the only lane listed under ``roots``.
        approved: Value stored in the lane's ``approved`` field.
        root: Lane root directory; defaults to ``tmp_path / "approved"``.
            The directory is created if it does not exist.
        exts: Allowed extensions; a falsy value selects
            ``[".png", ".txt", ".wav"]``.

    Returns:
        Path of the YAML file that was written.
    """
    lane_root = root if root else tmp_path / "approved"
    lane_root.mkdir(parents=True, exist_ok=True)

    policy = {
        "default_mode": "dry_run",
        "require_explicit_root": True,
        "allow_external_uploads": False,
        "allow_mutations": False,
        "log_raw_text": False,
    }
    lane_entry = {
        "approved": approved,
        "root": str(lane_root),
        "allowed_extensions": exts if exts else [".png", ".txt", ".wav"],
        "max_files": 10,
        "max_file_mb": 5,
    }
    document = {"version": 1, "policy": policy, "roots": {lane: lane_entry}}

    manifest_path = tmp_path / "triage-roots.yaml"
    manifest_path.write_text(yaml.safe_dump(document))
    return manifest_path
|
||||||
|
|
||||||
|
|
||||||
|
def test_missing_manifest_fails_closed(tmp_path: Path) -> None:
    """A manifest path that does not exist must yield exit code 2 and a fail-closed report."""
    absent_manifest = tmp_path / "missing.yaml"
    completed = run_cli(
        "--manifest", str(absent_manifest),
        "--lane", "screenshots",
        "--dry-run", "--no-npu", "--json",
    )
    report = json.loads(completed.stdout)

    assert completed.returncode == 2
    assert report["ok"] is False
    assert report["error"] == "fail_closed"
    assert "manifest_missing" in report["reason"]
    # Every mutation flag in the report must be exactly False.
    assert not any(flag is not False for flag in report["mutations"].values())
|
||||||
|
|
||||||
|
|
||||||
|
def test_unapproved_lane_fails_closed(tmp_path: Path) -> None:
    """A lane written with ``approved=False`` must be refused with exit code 2."""
    manifest_path = manifest(tmp_path, approved=False)
    cli_args = ["--manifest", str(manifest_path), "--lane", "screenshots", "--dry-run", "--no-npu", "--json"]
    completed = run_cli(*cli_args)
    report = json.loads(completed.stdout)

    assert completed.returncode == 2
    assert report["ok"] is False
    assert "lane_unapproved:screenshots" in report["reason"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_request_root_cannot_broaden_manifest_root(tmp_path: Path) -> None:
    """Passing ``--root`` with the parent of the manifest root must be rejected.

    The manifest lists ``approved/narrow``; the request asks for ``approved``.
    """
    narrow_root = tmp_path / "approved" / "narrow"
    manifest_path = manifest(tmp_path, root=narrow_root)
    wider_root = narrow_root.parent

    completed = run_cli(
        "--manifest", str(manifest_path),
        "--lane", "screenshots",
        "--root", str(wider_root),
        "--dry-run", "--no-npu", "--json",
    )
    report = json.loads(completed.stdout)

    assert completed.returncode == 2
    assert report["ok"] is False
    assert "request_root_broadens_approval:screenshots" in report["reason"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_symlink_escape_is_skipped_and_output_redacted(tmp_path: Path) -> None:
    """A symlink pointing outside the lane root is skipped and sidecar text stays out of the report.

    Layout: ``approved/note.png`` with a ``note.png.txt`` sidecar containing
    the word "Invoice", plus ``approved/escape.png`` linking to a file in a
    sibling ``outside`` directory. Only ``.png`` is allowed by the manifest.
    """
    lane_root = tmp_path / "approved"
    foreign_dir = tmp_path / "outside"
    lane_root.mkdir()
    foreign_dir.mkdir()

    (lane_root / "note.png").write_bytes(b"fake image")
    (lane_root / "note.png.txt").write_text("Invoice payment due 2026-06-10 $42.00")
    (foreign_dir / "secret.png").write_bytes(b"secret")
    os.symlink(foreign_dir / "secret.png", lane_root / "escape.png")

    manifest_path = manifest(tmp_path, root=lane_root, exts=[".png"])
    completed = run_cli("--manifest", str(manifest_path), "--lane", "screenshots", "--dry-run", "--no-npu", "--json")
    report = json.loads(completed.stdout)

    assert completed.returncode == 0
    assert report["ok"] is True
    assert report["files_processed"] == 1
    assert report["skipped"]["symlink_escape"] == 1

    first_item = report["items"][0]
    assert first_item["basename"] == "note.png"
    assert first_item["raw_text_redacted"] is True
    assert first_item["full_path_included"] is False
    assert "full_path" not in first_item

    # The sidecar text must not appear anywhere in the serialized report.
    assert "Invoice" not in json.dumps(report)
    assert report["npu"]["claimed"] is False
    assert not any(flag is not False for flag in report["mutations"].values())
|
||||||
|
|
||||||
|
|
||||||
|
def test_committed_sample_manifest_cpu_smoke() -> None:
    """Smoke-run the ``receipts`` lane from the committed test manifest with ``--no-npu``.

    Manifest and root are given as relative paths, so this relies on
    ``run_cli`` using the repository root as working directory.
    """
    cli_args = [
        "--manifest", "config/triage-roots.test.yaml",
        "--lane", "receipts",
        "--root", "openvino-doc-image-triage-npu/samples",
        "--limit", "2",
        "--dry-run",
        "--no-npu",
        "--json",
    ]
    completed = run_cli(*cli_args)
    report = json.loads(completed.stdout)

    assert completed.returncode == 0
    assert report["ok"] is True
    assert report["lane"] == "receipts"
    assert report["dry_run"] is True
    assert report["files_processed"] == 2

    unclaimed_npu = {"busy_delta_us": None, "claimed": False, "proof_ok": False}
    assert report["npu"] == unclaimed_npu or report["npu"]["claimed"] is False
    assert not any(flag is not False for flag in report["mutations"].values())
    # Items without the key are treated as redacted (default True).
    assert all(entry.get("raw_text_redacted", True) for entry in report["items"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_audio_lane_no_npu_does_not_transcribe_or_claim(tmp_path: Path) -> None:
    """With ``--no-npu`` a ``.wav`` item must be reported as not transcribed and NPU as unclaimed.

    The file content is deliberately not valid audio.
    """
    voice_root = tmp_path / "voice"
    voice_root.mkdir()
    (voice_root / "memo.wav").write_bytes(b"not really wav; no-npu mode must not decode")

    manifest_path = manifest(tmp_path, lane="voice_memos", root=voice_root, exts=[".wav"])
    completed = run_cli("--manifest", str(manifest_path), "--lane", "voice_memos", "--dry-run", "--no-npu", "--json")
    report = json.loads(completed.stdout)

    assert completed.returncode == 0
    first_item = report["items"][0]
    assert first_item["transcribed"] is False
    assert first_item["raw_transcript_logged"] is False
    assert report["npu"]["claimed"] is False
    assert not any(flag is not False for flag in report["mutations"].values())
|
||||||
|
|
||||||
|
|
||||||
|
def test_external_whisper_url_fails_closed_before_audio_read(tmp_path: Path) -> None:
    """``multipart_transcribe`` must raise ``FailClosed`` for a non-loopback URL.

    The audio path passed in does not exist, so the expected error can only
    come from the URL check and not from reading the file.
    """
    script = load_script_module()
    nonexistent_audio = tmp_path / "missing.wav"
    remote_url = "http://example.com:18816/v1/audio/transcriptions"

    with pytest.raises(script.FailClosed, match="whisper_url_not_loopback"):
        script.multipart_transcribe(nonexistent_audio, remote_url, 0.01)
|
||||||
|
|
||||||
|
|
||||||
|
def test_audio_lane_rejects_external_whisper_url(tmp_path: Path) -> None:
    """An ``https://example.com`` whisper URL must end the CLI run with exit code 2.

    The report must name ``whisper_url_scheme_not_http`` as the reason and
    list every mutation flag as False.
    """
    voice_root = tmp_path / "voice"
    voice_root.mkdir()
    (voice_root / "memo.wav").write_bytes(b"synthetic audio bytes")
    manifest_path = manifest(tmp_path, lane="voice_memos", root=voice_root, exts=[".wav"])

    cli_args = [
        "--manifest", str(manifest_path),
        "--lane", "voice_memos",
        "--dry-run",
        "--whisper-url", "https://example.com/v1/audio/transcriptions",
        "--json",
    ]
    completed = run_cli(*cli_args)
    report = json.loads(completed.stdout)

    assert completed.returncode == 2
    assert report["ok"] is False
    assert report["error"] == "fail_closed"
    assert report["reason"] == "whisper_url_scheme_not_http"

    mutation_kinds = (
        "obsidian",
        "rag",
        "vector_db",
        "sends",
        "file_moves",
        "routing",
        "memory",
        "service_restarts",
    )
    assert report["mutations"] == dict.fromkeys(mutation_kinds, False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_localhost_whisper_url_is_allowed() -> None:
    """``validate_local_whisper_url`` must return a truthy value for loopback hosts."""
    script = load_script_module()
    loopback_urls = (
        "http://localhost:18816/v1/audio/transcriptions",
        "http://127.0.0.1:18816/v1/audio/transcriptions",
        "http://[::1]:18816/v1/audio/transcriptions",
    )
    for candidate in loopback_urls:
        assert script.validate_local_whisper_url(candidate)
|
||||||
@@ -0,0 +1,236 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import importlib.util
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# The digest script has a hyphenated file name, so it is loaded by path at
# import time and exposed to the tests below as ``digest``.
SCRIPT = Path(__file__).resolve().parents[1].joinpath("scripts", "npu-utilization-digest.py")
spec = importlib.util.spec_from_file_location("npu_utilization_digest", SCRIPT)
assert spec is not None and spec.loader is not None
digest = importlib.util.module_from_spec(spec)
# Register before executing so lookups by module name work during import.
sys.modules[spec.name] = digest
spec.loader.exec_module(digest)
|
||||||
|
|
||||||
|
|
||||||
|
def test_positive_sysfs_delta_required_for_proof(tmp_path, monkeypatch):
    """A self-reported NPU delta is not proof when the busy counter file does not move.

    The fake POST rewrites the counter file with the same value ("100") while
    its response claims ``npu_busy_delta_us`` of 7; the probe must then report
    ``proof_ok`` False with the ``no_positive_sysfs_delta`` warning.
    """
    counter_file = tmp_path / "busy"
    counter_file.write_text("100")

    def healthy(service, url, timeout, gate="none", mode="health_only"):
        row = digest.ServiceRow(service=service, reachable=True, mode=mode, gate=gate)
        return row, {"ok": True}

    def unchanged_counter_post(url, payload, timeout):
        counter_file.write_text("100")
        body = {"data": [{"embedding": [0.1, 0.2]}], "embedding_dim": 2, "npu_busy_delta_us": 7}
        return 200, body

    monkeypatch.setattr(digest, "health_row", healthy)
    result = digest.probe_embeddings(1, busy_path=counter_file, post_json=unchanged_counter_post)

    assert result.probe_ran is True
    assert result.proof_ok is False
    assert "no_positive_sysfs_delta" in result.warnings
|
||||||
|
|
||||||
|
|
||||||
|
def test_embeddings_row_redacts_vectors(tmp_path):
    """Text and JSONL renderings of an embeddings row must not contain vector values."""
    embeddings_row = digest.ServiceRow(
        service="embeddings",
        reachable=True,
        probe_ran=True,
        proof_ok=True,
        calls=1,
        items=1,
        avg_ms=12.3,
        npu_delta_us=5,
        embedding_count=1,
        embedding_dim=3,
        mode="NPU",
    )
    rows = [embeddings_row]
    summary = digest.build_summary(rows, None, 5, "2026-06-05T14:20:00-07:00")

    rendered = digest.render_text(summary, rows)
    assert "embedding_count" not in rendered  # counts are intentionally terse in text
    assert "0.1" not in rendered

    jsonl_path = digest.write_jsonl(summary, rows, tmp_path)
    jsonl_body = jsonl_path.read_text()
    assert "embedding" in jsonl_body  # compact metadata key is okay
    assert "[0.1" not in jsonl_body
    assert "embedding_dim" in jsonl_body
|
||||||
|
|
||||||
|
|
||||||
|
def test_classifier_dry_run_payload(tmp_path, monkeypatch):
    """The classifier probe must send dry-run options and roll the response up into its row.

    The fake POST records the request payload and moves the busy counter file
    from 10 to 35 before returning an ``escalate`` recommendation.
    """
    counter_file = tmp_path / "busy"
    counter_file.write_text("10")
    captured_request = {}

    def healthy(service, url, timeout, gate="none", mode="health_only"):
        row = digest.ServiceRow(service=service, reachable=True, mode=mode, gate=gate)
        return row, {"ok": True}

    def recording_post(url, payload, timeout):
        captured_request.update(payload)
        counter_file.write_text("35")
        response = {
            "labels": {"tool_needed": True, "duplicate": False},
            "recommendation": "escalate",
            "confidence": 0.84,
            "authority_flags": {"tool_execution": False, "memory_write": False},
            "allowed_actions": ["log", "recommend"],
            "actual_action": "dry_run",
            "npu_busy_delta_us": 25,
        }
        return 200, response

    monkeypatch.setattr(digest, "health_row", healthy)
    result = digest.probe_classifier(1, busy_path=counter_file, post_json=recording_post)

    # Request side: the probe must ask for a dry run without evidence.
    sent_options = captured_request["options"]
    assert sent_options["dry_run"] is True
    assert sent_options["include_evidence"] is False

    # Response side: values carried over into the service row.
    assert result.escalate == 1
    assert result.suppress == 0
    assert result.recommendation == "escalate"
    assert result.confidence == 0.84
    assert result.confidence_bucket == "high"
    assert result.authority_violations == 0
    assert result.proof_ok is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_doc_triage_allowed_root_compact(tmp_path):
    """The text digest must show the allowed-roots count but not the temp directory path."""
    triage_row = digest.ServiceRow(
        service="doc_triage",
        reachable=True,
        probe_ran=True,
        proof_ok=True,
        files=1,
        avg_ms=9,
        npu_delta_us=11,
        allowed_roots_count=1,
        mode="NPU-via-embedding-service",
        gate="closed:private-root",
    )
    rows = [triage_row]
    summary = digest.build_summary(rows, None, 11, "2026-06-05T14:20:00-07:00")
    rendered = digest.render_text(summary, rows)

    assert "allowed_roots=1" in rendered
    assert str(tmp_path) not in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_advisory_gateway_health_only(monkeypatch):
    """``health_row`` for the advisory gateway must not issue any POST request."""
    recorded_posts = []

    def stub_get(url, timeout):
        return 200, {"ok": True}

    def stub_post(url, payload, timeout):
        # Any call here is recorded so the final assertion can detect it.
        recorded_posts.append((url, payload))
        return 200, {}

    monkeypatch.setattr(digest, "http_get_json", stub_get)
    monkeypatch.setattr(digest, "http_post_json", stub_post)

    gateway_row, _ = digest.health_row(
        "advisory_gateway",
        digest.ADVISORY_HEALTH_URL,
        1,
        gate="closed:advisory-post",
    )

    assert gateway_row.reachable is True
    assert gateway_row.probe_ran is False
    assert gateway_row.mode == "health_only"
    assert recorded_posts == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_genai_loaded_false_skips_default_smoke(monkeypatch):
    """When health reports ``loaded: False`` and smoke is off, the GenAI probe is skipped.

    The skip must be recorded as reason ``skipped_cold_load``, one fallback,
    and a matching warning.
    """

    def healthy_but_unloaded(service, url, timeout, gate="none", mode="health_only"):
        row = digest.ServiceRow(service=service, reachable=True, mode=mode)
        return row, {"ok": True, "loaded": False}

    monkeypatch.setattr(digest, "health_row", healthy_but_unloaded)
    result = digest.probe_genai(1, include_smoke=False)

    assert result.probe_ran is False
    assert result.loaded is False
    assert result.reason == "skipped_cold_load"
    assert result.fallbacks == 1
    assert "skipped_cold_load" in result.warnings
|
||||||
|
|
||||||
|
|
||||||
|
def test_disabled_proof_smokes_count_as_fallbacks(monkeypatch):
    """Whisper, GenAI and doc-triage probes run with smoke disabled each count as a fallback."""

    def healthy_and_loaded(service, url, timeout, gate="none", mode="health_only"):
        row = digest.ServiceRow(service=service, reachable=True, mode=mode, gate=gate)
        return row, {"ok": True, "loaded": True}

    monkeypatch.setattr(digest, "health_row", healthy_and_loaded)

    probes = (digest.probe_whisper, digest.probe_genai, digest.probe_doc_triage)
    rows = [probe(1, include_smoke=False) for probe in probes]
    summary = digest.build_summary(rows, None, 0, "2026-06-05T14:20:00-07:00")

    assert summary["fallbacks"] == 3
    assert summary["warnings"] == {"skipped": 2, "smoke_disabled": 1}
|
||||||
|
|
||||||
|
|
||||||
|
def test_jsonl_shape(tmp_path):
    """The JSONL output starts with a summary record followed by a service record."""
    embeddings_row = digest.ServiceRow(
        service="embeddings",
        reachable=True,
        probe_ran=True,
        proof_ok=True,
        calls=1,
        npu_delta_us=1,
    )
    rows = [embeddings_row]
    summary = digest.build_summary(rows, None, 1, "2026-06-05T14:20:00-07:00")
    jsonl_path = digest.write_jsonl(summary, rows, tmp_path)

    records = list(map(json.loads, jsonl_path.read_text().splitlines()))

    summary_record = records[0]
    assert summary_record["type"] == "summary"
    assert summary_record["request_counts_by_service"] == {"embeddings": 1}
    assert summary_record["npu_busy_delta_us_by_service"] == {"embeddings": 1}

    service_record = records[1]
    assert service_record["type"] == "service"
    assert service_record["service"] == "embeddings"
|
||||||
|
|
||||||
|
|
||||||
|
def test_summary_observability_rollups_and_text():
    """Per-service rollups in the summary and their text rendering match three sample rows."""
    classifier_row = digest.ServiceRow(
        service="classifier",
        reachable=True,
        calls=1,
        npu_delta_us=25,
        fallbacks=0,
        escalate=1,
        suppress=0,
        recommendation="escalate",
        confidence=0.84,
        confidence_bucket="high",
        authority_violations=0,
    )
    triage_row = digest.ServiceRow(
        service="doc_triage",
        reachable=True,
        calls=1,
        npu_delta_us=7,
        fallbacks=1,
        warnings=["no_positive_sysfs_delta"],
    )
    gateway_row = digest.ServiceRow(
        service="advisory_gateway",
        reachable=True,
        gate="closed:advisory-post",
        authority_violations=1,
        warnings=["authority_violation"],
    )
    rows = [classifier_row, triage_row, gateway_row]
    summary = digest.build_summary(rows, None, 32, "2026-06-05T14:20:00-07:00")

    # Summary key -> expected value, checked in this order.
    expected_rollups = {
        "request_counts_by_service": {"classifier": 1, "doc_triage": 1},
        "npu_busy_delta_us_by_service": {"classifier": 25, "doc_triage": 7},
        "fallbacks_by_service": {"doc_triage": 1},
        "confidence_distribution": {"high": 1},
        "recommendation_counts": {"escalate": 1},
        "authority_violations": 1,
    }
    for key, expected in expected_rollups.items():
        assert summary[key] == expected

    rendered = digest.render_text(summary, rows)
    for fragment in ("authority_violations=1", "recommendations: escalate=1", "confidence: high=1"):
        assert fragment in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_authority_violation_detection():
    """A payload with one true flag, one extra allowed action and an outbound action counts 3."""
    risky_payload = {
        "authority_flags": {"tool_execution": True, "memory_write": False},
        "allowed_actions": ["log", "service_restart"],
        "actual_action": "outbound_send",
    }
    violation_count = digest.count_authority_violations(risky_payload)
    assert violation_count == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_v1_authority_violation_detection():
    """The v1 payload shape counts 0 violations when safe and 1 after enabling one ``can_*`` flag."""
    safe_flags = {
        "can_route_atlas": False,
        "can_write_memory": False,
        "can_execute_tools": False,
        "can_restart_services": False,
        "can_send_outbound": False,
        "can_scan_private_roots": False,
        "can_mutate_vector_store": False,
        "can_post_advisory_event": False,
        "can_change_gateway_config": False,
        "requires_human_approval": True,
        "advisory_only": True,
    }
    safe_payload = {
        "authority_flags": safe_flags,
        "allowed_actions": ["record_metric", "compare_with_expected_label", "include_in_digest", "recommend_human_review"],
        "actual_action": {"kind": "dry_run_reported", "performed": False, "performed_by": "harness", "side_effects": []},
    }
    assert digest.count_authority_violations(safe_payload) == 0

    # Copy the payload and flip a single capability flag; the original stays untouched.
    unsafe_payload = {
        **safe_payload,
        "authority_flags": {**safe_flags, "can_execute_tools": True},
    }
    assert digest.count_authority_violations(unsafe_payload) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_recommendation_only_and_zero_confidence_rollups():
    """A confidence of exactly 0.0 must be extracted and rolled up, not treated as missing."""
    zero_confidence_payload = {
        "labels": {"no_op": {"confidence": 0.0, "score": 0.9}},
        "recommendation": "suppress",
    }
    assert digest.extract_confidence(zero_confidence_payload) == 0.0

    suppress_row = digest.ServiceRow(
        service="classifier",
        reachable=True,
        recommendation="suppress",
        confidence=0.0,
        confidence_bucket="low",
    )
    summary = digest.build_summary([suppress_row], None, None, "2026-06-05T14:20:00-07:00")

    assert summary["recommendation_counts"] == {"suppress": 1}
    assert summary["confidence_distribution"] == {"low": 1}
|
||||||
|
|
||||||
|
|
||||||
|
def test_exit_codes(monkeypatch):
    """``main`` returns 0 for an unproven probe by default and 2 under ``--strict-proof``."""
    unproven_row = digest.ServiceRow(
        service="embeddings",
        reachable=True,
        probe_ran=True,
        proof_ok=False,
        warnings=["no_positive_sysfs_delta"],
    )
    rows = [unproven_row]
    summary = digest.build_summary(rows, None, 0, "2026-06-05T14:20:00-07:00")

    def canned_run(args):
        # Replace the real probing with the prepared summary and rows.
        return summary, rows

    monkeypatch.setattr(digest, "run", canned_run)

    assert digest.main(["--no-write"]) == 0
    assert digest.main(["--no-write", "--strict-proof"]) == 2
|
||||||
@@ -0,0 +1,170 @@
|
|||||||
|
import importlib.util
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
import unittest
|
||||||
|
from argparse import Namespace
|
||||||
|
from pathlib import Path
|
||||||
|
from tempfile import TemporaryDirectory
|
||||||
|
from typing import cast
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
# Path of the pipeline script under test, relative to the parent of this file's directory.
MODULE_PATH = Path(__file__).resolve().parents[1].joinpath("scripts", "npu_voice_audio_pipeline.py")
|
||||||
|
|
||||||
|
|
||||||
|
def load_module():
    """Load the pipeline script from ``MODULE_PATH`` and return it as a module.

    The module is registered in ``sys.modules`` under
    ``npu_voice_audio_pipeline`` before it is executed. Every call executes
    the script again and replaces the previous entry.
    """
    module_spec = importlib.util.spec_from_file_location("npu_voice_audio_pipeline", MODULE_PATH)
    assert module_spec is not None and module_spec.loader is not None

    loaded = importlib.util.module_from_spec(module_spec)
    sys.modules[module_spec.name] = loaded
    module_spec.loader.exec_module(loaded)
    return cast(types.ModuleType, loaded)
|
||||||
|
|
||||||
|
|
||||||
|
class NpuVoiceAudioPipelineTests(unittest.TestCase):
    """Tests for path validation, label compaction, gating and endpoint policy of the pipeline script."""

    def setUp(self):
        # Each test works against a freshly executed copy of the script.
        self.pipeline = load_module()

    @staticmethod
    def _pipeline_args(audio, whisper_url):
        """Build the ``run_pipeline`` argument namespace; only audio path and whisper URL vary."""
        return Namespace(
            audio=audio,
            id="voice-smoke",
            source="local_file",
            title="synthetic smoke",
            language="en",
            whisper_url=whisper_url,
            classifier_url="http://127.0.0.1:18819/v1/classify",
            dry_run=True,
            include_transcript=False,
            include_transcript_preview_chars=0,
            include_raw=False,
            max_bytes=1024 * 1024,
            max_audio_seconds=300,
            max_transcript_chars=6000,
            timeout=1,
        )

    def test_rejects_relative_audio_path(self):
        """A relative audio path is rejected with ``audio_path_must_be_absolute``."""
        expect_error = self.assertRaisesRegex(self.pipeline.PipelineError, "audio_path_must_be_absolute")
        with expect_error:
            self.pipeline.validate_audio_path("memo.wav", max_bytes=1024, max_audio_seconds=300)

    def test_rejects_symlink_audio_path(self):
        """A symlink to an existing file is rejected with ``audio_path_must_not_be_symlink``."""
        with TemporaryDirectory() as tmp:
            workdir = Path(tmp)
            real_file = workdir / "memo.wav"
            real_file.write_bytes(b"RIFFfake")
            alias = workdir / "link.wav"
            alias.symlink_to(real_file)

            expect_error = self.assertRaisesRegex(self.pipeline.PipelineError, "audio_path_must_not_be_symlink")
            with expect_error:
                self.pipeline.validate_audio_path(str(alias), max_bytes=1024, max_audio_seconds=None)

    def test_compact_labels_unwraps_classifier_label_values(self):
        """``compact_labels`` turns ``{"value": x}`` wrappers into plain ``x`` values."""
        plain_values = {
            "workflow_category": "media",
            "tool_needed": True,
            "urgency": "high",
            "safety_confirmation_required": False,
        }
        wrapped = {name: {"value": value} for name, value in plain_values.items()}

        labels = self.pipeline.compact_labels({"labels": wrapped})

        self.assertEqual(labels["workflow_category"], "media")
        self.assertTrue(labels["tool_needed"])
        self.assertEqual(labels["urgency"], "high")
        self.assertFalse(labels["safety_confirmation_required"])

    def test_gate_blocks_missing_npu_proof(self):
        """Without whisper proof the gate blocks, even though the request is action-worthy."""
        classifier_labels = {"tool_needed": True, "urgency": "normal", "safety_confirmation_required": False}
        outcome = self.pipeline.decide_gate(
            "remind me to review logs",
            classifier_labels,
            whisper_proven=False,
            classifier_proven=True,
        )
        action_worthy, atlas_gate, next_gate = outcome

        self.assertTrue(action_worthy)
        self.assertEqual(atlas_gate, "blocked_missing_npu_proof")
        self.assertEqual(next_gate, "npu_proof_required")

    def test_loopback_endpoint_policy_accepts_local_urls(self):
        """Loopback URLs are returned unchanged by ``validate_loopback_endpoint``."""
        loopback_urls = (
            "http://localhost:18816/v1/audio/transcriptions",
            "https://localhost:18816/v1/audio/transcriptions",
            "http://127.0.0.1:18816/v1/audio/transcriptions",
            "http://127.42.0.9:18816/v1/audio/transcriptions",
            "http://[::1]:18816/v1/audio/transcriptions",
        )
        for url in loopback_urls:
            with self.subTest(url=url):
                validated = self.pipeline.validate_loopback_endpoint(url, label="whisper")
                self.assertEqual(validated, url)

    def test_loopback_endpoint_policy_rejects_remote_urls(self):
        """Remote hosts, private-network addresses and non-HTTP schemes raise ``PipelineError``."""
        remote_urls = (
            "http://example.com:18816/v1/audio/transcriptions",
            "https://10.0.0.5:18816/v1/audio/transcriptions",
            "http://192.168.1.10:18816/v1/audio/transcriptions",
            "http://[2001:db8::1]:18816/v1/audio/transcriptions",
            "file:///tmp/audio.wav",
        )
        error_pattern = "whisper_url_.*not_.*|whisper_url_scheme_not_allowed"
        for url in remote_urls:
            with self.subTest(url=url):
                with self.assertRaisesRegex(self.pipeline.PipelineError, error_pattern):
                    self.pipeline.validate_loopback_endpoint(url, label="whisper")

    def test_run_pipeline_rejects_remote_url_before_audio_read(self):
        """A remote whisper URL fails the run; the audio path used here does not exist."""
        args = self._pipeline_args(
            "/tmp/does-not-exist-remote-rejection-smoke.ogg",
            "http://example.com:18816/v1/audio/transcriptions",
        )
        with self.assertRaisesRegex(self.pipeline.PipelineError, "whisper_url_host_not_loopback"):
            self.pipeline.run_pipeline(args)

    def test_run_pipeline_compact_success_with_mocked_services(self):
        """With both services and the busy counter mocked, the run succeeds and stays compact."""
        with TemporaryDirectory() as tmp:
            audio = Path(tmp) / "memo.ogg"
            audio.write_bytes(b"not-real-audio-but-services-are-mocked")
            args = self._pipeline_args(str(audio), "http://127.0.0.1:18816/v1/audio/transcriptions")

            # Successive counter reads: 100 -> 150, then 150 -> 225.
            busy_values = iter([100, 150, 150, 225])
            whisper_reply = {"text": "remind me to check npu logs", "npu_busy_delta_us": 50}
            classifier_reply = {
                "dry_run": True,
                "labels": {
                    "workflow_category": {"value": "media"},
                    "tool_needed": {"value": True},
                    "urgency": {"value": "normal"},
                    "safety_confirmation_required": {"value": False},
                },
                "npu_busy_delta_us": 75,
                "sysfs_npu_busy_delta_us": 75,
            }

            busy_patch = mock.patch.object(self.pipeline, "read_npu_busy_us", side_effect=lambda: next(busy_values))
            whisper_patch = mock.patch.object(self.pipeline, "post_whisper", return_value=whisper_reply)
            classifier_patch = mock.patch.object(self.pipeline, "post_json", return_value=classifier_reply)

            with busy_patch, whisper_patch, classifier_patch:
                result = self.pipeline.run_pipeline(args)

            self.assertTrue(result["ok"])
            self.assertEqual(result["external_sends"], 0)
            self.assertEqual(result["writes"], 0)
            self.assertEqual(result["whisper_sysfs_delta_us"], 50)
            self.assertEqual(result["classifier_observed_sysfs_delta_us"], 75)
            self.assertEqual(result["atlas_gate"], "advisory_only_not_sent")
            self.assertNotIn("transcript", result)
            # The result must be JSON-serializable.
            json.dumps(result)
||||||
|
|
||||||
|
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||||
Reference in New Issue
Block a user