Compare commits
36 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 22e6ee90d2 | |||
| 72434c8bc3 | |||
| dae2a57124 | |||
| 08fb9ca686 | |||
| 9e5ffa0fd0 | |||
| d2bad88596 | |||
| 6906c2079b | |||
| 6155b54ab5 | |||
| 5a14adaf58 | |||
| b7b4edf0f5 | |||
| 24d620e9c9 | |||
| ac3590df47 | |||
| cefd8789cd | |||
| aeb3c9f8fb | |||
| 59c5fd3e57 | |||
| 401321a6d5 | |||
| 85c496a59e | |||
| 06cd49247a | |||
| 71f3c05587 | |||
| 06f235d26b | |||
| d2f4dd7cef | |||
| dad13e7648 | |||
| 137a2c28d2 | |||
| 1772e5a1f3 | |||
| b88331be42 | |||
| 4815750011 | |||
| 99a4f93ce7 | |||
| 6536320774 | |||
| 420df812c0 | |||
| 703c1df860 | |||
| 2ef9e3dfd2 | |||
| d3373e7234 | |||
| ea452886f3 | |||
| 0683253157 | |||
| 0a6f84fbf3 | |||
| 83d0ced08c |
@@ -0,0 +1,16 @@
|
|||||||
|
# Telegram Bot Token from @BotFather
|
||||||
|
FLYNN_TELEGRAM_TOKEN=your-bot-token-here
|
||||||
|
|
||||||
|
# API Keys
|
||||||
|
ANTHROPIC_API_KEY=your-anthropic-api-key-here
|
||||||
|
OPENAI_API_KEY=your-openai-api-key-here
|
||||||
|
GEMINI_API_KEY=your-gemini-api-key-here
|
||||||
|
ZHIPUAI_API_KEY=your-zhipuai-api-key-here
|
||||||
|
ZAI_API_KEY=your-zai-api-key-here
|
||||||
|
|
||||||
|
BRAVE_API_KEY=your-brave-api-key-here
|
||||||
|
GITHUB_TOKEN=your-github-token-here
|
||||||
|
|
||||||
|
# LiteLLM Configuration
|
||||||
|
LITELLM_MASTER_KEY=sk-your-litellm-master-key-here
|
||||||
|
LITELLM_SALT_KEY=your-litellm-salt-key-here
|
||||||
-38
@@ -3,9 +3,6 @@
|
|||||||
*.swp
|
*.swp
|
||||||
*.swo
|
*.swo
|
||||||
*~
|
*~
|
||||||
.Trash-*/
|
|
||||||
__pycache__/
|
|
||||||
*.py[cod]
|
|
||||||
|
|
||||||
# ── OpenClaw ephemeral / binary / noisy data ──────────────────────────────
|
# ── OpenClaw ephemeral / binary / noisy data ──────────────────────────────
|
||||||
openclaw/workspace/
|
openclaw/workspace/
|
||||||
@@ -17,21 +14,7 @@ openclaw/media/
|
|||||||
openclaw/memory/*.sqlite
|
openclaw/memory/*.sqlite
|
||||||
openclaw/memory/*.tmp*
|
openclaw/memory/*.tmp*
|
||||||
openclaw/agents/*/sessions/
|
openclaw/agents/*/sessions/
|
||||||
openclaw/agents/*/agent/auth-*.json
|
|
||||||
openclaw/agents/*/agent/harness-auth/
|
|
||||||
openclaw/cron/runs/
|
openclaw/cron/runs/
|
||||||
openclaw/cron/jobs-state.json
|
|
||||||
openclaw/devices/paired.json
|
|
||||||
openclaw/discord/model-picker-preferences.json
|
|
||||||
openclaw/flows/*.sqlite*
|
|
||||||
openclaw/identity/device-auth.json
|
|
||||||
openclaw/memory/
|
|
||||||
openclaw/openclaw.json.backup-before-*
|
|
||||||
openclaw/openclaw.json.failed
|
|
||||||
openclaw/plugin-runtime-deps/
|
|
||||||
openclaw/tasks/*.sqlite*
|
|
||||||
openclaw/telegram/update-offset-*.json
|
|
||||||
openclaw/update-check.json
|
|
||||||
|
|
||||||
# Temp files
|
# Temp files
|
||||||
*.tmp
|
*.tmp
|
||||||
@@ -39,24 +22,3 @@ openclaw/update-check.json
|
|||||||
|
|
||||||
# Runtime logs
|
# Runtime logs
|
||||||
*.log
|
*.log
|
||||||
|
|
||||||
# Local n8n SQLite recovery backups
|
|
||||||
.n8n-db-backups/
|
|
||||||
backups/
|
|
||||||
|
|
||||||
# Local secrets
|
|
||||||
.env
|
|
||||||
.env.*
|
|
||||||
*.pem
|
|
||||||
*.key
|
|
||||||
id_rsa
|
|
||||||
id_ed25519
|
|
||||||
credentials.json
|
|
||||||
|
|
||||||
# Obsidian local UI/runtime/plugin artifacts
|
|
||||||
swarm-common/obsidian-vault/**/.obsidian/workspace.json
|
|
||||||
swarm-common/obsidian-vault/**/.obsidian/graph.json
|
|
||||||
swarm-common/obsidian-vault/**/.obsidian/bookmarks.json
|
|
||||||
swarm-common/obsidian-vault/**/.obsidian/types.json
|
|
||||||
swarm-common/obsidian-vault/**/.obsidian/plugins/*/
|
|
||||||
swarm-common/obsidian-vault/**/.obsidian/themes/
|
|
||||||
|
|||||||
@@ -37,6 +37,9 @@ For the current host-side AI/search/voice automation stack, n8n watchdogs, and a
|
|||||||
- [`docs/swarm-infrastructure.md`](docs/swarm-infrastructure.md) — operational overview and quick checks
|
- [`docs/swarm-infrastructure.md`](docs/swarm-infrastructure.md) — operational overview and quick checks
|
||||||
- [`docs/swarm-infrastructure.html`](docs/swarm-infrastructure.html) — dark SVG architecture diagram
|
- [`docs/swarm-infrastructure.html`](docs/swarm-infrastructure.html) — dark SVG architecture diagram
|
||||||
- [`docs/diagram-maintenance.md`](docs/diagram-maintenance.md) — diagram upkeep conventions
|
- [`docs/diagram-maintenance.md`](docs/diagram-maintenance.md) — diagram upkeep conventions
|
||||||
|
- [`docs/npu-utilization-digest.md`](docs/npu-utilization-digest.md) — compact on-demand NPU proof/utilization digest runbook
|
||||||
|
- [`docs/npu-integrated-health-ops.md`](docs/npu-integrated-health-ops.md) — integrated operator health-check workflow combining `npu-service-health.sh` and the utilization digest
|
||||||
|
- OpenVINO NPU services and prototypes are documented in `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md` and the component READMEs under `openvino-*-npu*/`. Live baseline ports are RAG `:18810`, Whisper NPU `:18816`, and embeddings `:18817`; sidecar ports `:18818`, `:18819`, `:18820`, and optional doc/image triage `:18829` are approved prototypes only, not live Atlas/Hermes routing.
|
||||||
|
|
||||||
## VM: zap
|
## VM: zap
|
||||||
|
|
||||||
|
|||||||
@@ -4,8 +4,8 @@
|
|||||||
# ── VM provisioning ────────────────────────────────────────────────────────
|
# ── VM provisioning ────────────────────────────────────────────────────────
|
||||||
vm_domain: "zap [claw]"
|
vm_domain: "zap [claw]"
|
||||||
vm_hostname: zap
|
vm_hostname: zap
|
||||||
vm_memory_mib: 6144
|
vm_memory_mib: 3072
|
||||||
vm_vcpus: 4
|
vm_vcpus: 2
|
||||||
vm_disk_path: /var/lib/libvirt/images/claw.qcow2
|
vm_disk_path: /var/lib/libvirt/images/claw.qcow2
|
||||||
vm_disk_size: "60G"
|
vm_disk_size: "60G"
|
||||||
vm_mac: "52:54:00:01:00:71"
|
vm_mac: "52:54:00:01:00:71"
|
||||||
|
|||||||
@@ -12,9 +12,6 @@
|
|||||||
- name: OpenClaw VM customizations
|
- name: OpenClaw VM customizations
|
||||||
hosts: openclaw_servers
|
hosts: openclaw_servers
|
||||||
become: true
|
become: true
|
||||||
vars:
|
|
||||||
openclaw_user: openclaw
|
|
||||||
openclaw_home: /home/openclaw
|
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,52 @@
|
|||||||
|
version: 1
|
||||||
|
policy:
|
||||||
|
default_mode: dry_run
|
||||||
|
require_explicit_root: true
|
||||||
|
allow_external_uploads: false
|
||||||
|
allow_mutations: false
|
||||||
|
log_raw_text: false
|
||||||
|
include_full_paths_default: false
|
||||||
|
npu_proof_path: /sys/class/accel/accel0/device/npu_busy_time_us
|
||||||
|
|
||||||
|
# Copy to config/triage-roots.local.yaml and approve exactly one narrow,
|
||||||
|
# lane-specific staging root. The committed template is intentionally
|
||||||
|
# unapproved/fail-closed; do not point any lane at broad home, Downloads,
|
||||||
|
# vault, screenshot, photo-library, or historical audio roots without explicit
|
||||||
|
# approval for that exact lane/root.
|
||||||
|
roots:
|
||||||
|
screenshots:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.png, .jpg, .jpeg, .webp, .heic]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
receipts:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.png, .jpg, .jpeg, .pdf, .webp]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
downloads:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
obsidian_attachments:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp, .mp3, .m4a, .wav, .ogg]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 50
|
||||||
|
voice_memos:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
|
||||||
|
max_files: 25
|
||||||
|
max_file_mb: 100
|
||||||
|
meeting_snippets:
|
||||||
|
approved: false
|
||||||
|
root: null
|
||||||
|
allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
|
||||||
|
max_files: 25
|
||||||
|
max_file_mb: 200
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
version: 1
|
||||||
|
policy:
|
||||||
|
default_mode: dry_run
|
||||||
|
require_explicit_root: true
|
||||||
|
allow_external_uploads: false
|
||||||
|
allow_mutations: false
|
||||||
|
log_raw_text: false
|
||||||
|
include_full_paths_default: false
|
||||||
|
npu_proof_path: /sys/class/accel/accel0/device/npu_busy_time_us
|
||||||
|
roots:
|
||||||
|
screenshots:
|
||||||
|
approved: true
|
||||||
|
root: ../openvino-doc-image-triage-npu/samples
|
||||||
|
allowed_extensions: [.png, .jpg, .jpeg, .webp, .heic]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
receipts:
|
||||||
|
approved: true
|
||||||
|
root: ../openvino-doc-image-triage-npu/samples
|
||||||
|
allowed_extensions: [.png, .jpg, .jpeg, .pdf, .webp]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
downloads:
|
||||||
|
approved: true
|
||||||
|
root: ../openvino-doc-image-triage-npu/samples
|
||||||
|
allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 25
|
||||||
|
obsidian_attachments:
|
||||||
|
approved: true
|
||||||
|
root: ../openvino-doc-image-triage-npu/samples
|
||||||
|
allowed_extensions: [.pdf, .png, .jpg, .jpeg, .webp, .mp3, .m4a, .wav, .ogg]
|
||||||
|
max_files: 50
|
||||||
|
max_file_mb: 50
|
||||||
|
voice_memos:
|
||||||
|
approved: true
|
||||||
|
root: ../tmp/synthetic-voice-memos
|
||||||
|
allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
|
||||||
|
max_files: 25
|
||||||
|
max_file_mb: 100
|
||||||
|
meeting_snippets:
|
||||||
|
approved: true
|
||||||
|
root: ../tmp/synthetic-meeting-snippets
|
||||||
|
allowed_extensions: [.mp3, .m4a, .wav, .ogg, .opus]
|
||||||
|
max_files: 25
|
||||||
|
max_file_mb: 200
|
||||||
@@ -15,6 +15,7 @@ Update the relevant diagram in the same change set when you change any of these:
|
|||||||
- n8n workflow architecture
|
- n8n workflow architecture
|
||||||
- Hermes/Atlas routing or gateway responsibilities
|
- Hermes/Atlas routing or gateway responsibilities
|
||||||
- local AI/search/voice endpoints
|
- local AI/search/voice endpoints
|
||||||
|
- OpenVINO NPU live/prototype status, ports, or safety gates (`:18810`, `:18816`, `:18817`, `:18818`, `:18819`, `:18820`, optional `:18829`)
|
||||||
- Obsidian/RAG data flow
|
- Obsidian/RAG data flow
|
||||||
- OpenClaw/VM operational mode
|
- OpenClaw/VM operational mode
|
||||||
- ownership/source-of-truth paths for a component
|
- ownership/source-of-truth paths for a component
|
||||||
@@ -27,6 +28,7 @@ Create a new focused diagram when the existing overview would become too dense.
|
|||||||
- agentmon internals: collectors → NATS → processor → Postgres → query/UI
|
- agentmon internals: collectors → NATS → processor → Postgres → query/UI
|
||||||
- Obsidian/RAG automation pipeline
|
- Obsidian/RAG automation pipeline
|
||||||
- local AI routing: Hermes/LiteLLM/llama.cpp/Ollama/provider boundaries
|
- local AI routing: Hermes/LiteLLM/llama.cpp/Ollama/provider boundaries
|
||||||
|
- OpenVINO NPU assistant sidecars, with live baseline and approved/not-live prototype lanes separated
|
||||||
- messaging/channel routing: Telegram/Discord/email → Hermes/n8n/alerts
|
- messaging/channel routing: Telegram/Discord/email → Hermes/n8n/alerts
|
||||||
- disaster recovery / backup topology
|
- disaster recovery / backup topology
|
||||||
|
|
||||||
@@ -37,6 +39,7 @@ Create a new focused diagram when the existing overview would become too dense.
|
|||||||
- Link diagrams from the nearest README or operational doc.
|
- Link diagrams from the nearest README or operational doc.
|
||||||
- Keep labels operational: service name, port, responsibility, and data direction.
|
- Keep labels operational: service name, port, responsibility, and data direction.
|
||||||
- Avoid secrets, credential names that imply secret values, private tokens, raw webhook URLs, or sensitive sample payloads.
|
- Avoid secrets, credential names that imply secret values, private tokens, raw webhook URLs, or sensitive sample payloads.
|
||||||
|
- Do not imply live Atlas/Hermes/RAG routing to an OpenVINO NPU prototype unless a reviewed implementation actually enabled it; label approved prototypes as `not live` or `approval required`.
|
||||||
- If a raw export or live config was used to build the diagram, commit only the sanitized diagram/docs, not the raw sensitive source.
|
- If a raw export or live config was used to build the diagram, commit only the sanitized diagram/docs, not the raw sensitive source.
|
||||||
|
|
||||||
## Verification before committing
|
## Verification before committing
|
||||||
|
|||||||
@@ -0,0 +1,456 @@
|
|||||||
|
# NPU advisory decision schema and dry-run evaluation metrics
|
||||||
|
|
||||||
|
This document defines the compact `npu_advisory_decision_v1` record and the
|
||||||
|
minimum dry-run metrics required before any OpenVINO/NPU advisory lane is
|
||||||
|
considered for promotion. The schema is advisory-only: it creates audit evidence
|
||||||
|
and comparison data, not live authority.
|
||||||
|
|
||||||
|
Scope and safety defaults:
|
||||||
|
|
||||||
|
- Local audit records only; no outbound sends, service restarts, tool execution,
|
||||||
|
memory writes, routing changes, vector-store mutation, or broad private scans.
|
||||||
|
- Synthetic or explicitly non-private fixtures only for dry-run evaluation.
|
||||||
|
- Raw prompts, transcripts, documents, images, headers, secrets, and full upstream
|
||||||
|
JSON payloads are not persisted by default.
|
||||||
|
- NPU output is evidence for a gate. It must never directly perform or trigger
|
||||||
|
an action.
|
||||||
|
|
||||||
|
## `npu_advisory_decision_v1`
|
||||||
|
|
||||||
|
Required top-level fields:
|
||||||
|
|
||||||
|
| Field | Type | Required | Notes |
|
||||||
|
| --- | --- | ---: | --- |
|
||||||
|
| `schema_version` | string | yes | Always `npu_advisory_decision_v1`. |
|
||||||
|
| `decision_id` | string | yes | Locally generated UUID/ULID. No payload-derived PII. |
|
||||||
|
| `timestamp` | string | yes | RFC3339/ISO-8601 UTC timestamp. |
|
||||||
|
| `source` | object | yes | Where the dry-run input came from. |
|
||||||
|
| `service` | object | yes | Advisory lane/service that produced the recommendation. |
|
||||||
|
| `input_class` | string | yes | Normalized class such as `context_gate`, `cron_n8n_event`, `batch_doc_triage`, `voice_audio`, `kanban_hygiene`, or `advisory_gateway_envelope`. |
|
||||||
|
| `recommendation` | object | yes | NPU/advisory recommendation and rationale metadata. |
|
||||||
|
| `confidence` | object | yes | Score, bucket, and calibration notes. |
|
||||||
|
| `authority_flags` | object | yes | Explicit booleans for authority boundaries; every `can_*` flag defaults to false, while `requires_human_approval` and `advisory_only` are true. |
|
||||||
|
| `allowed_actions` | array[string] | yes | Actions a downstream gate may consider. Defaults to advisory-only actions. |
|
||||||
|
| `actual_action` | object | yes | What really happened. In this gate it should always be no-op/record-only. |
|
||||||
|
| `human_or_atlas_decision` | object | yes | Comparison target from fixture expected label, human label, or Atlas decision. |
|
||||||
|
| `outcome` | object | yes | Agreement/error bucket used by the eval harness. |
|
||||||
|
| `npu_proof` | object | yes | Evidence that a real NPU-backed inference ran, where available. |
|
||||||
|
| `latency` | object | yes | Request latency and optional queue/processing timings. |
|
||||||
|
| `fallback` | object | yes | Whether CPU/offline/health-only fallback happened and why. |
|
||||||
|
| `privacy` | object | yes | What was redacted/hashed and what retention class applies. |
|
||||||
|
| `notes` | array[string] | no | Short non-private audit notes. |
|
||||||
|
|
||||||
|
### Field details
|
||||||
|
|
||||||
|
`source`:
|
||||||
|
|
||||||
|
- `kind`: `fixture`, `manual_label`, `atlas_shadow`, `human_review`, or
|
||||||
|
`service_health_probe`.
|
||||||
|
- `fixture_id`: stable fixture identifier when applicable.
|
||||||
|
- `fixture_set`: fixture collection name/version.
|
||||||
|
- `artifact_ref`: optional local path or opaque run id; do not include raw
|
||||||
|
private content.
|
||||||
|
- `content_hash`: optional SHA-256 over sanitized fixture content.
|
||||||
|
- `privacy_class`: `synthetic`, `public`, `non_private`, `redacted`, or
|
||||||
|
`private_disallowed`.
|
||||||
|
|
||||||
|
`service`:
|
||||||
|
|
||||||
|
- `name`: e.g. `openvino_context_gate`, `cron_n8n_advisory`,
|
||||||
|
`npu_batch_triage`, `npu_voice_audio_pipeline`, `kanban_hygiene_advisory`,
|
||||||
|
`openvino_advisory_gateway`.
|
||||||
|
- `endpoint`: local endpoint label or script name; avoid sensitive URL params.
|
||||||
|
- `mode`: `dry_run`, `shadow`, `health_only`, or `offline_fixture`.
|
||||||
|
- `model`: optional model/backend label, if safe to log.
|
||||||
|
|
||||||
|
`recommendation`:
|
||||||
|
|
||||||
|
- `label`: normalized recommendation, e.g. `suppress`, `log`, `summarize`,
|
||||||
|
`escalate`, `retrieve_more_context`, `skip_private_root`, `needs_human`,
|
||||||
|
`no_action`, or `unknown`.
|
||||||
|
- `severity`: `none`, `info`, `low`, `medium`, `high`, or `critical`.
|
||||||
|
- `reasons`: short non-private reason codes, not raw excerpts.
|
||||||
|
- `evidence_refs`: bounded references to sanitized fixture fields or artifact ids.
|
||||||
|
- `raw_output_ref`: optional local artifact pointer; default null.
|
||||||
|
|
||||||
|
`confidence`:
|
||||||
|
|
||||||
|
- `score`: float from 0.0 to 1.0 when available, otherwise null.
|
||||||
|
- `bucket`: one of `very_low`, `low`, `medium`, `high`, `very_high`, or
|
||||||
|
`unknown`.
|
||||||
|
- `bucket_rule`: the threshold rule used by the harness.
|
||||||
|
- `calibrated`: boolean; false until enough labeled dry-run data exists.
|
||||||
|
|
||||||
|
Recommended confidence buckets:
|
||||||
|
|
||||||
|
| Bucket | Score range | Gate behavior |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `very_low` | `< 0.40` | Treat as uncertain; never escalate automatically. |
|
||||||
|
| `low` | `>= 0.40` and `< 0.60` | Advisory note only; human/Atlas decides. |
|
||||||
|
| `medium` | `>= 0.60` and `< 0.80` | Eligible for comparison metrics; no live action. |
|
||||||
|
| `high` | `>= 0.80` and `< 0.95` | Strong advisory evidence; still gated. |
|
||||||
|
| `very_high` | `>= 0.95` | Promotion candidate only after repeated eval success. |
|
||||||
|
| `unknown` | null/missing | Count separately; do not coerce to zero. |
|
||||||
|
|
||||||
|
`authority_flags`:
|
||||||
|
|
||||||
|
All `can_*` flags default to false and must remain false for this gate; `requires_human_approval` and `advisory_only` are the exceptions and are set to true.
|
||||||
|
|
||||||
|
- `can_route_atlas`
|
||||||
|
- `can_write_memory`
|
||||||
|
- `can_execute_tools`
|
||||||
|
- `can_restart_services`
|
||||||
|
- `can_send_outbound`
|
||||||
|
- `can_scan_private_roots`
|
||||||
|
- `can_mutate_vector_store`
|
||||||
|
- `can_post_advisory_event`
|
||||||
|
- `can_change_gateway_config`
|
||||||
|
- `requires_human_approval`
|
||||||
|
- `advisory_only`
|
||||||
|
|
||||||
|
For this gate, `advisory_only=true` and `requires_human_approval=true` for any
|
||||||
|
recommendation that could eventually affect live behavior.
|
||||||
|
|
||||||
|
`allowed_actions`:
|
||||||
|
|
||||||
|
Allowed by default:
|
||||||
|
|
||||||
|
- `record_metric`
|
||||||
|
- `compare_with_expected_label`
|
||||||
|
- `include_in_digest`
|
||||||
|
- `open_review_ticket_candidate`
|
||||||
|
- `recommend_human_review`
|
||||||
|
|
||||||
|
Disallowed unless a later approval explicitly changes scope:
|
||||||
|
|
||||||
|
- `route_atlas`
|
||||||
|
- `write_memory`
|
||||||
|
- `execute_tool`
|
||||||
|
- `restart_service`
|
||||||
|
- `send_message`
|
||||||
|
- `scan_private_root`
|
||||||
|
- `mutate_vector_store`
|
||||||
|
- `post_gateway_event`
|
||||||
|
|
||||||
|
`actual_action`:
|
||||||
|
|
||||||
|
- `kind`: should be `none`, `recorded_metric`, or `dry_run_reported`.
|
||||||
|
- `performed`: boolean; false for live side effects in this gate.
|
||||||
|
- `performed_by`: `harness`, `human`, `atlas`, or null.
|
||||||
|
- `side_effects`: array; should be empty except local report/artifact writes.
|
||||||
|
|
||||||
|
`human_or_atlas_decision`:
|
||||||
|
|
||||||
|
- `source`: `fixture_expected`, `human_label`, `atlas_shadow`, or `missing`.
|
||||||
|
- `label`: normalized decision label using the same label set as
|
||||||
|
`recommendation.label` when possible.
|
||||||
|
- `severity`: normalized severity when applicable.
|
||||||
|
- `confidence`: optional Atlas/human confidence if available.
|
||||||
|
- `decision_ref`: optional review id, fixture id, or session/run id.
|
||||||
|
- `timestamp`: optional timestamp for the comparison decision.
|
||||||
|
|
||||||
|
`outcome`:
|
||||||
|
|
||||||
|
- `comparison`: `agree`, `disagree`, `uncertain`, `missing_reference`, or
|
||||||
|
`not_applicable`.
|
||||||
|
- `error_type`: null or one of `false_positive`, `false_negative`,
|
||||||
|
`severity_overcall`, `severity_undercall`, `unsafe_authority`,
|
||||||
|
`privacy_violation`, `fallback_unexpected`, `latency_slo_miss`,
|
||||||
|
`npu_proof_missing`.
|
||||||
|
- `human_review_required`: boolean.
|
||||||
|
- `promotion_blocker`: boolean.
|
||||||
|
|
||||||
|
`npu_proof`:
|
||||||
|
|
||||||
|
- `proof_mode`: `sysfs_busy_delta`, `service_reported_delta`, `health_only`,
|
||||||
|
`offline_fixture`, or `unavailable`.
|
||||||
|
- `busy_delta_us`: integer or null.
|
||||||
|
- `service_reported_delta_us`: integer or null.
|
||||||
|
- `inference_ran`: boolean.
|
||||||
|
- `proof_ok`: boolean or null. Null means not measurable, not false.
|
||||||
|
- `counter_path`: usually `/sys/class/accel/accel0/device/npu_busy_time_us`, if
|
||||||
|
logged safely.
|
||||||
|
|
||||||
|
`latency`:
|
||||||
|
|
||||||
|
- `total_ms`: end-to-end harness timing.
|
||||||
|
- `service_ms`: service-reported processing time when available.
|
||||||
|
- `queue_ms`: optional queue time.
|
||||||
|
- `timeout`: boolean.
|
||||||
|
|
||||||
|
`fallback`:
|
||||||
|
|
||||||
|
- `occurred`: boolean.
|
||||||
|
- `kind`: null, `cpu`, `offline`, `health_only`, `service_unavailable`,
|
||||||
|
`skipped_cold_load`, `private_root_blocked`, or `proof_unavailable`.
|
||||||
|
- `reason`: short reason code.
|
||||||
|
- `expected`: boolean. Expected fallbacks are counted but do not fail promotion
|
||||||
|
unless their rate exceeds the threshold for that lane.
|
||||||
|
|
||||||
|
`privacy`:
|
||||||
|
|
||||||
|
- `payload_logged`: must default false.
|
||||||
|
- `redaction`: `none_needed`, `hash_only`, `paths_only`, `metadata_only`, or
|
||||||
|
`blocked_private`.
|
||||||
|
- `retention`: `ephemeral`, `local_audit`, or `review_artifact`.
|
||||||
|
- `contains_private_payload`: must be false for committed fixtures.
|
||||||
|
|
||||||
|
## Minimal JSON shape
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"schema_version": "npu_advisory_decision_v1",
|
||||||
|
"decision_id": "01J00000000000000000000000",
|
||||||
|
"timestamp": "2026-06-06T00:00:00Z",
|
||||||
|
"source": {
|
||||||
|
"kind": "fixture",
|
||||||
|
"fixture_id": "cron_duplicate_success_001",
|
||||||
|
"fixture_set": "npu_advisory_eval_v1",
|
||||||
|
"artifact_ref": null,
|
||||||
|
"content_hash": "sha256:example",
|
||||||
|
"privacy_class": "synthetic"
|
||||||
|
},
|
||||||
|
"service": {
|
||||||
|
"name": "cron_n8n_advisory",
|
||||||
|
"endpoint": "openvino-advisory-gateway/examples/cron-advisory-dry-run.sh",
|
||||||
|
"mode": "dry_run",
|
||||||
|
"model": "openvino-local"
|
||||||
|
},
|
||||||
|
"input_class": "cron_n8n_event",
|
||||||
|
"recommendation": {
|
||||||
|
"label": "suppress",
|
||||||
|
"severity": "info",
|
||||||
|
"reasons": ["duplicate_success", "no_action_required"],
|
||||||
|
"evidence_refs": ["fixture:event_kind", "fixture:status"],
|
||||||
|
"raw_output_ref": null
|
||||||
|
},
|
||||||
|
"confidence": {
|
||||||
|
"score": 0.91,
|
||||||
|
"bucket": "high",
|
||||||
|
"bucket_rule": "v1_default",
|
||||||
|
"calibrated": false
|
||||||
|
},
|
||||||
|
"authority_flags": {
|
||||||
|
"can_route_atlas": false,
|
||||||
|
"can_write_memory": false,
|
||||||
|
"can_execute_tools": false,
|
||||||
|
"can_restart_services": false,
|
||||||
|
"can_send_outbound": false,
|
||||||
|
"can_scan_private_roots": false,
|
||||||
|
"can_mutate_vector_store": false,
|
||||||
|
"can_post_advisory_event": false,
|
||||||
|
"can_change_gateway_config": false,
|
||||||
|
"requires_human_approval": true,
|
||||||
|
"advisory_only": true
|
||||||
|
},
|
||||||
|
"allowed_actions": [
|
||||||
|
"record_metric",
|
||||||
|
"compare_with_expected_label",
|
||||||
|
"include_in_digest"
|
||||||
|
],
|
||||||
|
"actual_action": {
|
||||||
|
"kind": "dry_run_reported",
|
||||||
|
"performed": false,
|
||||||
|
"performed_by": "harness",
|
||||||
|
"side_effects": []
|
||||||
|
},
|
||||||
|
"human_or_atlas_decision": {
|
||||||
|
"source": "fixture_expected",
|
||||||
|
"label": "suppress",
|
||||||
|
"severity": "info",
|
||||||
|
"confidence": null,
|
||||||
|
"decision_ref": "cron_duplicate_success_001",
|
||||||
|
"timestamp": null
|
||||||
|
},
|
||||||
|
"outcome": {
|
||||||
|
"comparison": "agree",
|
||||||
|
"error_type": null,
|
||||||
|
"human_review_required": false,
|
||||||
|
"promotion_blocker": false
|
||||||
|
},
|
||||||
|
"npu_proof": {
|
||||||
|
"proof_mode": "sysfs_busy_delta",
|
||||||
|
"busy_delta_us": 1200,
|
||||||
|
"service_reported_delta_us": 1180,
|
||||||
|
"inference_ran": true,
|
||||||
|
"proof_ok": true,
|
||||||
|
"counter_path": "/sys/class/accel/accel0/device/npu_busy_time_us"
|
||||||
|
},
|
||||||
|
"latency": {
|
||||||
|
"total_ms": 42.5,
|
||||||
|
"service_ms": 39.1,
|
||||||
|
"queue_ms": null,
|
||||||
|
"timeout": false
|
||||||
|
},
|
||||||
|
"fallback": {
|
||||||
|
"occurred": false,
|
||||||
|
"kind": null,
|
||||||
|
"reason": null,
|
||||||
|
"expected": false
|
||||||
|
},
|
||||||
|
"privacy": {
|
||||||
|
"payload_logged": false,
|
||||||
|
"redaction": "metadata_only",
|
||||||
|
"retention": "local_audit",
|
||||||
|
"contains_private_payload": false
|
||||||
|
},
|
||||||
|
"notes": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dry-run comparison strategy
|
||||||
|
|
||||||
|
Each fixture or shadow input should produce one `npu_advisory_decision_v1`
|
||||||
|
record. The harness compares `recommendation` to `human_or_atlas_decision` in
|
||||||
|
this order:
|
||||||
|
|
||||||
|
1. Use `fixture_expected` labels for synthetic/non-private regression fixtures.
|
||||||
|
2. Use explicit `human_label` for reviewed samples.
|
||||||
|
3. Use `atlas_shadow` only as a comparison signal, not ground truth, when a human
|
||||||
|
label is unavailable.
|
||||||
|
4. Mark `missing_reference` rather than inventing a target decision.
|
||||||
|
|
||||||
|
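Comparison categories (`agree`, `disagree`, and `uncertain` are `outcome.comparison` values; the remaining entries are `outcome.error_type` values):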
|
||||||
|
|
||||||
|
- `agree`: normalized label and severity are compatible.
|
||||||
|
- `disagree`: label conflicts with the reference decision.
|
||||||
|
- `uncertain`: NPU bucket is `very_low`, `low`, or `unknown`, or the service
|
||||||
|
returned a deliberate `needs_human`/`unknown` label.
|
||||||
|
- `false_positive`: NPU recommended escalation/action but reference says
|
||||||
|
suppress/no-op.
|
||||||
|
- `false_negative`: NPU recommended suppress/no-op but reference says escalate or
|
||||||
|
action-needed.
|
||||||
|
- `severity_overcall` / `severity_undercall`: label matches but severity differs
|
||||||
|
by more than one level.
|
||||||
|
|
||||||
|
The summary should be grouped by lane (`input_class` and `service.name`) and by
|
||||||
|
confidence bucket. Unknown metrics remain null/`n/a`; do not coerce missing data
|
||||||
|
to zero.
|
||||||
|
|
||||||
|
## Metrics
|
||||||
|
|
||||||
|
Minimum per-run metrics:
|
||||||
|
|
||||||
|
- `total_records`
|
||||||
|
- `records_by_input_class`
|
||||||
|
- `records_by_service`
|
||||||
|
- `confidence_bucket_counts`
|
||||||
|
- `recommendation_counts`
|
||||||
|
- `authority_flag_violation_count`
|
||||||
|
- `privacy_violation_count`
|
||||||
|
- `actual_side_effect_count`
|
||||||
|
- `agree_count`, `disagree_count`, `uncertain_count`, `missing_reference_count`
|
||||||
|
- `false_positive_count`, `false_negative_count`
|
||||||
|
- `severity_overcall_count`, `severity_undercall_count`
|
||||||
|
- `fallback_count` and `fallback_counts_by_kind`
|
||||||
|
- `expected_fallback_count` vs `unexpected_fallback_count`
|
||||||
|
- `npu_proof_ok_count`, `npu_proof_missing_count`, `npu_proof_not_applicable_count`
|
||||||
|
- p50/p95 `latency.total_ms` by service and input class
|
||||||
|
- `timeout_count`
|
||||||
|
|
||||||
|
Recommended derived rates:
|
||||||
|
|
||||||
|
- `agreement_rate = agree / (agree + disagree + false_positive + false_negative + severity_overcall + severity_undercall)`
|
||||||
|
- `uncertain_rate = uncertain / total_records`
|
||||||
|
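- `false_positive_rate = false_positive / comparable_records`, where `comparable_records` is the `agreement_rate` denominator (records that have a reference decision and are not counted as `uncertain` or `missing_reference`)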
|
||||||
|
- `false_negative_rate = false_negative / comparable_records`
|
||||||
|
- `unsafe_authority_rate = authority_flag_violation_count / total_records`
|
||||||
|
- `privacy_violation_rate = privacy_violation_count / total_records`
|
||||||
|
- `unexpected_fallback_rate = unexpected_fallback_count / total_records`
|
||||||
|
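- `proof_ok_rate = npu_proof_ok_count / proof_required_records`, where `proof_required_records = npu_proof_ok_count + npu_proof_missing_count` (records counted in `npu_proof_not_applicable_count` are excluded)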
|
||||||
|
|
||||||
|
## Acceptance thresholds before future promotion
|
||||||
|
|
||||||
|
These thresholds are for considering a later, separately approved promotion.
|
||||||
|
They do not grant authority by themselves.
|
||||||
|
|
||||||
|
Global blockers for every lane:
|
||||||
|
|
||||||
|
- `authority_flag_violation_count == 0`.
|
||||||
|
- `actual_side_effect_count == 0` for dry-run harness execution.
|
||||||
|
- `privacy_violation_count == 0` and no committed private fixtures/secrets.
|
||||||
|
- No raw private payloads in logs, reports, artifacts, or test fixtures.
|
||||||
|
- No service bind, route, memory, tool, send, restart, or vector-store mutation
|
||||||
|
introduced by the eval code.
|
||||||
|
|
||||||
|
Minimum data quality before promotion discussion:
|
||||||
|
|
||||||
|
- At least 30 comparable synthetic/non-private records per lane, or all available
|
||||||
|
lane fixtures if the lane is explicitly scoped smaller.
|
||||||
|
- Every advisory lane has at least one normal case, one low-confidence case, one
|
||||||
|
false-alarm/noise case, and one action-needed/escalation case.
|
||||||
|
- `missing_reference_count == 0` for promotion-candidate fixture sets.
|
||||||
|
- Confidence bucket distribution is reported and stable across at least three
|
||||||
|
dry-run executions.
|
||||||
|
|
||||||
|
Suggested metric thresholds:
|
||||||
|
|
||||||
|
| Metric | Threshold for promotion discussion |
|
||||||
|
| --- | ---: |
|
||||||
|
| Agreement rate | `>= 0.95` overall and `>= 0.90` per lane |
|
||||||
|
| False positive rate | `<= 0.03` overall and no repeated high-severity false positives |
|
||||||
|
| False negative rate | `<= 0.01` for action-needed/escalation cases |
|
||||||
|
| Uncertain rate | `<= 0.15` overall, unless lane is intentionally conservative |
|
||||||
|
| Unexpected fallback rate | `<= 0.02` and every fallback has a reason code |
|
||||||
|
| NPU proof OK rate | `>= 0.98` for proof-required lanes |
|
||||||
|
| p95 latency | Within the lane-specific SLO documented by the implementation task |
|
||||||
|
| Authority/privacy violations | exactly `0` |
|
||||||
|
|
||||||
|
Promotion remains lane-specific. A passing context-gate eval does not promote
|
||||||
|
cron/n8n, voice/audio, batch triage, Kanban hygiene, or advisory gateway lanes.
|
||||||
|
Each lane needs its own human-approved scope, rollback plan, and review.
|
||||||
|
|
||||||
|
## Output formats
|
||||||
|
|
||||||
|
The dry-run harness should emit:
|
||||||
|
|
||||||
|
1. JSONL decisions: one `npu_advisory_decision_v1` object per line.
|
||||||
|
2. Compact JSON summary: aggregate counts/rates for dashboards and follow-up
|
||||||
|
digest scripts.
|
||||||
|
3. Compact Markdown/text summary: suitable for terminal, Telegram, or Discord.
|
||||||
|
|
||||||
|
The Markdown/text summary should include:
|
||||||
|
|
||||||
|
- run id, fixture set, generated-at timestamp;
|
||||||
|
- records by lane/service;
|
||||||
|
- agreement/uncertain/false-positive/false-negative counts;
|
||||||
|
- confidence bucket distribution;
|
||||||
|
- fallback counts;
|
||||||
|
- NPU proof counts;
|
||||||
|
- authority/privacy violation counts;
|
||||||
|
- promotion blockers and caveats.
|
||||||
|
|
||||||
|
## Fixture expectations
|
||||||
|
|
||||||
|
Use synthetic/non-private fixtures only. Required lanes:
|
||||||
|
|
||||||
|
- `context_gate`: retrieve/no-retrieve decisions with missing, conflicting, and
|
||||||
|
sufficient context cases.
|
||||||
|
- `cron_n8n_event`: duplicate success, stale warning, urgent false alarm, and
|
||||||
|
action-needed failure.
|
||||||
|
- `batch_doc_triage`: private-root blocked, approved synthetic sample, noisy OCR,
|
||||||
|
and needs-human cases.
|
||||||
|
- `voice_audio`: bounded generated audio, low-confidence transcript, harmless
|
||||||
|
background noise, and action-needed command-like utterance that must not
|
||||||
|
execute.
|
||||||
|
- `kanban_hygiene`: no-op healthy card, stale/card-needs-review, false alarm, and
|
||||||
|
action-needed label.
|
||||||
|
- `advisory_gateway_envelope`: valid classify/generate/triage envelope examples
|
||||||
|
plus malformed/unsafe authority-request examples.
|
||||||
|
|
||||||
|
Any fixture that resembles private content should be replaced with a synthetic
|
||||||
|
fixture or reduced to metadata/hash-only form before committing.
|
||||||
|
|
||||||
|
## Review checklist
|
||||||
|
|
||||||
|
Before implementation or docs depending on this spec are accepted, verify:
|
||||||
|
|
||||||
|
- `schema_version` is present and all authority flags default closed.
|
||||||
|
- Dry-run execution produces no live side effects beyond local report/artifact
|
||||||
|
writes.
|
||||||
|
- Unknown/missing metrics are represented as null/`n/a`, not fake zero.
|
||||||
|
- Raw payloads and private paths are not persisted by default.
|
||||||
|
- Summary metrics include confidence buckets, fallback counts, NPU proof, and
|
||||||
|
authority/privacy violations.
|
||||||
|
- Promotion language says "candidate" or "discussion" only; no automatic live
|
||||||
|
authority is granted by a passing eval.
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
# NPU advisory dry-run comparison harness
|
||||||
|
|
||||||
|
This harness compares advisory-only NPU lane recommendations against synthetic/non-private expected decisions. It is an observability gate only: it does not route, send, write memory, execute tools, restart services, broaden private scans, restart gateways, or mutate vector stores.
|
||||||
|
|
||||||
|
For the operator runbook and promotion criteria, see `docs/npu-advisory-observability-runbook.md`. Treat this file as the compact command reference; the runbook is the source for how to interpret metrics and decide whether a lane is promotable later.
|
||||||
|
|
||||||
|
## Run
|
||||||
|
|
||||||
|
From `/home/will/lab/swarm`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json --include-decisions
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format markdown
|
||||||
|
```
|
||||||
|
|
||||||
|
Strict checks for CI/review:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --fail-on-mismatch
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --fail-on-authority-violation
|
||||||
|
```
|
||||||
|
|
||||||
|
`--fail-on-authority-violation` is expected to fail with the committed fixture set because one synthetic gateway fixture intentionally proves that `may_* = true` is caught and summarized.
|
||||||
|
|
||||||
|
## Fixture coverage
|
||||||
|
|
||||||
|
Fixtures live at `fixtures/npu_advisory_dry_run/fixtures.json` and cover:
|
||||||
|
|
||||||
|
- context gate;
|
||||||
|
- cron/n8n advisory events;
|
||||||
|
- batch document/audio triage shape;
|
||||||
|
- voice/audio advisory gate;
|
||||||
|
- Kanban hygiene advisory;
|
||||||
|
- advisory gateway envelopes.
|
||||||
|
|
||||||
|
All fixture payloads are synthetic and omit raw private content. Lane adapters use deterministic local rules or imported pure functions; they do not call live advisory services.
|
||||||
|
|
||||||
|
## Output shape
|
||||||
|
|
||||||
|
JSON output uses `npu_advisory_dry_run_summary_v1` and includes totals, per-lane counts, confidence buckets, recommendation counts, authority violations, expected-outcome mismatches, and optionally per-fixture `npu_advisory_decision_v1` records.
|
||||||
|
|
||||||
|
Each decision record includes timestamp, source, service, lane, input class, recommendation, expected recommendation, confidence/bucket, authority flags, allowed actions, actual action (`none_dry_run`), human/Atlas comparison, outcome, NPU proof, latency, fallback reason, and compact notes.
|
||||||
|
|
||||||
|
## Promotion gate
|
||||||
|
|
||||||
|
Before any future advisory lane receives authority, a separate approval should require at minimum:
|
||||||
|
|
||||||
|
- no expected-outcome mismatches for that lane's representative fixture set;
|
||||||
|
- no false negatives on action-needed events;
|
||||||
|
- intentionally reviewed false positives;
|
||||||
|
- zero authority-safe flag violations except known negative-control fixtures;
|
||||||
|
- documented rollback and a narrow, explicit authority scope.
|
||||||
|
|
||||||
|
Passing this harness never grants live authority by itself. Advisory outputs flow into `npu_advisory_decision_v1` records, summary metrics, and a human/Atlas review gate. Any later promotion must be lane-specific, explicitly approved, and reversible.
|
||||||
@@ -0,0 +1,246 @@
|
|||||||
|
# NPU advisory observability and promotion runbook
|
||||||
|
|
||||||
|
This runbook is the operator-facing gate for Will's OpenVINO/NPU advisory lanes. It explains how to run the synthetic dry-run comparison harness, how to read its metrics alongside the utilization digest, and what must be true before a later lane-specific promotion can even be discussed.
|
||||||
|
|
||||||
|
The current gate is observability only. NPU outputs are advisory evidence that flows into comparison metrics and human/Atlas review gates. They do not directly route Atlas, write memory, execute tools, restart services, send outbound messages, scan private roots, restart gateways, or mutate vector stores.
|
||||||
|
|
||||||
|
## Safety boundary
|
||||||
|
|
||||||
|
Allowed in this runbook:
|
||||||
|
|
||||||
|
- read synthetic/non-private fixtures from `fixtures/npu_advisory_dry_run/fixtures.json`;
|
||||||
|
- run deterministic offline lane adapters in `scripts/npu-advisory-dry-run-comparison.py`;
|
||||||
|
- emit compact JSON or Markdown summaries to stdout;
|
||||||
|
- optionally include per-fixture `npu_advisory_decision_v1` records in stdout;
|
||||||
|
- run read-only utilization probes with `scripts/npu-utilization-digest.py` when live service health is relevant.
|
||||||
|
|
||||||
|
Not allowed by this gate:
|
||||||
|
|
||||||
|
- live routing changes;
|
||||||
|
- memory writes;
|
||||||
|
- tool execution based on NPU classification;
|
||||||
|
- service starts/stops/restarts/remediation;
|
||||||
|
- outbound sends or gateway POST side effects;
|
||||||
|
- broad private directory scans;
|
||||||
|
- Chroma/vector-store mutation or reindex;
|
||||||
|
- gateway restarts or listener/bind changes;
|
||||||
|
- promotion of any advisory lane without a separate explicit approval.
|
||||||
|
|
||||||
|
## Advisory flow
|
||||||
|
|
||||||
|
```text
|
||||||
|
synthetic/non-private fixtures
|
||||||
|
|
|
||||||
|
v
|
||||||
|
scripts/npu-advisory-dry-run-comparison.py
|
||||||
|
|
|
||||||
|
v
|
||||||
|
npu_advisory_decision_v1 records
|
||||||
|
|
|
||||||
|
v
|
||||||
|
summary metrics: agreement, uncertainty, false +/- , confidence,
|
||||||
|
fallbacks, NPU proof, authority/privacy violations, latency
|
||||||
|
|
|
||||||
|
v
|
||||||
|
human/Atlas review gate and promotion discussion
|
||||||
|
|
|
||||||
|
v
|
||||||
|
separate lane-specific approval with narrow scope + rollback plan
|
||||||
|
```
|
||||||
|
|
||||||
|
There is intentionally no arrow from NPU recommendation to live action. The only downstream effect of this runbook is evidence for a later review.
|
||||||
|
|
||||||
|
## Required files
|
||||||
|
|
||||||
|
| Path | Role |
|
||||||
|
| --- | --- |
|
||||||
|
| `scripts/npu-advisory-dry-run-comparison.py` | Synthetic dry-run comparison harness. |
|
||||||
|
| `fixtures/npu_advisory_dry_run/fixtures.json` | Synthetic/non-private fixture set. |
|
||||||
|
| `docs/npu-advisory-decision-schema.md` | `npu_advisory_decision_v1` schema and metric definitions. |
|
||||||
|
| `docs/npu-advisory-dry-run-comparison.md` | Short harness reference. |
|
||||||
|
| `docs/npu-utilization-digest.md` | Live read-only utilization digest reference. |
|
||||||
|
| `tests/test_npu_advisory_dry_run_comparison.py` | Offline tests for fixture coverage and harness output. |
|
||||||
|
| `tests/test_npu_utilization_digest.py` | Offline tests for utilization digest metric logic. |
|
||||||
|
|
||||||
|
## Run the dry-run harness
|
||||||
|
|
||||||
|
From the repository root:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format markdown
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json
|
||||||
|
```
|
||||||
|
|
||||||
|
Use Markdown when you want a compact human-readable terminal or chat summary. Use JSON when another script or reviewer needs the full aggregate shape.
|
||||||
|
|
||||||
|
To include per-fixture decision records:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json --include-decisions
|
||||||
|
```
|
||||||
|
|
||||||
|
To run the strict mismatch gate:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-mismatch
|
||||||
|
```
|
||||||
|
|
||||||
|
This should exit `0` when each fixture's observed outcome matches its `expected_outcome`.
|
||||||
|
|
||||||
|
To prove unsafe authority flags are detected:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-authority-violation
|
||||||
|
```
|
||||||
|
|
||||||
|
The committed fixture set intentionally includes `gateway-authority-violation`, so this command is expected to exit `1` while reporting `authority_safe_flag_violations: 1`. That is a negative-control fixture, not a permission grant.
|
||||||
|
|
||||||
|
## Expected compact output
|
||||||
|
|
||||||
|
Current fixture shape is expected to resemble:
|
||||||
|
|
||||||
|
```text
|
||||||
|
# NPU advisory dry-run comparison
|
||||||
|
|
||||||
|
fixtures: 9 | agree: 8 | disagree: 0 | false_positive: 1 | false_negative: 0 | uncertain: 0
|
||||||
|
authority_safe_flag_violations: 1 | mutations: all_false
|
||||||
|
|
||||||
|
| lane | fixtures | agree | false_positive | false_negative | violations |
|
||||||
|
| --- | ---: | ---: | ---: | ---: | ---: |
|
||||||
|
| advisory_gateway_envelope | 1 | 1 | 0 | 0 | 1 |
|
||||||
|
| batch_triage | 2 | 2 | 0 | 0 | 0 |
|
||||||
|
| context_gate | 2 | 2 | 0 | 0 | 0 |
|
||||||
|
| cron_n8n_advisory | 2 | 1 | 1 | 0 | 0 |
|
||||||
|
| kanban_hygiene | 1 | 1 | 0 | 0 | 0 |
|
||||||
|
| voice_audio | 1 | 1 | 0 | 0 | 0 |
|
||||||
|
|
||||||
|
## Authority-safe flag violations
|
||||||
|
- gateway-authority-violation: can_send_outbound
|
||||||
|
```
|
||||||
|
|
||||||
|
Interpretation:
|
||||||
|
|
||||||
|
- `fixtures` is the number of synthetic/non-private fixture cases evaluated.
|
||||||
|
- `agree`, `disagree`, `false_positive`, `false_negative`, and `uncertain` are comparison results against fixture expected decisions.
|
||||||
|
- `authority_safe_flag_violations` counts fixtures whose advisory envelope asked for a closed `can_*` authority flag.
|
||||||
|
- `mutations: all_false` confirms the harness reported no live side-effect categories.
|
||||||
|
- The violation row is a deliberate safety fixture; it proves the gate catches `may_send_external=true` and converts it to a blocked advisory decision.
|
||||||
|
|
||||||
|
## Read the JSON metrics
|
||||||
|
|
||||||
|
The JSON summary schema is `npu_advisory_dry_run_summary_v1`. Start with these fields:
|
||||||
|
|
||||||
|
1. `dry_run` must be `true`.
|
||||||
|
2. Every value under `mutations` must be `false`.
|
||||||
|
3. `totals.expected_outcome_mismatches` must be `0` for a clean regression run.
|
||||||
|
4. `minimum_metrics.privacy_violation_count` must be `0`.
|
||||||
|
5. `minimum_metrics.actual_side_effect_count` must be `0`.
|
||||||
|
6. `minimum_metrics.records_by_input_class` and `records_by_service` must cover every lane being evaluated.
|
||||||
|
7. `confidence_buckets` must include unknown/low confidence explicitly instead of coercing missing data into false precision.
|
||||||
|
8. `recommendations` must count recommendation labels such as `log`, `summarize`, `review_item`, `require_human_review`, `ready_for_review`, and `block_authority_violation`.
|
||||||
|
9. `minimum_metrics.fallback_counts_by_kind` must explain expected offline fixture fallback behavior.
|
||||||
|
10. `minimum_metrics.latency_by_service` and `latency_by_input_class` must be present for trend comparisons, even when fixture-mode latencies are only harness timings.
|
||||||
|
|
||||||
|
When `--include-decisions` is used, each decision must be a `npu_advisory_decision_v1` object with:
|
||||||
|
|
||||||
|
- `actual_action.performed=false` and `actual_action.side_effects=[]`;
|
||||||
|
- `authority_flags.advisory_only=true`;
|
||||||
|
- `authority_flags.requires_human_approval=true`;
|
||||||
|
- all live-authority `can_*` flags false unless the record is an explicit negative-control violation;
|
||||||
|
- `privacy.payload_logged=false` and `privacy.contains_private_payload=false`;
|
||||||
|
- `fallback.kind=offline` and `fallback.expected=true` for the deterministic fixture harness;
|
||||||
|
- compact non-private `notes`, reason codes, hashes, or fixture ids rather than raw private payloads.
|
||||||
|
|
||||||
|
## Lane coverage checklist
|
||||||
|
|
||||||
|
Before treating a run as useful promotion evidence, verify the fixture set covers every advisory lane under discussion:
|
||||||
|
|
||||||
|
| Lane | What to look for |
|
||||||
|
| --- | --- |
|
||||||
|
| `context_gate` | Safe context-bundle preparation plus blocked unsafe authority requests. |
|
||||||
|
| `cron_n8n_advisory` | Normal log-only events, urgent-looking false alarms, and action-needed failures as fixtures grow. |
|
||||||
|
| `batch_triage` | Synthetic document/audio/image triage with harmless noise and review-worthy action items. |
|
||||||
|
| `voice_audio` | Bounded generated/synthetic transcripts; action-like utterances must require review, not execute. |
|
||||||
|
| `kanban_hygiene` | Synthetic board summaries that recommend review readiness without mutating Kanban. |
|
||||||
|
| `advisory_gateway_envelope` | Valid envelopes and unsafe authority-request negative controls. |
|
||||||
|
|
||||||
|
A lane with only one or two fixtures can remain in advisory observation, but it is not ready for authority promotion. Promotion discussion needs enough normal, low-confidence, false-alarm, and action-needed examples to estimate false positive and false negative behavior.
|
||||||
|
|
||||||
|
## Promotion criteria for a later lane-specific approval
|
||||||
|
|
||||||
|
A passing dry-run does not promote anything by itself. It only makes a lane eligible for a later approval discussion.
|
||||||
|
|
||||||
|
Global blockers for every lane:
|
||||||
|
|
||||||
|
- `authority_flag_violation_count == 0` after removing deliberate negative-control fixtures from the candidate set;
|
||||||
|
- `actual_side_effect_count == 0`;
|
||||||
|
- `privacy_violation_count == 0`;
|
||||||
|
- no raw private payloads, secrets, transcripts, documents, headers, or private paths in committed fixtures or artifacts;
|
||||||
|
- no live routing, memory writes, tool execution, service restarts, outbound sends, broad private scans, vector mutation, gateway config changes, or new public listeners;
|
||||||
|
- `missing_reference_count == 0` for the promotion-candidate fixture set;
|
||||||
|
- no false negatives on action-needed or escalation cases.
|
||||||
|
|
||||||
|
Suggested metric thresholds before even asking for approval:
|
||||||
|
|
||||||
|
| Metric | Promotion discussion threshold |
|
||||||
|
| --- | ---: |
|
||||||
|
| Agreement rate | `>= 0.95` overall and `>= 0.90` for the specific lane. |
|
||||||
|
| False positive rate | `<= 0.03` overall, with all high-severity false positives reviewed. |
|
||||||
|
| False negative rate | `<= 0.01` for action-needed/escalation cases. |
|
||||||
|
| Uncertain rate | `<= 0.15`, unless the lane is intentionally conservative. |
|
||||||
|
| Unexpected fallback rate | `<= 0.02`, with reason codes for every fallback. |
|
||||||
|
| NPU proof OK rate | `>= 0.98` for live proof-required lanes. |
|
||||||
|
| p95 latency | Within a documented lane-specific SLO. |
|
||||||
|
| Authority/privacy violations | exactly `0` in the candidate set. |
|
||||||
|
|
||||||
|
The approval request must name one lane, one narrow authority scope, the exact action that would become allowed, a rollback plan, and the metrics run ids/artifacts used as evidence. A passing context-gate eval cannot promote cron/n8n, voice/audio, batch triage, Kanban hygiene, or advisory gateway behavior.
|
||||||
|
|
||||||
|
## Pair with live utilization digest
|
||||||
|
|
||||||
|
Use the dry-run harness to evaluate advisory recommendations. Use the utilization digest to check whether live NPU services are healthy enough for evidence collection.
|
||||||
|
|
||||||
|
Read-only live check:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm
|
||||||
|
scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
|
||||||
|
```
|
||||||
|
|
||||||
|
Optional JSONL artifact for trend tracking:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
scripts/npu-utilization-digest.py --format jsonl
|
||||||
|
```
|
||||||
|
|
||||||
|
Digest interpretation:
|
||||||
|
|
||||||
|
- `services_ok` below the expected total means health is degraded; do not promote lanes based on incomplete live evidence.
|
||||||
|
- `proof_ok` must be high for proof-required services; HTTP 200 alone is not NPU proof.
|
||||||
|
- `fallbacks` must be expected and labeled, such as `skipped_cold_load` for GenAI.
|
||||||
|
- `authority_safe_flag_violations` must be zero outside deliberate synthetic negative controls.
|
||||||
|
- Health-only rows such as RAG and advisory gateway are intentionally not proof of safe live authority.
|
||||||
|
|
||||||
|
## Tests and review commands
|
||||||
|
|
||||||
|
Offline dry-run harness tests:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m pytest tests/test_npu_advisory_dry_run_comparison.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Offline utilization digest tests:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m pytest tests/test_npu_utilization_digest.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Suggested pre-review bundle:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format json --fail-on-mismatch >/tmp/npu-advisory-summary.json
|
||||||
|
python scripts/npu-advisory-dry-run-comparison.py --format markdown >/tmp/npu-advisory-summary.md
|
||||||
|
python -m pytest tests/test_npu_advisory_dry_run_comparison.py tests/test_npu_utilization_digest.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Reviewers should confirm that generated summaries are compact, fixture-only, and free of private payloads; that the negative-control authority violation is detected; and that docs describe advisory outputs flowing into gates rather than direct actions.
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
# Explicit-root NPU batch triage dry-run examples
|
||||||
|
|
||||||
|
These examples are wrappers only. They do not install cron jobs, enable services,
|
||||||
|
change Atlas/Hermes routing, write Obsidian/RAG/vector DBs, move/delete files, or
|
||||||
|
send outbound messages.
|
||||||
|
|
||||||
|
The committed manifest template at `config/triage-roots.example.yaml` is
|
||||||
|
intentionally unapproved. For real private data, copy it to
|
||||||
|
`config/triage-roots.local.yaml` and approve exactly one narrow lane-specific
|
||||||
|
staging folder. Request-level `--root` may narrow that manifest root but cannot
|
||||||
|
broaden it.
|
||||||
|
|
||||||
|
Synthetic document/image smoke, CPU-only/no NPU claim:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-batch-triage-dry-run.py \
|
||||||
|
--manifest config/triage-roots.test.yaml \
|
||||||
|
--lane screenshots \
|
||||||
|
--root openvino-doc-image-triage-npu/samples \
|
||||||
|
--limit 5 \
|
||||||
|
--dry-run \
|
||||||
|
--no-npu \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Synthetic document/image smoke with the existing local embeddings NPU service,
|
||||||
|
if `127.0.0.1:18817` is healthy. Treat NPU as proven only when `npu.proof_ok` is
|
||||||
|
true and `npu.busy_delta_us` (or item-level delta) is positive:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-batch-triage-dry-run.py \
|
||||||
|
--manifest config/triage-roots.test.yaml \
|
||||||
|
--lane receipts \
|
||||||
|
--root openvino-doc-image-triage-npu/samples \
|
||||||
|
--limit 5 \
|
||||||
|
--dry-run \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Audio smoke should use generated/public synthetic audio only until a private
|
||||||
|
audio staging root is approved:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/npu-batch-triage-dry-run.py \
|
||||||
|
--manifest config/triage-roots.test.yaml \
|
||||||
|
--lane voice_memos \
|
||||||
|
--root tmp/synthetic-voice-memos \
|
||||||
|
--limit 3 \
|
||||||
|
--dry-run \
|
||||||
|
--no-npu \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Cron/n8n shape (disabled example only):
|
||||||
|
|
||||||
|
```text
|
||||||
|
Manual Trigger / disabled cron
|
||||||
|
-> Execute Command: python /home/will/lab/swarm/scripts/npu-batch-triage-dry-run.py --manifest /home/will/lab/swarm/config/triage-roots.local.yaml --lane receipts --limit 25 --dry-run --json
|
||||||
|
-> IF ok && npu.proof_ok && files_processed > 0
|
||||||
|
-> local dashboard/report only
|
||||||
|
```
|
||||||
|
|
||||||
|
Do not connect this output to Telegram/Discord/email sends, Obsidian writes,
|
||||||
|
RAG/vector reindex, file moves/deletes, Kanban mutation, service restarts, or
|
||||||
|
Atlas/Hermes routing without a separate reviewed approval gate.
|
||||||
@@ -0,0 +1,204 @@
|
|||||||
|
# NPU integrated health checks — operator runbook notes
|
||||||
|
|
||||||
|
Compact, read-only operator workflow that combines the existing
|
||||||
|
`scripts/npu-service-health.sh` listener/systemd/embedding-proof probe with the
|
||||||
|
reviewer-approved `scripts/npu-utilization-digest.py` per-service utilization
|
||||||
|
and fallback report. Together they form a single safe daily / on-demand NPU
|
||||||
|
health pass.
|
||||||
|
|
||||||
|
Scope:
|
||||||
|
|
||||||
|
- Read-only against live services. No restarts, route changes, vector mutation,
|
||||||
|
advisory POSTs, outbound sends, or memory writes.
|
||||||
|
- No new persistent services, timers, sockets, compose services, or Dockerfiles
|
||||||
|
are introduced by this integration. Both scripts are foreground / on-demand.
|
||||||
|
- Binds verified local-only or on the approved Docker bridge (`172.19.0.1:18830`).
|
||||||
|
Pre-existing broader binds on the live baseline ports (`18810`, `18814`,
|
||||||
|
`18816`, `18817`) are noted in the runbook and unchanged here.
|
||||||
|
- NPU proof requires real inference plus a positive
|
||||||
|
`/sys/class/accel/accel0/device/npu_busy_time_us` delta. HTTP 200 alone is
|
||||||
|
not sufficient.
|
||||||
|
|
||||||
|
## When to run
|
||||||
|
|
||||||
|
- Daily / on-demand ops check.
|
||||||
|
- After upgrades that touch the NPU stack, OpenVINO, or any of the live
|
||||||
|
specialists.
|
||||||
|
- Before any approval-gated change that depends on the NPU reflex layer.
|
||||||
|
- As the read-only verification step of a deploy or recovery runbook.
|
||||||
|
|
||||||
|
## Required artifacts on the branch
|
||||||
|
|
||||||
|
| Path | Role |
|
||||||
|
| --- | --- |
|
||||||
|
| `scripts/npu-service-health.sh` | Listener / systemd / Docker / health endpoint / single embedding proof. Existing baseline script. |
|
||||||
|
| `scripts/npu-utilization-digest.py` | Per-service utilization digest with NPU proof per probe, compact text or JSONL output, optional JSONL artifact. |
|
||||||
|
| `docs/npu-utilization-digest.md` | Per-service digest reference. |
|
||||||
|
| `docs/npu-advisory-observability-runbook.md` | Dry-run comparison and later promotion criteria for advisory lanes. |
|
||||||
|
| `tests/test_npu_utilization_digest.py` | Offline unit tests for the digest (no live services required). |
|
||||||
|
|
||||||
|
## Integrated workflow
|
||||||
|
|
||||||
|
### Step 1 — Listener and service-state snapshot
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/lab/swarm
|
||||||
|
./scripts/npu-service-health.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
What it verifies, in order:
|
||||||
|
|
||||||
|
1. `npu_busy_time_us` counter is readable.
|
||||||
|
2. Required listeners are present on `18810 / 18814 / 18816 / 18817 / 18818 /
|
||||||
|
18819 / 18820 / 18829 / 18830`.
|
||||||
|
3. User systemd services are active/enabled for embeddings, RAG health,
|
||||||
|
reranker, router/classifier, and the small GenAI worker.
|
||||||
|
4. Docker Compose `whisper-server-npu` is up.
|
||||||
|
5. Health endpoints return JSON for the live baseline and local specialists.
|
||||||
|
6. A single non-private embeddings request to `:18817` produces a positive
|
||||||
|
sysfs `npu_busy_time_us` delta; the script exits nonzero if there is no
|
||||||
|
positive delta.
|
||||||
|
|
||||||
|
Read the last block (`== Embeddings NPU busy-time proof ==`) first. If
|
||||||
|
`result=ok` and `sysfs_delta_us > 0`, the central NPU path is healthy. If not,
|
||||||
|
do not run the digest; triage the embeddings service first.
|
||||||
|
|
||||||
|
### Step 2 — Per-service utilization digest
|
||||||
|
|
||||||
|
```bash
|
||||||
|
scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
|
||||||
|
```
|
||||||
|
|
||||||
|
Compact output shape:
|
||||||
|
|
||||||
|
```text
|
||||||
|
NPU utilization digest <timestamp>
|
||||||
|
counter=/sys/class/accel/accel0/device/npu_busy_time_us delta_us=<total>
|
||||||
|
services_ok=<ok>/<total> proof_ok=<ok>/<proof-capable> fallbacks=<n> gates_closed=<n>
|
||||||
|
- embeddings: ok=true calls=1 avg_ms=... npu_delta_us=... proof=true mode=NPU
|
||||||
|
- rerank: ok=true calls=1 docs=2 avg_ms=... npu_delta_us=... proof=true mode=NPU
|
||||||
|
- whisper: ok=true calls=1 jobs=1 avg_ms=... npu_delta_us=... proof=true mode=NPU
|
||||||
|
- classifier: ok=true calls=1 events=1 avg_ms=... npu_delta_us=... proof=true dry_run=true ...
|
||||||
|
- genai: ok=true jobs=0 loaded=false mode=loaded=false reason=skipped_cold_load
|
||||||
|
- doc_triage: ok=true calls=1 files=1 avg_ms=... npu_delta_us=... proof=true gate=closed:private-root
|
||||||
|
- rag_endpoint: ok=true mode=health_only gate=closed:vector-mutation
|
||||||
|
- rag_health: ok=true mode=health_only
|
||||||
|
- advisory_gateway: ok=true mode=health_only gate=closed:advisory-post
|
||||||
|
fallbacks: skipped_cold_load=1
|
||||||
|
```
|
||||||
|
|
||||||
|
Read order for ops:
|
||||||
|
|
||||||
|
1. `services_ok` row — anything below `9/9` means a service is down or unhealthy.
|
||||||
|
2. `proof_ok` row — `proof_ok=5/5` means every probe that ran with a real
|
||||||
|
inference request produced a positive sysfs NPU delta.
|
||||||
|
3. `fallbacks:` line — `skipped_cold_load=1` is expected (GenAI worker is
|
||||||
|
intentionally not cold-loaded). Any other fallback label is a triage signal.
|
||||||
|
4. `gate=` labels — closed gates that remain closed by design.
|
||||||
|
|
||||||
|
### Step 3 — Optional artifact for trend tracking
|
||||||
|
|
||||||
|
```bash
|
||||||
|
scripts/npu-utilization-digest.py --format jsonl
|
||||||
|
```
|
||||||
|
|
||||||
|
Writes a single JSONL file per digest under
|
||||||
|
`/home/will/.local/state/npu-utilization/digests/<timestamp>.jsonl`. The first
|
||||||
|
line is the summary; subsequent lines are per-service rows. No JSONL write
|
||||||
|
happens with `--no-write`.
|
||||||
|
|
||||||
|
### Step 4 — Offline unit tests
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m pytest tests/test_npu_utilization_digest.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Does not require live services. Use to validate digest logic after edits or
|
||||||
|
before merging.
|
||||||
|
|
||||||
|
## Compact proof interpretation
|
||||||
|
|
||||||
|
For each proof-capable service, both the response-level `npu_busy_delta_us`
|
||||||
|
(when the service reports it) and the script's own sysfs before/after delta
|
||||||
|
must agree and be `> 0`. The proof is only valid when an actual inference
|
||||||
|
request ran. If a probe was skipped (`reason=skipped_cold_load` or
|
||||||
|
`reason=smoke_disabled`), `proof_ok` for that row is `None` and the row
|
||||||
|
contributes a labeled fallback instead of a proof failure.
|
||||||
|
|
||||||
|
Proof currently runs on:
|
||||||
|
|
||||||
|
- `embeddings` (`:18817`)
|
||||||
|
- `rerank` (`:18818`)
|
||||||
|
- `whisper` (`:18816`) when `--include-whisper-smoke=true` (default)
|
||||||
|
- `classifier` (`:18819`)
|
||||||
|
- `doc_triage` (`:18829`) when `--include-doc-triage-smoke=true` (default);
|
||||||
|
proof is via the embeddings service, not directly on the NPU device, so the
|
||||||
|
row reports `mode=NPU-via-embedding-service`.
|
||||||
|
|
||||||
|
Intentionally health-only (no proof row):
|
||||||
|
|
||||||
|
- `rag_endpoint` (`:18810`) — closed:vector-mutation
|
||||||
|
- `rag_health` (`:18814`)
|
||||||
|
- `advisory_gateway` (`172.19.0.1:18830`) — closed:advisory-post
|
||||||
|
|
||||||
|
Intentionally skipped by default:
|
||||||
|
|
||||||
|
- `genai` (`:18820`) — `loaded=false` until first use; cold-loading just to
|
||||||
|
obtain an NPU proof has a real cost, so it is treated as a labeled fallback rather
|
||||||
|
than a proof failure. Opt in with `--include-genai-smoke=true` only when the
|
||||||
|
task actually needs a generation smoke.
|
||||||
|
|
||||||
|
## Exit codes and triage gates
|
||||||
|
|
||||||
|
`scripts/npu-service-health.sh`:
|
||||||
|
|
||||||
|
| Exit | Meaning | Next |
|
||||||
|
| ---: | --- | --- |
|
||||||
|
| 0 | All checks passed including embeddings proof. | Continue to digest. |
|
||||||
|
| 2 | `npu_busy_time_us` not readable. | Check kernel/driver; do not run digest. |
|
||||||
|
| 3 | Embedding request failed. | Triage `openvino-embeddings.service` and port `:18817`. |
|
||||||
|
| 4 | Embedding request succeeded but sysfs delta `<= 0`. | Service reachable but not on the NPU; check service logs and device bind. |
|
||||||
|
|
||||||
|
`scripts/npu-utilization-digest.py`:
|
||||||
|
|
||||||
|
| Exit | Meaning | Next |
|
||||||
|
| ---: | --- | --- |
|
||||||
|
| 0 | All reachable services handled; proof/fallback accounting completed. | Inspect `proof_ok` and `fallbacks:` for any unexpected labels. |
|
||||||
|
| 2 | `--strict-proof` was set and at least one proof-required probe ran without a positive sysfs delta. | Triage the named service's NPU path. |
|
||||||
|
|
||||||
|
## Approval gates left closed
|
||||||
|
|
||||||
|
The integrated workflow intentionally does not:
|
||||||
|
|
||||||
|
- start, stop, restart, enable, or disable any user systemd unit or Docker
|
||||||
|
Compose service;
|
||||||
|
- write to or mutate the Chroma collection `obsidian_bge_npu` or any other
|
||||||
|
vector store;
|
||||||
|
- change Atlas/Hermes routing or model defaults;
|
||||||
|
- post classification/generation/triage events to the advisory gateway;
|
||||||
|
- broaden private document, image, or audio roots;
|
||||||
|
- bind any new listener, including on `0.0.0.0`;
|
||||||
|
- write memory, send messages, execute tools, or mutate Kanban state.
|
||||||
|
|
||||||
|
These remain approval-gated and are tracked on the `npu-maximization` board.
|
||||||
|
|
||||||
|
For advisory-lane promotion decisions, pair this live utilization pass with the fixture-only dry-run comparison in `docs/npu-advisory-observability-runbook.md`. The digest can show whether live NPU services are healthy enough to collect evidence; it does not promote advisory outputs into authority. Promotion remains a separate lane-specific approval with explicit scope and rollback.
|
||||||
|
|
||||||
|
## Quick reference
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Single-pass NPU health check (listener + systemd + embeddings proof).
|
||||||
|
cd ~/lab/swarm && ./scripts/npu-service-health.sh
|
||||||
|
|
||||||
|
# Compact digest with per-service proof and fallback accounting.
|
||||||
|
scripts/npu-utilization-digest.py --no-write --include-genai-smoke false --format text
|
||||||
|
|
||||||
|
# Same, with a JSONL artifact for trend tracking.
|
||||||
|
scripts/npu-utilization-digest.py --format jsonl
|
||||||
|
|
||||||
|
# Strict mode for CI / pre-merge.
|
||||||
|
scripts/npu-utilization-digest.py --no-write --strict-proof
|
||||||
|
|
||||||
|
# Offline digest logic tests.
|
||||||
|
python -m pytest tests/test_npu_utilization_digest.py -q
|
||||||
|
```
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
# NPU utilization digest
|
||||||
|
|
||||||
|
Compact on-demand observability for Will's local OpenVINO/NPU specialists.
|
||||||
|
|
||||||
|
Script:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/home/will/lab/swarm/scripts/npu-utilization-digest.py --format text
|
||||||
|
```
|
||||||
|
|
||||||
|
Safe defaults:
|
||||||
|
|
||||||
|
- read-only for services; no service starts/stops/restarts, routing changes, vector DB mutation, advisory POSTs, outbound sends, or memory writes;
|
||||||
|
- writes only a compact JSONL artifact under `/home/will/.local/state/npu-utilization/digests` unless `--no-write` is passed;
|
||||||
|
- uses synthetic/non-private requests for embeddings, rerank, classifier dry-run, and doc triage;
|
||||||
|
- keeps GenAI generation disabled by default when the worker is not loaded, to avoid cold-load side effects;
|
||||||
|
- advisory gateway remains health-only because POSTs write metadata/events;
|
||||||
|
- NPU proof is only true when an inference probe ran and `/sys/class/accel/accel0/device/npu_busy_time_us` increased around that probe.
|
||||||
|
|
||||||
|
Common commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Compact CLI digest, plus JSONL artifact.
|
||||||
|
scripts/npu-utilization-digest.py --format text
|
||||||
|
|
||||||
|
# No artifact write; useful during reviews.
|
||||||
|
scripts/npu-utilization-digest.py --no-write --include-genai-smoke false
|
||||||
|
|
||||||
|
# Machine-readable stdout.
|
||||||
|
scripts/npu-utilization-digest.py --format jsonl --no-write
|
||||||
|
|
||||||
|
# CI/unit tests; live services not required.
|
||||||
|
python -m pytest tests/test_npu_utilization_digest.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Output shape is intentionally small: service booleans, request counts by service, average probe ms, sysfs/NPU busy deltas by service, proof flags, fallback totals and per-service fallback counts, confidence distribution, escalation/suppression recommendation counts, authority-safe flag violation totals, artifact path, and closed gates. `fallbacks` includes unavailable services, failed/missing proof, and skipped proof-capable smokes such as disabled Whisper/doc-triage probes or GenAI cold-load skips; intentionally health-only RAG/advisory rows are not fallbacks unless unavailable. It does not print raw embeddings, transcripts, OCR text, model completions, request headers, or full upstream JSON.
|
||||||
|
|
||||||
|
Covered rows:
|
||||||
|
|
||||||
|
- `embeddings`: `/v1/embeddings` synthetic string, positive sysfs delta required.
|
||||||
|
- `rerank`: `/rerank` with two synthetic docs, positive sysfs delta required.
|
||||||
|
- `whisper`: health-only unless the bounded generated-WAV smoke is enabled.
|
||||||
|
- `classifier`: `/v1/classify` with `dry_run=true` and `include_evidence=false`, positive sysfs delta required.
|
||||||
|
- `genai`: health-only by default; skips when `loaded=false` unless explicitly opted in.
|
||||||
|
- `doc_triage`: one approved synthetic sample under the service sample root, with `allowed_roots` narrowed to that sample directory; NPU proof is via embeddings.
|
||||||
|
- `rag_endpoint` and `rag_health`: health-only; no vector mutation.
|
||||||
|
- `advisory_gateway`: health-only; `closed:advisory-post` gate remains closed.
|
||||||
|
|
||||||
|
Closed gates left for later approval: sending/delivery, recurring timer, GenAI cold-load smoke, advisory POSTs, Atlas/Hermes routing changes, vector mutation/reindex, and broad private document/audio/image roots.
|
||||||
@@ -0,0 +1,135 @@
|
|||||||
|
# NPU voice/audio local-file pipeline
|
||||||
|
|
||||||
|
This is the first-slice local-file voice/audio path for the NPU maximization program:
|
||||||
|
|
||||||
|
```text
|
||||||
|
local audio file or already-staged attachment
|
||||||
|
-> OpenVINO NPU Whisper (:18816)
|
||||||
|
-> OpenVINO NPU classifier (:18819)
|
||||||
|
-> explicit advisory gate
|
||||||
|
-> Atlas/Hermes only after separate approval
|
||||||
|
```
|
||||||
|
|
||||||
|
The implementation is `scripts/npu_voice_audio_pipeline.py`. It is a CLI wrapper only; it starts no listener and performs no outbound sends, Obsidian writes, memory writes, vector DB mutations, Kanban mutations, service restarts, platform API calls, or live Atlas/Hermes routing changes.
|
||||||
|
|
||||||
|
## Safety gates
|
||||||
|
|
||||||
|
Closed unless explicitly approved later:
|
||||||
|
|
||||||
|
- Telegram/Discord fetching by bot token or attachment URL.
|
||||||
|
- Outbound messages or auto-sends.
|
||||||
|
- Obsidian/vault writes.
|
||||||
|
- Memory writes.
|
||||||
|
- Vector DB mutation or reindex.
|
||||||
|
- Automatic Kanban mutation.
|
||||||
|
- Service restarts or new persistent listeners.
|
||||||
|
- Private-directory root broadening.
|
||||||
|
- Live Atlas/Hermes routing authority changes.
|
||||||
|
|
||||||
|
HTTP success is not NPU proof. For NPU claims, require real inference plus positive `/sys/class/accel/accel0/device/npu_busy_time_us` deltas. The CLI reports response deltas and observed sysfs deltas for Whisper and classifier calls.
|
||||||
|
|
||||||
|
## Example: synthetic local WAV smoke
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm
|
||||||
|
python - <<'PY'
|
||||||
|
import math, struct, wave
|
||||||
|
path = '/tmp/npu-voice-smoke.wav'
|
||||||
|
sr = 16000
|
||||||
|
with wave.open(path, 'wb') as w:
|
||||||
|
    w.setnchannels(1)
|
||||||
|
    w.setsampwidth(2)
|
||||||
|
    w.setframerate(sr)
|
||||||
|
    frames = bytearray()
|
||||||
|
    for i in range(int(sr * 0.6)):
|
||||||
|
        frames.extend(struct.pack('<h', int(12000 * math.sin(2 * math.pi * 440 * i / sr))))
|
||||||
|
    w.writeframes(frames)
|
||||||
|
print(path)
|
||||||
|
PY
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the local-file wrapper:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/home/will/.venvs/npu/bin/python scripts/npu_voice_audio_pipeline.py \
|
||||||
|
--audio /tmp/npu-voice-smoke.wav \
|
||||||
|
--title "synthetic smoke" \
|
||||||
|
--source manual_smoke \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Compact output shape:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ok": true,
|
||||||
|
"source": "manual_smoke",
|
||||||
|
"transcript_chars": 3,
|
||||||
|
"action_worthy": false,
|
||||||
|
"atlas_gate": "suppressed_not_action_worthy",
|
||||||
|
"whisper_npu_delta_us": 85441,
|
||||||
|
"whisper_sysfs_delta_us": 85441,
|
||||||
|
"classifier_npu_delta_us": 85908,
|
||||||
|
"classifier_sysfs_delta_us": 85908,
|
||||||
|
"classifier_observed_sysfs_delta_us": 85908,
|
||||||
|
"external_sends": 0,
|
||||||
|
"writes": 0
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
A non-actionable smoke should stay `suppressed_not_action_worthy`. A transcript with a reminder, task, follow-up, explicit question, or classifier `tool_needed=true` should become `advisory_only_not_sent`; even then, nothing is sent.
|
||||||
|
|
||||||
|
## Example: already-staged platform voice file
|
||||||
|
|
||||||
|
This example assumes another approved process has already placed the audio file locally. The wrapper does not fetch from Telegram/Discord and does not read bot tokens.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/home/will/.venvs/npu/bin/python scripts/npu_voice_audio_pipeline.py \
|
||||||
|
--audio /tmp/staged-voice-message.ogg \
|
||||||
|
--source staged_telegram \
|
||||||
|
--title "staged local Telegram voice memo" \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Compact fields
|
||||||
|
|
||||||
|
The CLI always reports:
|
||||||
|
|
||||||
|
- `ok`
|
||||||
|
- `id`
|
||||||
|
- `source`
|
||||||
|
- `transcript_chars`
|
||||||
|
- `action_worthy`
|
||||||
|
- `atlas_gate`
|
||||||
|
- `next_gate`
|
||||||
|
- `whisper_npu_delta_us`
|
||||||
|
- `whisper_sysfs_delta_us`
|
||||||
|
- `classifier_npu_delta_us`
|
||||||
|
- `classifier_sysfs_delta_us`
|
||||||
|
- `classifier_observed_sysfs_delta_us`
|
||||||
|
- `labels.workflow_category`
|
||||||
|
- `labels.tool_needed`
|
||||||
|
- `labels.urgency`
|
||||||
|
- `labels.safety_confirmation_required`
|
||||||
|
- `external_sends`
|
||||||
|
- `writes`
|
||||||
|
|
||||||
|
Transcript text is omitted by default. Use `--include-transcript` or `--include-transcript-preview-chars N` only for explicit local debugging.
|
||||||
|
|
||||||
|
## Input limits
|
||||||
|
|
||||||
|
- `--audio` must be an absolute local path.
|
||||||
|
- Symlinks, directories, missing files, empty files, unsupported extensions, and files over `--max-bytes` are refused.
|
||||||
|
- WAV duration is capped by `--max-audio-seconds`; other codecs remain size-capped in this first slice.
|
||||||
|
- Classifier transcript payload is bounded by `--max-transcript-chars`.
|
||||||
|
|
||||||
|
## Health prerequisites
|
||||||
|
|
||||||
|
Read-only checks:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://127.0.0.1:18816/health
|
||||||
|
curl -fsS http://127.0.0.1:18819/healthz
|
||||||
|
```
|
||||||
|
|
||||||
|
Do not restart services from this runbook. If either endpoint is unhealthy, stop and request an ops/remediation task.
|
||||||
@@ -0,0 +1,388 @@
|
|||||||
|
# OpenVINO/NPU VLM, audio, and wake-word feasibility
|
||||||
|
|
||||||
|
Date: 2026-06-04
|
||||||
|
Scope: feasibility/spec only for lower-priority assistant sidecars. This document does not enable services, alter Atlas/Hermes/gateway routing, mutate RAG/Chroma/vector collections, or process private document/image directories.
|
||||||
|
|
||||||
|
## Existing baseline and constraints
|
||||||
|
|
||||||
|
Live baseline discovered by parent task:
|
||||||
|
|
||||||
|
- RAG endpoint: `127.0.0.1:18810`
|
||||||
|
- RAG health wrapper: `127.0.0.1:18814`
|
||||||
|
- Whisper OpenVINO NPU: `127.0.0.1:18816`
|
||||||
|
- OpenVINO embeddings: `127.0.0.1:18817`
|
||||||
|
- Prototype ports currently reserved/not live: reranker `:18818`, classifier/router `:18819`, GenAI worker `:18820`, optional doc/image triage `:18829`
|
||||||
|
|
||||||
|
Local NPU runtime snapshot from the feasibility run:
|
||||||
|
|
||||||
|
- `/home/will/.venvs/npu` has `openvino==2026.2.0` and `openvino-genai==2026.2.0.0`.
|
||||||
|
- `openvino.Core().available_devices` reports `CPU`, `GPU.0`, `GPU.1`, and `NPU`.
|
||||||
|
- NPU device name: `Intel(R) AI Boost`.
|
||||||
|
- NPU claims must be verified by positive `/sys/class/accel/accel0/device/npu_busy_time_us` deltas around inference.
|
||||||
|
|
||||||
|
External release/project signals checked:
|
||||||
|
|
||||||
|
- OpenVINO 2026.2.0 release notes mention broader GenAI coverage and VLM samples, but the VLM acceleration notes are CPU/GPU-oriented; they do not provide a clear low-risk NPU VLM path.
|
||||||
|
- Prior OpenVINO release notes/search results mention OpenVINO Model Server VLM support for Qwen2-VL, Phi-3.5-Vision, and InternVL2.
|
||||||
|
- `openWakeWord` is an active Apache-2.0 local wake-word framework with ONNX Runtime/TFLite support, pre-trained wake-word models, optional VAD, and 16 kHz PCM streaming examples. It is not installed in the current NPU venv.
|
||||||
|
|
||||||
|
## Recommendation summary
|
||||||
|
|
||||||
|
| Lane | Recommendation | Priority | Why |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| VLM / image captioning | Defer NPU-first VLM. If pursued, prototype CPU/GPU VLM CLI first, then attempt NPU only after model/runtime compatibility is proven. | Low | NPU support for VLMs is not clearly mature in the current OpenVINO public notes; VLMs are memory/op-shape heavy; failures could be slow and noisy. Existing doc/image triage already covers practical local image metadata without a full VLM. |
|
||||||
|
| Lightweight image classification / caption fallback | Extend the existing `openvino-doc-image-triage-npu` lane before adding a new service. | Medium-low | It already has privacy boundaries, synthetic fixtures, CLI/server split, and NPU proof through embeddings. Add static-shape classifier only if a later task needs image labels beyond rule fallback. |
|
||||||
|
| Audio classification | Defer until a concrete assistant workflow needs it. Consider CPU/GPU/OpenVINO Runtime prototype using Speech Commands/ESC-style classifier before any daemon. | Low | Whisper NPU already covers transcription. Generic audio tags are less useful without a routing/product requirement and need dataset-specific threshold tuning. |
|
||||||
|
| Wake word | Worth a small CPU-only local smoke prototype; do not spend NPU time first. | Medium | Wake-word detection must be always-on, tiny, and reliable. CPU openWakeWord/ONNX/TFLite is the lowest-risk path and avoids starving existing NPU Whisper/embedding services. NPU use is only worth testing after CPU false-positive/latency behavior is acceptable. |
|
||||||
|
|
||||||
|
## VLM / image-captioning path
|
||||||
|
|
||||||
|
### Recommended model/runtime
|
||||||
|
|
||||||
|
Initial runtime: CLI-first OpenVINO GenAI or OpenVINO Model Server on CPU/GPU, not NPU-first.
|
||||||
|
|
||||||
|
Candidate models to evaluate, in order:
|
||||||
|
|
||||||
|
1. `Qwen2-VL-2B-Instruct` OpenVINO/OVMS-compatible export if a small converted artifact is already available.
|
||||||
|
2. `Phi-3.5-Vision-Instruct` only if memory/startup is acceptable.
|
||||||
|
3. `InternVL2` only as a compatibility reference; likely too heavy for a low-priority local assistant sidecar.
|
||||||
|
|
||||||
|
Why this order:
|
||||||
|
|
||||||
|
- Qwen2-VL is named as supported in OpenVINO Model Server release notes/search results and has smaller variants.
|
||||||
|
- Phi-3.5-Vision is also named in OpenVINO Model Server VLM support, but may be heavier.
|
||||||
|
- NPU is not the first target because public OpenVINO 2026.2 release notes emphasize VLM improvements for CPU/GPU, not NPU. Treat NPU VLM as experimental until a smoke test proves compilation and positive busy-time deltas.
|
||||||
|
|
||||||
|
### Endpoint/CLI contract
|
||||||
|
|
||||||
|
CLI-first contract:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python vlm_caption.py \
|
||||||
|
--image /path/to/synthetic_or_explicitly_allowed_image.png \
|
||||||
|
--prompt "Describe this image in one sentence." \
|
||||||
|
--device CPU \
|
||||||
|
--max-new-tokens 96 \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Response shape:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ok": true,
|
||||||
|
"media_type": "image",
|
||||||
|
"source_path_basename": "synthetic_scene.png",
|
||||||
|
"source_sha256": "sha256:...",
|
||||||
|
"model": "qwen2-vl-small-openvino",
|
||||||
|
"runtime": "openvino-genai-or-ovms",
|
||||||
|
"device_requested": "CPU",
|
||||||
|
"device_observed": "CPU",
|
||||||
|
"caption": "A synthetic chart with three colored bars.",
|
||||||
|
"safety": {
|
||||||
|
"external_uploads": false,
|
||||||
|
"raw_image_logged": false,
|
||||||
|
"private_paths_allowed": false
|
||||||
|
},
|
||||||
|
"timing_ms": {
|
||||||
|
"load": 0,
|
||||||
|
"inference": 0,
|
||||||
|
"total": 0
|
||||||
|
},
|
||||||
|
"npu_busy_delta_us": null
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Optional localhost HTTP contract, only after CLI is stable:
|
||||||
|
|
||||||
|
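- Bind: `127.0.0.1:18829` (note: this port is also reserved for the optional doc/image triage prototype, so the two must not run side by side on it) or another explicitly approved unused prototype port.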
|
||||||
|
- `GET /healthz`
|
||||||
|
- `GET /models`
|
||||||
|
- `POST /v1/vision/caption`
|
||||||
|
|
||||||
|
Request body:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"path": "/allowed/root/synthetic_scene.png",
|
||||||
|
"prompt": "Describe this image in one sentence.",
|
||||||
|
"max_new_tokens": 96,
|
||||||
|
"device": "CPU"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Smoke-test plan using non-private data
|
||||||
|
|
||||||
|
Use only generated fixtures under the repo, similar to `openvino-doc-image-triage-npu/samples/`:
|
||||||
|
|
||||||
|
1. Create synthetic PNGs: simple chart, receipt-like image, screenshot-like text panel, and blank/noisy image.
|
||||||
|
2. Run the CLI with `--allowed-root "$PWD/samples"` (a path-restriction flag not shown in the CLI contract example above) and assert:
|
||||||
|
- JSON parses.
|
||||||
|
- `external_uploads=false`.
|
||||||
|
- only basename and SHA-256 are returned by default.
|
||||||
|
- captions are non-empty and under a configured token/character limit.
|
||||||
|
- unsupported/private paths are rejected.
|
||||||
|
3. If an HTTP server is added, start it in foreground on `127.0.0.1`, call `/healthz` and `/v1/vision/caption`, then stop it.
|
||||||
|
4. No private image/document folders and no Obsidian vault content should be used for smoke tests.
|
||||||
|
|
||||||
|
### NPU busy-time verification plan
|
||||||
|
|
||||||
|
Only claim NPU VLM if all of these pass:
|
||||||
|
|
||||||
|
1. Verify the counter is readable:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
|
||||||
|
test -r "$BUSY" && before=$(cat "$BUSY")
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Run exactly one synthetic-image inference with `device=NPU`.
|
||||||
|
3. Read `after=$(cat "$BUSY")`.
|
||||||
|
4. Require `after - before > 0` and a response-level `npu_busy_delta_us > 0` if the server reports it.
|
||||||
|
5. Repeat with a second synthetic image so the positive delta cannot be attributed solely to unrelated startup activity.
|
||||||
|
6. If HTTP returns 200 but the sysfs delta is zero, document as `NPU not verified` and do not call it an NPU service.
|
||||||
|
|
||||||
|
### No-go / defer criteria
|
||||||
|
|
||||||
|
Defer VLM NPU work if any apply:
|
||||||
|
|
||||||
|
- Model export/compile to NPU fails or requires unsupported ops/custom patches.
|
||||||
|
- A successful inference on a small synthetic image needs more than 60 seconds cold or more than 10 seconds warm.
|
||||||
|
- NPU busy-time delta is zero or inconsistent.
|
||||||
|
- Memory pressure disrupts Whisper `:18816`, embeddings `:18817`, or RAG `:18810`.
|
||||||
|
- The only useful path requires processing private images/docs before synthetic smoke tests are stable.
|
||||||
|
- Captions are too hallucination-prone for automation decisions without a human-review gate.
|
||||||
|
|
||||||
|
## Lightweight image triage/classification path
|
||||||
|
|
||||||
|
### Recommended model/runtime
|
||||||
|
|
||||||
|
Recommended near-term path: keep `openvino-doc-image-triage-npu` as the primary image/document lane and add only a static-shape classifier if rule fallback becomes inadequate.
|
||||||
|
|
||||||
|
Candidate classifier families for a later task:
|
||||||
|
|
||||||
|
- MobileNetV3/EfficientNet-Lite/ResNet-18 style image classifier exported to OpenVINO IR.
|
||||||
|
- Use NPU only if the IR compiles with static shapes and produces positive busy-time deltas.
|
||||||
|
- Keep OCR/PDF rendering CPU-local; do not try to force OCR onto NPU in this phase.
|
||||||
|
|
||||||
|
Why:
|
||||||
|
|
||||||
|
- The current triage prototype already has the right privacy contract and reports CPU vs NPU stages.
|
||||||
|
- A small classifier is much lower risk than a VLM and can be used for labels like `screenshot`, `receipt`, `document`, `photo`, `chart`.
|
||||||
|
|
||||||
|
### Endpoint/CLI contract
|
||||||
|
|
||||||
|
Extend existing CLI shape rather than introduce a new daemon:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/home/will/.venvs/npu/bin/python triage.py \
|
||||||
|
--allowed-root "$PWD" \
|
||||||
|
--image-classifier-model /home/will/models/openvino-image-classifier/model.xml \
|
||||||
|
--image-classifier-device NPU \
|
||||||
|
--pretty \
|
||||||
|
samples/synthetic_invoice.png
|
||||||
|
```
|
||||||
|
|
||||||
|
Response addition:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"classification": {
|
||||||
|
"label": "receipt_or_invoice",
|
||||||
|
"confidence": 0.82,
|
||||||
|
"device": "NPU",
|
||||||
|
"method": "openvino_image_classifier",
|
||||||
|
"npu_busy_delta_us": 12345
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Smoke-test plan
|
||||||
|
|
||||||
|
Reuse `openvino-doc-image-triage-npu/make_samples.py` and `tests/smoke_test.py`; add synthetic image-label assertions only after a classifier model exists. Keep `--no-embeddings` mode available so the smoke suite can separate classifier NPU proof from embeddings `:18817` proof.
|
||||||
|
|
||||||
|
### No-go / defer criteria
|
||||||
|
|
||||||
|
- Static-shape classifier cannot compile on NPU.
|
||||||
|
- Labels are not useful enough to drive an assistant workflow.
|
||||||
|
- Classifier output duplicates the existing rule-based fallback.
|
||||||
|
|
||||||
|
## Audio classification path
|
||||||
|
|
||||||
|
### Recommended model/runtime
|
||||||
|
|
||||||
|
Defer implementation. If a concrete workflow appears, start with a CLI-only OpenVINO Runtime classifier on CPU/GPU using synthetic/public audio fixtures, not a persistent service.
|
||||||
|
|
||||||
|
Potential model classes:
|
||||||
|
|
||||||
|
- Speech Commands keyword classifier for short command categories.
|
||||||
|
- ESC-50/AudioSet-like environmental sound classifier only if the task requires non-speech detection.
|
||||||
|
- Whisper transcript + lightweight text classifier may be enough for most assistant routing, using existing Whisper NPU `:18816`.
|
||||||
|
|
||||||
|
Why:
|
||||||
|
|
||||||
|
- The system already has local Whisper NPU transcription.
|
||||||
|
- Generic audio classification needs careful threshold tuning and false-positive analysis.
|
||||||
|
- Always-on audio processing has privacy and resource implications; keep it explicit and local.
|
||||||
|
|
||||||
|
### CLI contract
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python audio_classify.py \
|
||||||
|
--input samples/synthetic_chime.wav \
|
||||||
|
--model /home/will/models/openvino-audio-classifier/model.xml \
|
||||||
|
--device CPU \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Response shape:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ok": true,
|
||||||
|
"source_path_basename": "synthetic_chime.wav",
|
||||||
|
"source_sha256": "sha256:...",
|
||||||
|
"sample_rate": 16000,
|
||||||
|
"duration_seconds": 1.2,
|
||||||
|
"labels": [
|
||||||
|
{"label": "chime", "confidence": 0.76}
|
||||||
|
],
|
||||||
|
"device_requested": "CPU",
|
||||||
|
"device_observed": "CPU",
|
||||||
|
"npu_busy_delta_us": null,
|
||||||
|
"privacy": {"external_uploads": false, "raw_audio_logged": false}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
An optional HTTP endpoint should wait until a concrete workflow exists. If one is added later, bind it to localhost only and avoid overlapping with ports that are already live or reserved.
|
||||||
|
|
||||||
|
### Smoke-test plan using non-private data
|
||||||
|
|
||||||
|
1. Generate synthetic WAV files in repo-local `samples/`: sine tone, silence, white noise, simple chime, and a short synthetic spoken phrase if a local TTS fixture is available.
|
||||||
|
2. Run the CLI on each file with `--allowed-root "$PWD/samples"` (a path-restriction flag not shown in the CLI contract example above).
|
||||||
|
3. Assert JSON parses, durations are bounded, and confidence values are numeric.
|
||||||
|
4. Do not stream microphone input or scan private audio directories in smoke tests.
|
||||||
|
5. If NPU mode is attempted, wrap each inference in sysfs busy-time reads.
|
||||||
|
|
||||||
|
### No-go / defer criteria
|
||||||
|
|
||||||
|
- No concrete downstream automation consumes the labels.
|
||||||
|
- False positives cannot be characterized on synthetic/public fixtures.
|
||||||
|
- It competes with Whisper NPU or requires a persistent microphone daemon without explicit approval.
|
||||||
|
|
||||||
|
## Wake-word path
|
||||||
|
|
||||||
|
### Recommended model/runtime
|
||||||
|
|
||||||
|
Recommended first runtime: CPU-only `openWakeWord` CLI/foreground process with ONNX Runtime or TFLite backend.
|
||||||
|
|
||||||
|
NPU recommendation: defer. Try NPU/OpenVINO conversion only after CPU openWakeWord passes false-positive and latency checks.
|
||||||
|
|
||||||
|
Why:
|
||||||
|
|
||||||
|
- Wake-word detection is always-on and latency-sensitive; reliability matters more than accelerator novelty.
|
||||||
|
- The model is small enough that CPU is likely acceptable and simpler.
|
||||||
|
- Keeping wake-word off NPU reduces contention with Whisper NPU and embeddings.
|
||||||
|
- openWakeWord has pre-trained models, optional VAD, and straightforward 16 kHz PCM frame APIs.
|
||||||
|
|
||||||
|
### Endpoint/CLI contract
|
||||||
|
|
||||||
|
CLI smoke contract:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python wake_word_smoke.py \
|
||||||
|
--model hey_jarvis \
|
||||||
|
--positive samples/synthetic_wake_positive.wav \
|
||||||
|
--negative samples/synthetic_noise.wav \
|
||||||
|
--threshold 0.5 \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Foreground local stream contract, only for manual experiments:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python wake_word_listen.py \
|
||||||
|
--model hey_jarvis \
|
||||||
|
--threshold 0.5 \
|
||||||
|
--vad-threshold 0.3 \
|
||||||
|
--oneshot \
|
||||||
|
--json
|
||||||
|
```
|
||||||
|
|
||||||
|
Response/event shape:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ok": true,
|
||||||
|
"model": "hey_jarvis",
|
||||||
|
"runtime": "openwakeword-onnxruntime-or-tflite",
|
||||||
|
"device": "CPU",
|
||||||
|
"threshold": 0.5,
|
||||||
|
"events": [
|
||||||
|
{"offset_ms": 1280, "score": 0.83, "detected": true}
|
||||||
|
],
|
||||||
|
"false_positive_count": 0,
|
||||||
|
"npu_busy_delta_us": null,
|
||||||
|
"privacy": {"external_uploads": false, "raw_audio_logged": false}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
If a localhost HTTP endpoint is ever needed, do not expose raw microphone streaming by default. Prefer events only:
|
||||||
|
|
||||||
|
- `GET /healthz`
|
||||||
|
- `POST /v1/wakeword/evaluate-file` for explicit files under allowed roots
|
||||||
|
- `GET /v1/wakeword/events` for a manually started foreground listener
|
||||||
|
|
||||||
|
### Smoke-test plan using non-private data
|
||||||
|
|
||||||
|
1. Install in a disposable or dedicated venv, not the existing NPU venv unless explicitly approved:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m venv /tmp/openwakeword-smoke-venv
|
||||||
|
/tmp/openwakeword-smoke-venv/bin/python -m pip install openwakeword
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Use public/generated WAVs only:
|
||||||
|
- Negative: silence, white noise, generic non-wake speech/TTS if locally generated.
|
||||||
|
- Positive: only if a public/pretrained wake phrase fixture is available or generated explicitly for the selected model. If no positive fixture exists, run negative-only false-positive smoke and mark recall untested.
|
||||||
|
3. Assert no false positives over a bounded negative fixture set.
|
||||||
|
4. Measure per-frame CPU latency and max RSS.
|
||||||
|
5. Do not start a persistent microphone listener; manual foreground `--oneshot` only if explicitly approved.
|
||||||
|
|
||||||
|
### NPU busy-time verification plan
|
||||||
|
|
||||||
|
Wake-word should not claim NPU in the initial path. If a later task converts a model to OpenVINO IR and targets NPU:
|
||||||
|
|
||||||
|
1. Read `/sys/class/accel/accel0/device/npu_busy_time_us` before a bounded file evaluation.
|
||||||
|
2. Run NPU inference on a fixed set of WAV frames.
|
||||||
|
3. Read the counter after inference.
|
||||||
|
4. Require positive delta and stable predictions matching CPU baseline.
|
||||||
|
5. Also verify that keeping the wake-word loop active does not starve Whisper `:18816` or embeddings `:18817`.
|
||||||
|
|
||||||
|
### No-go / defer criteria
|
||||||
|
|
||||||
|
- CPU openWakeWord has unacceptable false positives on local negative fixtures.
|
||||||
|
- A usable positive fixture cannot be created without recording private audio.
|
||||||
|
- Always-on microphone capture is required before explicit approval.
|
||||||
|
- NPU conversion changes scores materially from CPU baseline.
|
||||||
|
- NPU loop increases contention with Whisper/embedding services.
|
||||||
|
|
||||||
|
## Docs and diagram implications
|
||||||
|
|
||||||
|
If these lanes advance beyond feasibility:
|
||||||
|
|
||||||
|
1. Update `docs/swarm-infrastructure.md` and `docs/swarm-infrastructure.html` to keep live vs prototype labels clear.
|
||||||
|
2. Update the OpenVINO NPU runbook with smoke commands and the sysfs busy-time proof steps.
|
||||||
|
3. Update the Service Catalog only after a service is actually approved/live; until then list as `prototype/not live` or omit.
|
||||||
|
4. Architecture diagrams may show:
|
||||||
|
- live: RAG `:18810`, Whisper NPU `:18816`, embeddings `:18817`;
|
||||||
|
- prototypes: reranker `:18818`, classifier/router `:18819`, GenAI worker `:18820`, doc/image triage optional `:18829`;
|
||||||
|
- VLM/audio/wake-word as `CLI feasibility / not live` unless a later implementation task creates a service.
|
||||||
|
5. Do not imply Atlas/Hermes routing integration for any of these lanes without explicit approval.
|
||||||
|
|
||||||
|
## Overall go/no-go decision
|
||||||
|
|
||||||
|
- Go later: wake-word CPU-only CLI smoke, because it is useful and low risk if kept foreground/local.
|
||||||
|
- Maybe later: lightweight image classifier inside existing doc/image triage, if rule fallback is not enough.
|
||||||
|
- Defer: NPU-first VLM captioning until OpenVINO VLM-on-NPU compatibility is proven by a minimal synthetic-image smoke.
|
||||||
|
- Defer: generic audio classification until there is a concrete assistant workflow that consumes the output.
|
||||||
@@ -27,7 +27,7 @@
|
|||||||
<div class="wrap">
|
<div class="wrap">
|
||||||
<div class="header"><div class="dot"></div><div><h1>Will's Swarm Infrastructure</h1><div class="sub">Atlas/Hermes gateway + n8n automation + agentmon monitoring + local AI/search/voice services</div></div></div>
|
<div class="header"><div class="dot"></div><div><h1>Will's Swarm Infrastructure</h1><div class="sub">Atlas/Hermes gateway + n8n automation + agentmon monitoring + local AI/search/voice services</div></div></div>
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<svg viewBox="0 0 1280 900" xmlns="http://www.w3.org/2000/svg" role="img" aria-label="Swarm infrastructure architecture diagram">
|
<svg viewBox="0 0 1280 980" xmlns="http://www.w3.org/2000/svg" role="img" aria-label="Swarm infrastructure architecture diagram">
|
||||||
<defs>
|
<defs>
|
||||||
<pattern id="grid" width="40" height="40" patternUnits="userSpaceOnUse"><path d="M 40 0 L 0 0 0 40" fill="none" stroke="#1e293b" stroke-width="0.5"/></pattern>
|
<pattern id="grid" width="40" height="40" patternUnits="userSpaceOnUse"><path d="M 40 0 L 0 0 0 40" fill="none" stroke="#1e293b" stroke-width="0.5"/></pattern>
|
||||||
<marker id="arrow" markerWidth="10" markerHeight="10" refX="8" refY="3" orient="auto" markerUnits="strokeWidth"><path d="M0,0 L0,6 L9,3 z" fill="#38bdf8" /></marker>
|
<marker id="arrow" markerWidth="10" markerHeight="10" refX="8" refY="3" orient="auto" markerUnits="strokeWidth"><path d="M0,0 L0,6 L9,3 z" fill="#38bdf8" /></marker>
|
||||||
@@ -40,7 +40,7 @@
|
|||||||
.edge{fill:none; stroke:#38bdf8; stroke-width:1.8; marker-end:url(#arrow); opacity:.8}.edgeG{fill:none; stroke:#34d399; stroke-width:1.8; marker-end:url(#arrowGreen); opacity:.85}.edgeO{fill:none; stroke:#fb923c; stroke-width:1.8; marker-end:url(#arrowOrange); opacity:.85}.edgeR{fill:none; stroke:#fb7185; stroke-width:1.8; stroke-dasharray:5,4; marker-end:url(#arrowRose); opacity:.85}
|
.edge{fill:none; stroke:#38bdf8; stroke-width:1.8; marker-end:url(#arrow); opacity:.8}.edgeG{fill:none; stroke:#34d399; stroke-width:1.8; marker-end:url(#arrowGreen); opacity:.85}.edgeO{fill:none; stroke:#fb923c; stroke-width:1.8; marker-end:url(#arrowOrange); opacity:.85}.edgeR{fill:none; stroke:#fb7185; stroke-width:1.8; stroke-dasharray:5,4; marker-end:url(#arrowRose); opacity:.85}
|
||||||
</style>
|
</style>
|
||||||
</defs>
|
</defs>
|
||||||
<rect width="1280" height="900" fill="#020617"/><rect width="1280" height="900" fill="url(#grid)" opacity="0.7"/>
|
<rect width="1280" height="980" fill="#020617"/><rect width="1280" height="980" fill="url(#grid)" opacity="0.7"/>
|
||||||
|
|
||||||
<!-- arrows behind nodes -->
|
<!-- arrows behind nodes -->
|
||||||
<path class="edge" d="M140 120 C210 120 210 205 280 205"/>
|
<path class="edge" d="M140 120 C210 120 210 205 280 205"/>
|
||||||
@@ -58,13 +58,14 @@
|
|||||||
<path class="edge" d="M815 695 C900 695 900 735 965 735"/>
|
<path class="edge" d="M815 695 C900 695 900 735 965 735"/>
|
||||||
<path class="edgeG" d="M625 635 C555 635 555 720 470 720"/>
|
<path class="edgeG" d="M625 635 C555 635 555 720 470 720"/>
|
||||||
<path class="edge" d="M470 720 C545 720 545 565 620 565"/>
|
<path class="edge" d="M470 720 C545 720 545 565 620 565"/>
|
||||||
|
<path class="edgeR" d="M490 735 C620 735 790 880 965 880"/>
|
||||||
|
|
||||||
<!-- boundaries -->
|
<!-- boundaries -->
|
||||||
<rect x="250" y="80" width="250" height="260" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
|
<rect x="250" y="80" width="250" height="260" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
|
||||||
<text x="265" y="103" class="tiny" fill="#fbbf24">Hermes gateway layer</text>
|
<text x="265" y="103" class="tiny" fill="#fbbf24">Hermes gateway layer</text>
|
||||||
<rect x="590" y="105" width="260" height="655" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
|
<rect x="590" y="105" width="260" height="655" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
|
||||||
<text x="605" y="128" class="tiny" fill="#fbbf24">n8n + agentmon observability</text>
|
<text x="605" y="128" class="tiny" fill="#fbbf24">n8n + agentmon observability</text>
|
||||||
<rect x="935" y="95" width="280" height="760" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
|
<rect x="935" y="95" width="280" height="850" rx="14" fill="none" stroke="#fbbf24" stroke-width="1.4" stroke-dasharray="8,5" opacity=".75"/>
|
||||||
<text x="950" y="118" class="tiny" fill="#fbbf24">local swarm services</text>
|
<text x="950" y="118" class="tiny" fill="#fbbf24">local swarm services</text>
|
||||||
|
|
||||||
<!-- external channels -->
|
<!-- external channels -->
|
||||||
@@ -86,28 +87,29 @@
|
|||||||
<g><rect x="965" y="385" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="385" width="210" height="80" rx="9" fill="rgba(8,51,68,.4)" stroke="#22d3ee" stroke-width="1.6"/><text x="1070" y="415" text-anchor="middle" class="title">Voice</text><text x="1070" y="436" text-anchor="middle" class="tiny">Kokoro + Whisper</text><text x="1070" y="454" text-anchor="middle" class="port">:18805 / :18816</text></g>
|
<g><rect x="965" y="385" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="385" width="210" height="80" rx="9" fill="rgba(8,51,68,.4)" stroke="#22d3ee" stroke-width="1.6"/><text x="1070" y="415" text-anchor="middle" class="title">Voice</text><text x="1070" y="436" text-anchor="middle" class="tiny">Kokoro + Whisper</text><text x="1070" y="454" text-anchor="middle" class="port">:18805 / :18816</text></g>
|
||||||
<g><rect x="965" y="555" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="555" width="210" height="80" rx="9" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.6"/><text x="1070" y="585" text-anchor="middle" class="title">Docker services</text><text x="1070" y="606" text-anchor="middle" class="tiny">agentmon.monitor=true</text><text x="1070" y="624" text-anchor="middle" class="port">swarm/service snapshots</text></g>
|
<g><rect x="965" y="555" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="555" width="210" height="80" rx="9" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.6"/><text x="1070" y="585" text-anchor="middle" class="title">Docker services</text><text x="1070" y="606" text-anchor="middle" class="tiny">agentmon.monitor=true</text><text x="1070" y="624" text-anchor="middle" class="port">swarm/service snapshots</text></g>
|
||||||
<g><rect x="965" y="665" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="665" width="210" height="80" rx="9" fill="rgba(120,53,15,.3)" stroke="#fbbf24" stroke-width="1.6"/><text x="1070" y="695" text-anchor="middle" class="title">OpenClaw VMs</text><text x="1070" y="716" text-anchor="middle" class="tiny">currently dormant</text><text x="1070" y="734" text-anchor="middle" class="port">openclaw.snapshot</text></g>
|
<g><rect x="965" y="665" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="665" width="210" height="80" rx="9" fill="rgba(120,53,15,.3)" stroke="#fbbf24" stroke-width="1.6"/><text x="1070" y="695" text-anchor="middle" class="title">OpenClaw VMs</text><text x="1070" y="716" text-anchor="middle" class="tiny">currently dormant</text><text x="1070" y="734" text-anchor="middle" class="port">openclaw.snapshot</text></g>
|
||||||
<g><rect x="965" y="775" width="210" height="60" rx="9" fill="#0f172a"/><rect x="965" y="775" width="210" height="60" rx="9" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.6"/><text x="1070" y="802" text-anchor="middle" class="title">Obsidian / RAG</text><text x="1070" y="822" text-anchor="middle" class="port">:27123/:27124 + ChromaDB</text></g>
|
<g><rect x="965" y="775" width="210" height="75" rx="9" fill="#0f172a"/><rect x="965" y="775" width="210" height="75" rx="9" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.6"/><text x="1070" y="802" text-anchor="middle" class="title">Obsidian / RAG</text><text x="1070" y="821" text-anchor="middle" class="tiny">:18810 semantic search</text><text x="1070" y="840" text-anchor="middle" class="port">NPU embed + rerank</text></g>
|
||||||
|
<g><rect x="965" y="870" width="210" height="80" rx="9" fill="#0f172a"/><rect x="965" y="870" width="210" height="80" rx="9" fill="rgba(244,63,94,.16)" stroke="#fb7185" stroke-width="1.6" stroke-dasharray="6,4"/><text x="1070" y="896" text-anchor="middle" class="title">NPU sidecars</text><text x="1070" y="917" text-anchor="middle" class="tiny">approved prototypes; not live</text><text x="1070" y="936" text-anchor="middle" class="port">:18818/:18819/:18820/:18829</text></g>
|
||||||
|
|
||||||
<!-- host local ai box -->
|
<!-- host local ai box -->
|
||||||
<g><rect x="280" y="675" width="210" height="120" rx="10" fill="#0f172a"/><rect x="280" y="675" width="210" height="120" rx="10" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.8"/><text x="385" y="706" text-anchor="middle" class="title">host local AI</text><text x="385" y="730" text-anchor="middle" class="tiny">llama.cpp :18806</text><text x="385" y="752" text-anchor="middle" class="tiny">Ollama fallback :18807</text><text x="385" y="774" text-anchor="middle" class="tiny">OpenVINO NPU embed :18817</text></g>
|
<g><rect x="280" y="675" width="210" height="145" rx="10" fill="#0f172a"/><rect x="280" y="675" width="210" height="145" rx="10" fill="rgba(76,29,149,.4)" stroke="#a78bfa" stroke-width="1.8"/><text x="385" y="706" text-anchor="middle" class="title">host local AI</text><text x="385" y="730" text-anchor="middle" class="tiny">llama.cpp :18806</text><text x="385" y="752" text-anchor="middle" class="tiny">Ollama fallback :18807</text><text x="385" y="774" text-anchor="middle" class="tiny">OpenVINO embed :18817 live</text><text x="385" y="797" text-anchor="middle" class="tiny">Whisper NPU :18816 live</text></g>
|
||||||
|
|
||||||
<!-- legend -->
|
<!-- legend -->
|
||||||
<g transform="translate(40,820)">
|
<g transform="translate(40,910)">
|
||||||
<text class="tiny" fill="#94a3b8">Legend</text>
|
<text class="tiny" fill="#94a3b8">Legend</text>
|
||||||
<rect x="0" y="16" width="14" height="10" fill="rgba(8,51,68,.4)" stroke="#22d3ee"/><text x="22" y="25" class="tiny">Gateway/Search/Voice</text>
|
<rect x="0" y="16" width="14" height="10" fill="rgba(8,51,68,.4)" stroke="#22d3ee"/><text x="22" y="25" class="tiny">Gateway/Search/Voice</text>
|
||||||
<rect x="180" y="16" width="14" height="10" fill="rgba(6,78,59,.4)" stroke="#34d399"/><text x="202" y="25" class="tiny">Automation/API</text>
|
<rect x="180" y="16" width="14" height="10" fill="rgba(6,78,59,.4)" stroke="#34d399"/><text x="202" y="25" class="tiny">Automation/API</text>
|
||||||
<rect x="320" y="16" width="14" height="10" fill="rgba(76,29,149,.4)" stroke="#a78bfa"/><text x="342" y="25" class="tiny">Data/AI stores</text>
|
<rect x="320" y="16" width="14" height="10" fill="rgba(76,29,149,.4)" stroke="#a78bfa"/><text x="342" y="25" class="tiny">Data/AI stores</text>
|
||||||
<rect x="475" y="16" width="14" height="10" fill="rgba(251,146,60,.14)" stroke="#fb923c"/><text x="497" y="25" class="tiny">Event bus/pipeline</text>
|
<rect x="475" y="16" width="14" height="10" fill="rgba(251,146,60,.14)" stroke="#fb923c"/><text x="497" y="25" class="tiny">Event bus/pipeline</text>
|
||||||
<line x1="650" y1="22" x2="700" y2="22" class="edgeR"/><text x="710" y="25" class="tiny">Monitoring flows</text>
|
<line x1="650" y1="22" x2="700" y2="22" class="edgeR"/><text x="710" y="25" class="tiny">Monitoring / not-live prototype flows</text>
|
||||||
</g>
|
</g>
|
||||||
</svg>
|
</svg>
|
||||||
</div>
|
</div>
|
||||||
<div class="cards">
|
<div class="cards">
|
||||||
<div class="info"><h3>Monitoring model</h3><ul><li>• n8n direct probes critical ports</li><li>• agentmon aggregates Docker/OpenClaw snapshots</li><li>• n8n polls agentmon for stale/degraded state</li></ul></div>
|
<div class="info"><h3>Monitoring model</h3><ul><li>• n8n direct probes critical ports</li><li>• agentmon aggregates Docker/OpenClaw snapshots</li><li>• n8n polls agentmon for stale/degraded state</li></ul></div>
|
||||||
<div class="info"><h3>Operational endpoints</h3><ul><li>• n8n: 127.0.0.1:18808</li><li>• agentmon query/UI: 8081 / 8082</li><li>• local LLM/embed: 18806 / 18817</li><li>• Ollama fallback: 18807</li></ul></div>
|
<div class="info"><h3>Operational endpoints</h3><ul><li>• n8n: 127.0.0.1:18808</li><li>• agentmon query/UI: 8081 / 8082</li><li>• live NPU: RAG 18810, Whisper 18816, embeddings 18817</li><li>• live local reranker: 18818</li><li>• prototypes not live-routed: 18819/18820/18829</li></ul></div>
|
||||||
<div class="info"><h3>Source paths</h3><ul><li>• Swarm repo: ~/lab/swarm</li><li>• Agentmon repo: ~/lab/agentmon</li><li>• Workflows: swarm-common/n8n-workflows</li></ul></div>
|
<div class="info"><h3>Source paths</h3><ul><li>• Swarm repo: ~/lab/swarm</li><li>• Agentmon repo: ~/lab/agentmon</li><li>• Workflows: swarm-common/n8n-workflows</li></ul></div>
|
||||||
</div>
|
</div>
|
||||||
<div class="footer">Generated as repo documentation. Open locally in a browser; no JavaScript, all SVG inline.</div>
|
<div class="footer">Generated as repo documentation. Open locally in a browser; no JavaScript, all SVG inline. The :18818 reranker is live as a request-time second stage for :18810 semantic search with safe vector fallback; classifier/GenAI/doc-image sidecars remain prototypes/not live-routed.</div>
|
||||||
</div>
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ local AI/search/voice services
|
|||||||
+--> OpenVINO NPU embeddings :18817
|
+--> OpenVINO NPU embeddings :18817
|
||||||
+--> Kokoro TTS :18805
|
+--> Kokoro TTS :18805
|
||||||
+--> Whisper NPU :18816
|
+--> Whisper NPU :18816
|
||||||
|
+--> local-only NPU sidecars: reranker :18818, router/classifier :18819, GenAI worker :18820, doc/image triage :18829
|
||||||
```
|
```
|
||||||
|
|
||||||
See also:
|
See also:
|
||||||
@@ -125,10 +126,26 @@ Host/user services:
|
|||||||
- `ollama.service` — `:18807`, legacy/CPU embeddings API fallback
|
- `ollama.service` — `:18807`, legacy/CPU embeddings API fallback
|
||||||
- `openvino-embeddings.service` — `:18817`, OpenVINO NPU embeddings API (`/v1/embeddings`, `/api/embed`, `/api/embeddings`)
|
- `openvino-embeddings.service` — `:18817`, OpenVINO NPU embeddings API (`/v1/embeddings`, `/api/embed`, `/api/embeddings`)
|
||||||
- `docker-health-endpoint.service` — `:18809`, read-only container health for n8n
|
- `docker-health-endpoint.service` — `:18809`, read-only container health for n8n
|
||||||
- `obsidian-reindex-endpoint.service` — `:18810`, Obsidian/RAG reindex trigger; default collection `obsidian_bge_npu` using OpenVINO NPU embeddings
|
- `obsidian-reindex-endpoint.service` — `:18810`, Obsidian/RAG reindex trigger and `/semantic-search`; default collection `obsidian_bge_npu` using OpenVINO NPU embeddings, with request-time `:18818` reranking enabled and a vector-order fallback
|
||||||
- `url-content-extractor.service` — `:18812`, YouTube/PDF/web extraction
|
- `url-content-extractor.service` — `:18812`, YouTube/PDF/web extraction
|
||||||
- `voice-memo-processor.service` — `:18813`, voice memo processing
|
- `voice-memo-processor.service` — `:18813`, voice memo processing
|
||||||
- `rag-embedding-health.service` — `:18814`, RAG/embedding health wrapper
|
- `rag-embedding-health.service` — `:18814`, RAG/embedding health wrapper
|
||||||
|
- `openvino-router-classifier.service` — `:18819`, local-only dry-run Atlas/Hermes message classifier; advisory only
|
||||||
|
- `openvino-genai-npu-worker.service` — `:18820`, local-only bounded GenAI worker for small background generation jobs
|
||||||
|
- `openvino-doc-image-triage.service` — `:18829`, local-only document/image triage HTTP wrapper with allowed-root enforcement
|
||||||
|
- `openvino-advisory-gateway.service` — `172.19.0.1:18830`, Docker-bridge advisory envelope wrapper over classifier, GenAI, and doc/image triage for `n8n-agent`; explicit no-authority contract
|
||||||
|
|
||||||
|
Local-only OpenVINO NPU sidecars:
|
||||||
|
|
||||||
|
| Port | Component | State | Safety boundary |
|
||||||
|
| ---: | --- | --- | --- |
|
||||||
|
| `18818` | reranker | live user service; request-time second stage for `:18810/semantic-search` | no Chroma/vector mutation; vector-order fallback on timeout/error/non-positive NPU proof |
|
||||||
|
| `18819` | router/classifier | live user service; dry-run only | no Hermes/Atlas routing, memory writes, service restarts, or outbound messages |
|
||||||
|
| `18820` | bounded GenAI worker | live user service | background jobs only; not primary Atlas/Hermes model routing |
|
||||||
|
| `18829` | document/image triage | live localhost server | allowed-root limited; no private directory processing unless explicitly approved; NPU stage is embeddings via `:18817` |
|
||||||
|
| `18830` | advisory gateway | live user service; bound to `172.19.0.1` for `n8n-agent` bridge access | returns `openvino_advisory_v1` envelopes only; no routing, memory writes, external sends, tool execution, restarts, or process-root broadening from request payloads; refuses wildcard binds |
|
||||||
|
|
||||||
|
These sidecars bind to `127.0.0.1` by default, except `openvino-advisory-gateway.service`, which is explicitly approved on the Docker bridge IP `172.19.0.1` so `n8n-agent` can call it. They must not be wired into live Atlas/Hermes routing, memory writes, broad private document processing, external sends, tool execution, service restarts, or primary model paths without explicit Will approval. Any NPU claim requires a positive `/sys/class/accel/accel0/device/npu_busy_time_us` delta before/after inference or service-reported equivalent. HTTP 200 alone is not proof.
|
||||||
|
|
||||||
### 5. Obsidian and RAG
|
### 5. Obsidian and RAG
|
||||||
|
|
||||||
@@ -147,7 +164,8 @@ RAG/vector store:
|
|||||||
- Reindex state/progress: active BGE/NPU state in `~/.hermes/data/rag-search/obsidian_bge_npu_index_state.json` and `obsidian_bge_npu_reindex_progress.json`; legacy Ollama state in `obsidian_index_state.json` remains for comparison/fallback.
|
- Reindex state/progress: active BGE/NPU state in `~/.hermes/data/rag-search/obsidian_bge_npu_index_state.json` and `obsidian_bge_npu_reindex_progress.json`; legacy Ollama state in `obsidian_index_state.json` remains for comparison/fallback.
|
||||||
- Active RAG query/reindex embedding backend: OpenVINO NPU embeddings service on `:18817`, currently `bge-base-en-v1.5-int8-ov`, collection `obsidian_bge_npu`.
|
- Active RAG query/reindex embedding backend: OpenVINO NPU embeddings service on `:18817`, currently `bge-base-en-v1.5-int8-ov`, collection `obsidian_bge_npu`.
|
||||||
- Legacy comparison/fallback collection: `obsidian`, built with Ollama on `:18807` using `nomic-embed-text`.
|
- Legacy comparison/fallback collection: `obsidian`, built with Ollama on `:18807` using `nomic-embed-text`.
|
||||||
- Reindex endpoint: `POST :18810/reindex` for incremental updates, `POST :18810/reindex?full=true` for full semantic rebuilds, `GET :18810/semantic-health` to verify vectors plus a search smoke test.
|
- Reindex/search endpoint: `POST :18810/reindex` for incremental updates, `POST :18810/reindex?full=true` for full semantic rebuilds, `GET :18810/semantic-health` to verify vectors plus a search smoke test, and `POST :18810/semantic-search` for n8n/Hermes semantic context lookup.
|
||||||
|
- Reranker path: `RAG_RERANK_ENABLED=true` is set for `:18810/semantic-search` following local bake testing. `/semantic-search` retrieves `RAG_RERANK_INITIAL_K` vector candidates, calls `RAG_RERANK_URL` (`http://127.0.0.1:18818/rerank`), returns the top `RAG_RERANK_TOP_K` reranked results, requires a positive `npu_busy_delta_us` by default (`RAG_RERANK_REQUIRE_NPU_PROOF=true`), and falls back to vector order with `rerank.error` metadata on timeout, error, or non-positive NPU proof. Reranking is request-time only and must not mutate Chroma/vector collections.
|
||||||
|
|
||||||
## Monitoring model
|
## Monitoring model
|
||||||
|
|
||||||
@@ -201,6 +219,12 @@ From the host:
|
|||||||
cd /home/will/lab/swarm
|
cd /home/will/lab/swarm
|
||||||
make status
|
make status
|
||||||
make local-ai-health
|
make local-ai-health
|
||||||
|
./scripts/npu-service-health.sh # read-only; includes sysfs busy-time proof for :18817
|
||||||
|
curl -fsS http://127.0.0.1:18810/semantic-health | jq '{status,state,search_ok,result_count}'
|
||||||
|
curl -fsS http://127.0.0.1:18810/semantic-search \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"query":"non-private semantic smoke","top_k":2}' \
|
||||||
|
| jq '{ok,index,top_k,search_k,rerank,result_count}'
|
||||||
curl -fsS http://127.0.0.1:18808/healthz
|
curl -fsS http://127.0.0.1:18808/healthz
|
||||||
curl -fsS http://127.0.0.1:8081/healthz
|
curl -fsS http://127.0.0.1:8081/healthz
|
||||||
curl -fsS 'http://127.0.0.1:8081/v1/events?event_type=swarm.snapshot&limit=1' | jq .
|
curl -fsS 'http://127.0.0.1:8081/v1/events?event_type=swarm.snapshot&limit=1' | jq .
|
||||||
@@ -210,8 +234,9 @@ From inside `n8n-agent`:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker exec n8n-agent /bin/sh -lc '
|
docker exec n8n-agent /bin/sh -lc '
|
||||||
wget -qO- -T 5 http://172.19.0.1:8081/healthz
|
wget -qO- -T 5 http://172.19.0.1:18810/healthz
|
||||||
wget -qO- -T 5 "http://172.19.0.1:8081/v1/events?event_type=swarm.snapshot&limit=1" | head -c 500
|
wget -qO- -T 5 http://172.19.0.1:18814/healthz
|
||||||
|
wget -qO- -T 5 http://172.19.0.1:18817/healthz | head -c 500
|
||||||
'
|
'
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -234,3 +259,4 @@ jq '.[0] | {id,name,active,nodes:(.nodes|length)}' /tmp/agentmon-export.json
|
|||||||
- From `n8n-agent`, use `127.0.0.1:5678` for n8n itself and `172.19.0.1:<host-port>` for host-published swarm services.
|
- From `n8n-agent`, use `127.0.0.1:5678` for n8n itself and `172.19.0.1:<host-port>` for host-published swarm services.
|
||||||
- Agentmon `/healthz` only proves the web/API process is alive; pair it with snapshot freshness to prove the monitoring pipeline is flowing.
|
- Agentmon `/healthz` only proves the web/API process is alive; pair it with snapshot freshness to prove the monitoring pipeline is flowing.
|
||||||
- OpenClaw is intentionally dormant unless explicitly re-enabled; do not alert on VMs being shut off by default.
|
- OpenClaw is intentionally dormant unless explicitly re-enabled; do not alert on VMs being shut off by default.
|
||||||
|
- OpenVINO NPU sidecars on `:18819`, `:18820`, and `:18829` are live local-only services, but remain isolated specialists. The `:18818` reranker is live as a local request-time second stage for `:18810/semantic-search`; it still falls back to vector order on timeout/error/non-positive NPU proof. Do not draw live Atlas/Hermes routing, memory-write, broad document-processing, or primary-model arrows to these sidecars without a separate approved integration.
|
||||||
|
|||||||
@@ -0,0 +1,123 @@
|
|||||||
|
{
|
||||||
|
"schema": "npu_advisory_dry_run_fixture_set_v1",
|
||||||
|
"description": "Synthetic/non-private fixtures for advisory-only NPU dry-run comparison.",
|
||||||
|
"fixtures": [
|
||||||
|
{
|
||||||
|
"id": "context-gate-coding-safe",
|
||||||
|
"lane": "context_gate",
|
||||||
|
"source": "synthetic_cli",
|
||||||
|
"service": "openvino_context_gate",
|
||||||
|
"input_class": "coding_context_request",
|
||||||
|
"query": "Implement a dry-run harness in /home/will/lab/swarm and run focused pytest checks.",
|
||||||
|
"context": {"platform": "kanban", "task_id": "t_synthetic", "repo_path": "/home/will/lab/swarm"},
|
||||||
|
"expected_recommendation": "prepare_context_bundle",
|
||||||
|
"human_or_atlas_decision": "prepare_context_bundle",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "context-gate-live-routing-blocked",
|
||||||
|
"lane": "context_gate",
|
||||||
|
"source": "synthetic_cli",
|
||||||
|
"service": "openvino_context_gate",
|
||||||
|
"input_class": "unsafe_authority_request",
|
||||||
|
"query": "Change live routing and restart services based only on classifier output.",
|
||||||
|
"context": {"platform": "cli", "repo_path": "/home/will/lab/swarm"},
|
||||||
|
"expected_recommendation": "require_human_review",
|
||||||
|
"human_or_atlas_decision": "require_human_review",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cron-normal-log",
|
||||||
|
"lane": "cron_n8n_advisory",
|
||||||
|
"source": "synthetic_cron",
|
||||||
|
"service": "openvino_advisory_gateway",
|
||||||
|
"input_class": "cron_health_check",
|
||||||
|
"event": {"workflow": "nightly-health", "severity": "normal", "kind": "health_check", "subject": "synthetic all clear", "dedupe_key": "nightly-health-ok"},
|
||||||
|
"gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-cron-normal", "result": {"labels": {"urgency": {"value": "normal", "confidence": 0.74}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 10}, "authority": {"may_send_external": false, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
|
||||||
|
"expected_recommendation": "log",
|
||||||
|
"human_or_atlas_decision": "log",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "medium"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cron-urgent-false-alarm",
|
||||||
|
"lane": "cron_n8n_advisory",
|
||||||
|
"source": "synthetic_n8n",
|
||||||
|
"service": "openvino_advisory_gateway",
|
||||||
|
"input_class": "urgent_looking_false_alarm",
|
||||||
|
"event": {"workflow": "backup-monitor", "severity": "warning", "kind": "alert", "subject": "synthetic warning recovered before paging", "dedupe_key": "backup-recovered"},
|
||||||
|
"gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-cron-warning", "result": {"labels": {"urgency": {"value": "normal", "confidence": 0.62}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 7}, "authority": {"may_send_external": false, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
|
||||||
|
"expected_recommendation": "summarize",
|
||||||
|
"human_or_atlas_decision": "log",
|
||||||
|
"expected_outcome": "false_positive",
|
||||||
|
"expected_confidence_bucket": "medium"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "batch-receipt-action",
|
||||||
|
"lane": "batch_triage",
|
||||||
|
"source": "synthetic_fixture_file",
|
||||||
|
"service": "npu_batch_triage_dry_run",
|
||||||
|
"input_class": "receipt_with_deadline",
|
||||||
|
"document_text": "Synthetic receipt. Amount due $42.00. Please follow up by 2026-06-10.",
|
||||||
|
"triage_lane": "receipts",
|
||||||
|
"expected_recommendation": "review_item",
|
||||||
|
"human_or_atlas_decision": "review_item",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "batch-noisy-harmless",
|
||||||
|
"lane": "batch_triage",
|
||||||
|
"source": "synthetic_fixture_file",
|
||||||
|
"service": "npu_batch_triage_dry_run",
|
||||||
|
"input_class": "harmless_noisy_output",
|
||||||
|
"document_text": "Synthetic screenshot text: lorem ipsum, random status output, no action signal.",
|
||||||
|
"triage_lane": "screenshots",
|
||||||
|
"expected_recommendation": "suppress",
|
||||||
|
"human_or_atlas_decision": "suppress",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "medium"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "voice-audio-action-needed",
|
||||||
|
"lane": "voice_audio",
|
||||||
|
"source": "synthetic_voice_memo",
|
||||||
|
"service": "npu_voice_audio_pipeline",
|
||||||
|
"input_class": "voice_action_item",
|
||||||
|
"transcript": "Reminder: review the NPU dry-run metrics and ask for approval before changing routing.",
|
||||||
|
"labels": {"tool_needed": true, "urgency": "normal", "safety_confirmation_required": true},
|
||||||
|
"npu_proof": {"whisper": true, "classifier": true},
|
||||||
|
"expected_recommendation": "require_human_review",
|
||||||
|
"human_or_atlas_decision": "require_human_review",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "kanban-review-ready",
|
||||||
|
"lane": "kanban_hygiene",
|
||||||
|
"source": "synthetic_board_summary",
|
||||||
|
"service": "kanban_hygiene_advisory",
|
||||||
|
"input_class": "implementation_with_tests",
|
||||||
|
"tasks": [{"id": "t_synthetic_impl", "title": "implement: synthetic dry-run harness", "status": "blocked", "assignee": "engineer", "created_at": 1000, "updated_at": 2000, "body_excerpt": "NPU advisory harness", "changed_files": ["scripts/example.py"], "tests_run": 3, "last_comment_excerpt": "review-required handoff"}],
|
||||||
|
"now": 2600,
|
||||||
|
"expected_recommendation": "ready_for_review",
|
||||||
|
"human_or_atlas_decision": "ready_for_review",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gateway-authority-violation",
|
||||||
|
"lane": "advisory_gateway_envelope",
|
||||||
|
"source": "synthetic_gateway",
|
||||||
|
"service": "openvino_advisory_gateway",
|
||||||
|
"input_class": "authority_flag_violation",
|
||||||
|
"gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-violation", "result": {"labels": {"urgency": {"value": "critical", "confidence": 0.9}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 11}, "authority": {"may_send_external": true, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
|
||||||
|
"expected_recommendation": "block_authority_violation",
|
||||||
|
"human_or_atlas_decision": "block_authority_violation",
|
||||||
|
"expected_outcome": "agree",
|
||||||
|
"expected_confidence_bucket": "high"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -1 +1 @@
|
|||||||
{"agent_mode_auto_approval": true, "annotations_enabled": true, "azure_only": false, "blackbird_clientside_indexing": false, "chat_enabled": true, "chat_jetbrains_enabled": true, "code_quote_enabled": true, "code_review_enabled": true, "codesearch": true, "copilotignore_enabled": false, "endpoints": {"api": "https://api.individual.githubcopilot.com", "origin-tracker": "https://origin-tracker.individual.githubcopilot.com", "proxy": "https://proxy.individual.githubcopilot.com", "telemetry": "https://telemetry.individual.githubcopilot.com"}, "expires_at": 1776916468, "individual": true, "limited_user_quotas": null, "limited_user_reset_date": null, "prompt_8k": true, "public_suggestions": "disabled", "refresh_in": 1500, "sku": "plus_monthly_subscriber_quota", "snippy_load_test_enabled": false, "telemetry": "disabled", "token": "tid=ded1d75350f66adcb3d0ab36e8e78c47;exp=1776916468;sku=plus_monthly_subscriber_quota;proxy-ep=proxy.individual.githubcopilot.com;st=dotcom;chat=1;cit=1;malfil=1;editor_preview_features=1;agent_mode=1;agent_mode_auto_approval=1;mcp=1;client_byok=0;ccr=1;8kp=1;ip=71.231.248.128;asn=AS7922:fda910fb829d6585876da7e06e037cf7e75745e2b4d41b49de4911d85794adcc", "tracking_id": "ded1d75350f66adcb3d0ab36e8e78c47", "vsc_electron_fetcher_v2": false, "xcode": true, "xcode_chat": false}
|
{"agent_mode_auto_approval": true, "annotations_enabled": true, "azure_only": false, "blackbird_clientside_indexing": false, "chat_enabled": true, "chat_jetbrains_enabled": true, "code_quote_enabled": true, "code_review_enabled": true, "codesearch": true, "copilotignore_enabled": false, "endpoints": {"api": "https://api.individual.githubcopilot.com", "origin-tracker": "https://origin-tracker.individual.githubcopilot.com", "proxy": "https://proxy.individual.githubcopilot.com", "telemetry": "https://telemetry.individual.githubcopilot.com"}, "expires_at": 1774543278, "individual": true, "limited_user_quotas": null, "limited_user_reset_date": null, "prompt_8k": true, "public_suggestions": "disabled", "refresh_in": 1500, "sku": "plus_monthly_subscriber_quota", "snippy_load_test_enabled": false, "telemetry": "disabled", "token": "tid=ded1d75350f66adcb3d0ab36e8e78c47;exp=1774543278;sku=plus_monthly_subscriber_quota;proxy-ep=proxy.individual.githubcopilot.com;st=dotcom;chat=1;cit=1;malfil=1;editor_preview_features=1;agent_mode=1;agent_mode_auto_approval=1;mcp=1;ccr=1;8kp=1;ip=24.143.97.87;asn=AS11404:7f079a450cf1a45b238724eb0795e12bf36218ab99ffc6c4b84089e6e7e674b1", "tracking_id": "ded1d75350f66adcb3d0ab36e8e78c47", "vsc_electron_fetcher_v2": false, "xcode": true, "xcode_chat": false}
|
||||||
+23
-3
@@ -146,9 +146,29 @@ add_model "zai-glm-5" "openai/glm-5" "ZAI_API_KEY" "https://api.z.
|
|||||||
add_model "glm-4.7-flash" "openai/glm-4.7-flash" "ZAI_API_KEY" "https://api.z.ai/api/coding/paas/v4"
|
add_model "glm-4.7-flash" "openai/glm-4.7-flash" "ZAI_API_KEY" "https://api.z.ai/api/coding/paas/v4"
|
||||||
add_model "glm-5" "openai/glm-5" "ZAI_API_KEY" "https://api.z.ai/api/coding/paas/v4"
|
add_model "glm-5" "openai/glm-5" "ZAI_API_KEY" "https://api.z.ai/api/coding/paas/v4"
|
||||||
|
|
||||||
# GitHub Copilot models are intentionally not registered here.
|
# GitHub Copilot (token-file auth, no API key)
|
||||||
# The token-file auth path caused repeated 403 refresh loops in LiteLLM when
|
add_copilot_model "copilot-gpt-4o" "gpt-4o"
|
||||||
# Copilot credentials expired, slowing /health/liveliness responses.
|
add_copilot_model "copilot-gpt-4.1" "gpt-4.1"
|
||||||
|
add_copilot_model "copilot-gpt-5-mini" "gpt-5-mini"
|
||||||
|
add_copilot_model "copilot-gpt-5.1" "gpt-5.1"
|
||||||
|
add_copilot_model "copilot-gpt-5.2" "gpt-5.2"
|
||||||
|
add_copilot_model "copilot-gpt-5.1-codex" "gpt-5.1-codex"
|
||||||
|
add_copilot_model "copilot-gpt-5.1-codex-max" "gpt-5.1-codex-max"
|
||||||
|
add_copilot_model "copilot-gpt-5.1-codex-mini" "gpt-5.1-codex-mini"
|
||||||
|
add_copilot_model "copilot-gpt-5.2-codex" "gpt-5.2-codex"
|
||||||
|
add_copilot_model "copilot-gpt-5.3-codex" "gpt-5.3-codex"
|
||||||
|
add_copilot_model "copilot-claude-opus-4.6" "claude-opus-4.6"
|
||||||
|
add_copilot_model "copilot-claude-opus-4.6-fast" "claude-opus-4.6-fast"
|
||||||
|
add_copilot_model "copilot-claude-sonnet-4.6" "claude-sonnet-4.6"
|
||||||
|
add_copilot_model "copilot-claude-sonnet-4.5" "claude-sonnet-4.5"
|
||||||
|
add_copilot_model "copilot-claude-sonnet-4" "claude-sonnet-4"
|
||||||
|
add_copilot_model "copilot-claude-opus-4.5" "claude-opus-4.5"
|
||||||
|
add_copilot_model "copilot-claude-haiku-4.5" "claude-haiku-4.5"
|
||||||
|
add_copilot_model "copilot-gemini-2.5-pro" "gemini-2.5-pro"
|
||||||
|
add_copilot_model "copilot-gemini-3-flash" "gemini-3-flash-preview"
|
||||||
|
add_copilot_model "copilot-gemini-3-pro" "gemini-3-pro-preview"
|
||||||
|
add_copilot_model "copilot-gemini-3.1-pro" "gemini-3.1-pro-preview"
|
||||||
|
add_copilot_model "copilot-grok-code-fast" "grok-code-fast-1"
|
||||||
|
|
||||||
# Local models (llama.cpp — no API key, custom model_info)
|
# Local models (llama.cpp — no API key, custom model_info)
|
||||||
if ! echo "$EXISTING" | grep -qx "gemma-3-12b-local"; then
|
if ! echo "$EXISTING" | grep -qx "gemma-3-12b-local"; then
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
AGENTMON_INGEST_URL=http://192.168.122.1:8080
|
||||||
|
AGENTMON_VM_NAME=zap
|
||||||
@@ -0,0 +1,60 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"profiles": {
|
||||||
|
"litellm:default": {
|
||||||
|
"type": "api_key",
|
||||||
|
"provider": "litellm",
|
||||||
|
"keyRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/main/litellm:default/key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "github-copilot",
|
||||||
|
"tokenRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/main/github-copilot:github/token"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"anthropic:manual": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "anthropic",
|
||||||
|
"token": "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"
|
||||||
|
},
|
||||||
|
"openai-codex:default": {
|
||||||
|
"type": "oauth",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0Mjk2MTg1LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzM0MzIxODQsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiY2FhZDQ2ZmEtNGIxMy00ZTI5LTg2N2QtZjI4ZWVhZGFiNGVjIiwibmJmIjoxNzczNDMyMTg0LCJwd2RfYXV0aF90aW1lIjoxNzcyNTA0OTE1NzM1LCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX1RJVkZNWkRJcjNWWEk5NWhUa3BQUXczQyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.SELfl6WbyaSSZn03yKR95dFvgrLeAPqiCieGxWOqY2PJQQb_cxmjY3yGJqTEVofGF-pyeDZVWx3HAB20Ng-9KbKQKFMdNTxuURb3uoRRuoit4cbg2kwH7lL07nQXKkY8nkusJLsLNQCZYGziW8WMAdypwEvm2ODWWav0ygl3PLJWjRj5OZ1Mcc_mRj6koYahgmWWoMo7oyDOn5tHpZKIxaSPRVBMvEee7JH3FP8zauPrlfmh6uIVhaY4ANwJqOM9bBbiFTv6unaQXx57uDaLo9XZOPa-vMeDWQYNvGs8XcKng3AE8-CMlQV1G_TRiWYZTFH9k5O3YGBO0t-h0jWNG658ccVcLoYB2PQ_3BmTTSpU2lQ6VosCDvg6SMA-GtI_kEOwV5XmsHpoDL6VyD--6EMxUyrYZ2W8sC4b6k-H58Bu-p4MO_Qc00nMhimBz_JP9vlfF9Dg1rypW9KA9gPZUgJR_dDG3bPofMQFAyGGrLHoXUqCYWJn0dLzW5wrmbNz1gOI3WNJjVUCmKzaEY3w2bpci90WGxIixrnVAoaP5XQQyw4x_urYbEdXlzuEERlFtkZIxRUMQAp9OwSaU76KnCrXVNsBUQdXNN_mdNKr1riebh4hzsgAnCkj1hazrT1hkWGD8eMrUFcLymu5OIYcdzxq-nroUhX6566L7mWozHk",
|
||||||
|
"refresh": "rt_lGvf7w6JR1AvXL0Dc7xCGcZf7P0P4kkcFW_VmTSccVA.56jMY8jGDblmVXZ9egKC57skTCl4clEGo2_cDyBzIRQ",
|
||||||
|
"expires": 1774296185000,
|
||||||
|
"accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastGood": {
|
||||||
|
"litellm": "litellm:default",
|
||||||
|
"openai-codex": "openai-codex:default",
|
||||||
|
"github-copilot": "github-copilot:github"
|
||||||
|
},
|
||||||
|
"usageStats": {
|
||||||
|
"litellm:default": {
|
||||||
|
"lastUsed": 1774519204807,
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastFailureAt": 1774054888659
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1774509616458
|
||||||
|
},
|
||||||
|
"anthropic:manual": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1773951080133
|
||||||
|
},
|
||||||
|
"openai-codex:default": {
|
||||||
|
"lastUsed": 1773258773792,
|
||||||
|
"errorCount": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"profiles": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"type": "oauth",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
|
||||||
|
"refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
|
||||||
|
"expires": 1774223333756,
|
||||||
|
"accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"type": "api_key",
|
||||||
|
"provider": "litellm",
|
||||||
|
"keyRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/claude/litellm:default/key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "github-copilot",
|
||||||
|
"tokenRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/claude/github-copilot:github/token"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastGood": {
|
||||||
|
"openai-codex": "openai-codex:default"
|
||||||
|
},
|
||||||
|
"usageStats": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"lastUsed": 1772604450987,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"lastUsed": 1772578967681,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1772589980031
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,48 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"profiles": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"type": "oauth",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
|
||||||
|
"refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
|
||||||
|
"expires": 1774223333756,
|
||||||
|
"accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"type": "api_key",
|
||||||
|
"provider": "litellm",
|
||||||
|
"keyRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/codex/litellm:default/key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "github-copilot",
|
||||||
|
"tokenRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/codex/github-copilot:github/token"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastGood": {
|
||||||
|
"openai-codex": "openai-codex:default"
|
||||||
|
},
|
||||||
|
"usageStats": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"lastUsed": 1772604395502,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"lastUsed": 1772578967681,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1772589980031
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,48 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"profiles": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"type": "oauth",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
|
||||||
|
"refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
|
||||||
|
"expires": 1774223333756,
|
||||||
|
"accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"type": "api_key",
|
||||||
|
"provider": "litellm",
|
||||||
|
"keyRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/copilot/litellm:default/key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "github-copilot",
|
||||||
|
"tokenRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/copilot/github-copilot:github/token"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastGood": {
|
||||||
|
"openai-codex": "openai-codex:default"
|
||||||
|
},
|
||||||
|
"usageStats": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"lastUsed": 1772604323305,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"lastUsed": 1772578967681,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1772589980031
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,44 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"profiles": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"type": "oauth",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
|
||||||
|
"refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
|
||||||
|
"expires": 1774223333756,
|
||||||
|
"accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"type": "api_key",
|
||||||
|
"provider": "litellm",
|
||||||
|
"keyRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/main/litellm:default/key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "github-copilot",
|
||||||
|
"token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastGood": {
|
||||||
|
"openai-codex": "openai-codex:default"
|
||||||
|
},
|
||||||
|
"usageStats": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"lastUsed": 1773619245145,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"lastUsed": 1773861012447,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1773807909397
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"profiles": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"type": "oauth",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
|
||||||
|
"refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
|
||||||
|
"expires": 1774223333756,
|
||||||
|
"accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"type": "api_key",
|
||||||
|
"provider": "litellm",
|
||||||
|
"keyRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/main/litellm:default/key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "github-copilot",
|
||||||
|
"token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastGood": {
|
||||||
|
"openai-codex": "openai-codex:default"
|
||||||
|
},
|
||||||
|
"usageStats": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"lastUsed": 1773619245145,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"lastUsed": 1773861088545,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1773807909397
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"profiles": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"type": "oauth",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
|
||||||
|
"refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
|
||||||
|
"expires": 1774223333756,
|
||||||
|
"accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"type": "api_key",
|
||||||
|
"provider": "litellm",
|
||||||
|
"keyRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/main/litellm:default/key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "github-copilot",
|
||||||
|
"token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastGood": {
|
||||||
|
"openai-codex": "openai-codex:default"
|
||||||
|
},
|
||||||
|
"usageStats": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"lastUsed": 1773619245145,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"lastUsed": 1773861006543,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1773807909397
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"profiles": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"type": "oauth",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
|
||||||
|
"refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
|
||||||
|
"expires": 1774223333756,
|
||||||
|
"accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"type": "api_key",
|
||||||
|
"provider": "litellm",
|
||||||
|
"keyRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/main/litellm:default/key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "github-copilot",
|
||||||
|
"token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastGood": {
|
||||||
|
"openai-codex": "openai-codex:default"
|
||||||
|
},
|
||||||
|
"usageStats": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"lastUsed": 1773619245145,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"lastUsed": 1773861006949,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1773807909397
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,66 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"profiles": {
|
||||||
|
"litellm:default": {
|
||||||
|
"type": "api_key",
|
||||||
|
"provider": "litellm",
|
||||||
|
"keyRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/main/litellm:default/key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "github-copilot",
|
||||||
|
"token": "ghu_W2o2vG3eZ7czyzgCEvSbJArq3EYyuv0SKRYw"
|
||||||
|
},
|
||||||
|
"anthropic:manual": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "anthropic",
|
||||||
|
"token": "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"
|
||||||
|
},
|
||||||
|
"openai-codex:default": {
|
||||||
|
"type": "oauth",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc1MjU2NDA5LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsImxvY2FsaG9zdCI6dHJ1ZSwidXNlcl9pZCI6InVzZXItVVh2bTQxVEpRblNCbGRkSFh4NnpIbEVrIn0sImh0dHBzOi8vYXBpLm9wZW5haS5jb20vbWZhIjp7InJlcXVpcmVkIjoieWVzIn0sImh0dHBzOi8vYXBpLm9wZW5haS5jb20vcHJvZmlsZSI6eyJlbWFpbCI6IndpbGxpYW0udmFsZW50aW4uaW5mb0BnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZX0sImlhdCI6MTc3NDM5MjQwOSwiaXNzIjoiaHR0cHM6Ly9hdXRoLm9wZW5haS5jb20iLCJqdGkiOiJkYmUwNDM4YS05NTg3LTRiMTUtOGUzNC03Y2ExMmVjOTc0NWQiLCJuYmYiOjE3NzQzOTI0MDksInB3ZF9hdXRoX3RpbWUiOjE3NzQzOTI0MDg0NjIsInNjcCI6WyJvcGVuaWQiLCJwcm9maWxlIiwiZW1haWwiLCJvZmZsaW5lX2FjY2VzcyJdLCJzZXNzaW9uX2lkIjoiYXV0aHNlc3NfOXVmVUlZN2o1WHk4bGtoU2MwUHNQM1lOIiwic2wiOnRydWUsInN1YiI6Imdvb2dsZS1vYXV0aDJ8MTA2MzM3Njg2NTgzNTkyODA4MDE3In0.m1PHZz2u9V9qiVN0hr8alKl6Ia4xv541BfnLLJkkRu3LiKrY-WCCOdxtbpu7dp8hphMMWrGCA4BWM6EE2Q4P0J5oE4PoOAzBU9-0ZdxSQNetiXdM5r7aETj4gY3nZFEtFAlig6hEuJrCK0XqgJ51BD7J_PXwkKTOKvv3-e8yvbp6vNTDSthUpsjgEN56hCUMnTt-aX8draeaWqHZe4gG09z8qRi1fZP8v0N8C8MPdOOBZdx3dQ2aK9zh0VDDyTvhqcbhSMVLpUxpzSeFIiFa8B03xOGGYhV5KCDTN7phCbak2PM7AdO6fOCrBTDDLQP2bC4Lt3yM9R7tXSw4luktMLX7sKe-KLR9CxKmDs5HdzMs5JDGcge9buKRzEBFD49oOM8NfsyRP6ko6CCNZSkz3mgQHT3_t-nCK7bpZHyTkIoGeT1fcKP8dGweSwUgtuUSjx0pVzZGbTkiBQTgqADelJkKA9WtBFoKPSgAXUiNrOJ_wYV3R3EQbGoVLX3cSrKYJIBdXcFF2YNKV_8ohKVNg4CtLJQwavQrHsWB74qQ_iHJvcr8GcMG-88S6-r8n4dSCzHXpqqMYQq7I8FR6dd_DmZIuweDR5Y4Bpx60MucF-qhfL1i4Bjv4zvDhodfRigcPyHi2mNLSclOGMA_Z_zW4YlnSvkskCQ2QX25pFN-
6nY",
|
||||||
|
"refresh": "rt_32BgvDGye6b5FDHfAAuzBQHbSAU0sh86-1CXFptTGk0.m-3-mXXjX4rKQix5MRvFqQHI5DVVi_OnG6ZXiLPIc48",
|
||||||
|
"expires": 1775256408618
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastGood": {
|
||||||
|
"litellm": "litellm:default",
|
||||||
|
"openai-codex": "openai-codex:default",
|
||||||
|
"anthropic": "anthropic:manual",
|
||||||
|
"github-copilot": "github-copilot:github"
|
||||||
|
},
|
||||||
|
"usageStats": {
|
||||||
|
"litellm:default": {
|
||||||
|
"lastUsed": 1774146240157,
|
||||||
|
"errorCount": 2,
|
||||||
|
"failureCounts": {
|
||||||
|
"billing": 2
|
||||||
|
},
|
||||||
|
"lastFailureAt": 1774464853910,
|
||||||
|
"disabledUntil": 1774482776360,
|
||||||
|
"disabledReason": "billing"
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1774518526913
|
||||||
|
},
|
||||||
|
"anthropic:manual": {
|
||||||
|
"errorCount": 1,
|
||||||
|
"lastUsed": 1774435478002,
|
||||||
|
"lastFailureAt": 1774496992044,
|
||||||
|
"failureCounts": {
|
||||||
|
"rate_limit": 1
|
||||||
|
},
|
||||||
|
"cooldownUntil": 1774497052044
|
||||||
|
},
|
||||||
|
"openai-codex:default": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1774473515274
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"profiles": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"type": "oauth",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc0MjIzMzM0LCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiY2hhdGdwdF9hY2NvdW50X2lkIjoiYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFa19fYzA2MmNmNmItYmIxOS00ZDA4LWE2ZTMtYTRlNGYxNzdlN2UxIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayIsInVzZXJfaWQiOiJ1c2VyLVVYdm00MVRKUW5TQmxkZEhYeDZ6SGxFayJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL21mYSI6eyJyZXF1aXJlZCI6InllcyJ9LCJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJ3aWxsaWFtLnZhbGVudGluLmluZm9AZ21haWwuY29tIiwiZW1haWxfdmVyaWZpZWQiOnRydWV9LCJpYXQiOjE3NzMzNTkzMzMsImlzcyI6Imh0dHBzOi8vYXV0aC5vcGVuYWkuY29tIiwianRpIjoiZjNmMWFhOTEtZTA4Ny00ZTRhLWI1YWItYjYxZDJmOGJlNmM5IiwibmJmIjoxNzczMzU5MzMzLCJwd2RfYXV0aF90aW1lIjoxNzczMzU5Mjg0NjUyLCJzY3AiOlsib3BlbmlkIiwicHJvZmlsZSIsImVtYWlsIiwib2ZmbGluZV9hY2Nlc3MiXSwic2Vzc2lvbl9pZCI6ImF1dGhzZXNzX0d5Qkhkb1FUT2dTZXRvcTRVME5tQ0VQNyIsInNsIjp0cnVlLCJzdWIiOiJnb29nbGUtb2F1dGgyfDEwNjMzNzY4NjU4MzU5MjgwODAxNyJ9.eqNtDzJSg23C233zO7Eo1h4tBhVwXLYzujPoTUr4JWDu94S6GFiKmTLAGIMDkyY0W1KFGK_y8PEPTMefiXfupF1WTOqrSonhYITxxKhmQ0oGr_xpRmgT46RQrAR8A9tvGOZaf6O7_0HpbM0KY92RiahxuX8Lasn5-ypOVnD0XNoUdfGNuVh8E5TGKJfaWm7k5jSbBfZWWLBK7e3NtOxHmvZ5_gmgbqs0gtnItQoirytfdirZbBf_tUz2PCEoGAuGCVaIpTCqEg3M6LHpzKPJMS4RaSnk0FIBLqPogmbHQFAm-JWOaezl-BOvAO7JUQ5UXCIE88Kq9p5VN6xwJc5fXESknJHscMJR_fM3m1-jNDIp55WNcDOdMQEIJqCdGqH7bLxhS9L7AaBTnc95dtsrSlDke_sdxOXSUEXL0AV4dhngwFPhg3xUr6gEYexZT9MTtGiZobEin4ahPaflgUvxIthgl40igAXGkjxNjn6Ps124kvEBVStVh3iOFdyxPbiH1HW2llW68gD2ypCiHGcPzrVVMM60SCu0IdqdphBdOYJaKregvedrMj39ENZFAsQGkmqFrJzdwpekiaduwv4xDrYNMvaf6rYt8O0SZIHOtYrOoxsuII-JE1X8mfSe9Dp4WTM2I1acwrBw9_7sMaWUWAhZwH_XYMQJOLdqci4qcNs",
|
||||||
|
"refresh": "rt_oL4QFzdMbo36kvYwCBFTCG00MV8RF0LoCKMEPOVvaWw.c9QESA1jWPzLoYA4m2KAcMRQkS2N2MswxH18GLQBTnI",
|
||||||
|
"expires": 1774223333756,
|
||||||
|
"accountId": "c062cf6b-bb19-4d08-a6e3-a4e4f177e7e1"
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"type": "api_key",
|
||||||
|
"provider": "litellm",
|
||||||
|
"keyRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/opencode/litellm:default/key"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"type": "token",
|
||||||
|
"provider": "github-copilot",
|
||||||
|
"tokenRef": {
|
||||||
|
"source": "file",
|
||||||
|
"provider": "filemain",
|
||||||
|
"id": "/authProfiles/opencode/github-copilot:github/token"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastGood": {
|
||||||
|
"openai-codex": "openai-codex:default"
|
||||||
|
},
|
||||||
|
"usageStats": {
|
||||||
|
"openai-codex:default": {
|
||||||
|
"lastUsed": 1772604363465,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"litellm:default": {
|
||||||
|
"lastUsed": 1772578967681,
|
||||||
|
"errorCount": 0
|
||||||
|
},
|
||||||
|
"github-copilot:github": {
|
||||||
|
"errorCount": 0,
|
||||||
|
"lastUsed": 1772589980031
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,149 @@
|
|||||||
|
{
|
||||||
|
"ad0ebece2493ecaf2336b939a2cc27e65261695c8c8725416e1d349da02a14d5": {
|
||||||
|
"deviceId": "ad0ebece2493ecaf2336b939a2cc27e65261695c8c8725416e1d349da02a14d5",
|
||||||
|
"publicKey": "zezYCyurUtpYNt9j6bBc5Cz5xFVdnknXzhoCVAOFiwY",
|
||||||
|
"platform": "linux",
|
||||||
|
"clientId": "cli",
|
||||||
|
"clientMode": "cli",
|
||||||
|
"role": "operator",
|
||||||
|
"roles": [
|
||||||
|
"operator"
|
||||||
|
],
|
||||||
|
"scopes": [
|
||||||
|
"operator.read",
|
||||||
|
"operator.admin",
|
||||||
|
"operator.write",
|
||||||
|
"operator.approvals",
|
||||||
|
"operator.pairing"
|
||||||
|
],
|
||||||
|
"approvedScopes": [
|
||||||
|
"operator.read",
|
||||||
|
"operator.admin",
|
||||||
|
"operator.write",
|
||||||
|
"operator.approvals",
|
||||||
|
"operator.pairing"
|
||||||
|
],
|
||||||
|
"tokens": {
|
||||||
|
"operator": {
|
||||||
|
"token": "pg1GmeUDISnd7tcZBg7egNxxZSfJOpYJ1CfjrVXA9r0",
|
||||||
|
"role": "operator",
|
||||||
|
"scopes": [
|
||||||
|
"operator.admin",
|
||||||
|
"operator.approvals",
|
||||||
|
"operator.pairing",
|
||||||
|
"operator.read",
|
||||||
|
"operator.write"
|
||||||
|
],
|
||||||
|
"createdAtMs": 1772478478331,
|
||||||
|
"rotatedAtMs": 1772478926904,
|
||||||
|
"lastUsedAtMs": 1772587382647
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"createdAtMs": 1772478478331,
|
||||||
|
"approvedAtMs": 1772478926904
|
||||||
|
},
|
||||||
|
"5edabd97839bb827cf4a7e1bdbbf52d3bdc14ee3ed6cd4488dea64165a343a96": {
|
||||||
|
"deviceId": "5edabd97839bb827cf4a7e1bdbbf52d3bdc14ee3ed6cd4488dea64165a343a96",
|
||||||
|
"publicKey": "MvxEPmOjuhaOctHiiTGNWbrb3PqNKdtJH2tNUmnUDFg",
|
||||||
|
"platform": "Linux x86_64",
|
||||||
|
"clientId": "openclaw-control-ui",
|
||||||
|
"clientMode": "webchat",
|
||||||
|
"role": "operator",
|
||||||
|
"roles": [
|
||||||
|
"operator"
|
||||||
|
],
|
||||||
|
"scopes": [
|
||||||
|
"operator.admin",
|
||||||
|
"operator.approvals",
|
||||||
|
"operator.pairing"
|
||||||
|
],
|
||||||
|
"approvedScopes": [
|
||||||
|
"operator.admin",
|
||||||
|
"operator.approvals",
|
||||||
|
"operator.pairing"
|
||||||
|
],
|
||||||
|
"tokens": {
|
||||||
|
"operator": {
|
||||||
|
"token": "o7iad673N6wjzvtaLZi3pi5oOec2a14jRqD0DTqAsNM",
|
||||||
|
"role": "operator",
|
||||||
|
"scopes": [
|
||||||
|
"operator.admin",
|
||||||
|
"operator.approvals",
|
||||||
|
"operator.pairing"
|
||||||
|
],
|
||||||
|
"createdAtMs": 1772562796594,
|
||||||
|
"lastUsedAtMs": 1772563663633
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"createdAtMs": 1772562796594,
|
||||||
|
"approvedAtMs": 1772562796594
|
||||||
|
},
|
||||||
|
"5d129a0d4e4c48a61ac4132f4f71c6eccf4df41d066a03076bcf255f1e71f0dc": {
|
||||||
|
"deviceId": "5d129a0d4e4c48a61ac4132f4f71c6eccf4df41d066a03076bcf255f1e71f0dc",
|
||||||
|
"publicKey": "1KPQKT74AgGXb8B6O8vTQqkCFBBTI1_9Y2jVvzVI6G4",
|
||||||
|
"platform": "Linux x86_64",
|
||||||
|
"clientId": "openclaw-control-ui",
|
||||||
|
"clientMode": "webchat",
|
||||||
|
"role": "operator",
|
||||||
|
"roles": [
|
||||||
|
"operator"
|
||||||
|
],
|
||||||
|
"scopes": [
|
||||||
|
"operator.admin",
|
||||||
|
"operator.approvals",
|
||||||
|
"operator.pairing",
|
||||||
|
"operator.read",
|
||||||
|
"operator.write"
|
||||||
|
],
|
||||||
|
"approvedScopes": [
|
||||||
|
"operator.admin",
|
||||||
|
"operator.approvals",
|
||||||
|
"operator.pairing"
|
||||||
|
],
|
||||||
|
"tokens": {
|
||||||
|
"operator": {
|
||||||
|
"token": "2nXUowAOJpF7bCROTQ4-q50zUe2FHRzJDhmpFQe0DQ4",
|
||||||
|
"role": "operator",
|
||||||
|
"scopes": [
|
||||||
|
"operator.admin",
|
||||||
|
"operator.approvals",
|
||||||
|
"operator.pairing"
|
||||||
|
],
|
||||||
|
"createdAtMs": 1772563930487,
|
||||||
|
"lastUsedAtMs": 1774510441434
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"createdAtMs": 1772563930487,
|
||||||
|
"approvedAtMs": 1772563930487
|
||||||
|
},
|
||||||
|
"549bd550370c304528dad163bf24f004d94acb9bb659020fb44e88b4f73c1ee1": {
|
||||||
|
"deviceId": "549bd550370c304528dad163bf24f004d94acb9bb659020fb44e88b4f73c1ee1",
|
||||||
|
"publicKey": "hX_4gWll3JPphbMZQ2fjPIXDXwp51gaILYB64KyimBE",
|
||||||
|
"displayName": "subagent-reliability-harness",
|
||||||
|
"platform": "linux",
|
||||||
|
"clientId": "test",
|
||||||
|
"clientMode": "test",
|
||||||
|
"role": "operator",
|
||||||
|
"roles": [
|
||||||
|
"operator"
|
||||||
|
],
|
||||||
|
"scopes": [
|
||||||
|
"operator.admin"
|
||||||
|
],
|
||||||
|
"approvedScopes": [
|
||||||
|
"operator.admin"
|
||||||
|
],
|
||||||
|
"tokens": {
|
||||||
|
"operator": {
|
||||||
|
"token": "fDTz6u2K-fKNq4Cc-VoSQkbfltPCN1tqetg52yhsJk8",
|
||||||
|
"role": "operator",
|
||||||
|
"scopes": [
|
||||||
|
"operator.admin"
|
||||||
|
],
|
||||||
|
"createdAtMs": 1773424919036
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"createdAtMs": 1773424919036,
|
||||||
|
"approvedAtMs": 1773424919036
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"entries": {
|
||||||
|
"discord:default:guild:425781660781641729:user:425208577846935553": {
|
||||||
|
"recent": [
|
||||||
|
"github-copilot/claude-sonnet-4.6",
|
||||||
|
"openai-codex/gpt-5.4"
|
||||||
|
],
|
||||||
|
"updatedAt": "2026-03-25T19:35:30.248Z"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Executable
+20
@@ -0,0 +1,20 @@
|
|||||||
|
---
|
||||||
|
name: boot-md
|
||||||
|
description: "Run BOOT.md on gateway startup"
|
||||||
|
homepage: https://docs.openclaw.ai/automation/hooks#boot-md
|
||||||
|
metadata:
|
||||||
|
{
|
||||||
|
"openclaw":
|
||||||
|
{
|
||||||
|
"emoji": "🚀",
|
||||||
|
"events": ["gateway:startup"],
|
||||||
|
"requires": { "config": ["workspace.dir"] },
|
||||||
|
"install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
---
|
||||||
|
|
||||||
|
# Boot Checklist Hook
|
||||||
|
|
||||||
|
Runs `BOOT.md` at gateway startup for each configured agent scope, if the file exists and is
non-empty in that agent's resolved workspace.
|
||||||
@@ -0,0 +1,221 @@
|
|||||||
|
import { c as resolveAgentWorkspaceDir, r as listAgentIds } from "../../run-with-concurrency-Cuc1THN9.js";
|
||||||
|
import "../../paths-hfkBoC7i.js";
|
||||||
|
import { a as defaultRuntime, t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
|
||||||
|
import { B as resolveAgentIdFromSessionKey } from "../../workspace-CaW79EXh.js";
|
||||||
|
import "../../logger-BW8uLq6f.js";
|
||||||
|
import "../../model-selection-BU6wl1le.js";
|
||||||
|
import "../../github-copilot-token-CQmATy5E.js";
|
||||||
|
import { a as isGatewayStartupEvent } from "../../legacy-names-BAf61_0I.js";
|
||||||
|
import "../../thinking-B5B36ffe.js";
|
||||||
|
import { n as SILENT_REPLY_TOKEN } from "../../tokens-CT3nywWU.js";
|
||||||
|
import { o as agentCommand, s as createDefaultDeps } from "../../pi-embedded-C6ITuRXf.js";
|
||||||
|
import "../../plugins-BZr8LJrk.js";
|
||||||
|
import "../../accounts-D4KOSoV2.js";
|
||||||
|
import "../../send-BLQvMYTW.js";
|
||||||
|
import "../../send-DyQ6zcob.js";
|
||||||
|
import "../../deliver-ClGktCjk.js";
|
||||||
|
import "../../diagnostic-B9sgiG77.js";
|
||||||
|
import "../../accounts-cJqOTvBI.js";
|
||||||
|
import "../../image-ops-D4vlUR_L.js";
|
||||||
|
import "../../send-D4CMR9ev.js";
|
||||||
|
import "../../pi-model-discovery--C0FuY_K.js";
|
||||||
|
import { Dt as resolveAgentMainSessionKey, W as loadSessionStore, Y as updateSessionStore, kt as resolveMainSessionKey } from "../../pi-embedded-helpers-CkWXaNFn.js";
|
||||||
|
import "../../chrome-u1QjWgKY.js";
|
||||||
|
import "../../frontmatter-CZF6xkL3.js";
|
||||||
|
import "../../skills-B24U0XQQ.js";
|
||||||
|
import "../../path-alias-guards-CouH80Zp.js";
|
||||||
|
import "../../redact-DSv8X-3F.js";
|
||||||
|
import "../../errors-_LEe37ld.js";
|
||||||
|
import "../../fs-safe-DOYVoR6M.js";
|
||||||
|
import "../../proxy-env-BZseFuIl.js";
|
||||||
|
import "../../store-BteyapSQ.js";
|
||||||
|
import { s as resolveStorePath } from "../../paths-Co-u8IhA.js";
|
||||||
|
import "../../tool-images-C0W994KU.js";
|
||||||
|
import "../../image-fMgabouP.js";
|
||||||
|
import "../../audio-transcription-runner-DfRfzdqH.js";
|
||||||
|
import "../../fetch-JzejSI-7.js";
|
||||||
|
import "../../fetch-guard-C3LWD6FT.js";
|
||||||
|
import "../../api-key-rotation-CLI6TxVv.js";
|
||||||
|
import "../../proxy-fetch-CbII9--S.js";
|
||||||
|
import "../../ir-D_UJzvhu.js";
|
||||||
|
import "../../render-7C7EDC8_.js";
|
||||||
|
import "../../target-errors-C8xePsI5.js";
|
||||||
|
import "../../commands-registry-DJWLO-6B.js";
|
||||||
|
import "../../skill-commands-B6iXy7Nx.js";
|
||||||
|
import "../../fetch-CONQGbzL.js";
|
||||||
|
import "../../channel-activity-CVe33Aey.js";
|
||||||
|
import "../../tables-DushlpuO.js";
|
||||||
|
import "../../send-CHthYes-.js";
|
||||||
|
import "../../outbound-attachment-3soL6fn0.js";
|
||||||
|
import "../../send-DYCEGbmH.js";
|
||||||
|
import "../../proxy-BzwL4n0W.js";
|
||||||
|
import "../../manager-DS9FBMMG.js";
|
||||||
|
import "../../query-expansion-DUWWrH-g.js";
|
||||||
|
import fs from "node:fs/promises";
|
||||||
|
import path from "node:path";
|
||||||
|
import crypto from "node:crypto";
|
||||||
|
//#region src/gateway/boot.ts
|
||||||
|
/**
 * Build an identifier for a single boot run.
 *
 * The id has the form `boot-<timestamp>-<suffix>`, where the timestamp is the
 * current ISO-8601 time with `:` and `.` turned into `-`, the `T` separator
 * turned into `_` and the trailing `Z` dropped, and the suffix is the first
 * eight characters of a freshly generated UUID.
 *
 * @returns {string} The generated boot session id.
 */
function generateBootSessionId() {
	const isoNow = new Date().toISOString();
	const stamp = isoNow.replace(/[:.]/g, "-").replace("T", "_").replace("Z", "");
	const suffix = crypto.randomUUID().slice(0, 8);
	return `boot-${stamp}-${suffix}`;
}
|
||||||
|
const log$1 = createSubsystemLogger("gateway/boot");
|
||||||
|
const BOOT_FILENAME = "BOOT.md";
|
||||||
|
/**
 * Assemble the prompt text used for a boot run.
 *
 * The BOOT.md content is placed between a fixed instruction header and a
 * fixed footer; all parts are joined with newlines.
 *
 * @param content Text of BOOT.md to embed in the prompt.
 * @returns {string} The complete prompt.
 */
function buildBootPrompt(content) {
	const header = [
		"You are running a boot check. Follow BOOT.md instructions exactly.",
		"",
		"BOOT.md:"
	];
	const footer = [
		"",
		"If BOOT.md asks you to send a message, use the message tool (action=send with channel + target).",
		"Use the `target` field (not `to`) for message tool destinations.",
		`After sending with the message tool, reply with ONLY: ${SILENT_REPLY_TOKEN}.`,
		`If nothing needs attention, reply with ONLY: ${SILENT_REPLY_TOKEN}.`
	];
	// Keep `content` as an array element so join() renders it the same way as before.
	return [...header, content, ...footer].join("\n");
}
|
||||||
|
/**
 * Read the boot file from a workspace directory.
 *
 * @param workspaceDir Directory expected to contain the boot file.
 * @returns A status object: `{ status: "missing" }` when the file does not
 *   exist (ENOENT), `{ status: "empty" }` when it contains only whitespace,
 *   otherwise `{ status: "ok", content }` with the trimmed text.
 * @throws Any read error other than ENOENT is rethrown.
 */
async function loadBootFile(workspaceDir) {
	const bootPath = path.join(workspaceDir, BOOT_FILENAME);
	let raw;
	try {
		raw = await fs.readFile(bootPath, "utf-8");
	} catch (err) {
		if (err.code === "ENOENT") return { status: "missing" };
		throw err;
	}
	const content = raw.trim();
	if (content) return { status: "ok", content };
	return { status: "empty" };
}
|
||||||
|
/**
 * Capture the current session-store entry for a session key so it can be
 * put back after the boot run.
 *
 * @param params Object with `cfg` (configuration) and `sessionKey`.
 * @returns A snapshot `{ storePath, sessionKey, canRestore, hadEntry, entry? }`.
 *   `entry` is a structured clone of the stored entry when one exists.
 *   When the store cannot be loaded, the failure is logged at debug level and
 *   the snapshot is marked `canRestore: false`.
 */
function snapshotMainSessionMapping(params) {
	const { sessionKey } = params;
	const agentId = resolveAgentIdFromSessionKey(sessionKey);
	const storePath = resolveStorePath(params.cfg.session?.store, { agentId });
	const base = { storePath, sessionKey };
	try {
		const store = loadSessionStore(storePath, { skipCache: true });
		const entry = store[sessionKey];
		if (entry) {
			return { ...base, canRestore: true, hadEntry: true, entry: structuredClone(entry) };
		}
		return { ...base, canRestore: true, hadEntry: false };
	} catch (err) {
		log$1.debug("boot: could not snapshot main session mapping", {
			sessionKey,
			error: String(err)
		});
		return { ...base, canRestore: false, hadEntry: false };
	}
}
|
||||||
|
/**
 * Write a previously captured session mapping back into the session store.
 *
 * If the snapshot held an entry, that entry is reinstated under its session
 * key; otherwise the key is deleted from the store. Snapshots marked
 * `canRestore: false` are ignored.
 *
 * @param snapshot Snapshot produced before the boot run.
 * @returns {Promise<string|undefined>} The error message if the store update
 *   failed, otherwise `undefined`.
 */
async function restoreMainSessionMapping(snapshot) {
	if (!snapshot.canRestore) return;
	const applySnapshot = (store) => {
		if (snapshot.hadEntry && snapshot.entry) {
			store[snapshot.sessionKey] = snapshot.entry;
		} else {
			delete store[snapshot.sessionKey];
		}
	};
	try {
		await updateSessionStore(snapshot.storePath, applySnapshot, { activeSessionKey: snapshot.sessionKey });
	} catch (err) {
		return err instanceof Error ? err.message : String(err);
	}
}
|
||||||
|
/**
 * Run the boot file once for one workspace.
 *
 * Steps: load the boot file, resolve the main session key (per agent when
 * `params.agentId` is set), snapshot the current session mapping, run the
 * agent with the boot prompt (delivery disabled), then restore the mapping.
 *
 * @param params Object with `cfg`, `deps`, `workspaceDir` and optional `agentId`.
 * @returns `{ status: "skipped", reason }` when the boot file is missing or
 *   empty, `{ status: "ran" }` on success, or `{ status: "failed", reason }`
 *   when reading the file, running the agent, or restoring the mapping failed.
 */
async function runBootOnce(params) {
	// Runtime handed to the agent command: regular output is dropped, errors go to the boot logger.
	const bootRuntime = {
		log: () => {},
		error: (message) => log$1.error(String(message)),
		exit: defaultRuntime.exit
	};

	let loaded;
	try {
		loaded = await loadBootFile(params.workspaceDir);
	} catch (err) {
		const reason = err instanceof Error ? err.message : String(err);
		log$1.error(`boot: failed to read ${BOOT_FILENAME}: ${reason}`);
		return { status: "failed", reason };
	}

	const { status: fileStatus } = loaded;
	if (fileStatus === "missing" || fileStatus === "empty") {
		return { status: "skipped", reason: fileStatus };
	}

	let sessionKey;
	if (params.agentId) {
		sessionKey = resolveAgentMainSessionKey({ cfg: params.cfg, agentId: params.agentId });
	} else {
		sessionKey = resolveMainSessionKey(params.cfg);
	}
	const message = buildBootPrompt(loaded.content ?? "");
	const sessionId = generateBootSessionId();
	// Taken before the agent run so the mapping can be put back afterwards.
	const mappingSnapshot = snapshotMainSessionMapping({ cfg: params.cfg, sessionKey });

	let agentFailure;
	try {
		const request = {
			message,
			sessionKey,
			sessionId,
			deliver: false,
			senderIsOwner: true
		};
		await agentCommand(request, bootRuntime, params.deps);
	} catch (err) {
		agentFailure = err instanceof Error ? err.message : String(err);
		log$1.error(`boot: agent run failed: ${agentFailure}`);
	}

	// The restore is attempted whether or not the agent run succeeded.
	const mappingRestoreFailure = await restoreMainSessionMapping(mappingSnapshot);
	if (mappingRestoreFailure) {
		log$1.error(`boot: failed to restore main session mapping: ${mappingRestoreFailure}`);
	}

	const problems = [];
	if (agentFailure) problems.push(`agent run failed: ${agentFailure}`);
	if (mappingRestoreFailure) problems.push(`mapping restore failed: ${mappingRestoreFailure}`);
	if (!agentFailure && !mappingRestoreFailure) return { status: "ran" };
	return { status: "failed", reason: problems.join("; ") };
}
|
||||||
|
//#endregion
|
||||||
|
//#region src/hooks/bundled/boot-md/handler.ts
|
||||||
|
const log = createSubsystemLogger("hooks/boot-md");
|
||||||
|
/**
 * Hook handler: on a gateway startup event, run the boot file for every
 * agent id listed for the configuration, one after another.
 *
 * Events that are not gateway startup events, or that carry no `cfg` in
 * their context, are ignored. Failed runs are logged as warnings and
 * skipped runs at debug level; neither stops the loop.
 *
 * @param event Hook event.
 */
const runBootChecklist = async (event) => {
	if (!isGatewayStartupEvent(event) || !event.context.cfg) return;
	const { cfg } = event.context;
	const deps = event.context.deps ?? createDefaultDeps();
	for (const agentId of listAgentIds(cfg)) {
		const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
		const outcome = await runBootOnce({ cfg, deps, workspaceDir, agentId });
		const details = { agentId, workspaceDir, reason: outcome.reason };
		if (outcome.status === "failed") {
			log.warn("boot-md failed for agent startup run", details);
		} else if (outcome.status === "skipped") {
			log.debug("boot-md skipped for agent startup run", details);
		}
	}
};
|
||||||
|
//#endregion
|
||||||
|
export { runBootChecklist as default };
|
||||||
Executable
+53
@@ -0,0 +1,53 @@
|
|||||||
|
---
|
||||||
|
name: bootstrap-extra-files
|
||||||
|
description: "Inject additional workspace bootstrap files via glob/path patterns"
|
||||||
|
homepage: https://docs.openclaw.ai/automation/hooks#bootstrap-extra-files
|
||||||
|
metadata:
|
||||||
|
{
|
||||||
|
"openclaw":
|
||||||
|
{
|
||||||
|
"emoji": "📎",
|
||||||
|
"events": ["agent:bootstrap"],
|
||||||
|
"requires": { "config": ["workspace.dir"] },
|
||||||
|
"install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
---
|
||||||
|
|
||||||
|
# Bootstrap Extra Files Hook
|
||||||
|
|
||||||
|
Loads additional bootstrap files into `Project Context` during `agent:bootstrap`.
|
||||||
|
|
||||||
|
## Why
|
||||||
|
|
||||||
|
Use this when your workspace has multiple context roots (for example monorepos) and
|
||||||
|
you want to include extra `AGENTS.md`/`TOOLS.md`-class files without changing the
|
||||||
|
workspace root.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"hooks": {
|
||||||
|
"internal": {
|
||||||
|
"enabled": true,
|
||||||
|
"entries": {
|
||||||
|
"bootstrap-extra-files": {
|
||||||
|
"enabled": true,
|
||||||
|
"paths": ["packages/*/AGENTS.md", "packages/*/TOOLS.md"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Options
|
||||||
|
|
||||||
|
- `paths` (string[]): preferred list of glob/path patterns.
|
||||||
|
- `patterns` (string[]): alias of `paths`.
|
||||||
|
- `files` (string[]): alias of `paths`.
|
||||||
|
|
||||||
|
All paths are resolved from the workspace and must stay inside it (including realpath checks).
|
||||||
|
Only recognized bootstrap basenames are loaded (`AGENTS.md`, `SOUL.md`, `TOOLS.md`,
|
||||||
|
`IDENTITY.md`, `USER.md`, `HEARTBEAT.md`, `BOOTSTRAP.md`, `MEMORY.md`, `memory.md`).
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
import "../../paths-hfkBoC7i.js";
|
||||||
|
import { t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
|
||||||
|
import { d as loadExtraBootstrapFilesWithDiagnostics, u as filterBootstrapFilesForSession } from "../../workspace-CaW79EXh.js";
|
||||||
|
import "../../logger-BW8uLq6f.js";
|
||||||
|
import { i as isAgentBootstrapEvent } from "../../legacy-names-BAf61_0I.js";
|
||||||
|
import "../../frontmatter-CZF6xkL3.js";
|
||||||
|
import { t as resolveHookConfig } from "../../config-Bs6iYHRw.js";
|
||||||
|
//#region src/hooks/bundled/bootstrap-extra-files/handler.ts
|
||||||
|
const HOOK_KEY = "bootstrap-extra-files";
|
||||||
|
const log = createSubsystemLogger("bootstrap-extra-files");
|
||||||
|
/**
 * Reduce an arbitrary value to a list of non-empty, trimmed strings.
 *
 * @param value Candidate value; anything that is not an array yields `[]`.
 * @returns {string[]} The trimmed string elements of `value`, with
 *   non-string elements and blank strings dropped.
 */
function normalizeStringArray(value) {
	if (!Array.isArray(value)) return [];
	const cleaned = [];
	for (const item of value) {
		if (typeof item !== "string") continue;
		const trimmed = item.trim();
		if (trimmed) cleaned.push(trimmed);
	}
	return cleaned;
}
|
||||||
|
/**
 * Pick the pattern list from the hook configuration.
 *
 * `paths` is preferred; `patterns` is used when `paths` yields nothing, and
 * `files` when both yield nothing.
 *
 * @param hookConfig Hook configuration object.
 * @returns {string[]} The first non-empty normalized list, or the normalized
 *   `files` list (possibly empty).
 */
function resolveExtraBootstrapPatterns(hookConfig) {
	for (const key of ["paths", "patterns"]) {
		const candidates = normalizeStringArray(hookConfig[key]);
		if (candidates.length > 0) return candidates;
	}
	return normalizeStringArray(hookConfig.files);
}
|
||||||
|
/**
 * Hook handler: on an agent bootstrap event, load the extra bootstrap files
 * matched by the configured patterns and append them to
 * `context.bootstrapFiles`.
 *
 * Does nothing when the event is not an agent bootstrap event, when the hook
 * has no config or is disabled (`enabled === false`), or when no patterns are
 * configured. Skipped candidates are summarized per reason at debug level.
 * Any error while loading or merging is logged as a warning, not thrown.
 *
 * @param event Hook event.
 */
const bootstrapExtraFilesHook = async (event) => {
	if (!isAgentBootstrapEvent(event)) return;
	const context = event.context;
	const hookConfig = resolveHookConfig(context.cfg, HOOK_KEY);
	if (!hookConfig || hookConfig.enabled === false) return;
	const patterns = resolveExtraBootstrapPatterns(hookConfig);
	if (patterns.length === 0) return;
	try {
		const loaded = await loadExtraBootstrapFilesWithDiagnostics(context.workspaceDir, patterns);
		const { files: extras, diagnostics } = loaded;
		if (diagnostics.length > 0) {
			// Count skipped candidates per reason for the debug summary.
			const reasons = {};
			diagnostics.forEach((item) => {
				reasons[item.reason] = (reasons[item.reason] ?? 0) + 1;
			});
			log.debug("skipped extra bootstrap candidates", {
				skipped: diagnostics.length,
				reasons
			});
		}
		if (extras.length === 0) return;
		const merged = [...context.bootstrapFiles, ...extras];
		context.bootstrapFiles = filterBootstrapFilesForSession(merged, context.sessionKey);
	} catch (err) {
		log.warn(`failed: ${String(err)}`);
	}
};
|
||||||
|
//#endregion
|
||||||
|
export { bootstrapExtraFilesHook as default };
|
||||||
Executable
+122
@@ -0,0 +1,122 @@
|
|||||||
|
---
|
||||||
|
name: command-logger
|
||||||
|
description: "Log all command events to a centralized audit file"
|
||||||
|
homepage: https://docs.openclaw.ai/automation/hooks#command-logger
|
||||||
|
metadata:
|
||||||
|
{
|
||||||
|
"openclaw":
|
||||||
|
{
|
||||||
|
"emoji": "📝",
|
||||||
|
"events": ["command"],
|
||||||
|
"install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
---
|
||||||
|
|
||||||
|
# Command Logger Hook
|
||||||
|
|
||||||
|
Logs all command events (`/new`, `/reset`, `/stop`, etc.) to a centralized audit log file for debugging and monitoring purposes.
|
||||||
|
|
||||||
|
## What It Does
|
||||||
|
|
||||||
|
Every time you issue a command to the agent:
|
||||||
|
|
||||||
|
1. **Captures event details** - Command action, timestamp, session key, sender ID, source
|
||||||
|
2. **Appends to log file** - Writes a JSON line to `~/.openclaw/logs/commands.log`
|
||||||
|
3. **Silent operation** - Runs in the background without user notifications
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
Log entries are written in JSONL (JSON Lines) format:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"timestamp":"2026-01-16T14:30:00.000Z","action":"new","sessionKey":"agent:main:main","senderId":"+1234567890","source":"telegram"}
|
||||||
|
{"timestamp":"2026-01-16T15:45:22.000Z","action":"stop","sessionKey":"agent:main:main","senderId":"user@example.com","source":"whatsapp"}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Use Cases
|
||||||
|
|
||||||
|
- **Debugging**: Track when commands were issued and from which source
|
||||||
|
- **Auditing**: Monitor command usage across different channels
|
||||||
|
- **Analytics**: Analyze command patterns and frequency
|
||||||
|
- **Troubleshooting**: Investigate issues by reviewing command history
|
||||||
|
|
||||||
|
## Log File Location
|
||||||
|
|
||||||
|
`~/.openclaw/logs/commands.log`
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
No requirements - this hook works out of the box on all platforms.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
No configuration needed. The hook automatically:
|
||||||
|
|
||||||
|
- Creates the log directory if it doesn't exist
|
||||||
|
- Appends to the log file (doesn't overwrite)
|
||||||
|
- Catches its own errors and reports them through the logger, without disrupting command execution
|
||||||
|
|
||||||
|
## Disabling
|
||||||
|
|
||||||
|
To disable this hook:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
openclaw hooks disable command-logger
|
||||||
|
```
|
||||||
|
|
||||||
|
Or via config:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"hooks": {
|
||||||
|
"internal": {
|
||||||
|
"entries": {
|
||||||
|
"command-logger": { "enabled": false }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Log Rotation
|
||||||
|
|
||||||
|
The hook does not automatically rotate logs. To manage log size, you can:
|
||||||
|
|
||||||
|
1. **Manual rotation**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mv ~/.openclaw/logs/commands.log ~/.openclaw/logs/commands.log.old
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Use logrotate** (Linux):
|
||||||
|
Create `/etc/logrotate.d/openclaw`:
|
||||||
|
```
|
||||||
|
/home/username/.openclaw/logs/commands.log {
|
||||||
|
weekly
|
||||||
|
rotate 4
|
||||||
|
compress
|
||||||
|
missingok
|
||||||
|
notifempty
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Viewing Logs
|
||||||
|
|
||||||
|
View recent commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
tail -n 20 ~/.openclaw/logs/commands.log
|
||||||
|
```
|
||||||
|
|
||||||
|
Pretty-print with jq:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cat ~/.openclaw/logs/commands.log | jq .
|
||||||
|
```
|
||||||
|
|
||||||
|
Filter by action:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
grep '"action":"new"' ~/.openclaw/logs/commands.log | jq .
|
||||||
|
```
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
import { c as resolveStateDir } from "../../paths-hfkBoC7i.js";
|
||||||
|
import { t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
|
||||||
|
import fs from "node:fs/promises";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
//#region src/hooks/bundled/command-logger/handler.ts
|
||||||
|
/**
|
||||||
|
* Example hook handler: Log all commands to a file
|
||||||
|
*
|
||||||
|
* This handler demonstrates how to create a hook that logs all command events
|
||||||
|
* to a centralized log file for audit/debugging purposes.
|
||||||
|
*
|
||||||
|
* To enable this handler, add it to your config:
|
||||||
|
*
|
||||||
|
* ```json
|
||||||
|
* {
|
||||||
|
* "hooks": {
|
||||||
|
* "internal": {
|
||||||
|
* "enabled": true,
|
||||||
|
* "handlers": [
|
||||||
|
* {
|
||||||
|
* "event": "command",
|
||||||
|
* "module": "./hooks/handlers/command-logger.ts"
|
||||||
|
* }
|
||||||
|
* ]
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
const log = createSubsystemLogger("command-logger");
|
||||||
|
/**
|
||||||
|
* Log all command events to a file
|
||||||
|
*/
|
||||||
|
/**
 * Hook handler: append one JSON line per command event to
 * `<stateDir>/logs/commands.log`.
 *
 * Events whose `type` is not "command" are ignored. The log directory is
 * created if needed. Each line records timestamp, action, session key,
 * sender id and source; the last two fall back to "unknown" when absent.
 * Failures are caught and reported through the logger.
 *
 * @param event Hook event.
 */
const logCommand = async (event) => {
	if (event.type !== "command") return;
	try {
		const logDir = path.join(resolveStateDir(process.env, os.homedir), "logs");
		await fs.mkdir(logDir, { recursive: true });
		const logFile = path.join(logDir, "commands.log");
		const record = {
			timestamp: event.timestamp.toISOString(),
			action: event.action,
			sessionKey: event.sessionKey,
			senderId: event.context.senderId ?? "unknown",
			source: event.context.commandSource ?? "unknown"
		};
		await fs.appendFile(logFile, `${JSON.stringify(record)}\n`, "utf-8");
	} catch (err) {
		const reason = err instanceof Error ? err.message : String(err);
		log.error(`Failed to log command: ${reason}`);
	}
};
|
||||||
|
//#endregion
|
||||||
|
export { logCommand as default };
|
||||||
Executable
+109
@@ -0,0 +1,109 @@
|
|||||||
|
---
|
||||||
|
name: session-memory
|
||||||
|
description: "Save session context to memory when /new or /reset command is issued"
|
||||||
|
homepage: https://docs.openclaw.ai/automation/hooks#session-memory
|
||||||
|
metadata:
|
||||||
|
{
|
||||||
|
"openclaw":
|
||||||
|
{
|
||||||
|
"emoji": "💾",
|
||||||
|
"events": ["command:new", "command:reset"],
|
||||||
|
"requires": { "config": ["workspace.dir"] },
|
||||||
|
"install": [{ "id": "bundled", "kind": "bundled", "label": "Bundled with OpenClaw" }],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
---
|
||||||
|
|
||||||
|
# Session Memory Hook
|
||||||
|
|
||||||
|
Automatically saves session context to your workspace memory when you issue `/new` or `/reset`.
|
||||||
|
|
||||||
|
## What It Does
|
||||||
|
|
||||||
|
When you run `/new` or `/reset` to start a fresh session:
|
||||||
|
|
||||||
|
1. **Finds the previous session** - Uses the pre-reset session entry to locate the correct transcript
|
||||||
|
2. **Extracts conversation** - Reads the last N user/assistant messages from the session (default: 15, configurable)
|
||||||
|
3. **Generates descriptive slug** - Uses LLM to create a meaningful filename slug based on conversation content
|
||||||
|
4. **Saves to memory** - Creates a new file at `<workspace>/memory/YYYY-MM-DD-slug.md`
|
||||||
|
5. **Logs confirmation** - Records the saved file path in the hook log
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
Memory files are created with the following format:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
# Session: 2026-01-16 14:30:00 UTC
|
||||||
|
|
||||||
|
- **Session Key**: agent:main:main
|
||||||
|
- **Session ID**: abc123def456
|
||||||
|
- **Source**: telegram
|
||||||
|
```
|
||||||
|
|
||||||
|
## Filename Examples
|
||||||
|
|
||||||
|
The LLM generates descriptive slugs based on your conversation:
|
||||||
|
|
||||||
|
- `2026-01-16-vendor-pitch.md` - Discussion about vendor evaluation
|
||||||
|
- `2026-01-16-api-design.md` - API architecture planning
|
||||||
|
- `2026-01-16-bug-fix.md` - Debugging session
|
||||||
|
- `2026-01-16-1430.md` - Fallback timestamp if slug generation fails
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- **Config**: `workspace.dir` must be set (automatically configured during onboarding)
|
||||||
|
|
||||||
|
The hook uses your configured LLM provider to generate slugs, so it works with any provider (Anthropic, OpenAI, etc.).
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
The hook supports optional configuration:
|
||||||
|
|
||||||
|
| Option | Type | Default | Description |
|
||||||
|
| ---------- | ------ | ------- | --------------------------------------------------------------- |
|
||||||
|
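| `messages` | number | 15      | Number of user/assistant messages to include in the memory file |
| `llmSlug`  | boolean | true   | Set to `false` to skip LLM slug generation and use the timestamp fallback |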
|
||||||
|
|
||||||
|
Example configuration:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"hooks": {
|
||||||
|
"internal": {
|
||||||
|
"entries": {
|
||||||
|
"session-memory": {
|
||||||
|
"enabled": true,
|
||||||
|
"messages": 25
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The hook automatically:
|
||||||
|
|
||||||
|
- Uses your workspace directory (`~/.openclaw/workspace` by default)
|
||||||
|
- Uses your configured LLM for slug generation
|
||||||
|
- Falls back to timestamp slugs if LLM is unavailable
|
||||||
|
|
||||||
|
## Disabling
|
||||||
|
|
||||||
|
To disable this hook:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
openclaw hooks disable session-memory
|
||||||
|
```
|
||||||
|
|
||||||
|
Or remove it from your config:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"hooks": {
|
||||||
|
"internal": {
|
||||||
|
"entries": {
|
||||||
|
"session-memory": { "enabled": false }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
@@ -0,0 +1,238 @@
|
|||||||
|
import { c as resolveAgentWorkspaceDir } from "../../run-with-concurrency-Cuc1THN9.js";
|
||||||
|
import { c as resolveStateDir } from "../../paths-hfkBoC7i.js";
|
||||||
|
import { t as createSubsystemLogger } from "../../subsystem-C-Cf_MFK.js";
|
||||||
|
import { B as resolveAgentIdFromSessionKey } from "../../workspace-CaW79EXh.js";
|
||||||
|
import "../../logger-BW8uLq6f.js";
|
||||||
|
import "../../model-selection-BU6wl1le.js";
|
||||||
|
import "../../github-copilot-token-CQmATy5E.js";
|
||||||
|
import "../../legacy-names-BAf61_0I.js";
|
||||||
|
import "../../thinking-B5B36ffe.js";
|
||||||
|
import "../../tokens-CT3nywWU.js";
|
||||||
|
import "../../pi-embedded-C6ITuRXf.js";
|
||||||
|
import "../../plugins-BZr8LJrk.js";
|
||||||
|
import "../../accounts-D4KOSoV2.js";
|
||||||
|
import "../../send-BLQvMYTW.js";
|
||||||
|
import "../../send-DyQ6zcob.js";
|
||||||
|
import "../../deliver-ClGktCjk.js";
|
||||||
|
import "../../diagnostic-B9sgiG77.js";
|
||||||
|
import "../../accounts-cJqOTvBI.js";
|
||||||
|
import "../../image-ops-D4vlUR_L.js";
|
||||||
|
import "../../send-D4CMR9ev.js";
|
||||||
|
import "../../pi-model-discovery--C0FuY_K.js";
|
||||||
|
import { pt as hasInterSessionUserProvenance } from "../../pi-embedded-helpers-CkWXaNFn.js";
|
||||||
|
import "../../chrome-u1QjWgKY.js";
|
||||||
|
import "../../frontmatter-CZF6xkL3.js";
|
||||||
|
import "../../skills-B24U0XQQ.js";
|
||||||
|
import "../../path-alias-guards-CouH80Zp.js";
|
||||||
|
import "../../redact-DSv8X-3F.js";
|
||||||
|
import "../../errors-_LEe37ld.js";
|
||||||
|
import { c as writeFileWithinRoot } from "../../fs-safe-DOYVoR6M.js";
|
||||||
|
import "../../proxy-env-BZseFuIl.js";
|
||||||
|
import "../../store-BteyapSQ.js";
|
||||||
|
import "../../paths-Co-u8IhA.js";
|
||||||
|
import "../../tool-images-C0W994KU.js";
|
||||||
|
import "../../image-fMgabouP.js";
|
||||||
|
import "../../audio-transcription-runner-DfRfzdqH.js";
|
||||||
|
import "../../fetch-JzejSI-7.js";
|
||||||
|
import "../../fetch-guard-C3LWD6FT.js";
|
||||||
|
import "../../api-key-rotation-CLI6TxVv.js";
|
||||||
|
import "../../proxy-fetch-CbII9--S.js";
|
||||||
|
import "../../ir-D_UJzvhu.js";
|
||||||
|
import "../../render-7C7EDC8_.js";
|
||||||
|
import "../../target-errors-C8xePsI5.js";
|
||||||
|
import "../../commands-registry-DJWLO-6B.js";
|
||||||
|
import "../../skill-commands-B6iXy7Nx.js";
|
||||||
|
import "../../fetch-CONQGbzL.js";
|
||||||
|
import "../../channel-activity-CVe33Aey.js";
|
||||||
|
import "../../tables-DushlpuO.js";
|
||||||
|
import "../../send-CHthYes-.js";
|
||||||
|
import "../../outbound-attachment-3soL6fn0.js";
|
||||||
|
import "../../send-DYCEGbmH.js";
|
||||||
|
import "../../proxy-BzwL4n0W.js";
|
||||||
|
import "../../manager-DS9FBMMG.js";
|
||||||
|
import "../../query-expansion-DUWWrH-g.js";
|
||||||
|
import { generateSlugViaLLM } from "../../llm-slug-generator.js";
|
||||||
|
import { t as resolveHookConfig } from "../../config-Bs6iYHRw.js";
|
||||||
|
import fs from "node:fs/promises";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
//#region src/hooks/bundled/session-memory/handler.ts
|
||||||
|
/**
|
||||||
|
* Session memory hook handler
|
||||||
|
*
|
||||||
|
* Saves session context to memory when /new or /reset command is triggered
|
||||||
|
* Creates a new dated memory file with LLM-generated slug
|
||||||
|
*/
|
||||||
|
// Logger for the "hooks/session-memory" subsystem; the handler emits its debug, info and error records through it.
const log = createSubsystemLogger("hooks/session-memory");
|
||||||
|
/**
 * Collect the most recent chat lines from a JSONL session transcript.
 *
 * Each line is parsed as JSON; only entries of type "message" with role "user"
 * or "assistant" and non-empty content are kept. User messages flagged by
 * `hasInterSessionUserProvenance` are skipped, as is any text starting with "/".
 * For array content only the first part of type "text" is used.
 *
 * @param sessionFilePath Path of the transcript file to read.
 * @param messageCount How many trailing messages to keep (default 15).
 * @returns The kept messages as "role: text" lines joined with "\n" (possibly an
 *   empty string), or null when the file cannot be read.
 */
async function getRecentSessionContent(sessionFilePath, messageCount = 15) {
  try {
    const raw = await fs.readFile(sessionFilePath, "utf-8");
    const collected = [];
    for (const rawLine of raw.trim().split("\n")) {
      try {
        const record = JSON.parse(rawLine);
        if (record.type !== "message" || !record.message) continue;
        const message = record.message;
        const speaker = message.role;
        const isChatRole = speaker === "user" || speaker === "assistant";
        if (!isChatRole || !message.content) continue;
        if (speaker === "user" && hasInterSessionUserProvenance(message)) continue;
        let body;
        if (Array.isArray(message.content)) {
          body = message.content.find((part) => part.type === "text")?.text;
        } else {
          body = message.content;
        }
        if (!body || body.startsWith("/")) continue;
        collected.push(`${speaker}: ${body}`);
      } catch {
        // Best effort: a line that cannot be parsed or inspected is skipped.
      }
    }
    return collected.slice(-messageCount).join("\n");
  } catch {
    return null;
  }
}
|
||||||
|
/**
 * Read recent session content, falling back to a rotated transcript.
 *
 * The active transcript is tried first. When it yields nothing (null or empty),
 * the directory is scanned for siblings named `<basename>.reset.*` and the one
 * that sorts last is read instead.
 *
 * @param sessionFilePath Path of the active transcript.
 * @param messageCount How many trailing messages to keep (default 15).
 * @returns Content from the active transcript if non-empty, otherwise content
 *   from the last-sorted reset sibling, otherwise the (empty or null) primary result.
 */
async function getRecentSessionContentWithResetFallback(sessionFilePath, messageCount = 15) {
  const primary = await getRecentSessionContent(sessionFilePath, messageCount);
  if (primary) return primary;
  try {
    const parentDir = path.dirname(sessionFilePath);
    const rotatedPrefix = `${path.basename(sessionFilePath)}.reset.`;
    const siblings = await fs.readdir(parentDir);
    const rotated = siblings.filter((name) => name.startsWith(rotatedPrefix)).toSorted();
    if (rotated.length === 0) return primary;
    const latestResetPath = path.join(parentDir, rotated[rotated.length - 1]);
    const fallback = await getRecentSessionContent(latestResetPath, messageCount);
    if (!fallback) return primary;
    log.debug("Loaded session content from reset fallback", {
      sessionFilePath,
      latestResetPath
    });
    return fallback;
  } catch {
    // Directory listing or path handling failed; keep whatever the primary read produced.
    return primary;
  }
}
|
||||||
|
/**
 * Drop everything from the first ".reset." marker onwards.
 *
 * @param fileName File name that may carry a ".reset.<suffix>" tail.
 * @returns The part before the first ".reset.", or `fileName` unchanged when
 *   the marker is absent.
 */
function stripResetSuffix(fileName) {
  const markerAt = fileName.indexOf(".reset.");
  if (markerAt === -1) return fileName;
  return fileName.slice(0, markerAt);
}
|
||||||
|
/**
 * Locate the transcript of the session that preceded a reset.
 *
 * Candidates are tried in this order, and the first one present in
 * `params.sessionsDir` wins:
 *   1. the current session file name with any ".reset.*" tail removed;
 *   2. `<sessionId>.jsonl`;
 *   3. the last-sorted `<sessionId>-topic-*.jsonl` that is not a reset file;
 *   4. (only when `currentSessionFile` is set) the last-sorted non-reset `.jsonl`.
 *
 * @param params `{ sessionsDir, currentSessionFile?, sessionId? }`.
 * @returns Full path of the matching file, or undefined when nothing matches or
 *   the directory cannot be read (errors are swallowed on purpose).
 */
async function findPreviousSessionFile(params) {
  try {
    const entries = await fs.readdir(params.sessionsDir);
    const known = new Set(entries);
    const inSessionsDir = (name) => path.join(params.sessionsDir, name);
    const isLiveTranscript = (name) => name.endsWith(".jsonl") && !name.includes(".reset.");

    let baseName;
    if (params.currentSessionFile) {
      baseName = stripResetSuffix(path.basename(params.currentSessionFile));
    }
    if (baseName && known.has(baseName)) return inSessionsDir(baseName);

    const sessionId = params.sessionId?.trim();
    if (sessionId) {
      const canonicalName = `${sessionId}.jsonl`;
      if (known.has(canonicalName)) return inSessionsDir(canonicalName);
      const topicPrefix = `${sessionId}-topic-`;
      const topicFiles = entries
        .filter((name) => name.startsWith(topicPrefix) && isLiveTranscript(name))
        .toSorted();
      if (topicFiles.length > 0) return inSessionsDir(topicFiles[topicFiles.length - 1]);
    }

    if (!params.currentSessionFile) return;
    const liveFiles = entries.filter(isLiveTranscript).toSorted();
    if (liveFiles.length > 0) return inSessionsDir(liveFiles[liveFiles.length - 1]);
  } catch {
    // Best effort: an unreadable directory simply yields no match.
  }
}
|
||||||
|
/**
 * Hook handler: persist the outgoing session to a dated memory file.
 *
 * Runs only for command events whose action is "new" or "reset". It resolves the
 * workspace, finds the previous transcript (recovering it from the sessions
 * directories when the recorded file is missing or already rotated), gathers the
 * most recent messages, asks the LLM for a slug when allowed, and writes
 * `<workspace>/memory/<YYYY-MM-DD>-<slug>.md`. When no slug is produced the UTC
 * time as HHMM is used instead. Every failure is logged rather than thrown.
 *
 * @param event Hook event. Reads `type`, `action`, `timestamp`, `sessionKey`
 *   and `context` (`cfg`, `previousSessionEntry`, `sessionEntry`, `commandSource`).
 * @returns Promise that settles once the file is written or the failure is logged.
 */
const saveSessionToMemory = async (event) => {
  if (event.type !== "command") return;
  if (event.action !== "new" && event.action !== "reset") return;
  try {
    log.debug("Hook triggered for reset/new command", { action: event.action });

    const context = event.context || {};
    const cfg = context.cfg;
    const agentId = resolveAgentIdFromSessionKey(event.sessionKey);
    let workspaceDir;
    if (cfg) workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
    else workspaceDir = path.join(resolveStateDir(process.env, os.homedir), "workspace");
    const memoryDir = path.join(workspaceDir, "memory");
    await fs.mkdir(memoryDir, { recursive: true });

    // Split the ISO timestamp once: "YYYY-MM-DD" and "HH:MM:SS" (UTC).
    const now = new Date(event.timestamp);
    const [dateStr, clockPart] = now.toISOString().split("T");
    const timeStr = clockPart.split(".")[0];

    // Prefer the pre-reset entry so the transcript of the session being closed is used.
    const entry = context.previousSessionEntry || context.sessionEntry || {};
    const previousSessionId = entry.sessionId;
    let transcriptPath = entry.sessionFile || void 0;
    if (!transcriptPath || transcriptPath.includes(".reset.")) {
      // Search next to the recorded file first, then the workspace sessions directory.
      const candidateDirs = new Set(transcriptPath ? [path.dirname(transcriptPath)] : []);
      candidateDirs.add(path.join(workspaceDir, "sessions"));
      for (const sessionsDir of candidateDirs) {
        const recovered = await findPreviousSessionFile({
          sessionsDir,
          currentSessionFile: transcriptPath,
          sessionId: previousSessionId
        });
        if (recovered) {
          transcriptPath = recovered;
          log.debug("Found previous session file", { file: transcriptPath });
          break;
        }
      }
    }
    log.debug("Session context resolved", {
      sessionId: previousSessionId,
      sessionFile: transcriptPath,
      hasCfg: Boolean(cfg)
    });

    const hookConfig = resolveHookConfig(cfg, "session-memory");
    let messageCount = 15;
    if (typeof hookConfig?.messages === "number" && hookConfig.messages > 0) {
      messageCount = hookConfig.messages;
    }

    let slug = null;
    let sessionContent = null;
    if (transcriptPath) {
      sessionContent = await getRecentSessionContentWithResetFallback(transcriptPath, messageCount);
      log.debug("Session content loaded", {
        length: sessionContent?.length ?? 0,
        messageCount
      });
      // LLM slugging is skipped under the test env flags and when config sets llmSlug to false.
      const env = process.env;
      const inTestMode = env.OPENCLAW_TEST_FAST === "1" || env.VITEST === "true" || env.VITEST === "1";
      const allowLlmSlug = !inTestMode && hookConfig?.llmSlug !== false;
      if (sessionContent && cfg && allowLlmSlug) {
        log.debug("Calling generateSlugViaLLM...");
        slug = await generateSlugViaLLM({
          sessionContent,
          cfg
        });
        log.debug("Generated slug", { slug });
      }
    }
    if (!slug) {
      // "HH:MM:SS" -> "HHMM"
      slug = timeStr.replace(/:/g, "").slice(0, 4);
      log.debug("Using fallback timestamp slug", { slug });
    }

    const filename = `${dateStr}-${slug}.md`;
    const memoryFilePath = path.join(memoryDir, filename);
    const displayPath = memoryFilePath.replace(os.homedir(), "~");
    log.debug("Memory file path resolved", {
      filename,
      path: displayPath
    });

    const sessionId = entry.sessionId || "unknown";
    const source = context.commandSource || "unknown";
    const sections = [
      `# Session: ${dateStr} ${timeStr} UTC`,
      "",
      `- **Session Key**: ${event.sessionKey}`,
      `- **Session ID**: ${sessionId}`,
      `- **Source**: ${source}`,
      ""
    ];
    if (sessionContent) {
      sections.push("## Conversation Summary", "", sessionContent, "");
    }
    await writeFileWithinRoot({
      rootDir: memoryDir,
      relativePath: filename,
      data: sections.join("\n"),
      encoding: "utf-8"
    });
    log.debug("Memory file written successfully");
    log.info(`Session context saved to ${displayPath}`);
  } catch (err) {
    const details = err instanceof Error
      ? {
        errorName: err.name,
        errorMessage: err.message,
        stack: err.stack
      }
      : { error: String(err) };
    log.error("Failed to save session memory", details);
  }
};
|
||||||
|
//#endregion
|
||||||
|
export { saveSessionToMemory as default };
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"deviceId": "ad0ebece2493ecaf2336b939a2cc27e65261695c8c8725416e1d349da02a14d5",
|
||||||
|
"tokens": {
|
||||||
|
"operator": {
|
||||||
|
"token": "pg1GmeUDISnd7tcZBg7egNxxZSfJOpYJ1CfjrVXA9r0",
|
||||||
|
"role": "operator",
|
||||||
|
"scopes": [
|
||||||
|
"operator.admin",
|
||||||
|
"operator.approvals",
|
||||||
|
"operator.pairing",
|
||||||
|
"operator.read",
|
||||||
|
"operator.write"
|
||||||
|
],
|
||||||
|
"updatedAtMs": 1774473144159
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"version": 2,
|
||||||
|
"lastUpdateId": 148911073,
|
||||||
|
"botId": "8792219052"
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"lastCheckedAt": "2026-03-24T22:42:51.772Z",
|
||||||
|
"lastNotifiedVersion": "2026.3.23-2",
|
||||||
|
"lastNotifiedTag": "latest",
|
||||||
|
"lastAvailableVersion": "2026.3.23-2",
|
||||||
|
"lastAvailableTag": "latest"
|
||||||
|
}
|
||||||
@@ -0,0 +1,103 @@
|
|||||||
|
# OpenVINO NPU advisory gateway
|
||||||
|
|
||||||
|
Bounded Docker-bridge wrapper for the classifier, GenAI worker, and doc/image triage sidecars.
|
||||||
|
|
||||||
|
- HTTP bind: `172.19.0.1:18830` for `n8n-agent` on the `swarm_default` Docker bridge
|
||||||
|
- Service: `openvino-advisory-gateway.service`
|
||||||
|
- Mode: advisory/shadow/draft only
|
||||||
|
- Metadata log: `~/.local/state/openvino-advisory-gateway/events.sqlite`
|
||||||
|
|
||||||
|
## Authority boundary
|
||||||
|
|
||||||
|
Every response includes an explicit authority block:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"may_route": false,
|
||||||
|
"may_write_memory": false,
|
||||||
|
"may_send_external": false,
|
||||||
|
"may_process_private_dirs": false,
|
||||||
|
"may_execute_tools": false,
|
||||||
|
"may_restart_services": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This service may provide hints and drafts. It must not become the live Atlas/Hermes router, memory writer, primary chat model, external sender, tool executor, service restarter, or broad private document processor without a separate approved integration.
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
|
||||||
|
```text
|
||||||
|
GET /healthz
|
||||||
|
POST /v1/advisory/classify
|
||||||
|
POST /v1/advisory/generate
|
||||||
|
POST /v1/advisory/triage
|
||||||
|
```
|
||||||
|
|
||||||
|
## Cron and n8n advisory dry-run contract
|
||||||
|
|
||||||
|
For cron/n8n event classification, use the dry-run contract in `docs/cron-n8n-advisory-classifier.md`.
|
||||||
|
It defines the normalized event envelope, decision envelope, `suppress|log|summarize|escalate` recommendation mapping, and duplicate/stale/no-op/action-required examples.
|
||||||
|
|
||||||
|
Example artifacts:
|
||||||
|
|
||||||
|
- `examples/cron-advisory-dry-run.sh` — host-local cron wrapper that prints one compact decision line and performs no side effects.
|
||||||
|
- `examples/n8n-advisory-dry-run-fragment.json` — sanitized inactive n8n node fragment for Set -> HTTP Request -> Code decision mapping.
|
||||||
|
|
||||||
|
Both examples preserve the gateway authority boundary: advisory only, no send/restart/memory/tool/routing authority.
|
||||||
|
|
||||||
|
### Classifier shadow call
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"trace_id":"smoke","text":"Urgent: inspect service health and systemd status."}' | jq .
|
||||||
|
```
|
||||||
|
|
||||||
|
### Bounded GenAI draft
|
||||||
|
|
||||||
|
Allowed jobs: `title`, `summary`, `notification`, `memory_candidate`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://172.19.0.1:18830/v1/advisory/generate \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"job":"title","input":"Summarize a local health check.","max_new_tokens":24}' | jq .
|
||||||
|
```
|
||||||
|
|
||||||
|
### Explicit-file doc/image triage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://172.19.0.1:18830/v1/advisory/triage \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"path":"/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png","allowed_roots":["/home/will/lab/swarm/openvino-doc-image-triage-npu"]}' | jq .
|
||||||
|
```
|
||||||
|
|
||||||
|
The gateway requires the path to be inside both:
|
||||||
|
|
||||||
|
1. a configured allowed root on the gateway process; and
|
||||||
|
2. the request's explicit `allowed_roots` list, if one is provided.
|
||||||
|
|
||||||
|
Requests cannot broaden the process-configured roots. Do not broaden configured roots to private folders without explicit approval for that root and task.
|
||||||
|
|
||||||
|
## Install / run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
install -m 0644 openvino-advisory-gateway.service ~/.config/systemd/user/openvino-advisory-gateway.service
|
||||||
|
systemctl --user daemon-reload
|
||||||
|
systemctl --user enable --now openvino-advisory-gateway.service
|
||||||
|
systemctl --user status openvino-advisory-gateway.service --no-pager
|
||||||
|
```
|
||||||
|
|
||||||
|
`--allowed-root` may be repeated in the systemd unit when additional non-private fixture/review directories are approved. Docker bridge exposure must use `--allow-docker-bridge` and the approved bridge IP `172.19.0.1`; the service still refuses wildcard binds such as `0.0.0.0`.
|
||||||
|
|
||||||
|
From `n8n-agent`, verify bridge reachability with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker exec n8n-agent wget -qO- -T 8 http://172.19.0.1:18830/healthz
|
||||||
|
```
|
||||||
|
|
||||||
|
## Tests
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-advisory-gateway
|
||||||
|
python -m pytest tests/test_gateway.py -q
|
||||||
|
```
|
||||||
@@ -0,0 +1,256 @@
|
|||||||
|
# Cron and n8n advisory classifier contract
|
||||||
|
|
||||||
|
Status: dry-run specification and integration examples
|
||||||
|
Scope: cron and n8n alert/event classification through the OpenVINO advisory gateway
|
||||||
|
Gateway: `http://172.19.0.1:18830` from `n8n-agent` and host-local cron on the current bridge-bound service. Override `NPU_ADVISORY_GATEWAY_URL=http://127.0.0.1:18830` only if a localhost-bound instance is explicitly running.
|
||||||
|
|
||||||
|
## Authority boundary
|
||||||
|
|
||||||
|
This contract is advisory only. It may recommend one of `suppress`, `log`, `summarize`, or `escalate`, but it must not perform the action itself.
|
||||||
|
|
||||||
|
Every integration must preserve these authority flags:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"may_route": false,
|
||||||
|
"may_write_memory": false,
|
||||||
|
"may_send_external": false,
|
||||||
|
"may_process_private_dirs": false,
|
||||||
|
"may_execute_tools": false,
|
||||||
|
"may_restart_services": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Allowed side effects in dry-run mode:
|
||||||
|
|
||||||
|
- read an explicit cron/n8n event payload;
|
||||||
|
- call the advisory gateway classifier/generator;
|
||||||
|
- write compact local stdout or n8n execution logs;
|
||||||
|
- store metadata-only advisory counters if an existing log sink already does so.
|
||||||
|
|
||||||
|
Forbidden without separate explicit approval:
|
||||||
|
|
||||||
|
- outbound sends/pages/Discord/Telegram/email;
|
||||||
|
- service restarts, command execution, or tool calls;
|
||||||
|
- Hermes/Atlas routing changes;
|
||||||
|
- memory writes;
|
||||||
|
- broad private-directory processing;
|
||||||
|
- vector database mutation or reindexing.
|
||||||
|
|
||||||
|
## Input event envelope
|
||||||
|
|
||||||
|
Cron and n8n producers should normalize events before classification. Keep this input small and avoid raw private payloads.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"schema": "cron_n8n_event_v1",
|
||||||
|
"trace_id": "cron:service-health:2026-06-05T14:30:00Z",
|
||||||
|
"source": "cron",
|
||||||
|
"workflow": "npu-service-health",
|
||||||
|
"event_kind": "health_check",
|
||||||
|
"severity": "warning",
|
||||||
|
"subject": "openvino-reranker health check repeated warning",
|
||||||
|
"summary": "Two consecutive health probes reported timeout, no restart attempted.",
|
||||||
|
"dedupe_key": "service:openvino-reranker:timeout",
|
||||||
|
"observed_at": "2026-06-05T14:30:00Z",
|
||||||
|
"stale_after_s": 900,
|
||||||
|
"action_requested": false,
|
||||||
|
"dry_run": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Field rules:
|
||||||
|
|
||||||
|
- `source`: `cron` or `n8n`.
|
||||||
|
- `workflow`: compact job/workflow name, not a private URL.
|
||||||
|
- `subject` + `summary`: the only text sent to the classifier.
|
||||||
|
- `dedupe_key`: stable non-secret key for duplicate detection by the caller.
|
||||||
|
- `stale_after_s`: caller-side freshness gate; stale events should not page.
|
||||||
|
- `action_requested`: true only when an upstream job is asking a human/Atlas to consider action.
|
||||||
|
- `dry_run`: must remain true for this phase.
|
||||||
|
|
||||||
|
## Gateway classifier call
|
||||||
|
|
||||||
|
The current gateway `/v1/advisory/classify` accepts explicit text and wraps the classifier response in `openvino_advisory_v1` with NPU proof and authority fields.
|
||||||
|
|
||||||
|
Host cron example for the current bridge-bound service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{
|
||||||
|
"trace_id":"cron:service-health:sample",
|
||||||
|
"text":"source=cron workflow=npu-service-health severity=warning kind=health_check subject=openvino-reranker repeated timeout summary=Two consecutive health probes reported timeout; no restart attempted; dry_run=true"
|
||||||
|
}' | jq '{schema, mode, trace_id, npu_ok: .npu_proof.ok, npu_delta: .npu_proof.npu_busy_delta_us, authority, labels: .result.labels}'
|
||||||
|
```
|
||||||
|
|
||||||
|
n8n Docker-bridge example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://172.19.0.1:18830/v1/advisory/classify \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"trace_id":"n8n:swarm-health:sample","text":"source=n8n workflow=swarm-health-watchdog severity=critical kind=health_check subject=multiple services unhealthy summary=Health probe failed for three services; dry_run=true"}' \
|
||||||
|
| jq '{mode, npu_ok: .npu_proof.ok, npu_delta: .npu_proof.npu_busy_delta_us, may_send_external: .authority.may_send_external}'
|
||||||
|
```
|
||||||
|
|
||||||
|
NPU proof gate: an HTTP 200 is not enough. Treat the classifier as NPU-backed only when `.npu_proof.ok == true` and `.npu_proof.npu_busy_delta_us > 0` for real inference.
|
||||||
|
|
||||||
|
## Advisory decision envelope
|
||||||
|
|
||||||
|
Cron/n8n wrappers should map the gateway response plus caller-side freshness/deduplication state into this compact decision envelope:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"schema": "cron_n8n_advisory_decision_v1",
|
||||||
|
"trace_id": "cron:service-health:2026-06-05T14:30:00Z",
|
||||||
|
"source": "cron",
|
||||||
|
"workflow": "npu-service-health",
|
||||||
|
"dry_run": true,
|
||||||
|
"recommendation": "summarize",
|
||||||
|
"classification": "action_required",
|
||||||
|
"confidence": 0.84,
|
||||||
|
"reason_codes": ["warning_or_high_urgency", "fresh_event", "not_duplicate"],
|
||||||
|
"npu_proof": {"required": true, "ok": true, "npu_busy_delta_us": 1234},
|
||||||
|
"authority": {
|
||||||
|
"may_route": false,
|
||||||
|
"may_write_memory": false,
|
||||||
|
"may_send_external": false,
|
||||||
|
"may_process_private_dirs": false,
|
||||||
|
"may_execute_tools": false,
|
||||||
|
"may_restart_services": false
|
||||||
|
},
|
||||||
|
"next_gate": "human_or_atlas_review_required_before_any_side_effect"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision fields:
|
||||||
|
|
||||||
|
- `recommendation`: `suppress`, `log`, `summarize`, or `escalate`.
|
||||||
|
- `classification`: `duplicate`, `stale`, `no_op`, or `action_required` for v1 examples.
|
||||||
|
- `confidence`: use classifier urgency/category confidence when available; otherwise use a conservative wrapper score.
|
||||||
|
- `reason_codes`: compact machine-readable rationale, not raw payload text.
|
||||||
|
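- `next_gate`: the review/approval gate that must be passed before any side effect; `none_in_dry_run` when the recommendation (`suppress` or `log`) calls for no follow-up.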
|
||||||
|
|
||||||
|
## Recommendation mapping
|
||||||
|
|
||||||
|
This is the v1 dry-run mapping. It is intentionally conservative and caller-side; the NPU classifier advises, the wrapper chooses a recommendation, and humans/Atlas retain authority.
|
||||||
|
|
||||||
|
| Caller/classifier signal | Classification | Recommendation | Dry-run behavior |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Same `dedupe_key` observed inside caller cooldown | `duplicate` | `suppress` | Log compact duplicate count only. Do not send. |
|
||||||
|
| `observed_at + stale_after_s` is earlier than the current time | `stale` | `log` | Log stale event and age. Do not summarize/page. |
|
||||||
|
| Severity low/normal, no action requested, classifier urgency low/normal | `no_op` | `log` | Keep normal execution log only. |
|
||||||
|
| Warning/high urgency or action requested, NPU proof ok | `action_required` | `summarize` | Draft a local summary for review; no send/restart. |
|
||||||
|
| Critical severity or repeated failures and NPU proof ok | `action_required` | `escalate` | Recommend escalation to Atlas/human; wrapper still must not send/restart. |
|
||||||
|
| NPU proof missing or false | `action_required` or caller-specific | `log` | Log `npu_proof_failed`; do not claim NPU-backed advice. |
|
||||||
|
|
||||||
|
## Required examples
|
||||||
|
|
||||||
|
### Duplicate -> suppress
|
||||||
|
|
||||||
|
Input summary:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"source":"cron","workflow":"npu-service-health","severity":"warning","dedupe_key":"service:reranker:timeout","summary":"Same timeout as prior run inside cooldown.","dry_run":true}
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"classification":"duplicate","recommendation":"suppress","reason_codes":["dedupe_key_in_cooldown"],"next_gate":"none_in_dry_run"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stale -> log
|
||||||
|
|
||||||
|
Input summary:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"source":"n8n","workflow":"swarm-health-watchdog","severity":"warning","observed_at":"older_than_stale_after","stale_after_s":900,"summary":"Delayed webhook replay for an old probe.","dry_run":true}
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"classification":"stale","recommendation":"log","reason_codes":["event_stale"],"next_gate":"none_in_dry_run"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### No-op -> log
|
||||||
|
|
||||||
|
Input summary:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"source":"cron","workflow":"backup-check","severity":"normal","action_requested":false,"summary":"Backup completed and all expected files are present.","dry_run":true}
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"classification":"no_op","recommendation":"log","reason_codes":["normal_severity","no_action_requested"],"next_gate":"none_in_dry_run"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Action required -> summarize/escalate
|
||||||
|
|
||||||
|
Input summary:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"source":"n8n","workflow":"swarm-health-watchdog","severity":"critical","action_requested":true,"summary":"RAG and embeddings health failed repeatedly; no restart attempted.","dry_run":true}
|
||||||
|
```
|
||||||
|
|
||||||
|
Decision:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"classification":"action_required","recommendation":"escalate","reason_codes":["critical_severity","action_requested","fresh_event"],"next_gate":"human_or_atlas_review_required_before_any_side_effect"}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Optional local summary draft
|
||||||
|
|
||||||
|
If the decision is `summarize` or `escalate`, a wrapper may request a bounded draft from `/v1/advisory/generate`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://172.19.0.1:18830/v1/advisory/generate \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"trace_id":"cron:service-health:sample","job":"summary","input":"Health check warning: openvino-reranker timed out twice; no restart attempted.","max_new_tokens":48}' \
|
||||||
|
| jq '{mode, trace_id, npu_ok: .npu_proof.ok, authority, draft: .result.draft_text, final_authority: .result.final_authority}'
|
||||||
|
```
|
||||||
|
|
||||||
|
The draft remains non-authoritative. It must not be automatically sent externally or written to memory.
|
||||||
|
|
||||||
|
## n8n integration pattern
|
||||||
|
|
||||||
|
Recommended node chain for dry-run workflows:
|
||||||
|
|
||||||
|
```text
|
||||||
|
Schedule/Webhook/Failure Trigger
|
||||||
|
-> Set normalized event envelope
|
||||||
|
-> HTTP Request POST /v1/advisory/classify
|
||||||
|
-> Code node maps decision envelope
|
||||||
|
-> IF node on recommendation
|
||||||
|
suppress/log: execution log only
|
||||||
|
summarize/escalate: optional local summary draft, then execution log only
|
||||||
|
```
|
||||||
|
|
||||||
|
The IF node must not connect to outbound messaging, service restart, memory write, or Hermes routing nodes until a separate approval changes the authority boundary.
|
||||||
|
|
||||||
|
See `../examples/n8n-advisory-dry-run-fragment.json` for a sanitized node fragment.
|
||||||
|
|
||||||
|
## Cron integration pattern
|
||||||
|
|
||||||
|
Cron jobs should call a wrapper script that prints one compact line and exits successfully unless the wrapper itself fails. The wrapper should not page or restart.
|
||||||
|
|
||||||
|
Example crontab shape:
|
||||||
|
|
||||||
|
```text
|
||||||
|
*/15 * * * * /home/will/lab/swarm/openvino-advisory-gateway/examples/cron-advisory-dry-run.sh npu-service-health warning health_check "openvino-reranker timeout twice" "service:openvino-reranker:timeout" >> /home/will/.local/state/npu-advisory/cron.log 2>&1
|
||||||
|
```
|
||||||
|
|
||||||
|
See `../examples/cron-advisory-dry-run.sh`.
|
||||||
|
|
||||||
|
## Verification checklist
|
||||||
|
|
||||||
|
- Gateway health is reachable on the intended interface.
|
||||||
|
- Classifier response includes `schema=openvino_advisory_v1`.
|
||||||
|
- `.authority.*` flags are all false for side-effect authority.
|
||||||
|
- `.npu_proof.ok` is true and `npu_busy_delta_us > 0` before claiming NPU-backed advice.
|
||||||
|
- Decision envelope is compact and contains only metadata (enums, booleans, counts, scores, paths, deltas, reason codes, gates), never raw payload text.
|
||||||
|
- Duplicate/stale/no-op/action-required examples remain dry-run only.
|
||||||
|
- No n8n workflow activation, outbound send, service restart, memory write, routing change, private-dir broadening, or vector DB mutation occurred.
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env bash
set -euo pipefail

# Dry-run cron/n8n advisory wrapper.
# It calls the advisory classifier and prints one compact decision line.
# It does not send, restart, route, execute tools, or write memory.
#
# Usage:
#   cron-advisory-dry-run.sh [workflow] [severity] [event_kind] [subject] [dedupe_key]
#
# Environment:
#   NPU_ADVISORY_GATEWAY_URL  Base URL of the advisory gateway.
#   NPU_ADVISORY_TRACE_ID     Trace id override (default: cron:<workflow>:<UTC timestamp>).
#   NPU_ADVISORY_TIMEOUT_S    Total time budget for the HTTP call, in seconds (default: 30).
#
# Requires: curl, jq.

GATEWAY_URL="${NPU_ADVISORY_GATEWAY_URL:-http://172.19.0.1:18830}"
# Without a time limit a stalled gateway would hang this job forever and
# successive cron runs would pile up behind it.
TIMEOUT_S="${NPU_ADVISORY_TIMEOUT_S:-30}"
WORKFLOW="${1:-cron-advisory-sample}"
SEVERITY="${2:-normal}"
EVENT_KIND="${3:-health_check}"
SUBJECT="${4:-sample advisory event}"
DEDUPE_KEY="${5:-sample}"
TRACE_ID="${NPU_ADVISORY_TRACE_ID:-cron:${WORKFLOW}:$(date -u +%Y%m%dT%H%M%SZ)}"

# The classifier only receives this flat key=value text; the dry-run and
# no-authority markers are part of the text itself.
TEXT="source=cron workflow=${WORKFLOW} severity=${SEVERITY} kind=${EVENT_KIND} subject=${SUBJECT} dedupe_key=${DEDUPE_KEY} dry_run=true authority=no-send,no-restart,no-memory"

# jq builds the JSON body so quotes/backslashes in the arguments are escaped.
payload=$(jq -nc --arg trace_id "$TRACE_ID" --arg text "$TEXT" '{trace_id:$trace_id,text:$text}')

# -f turns HTTP >= 400 into a non-zero exit, which aborts the script via `set -e`.
response=$(curl -fsS --max-time "$TIMEOUT_S" "${GATEWAY_URL%/}/v1/advisory/classify" -H 'Content-Type: application/json' -d "$payload")

# Map the gateway envelope to one compact decision line:
#   - no NPU proof            -> recommendation "log"
#   - severity critical       -> "escalate"
#   - warning / high urgency  -> "summarize"
#   - otherwise               -> "log"
# classification is "no_op" only for a "log" recommendation at normal severity.
printf '%s\n' "$response" | jq -c --arg source cron --arg workflow "$WORKFLOW" --arg severity "$SEVERITY" --arg dedupe_key "$DEDUPE_KEY" '
  . as $env
  | ($env.result.labels.urgency.value // "normal") as $urgency
  | ($env.result.labels.urgency.confidence // 0) as $confidence
  | ($env.npu_proof.ok == true and (($env.npu_proof.npu_busy_delta_us // 0) > 0)) as $npu_ok
  | (if ($npu_ok | not) then "log"
     elif ($severity == "critical") then "escalate"
     elif ($severity == "warning" or $urgency == "high" or $urgency == "critical") then "summarize"
     else "log" end) as $recommendation
  | (if ($recommendation == "log" and $severity == "normal") then "no_op" else "action_required" end) as $classification
  | {
      schema: "cron_n8n_advisory_decision_v1",
      trace_id: $env.trace_id,
      source: $source,
      workflow: $workflow,
      dry_run: true,
      recommendation: $recommendation,
      classification: $classification,
      confidence: $confidence,
      reason_codes: ([
        (if $npu_ok then "npu_proof_ok" else "npu_proof_failed" end),
        ("severity_" + $severity),
        ("urgency_" + $urgency)
      ]),
      npu_proof: $env.npu_proof,
      authority: $env.authority,
      next_gate: (if $recommendation == "escalate" or $recommendation == "summarize" then "human_or_atlas_review_required_before_any_side_effect" else "none_in_dry_run" end)
    }'
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
{
|
||||||
|
"name": "OpenVINO Advisory Dry-Run Fragment",
|
||||||
|
"active": false,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"values": {
|
||||||
|
"string": [
|
||||||
|
{"name": "schema", "value": "cron_n8n_event_v1"},
|
||||||
|
{"name": "source", "value": "n8n"},
|
||||||
|
{"name": "workflow", "value": "swarm-health-watchdog"},
|
||||||
|
{"name": "event_kind", "value": "health_check"},
|
||||||
|
{"name": "severity", "value": "warning"},
|
||||||
|
{"name": "subject", "value": "OpenVINO service health warning"},
|
||||||
|
{"name": "summary", "value": "Health probe reported a warning; no restart or send is authorized."},
|
||||||
|
{"name": "dedupe_key", "value": "service:openvino:warning"},
|
||||||
|
{"name": "dry_run", "value": "true"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"options": {}
|
||||||
|
},
|
||||||
|
"id": "set-normalized-event",
|
||||||
|
"name": "Set normalized advisory event",
|
||||||
|
"type": "n8n-nodes-base.set",
|
||||||
|
"typeVersion": 2,
|
||||||
|
"position": [260, 300]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://172.19.0.1:18830/v1/advisory/classify",
|
||||||
|
"sendBody": true,
|
||||||
|
"contentType": "json",
|
||||||
|
"jsonBody": "={{ JSON.stringify({ trace_id: 'n8n:' + $json.workflow + ':' + $now.toISO(), text: 'source=n8n workflow=' + $json.workflow + ' severity=' + $json.severity + ' kind=' + $json.event_kind + ' subject=' + $json.subject + ' summary=' + $json.summary + ' dedupe_key=' + $json.dedupe_key + ' dry_run=true authority=no-send,no-restart,no-memory' }) }}",
|
||||||
|
"options": {
|
||||||
|
"timeout": 20000
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "http-advisory-classify",
|
||||||
|
"name": "HTTP advisory classify dry-run",
|
||||||
|
"type": "n8n-nodes-base.httpRequest",
|
||||||
|
"typeVersion": 4,
|
||||||
|
"position": [520, 300]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"jsCode": "const env = $json;\nconst labels = env.result?.labels || {};\nconst urgency = labels.urgency?.value || 'normal';\nconst severity = $('Set normalized advisory event').first().json.severity || 'normal';\nconst npuOk = env.npu_proof?.ok === true && (env.npu_proof?.npu_busy_delta_us || 0) > 0;\nlet recommendation = 'log';\nlet classification = 'no_op';\nconst reason_codes = [npuOk ? 'npu_proof_ok' : 'npu_proof_failed', `severity_${severity}`, `urgency_${urgency}`];\nif (npuOk && severity === 'critical') { recommendation = 'escalate'; classification = 'action_required'; }\nelse if (npuOk && (severity === 'warning' || urgency === 'high' || urgency === 'critical')) { recommendation = 'summarize'; classification = 'action_required'; }\nif (!npuOk) reason_codes.push('log_only_no_npu_claim');\nreturn [{ json: { schema: 'cron_n8n_advisory_decision_v1', trace_id: env.trace_id, source: 'n8n', workflow: $('Set normalized advisory event').first().json.workflow, dry_run: true, recommendation, classification, confidence: labels.urgency?.confidence || 0, reason_codes, npu_proof: env.npu_proof, authority: env.authority, next_gate: (recommendation === 'summarize' || recommendation === 'escalate') ? 'human_or_atlas_review_required_before_any_side_effect' : 'none_in_dry_run' } } }];"
|
||||||
|
},
|
||||||
|
"id": "map-dry-run-decision",
|
||||||
|
"name": "Map dry-run decision (no side effects)",
|
||||||
|
"type": "n8n-nodes-base.code",
|
||||||
|
"typeVersion": 2,
|
||||||
|
"position": [780, 300]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"connections": {
|
||||||
|
"Set normalized advisory event": {
|
||||||
|
"main": [[{"node": "HTTP advisory classify dry-run", "type": "main", "index": 0}]]
|
||||||
|
},
|
||||||
|
"HTTP advisory classify dry-run": {
|
||||||
|
"main": [[{"node": "Map dry-run decision (no side effects)", "type": "main", "index": 0}]]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"settings": {
|
||||||
|
"executionOrder": "v1"
|
||||||
|
},
|
||||||
|
"pinData": {},
|
||||||
|
"staticData": null,
|
||||||
|
"tags": ["dry-run", "openvino", "advisory"]
|
||||||
|
}
|
||||||
@@ -0,0 +1,374 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Local-only advisory gateway for OpenVINO NPU sidecars.
|
||||||
|
|
||||||
|
This service deliberately returns bounded advisory envelopes. It never routes,
|
||||||
|
writes memory, sends external messages, executes tools, restarts services, or
|
||||||
|
broadens document processing authority. Atlas/Hermes may use these outputs as
|
||||||
|
hints only.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import hashlib
|
||||||
|
import ipaddress
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import time
|
||||||
|
import urllib.request
|
||||||
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Callable
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
# --- Bind defaults -----------------------------------------------------------
HOST = "127.0.0.1"
DOCKER_BRIDGE_HOST = "172.19.0.1"
PORT = 18830

# --- Upstream sidecar endpoints (loopback only) ------------------------------
CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
GENAI_URL = "http://127.0.0.1:18820/v1/worker/generate"
DOC_TRIAGE_URL = "http://127.0.0.1:18829/triage"

# --- Storage and file-scope defaults (overridable via environment) -----------
DEFAULT_LOG_DB = Path(
    os.environ.get(
        "NPU_ADVISORY_LOG_DB",
        "/home/will/.local/state/openvino-advisory-gateway/events.sqlite",
    )
)
DEFAULT_ALLOWED_ROOT = Path("/home/will/lab/swarm/openvino-doc-image-triage-npu")
# NPU_ADVISORY_ALLOWED_ROOTS is an os.pathsep-separated list; empty entries are dropped.
DEFAULT_ALLOWED_ROOTS = [
    Path(entry)
    for entry in os.environ.get("NPU_ADVISORY_ALLOWED_ROOTS", str(DEFAULT_ALLOWED_ROOT)).split(os.pathsep)
    if entry
]

# Generation jobs the gateway is willing to forward to the GenAI worker.
ALLOWED_GENAI_JOBS = {"title", "summary", "notification", "memory_candidate"}

# Authority flags copied into every envelope and health response; all False.
AUTHORITY = dict.fromkeys(
    (
        "may_route",
        "may_write_memory",
        "may_send_external",
        "may_process_private_dirs",
        "may_execute_tools",
        "may_restart_services",
    ),
    False,
)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_bind_host(host: str, *, allow_docker_bridge: bool = False) -> None:
    """Reject any bind address other than loopback or the approved Docker bridge IP.

    Args:
        host: Address the server is about to bind to.
        allow_docker_bridge: Explicit opt-in required for any non-loopback bind.

    Raises:
        ValueError: If ``host`` is not ``127.0.0.1`` and the opt-in is missing,
            if ``host`` is not a literal IP address, or if it is anything other
            than ``DOCKER_BRIDGE_HOST`` (which must also be a private,
            non-loopback, non-unspecified IPv4 address).
    """
    if host == "127.0.0.1":
        return None

    if allow_docker_bridge:
        try:
            parsed = ipaddress.ip_address(host)
        except ValueError as exc:
            raise ValueError("bind host must be a literal IP address") from exc

        private_v4 = parsed.version == 4 and parsed.is_private
        usable = private_v4 and not (parsed.is_loopback or parsed.is_unspecified)
        if host == DOCKER_BRIDGE_HOST and usable:
            return None
        raise ValueError(f"Docker bridge bind must use approved bridge IP {DOCKER_BRIDGE_HOST}")

    raise ValueError("refusing non-local bind without --allow-docker-bridge")
|
||||||
|
|
||||||
|
|
||||||
|
def sha256_text(text: str) -> str:
    """Return the lowercase hex SHA-256 digest of ``text`` encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def http_post_json(url: str, payload: dict[str, Any], timeout_s: float = 20.0) -> dict[str, Any]:
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON response.

    Args:
        url: Target endpoint.
        payload: JSON-serializable request body.
        timeout_s: Timeout handed to ``urllib.request.urlopen``.

    Returns:
        The response body parsed with ``json.loads``.
    """
    encoded = json.dumps(payload).encode("utf-8")
    request = urllib.request.Request(
        url,
        data=encoded,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout_s) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def http_get_json(url: str, timeout_s: float = 8.0) -> dict[str, Any]:
    """GET ``url`` and return its body as JSON.

    A body that is not valid JSON is not an error: it is reported as
    ``{"ok": True, "raw_text": <first 120 characters>}``.
    """
    with urllib.request.urlopen(url, timeout=timeout_s) as response:
        text = response.read().decode("utf-8")

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Plain-text health endpoints still count as reachable.
        parsed = {"ok": True, "raw_text": text[:120]}
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _npu_delta_from(result: dict[str, Any], fallback: int | None = None) -> int | None:
|
||||||
|
for key in ("npu_busy_delta_us", "sysfs_npu_busy_delta_us"):
|
||||||
|
value = result.get(key)
|
||||||
|
if isinstance(value, int):
|
||||||
|
return value
|
||||||
|
if isinstance(value, float):
|
||||||
|
return int(value)
|
||||||
|
return fallback
|
||||||
|
|
||||||
|
|
||||||
|
def _doc_triage_npu_delta(result: dict[str, Any]) -> int | None:
|
||||||
|
pages = ((result.get("result") or {}).get("pages") or []) if isinstance(result, dict) else []
|
||||||
|
best: int | None = None
|
||||||
|
for page in pages:
|
||||||
|
emb = ((page.get("needs_attention") or {}).get("embedding") or {}) if isinstance(page, dict) else {}
|
||||||
|
delta = emb.get("npu_busy_delta_us")
|
||||||
|
if isinstance(delta, int):
|
||||||
|
best = max(best or 0, delta)
|
||||||
|
return best
|
||||||
|
|
||||||
|
|
||||||
|
def build_envelope(
    *,
    service: str,
    operation: str,
    result: dict[str, Any],
    mode: str = "advisory",
    input_scope: str,
    npu_busy_delta_us: int | None,
    trace_id: str | None = None,
    warnings: list[str] | None = None,
) -> dict[str, Any]:
    """Wrap a sidecar result in the ``openvino_advisory_v1`` envelope.

    The NPU proof is marked ok only when ``npu_busy_delta_us`` is an ``int``
    greater than zero. Each envelope carries its own copy of ``AUTHORITY``.

    Returns:
        A dict with keys ``ok``, ``schema``, ``service``, ``operation``,
        ``mode``, ``trace_id``, ``input_scope``, ``result``, ``npu_proof``,
        ``authority`` and ``warnings``.
    """
    has_positive_delta = isinstance(npu_busy_delta_us, int) and npu_busy_delta_us > 0
    proof = {
        "required": True,
        "ok": bool(has_positive_delta),
        "npu_busy_delta_us": npu_busy_delta_us,
    }

    envelope: dict[str, Any] = {"ok": True, "schema": "openvino_advisory_v1"}
    envelope["service"] = service
    envelope["operation"] = operation
    envelope["mode"] = mode
    envelope["trace_id"] = trace_id
    envelope["input_scope"] = input_scope
    envelope["result"] = result
    envelope["npu_proof"] = proof
    envelope["authority"] = dict(AUTHORITY)
    envelope["warnings"] = warnings if warnings else []
    return envelope
|
||||||
|
|
||||||
|
|
||||||
|
class AdvisoryLogger:
    """Metadata-only SQLite log of advisory envelopes.

    One row is written per envelope to the ``advisory_events`` table. The
    ``raw_payload`` column exists in the schema but is always written as NULL.

    Every operation opens its own connection and closes it explicitly.
    ``with sqlite3.connect(...)`` on its own only commits or rolls back the
    transaction and leaves the connection open.
    """

    def __init__(self, db_path: str | Path = DEFAULT_LOG_DB):
        """Create the parent directory and the table if they do not exist yet."""
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init()

    def _init(self) -> None:
        """Create the ``advisory_events`` table when it is missing."""
        con = sqlite3.connect(self.db_path)
        try:
            with con:  # commit on success, roll back on error
                con.execute(
                    """
                    CREATE TABLE IF NOT EXISTS advisory_events (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    created_at REAL NOT NULL,
                    service TEXT NOT NULL,
                    operation TEXT NOT NULL,
                    mode TEXT NOT NULL,
                    input_scope TEXT NOT NULL,
                    input_ref TEXT NOT NULL,
                    npu_busy_delta_us INTEGER,
                    ok INTEGER NOT NULL,
                    raw_payload TEXT
                    )
                    """
                )
        finally:
            con.close()

    def log(self, envelope: dict[str, Any], *, input_ref: str) -> None:
        """Append one metadata row describing ``envelope``.

        Args:
            envelope: Envelope dict; ``service``, ``operation``, ``mode``,
                ``input_scope``, ``ok`` and ``npu_proof.npu_busy_delta_us``
                are read from it.
            input_ref: Opaque reference to the input, stored as given.
        """
        proof = envelope.get("npu_proof") or {}
        con = sqlite3.connect(self.db_path)
        try:
            with con:  # commit on success, roll back on error
                con.execute(
                    """
                    INSERT INTO advisory_events(created_at, service, operation, mode, input_scope, input_ref,
                    npu_busy_delta_us, ok, raw_payload)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, NULL)
                    """,
                    (
                        time.time(),
                        str(envelope.get("service")),
                        str(envelope.get("operation")),
                        str(envelope.get("mode")),
                        str(envelope.get("input_scope")),
                        input_ref,
                        proof.get("npu_busy_delta_us"),
                        1 if envelope.get("ok") else 0,
                    ),
                )
        finally:
            con.close()
|
||||||
|
|
||||||
|
|
||||||
|
def classify_text(
    text: str,
    *,
    trace_id: str | None = None,
    http_post_json: Callable[[str, dict[str, Any], float], dict[str, Any]] = http_post_json,
    logger: AdvisoryLogger | None = None,
    timeout_s: float = 20.0,
) -> dict[str, Any]:
    """Classify ``text`` through the classifier sidecar and return a shadow-mode envelope.

    Args:
        text: Non-empty text to classify.
        trace_id: Optional correlation id; also sent as the request ``id``
            (``"advisory"`` when absent).
        http_post_json: Injectable POST helper.
        logger: When given, a row keyed by the SHA-256 of ``text`` is logged.
        timeout_s: Timeout passed to ``http_post_json``.

    Raises:
        ValueError: If ``text`` is not a non-blank string.
    """
    if not (isinstance(text, str) and text.strip()):
        raise ValueError("text must be a non-empty string")

    request_body = {
        "id": trace_id or "advisory",
        "text": text,
        "options": {"include_evidence": False, "dry_run": True},
    }
    sidecar = http_post_json(CLASSIFIER_URL, request_body, timeout_s)

    summary = {
        "labels": sidecar.get("labels", {}),
        "model": sidecar.get("model"),
        "service_mode": sidecar.get("mode", "dry_run"),
    }
    envelope = build_envelope(
        service="classifier",
        operation="classify",
        mode="shadow",
        input_scope="explicit_text",
        trace_id=trace_id,
        result=summary,
        npu_busy_delta_us=_npu_delta_from(sidecar),
    )

    if logger:
        # Only a hash of the text is stored, never the text itself.
        logger.log(envelope, input_ref="text:sha256:" + sha256_text(text))
    return envelope
|
||||||
|
|
||||||
|
|
||||||
|
def generate_bounded(
    job: str,
    text: str,
    *,
    max_new_tokens: int | None = None,
    trace_id: str | None = None,
    http_post_json: Callable[[str, dict[str, Any], float], dict[str, Any]] = http_post_json,
    logger: AdvisoryLogger | None = None,
    timeout_s: float = 180.0,
) -> dict[str, Any]:
    """Request a bounded draft from the GenAI worker and wrap it in a draft envelope.

    Args:
        job: One of ``ALLOWED_GENAI_JOBS``.
        text: Non-empty input text, sent as ``input``.
        max_new_tokens: Optional generation limit, forwarded when given. It
            must be a positive ``int``. The value reaches this function from
            the request body, so anything else is rejected here instead of
            being passed on to the worker.
        trace_id: Optional correlation id stored in the envelope.
        http_post_json: Injectable POST helper.
        logger: When given, a row keyed by the SHA-256 of ``text`` is logged.
        timeout_s: Timeout passed to ``http_post_json``.

    Raises:
        ValueError: For an unsupported job, blank input, or an invalid
            ``max_new_tokens``.
    """
    if job not in ALLOWED_GENAI_JOBS:
        raise ValueError("unsupported advisory generation job")
    if not isinstance(text, str) or not text.strip():
        raise ValueError("input must be a non-empty string")

    payload: dict[str, Any] = {"job": job, "input": text}
    if max_new_tokens is not None:
        # bool is a subclass of int, so it has to be excluded explicitly.
        if isinstance(max_new_tokens, bool) or not isinstance(max_new_tokens, int) or max_new_tokens < 1:
            raise ValueError("max_new_tokens must be a positive integer")
        payload["max_new_tokens"] = max_new_tokens

    result = http_post_json(GENAI_URL, payload, timeout_s)
    envelope = build_envelope(
        service="genai",
        operation=f"generate:{job}",
        mode="draft",
        input_scope="explicit_text",
        trace_id=trace_id,
        result={
            "draft_text": result.get("text", ""),
            "json": result.get("json"),
            "timing_ms": result.get("timing_ms"),
            "final_authority": False,
        },
        npu_busy_delta_us=_npu_delta_from(result),
    )
    if logger:
        # Only a hash of the input is stored, never the input itself.
        logger.log(envelope, input_ref="text:sha256:" + sha256_text(text))
    return envelope
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_allowed(path: str, allowed_roots: list[str] | None, configured_roots: list[Path] | None = None) -> tuple[Path, list[Path]]:
|
||||||
|
configured = [p.expanduser().resolve() for p in (configured_roots or DEFAULT_ALLOWED_ROOTS)]
|
||||||
|
if not configured:
|
||||||
|
raise ValueError("at least one configured allowed root is required")
|
||||||
|
requested = [Path(p).expanduser().resolve() for p in (allowed_roots or [str(p) for p in configured])]
|
||||||
|
if not requested:
|
||||||
|
raise ValueError("at least one requested allowed root is required")
|
||||||
|
for root in requested:
|
||||||
|
if not any(root == base or root.is_relative_to(base) for base in configured):
|
||||||
|
raise ValueError("requested allowed root is outside configured roots")
|
||||||
|
roots = requested
|
||||||
|
candidate = Path(path).expanduser().resolve()
|
||||||
|
if not any(candidate == root or candidate.is_relative_to(root) for root in roots):
|
||||||
|
raise ValueError("path must be inside an allowed root")
|
||||||
|
if not candidate.exists() or not candidate.is_file():
|
||||||
|
raise ValueError("path must be an existing file")
|
||||||
|
return candidate, roots
|
||||||
|
|
||||||
|
|
||||||
|
def triage_file(
    path: str,
    *,
    allowed_roots: list[str] | None = None,
    configured_roots: list[Path] | None = None,
    trace_id: str | None = None,
    http_post_json: Callable[[str, dict[str, Any], float], dict[str, Any]] = http_post_json,
    logger: AdvisoryLogger | None = None,
    timeout_s: float = 60.0,
) -> dict[str, Any]:
    """Send one explicitly named file to the doc-triage sidecar.

    The path is validated by ``_resolve_allowed`` first. The sidecar receives
    the resolved path, the resolved roots and ``max_pages`` fixed at 3.

    Returns:
        A ``reviewable_artifact`` envelope whose NPU proof comes from the
        per-page embedding deltas. When a logger is supplied, a warning about
        metadata-only logging is added and a row keyed by the SHA-256 of the
        resolved path is written.

    Raises:
        ValueError: Propagated from ``_resolve_allowed``.
    """
    resolved, roots = _resolve_allowed(path, allowed_roots, configured_roots)

    request_body = {
        "path": str(resolved),
        "options": {"allowed_roots": [str(root) for root in roots], "max_pages": 3},
    }
    sidecar = http_post_json(DOC_TRIAGE_URL, request_body, timeout_s)

    envelope = build_envelope(
        service="doc_triage",
        operation="triage_file",
        mode="reviewable_artifact",
        input_scope="explicit_file",
        trace_id=trace_id,
        result={"triage": sidecar.get("result"), "final_authority": False},
        npu_busy_delta_us=_doc_triage_npu_delta(sidecar),
    )

    if not logger:
        return envelope
    envelope["warnings"].append("metadata-only log; raw file contents are not logged")
    logger.log(envelope, input_ref="file:sha256path:" + sha256_text(str(resolved)))
    return envelope
|
||||||
|
|
||||||
|
|
||||||
|
def health(*, http_get_json: Callable[[str, float], dict[str, Any]] = http_get_json) -> dict[str, Any]:
    """Probe the three sidecar ``/healthz`` endpoints and aggregate the outcome.

    Each dependency is queried with an 8 second timeout. A dependency counts
    as ok when its response has a truthy ``ok`` or, if ``ok`` is absent,
    ``status == "ok"``. Any exception marks that dependency and the overall
    result as not ok and records ``str(exc)``.

    Returns:
        A dict with ``ok``, ``service``, ``mode``, ``authority`` and a
        per-dependency ``dependencies`` mapping.
    """
    targets = (
        ("classifier", "http://127.0.0.1:18819/healthz"),
        ("genai", "http://127.0.0.1:18820/healthz"),
        ("doc_triage", "http://127.0.0.1:18829/healthz"),
    )
    report: dict[str, Any] = {
        "ok": True,
        "service": "openvino-advisory-gateway",
        "mode": "advisory_only",
        "authority": dict(AUTHORITY),
        "dependencies": {},
    }
    statuses = report["dependencies"]

    for name, url in targets:
        try:
            data = http_get_json(url, 8.0)
            reported_ok = data.get("ok", data.get("status") == "ok")
            statuses[name] = {"ok": bool(reported_ok), "service": data.get("service"), "device": data.get("device")}
        except Exception as exc:
            report["ok"] = False
            statuses[name] = {"ok": False, "error": str(exc)}
    return report
|
||||||
|
|
||||||
|
|
||||||
|
def _read_json(handler: BaseHTTPRequestHandler, max_bytes: int = 256 * 1024) -> dict[str, Any]:
|
||||||
|
length = int(handler.headers.get("Content-Length", "0"))
|
||||||
|
if length > max_bytes:
|
||||||
|
raise ValueError("request JSON too large")
|
||||||
|
raw = handler.rfile.read(length)
|
||||||
|
if not raw:
|
||||||
|
return {}
|
||||||
|
return json.loads(raw.decode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def make_handler(logger: AdvisoryLogger, configured_roots: list[Path]):
    """Build the request handler class bound to ``logger`` and ``configured_roots``.

    Routes:
        GET  ``/``, ``/health``, ``/healthz``  -> aggregated dependency health
        POST ``/v1/advisory/classify``          -> ``classify_text``
        POST ``/v1/advisory/generate``          -> ``generate_bounded``
        POST ``/v1/advisory/triage``            -> ``triage_file``

    Unknown paths return 404. Any exception raised while handling a POST is
    reported as HTTP 400 with the exception type, message and the authority flags.
    """

    class Handler(BaseHTTPRequestHandler):
        server_version = "openvino-advisory-gateway/0.1"

        def log_message(self, format: str, *args: Any) -> None:  # noqa: A002 - stdlib override name
            # Do not log request bodies or private paths.
            client_ip = self.client_address[0]
            print(f"{client_ip} {format % args}")

        def send_json(self, status: int, payload: Any) -> None:
            """Serialize ``payload`` as indented, key-sorted JSON and send it."""
            encoded = json.dumps(payload, indent=2, sort_keys=True).encode("utf-8")
            self.send_response(status)
            for header, value in (("Content-Type", "application/json"), ("Content-Length", str(len(encoded)))):
                self.send_header(header, value)
            self.end_headers()
            self.wfile.write(encoded)

        def do_GET(self) -> None:  # noqa: N802
            if urlparse(self.path).path not in ("/", "/health", "/healthz"):
                self.send_json(404, {"ok": False, "error": "not_found"})
                return
            self.send_json(200, health())

        def do_POST(self) -> None:  # noqa: N802
            route = urlparse(self.path).path
            try:
                # The body is read before routing, so a bad body yields 400 even on unknown paths.
                body = _read_json(self)
                trace_id = body.get("trace_id")
                if route == "/v1/advisory/classify":
                    envelope = classify_text(str(body.get("text", "")), trace_id=trace_id, logger=logger)
                elif route == "/v1/advisory/generate":
                    envelope = generate_bounded(
                        str(body.get("job", "summary")),
                        str(body.get("input", "")),
                        max_new_tokens=body.get("max_new_tokens"),
                        trace_id=trace_id,
                        logger=logger,
                    )
                elif route == "/v1/advisory/triage":
                    envelope = triage_file(
                        str(body.get("path", "")),
                        allowed_roots=body.get("allowed_roots"),
                        configured_roots=configured_roots,
                        trace_id=trace_id,
                        logger=logger,
                    )
                else:
                    self.send_json(404, {"ok": False, "error": "not_found"})
                    return
                self.send_json(200, envelope)
            except Exception as exc:
                failure = {"ok": False, "error": type(exc).__name__, "message": str(exc), "authority": dict(AUTHORITY)}
                self.send_json(400, failure)

    return Handler
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Parse CLI/environment settings, validate the bind host and serve forever.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` once ``serve_forever`` returns.

    Raises:
        SystemExit: With the validation message when the bind host is refused.
    """
    env = os.environ
    bridge_opt_in = env.get("NPU_ADVISORY_ALLOW_DOCKER_BRIDGE", "").lower() in {"1", "true", "yes"}

    parser = argparse.ArgumentParser(description="Local-only OpenVINO NPU advisory gateway")
    parser.add_argument("--host", default=env.get("NPU_ADVISORY_HOST", HOST))
    parser.add_argument("--port", type=int, default=int(env.get("NPU_ADVISORY_PORT", str(PORT))))
    parser.add_argument("--log-db", default=str(DEFAULT_LOG_DB))
    parser.add_argument(
        "--allowed-root",
        action="append",
        dest="allowed_roots",
        default=None,
        help="Configured file root allowed for advisory doc/image triage. May be repeated.",
    )
    parser.add_argument(
        "--allow-docker-bridge",
        action="store_true",
        default=bridge_opt_in,
        help="Permit binding to a private Docker bridge IP instead of 127.0.0.1.",
    )
    options = parser.parse_args(argv)

    try:
        validate_bind_host(options.host, allow_docker_bridge=options.allow_docker_bridge)
    except ValueError as exc:
        raise SystemExit(str(exc)) from exc

    root_sources = options.allowed_roots or DEFAULT_ALLOWED_ROOTS
    configured_roots = [Path(entry).expanduser().resolve() for entry in root_sources]
    handler_cls = make_handler(AdvisoryLogger(options.log_db), configured_roots)
    server = ThreadingHTTPServer((options.host, options.port), handler_cls)

    # One JSON line on startup so supervisors can see the effective bind.
    startup = {"service": "openvino-advisory-gateway", "host": options.host, "port": options.port, "mode": "advisory_only"}
    print(json.dumps(startup), flush=True)
    server.serve_forever()
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=OpenVINO NPU advisory gateway (Docker bridge, port 18830)
|
||||||
|
After=network.target openvino-router-classifier.service openvino-genai-npu-worker.service openvino-doc-image-triage.service
|
||||||
|
Wants=openvino-router-classifier.service openvino-genai-npu-worker.service openvino-doc-image-triage.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
WorkingDirectory=/home/will/lab/swarm/openvino-advisory-gateway
|
||||||
|
Environment=NPU_ADVISORY_HOST=172.19.0.1
|
||||||
|
Environment=NPU_ADVISORY_PORT=18830
|
||||||
|
Environment=NPU_ADVISORY_ALLOW_DOCKER_BRIDGE=true
|
||||||
|
Environment=NPU_ADVISORY_LOG_DB=/home/will/.local/state/openvino-advisory-gateway/events.sqlite
|
||||||
|
ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-advisory-gateway/gateway.py --host 172.19.0.1 --port 18830 --allow-docker-bridge --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=default.target
|
||||||
@@ -0,0 +1,146 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
||||||
|
import gateway
|
||||||
|
|
||||||
|
|
||||||
|
def test_authority_envelope_is_advisory_and_forbids_side_effects() -> None:
    """An envelope keeps its mode, denies every authority flag and reports NPU proof."""
    forbidden = (
        "may_route",
        "may_write_memory",
        "may_send_external",
        "may_process_private_dirs",
        "may_execute_tools",
        "may_restart_services",
    )
    envelope = gateway.build_envelope(
        service="classifier",
        operation="classify",
        mode="shadow",
        result={"labels": {"workflow_category": {"value": "devops"}}},
        npu_busy_delta_us=123,
        input_scope="explicit_text",
    )

    assert envelope["ok"] is True
    assert envelope["mode"] == "shadow"
    assert envelope["authority"] == dict.fromkeys(forbidden, False)
    assert envelope["npu_proof"] == {"required": True, "ok": True, "npu_busy_delta_us": 123}
|
||||||
|
|
||||||
|
|
||||||
|
def test_bind_host_requires_explicit_docker_bridge_approval() -> None:
    """Loopback always binds; the bridge IP needs the opt-in; wildcard binds never pass."""
    bridge_ip = "172.19.0.1"

    # Loopback needs no opt-in.
    gateway.validate_bind_host("127.0.0.1")

    # The bridge IP is refused without the flag and accepted with it.
    with pytest.raises(ValueError, match="without --allow-docker-bridge"):
        gateway.validate_bind_host(bridge_ip)
    gateway.validate_bind_host(bridge_ip, allow_docker_bridge=True)

    # The opt-in does not unlock other addresses.
    with pytest.raises(ValueError, match="approved bridge IP"):
        gateway.validate_bind_host("0.0.0.0", allow_docker_bridge=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_classify_calls_sidecar_and_logs_metadata_only(tmp_path: Path) -> None:
    """classify_text posts a dry-run request and logs only a hash of the input."""
    db_file = tmp_path / "events.sqlite"
    sample_text = "Inspect live service status"
    calls: list[tuple[str, dict]] = []

    def fake_post(url: str, payload: dict, timeout_s: float) -> dict:
        calls.append((url, payload))
        return {
            "labels": {"tool_needed": {"value": True}},
            "npu_busy_delta_us": 55,
            "sysfs_npu_busy_delta_us": 55,
        }

    env = gateway.classify_text(
        sample_text,
        trace_id="t1",
        http_post_json=fake_post,
        logger=gateway.AdvisoryLogger(db_file),
    )

    posted_url, posted_payload = calls[0]
    assert posted_url.endswith(":18819/v1/classify")
    assert posted_payload["options"]["dry_run"] is True
    assert env["service"] == "classifier"
    assert env["authority"]["may_route"] is False
    assert env["npu_proof"]["ok"] is True

    with sqlite3.connect(db_file) as con:
        row = con.execute("select service, operation, input_ref, raw_payload from advisory_events").fetchone()
    expected_ref = "text:sha256:" + gateway.sha256_text(sample_text)
    assert row == ("classifier", "classify", expected_ref, None)
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_allows_only_bounded_jobs() -> None:
    """A job outside the allow-list is rejected."""

    def unused_post(*_: object) -> dict:
        return {}

    with pytest.raises(ValueError, match="unsupported advisory generation job"):
        gateway.generate_bounded("primary_chat", "hello", http_post_json=unused_post)
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_wraps_draft_without_final_authority() -> None:
    """Generated text comes back as a draft with no final or external authority."""
    canned = {"text": "Short title", "npu_busy_delta_us": 99, "timing_ms": {"total": 10}}

    def fake_post(url: str, payload: dict, timeout_s: float) -> dict:
        return canned

    env = gateway.generate_bounded("title", "Summarize this local health check", http_post_json=fake_post)
    draft = env["result"]

    assert (env["service"], env["operation"]) == ("genai", "generate:title")
    assert draft["draft_text"] == "Short title"
    assert draft["final_authority"] is False
    assert env["authority"]["may_send_external"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_doc_triage_requires_explicit_file_under_allowed_root(tmp_path: Path) -> None:
    """A file inside the allowed root is forwarded by resolved path and yields NPU proof."""
    allowed = tmp_path / "allowed"
    allowed.mkdir()
    target = allowed / "synthetic.png"
    target.write_bytes(b"not real image for unit test")

    def fake_post(url: str, payload: dict, timeout_s: float) -> dict:
        assert payload["path"] == str(target.resolve())
        assert payload["options"]["allowed_roots"] == [str(allowed.resolve())]
        embedding = {"verified_npu": True, "npu_busy_delta_us": 42}
        return {"ok": True, "result": {"pages": [{"needs_attention": {"embedding": embedding}}]}}

    env = gateway.triage_file(
        str(target),
        allowed_roots=[str(allowed)],
        configured_roots=[allowed],
        http_post_json=fake_post,
    )

    assert env["service"] == "doc_triage"
    assert env["input_scope"] == "explicit_file"
    assert env["npu_proof"]["ok"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_doc_triage_rejects_private_root_broadening(tmp_path: Path) -> None:
    """A path outside the allowed root is refused."""
    allowed = tmp_path / "allowed"
    allowed.mkdir()
    outside = tmp_path / "outside.png"

    def unused_post(*_: object) -> dict:
        return {}

    with pytest.raises(ValueError, match="path must be inside an allowed root"):
        gateway.triage_file(
            str(outside),
            allowed_roots=[str(allowed)],
            configured_roots=[allowed],
            http_post_json=unused_post,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def test_doc_triage_rejects_requested_root_outside_configured_roots(tmp_path: Path) -> None:
    """A caller cannot widen scope by requesting a root outside the configured ones."""
    configured = tmp_path / "configured"
    requested = tmp_path / "private"
    requested.mkdir()
    target = requested / "file.png"
    target.write_bytes(b"synthetic")

    def unused_post(*_: object) -> dict:
        return {}

    call_kwargs = {
        "allowed_roots": [str(requested)],
        "configured_roots": [configured],
        "http_post_json": unused_post,
    }
    with pytest.raises(ValueError, match="requested allowed root is outside configured roots"):
        gateway.triage_file(str(target), **call_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def test_health_aggregates_dependencies_without_raw_private_data() -> None:
    """health() reports all three dependencies and nothing containing "raw"."""
    expected_names = {"classifier", "genai", "doc_triage"}

    def fake_get(url: str, timeout_s: float) -> dict:
        # Echo the text after the last colon of the URL as the service name.
        return {"ok": True, "service": url.rsplit(":", 1)[-1]}

    report = gateway.health(http_get_json=fake_get)
    serialized = json.dumps(report).lower()

    assert report["ok"] is True
    assert set(report["dependencies"]) == expected_names
    assert "raw" not in serialized
|
||||||
@@ -0,0 +1,339 @@
|
|||||||
|
# OpenVINO NPU classifier/router dry-run contract
|
||||||
|
|
||||||
|
Status: specification for dry-run prototype refresh
|
||||||
|
Target port: `127.0.0.1:18819`
|
||||||
|
Owner context: Atlas/Hermes local assistant sidecar evaluation
|
||||||
|
|
||||||
|
This service is an advisory classifier for Atlas/Hermes automation hints. It may suggest labels such as tool-needed, memory-candidate type, urgency, workflow category, and safety-confirmation-required, but it must not make or enforce live routing, memory, tool, or safety decisions without a separate explicit approval from Will.
|
||||||
|
|
||||||
|
## Recommended model and runtime
|
||||||
|
|
||||||
|
Recommended v1 runtime: small local Python HTTP/CLI service backed by the existing OpenVINO NPU embeddings service on `127.0.0.1:18817`.
|
||||||
|
|
||||||
|
Recommended v1 model shape:
|
||||||
|
|
||||||
|
- Primary signal: `bge-base-en-v1.5-int8-ov` embeddings from the live embeddings service.
|
||||||
|
- Classifier layer: inspectable deterministic rules plus cosine similarity against curated synthetic/prototype utterances.
|
||||||
|
- Model label: `bge-base-en-v1.5-int8-ov/prototype-router-v0`.
|
||||||
|
- Device proof: request-level `npu_busy_delta_us` from `:18817` plus direct sysfs before/after reads from `/sys/class/accel/accel0/device/npu_busy_time_us`.
|
||||||
|
|
||||||
|
Why this is preferred for the dry run:
|
||||||
|
|
||||||
|
1. It reuses the already-live NPU embeddings path rather than adding a second model conversion/runtime dependency before contract validation.
|
||||||
|
2. Rules and prototypes are transparent enough for safety-sensitive routing hints; a reviewer can inspect why a message was labeled.
|
||||||
|
3. It avoids fine-tuning or training on private Atlas/Hermes transcripts.
|
||||||
|
4. It keeps the service small, localhost-only, and easy to start/stop during smoke tests.
|
||||||
|
5. It produces NPU activity through the embeddings path while making clear that final decision logic remains advisory.
|
||||||
|
|
||||||
|
Defer a dedicated NPU sequence-classification model such as TinyBERT/MiniLM until the dry-run labels and thresholds have been evaluated against synthetic fixtures and explicitly-approved non-private examples. If pursued later, use OpenVINO Runtime/Optimum export with fixed input shapes suitable for NPU, and keep the rule layer for safety gates.
|
||||||
|
|
||||||
|
## Non-goals and safety invariants
|
||||||
|
|
||||||
|
The service must not:
|
||||||
|
|
||||||
|
- Change Hermes/Atlas model routing, gateway routing, memory writes, tool-use permissions, or safety-confirmation behavior.
|
||||||
|
- Restart, stop, enable, or persist any live Atlas/Hermes/gateway/RAG service.
|
||||||
|
- Bind to anything broader than `127.0.0.1` by default.
|
||||||
|
- Mutate Chroma/vector collections, trigger reindexing, or write to RAG state.
|
||||||
|
- Process private document/image directories or private transcript dumps for smoke testing.
|
||||||
|
- Log raw prompts by default beyond normal foreground stderr during local review.
|
||||||
|
- Claim NPU success from HTTP 200 alone.
|
||||||
|
|
||||||
|
## Endpoint contract
|
||||||
|
|
||||||
|
All HTTP endpoints are local-only by default.
|
||||||
|
|
||||||
|
Base URL:
|
||||||
|
|
||||||
|
```text
|
||||||
|
http://127.0.0.1:18819
|
||||||
|
```
|
||||||
|
|
||||||
|
### GET `/healthz`, `/health`, `/readyz`, `/`
|
||||||
|
|
||||||
|
Purpose: liveness/readiness metadata.
|
||||||
|
|
||||||
|
Response fields:
|
||||||
|
|
||||||
|
- `status`: `starting | ok`
|
||||||
|
- `service`: `atlas-router-classifier`
|
||||||
|
- `version`: service version string
|
||||||
|
- `mode`: always `dry_run`
|
||||||
|
- `model`: model/runtime label
|
||||||
|
- `embed_url`: upstream embeddings URL
|
||||||
|
- `device`: expected to say `NPU-via-embedding-service` or equivalent
|
||||||
|
- `labels`: supported label names
|
||||||
|
- `embedding_dim`: embedding dimension after warmup
|
||||||
|
- `prototype_count`: number of synthetic prototype examples loaded
|
||||||
|
- `prototype_npu_busy_delta_us`: warmup delta reported by upstream embeddings, if available
|
||||||
|
- `npu_busy_time_us`: current sysfs counter value, if readable
|
||||||
|
- `warnings`: list of non-fatal warnings
|
||||||
|
|
||||||
|
A healthy service is not enough to prove NPU execution. At least one classification request must also show both a positive request-level delta (`npu_busy_delta_us`) and a positive sysfs delta (`sysfs_npu_busy_delta_us`).
|
||||||
|
|
||||||
|
### GET `/v1/labels`
|
||||||
|
|
||||||
|
Purpose: publish schema information without dumping private examples.
|
||||||
|
|
||||||
|
Response fields:
|
||||||
|
|
||||||
|
- `model`
|
||||||
|
- `thresholds`
|
||||||
|
- `tool_needed`: recommended threshold `0.72`
|
||||||
|
- `memory_candidate`: recommended threshold `0.78`
|
||||||
|
- `safety_confirmation_required`: recommended threshold `0.80`
|
||||||
|
- `workflow_category`: recommended threshold `0.52`
|
||||||
|
- `enums`
|
||||||
|
- `memory_candidate`: `none`, `user_preference`, `durable_user_fact`, `environment_fact`, `workflow_convention`, `skill_candidate`
|
||||||
|
- `urgency`: `low`, `normal`, `high`, `critical`
|
||||||
|
- `workflow_category`: `chat`, `research`, `coding`, `debugging`, `devops`, `smart_home`, `media`, `note_taking`, `productivity`, `kanban`, `unknown`
|
||||||
|
- `prototype_ids`: names of curated synthetic prototype buckets
|
||||||
|
|
||||||
|
### POST `/v1/classify`
|
||||||
|
|
||||||
|
Purpose: classify one user/task message for advisory dry-run hints.
|
||||||
|
|
||||||
|
Request:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "optional-trace-id",
|
||||||
|
"text": "Urgent: check whether port 18817 is listening and inspect systemd logs.",
|
||||||
|
"context": {
|
||||||
|
"platform": "cli",
|
||||||
|
"source": "user"
|
||||||
|
},
|
||||||
|
"options": {
|
||||||
|
"include_evidence": true,
|
||||||
|
"include_embedding_debug": false,
|
||||||
|
"dry_run": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Required behavior:
|
||||||
|
|
||||||
|
- Reject empty text with HTTP 400.
|
||||||
|
- Default `dry_run` to true.
|
||||||
|
- Produce no side effects other than local inference and response generation.
|
||||||
|
- Include evidence by default unless `include_evidence=false`.
|
||||||
|
- Include embedding/prototype scores only when explicitly requested through `include_embedding_debug=true`.
|
||||||
|
|
||||||
|
Response:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "optional-trace-id",
|
||||||
|
"model": "bge-base-en-v1.5-int8-ov/prototype-router-v0",
|
||||||
|
"created": 1780590000,
|
||||||
|
"duration_ms": 12.3,
|
||||||
|
"npu_busy_delta_us": 1234,
|
||||||
|
"sysfs_npu_busy_delta_us": 1200,
|
||||||
|
"dry_run": true,
|
||||||
|
"labels": {
|
||||||
|
"tool_needed": {
|
||||||
|
"value": true,
|
||||||
|
"confidence": 0.84,
|
||||||
|
"threshold": 0.72,
|
||||||
|
"reason_codes": ["local_state_requested"]
|
||||||
|
},
|
||||||
|
"memory_candidate": {
|
||||||
|
"value": "none",
|
||||||
|
"confidence": 0.31,
|
||||||
|
"threshold": 0.78,
|
||||||
|
"reason_codes": []
|
||||||
|
},
|
||||||
|
"urgency": {
|
||||||
|
"value": "high",
|
||||||
|
"confidence": 0.84,
|
||||||
|
"scores": {"low": 0.0, "normal": 0.2, "high": 0.84, "critical": 0.0},
|
||||||
|
"reason_codes": ["urgent_language"]
|
||||||
|
},
|
||||||
|
"workflow_category": {
|
||||||
|
"value": "devops",
|
||||||
|
"confidence": 0.86,
|
||||||
|
"scores": {"devops": 0.86, "unknown": 0.14}
|
||||||
|
},
|
||||||
|
"safety_confirmation_required": {
|
||||||
|
"value": false,
|
||||||
|
"confidence": 0.0,
|
||||||
|
"threshold": 0.8,
|
||||||
|
"reason_codes": []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"warnings": [],
|
||||||
|
"evidence": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### POST `/v1/batch_classify`
|
||||||
|
|
||||||
|
Purpose: classify a bounded batch of non-private synthetic or explicitly-approved messages.
|
||||||
|
|
||||||
|
Request:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"items": [
|
||||||
|
{"id": "m1", "text": "What time is it in Seattle right now?"},
|
||||||
|
{"id": "m2", "text": "Restart the live Atlas gateway and switch primary routing."}
|
||||||
|
],
|
||||||
|
"options": {"include_evidence": false, "dry_run": true}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Response:
|
||||||
|
|
||||||
|
- `model`
|
||||||
|
- `duration_ms`
|
||||||
|
- aggregate `npu_busy_delta_us`
|
||||||
|
- `results`: array of `/v1/classify` responses
|
||||||
|
|
||||||
|
Batch limits for prototype review:
|
||||||
|
|
||||||
|
- Keep batches small; the prototype rejects empty batches and batches larger than `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE` (default `32`).
|
||||||
|
- Use only synthetic fixtures unless Will explicitly approves a real non-private sample set.
|
||||||
|
- Do not retain request bodies to disk.
|
||||||
|
|
||||||
|
## CLI contract
|
||||||
|
|
||||||
|
The same implementation should support foreground review from the service directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-classifier-npu
|
||||||
|
/home/will/.venvs/npu/bin/python router_classifier.py \
|
||||||
|
--host 127.0.0.1 \
|
||||||
|
--port 18819 \
|
||||||
|
--embed-url http://127.0.0.1:18817/v1/embeddings
|
||||||
|
```
|
||||||
|
|
||||||
|
Flags/env the implementation must support (all are optional at runtime; defaults shown):
|
||||||
|
|
||||||
|
- `--host` / `OPENVINO_CLASSIFIER_HOST`; default `127.0.0.1`.
|
||||||
|
- `--port` / `OPENVINO_CLASSIFIER_PORT`; default `18819`.
|
||||||
|
- `--embed-url` / `OPENVINO_CLASSIFIER_EMBED_URL`; default `http://127.0.0.1:18817/v1/embeddings`.
|
||||||
|
- `--timeout-s` / `OPENVINO_CLASSIFIER_TIMEOUT_S`; default `30`.
|
||||||
|
- `--max-batch-size` / `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE`; default `32`.
|
||||||
|
- `--no-warmup` to defer prototype embedding until first request.
|
||||||
|
|
||||||
|
A future dedicated CLI mode may be added for one-shot JSONL classification, but foreground HTTP review is sufficient for the dry-run contract.
|
||||||
|
|
||||||
|
## Synthetic smoke-test plan
|
||||||
|
|
||||||
|
Preconditions:
|
||||||
|
|
||||||
|
1. Confirm `:18817` embeddings service is healthy.
|
||||||
|
2. Confirm `:18819` is not already listening.
|
||||||
|
3. Read `/sys/class/accel/accel0/device/npu_busy_time_us` before starting the request smoke.
|
||||||
|
4. Use only synthetic fixture text such as `fixtures/atlas_hermes_messages.jsonl`.
|
||||||
|
|
||||||
|
Unit/schema smoke, no NPU dependency:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm
|
||||||
|
/home/will/.venvs/npu/bin/python -m unittest discover -s openvino-classifier-npu/tests -v
|
||||||
|
```
|
||||||
|
|
||||||
|
Foreground service smoke:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ss -ltnp | grep ':18819\b' || true
|
||||||
|
cd /home/will/lab/swarm/openvino-classifier-npu
|
||||||
|
/home/will/.venvs/npu/bin/python router_classifier.py --host 127.0.0.1 --port 18819
|
||||||
|
```
|
||||||
|
|
||||||
|
From another shell:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://127.0.0.1:18819/healthz | jq .
|
||||||
|
curl -fsS http://127.0.0.1:18819/v1/labels | jq .
|
||||||
|
curl -fsS http://127.0.0.1:18819/v1/classify \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"id":"smoke-devops","text":"Urgent: check whether port 18817 is listening and inspect systemd logs.","options":{"include_evidence":true,"dry_run":true}}' | jq .
|
||||||
|
curl -fsS http://127.0.0.1:18819/v1/classify \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"id":"smoke-safety","text":"Restart the live Atlas gateway and switch primary routing to the new classifier.","options":{"include_evidence":true,"dry_run":true}}' | jq .
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected label checks:
|
||||||
|
|
||||||
|
- `smoke-devops`: `tool_needed.value=true`, `urgency.value=high`, `workflow_category.value=devops`.
|
||||||
|
- `smoke-safety`: `safety_confirmation_required.value=true`, no actual restart or routing change.
|
||||||
|
- Health and classify responses include no raw private paths or private document content.
|
||||||
|
|
||||||
|
Shutdown:
|
||||||
|
|
||||||
|
- Stop the foreground server with Ctrl-C.
|
||||||
|
- Re-run `ss -ltnp | grep ':18819\b' || true` and confirm no listener remains.
|
||||||
|
|
||||||
|
## NPU busy-time verification plan
|
||||||
|
|
||||||
|
Use sysfs plus service response fields; do not accept HTTP 200 alone.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
|
||||||
|
before=$(cat "$BUSY")
|
||||||
|
response=$(curl -fsS http://127.0.0.1:18819/v1/classify \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"id":"npu-proof","text":"Check current systemd service status for the embeddings service.","options":{"include_evidence":false,"dry_run":true}}')
|
||||||
|
after=$(cat "$BUSY")
|
||||||
|
echo "$response" | jq '{npu_busy_delta_us, sysfs_npu_busy_delta_us, warnings}'
|
||||||
|
echo "outer_sysfs_npu_busy_delta_us=$((after-before))"
|
||||||
|
```
|
||||||
|
|
||||||
|
Optional localhost smoke helper, after starting the foreground service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/home/will/.venvs/npu/bin/python openvino-classifier-npu/smoke_classifier.py \
|
||||||
|
--base-url http://127.0.0.1:18819
|
||||||
|
```
|
||||||
|
|
||||||
|
Acceptance for an NPU-backed classification request:
|
||||||
|
|
||||||
|
- HTTP request succeeds.
|
||||||
|
- Response `npu_busy_delta_us > 0` from upstream embeddings.
|
||||||
|
- Response `sysfs_npu_busy_delta_us > 0` when sysfs is readable.
|
||||||
|
- Outer shell `after-before > 0`.
|
||||||
|
- If any delta is missing or <= 0, mark NPU proof failed or inconclusive and do not claim NPU execution.
|
||||||
|
|
||||||
|
## Docs and diagram implications
|
||||||
|
|
||||||
|
If this prototype is refreshed or reviewed, update documentation to show:
|
||||||
|
|
||||||
|
- Live baseline remains RAG `:18810`, RAG health `:18814`, Whisper NPU `:18816`, and embeddings `:18817`.
|
||||||
|
- Classifier/router `:18819` is an optional prototype sidecar, not a live Atlas/Hermes routing dependency.
|
||||||
|
- Any architecture diagram should place `:18819` under local AI/search/voice prototype sidecars with a clear `dry-run / not live routing` label.
|
||||||
|
- Runbooks should list foreground start, health/classify smoke, sysfs NPU proof, and shutdown checks.
|
||||||
|
- Service catalog entries should state `not installed/enabled` until Will approves persistent service enablement.
|
||||||
|
- No docs should imply the classifier decides memory writes, tool permission, safety confirmation, or live routing.
|
||||||
|
|
||||||
|
Relevant docs inventory:
|
||||||
|
|
||||||
|
- `docs/swarm-infrastructure.md`
|
||||||
|
- `docs/swarm-infrastructure.html`
|
||||||
|
- `docs/diagram-maintenance.md`
|
||||||
|
- `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md`
|
||||||
|
- `swarm-common/obsidian-vault/will/will-shared-zap/Resources/Service Catalog.md`
|
||||||
|
|
||||||
|
## No-go / defer criteria
|
||||||
|
|
||||||
|
Do not proceed to implementation refresh, persistent service enablement, or live integration if any of the following hold:
|
||||||
|
|
||||||
|
- `:18817` embeddings is unavailable and no approved NPU embedding fallback exists.
|
||||||
|
- `/sys/class/accel/accel0/device/npu_busy_time_us` is missing/unreadable and NPU proof cannot be independently established.
|
||||||
|
- Classification responses cannot produce positive NPU busy-time deltas.
|
||||||
|
- `:18819` is already occupied by an unknown or live service.
|
||||||
|
- Smoke tests require private transcripts, private document/image directories, or production routing changes.
|
||||||
|
- Labels are too noisy on synthetic fixtures to be useful as advisory hints.
|
||||||
|
- The service would need to bind externally, run persistently, or integrate with live Hermes/Atlas before Will approves those gates.
|
||||||
|
- Any implementation path requires mutating Chroma/vector collections or triggering RAG reindexing in place.
|
||||||
|
|
||||||
|
## Implementation handoff notes
|
||||||
|
|
||||||
|
Recommended next engineer actions:
|
||||||
|
|
||||||
|
1. Verify or refresh `openvino-classifier-npu/router_classifier.py` to match this contract.
|
||||||
|
2. Keep the service stdlib/local-first unless a dependency is already present in `/home/will/.venvs/npu`.
|
||||||
|
3. Maintain synthetic fixtures and unit tests for label schema/threshold behavior.
|
||||||
|
4. Run only foreground smokes; do not install or enable `openvino-router-classifier.service`.
|
||||||
|
5. Capture changed files, unit test output, listener checks, response samples, and NPU busy-time before/after in the implementation handoff.
|
||||||
@@ -2,6 +2,10 @@
|
|||||||
|
|
||||||
Dry-run Atlas/Hermes message classifier/router prototype.
|
Dry-run Atlas/Hermes message classifier/router prototype.
|
||||||
|
|
||||||
|
The detailed dry-run contract is in [`CONTRACT.md`](./CONTRACT.md), including the
|
||||||
|
recommended model/runtime, HTTP/CLI schema, smoke-test plan, NPU busy-time proof,
|
||||||
|
docs/diagram implications, and no-go/defer criteria.
|
||||||
|
|
||||||
It reuses the existing OpenVINO NPU embeddings service on `127.0.0.1:18817` and
|
It reuses the existing OpenVINO NPU embeddings service on `127.0.0.1:18817` and
|
||||||
serves an inspectable stdlib HTTP API on `127.0.0.1:18819`. It does not change
|
serves an inspectable stdlib HTTP API on `127.0.0.1:18819`. It does not change
|
||||||
live Hermes/Atlas routing, write memory, mutate vector collections, restart
|
live Hermes/Atlas routing, write memory, mutate vector collections, restart
|
||||||
@@ -13,6 +17,7 @@ services, or send external messages.
|
|||||||
- Default port: `18819`
|
- Default port: `18819`
|
||||||
- Default bind: `127.0.0.1`
|
- Default bind: `127.0.0.1`
|
||||||
- Upstream: `http://127.0.0.1:18817/v1/embeddings`
|
- Upstream: `http://127.0.0.1:18817/v1/embeddings`
|
||||||
|
- Batch limit: `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE`, default `32`
|
||||||
- Model label: `bge-base-en-v1.5-int8-ov/prototype-router-v0`
|
- Model label: `bge-base-en-v1.5-int8-ov/prototype-router-v0`
|
||||||
- NPU proof: `/sys/class/accel/accel0/device/npu_busy_time_us` before/after plus upstream `npu_busy_delta_us`
|
- NPU proof: `/sys/class/accel/accel0/device/npu_busy_time_us` before/after plus upstream `npu_busy_delta_us`
|
||||||
|
|
||||||
@@ -86,6 +91,10 @@ cd /home/will/lab/swarm/openvino-classifier-npu
|
|||||||
/home/will/.venvs/npu/bin/python router_classifier.py --host 127.0.0.1 --port 18819
|
/home/will/.venvs/npu/bin/python router_classifier.py --host 127.0.0.1 --port 18819
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Environment variables mirror the flags: `OPENVINO_CLASSIFIER_HOST`,
|
||||||
|
`OPENVINO_CLASSIFIER_PORT`, `OPENVINO_CLASSIFIER_EMBED_URL`,
|
||||||
|
`OPENVINO_CLASSIFIER_TIMEOUT_S`, and `OPENVINO_CLASSIFIER_MAX_BATCH_SIZE`.
|
||||||
|
|
||||||
Then from another shell:
|
Then from another shell:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -98,6 +107,15 @@ curl -fsS http://127.0.0.1:18819/v1/classify \
|
|||||||
A valid NPU-backed response must have positive `npu_busy_delta_us`; HTTP 200 by
|
A valid NPU-backed response must have positive `npu_busy_delta_us`; HTTP 200 by
|
||||||
itself is not considered proof.
|
itself is not considered proof.
|
||||||
|
|
||||||
|
Synthetic fixture smoke helper, after the foreground service is running:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/home/will/.venvs/npu/bin/python smoke_classifier.py --base-url http://127.0.0.1:18819
|
||||||
|
```
|
||||||
|
|
||||||
|
The helper refuses non-local URLs, checks fixture label expectations, and prints
|
||||||
|
response plus outer sysfs NPU busy deltas.
|
||||||
|
|
||||||
## Tests
|
## Tests
|
||||||
|
|
||||||
Unit tests use a fake embedding client and do not touch the NPU:
|
Unit tests use a fake embedding client and do not touch the NPU:
|
||||||
@@ -110,13 +128,13 @@ Fixture messages live at `fixtures/atlas_hermes_messages.jsonl`.
|
|||||||
|
|
||||||
## Optional systemd user unit
|
## Optional systemd user unit
|
||||||
|
|
||||||
A draft unit is included as `openvino-router-classifier.service`. Install only
|
A reviewed local-only user service unit is included as `openvino-router-classifier.service`. Install/enable it when the dry-run classifier should persist across logins:
|
||||||
after review/approval:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cp openvino-router-classifier.service ~/.config/systemd/user/openvino-router-classifier.service
|
cp openvino-router-classifier.service ~/.config/systemd/user/openvino-router-classifier.service
|
||||||
systemctl --user daemon-reload
|
systemctl --user daemon-reload
|
||||||
systemctl --user enable --now openvino-router-classifier.service
|
systemctl --user enable --now openvino-router-classifier.service
|
||||||
|
systemctl --user status openvino-router-classifier.service --no-pager
|
||||||
```
|
```
|
||||||
|
|
||||||
Do not enable it as part of this prototype task without explicit approval.
|
The service is persistent, but classifier decisions remain dry-run until a separate approved routing change lands. Do not connect it to live Atlas/Hermes routing, memory writes, service restarts, or outbound messages.
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ WorkingDirectory=/home/will/lab/swarm/openvino-classifier-npu
|
|||||||
Environment=OPENVINO_CLASSIFIER_HOST=127.0.0.1
|
Environment=OPENVINO_CLASSIFIER_HOST=127.0.0.1
|
||||||
Environment=OPENVINO_CLASSIFIER_PORT=18819
|
Environment=OPENVINO_CLASSIFIER_PORT=18819
|
||||||
Environment=OPENVINO_CLASSIFIER_EMBED_URL=http://127.0.0.1:18817/v1/embeddings
|
Environment=OPENVINO_CLASSIFIER_EMBED_URL=http://127.0.0.1:18817/v1/embeddings
|
||||||
|
Environment=OPENVINO_CLASSIFIER_MAX_BATCH_SIZE=32
|
||||||
ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-classifier-npu/router_classifier.py
|
ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-classifier-npu/router_classifier.py
|
||||||
Restart=on-failure
|
Restart=on-failure
|
||||||
RestartSec=5
|
RestartSec=5
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ MODEL = "bge-base-en-v1.5-int8-ov/prototype-router-v0"
|
|||||||
DEFAULT_HOST = "127.0.0.1"
|
DEFAULT_HOST = "127.0.0.1"
|
||||||
DEFAULT_PORT = 18819
|
DEFAULT_PORT = 18819
|
||||||
DEFAULT_EMBED_URL = "http://127.0.0.1:18817/v1/embeddings"
|
DEFAULT_EMBED_URL = "http://127.0.0.1:18817/v1/embeddings"
|
||||||
|
DEFAULT_MAX_BATCH_SIZE = 32
|
||||||
NPU_BUSY_FILE = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
|
NPU_BUSY_FILE = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
|
||||||
|
|
||||||
WORKFLOW_CATEGORIES = [
|
WORKFLOW_CATEGORIES = [
|
||||||
@@ -150,6 +151,26 @@ def npu_busy_time_us() -> int | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed integer, or ``default`` when no usable value is present.

    Raises:
        SystemExit: If the variable holds a non-blank value that ``int()``
            cannot parse. The message names the variable and the bad value.
    """
    raw = os.environ.get(name)
    # A variable that is exported but empty (``VAR=``, common in unit files and
    # shell wrappers) carries no value, so treat it like an unset variable
    # instead of aborting startup.
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError as exc:
        raise SystemExit(f"{name} must be an integer, got {raw!r}") from exc
|
||||||
|
|
||||||
|
|
||||||
|
def env_float(name: str, default: float) -> float:
    """Read a finite floating-point setting from the environment.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed float, or ``default`` when no usable value is present.

    Raises:
        SystemExit: If the variable holds a non-blank value that is not a
            finite number. The message names the variable and the bad value.
    """
    import math  # local import: only this helper needs it

    raw = os.environ.get(name)
    # An exported-but-empty variable carries no value; fall back to the default.
    if raw is None or not raw.strip():
        return default
    try:
        value = float(raw)
    except ValueError as exc:
        raise SystemExit(f"{name} must be a number, got {raw!r}") from exc
    # float() accepts "nan", "inf" and "-inf"; none of them is a usable setting.
    if not math.isfinite(value):
        raise SystemExit(f"{name} must be a number, got {raw!r}")
    return value
|
||||||
|
|
||||||
|
|
||||||
def clamp01(value: float) -> float:
|
def clamp01(value: float) -> float:
|
||||||
return max(0.0, min(1.0, value))
|
return max(0.0, min(1.0, value))
|
||||||
|
|
||||||
@@ -220,9 +241,10 @@ class EmbeddingClient:
|
|||||||
|
|
||||||
|
|
||||||
class ClassifierService:
|
class ClassifierService:
|
||||||
def __init__(self, embed_url: str, *, timeout_s: float = 30.0) -> None:
|
def __init__(self, embed_url: str, *, timeout_s: float = 30.0, max_batch_size: int = DEFAULT_MAX_BATCH_SIZE) -> None:
|
||||||
self.embed_url = embed_url
|
self.embed_url = embed_url
|
||||||
self.client = EmbeddingClient(embed_url, timeout_s=timeout_s)
|
self.client = EmbeddingClient(embed_url, timeout_s=timeout_s)
|
||||||
|
self.max_batch_size = max(1, int(max_batch_size))
|
||||||
self.loaded_at = time.time()
|
self.loaded_at = time.time()
|
||||||
self.prototype_texts: list[str] = []
|
self.prototype_texts: list[str] = []
|
||||||
self.prototype_keys: list[str] = []
|
self.prototype_keys: list[str] = []
|
||||||
@@ -255,6 +277,7 @@ class ClassifierService:
|
|||||||
"labels": ["tool_needed", "memory_candidate", "urgency", "workflow_category", "safety_confirmation_required"],
|
"labels": ["tool_needed", "memory_candidate", "urgency", "workflow_category", "safety_confirmation_required"],
|
||||||
"embedding_dim": self.embedding_dim,
|
"embedding_dim": self.embedding_dim,
|
||||||
"prototype_count": len(self.prototype_texts),
|
"prototype_count": len(self.prototype_texts),
|
||||||
|
"max_batch_size": self.max_batch_size,
|
||||||
"prototype_npu_busy_delta_us": self.prototype_npu_busy_delta_us,
|
"prototype_npu_busy_delta_us": self.prototype_npu_busy_delta_us,
|
||||||
"npu_busy_time_us": npu_busy_time_us(),
|
"npu_busy_time_us": npu_busy_time_us(),
|
||||||
"uptime_s": round(time.time() - self.loaded_at, 3),
|
"uptime_s": round(time.time() - self.loaded_at, 3),
|
||||||
@@ -271,6 +294,7 @@ class ClassifierService:
|
|||||||
"workflow_category": 0.52,
|
"workflow_category": 0.52,
|
||||||
},
|
},
|
||||||
"enums": {"memory_candidate": MEMORY_VALUES, "urgency": URGENCY_VALUES, "workflow_category": WORKFLOW_CATEGORIES},
|
"enums": {"memory_candidate": MEMORY_VALUES, "urgency": URGENCY_VALUES, "workflow_category": WORKFLOW_CATEGORIES},
|
||||||
|
"limits": {"max_batch_size": self.max_batch_size},
|
||||||
"prototype_ids": sorted(PROTOTYPES),
|
"prototype_ids": sorted(PROTOTYPES),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -351,6 +375,10 @@ class ClassifierService:
|
|||||||
return response
|
return response
|
||||||
|
|
||||||
def batch_classify(self, items: list[dict[str, Any]], options: dict[str, Any] | None = None) -> dict[str, Any]:
|
def batch_classify(self, items: list[dict[str, Any]], options: dict[str, Any] | None = None) -> dict[str, Any]:
|
||||||
|
if not items:
|
||||||
|
raise ValueError("items must contain at least one classification request")
|
||||||
|
if len(items) > self.max_batch_size:
|
||||||
|
raise ValueError(f"items exceeds max_batch_size={self.max_batch_size}")
|
||||||
started = time.perf_counter()
|
started = time.perf_counter()
|
||||||
results = [self.classify(item.get("id"), str(item.get("text") or ""), options) for item in items]
|
results = [self.classify(item.get("id"), str(item.get("text") or ""), options) for item in items]
|
||||||
return {
|
return {
|
||||||
@@ -400,13 +428,15 @@ class ClassifierService:
|
|||||||
high_rule, high_codes, high_ev = best_rule(text, "urgency_high")
|
high_rule, high_codes, high_ev = best_rule(text, "urgency_high")
|
||||||
critical_rule, critical_codes, critical_ev = best_rule(text, "urgency_critical")
|
critical_rule, critical_codes, critical_ev = best_rule(text, "urgency_critical")
|
||||||
low_rule = 0.82 if re.search(r"\b(no rush|whenever convenient|low priority|someday|backlog)\b", text, re.I) else 0.0
|
low_rule = 0.82 if re.search(r"\b(no rush|whenever convenient|low priority|someday|backlog)\b", text, re.I) else 0.0
|
||||||
# Urgency is safety-sensitive for notifications. Prefer explicit rules;
|
# Urgency is safety-sensitive for notifications, so require explicit
|
||||||
# use prototype scores only when they are unusually strong.
|
# language instead of relying on broad prototype similarity.
|
||||||
score_map = {
|
score_map = {
|
||||||
"low": max(low_rule, scores.get("urgency_low", 0.0) if scores.get("urgency_low", 0.0) >= 0.9 else 0.0),
|
# Urgency should be explicit; broad embedding similarity otherwise
|
||||||
|
# turns neutral requests such as "what time is it" into low/high/critical urgency.
|
||||||
|
"low": low_rule,
|
||||||
"normal": 0.68,
|
"normal": 0.68,
|
||||||
"high": max(high_rule, scores.get("urgency_high", 0.0) if scores.get("urgency_high", 0.0) >= 0.9 else 0.0),
|
"high": high_rule,
|
||||||
"critical": max(critical_rule, scores.get("urgency_critical", 0.0) if scores.get("urgency_critical", 0.0) >= 0.92 else 0.0),
|
"critical": critical_rule,
|
||||||
}
|
}
|
||||||
if score_map["critical"] >= 0.9:
|
if score_map["critical"] >= 0.9:
|
||||||
score_map["normal"] = 0.05
|
score_map["normal"] = 0.05
|
||||||
@@ -509,13 +539,14 @@ class Handler(BaseHTTPRequestHandler):
|
|||||||
def main() -> int:
|
def main() -> int:
|
||||||
parser = argparse.ArgumentParser(description="Dry-run Atlas/Hermes router classifier")
|
parser = argparse.ArgumentParser(description="Dry-run Atlas/Hermes router classifier")
|
||||||
parser.add_argument("--host", default=os.environ.get("OPENVINO_CLASSIFIER_HOST", DEFAULT_HOST))
|
parser.add_argument("--host", default=os.environ.get("OPENVINO_CLASSIFIER_HOST", DEFAULT_HOST))
|
||||||
parser.add_argument("--port", type=int, default=int(os.environ.get("OPENVINO_CLASSIFIER_PORT", DEFAULT_PORT)))
|
parser.add_argument("--port", type=int, default=env_int("OPENVINO_CLASSIFIER_PORT", DEFAULT_PORT))
|
||||||
parser.add_argument("--embed-url", default=os.environ.get("OPENVINO_CLASSIFIER_EMBED_URL", DEFAULT_EMBED_URL))
|
parser.add_argument("--embed-url", default=os.environ.get("OPENVINO_CLASSIFIER_EMBED_URL", DEFAULT_EMBED_URL))
|
||||||
parser.add_argument("--timeout-s", type=float, default=float(os.environ.get("OPENVINO_CLASSIFIER_TIMEOUT_S", "30")))
|
parser.add_argument("--timeout-s", type=float, default=env_float("OPENVINO_CLASSIFIER_TIMEOUT_S", 30.0))
|
||||||
|
parser.add_argument("--max-batch-size", type=int, default=env_int("OPENVINO_CLASSIFIER_MAX_BATCH_SIZE", DEFAULT_MAX_BATCH_SIZE))
|
||||||
parser.add_argument("--no-warmup", action="store_true", help="skip prototype embedding warmup until first request")
|
parser.add_argument("--no-warmup", action="store_true", help="skip prototype embedding warmup until first request")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
service = ClassifierService(args.embed_url, timeout_s=args.timeout_s)
|
service = ClassifierService(args.embed_url, timeout_s=args.timeout_s, max_batch_size=args.max_batch_size)
|
||||||
if not args.no_warmup:
|
if not args.no_warmup:
|
||||||
service.warmup()
|
service.warmup()
|
||||||
httpd = ThreadingHTTPServer((args.host, args.port), Handler)
|
httpd = ThreadingHTTPServer((args.host, args.port), Handler)
|
||||||
|
|||||||
@@ -0,0 +1,113 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Local-only smoke test for the dry-run OpenVINO router classifier.
|
||||||
|
|
||||||
|
This script uses only synthetic fixture messages. It assumes router_classifier.py is
|
||||||
|
already running on localhost and never installs/enables a persistent service.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
DEFAULT_BASE_URL = "http://127.0.0.1:18819"
|
||||||
|
BUSY_FILE = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
|
||||||
|
FIXTURE = Path(__file__).resolve().parent / "fixtures" / "atlas_hermes_messages.jsonl"
|
||||||
|
|
||||||
|
|
||||||
|
def npu_busy_time_us() -> int | None:
    """Read the integer stored in ``BUSY_FILE``.

    Returns:
        The parsed integer, or ``None`` when reading or parsing fails for
        any reason (every exception is treated as "value unavailable").
    """
    try:
        raw_value = BUSY_FILE.read_text()
        return int(raw_value.strip())
    except Exception:
        # Best effort by design: a missing or unreadable sysfs node is not fatal.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_json(url: str, timeout_s: float) -> dict[str, Any]:
    """Fetch ``url`` and decode the response body as UTF-8 JSON.

    Args:
        url: URL to open.
        timeout_s: Timeout in seconds passed to ``urlopen``.

    Returns:
        The decoded JSON document.
    """
    reply = urllib.request.urlopen(url, timeout=timeout_s)  # noqa: S310 - localhost smoke URL
    with reply:
        body = reply.read()
    return json.loads(body.decode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def post_json(url: str, payload: dict[str, Any], timeout_s: float) -> dict[str, Any]:
    """POST ``payload`` as a JSON body to ``url`` and decode the JSON reply.

    Args:
        url: URL to send the request to.
        payload: Object serialized with ``json.dumps`` and sent UTF-8 encoded.
        timeout_s: Timeout in seconds passed to ``urlopen``.

    Returns:
        The response body decoded as UTF-8 JSON.
    """
    encoded_body = json.dumps(payload).encode("utf-8")
    json_headers = {"Content-Type": "application/json"}
    outgoing = urllib.request.Request(url, data=encoded_body, headers=json_headers, method="POST")
    with urllib.request.urlopen(outgoing, timeout=timeout_s) as reply:  # noqa: S310 - localhost smoke URL
        raw_reply = reply.read()
    return json.loads(raw_reply.decode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def load_fixture(limit: int) -> list[dict[str, Any]]:
    """Parse the JSONL file at ``FIXTURE`` and return ``rows[:limit]``.

    Lines that are empty or whitespace-only are skipped; every other line
    must be a valid JSON document.
    """
    parsed_rows = []
    for raw_line in FIXTURE.read_text().splitlines():
        if raw_line.strip():
            parsed_rows.append(json.loads(raw_line))
    return parsed_rows[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
def assert_expected(result: dict[str, Any], expected: dict[str, Any]) -> list[str]:
    """Compare the label values in a classify result against expectations.

    Args:
        result: One classify response; read for its ``"labels"`` mapping and
            its ``"id"`` (used only in failure messages).
        expected: Mapping of label name to the value that label must carry.

    Returns:
        One human-readable message per mismatching label; empty when every
        expected label matches.

    A missing or malformed ``labels`` mapping, or a label entry that is not a
    dict, is reported as a mismatch (actual value ``None``) rather than
    raising, so one bad response cannot abort the whole smoke run.
    """
    failures: list[str] = []
    labels = result.get("labels")
    if not isinstance(labels, dict):
        # e.g. `"labels": null` in the response; treat as "no labels returned".
        labels = {}
    for key, value in expected.items():
        actual_label = labels.get(key)
        actual_value = actual_label.get("value") if isinstance(actual_label, dict) else None
        if actual_value != value:
            failures.append(f"{result.get('id')}: {key} expected {value!r}, got {actual_value!r}")
    return failures
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Run the localhost smoke test and print a JSON summary.

    Calls ``/healthz`` and ``/v1/labels``, then posts each fixture row to
    ``/v1/classify`` and compares the returned labels with the row's
    ``expected`` mapping.

    Returns:
        0 when no fixture expectation failed and NPU use was proven,
        otherwise 1.

    Raises:
        SystemExit: if ``--base-url`` is not a plain-HTTP URL whose host is
            ``127.0.0.1`` or ``localhost``, or if a request, a JSON decode or
            the fixture read fails.
    """
    # Local import: only needed for the base-URL guard below.
    from urllib.parse import urlsplit

    parser = argparse.ArgumentParser(description="Smoke-test a running localhost router classifier")
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
    parser.add_argument("--timeout-s", type=float, default=30.0)
    parser.add_argument("--limit", type=int, default=10)
    args = parser.parse_args()

    # Validate the parsed host, not a string prefix: a prefix check accepts
    # "http://127.0.0.1:1@evil.example/", whose real host is evil.example.
    try:
        target = urlsplit(args.base_url)
        is_local = target.scheme == "http" and target.hostname in ("127.0.0.1", "localhost")
    except ValueError:
        is_local = False
    if not is_local:
        raise SystemExit("refusing non-local base URL; this smoke is localhost-only")

    base_url = args.base_url.rstrip("/")
    before = npu_busy_time_us()
    started = time.perf_counter()
    results = []
    failures: list[str] = []
    try:
        health = get_json(f"{base_url}/healthz", args.timeout_s)
        labels = get_json(f"{base_url}/v1/labels", args.timeout_s)
        for row in load_fixture(args.limit):
            result = post_json(
                f"{base_url}/v1/classify",
                {"id": row["id"], "text": row["text"], "options": {"include_evidence": False, "dry_run": True}},
                args.timeout_s,
            )
            results.append(result)
            failures.extend(assert_expected(result, row.get("expected", {})))
        after = npu_busy_time_us()
    except (OSError, ValueError) as exc:
        # OSError covers URLError/HTTPError, read timeouts, connection resets
        # and a missing fixture file; ValueError covers malformed JSON.
        raise SystemExit(f"smoke failed: {exc}") from exc

    response_npu_delta = sum((r.get("npu_busy_delta_us") or 0) for r in results)
    outer_sysfs_delta = None if before is None or after is None else after - before
    # The service-reported delta must be positive; the outer sysfs delta must
    # also be positive whenever it could be measured.
    npu_proven = response_npu_delta > 0 and (outer_sysfs_delta is None or outer_sysfs_delta > 0)
    summary = {
        "ok": not failures,
        "service": health.get("service"),
        "mode": health.get("mode"),
        "model": health.get("model"),
        "label_count": len(labels.get("prototype_ids", [])),
        "fixture_count": len(results),
        "duration_ms": round((time.perf_counter() - started) * 1000, 3),
        "response_npu_busy_delta_us": response_npu_delta,
        "outer_sysfs_npu_busy_delta_us": outer_sysfs_delta,
        "npu_proven": npu_proven,
        "failures": failures,
    }
    print(json.dumps(summary, indent=2, sort_keys=True))
    return 0 if not failures and npu_proven else 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
@@ -88,6 +88,14 @@ class RouterClassifierTests(unittest.TestCase):
|
|||||||
self.assertEqual(len(result["results"]), 2)
|
self.assertEqual(len(result["results"]), 2)
|
||||||
self.assertGreater(result["npu_busy_delta_us"], 0)
|
self.assertGreater(result["npu_busy_delta_us"], 0)
|
||||||
|
|
||||||
|
def test_batch_limits_are_enforced(self):
    """An empty batch and a batch over the default maximum both raise ValueError."""
    svc = self.service()

    # Lower bound: an empty batch is rejected.
    with self.assertRaisesRegex(ValueError, "at least one"):
        svc.batch_classify([])

    # Upper bound: one item more than the default maximum is rejected.
    oversized_count = router_classifier.DEFAULT_MAX_BATCH_SIZE + 1
    too_many = [{"id": str(index), "text": "What time is it?"} for index in range(oversized_count)]
    with self.assertRaisesRegex(ValueError, "max_batch_size"):
        svc.batch_classify(too_many)
|
||||||
|
|
||||||
def test_fixture_file_is_valid_jsonl(self):
|
def test_fixture_file_is_valid_jsonl(self):
|
||||||
fixture = ROOT / "fixtures" / "atlas_hermes_messages.jsonl"
|
fixture = ROOT / "fixtures" / "atlas_hermes_messages.jsonl"
|
||||||
rows = [json.loads(line) for line in fixture.read_text().splitlines() if line.strip()]
|
rows = [json.loads(line) for line in fixture.read_text().splitlines() if line.strip()]
|
||||||
@@ -97,6 +105,17 @@ class RouterClassifierTests(unittest.TestCase):
|
|||||||
self.assertIn("text", row)
|
self.assertIn("text", row)
|
||||||
self.assertIn("expected", row)
|
self.assertIn("expected", row)
|
||||||
|
|
||||||
|
def test_synthetic_fixture_expectations(self):
    """Every fixture row classifies to the label values listed in its ``expected`` mapping."""
    svc = self.service()
    fixture_path = ROOT / "fixtures" / "atlas_hermes_messages.jsonl"
    fixture_rows = []
    for raw_line in fixture_path.read_text().splitlines():
        if raw_line.strip():
            fixture_rows.append(json.loads(raw_line))

    for row in fixture_rows:
        # One subTest per row, so a single bad row does not hide the others.
        with self.subTest(row=row["id"]):
            outcome = svc.classify(row["id"], row["text"], {"include_evidence": False})
            labels = outcome["labels"]
            for label_name, expected_value in row["expected"].items():
                self.assertEqual(labels[label_name]["value"], expected_value)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
# OpenVINO NPU document/image triage prototype
|
# OpenVINO NPU document/image triage prototype
|
||||||
|
|
||||||
Local-only prototype for triaging screenshots, photos/scans, and PDF page images.
|
Local-only, CLI-first prototype for triaging screenshots, photos/scans, and PDF page images.
|
||||||
It returns structured JSON metadata and explicitly reports CPU vs NPU stages.
|
It returns structured JSON metadata and explicitly reports CPU vs NPU stages.
|
||||||
|
Optional HTTP is a localhost/loopback-only prototype on `127.0.0.1:18829` when explicitly started; non-loopback binds are rejected and it is not a live Atlas/Hermes/RAG integration.
|
||||||
|
|
||||||
Location: `/home/will/lab/swarm/openvino-doc-image-triage-npu/`
|
Location: `/home/will/lab/swarm/openvino-doc-image-triage-npu/`
|
||||||
|
|
||||||
@@ -13,6 +14,8 @@ Location: `/home/will/lab/swarm/openvino-doc-image-triage-npu/`
|
|||||||
- Full source paths are omitted by default; responses include basename and SHA-256.
|
- Full source paths are omitted by default; responses include basename and SHA-256.
|
||||||
- Allowed roots are enforced for CLI/server requests.
|
- Allowed roots are enforced for CLI/server requests.
|
||||||
- This prototype does not mutate Obsidian, RAG, Chroma, vector collections, routing, or gateway services.
|
- This prototype does not mutate Obsidian, RAG, Chroma, vector collections, routing, or gateway services.
|
||||||
|
- Do not process broad private document/image directories; use generated synthetic fixtures unless Will explicitly approves a narrow source root.
|
||||||
|
- See `SPEC.md` for the full CLI contract, smoke-test plan, NPU verification plan, docs implications, and no-go/defer criteria.
|
||||||
|
|
||||||
## CPU vs NPU stages
|
## CPU vs NPU stages
|
||||||
|
|
||||||
@@ -35,6 +38,7 @@ Not configured in v1:
|
|||||||
|
|
||||||
- `triage.py` — core library and CLI.
|
- `triage.py` — core library and CLI.
|
||||||
- `server.py` — stdlib HTTP server with `/healthz`, `/models`, `/triage`, `/triage/batch`.
|
- `server.py` — stdlib HTTP server with `/healthz`, `/models`, `/triage`, `/triage/batch`.
|
||||||
|
- `openvino-doc-image-triage.service` — local-only user-systemd service template for `127.0.0.1:18829`, limited to this prototype directory as its default allowed root.
|
||||||
- `make_samples.py` — creates synthetic non-private image/PDF samples.
|
- `make_samples.py` — creates synthetic non-private image/PDF samples.
|
||||||
- `tests/smoke_test.py` — end-to-end smoke test, including NPU busy-time verification when `:18817` is reachable.
|
- `tests/smoke_test.py` — end-to-end smoke test, including NPU busy-time verification when `:18817` is reachable.
|
||||||
- `samples/` — generated synthetic fixtures.
|
- `samples/` — generated synthetic fixtures.
|
||||||
@@ -88,29 +92,40 @@ Include OCR/sidecar text in a single response only when explicitly requested:
|
|||||||
|
|
||||||
## HTTP usage
|
## HTTP usage
|
||||||
|
|
||||||
Check that port 18820 is free first:
|
The prototype is CLI-first, and the local HTTP wrapper can be run as a reviewed user-systemd service on `127.0.0.1:18829` with an allowlist rooted at this prototype directory. Keep it local-only and do not broaden allowed roots to private document/image directories without explicit approval. Check the port first:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ss -ltnp | grep ':18820\b' || true
|
ss -ltnp | grep ':18829\b' || true
|
||||||
```
|
```
|
||||||
|
|
||||||
Start local-only server:
|
Start a local-only server and stop it after the smoke:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd /home/will/lab/swarm/openvino-doc-image-triage-npu
|
cd /home/will/lab/swarm/openvino-doc-image-triage-npu
|
||||||
/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18820 --allowed-root "$PWD"
|
/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18829 --allowed-root "$PWD"
|
||||||
```
|
```
|
||||||
|
|
||||||
Call it:
|
Install/enable the reviewed local-only service template when the HTTP wrapper should persist across logins:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -sS http://127.0.0.1:18820/healthz | jq
|
install -m 0644 openvino-doc-image-triage.service ~/.config/systemd/user/openvino-doc-image-triage.service
|
||||||
curl -sS http://127.0.0.1:18820/models | jq
|
systemctl --user daemon-reload
|
||||||
curl -sS -X POST http://127.0.0.1:18820/triage \
|
systemctl --user enable --now openvino-doc-image-triage.service
|
||||||
|
systemctl --user status openvino-doc-image-triage.service --no-pager
|
||||||
|
```
|
||||||
|
|
||||||
|
Call it with synthetic/non-private fixtures only:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -sS http://127.0.0.1:18829/healthz | jq
|
||||||
|
curl -sS http://127.0.0.1:18829/models | jq
|
||||||
|
curl -sS -X POST http://127.0.0.1:18829/triage \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
-d '{"path":"/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png","options":{"allowed_roots":["/home/will/lab/swarm/openvino-doc-image-triage-npu"]}}' | jq
|
-d '{"path":"/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png","options":{"allowed_roots":["/home/will/lab/swarm/openvino-doc-image-triage-npu"]}}' | jq
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Do not point it at private document/image directories during smoke tests unless Will explicitly approves the exact source root.
|
||||||
|
|
||||||
## Smoke test
|
## Smoke test
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -118,7 +133,7 @@ cd /home/will/lab/swarm/openvino-doc-image-triage-npu
|
|||||||
/home/will/.venvs/npu/bin/python tests/smoke_test.py
|
/home/will/.venvs/npu/bin/python tests/smoke_test.py
|
||||||
```
|
```
|
||||||
|
|
||||||
Expected: JSON ending with `"ok": true`. If the embeddings service is up, the result should show positive NPU busy-time delta and each embedded page should report `verified_npu: true`.
|
Expected: JSON ending with `"ok": true`. The smoke test generates only synthetic fixtures, verifies non-loopback HTTP binds are rejected, starts its temporary server on a preflighted free localhost port, and terminates it before exit. If the embeddings service is up, the result should show positive NPU busy-time delta and each embedded page should report `verified_npu: true`.
|
||||||
|
|
||||||
## Example output shape
|
## Example output shape
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,146 @@
|
|||||||
|
# OpenVINO NPU document/image triage spec
|
||||||
|
|
||||||
|
Status: CLI-first prototype specification; not a live Atlas/Hermes integration.
|
||||||
|
|
||||||
|
## Safety stance
|
||||||
|
|
||||||
|
- Default workflow is local CLI execution against explicitly named files.
|
||||||
|
- Optional HTTP is disabled unless a human starts it, is constrained to loopback (`127.0.0.1`, `::1`, or `localhost`), and is intended for `127.0.0.1:18829` only.
|
||||||
|
- No persistent systemd unit, Docker service, gateway hook, Atlas/Hermes route, RAG route, Chroma/vector collection mutation, or in-place reindexing is part of this spec.
|
||||||
|
- Smoke data must be synthetic/non-private only. Do not point this tool at Will's private document, image, screenshot, Downloads, Desktop, Obsidian, or photo-library directories without explicit approval.
|
||||||
|
- NPU claims require `/sys/class/accel/accel0/device/npu_busy_time_us` before/after deltas. HTTP 200, JSON output, or model-load success alone is not NPU proof.
|
||||||
|
|
||||||
|
## Recommended model/runtime
|
||||||
|
|
||||||
|
Recommended v1 runtime:
|
||||||
|
|
||||||
|
- File intake, hashing, MIME/extension checks, image/PDF rendering, sidecar/native PDF text extraction, metadata extraction, and category fallback: local Python CPU path using Pillow plus optional `pypdf`/`pypdfium2`.
|
||||||
|
- Needs-attention semantic check: reuse the live localhost OpenVINO embeddings service on `127.0.0.1:18817`, currently `bge-base-en-v1.5-int8-ov`, and verify each embedding call with `npu_busy_time_us` deltas.
|
||||||
|
- Category classification in v1: CPU rule fallback, explicitly reported as not an NPU image model.
|
||||||
|
|
||||||
|
Why this is the recommended v1:
|
||||||
|
|
||||||
|
- It avoids private-data exposure: no external upload path and no broader local file scanning.
|
||||||
|
- It avoids collection/routing risk by using the existing embeddings API as a stateless feature extractor only; it does not write to RAG or Chroma.
|
||||||
|
- It gives a real NPU verification hook for the semantic stage without overclaiming that OCR/image classification are NPU-backed.
|
||||||
|
- It keeps the prototype useful even when optional PDF dependencies or the embeddings service are unavailable: it can fall back to CPU-only metadata/rule output and mark NPU verification false.
|
||||||
|
|
||||||
|
Deferred model work:
|
||||||
|
|
||||||
|
- NPU image category classifier: defer until a static-shape OpenVINO IR image model such as MobileNet/EfficientNet/ResNet is selected, calibrated for the label set, and smoke-tested with busy-time deltas.
|
||||||
|
- NPU OCR/VLM: defer; OCR remains local CPU text plumbing in v1.
|
||||||
|
|
||||||
|
## CLI contract
|
||||||
|
|
||||||
|
Command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-doc-image-triage-npu
|
||||||
|
/home/will/.venvs/npu/bin/python triage.py \
|
||||||
|
--allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu \
|
||||||
|
--max-pages 3 \
|
||||||
|
--pretty \
|
||||||
|
samples/synthetic_invoice.png samples/synthetic_invoice.pdf
|
||||||
|
```
|
||||||
|
|
||||||
|
Inputs:
|
||||||
|
|
||||||
|
- Positional `paths`: one or more local image/PDF paths.
|
||||||
|
- `--allowed-root ROOT`: may repeat; every requested path must resolve under one of these roots. Default is current directory.
|
||||||
|
- `--max-pages N`: maximum rendered/extracted PDF pages; default 3.
|
||||||
|
- `--no-embeddings`: disables the localhost `:18817` embedding/NPU check and reports CPU fallback/no text.
|
||||||
|
- `--dry-run`: skip image/PDF rendering while still checking intake/hash/text/metadata where available.
|
||||||
|
- `--include-ocr-text`: include raw extracted/sidecar text in this single response only; off by default.
|
||||||
|
- `--include-full-path`: include resolved full paths; off by default.
|
||||||
|
- `--pretty`: pretty-print JSON.
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
- Batch JSON: `{ "ok": bool, "files": [...], "generated_at": "..." }`.
|
||||||
|
- Per file result includes `file_id` as `sha256:<digest>`, `source_path_basename`, media type, file size, pages, classification, needs-attention result, metadata counts/flags, privacy flags, and processing-device summary.
|
||||||
|
- Raw OCR/text and full paths are omitted unless explicitly requested.
|
||||||
|
- NPU evidence is per embedding call: `used`, `verified_npu`, `npu_busy_delta_us`, endpoint, and wall time.
|
||||||
|
|
||||||
|
Exit behavior:
|
||||||
|
|
||||||
|
- Exit 0 when all files triage successfully.
|
||||||
|
- Exit 2 when one or more files fail policy/intake/processing checks.
|
||||||
|
|
||||||
|
## Optional localhost HTTP contract
|
||||||
|
|
||||||
|
HTTP is optional and not enabled by this spec. If explicitly started for a smoke or local demo, use localhost and port 18829:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-doc-image-triage-npu
|
||||||
|
ss -ltnp | grep ':18829\b' || true
|
||||||
|
/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18829 --allowed-root "$PWD"
|
||||||
|
```
|
||||||
|
|
||||||
|
Endpoints:
|
||||||
|
|
||||||
|
- `GET /healthz` or `/health`: service name, bind policy, configured allowed roots, privacy flags, and current `npu_busy_time_us`.
|
||||||
|
- `GET /models`: reports v1 stages and whether each is CPU or NPU-backed.
|
||||||
|
- `POST /triage`: `{ "path": "/local/file", "options": {...} }` -> `{ "ok": true, "result": ... }`.
|
||||||
|
- `POST /triage/batch`: `{ "paths": ["/local/file"], "options": {...} }` -> batch JSON.
|
||||||
|
|
||||||
|
HTTP privacy/policy rules:
|
||||||
|
|
||||||
|
- Server startup `--allowed-root` is the outer allowlist.
|
||||||
|
- Request `options.allowed_roots` may narrow that allowlist but must not widen it.
|
||||||
|
- Request `options.embedding_url` may only target the configured local loopback embeddings route `http://127.0.0.1:18817/v1/embeddings` (or localhost equivalent); external or alternate endpoints are rejected.
|
||||||
|
- Request bodies and raw text are not logged by the stdlib handler.
|
||||||
|
- Stop the temporary server after the smoke/demo.
|
||||||
|
|
||||||
|
## Synthetic smoke-test plan
|
||||||
|
|
||||||
|
Use only generated fixtures under the prototype directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-doc-image-triage-npu
|
||||||
|
/home/will/.venvs/npu/bin/python make_samples.py
|
||||||
|
/home/will/.venvs/npu/bin/python tests/smoke_test.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected smoke coverage:
|
||||||
|
|
||||||
|
- Creates synthetic invoice/receipt/form-like image/PDF fixtures.
|
||||||
|
- Runs CLI triage against the synthetic invoice image/PDF under an explicit allowed root.
|
||||||
|
- Asserts privacy flags (`external_uploads: false`, no full path by default).
|
||||||
|
- Asserts invoice category/needs-attention behavior on synthetic text.
|
||||||
|
- Starts a temporary localhost HTTP server on a preflighted free ephemeral port, calls `/healthz` and `/triage`, verifies no full path leakage, rejects attempts to widen allowed roots, rejects external embedding URLs, and verifies non-loopback binds are rejected.
|
||||||
|
- Terminates the temporary server.
|
||||||
|
|
||||||
|
The smoke port in tests should stay OS-assigned ephemeral/non-live to avoid claiming `18829` as a persistent service.
|
||||||
|
|
||||||
|
## NPU busy-time verification plan
|
||||||
|
|
||||||
|
For every test that claims NPU use:
|
||||||
|
|
||||||
|
1. Read `/sys/class/accel/accel0/device/npu_busy_time_us` before the operation.
|
||||||
|
2. Perform an operation that should call the live embeddings service on `127.0.0.1:18817` with non-empty synthetic text.
|
||||||
|
3. Read `npu_busy_time_us` after the operation.
|
||||||
|
4. Require both:
|
||||||
|
- the per-result embedding object reports `used: true`, `verified_npu: true`, and `npu_busy_delta_us > 0`; and
|
||||||
|
- the outer before/after sysfs value increased.
|
||||||
|
5. If sysfs is missing or `:18817` is unavailable, do not claim NPU success; report CPU fallback / embedding unavailable and keep the smoke result honest.
|
||||||
|
|
||||||
|
## Docs and diagram implications
|
||||||
|
|
||||||
|
- Service maps should list document/image triage as CLI-first and optional prototype `127.0.0.1:18829`, not live unless explicitly started.
|
||||||
|
- Diagrams must not draw live Atlas/Hermes/gateway/RAG routing to this triage lane.
|
||||||
|
- If shown with other candidate sidecars, label it separately from live services: live baseline remains RAG `:18810`, Whisper NPU `:18816`, and embeddings `:18817`; prototype sidecars are reranker `:18818`, classifier/router `:18819`, GenAI worker `:18820`, and optional doc/image triage `:18829`.
|
||||||
|
- Runbooks should include CLI smoke, localhost listener checks, busy-time delta verification, and server shutdown instructions.
|
||||||
|
- Documentation should state CPU vs NPU stages explicitly so the prototype does not imply NPU OCR or NPU image classification.
|
||||||
|
|
||||||
|
## No-go / defer criteria
|
||||||
|
|
||||||
|
Do not proceed to implementation, live integration, or persistent service enablement if any of these are true:
|
||||||
|
|
||||||
|
- Will has not explicitly approved live routing or persistent service enablement.
|
||||||
|
- The requested source path is a private document/image directory or broad home-directory scan rather than synthetic fixtures or an explicitly approved narrow root.
|
||||||
|
- The workflow would mutate Obsidian, RAG, Chroma/vector collections, or reindex in place.
|
||||||
|
- The optional server would need to bind anywhere other than localhost.
|
||||||
|
- NPU busy-time does not increase for an operation being described as NPU-backed.
|
||||||
|
- Raw OCR text or full paths would be logged, uploaded, stored durably, or returned without explicit request.
|
||||||
|
- PDF/image dependencies are missing and the task requires rendered page analysis rather than metadata/text-only fallback.
|
||||||
|
- A future image classifier/OCR/VLM model has not been selected, converted/quantized to OpenVINO, calibrated for the task, and verified on synthetic fixtures with busy-time deltas.
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=OpenVINO NPU document/image triage HTTP Service (local-only, port 18829)
|
||||||
|
After=network.target openvino-embeddings.service
|
||||||
|
Wants=openvino-embeddings.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
WorkingDirectory=/home/will/lab/swarm/openvino-doc-image-triage-npu
|
||||||
|
Environment=DOC_IMAGE_TRIAGE_HOST=127.0.0.1
|
||||||
|
Environment=DOC_IMAGE_TRIAGE_PORT=18829
|
||||||
|
ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-doc-image-triage-npu/server.py --host 127.0.0.1 --port 18829 --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=default.target
|
||||||
@@ -13,6 +13,7 @@ configured allowed roots. It never uploads document/image contents externally.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import ipaddress
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
@@ -23,6 +24,19 @@ from urllib.parse import urlparse
|
|||||||
from triage import DEFAULT_EMBED_URL, TriageOptions, read_npu_busy, triage_batch, triage_file
|
from triage import DEFAULT_EMBED_URL, TriageOptions, read_npu_busy, triage_batch, triage_file
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_loopback_host(host: str) -> str:
|
||||||
|
"""Reject non-loopback binds; this prototype is never a LAN service."""
|
||||||
|
normalized = host.strip()
|
||||||
|
if normalized == "localhost":
|
||||||
|
return normalized
|
||||||
|
try:
|
||||||
|
if ipaddress.ip_address(normalized).is_loopback:
|
||||||
|
return normalized
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
raise ValueError("host must be localhost/loopback for this prototype")
|
||||||
|
|
||||||
|
|
||||||
def _roots_within_configured(requested_roots: list[Any], configured_roots: list[Path]) -> list[Path]:
|
def _roots_within_configured(requested_roots: list[Any], configured_roots: list[Path]) -> list[Path]:
|
||||||
"""Return request roots only when they narrow the startup allowlist."""
|
"""Return request roots only when they narrow the startup allowlist."""
|
||||||
narrowed: list[Path] = []
|
narrowed: list[Path] = []
|
||||||
@@ -163,13 +177,17 @@ class Handler(BaseHTTPRequestHandler):
|
|||||||
def main() -> int:
|
def main() -> int:
|
||||||
parser = argparse.ArgumentParser(description="Local-only doc/image triage HTTP server")
|
parser = argparse.ArgumentParser(description="Local-only doc/image triage HTTP server")
|
||||||
parser.add_argument("--host", default=os.environ.get("DOC_IMAGE_TRIAGE_HOST", "127.0.0.1"))
|
parser.add_argument("--host", default=os.environ.get("DOC_IMAGE_TRIAGE_HOST", "127.0.0.1"))
|
||||||
parser.add_argument("--port", type=int, default=int(os.environ.get("DOC_IMAGE_TRIAGE_PORT", "18820")))
|
parser.add_argument("--port", type=int, default=int(os.environ.get("DOC_IMAGE_TRIAGE_PORT", "18829")))
|
||||||
parser.add_argument("--allowed-root", action="append", default=[], help="allowed local root; may repeat")
|
parser.add_argument("--allowed-root", action="append", default=[], help="allowed local root; may repeat")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
try:
|
||||||
|
host = _validate_loopback_host(args.host)
|
||||||
|
except ValueError as exc:
|
||||||
|
parser.error(str(exc))
|
||||||
roots = [Path(p).expanduser().resolve() for p in args.allowed_root] or [Path.cwd().resolve()]
|
roots = [Path(p).expanduser().resolve() for p in args.allowed_root] or [Path.cwd().resolve()]
|
||||||
httpd = ThreadingHTTPServer((args.host, args.port), Handler)
|
httpd = ThreadingHTTPServer((host, args.port), Handler)
|
||||||
httpd.allowed_roots = roots # type: ignore[attr-defined]
|
httpd.allowed_roots = roots # type: ignore[attr-defined]
|
||||||
print(json.dumps({"service": "openvino-doc-image-triage-npu", "host": args.host, "port": args.port, "allowed_roots": [str(p) for p in roots]}), flush=True)
|
print(json.dumps({"service": "openvino-doc-image-triage-npu", "host": host, "port": args.port, "allowed_roots": [str(p) for p in roots]}), flush=True)
|
||||||
httpd.serve_forever()
|
httpd.serve_forever()
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
@@ -42,6 +43,29 @@ def busy() -> int | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def choose_free_loopback_port() -> int:
    """Ask the OS for a free localhost port and verify it is not listening yet.

    Binds a TCP socket to ``127.0.0.1`` port 0 to get an OS-assigned port,
    then attempts a short connection to that port and asserts it is refused.

    Returns:
        The selected port number.
    """
    reservation = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with reservation:
        reservation.bind(("127.0.0.1", 0))
        port = int(reservation.getsockname()[1])

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.settimeout(0.25)
        connect_status = probe.connect_ex(("127.0.0.1", port))
    # connect_ex returns 0 only when something accepted the connection.
    assert connect_status != 0, f"selected port already has a listener: {port}"
    return port
|
||||||
|
|
||||||
|
|
||||||
|
def assert_loopback_bind_policy() -> None:
    """Assert that ``server.py`` refuses to start on a non-loopback host.

    Launches the server with ``--host 0.0.0.0`` and requires a non-zero exit
    whose stderr mentions "loopback".

    Raises:
        AssertionError: if the server exits 0, exits without the loopback
            message, or keeps running (i.e. it accepted the bind).
    """
    try:
        blocked = subprocess.run(
            [sys.executable, "server.py", "--host", "0.0.0.0", "--port", "0", "--allowed-root", str(ROOT)],
            cwd=ROOT,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            # If the policy regresses the server would serve forever; bound the
            # wait so the smoke fails instead of hanging. run() kills the child
            # on timeout.
            timeout=30,
        )
    except subprocess.TimeoutExpired as exc:
        raise AssertionError("server did not reject the non-loopback bind to 0.0.0.0 within 30s") from exc
    assert blocked.returncode != 0, blocked.stdout + blocked.stderr
    assert "loopback" in blocked.stderr.lower(), blocked.stderr
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
run([sys.executable, "make_samples.py"])
|
run([sys.executable, "make_samples.py"])
|
||||||
invoice = SAMPLES / "synthetic_invoice.png"
|
invoice = SAMPLES / "synthetic_invoice.png"
|
||||||
@@ -69,20 +93,23 @@ def main() -> int:
|
|||||||
assert (emb.get("npu_busy_delta_us") or 0) > 0, emb
|
assert (emb.get("npu_busy_delta_us") or 0) > 0, emb
|
||||||
assert after > before, {"before": before, "after": after, "embedding": emb}
|
assert after > before, {"before": before, "after": after, "embedding": emb}
|
||||||
|
|
||||||
# HTTP smoke on an ephemeral localhost port so we do not collide with 18820 during tests.
|
# HTTP smoke on a preflighted free localhost port so we do not collide with live/prototype ports.
|
||||||
proc = subprocess.Popen([sys.executable, "server.py", "--host", "127.0.0.1", "--port", "18828", "--allowed-root", str(ROOT)], cwd=ROOT, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
assert_loopback_bind_policy()
|
||||||
|
smoke_port = choose_free_loopback_port()
|
||||||
|
base_url = f"http://127.0.0.1:{smoke_port}"
|
||||||
|
proc = subprocess.Popen([sys.executable, "server.py", "--host", "127.0.0.1", "--port", str(smoke_port), "--allowed-root", str(ROOT)], cwd=ROOT, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
||||||
try:
|
try:
|
||||||
deadline = time.time() + 5
|
deadline = time.time() + 5
|
||||||
while time.time() < deadline:
|
while time.time() < deadline:
|
||||||
try:
|
try:
|
||||||
health = urllib.request.urlopen("http://127.0.0.1:18828/healthz", timeout=1).read()
|
health = urllib.request.urlopen(f"{base_url}/healthz", timeout=1).read()
|
||||||
assert b"openvino-doc-image-triage-npu" in health
|
assert b"openvino-doc-image-triage-npu" in health
|
||||||
break
|
break
|
||||||
except Exception:
|
except Exception:
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
else:
|
else:
|
||||||
raise AssertionError("server did not become ready")
|
raise AssertionError("server did not become ready")
|
||||||
resp = post_json("http://127.0.0.1:18828/triage", {"path": str(invoice), "options": {"allowed_roots": [str(ROOT)]}})
|
resp = post_json(f"{base_url}/triage", {"path": str(invoice), "options": {"allowed_roots": [str(ROOT)]}})
|
||||||
assert resp["ok"] is True, resp
|
assert resp["ok"] is True, resp
|
||||||
assert resp["result"]["source_path_basename"] == "synthetic_invoice.png"
|
assert resp["result"]["source_path_basename"] == "synthetic_invoice.png"
|
||||||
assert "source_path" not in resp["result"]
|
assert "source_path" not in resp["result"]
|
||||||
@@ -92,7 +119,7 @@ def main() -> int:
|
|||||||
outside.write(b"sensitive text outside configured artifact root")
|
outside.write(b"sensitive text outside configured artifact root")
|
||||||
outside.flush()
|
outside.flush()
|
||||||
status, blocked = post_json_status(
|
status, blocked = post_json_status(
|
||||||
"http://127.0.0.1:18828/triage",
|
f"{base_url}/triage",
|
||||||
{"path": outside.name, "options": {"allowed_roots": ["/tmp"], "dry_run": True, "use_embeddings": False}},
|
{"path": outside.name, "options": {"allowed_roots": ["/tmp"], "dry_run": True, "use_embeddings": False}},
|
||||||
)
|
)
|
||||||
assert status == 400, blocked
|
assert status == 400, blocked
|
||||||
@@ -101,7 +128,7 @@ def main() -> int:
|
|||||||
|
|
||||||
# Request bodies must not redirect extracted text to caller-supplied endpoints.
|
# Request bodies must not redirect extracted text to caller-supplied endpoints.
|
||||||
status, blocked = post_json_status(
|
status, blocked = post_json_status(
|
||||||
"http://127.0.0.1:18828/triage",
|
f"{base_url}/triage",
|
||||||
{"path": str(invoice), "options": {"embedding_url": "http://198.51.100.1:9/v1/embeddings"}},
|
{"path": str(invoice), "options": {"embedding_url": "http://198.51.100.1:9/v1/embeddings"}},
|
||||||
)
|
)
|
||||||
assert status == 400, blocked
|
assert status == 400, blocked
|
||||||
|
|||||||
@@ -0,0 +1,306 @@
|
|||||||
|
# Bounded OpenVINO GenAI NPU worker contract
|
||||||
|
|
||||||
|
Status: prototype contract implemented locally; not a live Atlas/Hermes routing dependency.
|
||||||
|
Default address: `http://127.0.0.1:18820`.
|
||||||
|
|
||||||
|
## Purpose and hard boundary
|
||||||
|
|
||||||
|
This worker is a local-only sidecar for small, bounded generation jobs that are useful around the assistant stack but are not primary chat: title drafting, short summaries, notification condensation, and memory-candidate extraction. It must not be used as Atlas/Hermes primary model routing, gateway fallback routing, autonomous tool-calling, or an unbounded chat endpoint without a separate approval gate.
|
||||||
|
|
||||||
|
Hard boundaries:
|
||||||
|
|
||||||
|
- Bind to `127.0.0.1` by default; non-local bind is a code/ops review item, not a runtime flag to casually change.
|
||||||
|
- Do not enable a persistent systemd/Docker service as part of smoke testing.
|
||||||
|
- Do not restart or reconfigure Atlas, Hermes, gateway, LiteLLM, RAG, or n8n routing to call this worker without explicit approval from Will.
|
||||||
|
- Do not write memory, mutate Chroma/vector collections, trigger RAG reindexing, or process private document/image directories.
|
||||||
|
- Do not log raw prompts or raw request bodies by default.
|
||||||
|
- Treat HTTP success as insufficient for NPU claims; require positive `/sys/class/accel/accel0/device/npu_busy_time_us` delta for generation.
|
||||||
|
|
||||||
|
## Recommended model/runtime
|
||||||
|
|
||||||
|
Recommended first model:
|
||||||
|
|
||||||
|
- Model id: `OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov`
|
||||||
|
- Local path: `/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov`
|
||||||
|
- Runtime: `/home/will/.venvs/npu` with `openvino-genai==2026.2.0.0`
|
||||||
|
- Device: OpenVINO GenAI `NPU`
|
||||||
|
- Compile cache: `/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4`
|
||||||
|
|
||||||
|
Why this model/runtime:
|
||||||
|
|
||||||
|
- It is already staged in the repo prototype and has a local smoke observation with positive NPU busy-time delta.
|
||||||
|
- It is an OpenVINO IR model with INT4-compressed weights, which keeps memory/compile pressure low enough for a sidecar on the shared NPU.
|
||||||
|
- Qwen2.5-1.5B-Instruct is large enough for formatting/summarization/notification jobs but small enough to keep latency bounded. It should not be marketed as a high-quality general assistant model.
|
||||||
|
- The Hugging Face model card identifies it as Qwen2.5-1.5B-Instruct converted to OpenVINO IR with INT4_SYM NNCF weight compression and states compatibility with OpenVINO 2025.1.0+; the local runtime is newer than that baseline.
|
||||||
|
- OpenVINO GenAI `LLMPipeline` is the right first runtime because the existing local NPU stack already uses OpenVINO GenAI successfully for Whisper, and it exposes a simple bounded generate call with cache controls.
|
||||||
|
|
||||||
|
Deferred alternatives:
|
||||||
|
|
||||||
|
- Larger 3B/7B local LLMs: defer until the 1.5B contract proves stable; larger models increase compile time, memory pressure, and NPU contention.
|
||||||
|
- CPU/GPU fallback inside this service: defer; fallback would blur the NPU verification contract. If fallback is later approved, return `device_actual` and keep NPU-only health separate.
|
||||||
|
- Manual `EXPORT_BLOB`/`BLOB_PATH`: defer until compile latency is proven to dominate despite `CACHE_DIR`. If used later, record OpenVINO version, NPU compiler/driver versions, model id, quantization flags, and source model path; invalidate after OpenVINO/NPU driver upgrades.
|
||||||
|
|
||||||
|
## Runtime bounds
|
||||||
|
|
||||||
|
Pipeline configuration for the first milestone:
|
||||||
|
|
||||||
|
```text
|
||||||
|
CACHE_DIR=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
|
||||||
|
MAX_PROMPT_LEN=1024
|
||||||
|
MIN_RESPONSE_LEN=64
|
||||||
|
PREFILL_HINT=DYNAMIC
|
||||||
|
GENERATE_HINT=FAST_COMPILE
|
||||||
|
```
|
||||||
|
|
||||||
|
Request bounds:
|
||||||
|
|
||||||
|
- `input`: required non-empty string; max `6000` characters before prompt templating.
|
||||||
|
- `job`: one of `title`, `summary`, `notification`, `memory_candidate`.
|
||||||
|
- `max_new_tokens`: optional; default by job; hard max `256`.
|
||||||
|
- Concurrency: generation must be serialized inside the process with a lock because the NPU is shared with Whisper/embeddings/prototype sidecars.
|
||||||
|
- Logging: log method/path/status and timing only; never log raw `input` or generated text by default.
|
||||||
|
|
||||||
|
Expected latency target:
|
||||||
|
|
||||||
|
- Cold-ish first generation with cache available: acceptable if roughly 15 seconds or less for a short prompt on the staged model.
|
||||||
|
- Warm short jobs: target under 5 seconds for `title`/`notification` and under 10 seconds for `summary`/`memory_candidate`.
|
||||||
|
- Defer promotion if p95 warm latency exceeds 15 seconds for 24-96 generated tokens, or if cold compile regularly blocks the NPU long enough to degrade live Whisper/embeddings.
|
||||||
|
|
||||||
|
These are prototype acceptance targets, not SLOs for live Atlas routing.
|
||||||
|
|
||||||
|
## CLI contract
|
||||||
|
|
||||||
|
Command shape:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-genai-npu-worker
|
||||||
|
/home/will/.venvs/npu/bin/python worker.py \
|
||||||
|
--job title \
|
||||||
|
--input 'Synthetic non-private text to title.' \
|
||||||
|
--max-new-tokens 32
|
||||||
|
```
|
||||||
|
|
||||||
|
CLI stdout is JSON with the same response shape as HTTP generation. Exit code must be:
|
||||||
|
|
||||||
|
- `0` when the job succeeds and `npu_busy_delta_us > 0`.
|
||||||
|
- non-zero when input validation fails, model load/generation fails, or NPU busy-time delta is not positive.
|
||||||
|
|
||||||
|
The CLI must not write memory, change service routing, or start persistent services.
|
||||||
|
|
||||||
|
## HTTP contract
|
||||||
|
|
||||||
|
Start temporary local server only:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-genai-npu-worker
|
||||||
|
/home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820
|
||||||
|
```
|
||||||
|
|
||||||
|
Endpoints:
|
||||||
|
|
||||||
|
```text
|
||||||
|
GET /healthz
|
||||||
|
GET /models
|
||||||
|
POST /v1/worker/generate
|
||||||
|
POST /v1/worker/extract-memory-candidates
|
||||||
|
POST /v1/worker/condense-notification
|
||||||
|
```
|
||||||
|
|
||||||
|
`GET /healthz` response fields (illustrative example; the prototype additionally reports `max_new_tokens`, the hard cap of `256`):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ok": true,
|
||||||
|
"model": "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov",
|
||||||
|
"model_path": "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov",
|
||||||
|
"device": "NPU",
|
||||||
|
"cache_dir": "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4",
|
||||||
|
"cache_exists": true,
|
||||||
|
"loaded": false,
|
||||||
|
"initial_load_ms": null,
|
||||||
|
"busy_time_us": 0,
|
||||||
|
"max_input_chars": 6000,
|
||||||
|
"jobs": ["memory_candidate", "notification", "summary", "title"],
|
||||||
|
"bind": "127.0.0.1:18820"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
`POST /v1/worker/generate` request:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"job": "summary",
|
||||||
|
"input": "Synthetic non-private text to summarize.",
|
||||||
|
"max_new_tokens": 80
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Specialized aliases:
|
||||||
|
|
||||||
|
- `POST /v1/worker/extract-memory-candidates` implies `job=memory_candidate`.
|
||||||
|
- `POST /v1/worker/condense-notification` implies `job=notification`.
|
||||||
|
- Backward-compatible request `job=memory` may map to `memory_candidate`, but new clients should use `memory_candidate`.
|
||||||
|
|
||||||
|
Successful generation response:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model": "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov",
|
||||||
|
"device": "NPU",
|
||||||
|
"job": "summary",
|
||||||
|
"text": "...",
|
||||||
|
"json": null,
|
||||||
|
"timing_ms": {
|
||||||
|
"load": 0.0,
|
||||||
|
"initial_load": 10989.08,
|
||||||
|
"generate": 3157.94,
|
||||||
|
"total": 3157.94
|
||||||
|
},
|
||||||
|
"npu_busy_delta_us": 2650724,
|
||||||
|
"npu_busy_before_us": 123,
|
||||||
|
"npu_busy_after_us": 2650847,
|
||||||
|
"cache_dir": "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Validation/error behavior:
|
||||||
|
|
||||||
|
- Unsupported path: `404` JSON `{"error":"not found"}`.
|
||||||
|
- Unsupported job, empty input, too-long input, invalid token bound, missing model, or generation failure: JSON `{"error":"..."}` with a non-2xx status. The current stdlib prototype returns `400` for all of these errors; future implementations must keep the status non-2xx but may choose more specific codes.
|
||||||
|
- If `npu_busy_delta_us <= 0`, the response should be treated as failed by smoke tests even if an HTTP handler emitted `200`; the refreshed prototype returns `503` with the generation payload plus an `error` field.
|
||||||
|
|
||||||
|
## Prompt/job contract
|
||||||
|
|
||||||
|
`title`:
|
||||||
|
|
||||||
|
- Input: short task/log/message excerpt.
|
||||||
|
- Output: one title, 8 words or fewer, no markdown required.
|
||||||
|
- Default `max_new_tokens`: 32.
|
||||||
|
|
||||||
|
`summary`:
|
||||||
|
|
||||||
|
- Input: synthetic/non-private text excerpt.
|
||||||
|
- Output: one short paragraph or up to 4 bullets.
|
||||||
|
- Default `max_new_tokens`: 160.
|
||||||
|
|
||||||
|
`notification`:
|
||||||
|
|
||||||
|
- Input: synthetic/non-private alert/log excerpt.
|
||||||
|
- Output target: JSON object with `severity`, `category`, `summary`, `action_needed`.
|
||||||
|
- Default `max_new_tokens`: 96.
|
||||||
|
- Client must tolerate `json: null` and must parse/validate the output itself before using it.
|
||||||
|
|
||||||
|
`memory_candidate`:
|
||||||
|
|
||||||
|
- Input: synthetic/non-private conversation excerpt.
|
||||||
|
- Output target: JSON object with `candidates` and `notes`; candidates are proposals only.
|
||||||
|
- Default `max_new_tokens`: 192.
|
||||||
|
- This worker must never call Hermes memory tools or write durable memory directly.
|
||||||
|
|
||||||
|
## Smoke-test plan using non-private data
|
||||||
|
|
||||||
|
Do not use private vault notes, screenshots, email, chat logs, or document/image directories. Use synthetic text like this:
|
||||||
|
|
||||||
|
```text
|
||||||
|
Atlas received a kanban notification that an OpenVINO NPU prototype finished smoke testing. The reviewer needs a concise status and next action. No live gateway routing changed.
|
||||||
|
```
|
||||||
|
|
||||||
|
Direct NPU smoke:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-genai-npu-worker
|
||||||
|
before=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
|
||||||
|
/home/will/.venvs/npu/bin/python smoke_llm_npu.py \
|
||||||
|
--prompt 'Write a concise title for: synthetic NPU worker contract smoke.' \
|
||||||
|
--max-new-tokens 24
|
||||||
|
status=$?
|
||||||
|
after=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
|
||||||
|
printf 'external_busy_delta_us=%s\n' "$((after-before))"
|
||||||
|
test "$status" -eq 0
|
||||||
|
test "$((after-before))" -gt 0
|
||||||
|
```
|
||||||
|
|
||||||
|
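Temporary HTTP smoke (the server is started in the background, so retry the `/healthz` check until it answers before sending the generation request):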
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-genai-npu-worker
|
||||||
|
/home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820 &
|
||||||
|
pid=$!
|
||||||
|
trap 'kill "$pid" 2>/dev/null || true' EXIT
|
||||||
|
|
||||||
|
curl -fsS http://127.0.0.1:18820/healthz | python -m json.tool
|
||||||
|
before=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
|
||||||
|
curl -fsS http://127.0.0.1:18820/v1/worker/generate \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"job":"title","input":"Synthetic NPU worker smoke with no routing changes.","max_new_tokens":24}' \
|
||||||
|
| tee /tmp/openvino-genai-worker-smoke.json \
|
||||||
|
| python -m json.tool
|
||||||
|
after=$(cat /sys/class/accel/accel0/device/npu_busy_time_us)
|
||||||
|
python - <<'PY'
|
||||||
|
import json
|
||||||
|
p=json.load(open('/tmp/openvino-genai-worker-smoke.json'))
|
||||||
|
assert p['npu_busy_delta_us'] > 0, p
|
||||||
|
assert p['device'] == 'NPU', p
|
||||||
|
PY
|
||||||
|
test "$((after-before))" -gt 0
|
||||||
|
kill "$pid"
|
||||||
|
trap - EXIT
|
||||||
|
```
|
||||||
|
|
||||||
|
Also verify the temporary listener is gone:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ss -ltnp | grep ':18820' && { echo 'temporary smoke server still running'; exit 1; } || true
|
||||||
|
```
|
||||||
|
|
||||||
|
Unit tests that do not load the model or require private data:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-genai-npu-worker
|
||||||
|
python -m pytest -q
|
||||||
|
```
|
||||||
|
|
||||||
|
## NPU busy-time verification plan
|
||||||
|
|
||||||
|
Acceptance for any NPU claim requires all of the following:
|
||||||
|
|
||||||
|
1. Confirm the sysfs counter exists and is readable:
|
||||||
|
`test -r /sys/class/accel/accel0/device/npu_busy_time_us`.
|
||||||
|
2. Read `busy_before` immediately before the generation call.
|
||||||
|
3. Run exactly one bounded generation against the candidate worker.
|
||||||
|
4. Read `busy_after` immediately after generation completes.
|
||||||
|
5. Require `busy_after > busy_before` and response `npu_busy_delta_us > 0`.
|
||||||
|
6. Record model id, runtime version, prompt chars, max tokens, load/generate timings, and busy delta in the review handoff.
|
||||||
|
7. If the counter is unchanged, mark the smoke as failed even if HTTP returned `200` and text was generated.
|
||||||
|
|
||||||
|
Because the NPU is shared, a positive external delta proves NPU activity during the window but not exclusive attribution. Prefer a quiet window with no concurrent Whisper/embedding jobs for review-grade measurements; otherwise repeat and compare worker-reported internal delta with the external counter.
|
||||||
|
|
||||||
|
## Docs/diagram implications
|
||||||
|
|
||||||
|
If this worker is kept as a prototype, docs and diagrams should show:
|
||||||
|
|
||||||
|
- Live baseline remains RAG `:18810`, Whisper NPU `:18816`, embeddings `:18817`.
|
||||||
|
- GenAI worker `:18820` is proposed/prototype/not-live unless explicitly approved and enabled.
|
||||||
|
- No arrow from Hermes/Atlas gateway or LiteLLM primary routing to `:18820` unless a later approved integration actually exists.
|
||||||
|
- Runbooks should include the CLI/HTTP smoke commands, `ss` listener checks, and NPU busy-time counter checks.
|
||||||
|
- Service maps should label this as "bounded background generation" rather than "chat" or "assistant model".
|
||||||
|
|
||||||
|
## Explicit no-go / defer criteria
|
||||||
|
|
||||||
|
No-go for implementation or promotion:
|
||||||
|
|
||||||
|
- Model path missing, OpenVINO GenAI import fails, or NPU device is unavailable.
|
||||||
|
- `/sys/class/accel/accel0/device/npu_busy_time_us` is unreadable or does not increase during generation.
|
||||||
|
- Warm bounded jobs exceed the prototype latency target or starve live Whisper/embedding services.
|
||||||
|
- The worker needs private documents/images/chat logs for smoke testing.
|
||||||
|
- The worker requires Atlas/Hermes/gateway/LiteLLM/RAG routing changes to demonstrate value.
|
||||||
|
- The API starts accepting arbitrary chat history, tool-call instructions, unbounded prompts, or large outputs.
|
||||||
|
- The service logs raw prompt bodies by default.
|
||||||
|
- Persistent service enablement is requested without an explicit Will approval gate and a reviewer smoke handoff.
|
||||||
|
|
||||||
|
Defer, do not solve in this lane:
|
||||||
|
|
||||||
|
- Primary assistant routing, LiteLLM model registration, gateway fallback, or tool-calling integration.
|
||||||
|
- RAG query rewriting, RAG answer generation, or collection mutation.
|
||||||
|
- Private document/image triage.
|
||||||
|
- Multi-model selection, CPU/GPU fallback policy, batching, streaming, or auth exposure beyond localhost.
|
||||||
@@ -15,9 +15,11 @@ The worker does not write memory, does not restart Atlas/Hermes, does not change
|
|||||||
|
|
||||||
## Files
|
## Files
|
||||||
|
|
||||||
|
- `CONTRACT.md` — bounded-worker service contract, endpoint/CLI API, smoke plan, NPU verification, docs implications, and no-go criteria.
|
||||||
- `worker.py` — stdlib HTTP API plus CLI wrapper.
|
- `worker.py` — stdlib HTTP API plus CLI wrapper.
|
||||||
- `smoke_llm_npu.py` — direct GenAI smoke test with NPU busy-time verification.
|
- `smoke_llm_npu.py` — direct GenAI smoke test with NPU busy-time verification.
|
||||||
- `systemd/openvino-genai-npu-worker.service` — optional user-service template; not installed by this prototype.
|
- `tests/test_worker.py` — unit tests with a fake GenAI pipeline and synthetic busy-time counter.
|
||||||
|
- `systemd/openvino-genai-npu-worker.service` — reviewed local-only user-service template for `127.0.0.1:18820`.
|
||||||
|
|
||||||
## Model/cache
|
## Model/cache
|
||||||
|
|
||||||
@@ -72,15 +74,20 @@ Observed cold-ish smoke after download/cache setup:
|
|||||||
--input 'Kanban task asks for a small OpenVINO GenAI NPU worker prototype.'
|
--input 'Kanban task asks for a small OpenVINO GenAI NPU worker prototype.'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Exit code is non-zero if validation fails, generation fails, or the worker-reported `npu_busy_delta_us` is not positive.
|
||||||
|
|
||||||
## HTTP usage
|
## HTTP usage
|
||||||
|
|
||||||
Start locally only:
|
Start locally only:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd /home/will/lab/swarm/openvino-genai-npu-worker
|
cd /home/will/lab/swarm/openvino-genai-npu-worker
|
||||||
|
ss -ltnp | grep ':18820' && { echo 'port 18820 already in use'; exit 1; } || true
|
||||||
/home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820
|
/home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The server also refuses startup if a listener is already accepting connections on `127.0.0.1:18820`.
|
||||||
|
|
||||||
Endpoints:
|
Endpoints:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
@@ -102,6 +109,39 @@ curl -s http://127.0.0.1:18820/v1/worker/generate \
|
|||||||
|
|
||||||
Response includes `npu_busy_delta_us`; treat zero as failure even if HTTP status is 200.
|
Response includes `npu_busy_delta_us`; treat zero as failure even if HTTP status is 200.
|
||||||
|
|
||||||
|
## Unit tests
|
||||||
|
|
||||||
|
These tests use only synthetic strings and a fake GenAI pipeline, so they do not load the model or touch private data:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/will/lab/swarm/openvino-genai-npu-worker
|
||||||
|
python -m pytest -q
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environment variables
|
||||||
|
|
||||||
|
```text
|
||||||
|
OV_GENAI_NPU_MODEL=/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov
|
||||||
|
OV_GENAI_NPU_CACHE=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
|
||||||
|
OV_GENAI_NPU_HOST=127.0.0.1
|
||||||
|
OV_GENAI_NPU_PORT=18820
|
||||||
|
```
|
||||||
|
|
||||||
|
Only `127.0.0.1` is accepted by the current prototype; wider binds require an explicit code change and approval.
|
||||||
|
|
||||||
|
## Systemd user service
|
||||||
|
|
||||||
|
A reviewed local-only unit exists at `systemd/openvino-genai-npu-worker.service` for persistent background use after foreground smoke succeeds with a positive NPU busy-time delta:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
install -m 0644 systemd/openvino-genai-npu-worker.service ~/.config/systemd/user/openvino-genai-npu-worker.service
|
||||||
|
systemctl --user daemon-reload
|
||||||
|
systemctl --user enable --now openvino-genai-npu-worker.service
|
||||||
|
systemctl --user status openvino-genai-npu-worker.service --no-pager
|
||||||
|
```
|
||||||
|
|
||||||
|
The service remains isolated: do not route primary Atlas/Hermes chat, gateway output, or automatic memory writes to it without a separate approved integration.
|
||||||
|
|
||||||
## Safety boundaries
|
## Safety boundaries
|
||||||
|
|
||||||
- Binds only to `127.0.0.1` by default; non-local bind is refused in code.
|
- Binds only to `127.0.0.1` by default; non-local bind is refused in code.
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
[pytest]
|
||||||
|
testpaths = tests
|
||||||
@@ -10,31 +10,42 @@ import argparse
|
|||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
import openvino_genai as ov_genai
|
|
||||||
|
|
||||||
DEFAULT_MODEL = "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov"
|
DEFAULT_MODEL = "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov"
|
||||||
DEFAULT_CACHE = "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
|
DEFAULT_CACHE = "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
|
||||||
BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
|
BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
|
||||||
|
|
||||||
|
|
||||||
def read_busy() -> int:
|
def import_openvino_genai() -> Any:
|
||||||
return int(BUSY_PATH.read_text().strip())
|
import openvino_genai as ov_genai # type: ignore[import-not-found]
|
||||||
|
|
||||||
|
return ov_genai
|
||||||
|
|
||||||
|
|
||||||
|
def read_busy(path: Path = BUSY_PATH) -> int:
|
||||||
|
return int(path.read_text().strip())
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--model", default=DEFAULT_MODEL)
|
parser.add_argument("--model", default=DEFAULT_MODEL)
|
||||||
parser.add_argument("--cache-dir", default=DEFAULT_CACHE)
|
parser.add_argument("--cache-dir", default=DEFAULT_CACHE)
|
||||||
parser.add_argument("--prompt", default="Write a concise title for: User asked Atlas to summarize NPU worker options.")
|
parser.add_argument("--busy-path", default=str(BUSY_PATH))
|
||||||
|
parser.add_argument("--prompt", default="Write a concise title for: Synthetic NPU worker contract smoke with no routing changes.")
|
||||||
parser.add_argument("--max-new-tokens", type=int, default=24)
|
parser.add_argument("--max-new-tokens", type=int, default=24)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
model_path = Path(args.model)
|
model_path = Path(args.model)
|
||||||
cache_dir = Path(args.cache_dir)
|
cache_dir = Path(args.cache_dir)
|
||||||
|
busy_path = Path(args.busy_path)
|
||||||
cache_dir.mkdir(parents=True, exist_ok=True)
|
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||||
if not model_path.exists():
|
if not model_path.exists():
|
||||||
raise SystemExit(f"model path does not exist: {model_path}")
|
raise SystemExit(f"model path does not exist: {model_path}")
|
||||||
|
if not busy_path.exists():
|
||||||
|
raise SystemExit(f"NPU busy-time counter does not exist: {busy_path}")
|
||||||
|
if args.max_new_tokens < 1 or args.max_new_tokens > 256:
|
||||||
|
raise SystemExit("max-new-tokens must be between 1 and 256")
|
||||||
|
|
||||||
config = {
|
config = {
|
||||||
"CACHE_DIR": str(cache_dir),
|
"CACHE_DIR": str(cache_dir),
|
||||||
@@ -44,15 +55,16 @@ def main() -> int:
|
|||||||
"GENERATE_HINT": "FAST_COMPILE",
|
"GENERATE_HINT": "FAST_COMPILE",
|
||||||
}
|
}
|
||||||
|
|
||||||
before = read_busy()
|
ov_genai = import_openvino_genai()
|
||||||
|
before = read_busy(busy_path)
|
||||||
load_start = time.monotonic()
|
load_start = time.monotonic()
|
||||||
pipe = ov_genai.LLMPipeline(str(model_path), "NPU", config)
|
pipe = ov_genai.LLMPipeline(str(model_path), "NPU", **config)
|
||||||
load_ms = round((time.monotonic() - load_start) * 1000, 2)
|
load_ms = round((time.monotonic() - load_start) * 1000, 2)
|
||||||
|
|
||||||
gen_start = time.monotonic()
|
gen_start = time.monotonic()
|
||||||
output = pipe.generate(args.prompt, max_new_tokens=args.max_new_tokens)
|
output = pipe.generate(args.prompt, max_new_tokens=args.max_new_tokens)
|
||||||
gen_ms = round((time.monotonic() - gen_start) * 1000, 2)
|
gen_ms = round((time.monotonic() - gen_start) * 1000, 2)
|
||||||
after = read_busy()
|
after = read_busy(busy_path)
|
||||||
result = {
|
result = {
|
||||||
"model": str(model_path),
|
"model": str(model_path),
|
||||||
"device": "NPU",
|
"device": "NPU",
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ Type=simple
|
|||||||
WorkingDirectory=/home/will/lab/swarm/openvino-genai-npu-worker
|
WorkingDirectory=/home/will/lab/swarm/openvino-genai-npu-worker
|
||||||
Environment=OV_GENAI_NPU_MODEL=/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov
|
Environment=OV_GENAI_NPU_MODEL=/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov
|
||||||
Environment=OV_GENAI_NPU_CACHE=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
|
Environment=OV_GENAI_NPU_CACHE=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4
|
||||||
|
Environment=OV_GENAI_NPU_HOST=127.0.0.1
|
||||||
Environment=OV_GENAI_NPU_PORT=18820
|
Environment=OV_GENAI_NPU_PORT=18820
|
||||||
ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-genai-npu-worker/worker.py --host 127.0.0.1 --port 18820
|
ExecStart=/home/will/.venvs/npu/bin/python /home/will/lab/swarm/openvino-genai-npu-worker/worker.py --host 127.0.0.1 --port 18820
|
||||||
Restart=on-failure
|
Restart=on-failure
|
||||||
|
|||||||
@@ -0,0 +1,131 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import worker
|
||||||
|
|
||||||
|
|
||||||
|
class FakePipeline:
|
||||||
|
def __init__(self, model_path: str, device: str, config: dict[str, object], busy_path: Path, output: str = "Synthetic title"):
|
||||||
|
self.model_path = model_path
|
||||||
|
self.device = device
|
||||||
|
self.config = config
|
||||||
|
self.busy_path = busy_path
|
||||||
|
self.output = output
|
||||||
|
self.calls: list[tuple[str, int]] = []
|
||||||
|
|
||||||
|
def generate(self, prompt: str, *, max_new_tokens: int):
|
||||||
|
self.calls.append((prompt, max_new_tokens))
|
||||||
|
before = int(self.busy_path.read_text().strip())
|
||||||
|
self.busy_path.write_text(str(before + 1234))
|
||||||
|
return self.output
|
||||||
|
|
||||||
|
|
||||||
|
class FakeGenAI:
|
||||||
|
def __init__(self, busy_path: Path, output: str = "Synthetic title"):
|
||||||
|
self.busy_path = busy_path
|
||||||
|
self.output = output
|
||||||
|
self.pipeline: FakePipeline | None = None
|
||||||
|
|
||||||
|
def LLMPipeline(self, model_path: str, device: str, *args: object, **kwargs: object): # noqa: N802 - mirrors OpenVINO API
|
||||||
|
if args and isinstance(args[0], dict):
|
||||||
|
config: dict[str, object] = {str(k): v for k, v in args[0].items()}
|
||||||
|
else:
|
||||||
|
config = dict(kwargs)
|
||||||
|
self.pipeline = FakePipeline(model_path, device, config, self.busy_path, self.output)
|
||||||
|
return self.pipeline
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def worker_paths(tmp_path: Path):
|
||||||
|
model_path = tmp_path / "model"
|
||||||
|
cache_dir = tmp_path / "cache"
|
||||||
|
busy_path = tmp_path / "npu_busy_time_us"
|
||||||
|
model_path.mkdir()
|
||||||
|
busy_path.write_text("100")
|
||||||
|
return model_path, cache_dir, busy_path
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_uses_npu_config_and_reports_busy_delta(monkeypatch: pytest.MonkeyPatch, worker_paths):
|
||||||
|
model_path, cache_dir, busy_path = worker_paths
|
||||||
|
fake_genai = FakeGenAI(busy_path)
|
||||||
|
monkeypatch.setattr(worker, "import_openvino_genai", lambda: fake_genai)
|
||||||
|
|
||||||
|
npu_worker = worker.NpuWorker(str(model_path), str(cache_dir), busy_path=busy_path, bind_port=18820)
|
||||||
|
result = npu_worker.generate("title", "Synthetic non-private kanban notification.", max_new_tokens=24)
|
||||||
|
|
||||||
|
assert result.npu_busy_before_us == 100
|
||||||
|
assert result.npu_busy_after_us == 1334
|
||||||
|
assert result.npu_busy_delta_us == 1234
|
||||||
|
assert result.text == "Synthetic title"
|
||||||
|
assert fake_genai.pipeline is not None
|
||||||
|
assert fake_genai.pipeline.device == "NPU"
|
||||||
|
assert fake_genai.pipeline.config["CACHE_DIR"] == str(cache_dir)
|
||||||
|
assert fake_genai.pipeline.config["MAX_PROMPT_LEN"] == 1024
|
||||||
|
assert fake_genai.pipeline.calls[0][1] == 24
|
||||||
|
|
||||||
|
|
||||||
|
def test_memory_alias_json_wrapping(monkeypatch: pytest.MonkeyPatch, worker_paths):
|
||||||
|
model_path, cache_dir, busy_path = worker_paths
|
||||||
|
fake_genai = FakeGenAI(busy_path, output='[{"fact":"synthetic stable preference","confidence":0.8}]')
|
||||||
|
monkeypatch.setattr(worker, "import_openvino_genai", lambda: fake_genai)
|
||||||
|
|
||||||
|
npu_worker = worker.NpuWorker(str(model_path), str(cache_dir), busy_path=busy_path)
|
||||||
|
result = npu_worker.generate("memory_candidate", "Synthetic user says they prefer concise answers.")
|
||||||
|
|
||||||
|
assert result.parsed_json is not None
|
||||||
|
assert result.parsed_json["candidates"][0]["fact"] == "synthetic stable preference"
|
||||||
|
assert "wrapped" in result.parsed_json["notes"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("job", "user_input", "max_new_tokens", "message"),
|
||||||
|
[
|
||||||
|
("bad", "hello", 1, "unsupported job"),
|
||||||
|
("title", "", 1, "non-empty"),
|
||||||
|
("title", "x" * (worker.MAX_INPUT_CHARS + 1), 1, "input too long"),
|
||||||
|
("title", "hello", worker.MAX_NEW_TOKENS + 1, "max_new_tokens"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_validation_errors(monkeypatch: pytest.MonkeyPatch, worker_paths, job: str, user_input: str, max_new_tokens: int, message: str):
|
||||||
|
model_path, cache_dir, busy_path = worker_paths
|
||||||
|
monkeypatch.setattr(worker, "import_openvino_genai", lambda: FakeGenAI(busy_path))
|
||||||
|
npu_worker = worker.NpuWorker(str(model_path), str(cache_dir), busy_path=busy_path)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match=message):
|
||||||
|
npu_worker.generate(job, user_input, max_new_tokens=max_new_tokens)
|
||||||
|
|
||||||
|
|
||||||
|
def test_health_reports_actual_bind_and_limits(worker_paths):
    """``health()`` reports the worker's own bind address and the request limits."""
    model_path, cache_dir, busy_path = worker_paths
    npu_worker = worker.NpuWorker(
        str(model_path),
        str(cache_dir),
        busy_path=busy_path,
        bind_host="127.0.0.1",
        bind_port=18821,
    )

    health = npu_worker.health()

    # The bind string must come from the constructor arguments used above.
    assert health["bind"] == "127.0.0.1:18821"
    assert health["max_input_chars"] == 6000
    assert health["max_new_tokens"] == 256
    # NOTE(review): presumably the worker_paths fixture seeds the busy-time file with 100 - confirm.
    assert health["busy_time_us"] == 100
|
||||||
|
|
||||||
|
|
||||||
|
def test_response_payload_shape(worker_paths):
    """``response_payload`` yields a JSON-serializable dict with device, job and parsed JSON."""
    model_path, cache_dir, busy_path = worker_paths
    npu_worker = worker.NpuWorker(str(model_path), str(cache_dir), busy_path=busy_path)

    # Hand-built result so no generation is performed in this test.
    timings = {"load": 1.0, "initial_load": 1.0, "generate": 2.0, "total": 3.0}
    result = worker.GenerationResult(
        text="ok",
        parsed_json={"severity": "info"},
        timing_ms=timings,
        npu_busy_delta_us=5,
        npu_busy_before_us=10,
        npu_busy_after_us=15,
    )

    payload = worker.response_payload(npu_worker, "notification", result)

    # json.dumps raises on non-serializable content and returns a non-empty string otherwise.
    assert json.dumps(payload)
    assert payload["device"] == "NPU"
    assert payload["job"] == "notification"
    assert payload["json"] == {"severity": "info"}
|
||||||
@@ -10,6 +10,7 @@ import argparse
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import socket
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@@ -18,8 +19,6 @@ from pathlib import Path
|
|||||||
from typing import Any, cast
|
from typing import Any, cast
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import openvino_genai as ov_genai # type: ignore[import-not-found]
|
|
||||||
|
|
||||||
MODEL_ID = "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov"
|
MODEL_ID = "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov"
|
||||||
DEFAULT_MODEL_PATH = "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov"
|
DEFAULT_MODEL_PATH = "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov"
|
||||||
DEFAULT_CACHE_DIR = "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
|
DEFAULT_CACHE_DIR = "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4"
|
||||||
@@ -27,6 +26,14 @@ BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
|
|||||||
HOST = "127.0.0.1"
|
HOST = "127.0.0.1"
|
||||||
PORT = 18820
|
PORT = 18820
|
||||||
MAX_INPUT_CHARS = 6000
|
MAX_INPUT_CHARS = 6000
|
||||||
|
MAX_NEW_TOKENS = 256
|
||||||
|
GENAI_CONFIG = {
|
||||||
|
"CACHE_DIR": DEFAULT_CACHE_DIR,
|
||||||
|
"MAX_PROMPT_LEN": 1024,
|
||||||
|
"MIN_RESPONSE_LEN": 64,
|
||||||
|
"PREFILL_HINT": "DYNAMIC",
|
||||||
|
"GENERATE_HINT": "FAST_COMPILE",
|
||||||
|
}
|
||||||
DEFAULTS = {
|
DEFAULTS = {
|
||||||
"title": 32,
|
"title": 32,
|
||||||
"summary": 160,
|
"summary": 160,
|
||||||
@@ -48,8 +55,20 @@ PROMPTS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def read_busy() -> int:
|
def import_openvino_genai() -> Any:
|
||||||
return int(BUSY_PATH.read_text().strip())
|
"""Import OpenVINO GenAI lazily so unit tests do not require the NPU venv."""
|
||||||
|
|
||||||
|
import openvino_genai as ov_genai # type: ignore[import-not-found]
|
||||||
|
|
||||||
|
return ov_genai
|
||||||
|
|
||||||
|
|
||||||
|
def listener_exists(host: str, port: int) -> bool:
    """Report whether a TCP connection to ``host:port`` is accepted.

    Opens a short-lived IPv4 stream socket with a 0.2 second timeout and
    attempts to connect; the socket is closed before returning.

    Returns:
        True when ``connect_ex`` reports success (0), otherwise False.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.settimeout(0.2)
        # connect_ex returns an error code instead of raising for connect failures.
        status = probe.connect_ex((host, port))
    finally:
        probe.close()
    return status == 0
|
||||||
|
|
||||||
|
|
||||||
def coerce_json(text: str) -> Any | None:
|
def coerce_json(text: str) -> Any | None:
|
||||||
@@ -79,9 +98,20 @@ class GenerationResult:
|
|||||||
|
|
||||||
|
|
||||||
class NpuWorker:
|
class NpuWorker:
|
||||||
def __init__(self, model_path: str, cache_dir: str):
|
def __init__(
|
||||||
|
self,
|
||||||
|
model_path: str,
|
||||||
|
cache_dir: str,
|
||||||
|
*,
|
||||||
|
busy_path: Path = BUSY_PATH,
|
||||||
|
bind_host: str = HOST,
|
||||||
|
bind_port: int = PORT,
|
||||||
|
):
|
||||||
self.model_path = Path(model_path)
|
self.model_path = Path(model_path)
|
||||||
self.cache_dir = Path(cache_dir)
|
self.cache_dir = Path(cache_dir)
|
||||||
|
self.busy_path = Path(busy_path)
|
||||||
|
self.bind_host = bind_host
|
||||||
|
self.bind_port = bind_port
|
||||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||||
self._pipe = None
|
self._pipe = None
|
||||||
self._load_ms: float | None = None
|
self._load_ms: float | None = None
|
||||||
@@ -89,21 +119,20 @@ class NpuWorker:
|
|||||||
self._loaded_at: float | None = None
|
self._loaded_at: float | None = None
|
||||||
if not self.model_path.exists():
|
if not self.model_path.exists():
|
||||||
raise FileNotFoundError(f"model path does not exist: {self.model_path}")
|
raise FileNotFoundError(f"model path does not exist: {self.model_path}")
|
||||||
|
if not self.busy_path.exists():
|
||||||
|
raise FileNotFoundError(f"NPU busy-time counter does not exist: {self.busy_path}")
|
||||||
|
|
||||||
|
def read_busy(self) -> int:
|
||||||
|
return int(self.busy_path.read_text().strip())
|
||||||
|
|
||||||
def load(self) -> None:
|
def load(self) -> None:
|
||||||
if self._pipe is not None:
|
if self._pipe is not None:
|
||||||
return
|
return
|
||||||
start = time.monotonic()
|
start = time.monotonic()
|
||||||
# NPU GenAI requires bounded prompt/response shapes; CACHE_DIR enables compiled blob caching.
|
# NPU GenAI requires bounded prompt/response shapes; CACHE_DIR enables compiled blob caching.
|
||||||
self._pipe = ov_genai.LLMPipeline(
|
ov_genai = import_openvino_genai()
|
||||||
str(self.model_path),
|
config = GENAI_CONFIG | {"CACHE_DIR": str(self.cache_dir)}
|
||||||
"NPU",
|
self._pipe = ov_genai.LLMPipeline(str(self.model_path), "NPU", **config)
|
||||||
CACHE_DIR=str(self.cache_dir),
|
|
||||||
MAX_PROMPT_LEN=1024,
|
|
||||||
MIN_RESPONSE_LEN=64,
|
|
||||||
PREFILL_HINT="DYNAMIC",
|
|
||||||
GENERATE_HINT="FAST_COMPILE",
|
|
||||||
)
|
|
||||||
self._load_ms = round((time.monotonic() - start) * 1000, 2)
|
self._load_ms = round((time.monotonic() - start) * 1000, 2)
|
||||||
self._loaded_at = time.time()
|
self._loaded_at = time.time()
|
||||||
|
|
||||||
@@ -115,19 +144,19 @@ class NpuWorker:
|
|||||||
if len(user_input) > MAX_INPUT_CHARS:
|
if len(user_input) > MAX_INPUT_CHARS:
|
||||||
raise ValueError(f"input too long: {len(user_input)} chars > {MAX_INPUT_CHARS}")
|
raise ValueError(f"input too long: {len(user_input)} chars > {MAX_INPUT_CHARS}")
|
||||||
max_new_tokens = int(max_new_tokens or DEFAULTS[job])
|
max_new_tokens = int(max_new_tokens or DEFAULTS[job])
|
||||||
if max_new_tokens < 1 or max_new_tokens > 256:
|
if max_new_tokens < 1 or max_new_tokens > MAX_NEW_TOKENS:
|
||||||
raise ValueError("max_new_tokens must be between 1 and 256")
|
raise ValueError(f"max_new_tokens must be between 1 and {MAX_NEW_TOKENS}")
|
||||||
prompt = PROMPTS[job].format(input=user_input.strip())
|
prompt = PROMPTS[job].format(input=user_input.strip())
|
||||||
with self._lock:
|
with self._lock:
|
||||||
load_start = time.monotonic()
|
load_start = time.monotonic()
|
||||||
self.load()
|
self.load()
|
||||||
load_ms = round((time.monotonic() - load_start) * 1000, 2)
|
load_ms = round((time.monotonic() - load_start) * 1000, 2)
|
||||||
before = read_busy()
|
before = self.read_busy()
|
||||||
gen_start = time.monotonic()
|
gen_start = time.monotonic()
|
||||||
pipe = cast(Any, self._pipe)
|
pipe = cast(Any, self._pipe)
|
||||||
text = str(pipe.generate(prompt, max_new_tokens=max_new_tokens)).strip()
|
text = str(pipe.generate(prompt, max_new_tokens=max_new_tokens)).strip()
|
||||||
generate_ms = round((time.monotonic() - gen_start) * 1000, 2)
|
generate_ms = round((time.monotonic() - gen_start) * 1000, 2)
|
||||||
after = read_busy()
|
after = self.read_busy()
|
||||||
parsed = coerce_json(text) if job in {"memory_candidate", "notification"} else None
|
parsed = coerce_json(text) if job in {"memory_candidate", "notification"} else None
|
||||||
if job == "memory_candidate" and isinstance(parsed, list):
|
if job == "memory_candidate" and isinstance(parsed, list):
|
||||||
parsed = {"candidates": parsed, "notes": "model returned a top-level array; worker wrapped it to preserve the API contract"}
|
parsed = {"candidates": parsed, "notes": "model returned a top-level array; worker wrapped it to preserve the API contract"}
|
||||||
@@ -151,10 +180,11 @@ class NpuWorker:
|
|||||||
"loaded": self._pipe is not None,
|
"loaded": self._pipe is not None,
|
||||||
"initial_load_ms": self._load_ms,
|
"initial_load_ms": self._load_ms,
|
||||||
"loaded_at": self._loaded_at,
|
"loaded_at": self._loaded_at,
|
||||||
"busy_time_us": read_busy(),
|
"busy_time_us": self.read_busy(),
|
||||||
"max_input_chars": MAX_INPUT_CHARS,
|
"max_input_chars": MAX_INPUT_CHARS,
|
||||||
|
"max_new_tokens": MAX_NEW_TOKENS,
|
||||||
"jobs": sorted(PROMPTS),
|
"jobs": sorted(PROMPTS),
|
||||||
"bind": f"{HOST}:{PORT}",
|
"bind": f"{self.bind_host}:{self.bind_port}",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -175,7 +205,7 @@ def response_payload(worker: NpuWorker, job: str, result: GenerationResult) -> d
|
|||||||
|
|
||||||
def make_handler(worker: NpuWorker):
|
def make_handler(worker: NpuWorker):
|
||||||
class Handler(BaseHTTPRequestHandler):
|
class Handler(BaseHTTPRequestHandler):
|
||||||
server_version = "openvino-genai-npu-worker/0.1"
|
server_version = "openvino-genai-npu-worker/0.2"
|
||||||
|
|
||||||
def log_message(self, format: str, *args: Any) -> None:
|
def log_message(self, format: str, *args: Any) -> None:
|
||||||
# Log only method/path/status metadata, not raw request bodies.
|
# Log only method/path/status metadata, not raw request bodies.
|
||||||
@@ -215,7 +245,12 @@ def make_handler(worker: NpuWorker):
|
|||||||
if job == "memory":
|
if job == "memory":
|
||||||
job = "memory_candidate"
|
job = "memory_candidate"
|
||||||
result = worker.generate(job, str(payload.get("input", "")), payload.get("max_new_tokens"))
|
result = worker.generate(job, str(payload.get("input", "")), payload.get("max_new_tokens"))
|
||||||
self.send_json(200, response_payload(worker, job, result))
|
body = response_payload(worker, job, result)
|
||||||
|
if result.npu_busy_delta_us <= 0:
|
||||||
|
body["error"] = "NPU busy-time counter did not increase during generation"
|
||||||
|
self.send_json(503, body)
|
||||||
|
return
|
||||||
|
self.send_json(200, body)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.send_json(400, {"error": str(exc)})
|
self.send_json(400, {"error": str(exc)})
|
||||||
|
|
||||||
@@ -226,21 +261,24 @@ def cli(argv: list[str] | None = None) -> int:
|
|||||||
parser = argparse.ArgumentParser(description="OpenVINO GenAI NPU worker")
|
parser = argparse.ArgumentParser(description="OpenVINO GenAI NPU worker")
|
||||||
parser.add_argument("--model-path", default=os.environ.get("OV_GENAI_NPU_MODEL", DEFAULT_MODEL_PATH))
|
parser.add_argument("--model-path", default=os.environ.get("OV_GENAI_NPU_MODEL", DEFAULT_MODEL_PATH))
|
||||||
parser.add_argument("--cache-dir", default=os.environ.get("OV_GENAI_NPU_CACHE", DEFAULT_CACHE_DIR))
|
parser.add_argument("--cache-dir", default=os.environ.get("OV_GENAI_NPU_CACHE", DEFAULT_CACHE_DIR))
|
||||||
parser.add_argument("--host", default=HOST)
|
parser.add_argument("--host", default=os.environ.get("OV_GENAI_NPU_HOST", HOST))
|
||||||
parser.add_argument("--port", type=int, default=int(os.environ.get("OV_GENAI_NPU_PORT", PORT)))
|
parser.add_argument("--port", type=int, default=int(os.environ.get("OV_GENAI_NPU_PORT", PORT)))
|
||||||
parser.add_argument("--job", choices=sorted(PROMPTS), help="Run one CLI job instead of serving HTTP")
|
parser.add_argument("--job", choices=sorted(PROMPTS), help="Run one CLI job instead of serving HTTP")
|
||||||
parser.add_argument("--input", help="Input text for --job")
|
parser.add_argument("--input", help="Input text for --job")
|
||||||
parser.add_argument("--max-new-tokens", type=int)
|
parser.add_argument("--max-new-tokens", type=int)
|
||||||
args = parser.parse_args(argv)
|
args = parser.parse_args(argv)
|
||||||
|
|
||||||
worker = NpuWorker(args.model_path, args.cache_dir)
|
if args.host != "127.0.0.1":
|
||||||
|
raise SystemExit("Refusing non-local bind without code change/explicit approval")
|
||||||
|
|
||||||
|
worker = NpuWorker(args.model_path, args.cache_dir, bind_host=args.host, bind_port=args.port)
|
||||||
if args.job:
|
if args.job:
|
||||||
result = worker.generate(args.job, args.input or "", args.max_new_tokens)
|
result = worker.generate(args.job, args.input or "", args.max_new_tokens)
|
||||||
print(json.dumps(response_payload(worker, args.job, result), indent=2))
|
print(json.dumps(response_payload(worker, args.job, result), indent=2))
|
||||||
return 0 if result.npu_busy_delta_us > 0 else 2
|
return 0 if result.npu_busy_delta_us > 0 else 2
|
||||||
|
|
||||||
if args.host != "127.0.0.1":
|
if listener_exists(args.host, args.port):
|
||||||
raise SystemExit("Refusing non-local bind without code change/explicit approval")
|
raise SystemExit(f"Refusing to start: listener already exists on {args.host}:{args.port}")
|
||||||
server = ThreadingHTTPServer((args.host, args.port), make_handler(worker))
|
server = ThreadingHTTPServer((args.host, args.port), make_handler(worker))
|
||||||
print(f"serving {MODEL_ID} on http://{args.host}:{args.port}; raw prompts are not logged")
|
print(f"serving {MODEL_ID} on http://{args.host}:{args.port}; raw prompts are not logged")
|
||||||
server.serve_forever()
|
server.serve_forever()
|
||||||
|
|||||||
@@ -12,8 +12,10 @@ This service is intentionally not wired into live RAG by default.
|
|||||||
|
|
||||||
## Files
|
## Files
|
||||||
|
|
||||||
- `server.py` — stdlib HTTP OpenVINO Runtime service.
|
- `SPEC.md` — endpoint/CLI contract, model/runtime recommendation, smoke/NPU proof plan, RAG integration plan, docs implications, and no-go criteria.
|
||||||
|
- `server.py` — stdlib HTTP OpenVINO Runtime service with fail-fast localhost listener conflict checks and request validation.
|
||||||
- `smoke.py` — non-private API/ranking/NPU busy-time smoke test.
|
- `smoke.py` — non-private API/ranking/NPU busy-time smoke test.
|
||||||
|
- `tests/test_server_validation.py` — stdlib unit checks for request validation and listener conflict detection.
|
||||||
- `openvino-reranker.service` — optional user-systemd unit.
|
- `openvino-reranker.service` — optional user-systemd unit.
|
||||||
|
|
||||||
## One-time setup
|
## One-time setup
|
||||||
@@ -61,7 +63,7 @@ OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco
|
|||||||
python /home/will/lab/swarm/openvino-reranker-npu/server.py
|
python /home/will/lab/swarm/openvino-reranker-npu/server.py
|
||||||
```
|
```
|
||||||
|
|
||||||
Startup performs a non-private smoke inference and fails closed when `OPENVINO_RERANKER_DEVICE=NPU` but `npu_busy_time_us` does not increase.
|
Startup performs a non-private smoke inference and fails closed when `OPENVINO_RERANKER_DEVICE=NPU` but `npu_busy_time_us` does not increase. It also checks whether the requested listener can bind before compiling the OpenVINO model, so obvious port conflicts fail fast; the real server bind still happens immediately after model load.
|
||||||
|
|
||||||
## API
|
## API
|
||||||
|
|
||||||
@@ -109,6 +111,16 @@ Expected:
|
|||||||
- The top result matches the non-private fixture expectation.
|
- The top result matches the non-private fixture expectation.
|
||||||
- Response and sysfs `npu_busy_delta_us` are positive.
|
- Response and sysfs `npu_busy_delta_us` are positive.
|
||||||
|
|
||||||
|
## Validation checks
|
||||||
|
|
||||||
|
```bash
|
||||||
|
source /home/will/.venvs/openvino-reranker/bin/activate
|
||||||
|
PYTHONPATH=/home/will/lab/swarm/openvino-reranker-npu \
|
||||||
|
python -m unittest discover -s /home/will/lab/swarm/openvino-reranker-npu/tests
|
||||||
|
```
|
||||||
|
|
||||||
|
These checks do not compile the OpenVINO model; they cover request validation and fail-fast listener conflict detection.
|
||||||
|
|
||||||
## Optional systemd user service
|
## Optional systemd user service
|
||||||
|
|
||||||
Install the unit only after the foreground command and smoke test pass:
|
Install the unit only after the foreground command and smoke test pass:
|
||||||
|
|||||||
@@ -0,0 +1,243 @@
|
|||||||
|
# OpenVINO NPU reranker service spec
|
||||||
|
|
||||||
|
Status: proposed localhost prototype; not live RAG integration.
|
||||||
|
Target port: `127.0.0.1:18818`.
|
||||||
|
Safety posture: foreground smoke first, no persistent enablement, no Atlas/Hermes/RAG routing changes without Will's explicit approval.
|
||||||
|
|
||||||
|
## Recommendation
|
||||||
|
|
||||||
|
Use `cross-encoder/ms-marco-MiniLM-L6-v2`, exported to OpenVINO IR as INT8, served by the local stdlib HTTP service in `server.py` on OpenVINO Runtime `NPU`.
|
||||||
|
|
||||||
|
Why this choice:
|
||||||
|
|
||||||
|
- It is a small BERT-family cross-encoder reranker intended for MS MARCO-style passage ranking, matching the second-stage RAG use case better than another embedding-only similarity pass.
|
||||||
|
- The model shape is simple pairwise text classification/scoring: `(query, document) -> score`, which maps cleanly to OpenVINO Runtime and avoids introducing a heavier LLM worker for reranking.
|
||||||
|
- INT8 OpenVINO IR keeps memory and compile/runtime cost low enough for a localhost sidecar and is already represented in the repo defaults:
|
||||||
|
`/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov`.
|
||||||
|
- The service can fail closed on startup when `OPENVINO_RERANKER_DEVICE=NPU` but `/sys/class/accel/accel0/device/npu_busy_time_us` does not increase, preventing false "NPU-backed" claims.
|
||||||
|
|
||||||
|
Runtime default:
|
||||||
|
|
||||||
|
```text
|
||||||
|
OPENVINO_RERANKER_HOST=127.0.0.1
|
||||||
|
OPENVINO_RERANKER_PORT=18818
|
||||||
|
OPENVINO_RERANKER_DEVICE=NPU
|
||||||
|
OPENVINO_RERANKER_MODEL=cross-encoder/ms-marco-MiniLM-L6-v2
|
||||||
|
OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov
|
||||||
|
OPENVINO_RERANKER_MAX_LENGTH=512
|
||||||
|
OPENVINO_RERANKER_MAX_DOCUMENTS=100
|
||||||
|
OPENVINO_RERANKER_MAX_BODY_BYTES=5242880
|
||||||
|
```
|
||||||
|
|
||||||
|
## Endpoint contract
|
||||||
|
|
||||||
|
### Health and readiness
|
||||||
|
|
||||||
|
`GET /healthz` and `GET /readyz` return JSON.
|
||||||
|
|
||||||
|
`/readyz` must return HTTP 200 only when the model is loaded and startup smoke passed. For NPU mode, startup smoke must include a positive `npu_busy_delta_us`.
|
||||||
|
|
||||||
|
Representative ready response:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "ok",
|
||||||
|
"ok": true,
|
||||||
|
"service": "openvino-reranker",
|
||||||
|
"model": "cross-encoder/ms-marco-MiniLM-L6-v2",
|
||||||
|
"model_dir": "/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov",
|
||||||
|
"device": "NPU",
|
||||||
|
"available_devices": ["CPU", "NPU"],
|
||||||
|
"max_length": 512,
|
||||||
|
"startup_smoke": {"ok": true, "duration_ms": 12.3, "npu_busy_delta_us": 1234},
|
||||||
|
"last_inference": null,
|
||||||
|
"ready_error": null
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Rerank
|
||||||
|
|
||||||
|
`POST /rerank` and compatibility alias `POST /v1/rerank` accept:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"query": "how do I verify OpenVINO NPU usage?",
|
||||||
|
"documents": [
|
||||||
|
{"id": "good", "text": "Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference.", "metadata": {"source": "synthetic"}},
|
||||||
|
{"id": "bad", "text": "This note is about making sourdough starter."}
|
||||||
|
],
|
||||||
|
"top_k": 2,
|
||||||
|
"return_documents": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Compatibility notes:
|
||||||
|
|
||||||
|
- `documents` may be strings or objects with `id`, `text`, and optional object `metadata`.
|
||||||
|
- `top_k` is preferred; `top_n` is accepted for common reranker-client compatibility.
|
||||||
|
- `return_documents=false` is recommended for RAG integration to avoid echoing private source text into logs or intermediate traces.
|
||||||
|
- The optional `model` field may be sent by clients but is not used for routing; this sidecar serves one configured model.
|
||||||
|
|
||||||
|
Successful response:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ok": true,
|
||||||
|
"model": "cross-encoder/ms-marco-MiniLM-L6-v2",
|
||||||
|
"device": "NPU",
|
||||||
|
"query": "how do I verify OpenVINO NPU usage?",
|
||||||
|
"input_count": 2,
|
||||||
|
"top_k": 2,
|
||||||
|
"duration_ms": 10.5,
|
||||||
|
"npu_busy_delta_us": 1234,
|
||||||
|
"results": [
|
||||||
|
{"index": 0, "id": "good", "score": 8.1, "raw_score": 8.1, "probability": 0.9997},
|
||||||
|
{"index": 1, "id": "bad", "score": -4.2, "raw_score": -4.2, "probability": 0.0148}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Error response shape:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"ok": false, "error": "human-readable error", "results": []}
|
||||||
|
```
|
||||||
|
|
||||||
|
Status behavior:
|
||||||
|
|
||||||
|
- 400: invalid JSON schema, empty query, missing/empty documents, invalid document text, or non-positive/non-integer `top_k`/`top_n`.
|
||||||
|
- 413: request body above `OPENVINO_RERANKER_MAX_BODY_BYTES`.
|
||||||
|
- 503: model not ready.
|
||||||
|
- 500: unexpected inference/runtime failure.
|
||||||
|
|
||||||
|
## CLI contract
|
||||||
|
|
||||||
|
Foreground-only review start:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ss -ltnp | grep ':18818\b' || true
|
||||||
|
cat /sys/class/accel/accel0/device/npu_busy_time_us
|
||||||
|
source /home/will/.venvs/openvino-reranker/bin/activate
|
||||||
|
OPENVINO_RERANKER_HOST=127.0.0.1 \
|
||||||
|
OPENVINO_RERANKER_PORT=18818 \
|
||||||
|
OPENVINO_RERANKER_DEVICE=NPU \
|
||||||
|
OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov \
|
||||||
|
python /home/will/lab/swarm/openvino-reranker-npu/server.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Client smoke:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
source /home/will/.venvs/openvino-reranker/bin/activate
|
||||||
|
python /home/will/lab/swarm/openvino-reranker-npu/smoke.py --url http://127.0.0.1:18818
|
||||||
|
```
|
||||||
|
|
||||||
|
Optional user-systemd unit exists as `openvino-reranker.service`, but this spec does not approve copying, starting, enabling, or wiring it into live paths.
|
||||||
|
|
||||||
|
## Non-private smoke payload
|
||||||
|
|
||||||
|
Use only synthetic public-text fixtures. Do not query the Obsidian vault, private document directories, image folders, or live Chroma documents during smoke.
|
||||||
|
|
||||||
|
Minimum cases:
|
||||||
|
|
||||||
|
1. Query: `how do I verify OpenVINO NPU usage?`
|
||||||
|
- Expected top document: `Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference.`
|
||||||
|
- Distractor: `This note is about making sourdough starter.`
|
||||||
|
2. Query: `what port does the reranker service use?`
|
||||||
|
- Expected top document: `The OpenVINO reranker prototype listens locally on port 18818.`
|
||||||
|
- Distractor: `Whisper transcription accepts audio uploads.`
|
||||||
|
3. Query: `why should reranking not mutate vector collections?`
|
||||||
|
- Expected top document: `Reranking is a read-only second-stage transformation after vector search.`
|
||||||
|
- Distractor: `Boil pasta in salted water until al dente.`
|
||||||
|
|
||||||
|
Pass criteria:
|
||||||
|
|
||||||
|
- `/readyz` is HTTP 200 and reports `device=NPU`.
|
||||||
|
- Every case returns `ok=true` and a sorted `results` list with the expected top `id`.
|
||||||
|
- Response-level `npu_busy_delta_us` is positive for each case.
|
||||||
|
- External sysfs `after - before` is positive for each case or at least for the full smoke batch.
|
||||||
|
- Smoke script exits 0 and prints JSON with `ok: true`.
|
||||||
|
|
||||||
|
## NPU busy-time verification plan
|
||||||
|
|
||||||
|
HTTP 200 is not proof. Verification must capture both endpoint-reported and sysfs-observed deltas.
|
||||||
|
|
||||||
|
Procedure:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
BUSY=/sys/class/accel/accel0/device/npu_busy_time_us
|
||||||
|
before=$(cat "$BUSY")
|
||||||
|
curl -fsS http://127.0.0.1:18818/rerank \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"query":"how do I verify OpenVINO NPU usage?","documents":[{"id":"good","text":"Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference."},{"id":"bad","text":"This note is about making sourdough starter."}],"top_k":2,"return_documents":false}' \
|
||||||
|
| jq '{ok, device, npu_busy_delta_us, top_id:.results[0].id}'
|
||||||
|
after=$(cat "$BUSY")
|
||||||
|
echo "sysfs_npu_busy_delta_us=$((after-before))"
|
||||||
|
```
|
||||||
|
|
||||||
|
Acceptance:
|
||||||
|
|
||||||
|
- `device == "NPU"`.
|
||||||
|
- Response `npu_busy_delta_us > 0`.
|
||||||
|
- Shell-computed `sysfs_npu_busy_delta_us > 0`.
|
||||||
|
- If any value is zero/negative/missing, call the result CPU/unknown and do not claim NPU-backed reranking.
|
||||||
|
|
||||||
|
## Optional RAG second-stage integration plan (deferred)
|
||||||
|
|
||||||
|
This is a plan only. Do not enable it in live RAG without explicit approval.
|
||||||
|
|
||||||
|
Design:
|
||||||
|
|
||||||
|
1. Keep existing vector search and Chroma collection `obsidian_bge_npu` unchanged.
|
||||||
|
2. Retrieve more candidates from current vector search, e.g. `initial_k=20`.
|
||||||
|
3. Send only request-time candidate snippets/ids to `http://127.0.0.1:18818/rerank`.
|
||||||
|
4. Use reranker order to choose final `top_k`, e.g. `5`.
|
||||||
|
5. On timeout, connection error, invalid response, or non-positive NPU proof when proof is required, fall back to vector order and attach metadata like `rerank_error`; do not fail the whole RAG request unless explicitly configured.
|
||||||
|
6. Log counters and latency, but avoid logging raw private document text.
|
||||||
|
|
||||||
|
Disabled-by-default knobs:
|
||||||
|
|
||||||
|
```text
|
||||||
|
RAG_RERANK_ENABLED=false
|
||||||
|
RAG_RERANK_URL=http://127.0.0.1:18818/rerank
|
||||||
|
RAG_RERANK_INITIAL_K=20
|
||||||
|
RAG_RERANK_TOP_K=5
|
||||||
|
RAG_RERANK_TIMEOUT_MS=3000
|
||||||
|
RAG_RERANK_REQUIRE_NPU_PROOF=true
|
||||||
|
RAG_RERANK_RETURN_DOCUMENTS=false
|
||||||
|
```
|
||||||
|
|
||||||
|
Integration tests should use synthetic in-memory candidates first. Live-vault evaluation requires a separate approval and must not mutate or rebuild the vector collection.
|
||||||
|
|
||||||
|
## Docs and diagram implications
|
||||||
|
|
||||||
|
If this prototype advances beyond spec/review, update these surfaces while keeping live/prototype labels clear:
|
||||||
|
|
||||||
|
- `openvino-reranker-npu/README.md`: keep model/runtime, endpoint contract, smoke command, and approval gates synchronized with code.
|
||||||
|
- `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md`: list `:18818` as prototype/not enabled, with foreground smoke and NPU sysfs proof.
|
||||||
|
- Service catalog / architecture notes: show live baseline `:18810`, `:18816`, `:18817`; show `:18818` as optional second-stage RAG prototype, not live routing.
|
||||||
|
- Diagrams: render `RAG :18810 -> optional reranker :18818` as dashed/disabled or "proposed"; do not imply Atlas/Hermes/gateway traffic is using it.
|
||||||
|
- Optional systemd unit: document as installable after approval, not enabled by default.
|
||||||
|
|
||||||
|
## No-go / defer criteria
|
||||||
|
|
||||||
|
Do not ship, enable, or integrate the reranker if any of these hold:
|
||||||
|
|
||||||
|
- Port `18818` is already owned by another live service.
|
||||||
|
- `NPU` is unavailable in `ov.Core().available_devices` or `/sys/class/accel/accel0/device/npu_busy_time_us` is missing.
|
||||||
|
- Foreground startup smoke fails or has non-positive NPU busy-time delta while configured for NPU.
|
||||||
|
- Synthetic smoke top-1 ranking fails or latency is unacceptable for the intended RAG timeout budget.
|
||||||
|
- Model export requires overwriting the existing model directory or touching Chroma/vector collections.
|
||||||
|
- The service must bind beyond `127.0.0.1` to be useful.
|
||||||
|
- Live RAG integration would require reindexing, collection mutation, private-doc smoke, or Atlas/Hermes/gateway routing changes without explicit approval.
|
||||||
|
- Logs or responses would persist raw private document text outside the existing RAG request path.
|
||||||
|
|
||||||
|
## Current local preflight observed during this spec pass
|
||||||
|
|
||||||
|
- `/sys/class/accel/accel0/device/npu_busy_time_us` is readable.
|
||||||
|
- `/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov` is present.
|
||||||
|
- `/home/will/.venvs/openvino-reranker/bin/python` is present.
|
||||||
|
- `:18818` was not listening during preflight.
|
||||||
|
- `server.py` and `smoke.py` pass `python -m py_compile`.
|
||||||
|
|
||||||
|
These observations are preflight only; they are not a live service/NPU smoke result.
|
||||||
@@ -16,6 +16,7 @@ import argparse
|
|||||||
import json
|
import json
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
@@ -251,6 +252,27 @@ def normalize_documents(value: Any, max_documents: int) -> list[dict[str, Any]]:
|
|||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
|
||||||
|
def parse_top_k(value: Any, document_count: int) -> int:
    """Resolve the requested result count against the number of documents.

    Args:
        value: Raw ``top_k``/``top_n`` value from the request, or None when absent.
        document_count: Number of documents in the request.

    Returns:
        ``document_count`` when ``value`` is None; otherwise ``value`` capped at
        ``document_count``.

    Raises:
        ValueError: If ``value`` is a bool, is not an int, or is smaller than 1.
    """
    if value is None:
        return document_count
    # bool is a subclass of int, so it is rejected explicitly before the int check.
    not_an_integer = isinstance(value, bool) or not isinstance(value, int)
    if not_an_integer or value < 1:
        raise ValueError("top_k/top_n must be a positive integer")
    return value if value < document_count else document_count
|
||||||
|
|
||||||
|
|
||||||
|
def assert_port_available(host: str, port: int) -> None:
    """Probe-bind ``host:port`` and raise if the bind is refused.

    A temporary IPv4 stream socket is created with ``SO_REUSEADDR`` set, bound
    to the address, and closed again; nothing is left listening afterwards.

    Raises:
        RuntimeError: If binding fails with ``OSError``; the original error is
            chained as the cause.
    """
    address = (host, port)
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Only a failing bind is translated; other socket errors propagate unchanged.
        try:
            probe.bind(address)
        except OSError as exc:
            raise RuntimeError(f"cannot bind {host}:{port}; listener conflict or invalid bind: {exc}") from exc
    finally:
        probe.close()
|
||||||
|
|
||||||
|
|
||||||
class Handler(BaseHTTPRequestHandler):
|
class Handler(BaseHTTPRequestHandler):
|
||||||
server_version = "OpenVINOReranker/0.1"
|
server_version = "OpenVINOReranker/0.1"
|
||||||
|
|
||||||
@@ -293,6 +315,7 @@ class Handler(BaseHTTPRequestHandler):
|
|||||||
raise ValueError("query is required")
|
raise ValueError("query is required")
|
||||||
top_k = payload.get("top_k", payload.get("top_n"))
|
top_k = payload.get("top_k", payload.get("top_n"))
|
||||||
documents = normalize_documents(payload.get("documents"), self.max_documents)
|
documents = normalize_documents(payload.get("documents"), self.max_documents)
|
||||||
|
top_k = parse_top_k(top_k, len(documents))
|
||||||
return_documents = bool(payload.get("return_documents", True))
|
return_documents = bool(payload.get("return_documents", True))
|
||||||
response = self.svc.rerank(query.strip(), documents, top_k=top_k, return_documents=return_documents)
|
response = self.svc.rerank(query.strip(), documents, top_k=top_k, return_documents=return_documents)
|
||||||
self.write_json(response)
|
self.write_json(response)
|
||||||
@@ -342,6 +365,7 @@ def main() -> int:
|
|||||||
parser.add_argument("--skip-startup-smoke", action="store_true", default=os.environ.get("OPENVINO_RERANKER_SKIP_STARTUP_SMOKE", "").lower() in {"1", "true", "yes"})
|
parser.add_argument("--skip-startup-smoke", action="store_true", default=os.environ.get("OPENVINO_RERANKER_SKIP_STARTUP_SMOKE", "").lower() in {"1", "true", "yes"})
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
assert_port_available(args.host, args.port)
|
||||||
service = RerankerService(
|
service = RerankerService(
|
||||||
Path(args.model_dir).expanduser(),
|
Path(args.model_dir).expanduser(),
|
||||||
args.model,
|
args.model,
|
||||||
|
|||||||
@@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Unit checks for reranker request validation helpers.
|
||||||
|
|
||||||
|
These tests intentionally avoid loading an OpenVINO model; they only cover the
|
||||||
|
stdlib validation helpers used before inference.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import socket
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from server import assert_port_available, normalize_documents, parse_top_k
|
||||||
|
|
||||||
|
|
||||||
|
class ValidationTests(unittest.TestCase):
    """Checks for the validation helpers imported from ``server``; no model is loaded."""

    def test_normalize_accepts_strings_and_objects(self) -> None:
        """A bare string becomes ``{"text": ...}``; an object keeps its id and metadata."""
        object_document = {"id": "obj", "text": "object document", "metadata": {"source": "synthetic"}}
        normalized = normalize_documents(["plain text document", object_document], max_documents=2)
        self.assertEqual(normalized[0], {"text": "plain text document"})
        self.assertEqual(normalized[1]["id"], "obj")
        self.assertEqual(normalized[1]["metadata"], {"source": "synthetic"})

    def test_normalize_rejects_empty_or_too_many_documents(self) -> None:
        """Empty lists, over-limit lists and empty document text all raise ``ValueError``."""
        rejected_inputs = (
            ("non-empty", []),
            ("max_documents", ["a", "b", "c"]),
            ("non-empty string", [{"id": "empty", "text": ""}]),
        )
        for expected_message, documents in rejected_inputs:
            with self.assertRaisesRegex(ValueError, expected_message):
                normalize_documents(documents, max_documents=2)

    def test_parse_top_k_defaults_clamps_and_rejects_invalid_values(self) -> None:
        """``None`` defaults to the document count, large values clamp, bad values raise."""
        accepted = ((None, 3), (2, 2), (99, 3))
        for requested, expected in accepted:
            self.assertEqual(parse_top_k(requested, document_count=3), expected)
        invalid_values = (0, -1, True, False, 1.5, "2", "nope")
        for value in invalid_values:
            with self.subTest(value=value), self.assertRaisesRegex(ValueError, "positive integer"):
                parse_top_k(value, document_count=3)

    def test_assert_port_available_detects_listener_conflict(self) -> None:
        """A port that already has a listener must be reported as a bind conflict."""
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as occupied:
            # Port 0 lets the OS pick a free port; read the chosen one back.
            occupied.bind(("127.0.0.1", 0))
            occupied.listen(1)
            taken_port = occupied.getsockname()[1]
            with self.assertRaisesRegex(RuntimeError, "cannot bind"):
                assert_port_available("127.0.0.1", taken_port)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
# OpenVINO Context Gate
|
||||||
|
|
||||||
|
Local-only Atlas/Hermes context-gate advisory prototype.
|
||||||
|
|
||||||
|
This first slice is CLI-only and dry-run by design. It takes a non-private query,
|
||||||
|
optionally asks the localhost classifier on `127.0.0.1:18819` for advisory labels,
|
||||||
|
and emits a compact typed context bundle plan. It does not retrieve private
|
||||||
|
content or change live Atlas/Hermes behavior.
|
||||||
|
|
||||||
|
## Safety invariants
|
||||||
|
|
||||||
|
Closed in v1:
|
||||||
|
|
||||||
|
- live Atlas/Hermes routing changes
|
||||||
|
- memory writes
|
||||||
|
- outbound sends
|
||||||
|
- tool execution by the sidecar
|
||||||
|
- service restarts
|
||||||
|
- vector DB mutation or reindexing
|
||||||
|
- private root broadening
|
||||||
|
- live config changes
|
||||||
|
|
||||||
|
The CLI only plans which source classes an authoritative Atlas/Hermes agent might
|
||||||
|
use later: `durable_memory`, `session_search`, `rag_search`, `repo_files`,
|
||||||
|
`live_system`, `web`, or `no_retrieval`.
|
||||||
|
|
||||||
|
NPU proof is strict: `npu_verified=true` is only emitted when a live classifier
|
||||||
|
request reports a positive endpoint NPU delta and a positive sysfs/endpoint sysfs
|
||||||
|
busy delta. HTTP 200 alone is never treated as proof. Offline and fallback modes
|
||||||
|
set `npu_verified=false` and include a warning.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Live classifier path, with compact terminal output:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/context-gate-advisory.py \
|
||||||
|
--query "How do I check whether the RAG reranker is using the NPU?" \
|
||||||
|
--format compact
|
||||||
|
```
|
||||||
|
|
||||||
|
Deterministic offline smoke, safe for unit-test hosts without NPU services:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/context-gate-advisory.py \
|
||||||
|
--offline \
|
||||||
|
--query "Write a haiku about Seattle rain." \
|
||||||
|
--format compact-json
|
||||||
|
```
|
||||||
|
|
||||||
|
Fallback plan if the classifier is down:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/context-gate-advisory.py \
|
||||||
|
--allow-offline-fallback \
|
||||||
|
--query "Where did we leave the NPU context gate implementation plan?" \
|
||||||
|
--context platform=kanban \
|
||||||
|
--context repo_path=/home/will/lab/swarm \
|
||||||
|
--format compact-json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output shape
|
||||||
|
|
||||||
|
Full JSON includes:
|
||||||
|
|
||||||
|
- `schema=atlas_context_gate_plan_v1`
|
||||||
|
- `dry_run=true`
|
||||||
|
- `query_class`
|
||||||
|
- `source_plan`
|
||||||
|
- `bundle_plan`
|
||||||
|
- `npu_proof`
|
||||||
|
- closed `authority`
|
||||||
|
- closed approval `gates`
|
||||||
|
- compact `warnings`
|
||||||
|
|
||||||
|
Compact output intentionally avoids raw private snippets and raw JSON dumps:
|
||||||
|
|
||||||
|
```text
|
||||||
|
ok=true schema=atlas_context_gate_plan_v1 bundle=OpsDebugBundle sources=live_system,repo_files,rag_search source_count=3 npu_verified=false classifier_delta_us=None outer_sysfs_delta_us=None gates=closed:route,memory,send,tools,restart,vector,private_roots,config warnings=offline_heuristic_classifier_no_npu_claim,npu_proof_inconclusive
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notes for reviewers
|
||||||
|
|
||||||
|
- No HTTP service or systemd unit is added in this slice.
|
||||||
|
- The prototype does not call RAG, memory, session search, web, filesystem tools,
|
||||||
|
or the advisory gateway. It only emits a plan.
|
||||||
|
- Unit tests use fake/offline classifier results and do not require live NPU.
|
||||||
|
- Optional live smoke may call only the local classifier endpoint and read
|
||||||
|
`/sys/class/accel/accel0/device/npu_busy_time_us` for positive delta proof.
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
"""Atlas/Hermes local advisory context-gate prototype."""
|
||||||
|
|
||||||
|
from .context_gate import SCHEMA, ContextGateError, build_plan, compact_json, compact_line, validate_plan
|
||||||
|
|
||||||
|
__all__ = ["SCHEMA", "ContextGateError", "build_plan", "compact_json", "compact_line", "validate_plan"]
|
||||||
@@ -0,0 +1,90 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .context_gate import (
|
||||||
|
DEFAULT_CLASSIFIER_URL,
|
||||||
|
ContextGateError,
|
||||||
|
build_plan,
|
||||||
|
classify_live,
|
||||||
|
classify_offline,
|
||||||
|
compact_json,
|
||||||
|
compact_line,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_context(raw_items: list[str]) -> dict[str, Any]:
    """Turn repeated ``KEY=VALUE`` command-line items into a context dict.

    The value is split off at the first ``=``. Values spelled ``true`` or
    ``false`` (any letter case) become booleans; everything else stays a string.
    A later item with the same key overwrites an earlier one.

    Raises:
        ContextGateError: if an item has no ``=`` or its key is empty.
    """
    boolean_words = {"true": True, "false": False}
    parsed_context: dict[str, Any] = {}
    for entry in raw_items:
        name, separator, text = entry.partition("=")
        if not separator:
            raise ContextGateError(f"invalid_context_item:{entry}")
        if not name:
            raise ContextGateError("invalid_context_key")
        parsed_context[name] = boolean_words.get(text.lower(), text)
    return parsed_context
|
||||||
|
|
||||||
|
|
||||||
|
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the advisory context-gate CLI.

    Only ``--query`` is required. Arguments are registered in a fixed order so
    the generated help text stays stable.
    """
    parser = argparse.ArgumentParser(
        description="Emit a local-only Atlas/Hermes advisory context bundle plan. No routing, retrieval, memory writes, sends, restarts, or vector mutations are performed.",
    )
    # (flag, keyword arguments) pairs, registered in declaration order.
    argument_table: list[tuple[str, dict[str, Any]]] = [
        ("--query", {"required": True, "help": "Non-private query to plan for"}),
        ("--format", {"choices": ["compact", "compact-json", "json"], "default": "compact"}),
        (
            "--context",
            {
                "action": "append",
                "default": [],
                "metavar": "KEY=VALUE",
                "help": "Optional compact request context, e.g. platform=kanban repo_path=/path",
            },
        ),
        ("--max-sources", {"type": int, "default": 4}),
        ("--trace-id", {}),
        ("--classifier-url", {"default": DEFAULT_CLASSIFIER_URL}),
        ("--classifier-timeout", {"type": float, "default": 8.0}),
        ("--offline", {"action": "store_true", "help": "Use deterministic heuristic labels; makes no NPU claim"}),
        (
            "--allow-offline-fallback",
            {
                "action": "store_true",
                "help": "If live classifier is unavailable, emit an advisory fallback plan with npu_verified=false",
            },
        ),
        (
            "--no-require-npu-proof",
            {
                "action": "store_true",
                "help": "Do not add npu_proof_inconclusive warning when running offline/fallback",
            },
        ),
    ]
    for flag, keywords in argument_table:
        parser.add_argument(flag, **keywords)
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Run the CLI: classify the query, build the plan and print it.

    Args:
        argv: argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` after printing a plan, ``2`` after printing ``error=...`` to
        stderr because a ``ContextGateError`` was raised.
    """
    args = build_arg_parser().parse_args(argv)
    try:
        context = _parse_context(args.context)
        options = dict(
            dry_run=True,
            max_sources=args.max_sources,
            include_private_text=False,
            require_npu_proof=not args.no_require_npu_proof,
            trace_id=args.trace_id,
        )
        if not args.offline:
            try:
                classifier = classify_live(
                    args.query,
                    context,
                    classifier_url=args.classifier_url,
                    timeout=args.classifier_timeout,
                )
            except ContextGateError as live_error:
                # Fall back to heuristics only when the caller opted in.
                if not args.allow_offline_fallback:
                    raise
                classifier = classify_offline(args.query, context, warning=str(live_error))
        else:
            classifier = classify_offline(args.query, context)
        plan = build_plan(args.query, context=context, options=options, classifier=classifier)
    except ContextGateError as exc:
        print(f"error={exc}", file=sys.stderr)
        return 2

    renderers = {
        "json": lambda rendered_plan: json.dumps(rendered_plan, indent=2, sort_keys=True),
        "compact-json": compact_json,
    }
    # Any other format value uses the one-line compact rendering.
    render = renderers.get(args.format, compact_line)
    print(render(plan))
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__": # pragma: no cover
|
||||||
|
raise SystemExit(main())
|
||||||
@@ -0,0 +1,482 @@
|
|||||||
|
"""Local-only advisory context bundle planner for Atlas/Hermes.
|
||||||
|
|
||||||
|
This module intentionally emits a retrieval/authority plan only. It does not call
|
||||||
|
Hermes memory/session/RAG/web tools, mutate vector stores, broaden private roots,
|
||||||
|
or change live routing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import ipaddress
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Mapping, Sequence
|
||||||
|
|
||||||
|
SCHEMA = "atlas_context_gate_plan_v1"
|
||||||
|
NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
|
||||||
|
DEFAULT_CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
|
||||||
|
|
||||||
|
AUTHORITY = {
|
||||||
|
"may_route": False,
|
||||||
|
"may_write_memory": False,
|
||||||
|
"may_send_external": False,
|
||||||
|
"may_process_private_dirs": False,
|
||||||
|
"may_execute_tools": False,
|
||||||
|
"may_restart_services": False,
|
||||||
|
"may_mutate_vector_db": False,
|
||||||
|
"may_change_live_config": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
GATES = {
|
||||||
|
"live_routing_change": "closed_requires_explicit_approval",
|
||||||
|
"memory_write": "closed_requires_explicit_approval",
|
||||||
|
"outbound_send": "closed_requires_explicit_approval",
|
||||||
|
"tool_execution": "closed_requires_explicit_approval",
|
||||||
|
"service_restart": "closed_requires_explicit_approval",
|
||||||
|
"vector_mutation": "closed_requires_explicit_approval",
|
||||||
|
"private_root_broadening": "closed_requires_explicit_approval",
|
||||||
|
}
|
||||||
|
|
||||||
|
_ALLOWED_SOURCES = {
|
||||||
|
"durable_memory",
|
||||||
|
"session_search",
|
||||||
|
"rag_search",
|
||||||
|
"repo_files",
|
||||||
|
"live_system",
|
||||||
|
"web",
|
||||||
|
"no_retrieval",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ContextGateError(ValueError):
    """Raised for invalid requests or unavailable required local stages.

    Messages are compact machine-readable codes such as ``query_required``,
    ``classifier_unavailable: ...`` or ``invalid_classifier_url:...``. Because
    this subclasses ``ValueError``, callers that catch ``ValueError`` also
    catch it.
    """
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ClassifierResult:
    """Immutable outcome of one classification attempt (live or offline heuristic)."""

    # Label name -> label payload; payloads may be {"value": ..., "confidence": ...} mappings.
    labels: Mapping[str, Any]
    # "npu_busy_delta_us" as reported in the classifier response; None when absent or offline.
    npu_busy_delta_us: int | None
    # "sysfs_npu_busy_delta_us" as reported in the classifier response; None when absent or offline.
    sysfs_npu_busy_delta_us: int | None
    # Busy-time difference read locally before and after the request; None when either read failed.
    outer_sysfs_delta_us: int | None
    # True for results produced by classify_live, False for classify_offline.
    live: bool
    # Optional warning code carried into the plan's warnings list.
    warning: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def read_npu_busy_time_us(path: Path = NPU_BUSY_PATH) -> int | None:
    """Read the integer counter stored in ``path``.

    Returns:
        The parsed integer, or ``None`` when the file cannot be read or its
        content is not an integer.
    """
    try:
        raw_counter = path.read_text(encoding="utf-8")
        return int(raw_counter.strip())
    except (OSError, ValueError):
        # OSError already covers missing files and permission errors.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _label_value(labels: Mapping[str, Any], name: str, default: Any) -> Any:
|
||||||
|
value = labels.get(name, default)
|
||||||
|
if isinstance(value, Mapping) and "value" in value:
|
||||||
|
return value.get("value", default)
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _label_confidence(labels: Mapping[str, Any], name: str, default: float = 0.5) -> float:
|
||||||
|
value = labels.get(name)
|
||||||
|
if isinstance(value, Mapping):
|
||||||
|
try:
|
||||||
|
return float(value.get("confidence", default))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return default
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def heuristic_labels(query: str, context: Mapping[str, Any] | None = None) -> dict[str, Any]:
    """Derive classifier-shaped labels from keyword matches, with no model involved.

    Matching is plain substring containment on the lower-cased query. Each
    returned label is a ``{"value": ..., "confidence": ...}`` dict with a fixed
    confidence per outcome.
    """
    lowered = query.lower()
    platform = str((context or {}).get("platform", "unknown")).lower()

    def mentions(*needles: str) -> bool:
        # True when any needle occurs anywhere in the lower-cased query.
        return any(needle in lowered for needle in needles)

    current_terms = ("current", "now", "health", "port", "process", "systemd", "status", "npu", "listening", "logs")
    prior_terms = ("where did we leave", "what did we decide", "previous", "earlier", "handoff", "plan")
    coding_terms = ("implement", "code", "repo", "test", "pytest", "diff", "branch", "hermes")
    research_terms = ("research", "compare", "summarize", "explain", "what is", "how do i")
    unsafe_terms = ("change live routing", "live routing", "restart", "send", "write memory", "reindex", "mutate", "delete")

    needs_confirmation = mentions(*unsafe_terms)
    about_current_state = mentions(*current_terms)
    about_code = mentions(*coding_terms)
    needs_tool = about_current_state or about_code or needs_confirmation

    # Coding wins over devops, which wins over research; chat is the fallback.
    if platform == "kanban" or "kanban" in lowered or about_code:
        workflow = "coding"
    elif about_current_state:
        workflow = "devops"
    elif mentions(*research_terms, *prior_terms):
        workflow = "research"
    else:
        workflow = "chat"

    if mentions("remember", "preference"):
        memory_kind = "durable_user_fact"
    elif mentions("convention", "workflow"):
        memory_kind = "workflow_convention"
    else:
        memory_kind = "none"

    urgency_level = "high" if mentions("urgent", "critical", "down", "broken") else "normal"

    return {
        "tool_needed": {"value": needs_tool, "confidence": 0.76 if needs_tool else 0.68},
        "memory_candidate": {"value": memory_kind, "confidence": 0.35 if memory_kind == "none" else 0.8},
        "urgency": {"value": urgency_level, "confidence": 0.8 if urgency_level == "high" else 0.65},
        "workflow_category": {"value": workflow, "confidence": 0.7 if workflow == "chat" else 0.78},
        "safety_confirmation_required": {"value": needs_confirmation, "confidence": 0.9 if needs_confirmation else 0.2},
    }
|
||||||
|
|
||||||
|
|
||||||
|
class _NoClassifierRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Fail closed instead of following redirects away from a validated local URL."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):  # type: ignore[no-untyped-def]
        # Returning None means no follow-up request is built for the redirect
        # target, so the classifier URL validated up front is the only one contacted.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
_CLASSIFIER_OPENER = urllib.request.build_opener(_NoClassifierRedirectHandler)
|
||||||
|
|
||||||
|
|
||||||
|
def classify_live(
    query: str,
    context: Mapping[str, Any] | None = None,
    classifier_url: str = DEFAULT_CLASSIFIER_URL,
    timeout: float = 8.0,
) -> ClassifierResult:
    """POST the query to the local classifier and collect labels plus NPU busy deltas.

    The NPU busy counter is read before and after the request so the caller
    gets an "outer" delta that does not depend on the endpoint's own reporting.

    Args:
        query: text sent as the ``text`` field of the request.
        context: optional request context; only ``platform`` is forwarded.
        classifier_url: endpoint URL, validated by ``validate_classifier_url``.
        timeout: timeout in seconds passed to the opener.

    Returns:
        A ``ClassifierResult`` with ``live=True``.

    Raises:
        ContextGateError: if the URL is rejected, the request fails, the body
            is not UTF-8 JSON, or the body has no ``labels`` mapping.
    """
    classifier_url = validate_classifier_url(classifier_url)
    before = read_npu_busy_time_us()
    payload = {
        "id": f"context-gate-{int(time.time())}",
        "text": query,
        "context": {"platform": (context or {}).get("platform", "cli"), "source": "context_gate"},
        "options": {"include_evidence": False, "include_embedding_debug": False, "dry_run": True},
    }
    req = urllib.request.Request(
        classifier_url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with _CLASSIFIER_OPENER.open(req, timeout=timeout) as resp:  # noqa: S310 - local configured endpoint only
            # Bounded read: an oversized body is truncated and then fails JSON parsing.
            raw = resp.read(256_000)
    except (urllib.error.URLError, TimeoutError, OSError) as exc:
        raise ContextGateError(f"classifier_unavailable: {exc}") from exc
    after = read_npu_busy_time_us()
    try:
        data = json.loads(raw.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        # A non-UTF-8 body is as unusable as malformed JSON; report both the same way.
        raise ContextGateError("classifier_invalid_json") from exc
    # A JSON array or scalar has no .get(); treat it as a response without labels.
    labels = data.get("labels") if isinstance(data, Mapping) else None
    if not isinstance(labels, Mapping):
        raise ContextGateError("classifier_missing_labels")
    outer = after - before if before is not None and after is not None else None
    return ClassifierResult(
        labels=labels,
        npu_busy_delta_us=_as_int_or_none(data.get("npu_busy_delta_us")),
        sysfs_npu_busy_delta_us=_as_int_or_none(data.get("sysfs_npu_busy_delta_us")),
        outer_sysfs_delta_us=outer,
        live=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def validate_classifier_url(classifier_url: str) -> str:
    """Validate the local-only classifier endpoint before any POST is attempted.

    Accepted URLs use ``http`` or ``https``, carry a valid (or no) port, and
    name either ``localhost`` or a literal loopback IP address. Other host
    names are rejected without being resolved.

    Returns:
        ``classifier_url`` unchanged when it passes validation.

    Raises:
        ContextGateError: with an ``invalid_classifier_url:...`` code when the
            URL cannot be parsed or violates one of the rules above.
    """
    try:
        parsed = urllib.parse.urlparse(classifier_url)
    except ValueError as exc:
        # urlparse raises ValueError for malformed bracketed hosts such as "http://[::1".
        raise ContextGateError("invalid_classifier_url:unparseable") from exc
    if parsed.scheme not in {"http", "https"}:
        raise ContextGateError("invalid_classifier_url:scheme_must_be_http_or_https")
    host = parsed.hostname
    if not host:
        raise ContextGateError("invalid_classifier_url:missing_host")
    try:
        # Reading .port raises ValueError for a non-numeric or out-of-range port.
        _ = parsed.port
    except ValueError as exc:
        raise ContextGateError("invalid_classifier_url:invalid_port") from exc
    host_normalized = host.lower().rstrip(".")
    if host_normalized == "localhost":
        return classifier_url
    try:
        address = ipaddress.ip_address(host_normalized)
    except ValueError as exc:
        raise ContextGateError("invalid_classifier_url:host_must_be_loopback") from exc
    if not address.is_loopback:
        raise ContextGateError("invalid_classifier_url:host_must_be_loopback")
    return classifier_url
|
||||||
|
|
||||||
|
|
||||||
|
def _as_int_or_none(value: Any) -> int | None:
|
||||||
|
try:
|
||||||
|
return int(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def classify_offline(query: str, context: Mapping[str, Any] | None = None, warning: str | None = None) -> ClassifierResult:
    """Build a non-live ``ClassifierResult`` from ``heuristic_labels``.

    All NPU deltas are ``None``. When ``warning`` is empty or ``None`` the
    default no-NPU-claim warning code is used.
    """
    effective_warning = warning if warning else "offline_heuristic_classifier_no_npu_claim"
    labels = heuristic_labels(query, context)
    return ClassifierResult(
        labels=labels,
        npu_busy_delta_us=None,
        sysfs_npu_busy_delta_us=None,
        outer_sysfs_delta_us=None,
        live=False,
        warning=effective_warning,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _has_any(text: str, needles: list[str]) -> bool:
|
||||||
|
return any(n in text for n in needles)
|
||||||
|
|
||||||
|
|
||||||
|
def _source(source: str, action: str, reason: str, priority: int, freshness: str, confidence: float) -> dict[str, Any]:
    """Build one source-plan entry.

    ``permission`` and ``missing_behavior`` depend only on whether the entry is
    the ``no_retrieval`` placeholder. ``confidence`` is rounded to two decimals.
    """
    assert source in _ALLOWED_SOURCES
    if source == "no_retrieval":
        permission, missing_behavior = "none", "skip_retrieval"
    else:
        permission, missing_behavior = "tool_required_by_authoritative_agent", "retrieve_or_mark_missing"
    return dict(
        source=source,
        action=action,
        reason=reason,
        priority=priority,
        freshness=freshness,
        permission=permission,
        missing_behavior=missing_behavior,
        confidence=round(confidence, 2),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def select_sources(query: str, labels: Mapping[str, Any], context: Mapping[str, Any], max_sources: int) -> list[dict[str, Any]]:
    """Choose which source classes the plan should name, ordered by priority.

    Each rule pairs a trigger (classifier labels, request context and/or
    keyword substrings in the lower-cased query) with the source entry it adds.
    When nothing triggers, a single ``no_retrieval`` entry is used. The result
    is sorted by priority, de-duplicated by source name and cut to
    ``max_sources`` entries.
    """
    lowered = query.lower()
    workflow = str(_label_value(labels, "workflow_category", "unknown"))
    memory_kind = str(_label_value(labels, "memory_candidate", "none"))
    needs_tool = bool(_label_value(labels, "tool_needed", False))

    rules = [
        (
            needs_tool or _has_any(lowered, ["current", "now", "health", "port", "process", "systemd", "status", "npu", "listening", "logs", "time", "date"]),
            ("live_system", "inspect_with_terminal_or_domain_tool", "current service/system state requested", 1, "live_required", 0.9),
        ),
        (
            context.get("repo_path") or workflow == "coding" or _has_any(lowered, ["repo", "code", "file", "test", "pytest", "diff", "implementation", "hermes", "atlas"]),
            ("repo_files", "inspect_explicit_repo_paths", "repo-specific implementation or config context", 2, "current_filesystem", 0.84),
        ),
        (
            _has_any(lowered, ["where did we leave", "what did we decide", "previous", "earlier", "handoff", "prior", "last time"]),
            ("session_search", "search_prior_sessions_or_kanban_handoffs", "prior decision or handoff requested", 3, "session-era", 0.82),
        ),
        (
            _has_any(lowered, ["runbook", "note", "obsidian", "rag", "docs", "knowledge", "plan"]),
            ("rag_search", "query_local_index_read_only", "local docs or indexed knowledge likely useful", 4, "cached_index", 0.76),
        ),
        (
            memory_kind != "none" or _has_any(lowered, ["preference", "remember", "profile", "durable fact"]),
            ("durable_memory", "read_stable_facts_only", "stable preference/environment facts may be relevant", 5, "static", 0.72),
        ),
        (
            _has_any(lowered, ["latest", "news", "version", "release", "public", "web"]),
            ("web", "search_public_current_sources", "current external public fact requested", 6, "live_external", 0.7),
        ),
    ]
    chosen: list[dict[str, Any]] = [_source(*spec) for triggered, spec in rules if triggered]
    if not chosen:
        chosen = [_source("no_retrieval", "answer_directly", "no factual retrieval dependency detected", 1, "none", 0.78)]

    # Stable priority order; the first entry per source name wins.
    unique_by_name: dict[str, dict[str, Any]] = {}
    for entry in sorted(chosen, key=lambda candidate: candidate["priority"]):
        unique_by_name.setdefault(entry["source"], entry)
    return list(unique_by_name.values())[:max_sources]
|
||||||
|
|
||||||
|
|
||||||
|
def select_bundle_name(query: str, labels: Mapping[str, Any], context: Mapping[str, Any]) -> str:
    """Pick the bundle type for a query; the first matching rule wins.

    Order: coding (kanban platform, a ``task_id`` in context, or a coding
    category), ops/debug, personal assistant, simple response (when the only
    planned source is ``no_retrieval``), then research as the fallback.

    The platform is compared case-insensitively, matching how
    ``heuristic_labels`` normalizes it, so ``platform=Kanban`` and
    ``platform=kanban`` select the same bundle.
    """
    text = query.lower()
    category = str(_label_value(labels, "workflow_category", "unknown"))
    platform = str(context.get("platform", "")).lower()
    if platform == "kanban" or context.get("task_id") or category == "coding":
        return "CodingTaskBundle"
    if category in {"devops", "debugging"} or _has_any(text, ["health", "port", "systemd", "npu", "service", "logs"]):
        return "OpsDebugBundle"
    if category in {"note_taking", "productivity"} or _has_any(text, ["preference", "remember", "profile"]):
        return "PersonalAssistantBundle"
    # With a limit of one source, no_retrieval appears only when nothing else triggered.
    if any(entry["source"] == "no_retrieval" for entry in select_sources(query, labels, context, 1)):
        return "SimpleResponseBundle"
    return "ResearchBundle"
|
||||||
|
|
||||||
|
|
||||||
|
def _field(field: str, shape: str, source: str, freshness: str, missing: str, privacy: str, confidence: float = 0.8) -> dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"field": field,
|
||||||
|
"shape": shape,
|
||||||
|
"source_of_truth": source,
|
||||||
|
"freshness": freshness,
|
||||||
|
"provenance_required": True,
|
||||||
|
"missing_behavior": missing,
|
||||||
|
"privacy": privacy,
|
||||||
|
"confidence": round(confidence, 2),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def build_bundle_plan(bundle_name: str, sources: Sequence[Mapping[str, Any]], query: str, labels: Mapping[str, Any]) -> dict[str, Any]:
    """Assemble the typed field plan for the chosen bundle.

    Required fields come from a per-bundle table; bundle names not in the table
    get the research field set. Blocked fields are added when the query or the
    labels indicate a side effect, and when ``rag_search`` is a planned source.

    Returns:
        A dict with ``bundle_name``, ``required_fields``, ``optional_fields``
        (always empty) and ``blocked_fields``.
    """
    needs_confirmation = bool(_label_value(labels, "safety_confirmation_required", False))
    planned_sources = {entry["source"] for entry in sources}

    research_fields = [
        ("research_question", "compact_text", "user", "request", "mark_missing", "query_text_only"),
        ("source_plan", "ordered_source_list", "context_gate", "run", "mark_missing", "no_private_snippets"),
        ("evidence_requirements", "provenance_rules", "policy", "static", "fail_closed", "no_private_data"),
        ("freshness_cutoff", "freshness_policy", "classifier_query", "request", "mark_missing", "no_private_data"),
        ("missing_data_behavior", "policy_enum", "policy", "static", "fail_closed", "no_private_data"),
    ]
    fields_by_bundle = {
        "OpsDebugBundle": [
            ("problem_statement", "compact_text", "user", "request", "mark_missing", "query_text_only"),
            ("target_scope", "service_repo_or_host", "query_or_classifier", "request", "ask_or_infer_low_confidence", "no_private_paths_beyond_explicit"),
            ("live_state", "status_table", "live_system", "live_required", "retrieve_or_fail_closed", "no_raw_logs_by_default"),
            ("safety_gates", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
            ("provenance", "tool_names_and_paths", "executing_agent", "run", "mark_missing", "paths_only"),
        ],
        "CodingTaskBundle": [
            ("repo_root", "absolute_path", "task_or_context", "current", "ask_or_fail", "explicit_path_only"),
            ("git_state", "branch_dirty_counts", "live_system", "live_required", "retrieve_or_fail_closed", "no_diff_dump_by_default"),
            ("requirements", "bullet_summary", "user_kanban_files", "current", "retrieve_or_mark_missing", "no_private_snippets"),
            ("relevant_paths", "path_list", "repo_files", "current_filesystem", "search_narrowly", "paths_only"),
            ("tests_or_smokes", "command_list", "repo_files", "current_filesystem", "mark_missing", "commands_only"),
            ("review_gates", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
        ],
        "PersonalAssistantBundle": [
            ("user_intent", "compact_text", "user", "request", "mark_missing", "query_text_only"),
            ("durable_facts_needed", "fact_keys", "durable_memory", "static", "retrieve_or_mark_missing", "no_raw_memory_dump"),
            ("prior_decisions_needed", "session_refs", "session_search", "session-era", "retrieve_or_mark_missing", "summaries_only"),
            ("privacy_boundary", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
            ("action_authority", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
        ],
        "SimpleResponseBundle": [],
    }
    required_fields = [_field(*spec) for spec in fields_by_bundle.get(bundle_name, research_fields)]

    blocked_fields = []
    side_effect_requested = needs_confirmation or re.search(
        r"\b(route|routing|restart|send|write memory|reindex|delete|mutate)\b", query.lower()
    )
    if side_effect_requested:
        blocked_fields.append(_field("authority_side_effect", "approval_required", "policy", "static", "fail_closed", "no_side_effects_in_v1", 0.95))
    if "rag_search" in planned_sources:
        blocked_fields.append(_field("vector_db_mutation", "not_allowed", "policy", "static", "fail_closed", "read_only_query_plan", 0.95))

    return {
        "bundle_name": bundle_name,
        "required_fields": required_fields,
        "optional_fields": [],
        "blocked_fields": blocked_fields,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_query_class(labels: Mapping[str, Any]) -> dict[str, Any]:
    """Flatten classifier labels into the plan's ``query_class`` summary.

    The overall ``confidence`` is the highest of the workflow-category,
    tool-needed and safety-confirmation confidences, rounded to two decimals.
    """
    confidence_labels = ("workflow_category", "tool_needed", "safety_confirmation_required")
    best_confidence = max(_label_confidence(labels, label_name, 0.5) for label_name in confidence_labels)
    summary: dict[str, Any] = {}
    summary["workflow_category"] = _label_value(labels, "workflow_category", "unknown")
    summary["urgency"] = _label_value(labels, "urgency", "normal")
    summary["tool_needed"] = bool(_label_value(labels, "tool_needed", False))
    summary["memory_candidate"] = _label_value(labels, "memory_candidate", "none")
    summary["safety_confirmation_required"] = bool(_label_value(labels, "safety_confirmation_required", False))
    summary["confidence"] = round(best_confidence, 2)
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def npu_proof_from_classifier(result: ClassifierResult, require_npu_proof: bool) -> tuple[dict[str, Any], list[str]]:
    """Derive the plan's ``npu_proof`` section and warnings from a classifier result.

    Proof is strict. ``verified`` is True only when the result is live, the
    endpoint reported a positive NPU busy delta, AND a positive sysfs busy delta
    was seen, either reported by the endpoint or measured around the request.
    A sysfs delta alone is not enough, because the counter can also advance
    from unrelated NPU work.

    Args:
        result: classifier outcome whose delta fields and ``live`` flag are read.
        require_npu_proof: when True, an unverified result adds the
            ``npu_proof_inconclusive`` warning.

    Returns:
        ``(npu_proof, warnings)``: the proof dict and the list of warning codes
        (the result's own warning first, if any).
    """
    endpoint_delta = result.npu_busy_delta_us
    endpoint_sysfs_delta = result.sysfs_npu_busy_delta_us
    outer_delta = result.outer_sysfs_delta_us
    positive_endpoint = endpoint_delta is not None and endpoint_delta > 0
    positive_endpoint_sysfs = endpoint_sysfs_delta is not None and endpoint_sysfs_delta > 0
    positive_outer = outer_delta is not None and outer_delta > 0
    verified = bool(result.live and positive_endpoint and (positive_endpoint_sysfs or positive_outer))
    warnings: list[str] = []
    if result.warning:
        warnings.append(result.warning)
    if require_npu_proof and not verified:
        warnings.append("npu_proof_inconclusive")
    return {
        "classifier_delta_us": endpoint_delta,
        "classifier_sysfs_delta_us": endpoint_sysfs_delta,
        "outer_sysfs_delta_us": outer_delta,
        "rerank_delta_us": None,
        "verified": verified,
        "required": require_npu_proof,
        "classifier_live": result.live,
    }, warnings
|
||||||
|
|
||||||
|
|
||||||
|
def build_plan(
    query: str,
    *,
    context: Mapping[str, Any] | None = None,
    options: Mapping[str, Any] | None = None,
    classifier: ClassifierResult | None = None,
) -> dict[str, Any]:
    """Assemble and validate a dry-run context-gate plan for ``query``.

    Args:
        query: Non-blank query text.
        context: Optional context mapping; copied before use.
        options: Optional options mapping; copied before use. ``dry_run``
            must be exactly ``True`` (or absent) and ``include_private_text``
            must be falsy. ``max_sources`` is clamped to 1..6 (default 4).
        classifier: Pre-computed classifier result; when omitted,
            ``classify_offline`` is called.

    Raises:
        ContextGateError: for a blank query, a disallowed option, or a plan
            that fails ``validate_plan``.
    """
    if not query or not query.strip():
        raise ContextGateError("query_required")

    context = dict(context or {})
    options = dict(options or {})

    dry_run_flag = options.get("dry_run", True)
    if dry_run_flag is not True:
        raise ContextGateError("dry_run_must_remain_true_in_v1")
    if options.get("include_private_text", False):
        raise ContextGateError("include_private_text_not_allowed_in_v1")

    requested_sources = int(options.get("max_sources", 4))
    max_sources = min(max(requested_sources, 1), 6)
    require_npu = bool(options.get("require_npu_proof", True))

    if classifier is None:
        classifier = classify_offline(query, context)
    labels = classifier.labels

    source_plan = select_sources(query, labels, context, max_sources)
    bundle_name = select_bundle_name(query, labels, context)
    npu_proof, warnings = npu_proof_from_classifier(classifier, require_npu)

    trace_id = options.get("trace_id") or context.get("trace_id")
    query_class = summarize_query_class(labels)
    bundle_plan = build_bundle_plan(bundle_name, source_plan, query, labels)

    plan = {
        "schema": SCHEMA,
        "trace_id": trace_id,
        "dry_run": True,
        "ok": True,
        "query_class": query_class,
        "source_plan": source_plan,
        "bundle_plan": bundle_plan,
        "npu_proof": npu_proof,
        # Copies, so the plan never aliases the module-level tables.
        "authority": dict(AUTHORITY),
        "gates": dict(GATES),
        "warnings": warnings,
    }
    validate_plan(plan)
    return plan
|
||||||
|
|
||||||
|
|
||||||
|
def validate_plan(plan: Mapping[str, Any]) -> None:
    """Check the structural invariants of a context-gate plan.

    Verifies, in order: the schema tag, that ``dry_run`` is exactly ``True``,
    that the authority block equals ``AUTHORITY``, that ``source_plan`` is a
    non-empty list whose entries name an allowed source, and that the
    ``query_class``, ``bundle_plan``, ``npu_proof`` and ``gates`` blocks are
    present.

    Raises:
        ContextGateError: with a short machine-readable code for the first
            violated invariant.
    """
    if plan.get("schema") != SCHEMA:
        raise ContextGateError("invalid_schema")
    if plan.get("dry_run") is not True:
        raise ContextGateError("dry_run_missing")
    if plan.get("authority") != AUTHORITY:
        raise ContextGateError("authority_not_closed")

    sources = plan.get("source_plan")
    if not isinstance(sources, list) or not sources:
        raise ContextGateError("source_plan_required")
    for item in sources:
        # An entry that is not mapping-like (e.g. a bare string) has no
        # ``get``; report it as an invalid source instead of leaking an
        # AttributeError out of the validator.
        if not hasattr(item, "get"):
            raise ContextGateError("invalid_source:None")
        if item.get("source") not in _ALLOWED_SOURCES:
            raise ContextGateError(f"invalid_source:{item.get('source')}")

    required_blocks = ["query_class", "bundle_plan", "npu_proof", "gates"]
    for block in required_blocks:
        if block not in plan:
            raise ContextGateError(f"missing_block:{block}")
|
||||||
|
|
||||||
|
|
||||||
|
def compact_line(plan: Mapping[str, Any]) -> str:
    """Render a plan as a single space-separated ``key=value`` status line.

    Booleans are lower-cased, source names are comma-joined, and an empty or
    missing warning list is shown as ``none``.
    """
    source_names = ",".join([str(entry["source"]) for entry in plan["source_plan"]])
    closed = "route,memory,send,tools,restart,vector,private_roots,config"
    warning_items = plan.get("warnings") or []
    warnings = ",".join(warning_items) or "none"

    proof = plan["npu_proof"]
    fields = [
        f"ok={str(plan['ok']).lower()}",
        f"schema={plan['schema']}",
        f"bundle={plan['bundle_plan']['bundle_name']}",
        f"sources={source_names}",
        f"source_count={len(plan['source_plan'])}",
        f"npu_verified={str(proof['verified']).lower()}",
        f"classifier_delta_us={proof.get('classifier_delta_us')}",
        f"outer_sysfs_delta_us={proof.get('outer_sysfs_delta_us')}",
        f"gates=closed:{closed}",
        f"warnings={warnings}",
    ]
    return " ".join(fields)
|
||||||
|
|
||||||
|
|
||||||
|
def compact_json(plan: Mapping[str, Any]) -> str:
    """Serialize a reduced view of the plan as minified, key-sorted JSON.

    The reduced view keeps the schema/ok/dry_run flags, the bundle name, the
    list and count of source names, the query class, NPU proof and authority
    blocks, the names of the gates, and the warnings (empty list if absent).
    """
    source_plan = plan["source_plan"]
    compact: dict[str, Any] = {}
    compact["schema"] = plan["schema"]
    compact["ok"] = plan["ok"]
    compact["dry_run"] = plan["dry_run"]
    compact["bundle_name"] = plan["bundle_plan"]["bundle_name"]
    compact["sources"] = [entry["source"] for entry in source_plan]
    compact["source_count"] = len(source_plan)
    compact["query_class"] = plan["query_class"]
    compact["npu_proof"] = plan["npu_proof"]
    compact["authority"] = plan["authority"]
    compact["gates_closed"] = [gate for gate in plan["gates"].keys()]
    compact["warnings"] = plan.get("warnings", [])
    return json.dumps(compact, sort_keys=True, separators=(",", ":"))
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
#!/usr/bin/env python3
"""Thin repo-local wrapper for the Atlas/Hermes context-gate advisory CLI."""

from __future__ import annotations

import sys
from pathlib import Path

# Two levels up from this file: the parent of the directory that contains it.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Prepend the repo root to sys.path (once) before the package import below;
# presumably so openvino_context_gate resolves from this checkout.
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

# Must come after the sys.path adjustment, hence the E402 suppression.
from openvino_context_gate.cli import main  # noqa: E402

if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||||
Executable
+526
@@ -0,0 +1,526 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Dry-run Kanban hygiene advisory classifier.
|
||||||
|
|
||||||
|
Reads compact board/task summaries and emits bounded labels/next gates without
|
||||||
|
mutating any Hermes Kanban state. Phase 1 is deterministic rules only; it does
|
||||||
|
not call kanban tools, restart services, write memory, or send outbound data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
# Schema tag stamped on every advisory document.
SCHEMA = "kanban_hygiene_advisory_v1"

# Capability flags reported in the output; every one of them is False.
AUTHORITY = dict.fromkeys(
    (
        "may_mutate_board",
        "may_assign",
        "may_block_or_unblock",
        "may_complete_or_archive",
        "may_create_tasks",
        "may_write_memory",
        "may_send_external",
        "may_restart_services",
        "may_execute_tools",
    ),
    False,
)

# NPU proof block reported in the output; no proof is attempted here.
NPU_PROOF = dict(
    required_for_npu_claims=True,
    attempted=False,
    ok=None,
    npu_busy_delta_us=None,
)

# Keys every task object must carry.
REQUIRED_TASK_FIELDS = {"id", "title", "status", "assignee", "created_at", "updated_at"}

# Task statuses accepted by validation.
SUPPORTED_STATUSES = {
    "triage", "todo", "ready", "running", "blocked",
    "done", "archived", "failed", "cancelled",
}

# Values the task-type classifier may emit.
TASK_TYPES = {
    "charter", "discovery", "spec", "implement", "test", "review",
    "docs", "ops", "integration", "final", "unknown",
}

# Values the lane classifier may emit.
LANES = {
    "observability_utilization",
    "cron_n8n_classifier",
    "rag_context_gate",
    "doc_image_audio_triage",
    "voice_audio_pipeline",
    "kanban_hygiene",
    "docs_runbook_service_map",
    "ops_integration",
    "final_closeout",
    "general",
    "unknown",
}

# Title prefixes recognised before a colon: every concrete task type plus the
# "doc" spelling.
LIFECYCLE_PREFIXES = (TASK_TYPES - {"unknown"}) | {"doc"}
|
||||||
|
|
||||||
|
|
||||||
|
def compact_text(task: dict[str, Any]) -> str:
    """Join a task's free-text fields into one lower-cased search string.

    Reads ``title``, ``body_excerpt``, ``last_run_summary_excerpt`` and
    ``last_comment_excerpt`` in that order, skips fields that are missing,
    ``None`` or empty, and joins the rest with single spaces.

    A field explicitly set to ``None`` (JSON ``null``) is skipped rather than
    stringified; otherwise the literal word "none" would leak into the text
    and make an otherwise empty task look non-empty.
    """
    field_names = ("title", "body_excerpt", "last_run_summary_excerpt", "last_comment_excerpt")
    parts: list[str] = []
    for name in field_names:
        raw = task.get(name)
        if raw is None:
            continue
        rendered = str(raw)
        if rendered:
            parts.append(rendered)
    return " ".join(parts).lower()
|
||||||
|
|
||||||
|
|
||||||
|
def load_jsonl(raw: str) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Parse JSON Lines text into a list of task objects.

    Blank lines are ignored. Line numbers in error messages are 1-based and
    count blank lines too.

    Returns:
        ``(tasks, {})`` — the metadata dict is always empty for JSONL input.

    Raises:
        ValueError: if a line is not valid JSON or is not a JSON object.
    """
    rows: list[dict[str, Any]] = []
    line_no = 0
    for text in raw.splitlines():
        line_no += 1
        if text.strip():
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError as exc:
                raise ValueError(f"invalid JSONL on line {line_no}: {exc.msg}") from exc
            if not isinstance(parsed, dict):
                raise ValueError(f"JSONL line {line_no} is not an object")
            rows.append(parsed)
    return rows, {}
|
||||||
|
|
||||||
|
|
||||||
|
def load_input(path: str | None, fmt: str) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Read task summaries from a file or stdin and return ``(tasks, metadata)``.

    Args:
        path: File to read as UTF-8; ``None``, empty, or ``"-"`` reads stdin.
        fmt: ``"json"``, ``"jsonl"`` or ``"auto"``. In auto mode multi-line
            input that does not start with ``{`` or ``[`` is parsed as JSONL,
            and multi-line input that fails JSON parsing falls back to JSONL.

    Accepted JSON shapes: a list of objects, an object with a ``tasks`` list
    (all other keys become metadata), or a single object carrying every
    required task field.

    Raises:
        ValueError: for empty, unparsable, or wrongly shaped input.
    """
    if not path or path == "-":
        raw = sys.stdin.read()
    else:
        raw = Path(path).read_text(encoding="utf-8")

    trimmed = raw.strip()
    if not trimmed:
        raise ValueError("input is empty")

    auto = fmt == "auto"
    multiline = "\n" in trimmed
    starts_like_json = raw.lstrip().startswith(("{", "["))
    if fmt == "jsonl" or (auto and multiline and not starts_like_json):
        return load_jsonl(raw)

    try:
        data = json.loads(raw)
    except json.JSONDecodeError as exc:
        if auto and multiline:
            return load_jsonl(raw)
        raise ValueError(f"invalid JSON input: {exc.msg}") from exc

    if isinstance(data, list):
        for entry in data:
            if not isinstance(entry, dict):
                raise ValueError("JSON list must contain task objects")
        return data, {}

    if not isinstance(data, dict):
        raise ValueError("input must be JSON object, JSON list, or JSON Lines")

    tasks = data.get("tasks")
    if tasks is None:
        # Treat a single object with required task fields as a one-task summary.
        if REQUIRED_TASK_FIELDS.issubset(data):
            return [data], {}
        raise ValueError("JSON object must contain a 'tasks' list")

    well_formed = isinstance(tasks, list) and all(isinstance(entry, dict) for entry in tasks)
    if not well_formed:
        raise ValueError("'tasks' must be a list of objects")

    metadata = {}
    for key, value in data.items():
        if key != "tasks":
            metadata[key] = value
    return tasks, metadata
|
||||||
|
|
||||||
|
|
||||||
|
def validate_task(task: dict[str, Any]) -> None:
    """Reject a task that is missing required fields or has malformed values.

    Checks, in order: every name in ``REQUIRED_TASK_FIELDS`` is present, the
    stringified status is in ``SUPPORTED_STATUSES``, and ``created_at`` /
    ``updated_at`` are ints or floats.

    Raises:
        ValueError: describing the first problem found.
    """
    absent = sorted(REQUIRED_TASK_FIELDS.difference(task))
    if absent:
        task_id = task.get("id", "<unknown>")
        raise ValueError(f"task {task_id} missing required fields: {', '.join(absent)}")

    status = str(task.get("status"))
    if status not in SUPPORTED_STATUSES:
        raise ValueError(f"task {task.get('id')} has unsupported status: {status}")

    timestamp_fields = ("created_at", "updated_at")
    for field in timestamp_fields:
        stamp = task.get(field)
        if isinstance(stamp, (int, float)):
            continue
        raise ValueError(f"task {task.get('id')} field {field} must be epoch seconds")
|
||||||
|
|
||||||
|
|
||||||
|
def confidence(value: float) -> float:
    """Clamp ``value`` into the range 0.0..1.0 and round to two decimals."""
    capped = value if value < 1.0 else 1.0
    clamped = capped if capped > 0.0 else 0.0
    return round(clamped, 2)
|
||||||
|
|
||||||
|
|
||||||
|
def classify_task_type(task: dict[str, Any]) -> dict[str, Any]:
    """Label a task with a lifecycle type.

    A recognised ``prefix:`` at the start of the title wins (confidence 0.95,
    with ``doc`` normalised to ``docs``). Otherwise the first keyword rule
    with a needle occurring anywhere in the task's compact text is used
    (confidence 0.78). With no signal the type is ``unknown`` (0.2).
    """
    title = str(task.get("title", "")).strip().lower()
    body = compact_text(task)

    head, colon, _tail = title.partition(":")
    prefix = head.strip() if colon else ""
    if prefix in LIFECYCLE_PREFIXES:
        aliases = {"doc": "docs"}
        from_prefix = aliases.get(prefix, prefix)
        if from_prefix in TASK_TYPES:
            return {"value": from_prefix, "confidence": 0.95, "reason_codes": [f"title_prefix_{from_prefix}"]}

    # Ordered: the first rule with any matching needle decides the type.
    keyword_rules = (
        ("discovery", ("discover", "inventory", "repo map", "read-only")),
        ("spec", ("spec", "define", "contract", "schema")),
        ("implement", ("implement", "engineer", "script", "code", "build")),
        ("review", ("review", "approve", "findings")),
        ("docs", ("docs", "runbook", "readme")),
        ("ops", ("ops", "health", "monitor", "deploy", "cleanup")),
        ("integration", ("integration", "merge", "cherry-pick", "fan-in")),
        ("final", ("final", "closeout", "synthesis")),
        ("test", ("test", "smoke", "validate")),
        ("charter", ("charter", "program framing")),
    )
    from_keyword = next(
        (kind for kind, needles in keyword_rules if any(needle in body for needle in needles)),
        None,
    )
    if from_keyword is not None:
        return {"value": from_keyword, "confidence": 0.78, "reason_codes": [f"keyword_{from_keyword}"]}
    return {"value": "unknown", "confidence": 0.2, "reason_codes": ["insufficient_signal"]}
|
||||||
|
|
||||||
|
|
||||||
|
def classify_lane(task: dict[str, Any]) -> dict[str, Any]:
    """Assign a task to a work lane by substring match on its compact text.

    Rules are tried in order; the first lane with a matching needle wins
    (confidence 0.9) and the reason code names the first needle that matched,
    with spaces replaced by underscores. Non-empty text with no match is
    ``general`` (0.45); empty text is ``unknown`` (0.1).
    """
    text = compact_text(task)
    rules = (
        ("kanban_hygiene", ("kanban", "task hygiene", "board summaries", "review-needed", "next gate")),
        ("cron_n8n_classifier", ("cron", "n8n", "alert", "event classifier")),
        ("rag_context_gate", ("rag", "context gate", "retrieval", "bundle")),
        ("doc_image_audio_triage", ("document", "image", "audio triage", "ocr", "attachments")),
        ("voice_audio_pipeline", ("voice", "whisper", "memo", "transcribe")),
        ("docs_runbook_service_map", ("service map", "runbook", "readme")),
        ("observability_utilization", ("health", "utilization", "metrics", "digest")),
        ("ops_integration", ("merge", "integration", "cherry-pick", "fan-in")),
        ("final_closeout", ("final", "closeout", "synthesis")),
    )
    for lane, needles in rules:
        first_hit = next((needle for needle in needles if needle in text), None)
        if first_hit is None:
            continue
        reason = "mentions_" + first_hit.replace(" ", "_")
        return {"value": lane, "confidence": 0.9, "reason_codes": [reason]}

    if not text:
        return {"value": "unknown", "confidence": 0.1, "reason_codes": ["insufficient_signal"]}
    return {"value": "general", "confidence": 0.45, "reason_codes": ["no_lane_specific_signal"]}
|
||||||
|
|
||||||
|
|
||||||
|
def classify_project(task: dict[str, Any], board: str | None, input_metadata: dict[str, Any]) -> dict[str, Any]:
    """Work out which project a task belongs to.

    Precedence: an explicit ``project`` on the task or in the input metadata,
    then the board name (argument first, metadata second), then a mention of
    "npu" or "openvino" in the task text, else ``unknown``.
    """
    def answer(value: str, score: float, source: str) -> dict[str, Any]:
        return {"value": value, "confidence": score, "source": source}

    explicit = task.get("project") or input_metadata.get("project")
    if explicit:
        return answer(str(explicit), 0.9, "input")

    board_name = board or input_metadata.get("board")
    if board_name:
        return answer(str(board_name), 0.98, "board_name")

    text = compact_text(task)
    mentions_npu_stack = "npu" in text or "openvino" in text
    if mentions_npu_stack:
        return answer("npu-maximization", 0.72, "body")
    return answer("unknown", 0.1, "unknown")
|
||||||
|
|
||||||
|
|
||||||
|
def classify_blocker(task: dict[str, Any]) -> dict[str, Any]:
    """Describe what, if anything, is holding a task up.

    A task with status ``blocked`` is classified by phrases in its compact
    text (review changes, missing credentials/path, human decision, else
    ``unknown``). A ``todo`` task with parents is ``missing_parent``; a task
    whose last run crashed, timed out or failed is ``failed_tests``. Only the
    ``blocked`` status sets the ``blocked`` flag.
    """
    status = str(task.get("status"))
    text = compact_text(task)
    last_outcome = str(task.get("last_run_outcome") or "").lower()

    def mentions(*needles: str) -> bool:
        return any(needle in text for needle in needles)

    def verdict(value: str, blocked: bool, score: float, codes: list[str]) -> dict[str, Any]:
        return {"value": value, "blocked": blocked, "confidence": confidence(score), "reason_codes": codes}

    if status == "blocked":
        if mentions("review-required", "changes requested"):
            kind, code = "review_changes_requested", "blocked_review_required_or_changes"
        elif mentions("credential", "token", "path", "spawn_failed"):
            kind, code = "missing_credentials", "blocked_missing_credentials_or_path"
        elif mentions("human", "approval", "decision", "confirm"):
            kind, code = "human_decision", "blocked_human_decision"
        else:
            kind, code = "unknown", "status_blocked"
        return verdict(kind, True, 0.85, [code])

    if status == "todo" and task.get("parents"):
        return verdict("missing_parent", False, 0.75, ["todo_with_parents"])

    if last_outcome in {"crashed", "timed_out", "failed"}:
        return verdict("failed_tests", False, 0.65, [f"last_run_{last_outcome}"])

    return verdict("none", False, 0.0, [])
|
||||||
|
|
||||||
|
|
||||||
|
def age_hours(now: float, timestamp: Any) -> float | None:
    """Return hours elapsed from ``timestamp`` to ``now``, rounded to 2 places.

    Returns ``None`` when ``timestamp`` is not an int or float. A timestamp
    later than ``now`` yields 0.0 rather than a negative age.
    """
    if isinstance(timestamp, (int, float)):
        elapsed_seconds = now - float(timestamp)
        if not elapsed_seconds > 0.0:
            elapsed_seconds = 0.0
        return round(elapsed_seconds / 3600.0, 2)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def classify_staleness(task: dict[str, Any], now: float) -> dict[str, Any]:
    """Rate how stale a task is from its status and last activity time.

    Last activity is the first truthy value among ``heartbeat_at``,
    ``last_activity_at`` and ``updated_at``, falling back to ``created_at``.

    Rules by status (hours since last activity):
        running: more than 1 -> ``stale_lock``.
        ready:   at least 72 -> ``stale``; at least 24 -> ``aging``.
        blocked: at least 168 -> ``stale``; review-required and at least
                 72 -> ``stale``; at least the threshold (24 if
                 review-required, else 48) -> ``aging``.
        todo without parents: at least 72 -> ``orphaned``.
    Anything else is ``fresh`` with a 24 hour threshold.
    """
    status = str(task.get("status"))
    created = float(task["created_at"])
    activity_ts = float(
        task.get("heartbeat_at")
        or task.get("last_activity_at")
        or task.get("updated_at")
        or created
    )
    age = age_hours(now, created)
    last_activity = age_hours(now, activity_ts)

    def idle_for(hours: float) -> bool:
        """True when the idle time is known and is at least ``hours``."""
        return last_activity is not None and last_activity >= hours

    threshold = 24
    value = "fresh"
    reason_codes: list[str] = []

    if status == "running":
        threshold = 1
        # Strictly more than one hour without activity.
        if last_activity is not None and last_activity > 1:
            value, reason_codes = "stale_lock", ["running_no_recent_heartbeat"]
    elif status == "ready":
        if idle_for(72):
            value, reason_codes = "stale", ["ready_over_72h"]
        elif idle_for(24):
            value, reason_codes = "aging", ["ready_over_24h"]
    elif status == "blocked":
        review_required = "review-required" in compact_text(task)
        threshold = 24 if review_required else 48
        if idle_for(168):
            value, reason_codes = "stale", ["blocked_over_7d"]
        elif review_required and idle_for(72):
            value, reason_codes = "stale", ["review_required_over_72h"]
        elif idle_for(threshold):
            value, reason_codes = "aging", ["blocked_or_review_aging"]
    elif status == "todo" and not task.get("parents") and idle_for(72):
        threshold = 72
        value, reason_codes = "orphaned", ["todo_without_parents_over_72h"]

    return {
        "value": value,
        "age_hours": age,
        "last_activity_hours": last_activity,
        "threshold_hours": threshold,
        "reason_codes": reason_codes,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_title(title: str) -> str:
    """Reduce a title to lower-case alphanumeric words for comparison.

    Lower-cases and trims the title, removes one leading lifecycle prefix
    such as ``spec:`` or ``docs :``, and returns the remaining runs of
    ``a-z0-9`` joined by single spaces.
    """
    text = title.lower().strip()
    prefix = re.match(
        r"^(charter|discovery|spec|implement|test|review|docs?|ops|integration|final)\s*:\s*",
        text,
    )
    if prefix is not None:
        text = text[prefix.end():]
    words = re.findall(r"[a-z0-9]+", text)
    return " ".join(words)
|
||||||
|
|
||||||
|
|
||||||
|
def find_duplicates(tasks: list[dict[str, Any]], labels: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Flag tasks that share a normalized title, lane and task type.

    Only tasks whose status is supported and not ``done``, ``archived`` or
    ``cancelled`` are grouped, and tasks whose normalized title is empty are
    left out. Within a group of two or more, the id that sorts first is the
    canonical task and every other member is marked as a duplicate of it.

    Returns:
        A dict keyed by stringified task id with a duplicate record for every
        input task (non-grouped tasks get the "not a duplicate" default).
    """
    active_statuses = SUPPORTED_STATUSES - {"done", "archived", "cancelled"}

    groups: dict[tuple[str, str, str], list[str]] = {}
    for task in tasks:
        if str(task.get("status")) in active_statuses:
            task_id = str(task["id"])
            task_labels = labels[task_id]
            signature = (
                normalize_title(str(task.get("title", ""))),
                task_labels["lane"]["value"],
                task_labels["task_type"]["value"],
            )
            if signature[0]:
                if signature not in groups:
                    groups[signature] = []
                groups[signature].append(task_id)

    result: dict[str, dict[str, Any]] = {}
    for task in tasks:
        result[str(task["id"])] = {
            "is_duplicate": False,
            "canonical_task_id": None,
            "candidate_ids": [],
            "confidence": 0.0,
            "reason_codes": [],
        }

    for members in groups.values():
        if len(members) >= 2:
            canonical = min(members)
            for task_id in members:
                is_copy = task_id != canonical
                result[task_id] = {
                    "is_duplicate": is_copy,
                    "canonical_task_id": canonical if is_copy else None,
                    "candidate_ids": [other for other in members if other != task_id],
                    "confidence": 0.86,
                    "reason_codes": ["same_normalized_title_lane_and_task_type"],
                }
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def has_non_positive_npu_busy_delta(text: str) -> bool:
    """Report whether ``text`` states an NPU busy/delta counter that is <= 0.

    Text mentioning neither "npu" nor "busy" is never flagged. Otherwise the
    text is scanned for ``busy...=N`` / ``delta...=N`` style assignments
    (``=`` or ``:``) and the result is True as soon as one value is zero or
    negative.
    """
    if not ("npu" in text or "busy" in text):
        return False

    def non_positive(number_text: str) -> bool:
        try:
            return float(number_text) <= 0
        except ValueError:
            return False

    patterns = (
        r"\b(?:npu_)?busy(?:_time)?(?:_delta)?(?:_us)?\s*[=:]\s*([+-]?\d+(?:\.\d+)?)\b",
        r"\b(?:npu_)?delta(?:_us)?\s*[=:]\s*([+-]?\d+(?:\.\d+)?)\b",
    )
    return any(
        non_positive(found.group(1))
        for pattern in patterns
        for found in re.finditer(pattern, text)
    )
|
||||||
|
|
||||||
|
|
||||||
|
def classify_review_needed(task: dict[str, Any], task_type: str) -> dict[str, Any]:
    """Decide whether a task needs review and what kind.

    Checks run in this order and the first hit wins: a non-positive NPU busy
    delta in the text; an NPU mention paired with weak proof wording; a
    ``review-required`` marker; reported changed files/diff/tests on an
    implement/ops/docs task; a phrase describing an authority change.
    """
    text = compact_text(task)
    changed_files = task.get("changed_files") or task.get("diff_path") or task.get("tests_run")

    def needed(kind: str, score: float, code: str) -> dict[str, Any]:
        return {"value": True, "kind": kind, "confidence": score, "reason_codes": [code]}

    if has_non_positive_npu_busy_delta(text):
        return needed("npu_proof_gate", 0.84, "npu_claim_non_positive_busy_delta")

    weak_proof_phrases = ("http 200", "no busy", "missing busy")
    if "npu" in text and any(phrase in text for phrase in weak_proof_phrases):
        return needed("npu_proof_gate", 0.8, "npu_claim_needs_busy_delta")

    if "review-required" in text:
        if task_type == "implement":
            kind = "code_change"
        else:
            kind = "spec_review"
        return needed(kind, 0.92, "review_required_marker")

    if changed_files and task_type in {"implement", "ops", "docs"}:
        return needed("code_change", 0.86, "reported_changed_files_or_tests")

    authority_phrases = (
        "routing authority",
        "restart service",
        "write memory",
        "send outbound",
        "private root",
        "wildcard bind",
        "vector db mutation",
    )
    if any(phrase in text for phrase in authority_phrases):
        return needed("human_approval", 0.84, "authority_change_requires_approval")

    return {"value": False, "kind": "none", "confidence": 0.2, "reason_codes": []}
|
||||||
|
|
||||||
|
|
||||||
|
def classify_next_gate(task: dict[str, Any], labels: dict[str, Any]) -> dict[str, Any]:
    """Pick the next workflow gate for a task from its already-computed labels.

    Precedence, first match wins: duplicate, stale lock, a blocked task whose
    blocker needs a human, unfinished parents, an implement task that is
    blocked/done without test evidence, an NPU proof gate, any other review
    need, and finally a default gate looked up by task type.
    """
    def gate(value: str, score: float, codes: list[str]) -> dict[str, Any]:
        return {"value": value, "confidence": score, "reason_codes": codes}

    task_type = labels["task_type"]["value"]
    status = str(task.get("status"))

    if labels["duplicate"]["is_duplicate"]:
        return gate("dedupe_review", 0.86, ["duplicate_candidate"])
    if labels["staleness"]["value"] == "stale_lock":
        return gate("investigate_stale_lock", 0.88, ["running_stale_lock"])

    blocker = labels["blocker"]
    needs_human = blocker["value"] in {"human_decision", "missing_credentials", "unknown"}
    if needs_human and blocker["blocked"]:
        return gate("needs_human_decision", 0.85, blocker["reason_codes"] or ["blocked"])
    if blocker["value"] == "missing_parent":
        return gate("wait_for_parents", 0.82, ["unfinished_parents"])

    if task_type == "implement":
        has_test_evidence = task.get("tests_run") or task.get("test_evidence")
        if not has_test_evidence and status in {"blocked", "done"}:
            return gate("needs_test_evidence", 0.78, ["implementation_without_test_evidence"])

    review_needed = labels["review_needed"]
    if review_needed["kind"] == "npu_proof_gate":
        return gate("needs_npu_proof", 0.8, review_needed["reason_codes"])
    if review_needed["value"]:
        return gate("ready_for_review", 0.86, review_needed["reason_codes"])

    # Default gate per task type when nothing above applies.
    gate_by_type = {
        "spec": "ready_for_implementation",
        "implement": "ready_for_review",
        "review": "ready_for_integration",
        "docs": "ready_for_integration",
        "ops": "ready_for_ops_validation",
        "integration": "ready_for_closeout",
        "final": "safe_to_complete",
        "discovery": "safe_to_complete",
        "charter": "ready_for_spec",
        "test": "ready_for_review",
    }
    if task_type in gate_by_type:
        return gate(gate_by_type[task_type], 0.74, [f"task_type_{task_type}"])
    return gate("unknown", 0.2, [])
|
||||||
|
|
||||||
|
|
||||||
|
def advisory(tasks: list[dict[str, Any]], *, board: str | None, now: float, input_metadata: dict[str, Any], include_evidence: bool) -> dict[str, Any]:
    """Classify every task and assemble the dry-run advisory document.

    Args:
        tasks: Task summaries; each must pass ``validate_task``.
        board: Board name override; falls back to ``input_metadata["board"]``.
        now: Reference time in epoch seconds for staleness calculations.
        input_metadata: Non-task keys from the input document.
        include_evidence: When true, attach a short ``evidence`` block
            (normalized title, status, parent/child counts) to each item.

    Returns:
        A dict with the schema tag, summary counts, and one labelled item per
        task. The ``authority`` and ``npu_proof`` blocks are copies, so a
        caller that edits the returned document cannot alter the module-level
        ``AUTHORITY`` / ``NPU_PROOF`` tables.

    Raises:
        ValueError: if any task fails validation.
    """
    for task in tasks:
        validate_task(task)

    # First pass: labels that depend only on the task itself.
    prelim: dict[str, dict[str, Any]] = {}
    for task in tasks:
        task_id = str(task["id"])
        prelim[task_id] = {
            "task_type": classify_task_type(task),
            "project": classify_project(task, board, input_metadata),
            "lane": classify_lane(task),
            "blocker": classify_blocker(task),
            "staleness": classify_staleness(task, now),
        }
    # Duplicate detection needs every task's lane and type, hence two passes.
    duplicates = find_duplicates(tasks, prelim)

    items = []
    for task in tasks:
        task_id = str(task["id"])
        labels = dict(prelim[task_id])
        labels["duplicate"] = duplicates[task_id]
        labels["review_needed"] = classify_review_needed(task, labels["task_type"]["value"])
        # The next gate reads the duplicate and review labels set just above.
        labels["next_gate"] = classify_next_gate(task, labels)
        item = {
            "task_id": task_id,
            **labels,
            "warnings": [],
        }
        if include_evidence:
            item["evidence"] = {
                "normalized_title": normalize_title(str(task.get("title", ""))),
                "status": task.get("status"),
                "parents_count": len(task.get("parents") or []),
                "children_count": len(task.get("children") or []),
            }
        items.append(item)

    counts = {
        "tasks": len(items),
        "duplicates": sum(1 for item in items if item["duplicate"]["is_duplicate"]),
        "review_needed": sum(1 for item in items if item["review_needed"]["value"]),
        "stale": sum(1 for item in items if item["staleness"]["value"] in {"stale", "stale_lock", "orphaned"}),
        "blocked": sum(1 for item in items if item["blocker"]["blocked"]),
    }
    return {
        "schema": SCHEMA,
        "dry_run": True,
        "created": int(now),
        "board": board or input_metadata.get("board") or None,
        "counts": counts,
        # Copy the shared tables instead of handing out references to them.
        "authority": dict(AUTHORITY),
        "npu_proof": dict(NPU_PROOF),
        "items": items,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the Kanban hygiene advisory tool."""
    usage_notes = "Input: JSON object with tasks[] or JSONL task objects. Required task fields: id,title,status,assignee,created_at,updated_at. Optional compact fields such as body_excerpt, parents, children, changed_files, tests_run, last_run_outcome, and last_comment_excerpt improve labels."
    parser = argparse.ArgumentParser(
        description="Dry-run Kanban hygiene advisory classifier",
        epilog=usage_notes,
    )

    # (flags, keyword arguments) for each option, in display order.
    option_specs = (
        (("--input", "-i"), {"help": "Input JSON/JSONL file; omit or '-' for stdin"}),
        (("--format",), {"choices": ["auto", "json", "jsonl"], "default": "auto", "help": "Input format"}),
        (("--board",), {"help": "Board/project name to include in output"}),
        (("--now",), {"type": float, "default": None, "help": "Epoch seconds for deterministic staleness tests"}),
        (("--compact",), {"action": "store_true", "help": "Accepted for compatibility; output is compact JSON by default"}),
        (("--include-evidence",), {"action": "store_true", "help": "Include short derived evidence fields"}),
    )
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Run the advisory CLI: parse arguments, load tasks, print compact JSON.

    Returns:
        0 on success; 2 when reading or validating the input raises
        ``OSError`` or ``ValueError`` (the message goes to stderr).
    """
    args = build_parser().parse_args(argv)
    try:
        tasks, metadata = load_input(args.input, args.format)
        if args.now is not None:
            reference_time = args.now
        else:
            reference_time = time.time()
        output = advisory(
            tasks,
            board=args.board,
            now=reference_time,
            input_metadata=metadata,
            include_evidence=args.include_evidence,
        )
    except (OSError, ValueError) as exc:
        print(f"kanban-hygiene-advisory: {exc}", file=sys.stderr)
        return 2

    rendered = json.dumps(output, sort_keys=True, separators=(",", ":"))
    print(rendered)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Executable
+567
@@ -0,0 +1,567 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Dry-run comparison harness for advisory-only NPU lanes.
|
||||||
|
|
||||||
|
The harness evaluates synthetic/non-private fixtures against deterministic lane
|
||||||
|
adapters and emits compact npu_advisory_decision_v1 records plus JSON/markdown
|
||||||
|
summaries. It intentionally performs no live routing, memory writes, tool
|
||||||
|
execution, service restarts, outbound sends, broad private scans, or vector-store
|
||||||
|
mutation.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import datetime as dt
|
||||||
|
import hashlib
|
||||||
|
import uuid
|
||||||
|
import importlib.util
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from collections import Counter, defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Mapping
|
||||||
|
|
||||||
|
# Parent of the directory that contains this file.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Default fixture file, relative to the repo root.
DEFAULT_FIXTURES = REPO_ROOT.joinpath("fixtures", "npu_advisory_dry_run", "fixtures.json")
# Schema tags for individual decision records and for the harness summary.
SCHEMA = "npu_advisory_decision_v1"
HARNESS_SCHEMA = "npu_advisory_dry_run_summary_v1"

# Baseline authority flags: every capability is off, and the record is marked
# advisory-only and requiring human approval.
AUTHORITY_FLAGS_CLOSED = {
    **dict.fromkeys(
        (
            "can_route_atlas",
            "can_write_memory",
            "can_execute_tools",
            "can_restart_services",
            "can_send_outbound",
            "can_scan_private_roots",
            "can_mutate_vector_store",
            "can_post_advisory_event",
            "can_change_gateway_config",
        ),
        False,
    ),
    "requires_human_approval": True,
    "advisory_only": True,
}

# Translation from "may_*" flag names to the "can_*" names used above.
MAY_TO_CAN = dict(
    may_route="can_route_atlas",
    may_write_memory="can_write_memory",
    may_execute_tools="can_execute_tools",
    may_restart_services="can_restart_services",
    may_send_external="can_send_outbound",
    may_process_private_dirs="can_scan_private_roots",
    may_mutate_vector_db="can_mutate_vector_store",
    may_change_live_config="can_change_gateway_config",
)

# Mutation categories, all reported as False.
MUTATION_FLAGS_FALSE = dict.fromkeys(
    (
        "live_routing",
        "memory_writes",
        "tool_execution",
        "service_restarts",
        "outbound_sends",
        "broad_private_scans",
        "vector_store_mutation",
        "gateway_restart",
    ),
    False,
)

ALLOWED_ACTIONS = [
    "record_metric",
    "compare_with_expected_label",
    "include_in_digest",
    "recommend_human_review",
]
NO_ACTUAL_ACTION = dict(kind="dry_run_reported", performed=False, performed_by="harness", side_effects=[])

# Case-insensitive patterns, keyed by the kind of action they indicate.
ACTION_PATTERNS = {
    label: re.compile(expression, re.I)
    for label, expression in (
        ("follow_up", r"\b(follow up|follow-up|circle back|reply|respond)\b"),
        ("date_or_deadline", r"\b(deadline|due|by (?:mon|tue|wed|thu|fri|sat|sun)|20\d{2}[-/]\d{1,2}[-/]\d{1,2})\b"),
        ("decision", r"\b(decided|decision|approved|rejected|go with|choose)\b"),
        ("task", r"\b(todo|to-do|action item|assign|need to|please|reminder|review|ask)\b"),
    )
}
|
||||||
|
|
||||||
|
class HarnessError(ValueError):
    """Error raised by the dry-run harness for unusable fixtures or modules."""
|
||||||
|
|
||||||
|
|
||||||
|
def load_module(name: str, path: Path):
    """Import the Python source file at ``path`` under the module name ``name``.

    The module is registered in ``sys.modules`` before it is executed (unless
    an entry with that name already exists, which is left untouched).  If
    executing the module raises, the entry registered by this call is removed
    again so a half-initialised module is never left behind.

    Args:
        name: Module name to register and to pass to the import machinery.
        path: Location of the source file.

    Returns:
        The freshly executed module object.

    Raises:
        HarnessError: If no import spec/loader can be built for ``path``.
        Exception: Whatever the module body itself raises is propagated.
    """
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise HarnessError(f"module_import_failed:{path}")
    module = importlib.util.module_from_spec(spec)
    # setdefault keeps a pre-existing registration intact.
    sys.modules.setdefault(name, module)
    try:
        spec.loader.exec_module(module)  # type: ignore[union-attr]
    except BaseException:
        # Only undo the registration made by this call.
        if sys.modules.get(name) is module:
            sys.modules.pop(name, None)
        raise
    return module
|
||||||
|
|
||||||
|
|
||||||
|
def confidence_bucket(value: float | int | None) -> str:
    """Map a numeric confidence onto a named bucket.

    ``None`` yields ``"unknown"``.  Otherwise the value is converted to float
    and the first threshold it reaches (0.95, 0.80, 0.60, 0.40) decides the
    bucket; anything lower is ``"very_low"``.
    """
    if value is None:
        return "unknown"
    score = float(value)
    thresholds = (
        (0.95, "very_high"),
        (0.80, "high"),
        (0.60, "medium"),
        (0.40, "low"),
    )
    for floor, bucket in thresholds:
        if score >= floor:
            return bucket
    return "very_low"
|
||||||
|
|
||||||
|
|
||||||
|
def lane_confidence(output: Mapping[str, Any], fallback: float = 0.7) -> float:
    """Extract a confidence value from a lane output.

    Lookup order:
      1. ``output["confidence"]`` then ``output["score"]`` - the first one that
         exists and converts to float wins.
      2. The largest float-convertible ``"confidence"`` among the mapping
         values of ``output["labels"]``.
      3. ``fallback``.
    """
    for field in ("confidence", "score"):
        try:
            direct = float(output[field])
        except (KeyError, TypeError, ValueError):
            continue
        return direct

    label_map = output.get("labels")
    if not isinstance(label_map, Mapping):
        return fallback

    scores: list[float] = []
    for entry in label_map.values():
        if not (isinstance(entry, Mapping) and "confidence" in entry):
            continue
        try:
            scores.append(float(entry["confidence"]))
        except (TypeError, ValueError):
            # Unparseable per-label confidence values are ignored.
            continue
    return max(scores) if scores else fallback
|
||||||
|
|
||||||
|
|
||||||
|
def closed_authority_flags(extra: Mapping[str, Any] | None = None) -> dict[str, bool]:
    """Return a copy of the closed authority flags, overlaid with ``extra``.

    Keys of ``extra`` are first translated through ``MAY_TO_CAN``.  Only flags
    that already exist in the closed set are overwritten (with ``bool(value)``),
    and ``requires_human_approval`` / ``advisory_only`` can never be overridden.
    """
    locked = {"requires_human_approval", "advisory_only"}
    flags = dict(AUTHORITY_FLAGS_CLOSED)
    if not extra:
        return flags
    for raw_name, raw_value in extra.items():
        canonical = MAY_TO_CAN.get(raw_name, raw_name)
        if canonical not in flags or canonical in locked:
            continue
        flags[canonical] = bool(raw_value)
    return flags
|
||||||
|
|
||||||
|
|
||||||
|
def authority_violations(flags: Mapping[str, Any]) -> list[str]:
    """List, in sorted order, every ``can_*`` flag whose value is truthy."""
    raised = [
        name
        for name, state in flags.items()
        if name.startswith("can_") and bool(state)
    ]
    raised.sort()
    return raised
|
||||||
|
|
||||||
|
|
||||||
|
def severity_for(label: str) -> str:
    """Translate a recommendation/decision label into a severity name.

    Returns ``"critical"``, ``"medium"`` or ``"info"`` for the known labels and
    ``"none"`` for everything else.
    """
    tiers = (
        ("critical", frozenset({"escalate", "block_authority_violation"})),
        ("medium", frozenset({"require_human_review", "review_item", "ready_for_review", "prepare_context_bundle"})),
        ("info", frozenset({"summarize", "log"})),
    )
    for severity, members in tiers:
        if label in members:
            return severity
    return "none"
|
||||||
|
|
||||||
|
|
||||||
|
def npu_proof_v1(proof: Mapping[str, Any]) -> dict[str, Any]:
    """Normalise a raw NPU proof mapping into the v1 proof record.

    The busy delta is read from ``npu_busy_delta_us`` (falling back to
    ``busy_delta_us``) and the service delta from ``service_reported_delta_us``
    (falling back to ``npu_busy_delta_us``).  A numeric ``0`` counts as a
    reported value: it is kept as ``0`` rather than treated as missing, so a
    zero delta produces ``proof_ok=False`` instead of ``None`` when no explicit
    ``ok`` is supplied.  Other falsy values (``None``, ``""``) still fall
    through to the next key.

    Returns:
        A dict with ``proof_mode``, ``busy_delta_us``,
        ``service_reported_delta_us``, ``inference_ran``, ``proof_ok`` and
        ``counter_path`` (always ``None``).
    """

    def first_reported(*keys: str) -> Any:
        # Truthy values win as before; additionally a real numeric zero is
        # accepted as "reported" instead of being skipped by truthiness.
        for key in keys:
            candidate = proof.get(key)
            is_numeric_zero = (
                isinstance(candidate, (int, float))
                and not isinstance(candidate, bool)
                and candidate == 0
            )
            if candidate or is_numeric_zero:
                return candidate
        return None

    def as_int_or_none(value: Any) -> int | None:
        # Only ints and all-digit strings are reported as integer deltas.
        if isinstance(value, int) or (isinstance(value, str) and value.isdigit()):
            return int(value)
        return None

    busy = first_reported("npu_busy_delta_us", "busy_delta_us")
    service_delta = first_reported("service_reported_delta_us", "npu_busy_delta_us")

    proof_ok = proof.get("ok")
    if proof_ok is None and busy is not None:
        # Without an explicit verdict, a positive busy delta is the proof.
        try:
            proof_ok = int(busy) > 0
        except (TypeError, ValueError):
            proof_ok = None

    fixture_only = bool(proof.get("fixture_only", True))
    return {
        "proof_mode": "offline_fixture" if fixture_only else "service_reported_delta",
        "busy_delta_us": as_int_or_none(busy),
        "service_reported_delta_us": as_int_or_none(service_delta),
        "inference_ran": bool(proof_ok) if proof_ok is not None else False,
        "proof_ok": bool(proof_ok) if proof_ok is not None else None,
        "counter_path": None,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def compare_outcome(recommendation: str, expected: str, human: str) -> str:
    """Classify how a recommendation compares with the reference decisions.

    Returns one of ``"agree"``, ``"false_positive"``, ``"false_negative"``,
    ``"uncertain"`` or ``"disagree"``.
    """
    actionable = {"escalate", "summarize", "review_item", "require_human_review", "prepare_context_bundle"}
    quiet = {"log", "suppress", "none"}

    # Full agreement needs the recommendation, the human decision and the
    # expected label to all coincide.
    if recommendation == human and human == expected:
        return "agree"
    # The harness asked for attention the human did not want.
    if recommendation in actionable and human in quiet:
        return "false_positive"
    # The harness stayed quiet where the human wanted attention.
    if recommendation in quiet and human in actionable:
        return "false_negative"
    return "uncertain" if recommendation in {"uncertain", "defer"} else "disagree"
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_context_gate(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Run the context-gate planner on a fixture and derive a recommendation.

    Blocked fields force ``require_human_review``; the coding/ops/research
    bundles map to ``prepare_context_bundle``; anything else is
    ``answer_directly``.
    """
    gate_source = REPO_ROOT / "openvino_context_gate" / "context_gate.py"
    context_gate = load_module("openvino_context_gate.context_gate", gate_source)
    plan = context_gate.build_plan(
        str(fixture["query"]),
        context=fixture.get("context") or {},
        options={"require_npu_proof": False},
    )

    blocked = plan["bundle_plan"].get("blocked_fields") or []
    if blocked:
        recommendation = "require_human_review"
    else:
        bundle_requires_prep = plan["bundle_plan"]["bundle_name"] in {"CodingTaskBundle", "OpsDebugBundle", "ResearchBundle"}
        recommendation = "prepare_context_bundle" if bundle_requires_prep else "answer_directly"

    confidence = plan["query_class"].get("confidence", 0.7)
    npu_proof = plan["npu_proof"]
    notes = [
        f"bundle={plan['bundle_plan']['bundle_name']}",
        f"sources={','.join(s['source'] for s in plan['source_plan'])}",
    ]
    raw_compact = {
        "bundle_name": plan["bundle_plan"]["bundle_name"],
        "sources": [s["source"] for s in plan["source_plan"]],
        "blocked_fields": [f["field"] for f in blocked],
    }
    return {
        "recommendation": recommendation,
        "confidence": confidence,
        "npu_proof": npu_proof,
        "notes": notes,
        "raw_compact": raw_compact,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def cron_recommendation(envelope: Mapping[str, Any], event: Mapping[str, Any]) -> str:
    """Choose ``escalate`` / ``summarize`` / ``log`` for a cron/n8n event.

    Without a positive NPU proof (``ok is True`` and a busy delta above zero)
    the answer is always ``"log"``.  With proof, a critical event severity
    escalates, a warning severity or high/critical urgency label summarises,
    and everything else is logged.
    """
    result = envelope.get("result")
    if isinstance(result, Mapping):
        labels = result.get("labels") or {}
    else:
        labels = {}

    # The urgency label may be a bare value or a mapping carrying "value".
    urgency_field = labels.get("urgency")
    if isinstance(urgency_field, Mapping):
        urgency = urgency_field.get("value")
    else:
        urgency = urgency_field
    urgency = urgency or "normal"

    npu = envelope.get("npu_proof") or {}
    npu_ok = bool(npu.get("ok") is True and int(npu.get("npu_busy_delta_us") or 0) > 0)
    severity = str(event.get("severity") or "normal")

    if npu_ok and severity == "critical":
        return "escalate"
    if npu_ok and (severity == "warning" or urgency in {"high", "critical"}):
        return "summarize"
    return "log"
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_cron_n8n(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a cron/n8n advisory fixture from its gateway envelope and event."""
    envelope = fixture.get("gateway_envelope") or {}
    event = fixture.get("event") or {}

    result = envelope.get("result")
    if isinstance(result, Mapping):
        labels = result.get("labels") or {}
    else:
        labels = {}

    # Confidence comes from the label confidences, defaulting to 0.6.
    confidence = lane_confidence({"labels": labels}, 0.6)
    recommendation = cron_recommendation(envelope, event)
    return {
        "recommendation": recommendation,
        "confidence": confidence,
        "npu_proof": envelope.get("npu_proof") or {},
        "authority_from_envelope": envelope.get("authority") or {},
        "notes": [
            f"workflow={event.get('workflow')}",
            f"severity={event.get('severity')}",
        ],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_batch_triage(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Rule-based triage of a fixture's ``document_text``.

    Any matching action pattern yields ``review_item``; otherwise text shorter
    than 20 stripped characters is ``uncertain`` and longer text is
    ``suppress``.  Only reason codes are reported, never the raw text.
    """
    text = str(fixture.get("document_text") or "")
    reasons = [code for code, pattern in ACTION_PATTERNS.items() if pattern.search(text)]
    reasons.sort()

    if reasons:
        recommendation, conf = "review_item", 0.82
    elif len(text.strip()) < 20:
        recommendation, conf = "uncertain", 0.35
    else:
        recommendation, conf = "suppress", 0.64

    reason_note = ",".join(reasons) or "none"
    return {
        "recommendation": recommendation,
        "confidence": conf,
        "npu_proof": {"verified": False, "required": False, "note": "fixture_rules_no_npu_claim"},
        "notes": [f"lane={fixture.get('triage_lane')}", f"reason_codes={reason_note}"],
        "raw_compact": {"reasons": reasons, "raw_text_redacted": True, "full_path_included": False},
    }
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_voice_audio(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a voice/audio fixture through the pipeline's ``decide_gate``.

    A blocked atlas gate requires human review; otherwise action-worthy
    transcripts become ``review_item`` and the rest are suppressed.
    """
    pipeline = load_module("npu_voice_audio_pipeline", REPO_ROOT / "scripts" / "npu_voice_audio_pipeline.py")
    proof = fixture.get("npu_proof") or {}
    whisper_proven = bool(proof.get("whisper"))
    classifier_proven = bool(proof.get("classifier"))

    action_worthy, atlas_gate, next_gate = pipeline.decide_gate(
        str(fixture.get("transcript") or ""),
        dict(fixture.get("labels") or {}),
        whisper_proven=whisper_proven,
        classifier_proven=classifier_proven,
    )

    if atlas_gate.startswith("blocked"):
        recommendation = "require_human_review"
    else:
        recommendation = "review_item" if action_worthy else "suppress"

    return {
        "recommendation": recommendation,
        "confidence": 0.86 if action_worthy else 0.66,
        "npu_proof": {
            "whisper": whisper_proven,
            "classifier": classifier_proven,
            # Verified only when both stages carry proof.
            "verified": whisper_proven and classifier_proven,
        },
        "notes": [f"atlas_gate={atlas_gate}", f"next_gate={next_gate}", "transcript_redacted=true"],
        "raw_compact": {"action_worthy": action_worthy, "atlas_gate": atlas_gate, "next_gate": next_gate},
    }
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_kanban_hygiene(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate a kanban-hygiene fixture using the advisory script.

    The recommendation is the ``next_gate`` value of the first advisory item.

    Raises:
        HarnessError: If the advisory returns no items.  Previously this
            surfaced as an uncaught ``IndexError``; ``HarnessError`` is what
            the CLI entry point reports as a structured failure.
    """
    hygiene = load_module("kanban_hygiene_advisory", REPO_ROOT / "scripts" / "kanban-hygiene-advisory.py")
    out = hygiene.advisory(
        list(fixture.get("tasks") or []),
        board="synthetic-npu",
        # A fixture-supplied "now" is used when present; otherwise wall clock.
        now=float(fixture.get("now") or time.time()),
        input_metadata={},
        include_evidence=False,
    )
    items = out["items"]
    if not items:
        raise HarnessError(f"kanban_hygiene_no_items:{fixture.get('id')}")
    item = items[0]
    next_gate = item["next_gate"]["value"]
    return {
        "recommendation": next_gate,
        "confidence": item["next_gate"].get("confidence", 0.7),
        "npu_proof": out["npu_proof"],
        "notes": [f"task_id={item['task_id']}", f"review_needed={item['review_needed']['value']}"],
        "raw_compact": {"counts": out["counts"], "next_gate": item["next_gate"]},
    }
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_gateway_envelope(fixture: Mapping[str, Any]) -> dict[str, Any]:
    """Evaluate an advisory gateway envelope fixture.

    Any open ``can_*`` authority flag in the envelope blocks the decision;
    otherwise the cron recommendation is computed as for a critical event.
    """
    envelope = fixture.get("gateway_envelope") or {}
    authority = envelope.get("authority") or {}
    violations = authority_violations(closed_authority_flags(authority))

    recommendation = (
        "block_authority_violation"
        if violations
        else cron_recommendation(envelope, {"severity": "critical"})
    )

    result = envelope.get("result")
    if isinstance(result, Mapping):
        labels = result.get("labels") or {}
    else:
        labels = {}

    violation_note = ",".join(violations) or "none"
    return {
        "recommendation": recommendation,
        "confidence": lane_confidence({"labels": labels}, 0.8),
        "npu_proof": envelope.get("npu_proof") or {},
        "authority_from_envelope": envelope.get("authority") or {},
        "notes": [f"violations={violation_note}", f"trace_id={envelope.get('trace_id')}"],
    }
|
||||||
|
|
||||||
|
|
||||||
|
# Dispatch table: fixture "lane" name -> evaluator function for that lane.
EVALUATORS = dict(
    context_gate=evaluate_context_gate,
    cron_n8n_advisory=evaluate_cron_n8n,
    batch_triage=evaluate_batch_triage,
    voice_audio=evaluate_voice_audio,
    kanban_hygiene=evaluate_kanban_hygiene,
    advisory_gateway_envelope=evaluate_gateway_envelope,
)
|
||||||
|
|
||||||
|
|
||||||
|
def build_decision(fixture: Mapping[str, Any], evaluated: Mapping[str, Any]) -> dict[str, Any]:
    """Assemble the decision record for one fixture.

    Args:
        fixture: The fixture; must contain ``human_or_atlas_decision`` and
            ``expected_recommendation``.
        evaluated: The evaluator output; must contain ``recommendation`` and
            may contain ``confidence``, ``npu_proof``, ``notes`` and
            ``authority_from_envelope``.

    Returns:
        A dict describing source, service, recommendation, confidence,
        authority flags, outcome, NPU proof, latency, fallback and privacy.

    Raises:
        KeyError: If a required fixture/evaluator field is missing.

    Notes:
        * When the recommendation matches the expected label, a non-null
          fixture ``expected_outcome`` overrides the computed outcome label.
          A null ``expected_outcome`` is treated as "not declared" (it used to
          turn the label into the string ``"None"``).
        * ``allowed_actions`` and ``actual_action["side_effects"]`` are fresh
          copies so mutating one record cannot leak into the module constants
          or into other records.
    """
    envelope_authority = evaluated.get("authority_from_envelope")
    extra_authority = envelope_authority if isinstance(envelope_authority, Mapping) else None
    authority_flags = closed_authority_flags(extra_authority)
    violations = authority_violations(authority_flags)

    recommendation = str(evaluated["recommendation"])
    human = str(fixture["human_or_atlas_decision"])
    expected = str(fixture["expected_recommendation"])

    outcome_label = compare_outcome(recommendation, expected, human)
    declared_outcome = fixture.get("expected_outcome")
    if recommendation == expected and declared_outcome is not None and outcome_label != str(declared_outcome):
        outcome_label = str(declared_outcome)

    confidence_score = float(evaluated.get("confidence") or 0.0)
    npu_raw = dict(evaluated.get("npu_proof") or {})
    # Proofs are assumed to be fixture-only unless the evaluator says otherwise.
    npu_raw.setdefault("fixture_only", True)

    fixture_id = str(fixture.get("id"))
    input_class = str(fixture.get("input_class") or fixture.get("lane") or "unknown")
    service_name = str(fixture.get("service") or fixture.get("lane") or "unknown")
    source_kind = str(fixture.get("source") or "fixture")

    comparison = "agree" if outcome_label == "agree" else ("uncertain" if outcome_label == "uncertain" else "disagree")
    error_type = outcome_label if outcome_label in {"false_positive", "false_negative", "severity_overcall", "severity_undercall"} else None
    if violations:
        # An open authority flag outranks any other error classification.
        error_type = "unsafe_authority"

    actual_action = dict(NO_ACTUAL_ACTION)
    actual_action["side_effects"] = list(actual_action.get("side_effects") or [])

    return {
        "schema_version": SCHEMA,
        # Stable id: derived from the schema name and fixture id only.
        "decision_id": str(uuid.uuid5(uuid.NAMESPACE_URL, f"{SCHEMA}:{fixture_id}")),
        "timestamp": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"),
        "source": {
            "kind": "fixture",
            "fixture_id": fixture_id,
            "fixture_set": "npu_advisory_eval_v1",
            "artifact_ref": None,
            "content_hash": "sha256:" + hashlib.sha256(json.dumps(fixture, sort_keys=True, default=str).encode()).hexdigest(),
            "privacy_class": "synthetic" if source_kind.startswith("synthetic") else "non_private",
        },
        "service": {
            "name": service_name,
            "endpoint": service_name,
            "mode": "offline_fixture",
            "model": "openvino-local-fixture",
        },
        "input_class": input_class,
        "recommendation": {
            "label": recommendation,
            "severity": severity_for(recommendation),
            "reasons": list(evaluated.get("notes") or []),
            "evidence_refs": [f"fixture:{fixture_id}", f"lane:{fixture.get('lane')}"],
            "raw_output_ref": None,
        },
        "expected_recommendation": expected,
        "confidence": {
            "score": round(confidence_score, 3),
            "bucket": confidence_bucket(confidence_score),
            "bucket_rule": "v1_default",
            "calibrated": False,
        },
        "authority_flags": authority_flags,
        "allowed_actions": list(ALLOWED_ACTIONS),
        "actual_action": actual_action,
        "human_or_atlas_decision": {
            "source": "fixture_expected",
            "label": human,
            "severity": severity_for(human),
            "confidence": None,
            "decision_ref": fixture_id,
            "timestamp": None,
        },
        "outcome": {
            "comparison": comparison,
            "label": outcome_label,
            "error_type": error_type,
            "human_review_required": bool(violations or recommendation in {"require_human_review", "block_authority_violation"}),
            "promotion_blocker": bool(violations or error_type in {"false_negative", "unsafe_authority", "privacy_violation"}),
        },
        "expected_outcome": fixture.get("expected_outcome"),
        "npu_proof": npu_proof_v1(npu_raw),
        "latency": {"total_ms": 0, "service_ms": None, "queue_ms": None, "timeout": False},
        "fallback": {"occurred": True, "kind": "offline", "reason": "synthetic_fixture_deterministic_adapter_no_live_service_call", "expected": True},
        "privacy": {"payload_logged": False, "redaction": "metadata_only", "retention": "local_audit", "contains_private_payload": False},
        "notes": list(evaluated.get("notes") or []),
        "authority_safe_flag_violations": violations,
        # Compatibility fields for compact summaries/tests.
        "fixture_id": fixture_id,
        "lane": fixture.get("lane"),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def run(fixtures_path: Path) -> dict[str, Any]:
    """Evaluate every fixture in ``fixtures_path`` and build the run summary.

    Args:
        fixtures_path: JSON file whose top-level object has a non-empty
            ``"fixtures"`` list of fixture objects, each with a ``"lane"``.

    Returns:
        Summary dict with totals, per-lane counts, confidence buckets,
        recommendation counts, minimum metrics, violations, mismatches and
        the full list of decision records.

    Raises:
        OSError / json.JSONDecodeError: If the file cannot be read or parsed.
        HarnessError: If the fixture set is missing/empty, a fixture entry is
            not an object, or a fixture names an unsupported lane.
    """
    data = json.loads(fixtures_path.read_text(encoding="utf-8"))
    # A JSON root that is not an object has no "fixtures" list at all.
    fixtures = data.get("fixtures") if isinstance(data, Mapping) else None
    if not isinstance(fixtures, list) or not fixtures:
        raise HarnessError("fixture_set_empty")

    decisions = []
    started = time.perf_counter()
    for fixture in fixtures:
        if not isinstance(fixture, Mapping):
            raise HarnessError("fixture_invalid:not_mapping")
        lane = fixture.get("lane")
        evaluator = EVALUATORS.get(str(lane))
        if evaluator is None:
            raise HarnessError(f"unsupported_lane:{lane}")
        t0 = time.perf_counter()
        evaluated = evaluator(fixture)
        decision = build_decision(fixture, evaluated)
        # Latency covers evaluation plus record assembly for this fixture.
        decision["latency"]["total_ms"] = round((time.perf_counter() - t0) * 1000, 3)
        decisions.append(decision)

    counts = Counter(d["outcome"]["label"] for d in decisions)
    confidence = Counter(d["confidence"]["bucket"] for d in decisions)
    recommendations = Counter(d["recommendation"]["label"] for d in decisions)
    violations = [d for d in decisions if d["authority_safe_flag_violations"]]
    mismatches = [d for d in decisions if d["outcome"]["label"] != d.get("expected_outcome")]

    return {
        "schema": HARNESS_SCHEMA,
        "fixture_file": str(fixtures_path),
        "dry_run": True,
        "mutations": dict(MUTATION_FLAGS_FALSE),
        "totals": {
            "fixtures": len(decisions),
            "agree": counts.get("agree", 0),
            "disagree": counts.get("disagree", 0),
            "uncertain": counts.get("uncertain", 0),
            "false_positive": counts.get("false_positive", 0),
            "false_negative": counts.get("false_negative", 0),
            "authority_safe_flag_violations": len(violations),
            "expected_outcome_mismatches": len(mismatches),
            "wall_ms": round((time.perf_counter() - started) * 1000, 3),
        },
        "by_lane": lane_summary(decisions),
        "confidence_buckets": dict(sorted(confidence.items())),
        "recommendations": dict(sorted(recommendations.items())),
        "minimum_metrics": minimum_metrics(decisions),
        "violations": [{"fixture_id": d["fixture_id"], "flags": d["authority_safe_flag_violations"]} for d in violations],
        "mismatches": [{"fixture_id": d["fixture_id"], "outcome": d["outcome"]["label"], "expected_outcome": d.get("expected_outcome")} for d in mismatches],
        "decisions": decisions,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def percentile(values: list[float], pct: float) -> float | None:
    """Return the nearest-rank percentile of ``values``.

    The fractional position ``pct/100 * (n-1)`` is rounded half-up to an index
    into the sorted values and clamped to the valid range.  Rounding half-up
    (instead of the built-in ``round``, which rounds halves to the nearest
    even integer) keeps tie-breaking consistent: an exact midpoint always
    selects the upper neighbour regardless of the list length.

    Args:
        values: Sample values; may be empty.
        pct: Percentile in the range 0-100 (values outside are clamped).

    Returns:
        The selected sample, or ``None`` for an empty list.
    """
    if not values:
        return None
    ordered = sorted(values)
    last = len(ordered) - 1
    position = (pct / 100) * last
    # int() truncation after +0.5 is round-half-up for non-negative positions;
    # negative positions are clamped to 0 below.
    idx = min(last, max(0, int(position + 0.5)))
    return ordered[idx]
|
||||||
|
|
||||||
|
|
||||||
|
def minimum_metrics(decisions: list[dict[str, Any]]) -> dict[str, Any]:
    """Aggregate the minimum metric set over all decision records.

    Covers record counts by input class and service, privacy/side-effect
    counters, fallback counts, NPU proof tallies, p50/p95 latency per service
    and per input class, and the timeout count.
    """

    def count_where(predicate) -> int:
        # Number of decision records satisfying the predicate.
        return sum(1 for record in decisions if predicate(record))

    def latency_spread(group_key) -> dict[str, dict[str, float | None]]:
        # Group total latency by key (first-seen order) and summarise each group.
        grouped: dict[str, list[float]] = {}
        for record in decisions:
            grouped.setdefault(group_key(record), []).append(float(record["latency"]["total_ms"]))
        return {
            key: {"p50_ms": percentile(samples, 50), "p95_ms": percentile(samples, 95)}
            for key, samples in grouped.items()
        }

    by_input = Counter(d["input_class"] for d in decisions)
    by_service = Counter(d["service"]["name"] for d in decisions)
    fallback_kinds = Counter(d["fallback"]["kind"] for d in decisions if d["fallback"]["occurred"])

    proof_ok = count_where(lambda d: d["npu_proof"]["proof_ok"] is True)
    proof_missing = count_where(lambda d: d["npu_proof"]["proof_ok"] is False)
    proof_na = count_where(lambda d: d["npu_proof"]["proof_ok"] is None)
    privacy_violations = count_where(lambda d: d["privacy"]["contains_private_payload"] or d["privacy"]["payload_logged"])
    side_effects = count_where(lambda d: d["actual_action"]["performed"] or d["actual_action"]["side_effects"])
    timeouts = count_where(lambda d: d["latency"].get("timeout"))

    lat_by_service = latency_spread(lambda d: d["service"]["name"])
    lat_by_input = latency_spread(lambda d: d["input_class"])
    outcomes = Counter(d["outcome"]["label"] for d in decisions)

    return {
        "total_records": len(decisions),
        "records_by_input_class": dict(sorted(by_input.items())),
        "records_by_service": dict(sorted(by_service.items())),
        "privacy_violation_count": privacy_violations,
        "actual_side_effect_count": side_effects,
        "missing_reference_count": outcomes.get("missing_reference", 0),
        "fallback_count": sum(fallback_kinds.values()),
        "fallback_counts_by_kind": dict(sorted(fallback_kinds.items())),
        "expected_fallback_count": count_where(lambda d: d["fallback"]["occurred"] and d["fallback"]["expected"]),
        "unexpected_fallback_count": count_where(lambda d: d["fallback"]["occurred"] and not d["fallback"]["expected"]),
        "npu_proof_ok_count": proof_ok,
        "npu_proof_missing_count": proof_missing,
        "npu_proof_not_applicable_count": proof_na,
        "latency_by_service": lat_by_service,
        "latency_by_input_class": lat_by_input,
        "timeout_count": timeouts,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def lane_summary(decisions: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Summarise outcome counts per lane, with lanes in sorted order.

    Each row reports the fixture count, the number of agree / disagree /
    false_positive / false_negative / uncertain outcomes, and how many records
    carry authority-safe flag violations.
    """
    grouped: dict[str, list[dict[str, Any]]] = {}
    for record in decisions:
        grouped.setdefault(str(record["lane"]), []).append(record)

    summary = {}
    for lane in sorted(grouped):
        members = grouped[lane]
        tally = Counter(member["outcome"]["label"] for member in members)
        row: dict[str, Any] = {"fixtures": len(members)}
        for label in ("agree", "disagree", "false_positive", "false_negative", "uncertain"):
            row[label] = tally.get(label, 0)
        row["authority_safe_flag_violations"] = sum(
            1 for member in members if member["authority_safe_flag_violations"]
        )
        summary[lane] = row
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def markdown_summary(summary: Mapping[str, Any]) -> str:
    """Render the run summary as a short Markdown report.

    The report has a headline with the totals, a per-lane table and, when
    present, a list of authority-safe flag violations.  The text always ends
    with a newline.
    """
    totals = summary["totals"]
    headline_fields = ("fixtures", "agree", "disagree", "false_positive", "false_negative", "uncertain")
    lines = ["# NPU advisory dry-run comparison", ""]
    lines.append(" | ".join(f"{field}: {totals[field]}" for field in headline_fields))
    lines.append(f"authority_safe_flag_violations: {totals['authority_safe_flag_violations']} | mutations: all_false")
    lines.append("")
    lines.append("| lane | fixtures | agree | false_positive | false_negative | violations |")
    lines.append("| --- | ---: | ---: | ---: | ---: | ---: |")

    row_fields = ("fixtures", "agree", "false_positive", "false_negative", "authority_safe_flag_violations")
    for lane, row in summary["by_lane"].items():
        cells = [f"{lane}"] + [f"{row[field]}" for field in row_fields]
        lines.append("| " + " | ".join(cells) + " |")

    if summary.get("violations"):
        lines += ["", "## Authority-safe flag violations"]
        lines += [
            f"- {violation['fixture_id']}: {', '.join(violation['flags'])}"
            for violation in summary["violations"]
        ]
    return "\n".join(lines) + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the dry-run harness."""
    parser = argparse.ArgumentParser(description="Run synthetic advisory-only NPU dry-run fixture comparisons.")
    parser.add_argument("--fixtures", default=str(DEFAULT_FIXTURES), help="Synthetic fixture JSON file")
    parser.add_argument("--format", choices=["json", "markdown"], default="json")

    # Boolean switches, all off by default.
    switches = (
        ("--include-decisions", "Include per-fixture decision records in JSON output"),
        ("--fail-on-mismatch", "Return non-zero if observed outcome differs from fixture expected_outcome"),
        ("--fail-on-authority-violation", "Return non-zero if any fixture exposes may_* authority flags set true"),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Returns 2 when the fixture file cannot be read, parsed or evaluated (a JSON
    error object is written to stderr), 1 when a requested ``--fail-on-*``
    condition is met, and 0 otherwise.
    """
    args = build_parser().parse_args(argv)
    try:
        summary = run(Path(args.fixtures).expanduser().resolve())
    except (OSError, json.JSONDecodeError, HarnessError) as exc:
        failure = {"ok": False, "error": str(exc), "dry_run": True, "mutations": MUTATION_FLAGS_FALSE}
        print(json.dumps(failure, sort_keys=True), file=sys.stderr)
        return 2

    if args.format == "markdown":
        print(markdown_summary(summary), end="")
    else:
        # Per-fixture decisions are only emitted on request.
        payload = {
            key: value
            for key, value in summary.items()
            if args.include_decisions or key != "decisions"
        }
        print(json.dumps(payload, sort_keys=True, separators=(",", ":")))

    mismatch_failure = args.fail_on_mismatch and summary["totals"]["expected_outcome_mismatches"]
    authority_failure = args.fail_on_authority_violation and summary["totals"]["authority_safe_flag_violations"]
    return 1 if (mismatch_failure or authority_failure) else 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Executable
+523
@@ -0,0 +1,523 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Explicit-root dry-run batch triage for local documents, images, and audio.
|
||||||
|
|
||||||
|
This wrapper is intentionally report-only. It requires a lane-scoped approved
|
||||||
|
root in a manifest, rejects request roots that broaden that approval, redacts raw
|
||||||
|
text/transcripts by default, and never mutates Obsidian, RAG/vector DBs, files,
|
||||||
|
routing, memory, services, or sends.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import datetime as dt
|
||||||
|
import hashlib
|
||||||
|
import ipaddress
|
||||||
|
import importlib.util
|
||||||
|
import json
|
||||||
|
import mimetypes
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yaml # type: ignore
|
||||||
|
except Exception as exc: # pragma: no cover
|
||||||
|
raise SystemExit("PyYAML is required to read triage root manifests") from exc
|
||||||
|
|
||||||
|
# Triage lanes known to this script, split below into audio and
# document/image lanes.
LANES = (
    "screenshots",
    "receipts",
    "downloads",
    "obsidian_attachments",
    "voice_memos",
    "meeting_snippets",
)
AUDIO_LANES = {"voice_memos", "meeting_snippets"}
DOC_IMAGE_LANES = {"screenshots", "receipts", "downloads", "obsidian_attachments"}

# Directory names listed for skipping (VCS, caches, vector stores).
SKIP_DIR_NAMES = {
    ".git",
    ".obsidian",
    "__pycache__",
    ".cache",
    "cache",
    "chroma",
    "chromadb",
    "vector_db",
    "vectors",
}

# sysfs counter file; read_busy() parses its content as an integer.
NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
DEFAULT_WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"

# Mutation report: every entry is False.
MUTATIONS_FALSE = dict.fromkeys(
    (
        "obsidian",
        "rag",
        "vector_db",
        "sends",
        "file_moves",
        "routing",
        "memory",
        "service_restarts",
    ),
    False,
)

# Case-insensitive reason-code patterns.
ACTION_PATTERNS = {
    reason: re.compile(pattern, re.I)
    for reason, pattern in (
        ("follow_up", r"\b(follow up|follow-up|circle back|reply|respond)\b"),
        ("date_or_deadline", r"\b(deadline|due|by (?:mon|tue|wed|thu|fri|sat|sun)|20\d{2}[-/]\d{1,2}[-/]\d{1,2})\b"),
        ("decision", r"\b(decided|decision|approved|rejected|go with|choose)\b"),
        ("task", r"\b(todo|to-do|action item|assign|need to|please)\b"),
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
class FailClosed(Exception):
    """Raised when a request must be rejected rather than processed."""
|
||||||
|
|
||||||
|
|
||||||
|
def sha256_text(text: str) -> str:
    """Return ``"sha256:<hex>"`` for the UTF-8 encoding of ``text``.

    Characters that cannot be encoded are replaced instead of raising.
    """
    encoded = text.encode("utf-8", errors="replace")
    digest = hashlib.sha256(encoded).hexdigest()
    return f"sha256:{digest}"
|
||||||
|
|
||||||
|
|
||||||
|
def sha256_file(path: Path) -> str:
    """Return ``"sha256:<hex>"`` of a file's bytes, read in 1 MiB chunks."""
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as handle:
        while True:
            block = handle.read(chunk_size)
            if block == b"":
                break
            hasher.update(block)
    return "sha256:" + hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def read_busy(path: Path = NPU_BUSY_PATH) -> int | None:
    """Read an integer counter from ``path``.

    Returns ``None`` when the file cannot be read or its content does not
    parse as an integer.
    """
    try:
        raw = path.read_text()
        return int(raw.strip())
    except Exception:
        # Best effort: a missing or unreadable counter is not an error.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def validate_local_whisper_url(whisper_url: str) -> str:
    """Fail closed unless Whisper transcription stays on the approved loopback service.

    The URL must use plain ``http``, carry no credentials, target port 18816
    and name either ``localhost`` or a loopback IP address.  The URL is
    returned unchanged when it passes.

    Raises:
        FailClosed: With a reason code for the first check that fails.
    """
    try:
        parsed = urllib.parse.urlparse(whisper_url)
        # Accessing .port validates the port component and may raise.
        port = parsed.port
    except ValueError as exc:
        raise FailClosed("whisper_url_invalid") from exc

    if parsed.scheme != "http":
        raise FailClosed("whisper_url_scheme_not_http")
    if parsed.username or parsed.password:
        raise FailClosed("whisper_url_credentials_not_allowed")
    if port != 18816:
        raise FailClosed("whisper_url_port_not_approved")

    host = (parsed.hostname or "").strip().lower()
    is_local = host == "localhost"
    if not is_local:
        try:
            is_local = ipaddress.ip_address(host).is_loopback
        except ValueError:
            # Not an IP literal (or empty): treated as non-loopback.
            is_local = False
    if is_local:
        return whisper_url
    raise FailClosed("whisper_url_not_loopback")
|
||||||
|
|
||||||
|
|
||||||
|
def is_under(path: Path, root: Path) -> bool:
    """Report whether the resolved ``path`` is ``root`` itself or lies inside it."""
    try:
        path.resolve().relative_to(root.resolve())
    except ValueError:
        return False
    else:
        return True
|
||||||
|
|
||||||
|
|
||||||
|
def load_manifest(path: Path) -> dict[str, Any]:
    """Load and validate the triage root manifest.

    The manifest is read as UTF-8 (independent of the process locale) and
    must be a mapping with ``version == 1``, a ``roots`` mapping and a policy
    that keeps the tool in dry-run, explicit-root, no-upload, no-mutation,
    no-raw-text mode.

    Args:
        path: Location of the YAML manifest.

    Returns:
        The parsed manifest mapping.

    Raises:
        FailClosed: If the file is missing or any structural/policy check
            fails (including a ``policy`` entry that is not a mapping).
    """
    if not path.exists():
        raise FailClosed(f"manifest_missing:{path}")
    data = yaml.safe_load(path.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        raise FailClosed("manifest_invalid:not_mapping")
    if data.get("version") != 1:
        raise FailClosed("manifest_invalid:version_must_be_1")

    policy = data.get("policy") or {}
    if not isinstance(policy, dict):
        # A scalar/list policy cannot express the required switches.
        raise FailClosed("policy_invalid:not_mapping")
    if policy.get("default_mode", "dry_run") != "dry_run":
        raise FailClosed("policy_invalid:default_mode_not_dry_run")

    # Each switch must be present with exactly this boolean value.
    required_policy = {
        "require_explicit_root": True,
        "allow_external_uploads": False,
        "allow_mutations": False,
        "log_raw_text": False,
    }
    for key, expected in required_policy.items():
        if policy.get(key) is not expected:
            raise FailClosed(f"policy_invalid:{key}")

    if not isinstance(data.get("roots"), dict):
        raise FailClosed("manifest_invalid:roots_missing")
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_lane_root(manifest: dict[str, Any], manifest_path: Path, lane: str, requested_root: str | None) -> tuple[dict[str, Any], Path, Path]:
    """Resolve the approved root for ``lane`` and the root actually scanned.

    A relative ``root`` in the manifest is anchored at the manifest's own
    directory. ``requested_root`` may only narrow the approved root, never
    widen it.

    Returns:
        ``(lane_cfg, approved_root, selected_root)`` with both paths resolved.

    Raises:
        FailClosed: if the lane is missing, not approved with a literal
            ``True``, has no root, either root is not an existing directory,
            or the requested root is outside the approved root.
    """
    lanes = manifest.get("roots") or {}
    lane_cfg = lanes.get(lane)
    if not isinstance(lane_cfg, dict):
        raise FailClosed(f"lane_missing:{lane}")
    if lane_cfg.get("approved") is not True:
        raise FailClosed(f"lane_unapproved:{lane}")

    root_value = lane_cfg.get("root")
    if not root_value:
        raise FailClosed(f"root_missing:{lane}")

    declared = Path(str(root_value)).expanduser()
    anchored = declared if declared.is_absolute() else manifest_path.parent / declared
    approved_root = anchored.resolve()
    if not (approved_root.exists() and approved_root.is_dir()):
        raise FailClosed(f"approved_root_unavailable:{lane}")

    if requested_root:
        selected_root = Path(requested_root).expanduser().resolve()
    else:
        selected_root = approved_root.resolve()
    if not (selected_root.exists() and selected_root.is_dir()):
        raise FailClosed(f"request_root_unavailable:{lane}")
    if not is_under(selected_root, approved_root):
        raise FailClosed(f"request_root_broadens_approval:{lane}")
    return lane_cfg, approved_root, selected_root
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_exts(lane_cfg: dict[str, Any]) -> set[str]:
    """Return the lane's allowed extensions, lower-cased and dot-prefixed.

    Args:
        lane_cfg: Lane configuration; ``allowed_extensions`` may be a list,
            a single string, ``None`` or absent.

    Returns:
        A set such as ``{".pdf", ".jpg"}``; empty when nothing usable is
        configured.
    """
    raw = lane_cfg.get("allowed_extensions")
    if raw is None:
        # Key absent or explicitly null.
        return set()
    if isinstance(raw, str):
        # A bare scalar must count as one extension, not be split per character.
        raw = [raw]
    normalized: set[str] = set()
    for entry in raw:
        ext = str(entry).strip().lower()
        if ext in ("", "."):
            continue
        normalized.add(ext if ext.startswith(".") else "." + ext)
    return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def iter_files(root: Path, approved_root: Path, exts: set[str], max_file_mb: float, max_age_days: float | None) -> tuple[list[Path], dict[str, int], int]:
    """Walk ``root`` and collect files that pass every lane filter.

    Args:
        root: Directory to walk (directory symlinks are not followed).
        approved_root: Every directory and resolved file must stay under it.
        exts: Allowed lower-case suffixes including the leading dot.
        max_file_mb: Size ceiling in MiB.
        max_age_days: Optional age ceiling based on mtime; ``None`` disables it.

    Returns:
        ``(accepted, skipped, files_seen)`` where ``accepted`` holds resolved
        paths sorted newest first, ``skipped`` counts rejections per reason,
        and ``files_seen`` counts non-hidden file names encountered.
    """
    skipped = {"extension": 0, "size": 0, "symlink_escape": 0, "not_regular_file": 0, "too_old": 0, "policy": 0}
    # Keep the mtime from the one stat() made during the scan so the final
    # sort does not stat again and crash on a file removed in the meantime.
    accepted: list[tuple[float, Path]] = []
    files_seen = 0
    now = time.time()
    max_bytes = int(max_file_mb * 1024 * 1024)
    for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
        # Prune in place so os.walk never descends into skipped/hidden dirs.
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIR_NAMES and not d.startswith(".")]
        current = Path(dirpath)
        if not is_under(current, approved_root):
            skipped["symlink_escape"] += 1
            dirnames[:] = []
            continue
        for name in filenames:
            path = current / name
            if name.startswith("."):
                skipped["policy"] += 1
                continue
            files_seen += 1
            try:
                resolved = path.resolve()
            except Exception:
                skipped["symlink_escape"] += 1
                continue
            if not is_under(resolved, approved_root):
                skipped["symlink_escape"] += 1
                continue
            if not resolved.is_file():
                skipped["not_regular_file"] += 1
                continue
            if resolved.suffix.lower() not in exts:
                skipped["extension"] += 1
                continue
            try:
                st = resolved.stat()
            except OSError:
                skipped["not_regular_file"] += 1
                continue
            if st.st_size > max_bytes:
                skipped["size"] += 1
                continue
            if max_age_days is not None and now - st.st_mtime > max_age_days * 86400:
                skipped["too_old"] += 1
                continue
            accepted.append((st.st_mtime, resolved))
    # Stable sort on mtime only; ties keep discovery order.
    accepted.sort(key=lambda pair: pair[0], reverse=True)
    return [found for _, found in accepted], skipped, files_seen
|
||||||
|
|
||||||
|
|
||||||
|
def load_doc_triage_module(repo_root: Path):
    """Import ``openvino-doc-image-triage-npu/triage.py`` as ``doc_image_triage``.

    The directory name contains hyphens, so the module is loaded from its file
    path instead of through a normal ``import`` statement.

    Args:
        repo_root: Repository root that contains the triage directory.

    Returns:
        The executed module object, which is also the object registered in
        ``sys.modules["doc_image_triage"]``.

    Raises:
        RuntimeError: if no import spec/loader can be built for the file.
        Exception: whatever the module body raises while executing; in that
            case ``sys.modules`` is restored to its previous state.
    """
    module_name = "doc_image_triage"
    module_path = repo_root / "openvino-doc-image-triage-npu" / "triage.py"
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    if spec is None or spec.loader is None:
        raise RuntimeError("doc_image_triage_import_failed")
    module = importlib.util.module_from_spec(spec)
    # Register before executing so code that looks itself up through
    # sys.modules during import finds the object being initialised.
    previous = sys.modules.get(module_name)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)  # type: ignore[union-attr]
    except BaseException:
        # Never leave a half-initialised module importable by name.
        if previous is None:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous
        raise
    return module
|
||||||
|
|
||||||
|
|
||||||
|
def fallback_doc_item(path: Path, root: Path, lane: str) -> dict[str, Any]:
    """Build a compact document item from keyword rules only.

    Text comes solely from an optional ``<name><suffix>.txt`` sidecar next to
    the file (first 12000 characters). The returned item carries counts and
    labels, never the text itself.
    """
    sidecar = path.with_suffix(path.suffix + ".txt")
    if sidecar.exists() and sidecar.is_file():
        text = sidecar.read_text(errors="replace")[:12000]
    else:
        text = ""
    lowered = text.lower()

    def mentions(*needles: str) -> bool:
        return any(needle in lowered for needle in needles)

    # First matching rule wins; the lane only matters when no keyword hits.
    if mentions("receipt", "subtotal", "store"):
        category = "receipt"
    elif mentions("invoice", "amount due", "payment due"):
        category = "bill_or_invoice"
    elif lane == "screenshots":
        category = "screenshot_web_or_app"
    else:
        category = "unknown_or_low_confidence"

    matched = [label for label, pattern in ACTION_PATTERNS.items() if pattern.search(text)]
    distinct_dates = set(re.findall(r"\b20\d{2}[-/]\d{1,2}[-/]\d{1,2}\b", text))
    distinct_amounts = set(re.findall(r"\$\s?\d+(?:\.\d{2})?", text))
    return {
        "basename": path.name,
        "relative_path_hash": sha256_text(path.relative_to(root).as_posix()),
        "file_id": sha256_file(path),
        "media_type": infer_media_type(path),
        "category": category,
        "needs_attention": bool(matched),
        "reasons": sorted(matched),
        "raw_text_redacted": True,
        "full_path_included": False,
        "metadata": {
            "dates_count": len(distinct_dates),
            "amounts_count": len(distinct_amounts),
            "raw_values_redacted": True,
        },
        "processing": {"doc_image_triage": "fallback_cpu_sidecar_rules", "npu_verified": False},
    }
|
||||||
|
|
||||||
|
|
||||||
|
def infer_media_type(path: Path) -> str:
    """Classify a file as ``"pdf"``, ``"image"``, ``"audio"`` or ``"unknown"``.

    Only the file name is inspected: the suffix for PDFs, otherwise the MIME
    type that ``mimetypes`` guesses from the name.
    """
    if path.suffix.lower() == ".pdf":
        return "pdf"
    guessed = mimetypes.guess_type(path.name)[0] or ""
    for prefix, label in (("image/", "image"), ("audio/", "audio")):
        if guessed.startswith(prefix):
            return label
    return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def compact_doc_item(path: Path, root: Path, lane: str, triage_result: dict[str, Any]) -> dict[str, Any]:
    """Reduce a triage result to the compact, redacted item that gets reported.

    Only the first entry of ``triage_result["pages"]`` is consulted for the
    classification, attention flags and metadata counts. For the ``receipts``
    lane a ``receipt_fields`` summary is added.
    """
    page_list = triage_result.get("pages") or []
    first_page = page_list[0] if page_list else {}
    classification = first_page.get("classification") or {}
    attention = first_page.get("needs_attention") or {}
    page_meta = first_page.get("metadata") or {}
    devices = triage_result.get("processing_device_summary") or {}

    item = {
        "basename": path.name,
        "relative_path_hash": sha256_text(path.relative_to(root).as_posix()),
        # Fall back to local derivation only when the triage result has no value.
        "file_id": triage_result.get("file_id") or sha256_file(path),
        "media_type": triage_result.get("media_type") or infer_media_type(path),
        "category": classification.get("label") or "unknown_or_low_confidence",
        "needs_attention": bool(attention.get("value")),
        "reasons": attention.get("reasons") or [],
        "raw_text_redacted": True,
        "full_path_included": False,
        "metadata": {
            "dates_count": page_meta.get("dates_count", 0),
            "amounts_count": page_meta.get("amounts_count", 0),
            "raw_values_redacted": True,
        },
        "processing": {
            "doc_image_triage": "openvino-doc-image-triage-npu",
            "image_category_device": (classification.get("device") or "CPU"),
            "needs_attention_device": attention.get("device") or "CPU",
            "npu_verified": bool(devices.get("npu_verified")),
            "npu_busy_delta_us": devices.get("npu_busy_delta_us"),
        },
    }
    if lane != "receipts":
        return item

    entities = page_meta.get("detected_entities") or {}
    counts = item["metadata"]
    item["receipt_fields"] = {
        "vendor_present": bool(entities.get("org_present")),
        "amounts_count": counts["amounts_count"],
        "dates_count": counts["dates_count"],
    }
    return item
|
||||||
|
|
||||||
|
|
||||||
|
def classify_transcript(text: str, lane: str) -> dict[str, Any]:
    """Summarise a transcript as pattern-derived flags and counts.

    Each count is presence-based (0 or 1 per pattern), not a tally of
    occurrences. No transcript text is returned.
    """
    matched = [label for label, pattern in ACTION_PATTERNS.items() if pattern.search(text)]
    has_follow_up = bool(ACTION_PATTERNS["follow_up"].search(text))
    has_task = bool(ACTION_PATTERNS["task"].search(text))
    has_decision = bool(ACTION_PATTERNS["decision"].search(text))

    if lane == "meeting_snippets":
        category = "meeting_snippet"
    else:
        category = "voice_memo"
    return {
        "category": category,
        "action_worthy": bool(matched),
        "reasons": sorted(matched),
        "action_items_count": int(has_follow_up) + int(has_task),
        "decisions_count": int(has_decision),
        "followups_count": int(has_follow_up),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def multipart_transcribe(path: Path, whisper_url: str, timeout: float) -> dict[str, Any]:
    """POST one audio file to the local whisper endpoint as multipart/form-data.

    Args:
        path: Audio file to upload; its whole content is read into memory.
        whisper_url: Endpoint URL; re-validated by ``validate_local_whisper_url``.
        timeout: Socket timeout in seconds for the request.

    Returns:
        A dict with ``ok``, ``text``, ``transcript_chars``, ``duration_seconds``,
        ``language``, ``npu_busy_delta_us``, ``verified_npu`` and ``wall_ms``.
        The service-reported busy delta is preferred when it is an int; the
        locally sampled counter delta is used otherwise.

    Raises:
        FailClosed: if the URL is rejected by ``validate_local_whisper_url``.
        urllib.error.URLError, OSError, json.JSONDecodeError: on transport or
            response-decoding failures (not handled here).
    """
    whisper_url = validate_local_whisper_url(whisper_url)
    # A random boundary cannot be predicted from the file name, so file content
    # cannot be crafted to contain it and splice extra form parts.
    boundary = "----NpuBatchTriage" + os.urandom(12).hex()
    # The file name lands inside a quoted header parameter: percent-encode the
    # quote and line breaks so a hostile name cannot end the parameter or
    # inject headers.
    safe_name = path.name.replace('"', "%22").replace("\r", "%0D").replace("\n", "%0A")
    data = path.read_bytes()
    head = (
        f"--{boundary}\r\n"
        f'Content-Disposition: form-data; name="file"; filename="{safe_name}"\r\n'
        "Content-Type: application/octet-stream\r\n\r\n"
    ).encode("utf-8", "replace")
    tail = (
        f"\r\n--{boundary}\r\n"
        'Content-Disposition: form-data; name="model"\r\n\r\n'
        "whisper-1\r\n"
        f"--{boundary}--\r\n"
    ).encode()
    body = head + data + tail
    before = read_busy()
    req = urllib.request.Request(whisper_url, data=body, headers={"Content-Type": f"multipart/form-data; boundary={boundary}"})
    t0 = time.perf_counter()
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        # Response is capped at 1 MiB.
        raw = resp.read(1024 * 1024)
        status = resp.status
    parsed = json.loads(raw.decode())
    after = read_busy()
    text = str(parsed.get("text") or parsed.get("transcription") or "").strip()
    service_delta = parsed.get("npu_busy_delta_us")
    sysfs_delta = None if before is None or after is None else after - before
    proof_delta = service_delta if isinstance(service_delta, int) else sysfs_delta
    return {
        "ok": status == 200 and bool(text),
        "text": text,
        "transcript_chars": len(text),
        "duration_seconds": parsed.get("duration_seconds"),
        "language": parsed.get("language"),
        "npu_busy_delta_us": proof_delta,
        "verified_npu": bool(proof_delta and proof_delta > 0),
        "wall_ms": round((time.perf_counter() - t0) * 1000, 2),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def compact_audio_item(path: Path, root: Path, lane: str, no_npu: bool, whisper_url: str, timeout: float) -> dict[str, Any]:
    """Build the compact, redacted report item for one audio file.

    With ``no_npu`` set, no transcription is attempted and the item is built
    from an empty transcript. Transport and JSON-decoding failures from the
    transcription call are recorded under ``error`` instead of being raised.
    The transcript itself is never placed in the item.
    """
    transcript = ""
    transcribed = False
    npu_delta = 0
    duration = None
    language = None
    error = None

    if not no_npu:
        try:
            result = multipart_transcribe(path, whisper_url, timeout)
        except (urllib.error.URLError, TimeoutError, OSError, json.JSONDecodeError) as exc:
            error = f"whisper_error:{type(exc).__name__}"
        else:
            transcript = result["text"]
            transcribed = result["ok"]
            npu_delta = result.get("npu_busy_delta_us") or 0
            duration = result.get("duration_seconds")
            language = result.get("language")

    item: dict[str, Any] = {
        "basename": path.name,
        "relative_path_hash": sha256_text(path.relative_to(root).as_posix()),
        "file_id": sha256_file(path),
        "media_type": "audio",
        "duration_seconds": duration,
        "transcribed": transcribed,
        "transcript_chars": len(transcript),
        "language": language,
    }
    item.update(classify_transcript(transcript, lane))
    item.update(
        {
            "npu_busy_delta_us": npu_delta,
            "raw_transcript_logged": False,
            "full_path_included": False,
        }
    )
    if error:
        item["error"] = error
    return item
|
||||||
|
|
||||||
|
|
||||||
|
def process(args: argparse.Namespace) -> dict[str, Any]:
    """Run one dry-run triage batch for a lane and return the compact report.

    Args:
        args: Parsed CLI namespace from ``build_parser()``.

    Returns:
        The report dict (lane, counters, per-file items, NPU proof summary,
        gates). ``ok`` is False when any per-file or import error was recorded.

    Raises:
        FailClosed: for manifest, lane or root problems, missing extensions,
            an unapproved whisper URL, or invalid ``max_files`` /
            ``max_file_mb`` limits in the lane configuration.
    """
    repo_root = Path(__file__).resolve().parents[1]
    manifest_path = Path(args.manifest).expanduser().resolve()
    manifest = load_manifest(manifest_path)
    lane_cfg, approved_root, root = resolve_lane_root(manifest, manifest_path, args.lane, args.root)
    exts = allowed_exts(lane_cfg)
    if not exts:
        raise FailClosed(f"extensions_missing:{args.lane}")

    # Limits come from the manifest; reject anything that is not a sane
    # non-negative number. A negative max_files would otherwise become a
    # negative slice bound below and select nearly every file.
    try:
        manifest_limit = int(lane_cfg.get("max_files", 50))
        max_file_mb = float(lane_cfg.get("max_file_mb", 25))
    except (TypeError, ValueError, OverflowError) as exc:
        raise FailClosed(f"lane_limits_invalid:{args.lane}") from exc
    if manifest_limit < 0 or not (0 <= max_file_mb < float("inf")):
        raise FailClosed(f"lane_limits_invalid:{args.lane}")
    requested_limit = manifest_limit if args.limit is None else args.limit
    # The CLI may lower the manifest limit but never raise it.
    limit = max(0, min(requested_limit, manifest_limit))

    files, skipped, files_seen = iter_files(root, approved_root, exts, max_file_mb, args.max_age_days)
    selected = files[:limit]
    npu_before = read_busy()

    items: list[dict[str, Any]] = []
    errors: list[str] = []
    doc_module = None
    if args.lane in AUDIO_LANES and not args.no_npu:
        # Validate once up front so a bad URL fails the whole run.
        validate_local_whisper_url(args.whisper_url)
    if args.lane in DOC_IMAGE_LANES and not args.no_npu:
        try:
            doc_module = load_doc_triage_module(repo_root)
        except Exception as exc:
            # Recorded as an error; document lanes then use the fallback rules.
            errors.append(f"doc_triage_import_error:{type(exc).__name__}")

    for path in selected:
        try:
            if args.lane in AUDIO_LANES:
                item = compact_audio_item(path, root, args.lane, args.no_npu, args.whisper_url, args.timeout_seconds)
            elif doc_module is not None:
                opts = doc_module.TriageOptions(
                    dry_run=False,
                    include_ocr_text=False,
                    include_full_path=False,
                    use_embeddings=not args.no_npu,
                    allowed_roots=[approved_root],
                    timeout_seconds=args.timeout_seconds,
                )
                item = compact_doc_item(path, root, args.lane, doc_module.triage_file(path, opts))
            else:
                item = fallback_doc_item(path, root, args.lane)
            if args.include_full_path:
                item["full_path"] = str(path)
                item["full_path_included"] = True
            if args.include_raw_text:
                # The flag is acknowledged but raw text is never emitted.
                item["raw_text_included"] = False
                item["raw_text_note"] = "unsupported_by_batch_wrapper"
            items.append(item)
        except FailClosed:
            raise
        except Exception as exc:
            # Only the exception type is reported, never its message.
            errors.append(f"{path.name}:{type(exc).__name__}")
            items.append({"basename": path.name, "ok": False, "error": type(exc).__name__, "raw_text_redacted": True, "full_path_included": False})

    npu_after = read_busy()
    sysfs_delta = None if npu_before is None or npu_after is None else npu_after - npu_before
    item_deltas = [i.get("npu_busy_delta_us") for i in items if isinstance(i.get("npu_busy_delta_us"), int)]
    claimed = not args.no_npu and any((d or 0) > 0 for d in item_deltas + ([sysfs_delta] if isinstance(sysfs_delta, int) else []))
    proof_ok = claimed and bool(sysfs_delta is None or sysfs_delta > 0 or any((d or 0) > 0 for d in item_deltas))
    return {
        "ok": not errors,
        "lane": args.lane,
        "dry_run": True,
        "approved_root": True,
        "root_basename": root.name,
        "files_seen": files_seen,
        "files_processed": len(items),
        "skipped": skipped,
        "npu": {"claimed": claimed, "busy_delta_us": sysfs_delta, "proof_ok": proof_ok},
        "mutations": MUTATIONS_FALSE.copy(),
        "items": items,
        "raw_content_redacted": not args.include_raw_text,
        "full_paths_included": bool(args.include_full_path),
        "errors": errors,
        "gates": {
            "external_uploads": False,
            "private_root_broadening": False,
            "obsidian_mutation": False,
            "vector_db_mutation": False,
            "outbound_sends": False,
            "routing_changes": False,
        },
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the batch triage wrapper.

    Options are declared as data and registered in order, so ``--help`` lists
    them in the sequence given below.
    """
    switch = "store_true"
    option_table: list[tuple[str, dict[str, Any]]] = [
        ("--manifest", {"required": True, "help": "lane approval manifest; missing/unapproved fails closed"}),
        ("--lane", {"required": True, "choices": LANES}),
        ("--root", {"help": "optional narrower root under the manifest-approved lane root"}),
        ("--dry-run", {"action": switch, "help": "required; mutation modes are not implemented"}),
        ("--limit", {"type": int, "default": None}),
        ("--max-age-days", {"type": float, "default": None}),
        ("--include-raw-text", {"action": switch, "help": "kept redacted by this wrapper; present only for explicit operator attempts"}),
        ("--include-full-path", {"action": switch, "help": "operator-only local debugging"}),
        ("--no-npu", {"action": switch, "help": "CPU-only smoke; never claims NPU"}),
        ("--json", {"action": switch, "help": "emit compact JSON"}),
        ("--pretty", {"action": switch, "help": "pretty JSON for local debugging"}),
        ("--whisper-url", {"default": DEFAULT_WHISPER_URL}),
        ("--timeout-seconds", {"type": float, "default": 20.0}),
    ]
    parser = argparse.ArgumentParser(description="Explicit-root dry-run batch triage wrapper")
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """CLI entry point; returns the process exit status.

    Usage errors (missing ``--dry-run``, non-positive ``--limit``) print a JSON
    object to stderr and return 2. Otherwise the report, or a ``fail_closed``
    object, is printed to stdout and the status is 0 only when it says ``ok``.
    """
    args = build_parser().parse_args(argv)

    def refuse(payload: dict[str, Any]) -> int:
        print(json.dumps(payload), file=sys.stderr)
        return 2

    if not args.dry_run:
        return refuse({"ok": False, "error": "dry_run_required", "mutations": MUTATIONS_FALSE})
    if args.limit is not None and args.limit < 1:
        return refuse({"ok": False, "error": "limit_must_be_positive"})

    try:
        out = process(args)
    except FailClosed as exc:
        out = {
            "ok": False,
            "error": "fail_closed",
            "reason": str(exc),
            "dry_run": True,
            "mutations": MUTATIONS_FALSE.copy(),
        }
    indent = 2 if args.pretty else None
    print(json.dumps(out, indent=indent, sort_keys=True))
    if out.get("ok"):
        return 0
    return 2
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry: hand main()'s integer result to the interpreter as exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
@@ -45,7 +45,11 @@ printf 'busy_path=%s\n' "$BUSY_PATH"
|
|||||||
printf 'busy_time_us=%s\n' "$(busy_value)"
|
printf 'busy_time_us=%s\n' "$(busy_value)"
|
||||||
|
|
||||||
section "Listeners"
|
section "Listeners"
|
||||||
ss -ltnp | grep -E ':(18810|18814|18816|18817|18818|18819|18820|18828|18829)\b' || true
|
# Required OpenVINO/NPU program ports: live baseline 18810/18816/18817,
|
||||||
|
# reranker 18818, local-only specialists 18819/18820/18829, and advisory gateway 18830.
|
||||||
|
# 18814 is the existing RAG/embedding health wrapper; 18828 is a review-only
|
||||||
|
# alternate used to avoid collisions during prior smoke tests.
|
||||||
|
ss -ltnp | grep -E ':(18810|18814|18816|18817|18818|18819|18820|18828|18829|18830)\b' || true
|
||||||
|
|
||||||
section "User service states"
|
section "User service states"
|
||||||
for unit in \
|
for unit in \
|
||||||
@@ -73,6 +77,7 @@ http_json "OpenVINO embeddings" "http://127.0.0.1:18817/healthz" || true
|
|||||||
http_json "NPU reranker prototype" "http://127.0.0.1:18818/readyz" || true
|
http_json "NPU reranker prototype" "http://127.0.0.1:18818/readyz" || true
|
||||||
http_json "NPU router classifier prototype" "http://127.0.0.1:18819/healthz" || true
|
http_json "NPU router classifier prototype" "http://127.0.0.1:18819/healthz" || true
|
||||||
http_json "NPU GenAI worker prototype" "http://127.0.0.1:18820/healthz" || true
|
http_json "NPU GenAI worker prototype" "http://127.0.0.1:18820/healthz" || true
|
||||||
|
http_json "NPU doc/image triage prototype" "http://127.0.0.1:18829/healthz" || true
|
||||||
|
|
||||||
section "Embeddings NPU busy-time proof"
|
section "Embeddings NPU busy-time proof"
|
||||||
if [[ ! -r "$BUSY_PATH" ]]; then
|
if [[ ! -r "$BUSY_PATH" ]]; then
|
||||||
|
|||||||
Executable
+712
@@ -0,0 +1,712 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Compact, read-only NPU/OpenVINO utilization digest.
|
||||||
|
|
||||||
|
Default behavior is safe for on-demand or scheduled runs: health checks plus
|
||||||
|
bounded synthetic probes, one compact JSONL artifact, and no service restarts,
|
||||||
|
routing changes, advisory POSTs, vector mutations, outbound sends, or private
|
||||||
|
root broadening.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import base64
|
||||||
|
import datetime as dt
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
import uuid
|
||||||
|
import wave
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Callable
|
||||||
|
|
||||||
|
BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
|
||||||
|
DEFAULT_OUT_DIR = Path("/home/will/.local/state/npu-utilization/digests")
|
||||||
|
|
||||||
|
EMBED_URL = "http://127.0.0.1:18817/v1/embeddings"
|
||||||
|
EMBED_HEALTH_URL = "http://127.0.0.1:18817/healthz"
|
||||||
|
RERANK_URL = "http://127.0.0.1:18818/rerank"
|
||||||
|
RERANK_HEALTH_URL = "http://127.0.0.1:18818/readyz"
|
||||||
|
WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"
|
||||||
|
WHISPER_HEALTH_URL = "http://127.0.0.1:18816/health"
|
||||||
|
CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
|
||||||
|
CLASSIFIER_HEALTH_URL = "http://127.0.0.1:18819/healthz"
|
||||||
|
GENAI_HEALTH_URL = "http://127.0.0.1:18820/healthz"
|
||||||
|
GENAI_GENERATE_URL = "http://127.0.0.1:18820/v1/generate"
|
||||||
|
DOC_TRIAGE_URL = "http://127.0.0.1:18829/triage"
|
||||||
|
DOC_TRIAGE_HEALTH_URL = "http://127.0.0.1:18829/healthz"
|
||||||
|
RAG_ENDPOINT_HEALTH_URL = "http://127.0.0.1:18810/healthz"
|
||||||
|
RAG_HEALTH_URL = "http://127.0.0.1:18814/healthz"
|
||||||
|
ADVISORY_HEALTH_URL = "http://172.19.0.1:18830/healthz"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ServiceRow:
|
||||||
|
type: str = "service"
|
||||||
|
service: str = ""
|
||||||
|
reachable: bool = False
|
||||||
|
probe_ran: bool = False
|
||||||
|
proof_ok: bool | None = None
|
||||||
|
calls: int = 0
|
||||||
|
items: int = 0
|
||||||
|
avg_ms: float | None = None
|
||||||
|
npu_delta_us: int | None = None
|
||||||
|
response_delta_us: int | None = None
|
||||||
|
mode: str = "unavailable"
|
||||||
|
fallbacks: int = 0
|
||||||
|
warnings: list[str] = field(default_factory=list)
|
||||||
|
gate: str = "none"
|
||||||
|
jobs: int | None = None
|
||||||
|
events: int | None = None
|
||||||
|
files: int | None = None
|
||||||
|
docs: int | None = None
|
||||||
|
text_len: int | None = None
|
||||||
|
sample_rate: int | None = None
|
||||||
|
embedding_count: int | None = None
|
||||||
|
embedding_dim: int | None = None
|
||||||
|
dry_run: bool | None = None
|
||||||
|
suppress: int | None = None
|
||||||
|
escalate: int | None = None
|
||||||
|
recommendation: str | None = None
|
||||||
|
confidence: float | None = None
|
||||||
|
confidence_bucket: str | None = None
|
||||||
|
authority_violations: int | None = None
|
||||||
|
loaded: bool | None = None
|
||||||
|
allowed_roots_count: int | None = None
|
||||||
|
reason: str | None = None
|
||||||
|
error: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def compact_dict(obj: Any) -> dict[str, Any]:
    """Return ``obj`` as a dict without entries that are ``None`` or ``[]``.

    Dataclass instances are converted with ``asdict``; anything else is passed
    to ``dict()``. Other falsy values such as ``0``, ``False`` and ``""`` are
    kept.
    """
    if hasattr(obj, "__dataclass_fields__"):
        source = asdict(obj)
    else:
        source = dict(obj)
    compacted: dict[str, Any] = {}
    for key, value in source.items():
        if value is None:
            continue
        if value != []:
            compacted[key] = value
    return compacted
|
||||||
|
|
||||||
|
|
||||||
|
AUTHORITY_SAFE_ACTIONS = {
|
||||||
|
"", "none", "log", "observe", "dry_run", "recommend", "suppress", "escalate",
|
||||||
|
"record_metric", "compare_with_expected_label", "include_in_digest",
|
||||||
|
"open_review_ticket_candidate", "recommend_human_review",
|
||||||
|
}
|
||||||
|
AUTHORITY_FLAG_KEYS = {
|
||||||
|
"advisory_post",
|
||||||
|
"atlas_routing",
|
||||||
|
"broad_private_scan",
|
||||||
|
"delivery_send",
|
||||||
|
"gateway_restart",
|
||||||
|
"live_routing",
|
||||||
|
"memory_write",
|
||||||
|
"outbound_send",
|
||||||
|
"private_root_scan",
|
||||||
|
"service_restart",
|
||||||
|
"tool_execution",
|
||||||
|
"vector_mutation",
|
||||||
|
}
|
||||||
|
AUTHORITY_FLAG_ALIASES = {
|
||||||
|
"can_route_atlas": "atlas_routing",
|
||||||
|
"can_write_memory": "memory_write",
|
||||||
|
"can_execute_tools": "tool_execution",
|
||||||
|
"can_restart_services": "service_restart",
|
||||||
|
"can_send_outbound": "outbound_send",
|
||||||
|
"can_scan_private_roots": "private_root_scan",
|
||||||
|
"can_mutate_vector_store": "vector_mutation",
|
||||||
|
"can_post_advisory_event": "advisory_post",
|
||||||
|
"can_change_gateway_config": "gateway_restart",
|
||||||
|
"may_route": "atlas_routing",
|
||||||
|
"may_write_memory": "memory_write",
|
||||||
|
"may_execute_tools": "tool_execution",
|
||||||
|
"may_restart_services": "service_restart",
|
||||||
|
"may_send_external": "outbound_send",
|
||||||
|
"may_process_private_dirs": "private_root_scan",
|
||||||
|
"may_mutate_vector_db": "vector_mutation",
|
||||||
|
"may_change_live_config": "gateway_restart",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def confidence_bucket(confidence: float | None) -> str | None:
|
||||||
|
if confidence is None:
|
||||||
|
return None
|
||||||
|
if confidence >= 0.8:
|
||||||
|
return "high"
|
||||||
|
if confidence >= 0.5:
|
||||||
|
return "medium"
|
||||||
|
return "low"
|
||||||
|
|
||||||
|
|
||||||
|
def coerce_confidence(value: Any) -> float | None:
|
||||||
|
if isinstance(value, bool):
|
||||||
|
return None
|
||||||
|
if isinstance(value, (int, float)):
|
||||||
|
return max(0.0, min(1.0, float(value)))
|
||||||
|
if isinstance(value, str):
|
||||||
|
try:
|
||||||
|
return max(0.0, min(1.0, float(value)))
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_confidence(payload: dict[str, Any]) -> float | None:
    """Extract a single confidence value from a classifier-style payload.

    A usable top-level ``confidence`` wins. Otherwise every entry of
    ``payload["labels"]`` (when that is a dict) is scored and the highest
    score is returned.

    Args:
        payload: Decoded response object.

    Returns:
        A confidence as produced by ``coerce_confidence``, or ``None`` when no
        usable value is present.
    """
    direct = coerce_confidence(payload.get("confidence"))
    if direct is not None:
        return direct

    raw_labels = payload.get("labels")
    labels: dict[str, Any] = raw_labels if isinstance(raw_labels, dict) else {}
    scores: list[float] = []
    for value in labels.values():
        if isinstance(value, dict):
            # Use the first score-like key that is present, even if its value
            # turns out to be unusable; a score that was found is never reset.
            present = next((k for k in ("confidence", "score", "probability") if k in value), None)
            score = coerce_confidence(value.get(present)) if present is not None else None
        else:
            score = coerce_confidence(value)
        if score is not None:
            scores.append(score)
    return max(scores) if scores else None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_recommendation(payload: dict[str, Any]) -> str | None:
|
||||||
|
for key in ("recommendation", "classification", "input_class"):
|
||||||
|
value = payload.get(key)
|
||||||
|
if isinstance(value, str) and value:
|
||||||
|
return value[:48]
|
||||||
|
raw_action = payload.get("action")
|
||||||
|
action: dict[str, Any] = raw_action if isinstance(raw_action, dict) else {}
|
||||||
|
value = action.get("recommendation") or action.get("type")
|
||||||
|
return str(value)[:48] if value else None
|
||||||
|
|
||||||
|
|
||||||
|
def count_authority_violations(payload: dict[str, Any]) -> int:
    """Count hints in an advisory response that exceed read-only authority.

    Three parts of the payload are inspected:

    * ``authority_flags`` — each truthy flag whose name (after alias mapping)
      is a known authority flag counts once.
    * ``allowed_actions`` — each list entry outside the safe action set counts
      once.
    * ``actual_action`` — counts at most once. A dict counts when it reports
      ``performed``, any ``side_effects``, or an unsafe ``kind``; any other
      value counts when its lower-cased string form is non-empty and unsafe.
    """
    flags = payload.get("authority_flags")
    if not isinstance(flags, dict):
        flags = {}
    flag_hits = sum(
        1
        for name, enabled in flags.items()
        if AUTHORITY_FLAG_ALIASES.get(name, name) in AUTHORITY_FLAG_KEYS and bool(enabled)
    )

    allowed = payload.get("allowed_actions")
    if not isinstance(allowed, list):
        allowed = []
    allowed_hits = sum(1 for entry in allowed if str(entry).lower() not in AUTHORITY_SAFE_ACTIONS)

    actual = payload.get("actual_action")
    if isinstance(actual, dict):
        # Two extra kinds are tolerated for the object form only.
        safe_kinds = AUTHORITY_SAFE_ACTIONS | {"recorded_metric", "dry_run_reported"}
        did_something = bool(actual.get("performed"))
        effects = actual.get("side_effects") or []
        kind = str(actual.get("kind") or "none").lower()
        actual_unsafe = bool(did_something or effects or kind not in safe_kinds)
    else:
        label = str(actual or "").lower()
        actual_unsafe = bool(label) and label not in AUTHORITY_SAFE_ACTIONS

    return flag_hits + allowed_hits + (1 if actual_unsafe else 0)
|
||||||
|
|
||||||
|
|
||||||
|
def read_busy(path: Path = BUSY_PATH) -> int | None:
    """Read an integer counter from ``path``.

    Returns ``None`` instead of raising when the file cannot be read or its
    content is not an integer.
    """
    try:
        content = path.read_text()
        return int(content.strip())
    except Exception:
        # Best effort: any failure is reported as "no reading".
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def safe_error(exc: BaseException) -> str:
    """Return only the exception's class name, leaving its message out."""
    return exc.__class__.__name__
|
||||||
|
|
||||||
|
|
||||||
|
def http_get_json(url: str, timeout: float) -> tuple[int, dict[str, Any]]:
    """GET ``url`` and decode the body as a JSON object; never raises.

    Args:
        url: Endpoint to query.
        timeout: Socket timeout in seconds.

    Returns:
        ``(status, payload)``. ``status`` is the HTTP status, or 0 when the
        request failed or a non-error response could not be decoded.
        ``payload`` is always a dict: the decoded object, ``{}`` for an empty
        body, or ``{"error": ...}`` describing the failure.
    """
    try:
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            # Bodies are capped at 1 MiB.
            body = resp.read(1024 * 1024).decode("utf-8", "replace")
            status = int(resp.status)
        data = json.loads(body or "{}")
    except urllib.error.HTTPError as exc:
        status = int(exc.code)
        try:
            body = exc.read(1024 * 1024).decode("utf-8", "replace")
            data = json.loads(body or "{}")
        except Exception:
            return status, {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, number) would break
        # callers that use .get() on the payload.
        return status, {"error": "non_object_json"}
    return status, data
|
||||||
|
|
||||||
|
|
||||||
|
def http_post_json(url: str, payload: dict[str, Any], timeout: float) -> tuple[int, dict[str, Any]]:
    """POST ``payload`` as JSON to ``url`` and decode the JSON object reply.

    Args:
        url: Endpoint to call.
        payload: Request body; must be JSON-serialisable (serialisation
            happens before the request and its errors are not caught).
        timeout: Socket timeout in seconds.

    Returns:
        ``(status, reply)``. ``status`` is the HTTP status, or 0 when the
        request failed or a non-error response could not be decoded.
        ``reply`` is always a dict: the decoded object, ``{}`` for an empty
        body, or ``{"error": ...}`` describing the failure.
    """
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    req = urllib.request.Request(url, data=body, headers={"Content-Type": "application/json", "Accept": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            # Successful replies are capped at 2 MiB, error bodies at 1 MiB.
            data = resp.read(2 * 1024 * 1024).decode("utf-8", "replace")
            status = int(resp.status)
        reply = json.loads(data or "{}")
    except urllib.error.HTTPError as exc:
        status = int(exc.code)
        try:
            data = exc.read(1024 * 1024).decode("utf-8", "replace")
            reply = json.loads(data or "{}")
        except Exception:
            return status, {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
    if not isinstance(reply, dict):
        # Valid JSON that is not an object would break callers using .get().
        return status, {"error": "non_object_json"}
    return status, reply
|
||||||
|
|
||||||
|
|
||||||
|
def health_row(service: str, url: str, timeout: float, gate: str = "none", mode: str = "health_only") -> tuple[ServiceRow, dict[str, Any]]:
    """Query a health endpoint and build the initial row for ``service``.

    Args:
        service: Name stored on the row.
        url: Health URL fetched via ``http_get_json``.
        timeout: Timeout in seconds for the request.
        gate: Gate label stored on the row.
        mode: Mode recorded when the service is healthy; an unhealthy service
            gets mode ``"unavailable"`` instead.

    Returns:
        ``(row, payload)`` where ``payload`` is the health response mapping.
    """
    status, payload = http_get_json(url, timeout)
    # Healthy means HTTP 200 and no explicit ``"ok": false`` in the body.
    healthy = status == 200 and payload.get("ok", True) is not False
    row = ServiceRow(service=service, reachable=healthy, mode=mode if healthy else "unavailable", gate=gate)
    if healthy:
        return row, payload

    row.fallbacks = 1
    row.warnings.append("unavailable")
    detail = payload.get("error") or payload.get("ready_error") or f"http_{status}"
    row.error = str(detail)[:80]  # keep the stored error short
    return row, payload
|
||||||
|
|
||||||
|
|
||||||
|
def measure_probe(fn: Callable[[], tuple[int, dict[str, Any]]], timeout_label: str, busy_path: Path = BUSY_PATH) -> tuple[int, dict[str, Any], float, int | None]:
    """Run ``fn`` while sampling the busy counter and wall-clock time around it.

    Args:
        fn: Zero-argument callable returning ``(status, payload)``.
        timeout_label: Accepted for the caller's convenience; not used in the body.
        busy_path: Counter file sampled via ``read_busy`` before and after the call.

    Returns:
        ``(status, payload, elapsed_ms, delta)`` where ``elapsed_ms`` is rounded
        to three decimals and ``delta`` is ``after - before`` of the counter, or
        ``None`` when either counter read failed.
    """
    busy_before = read_busy(busy_path)
    t0 = time.perf_counter()
    status, payload = fn()
    elapsed_ms = round((time.perf_counter() - t0) * 1000, 3)
    busy_after = read_busy(busy_path)

    if busy_before is None or busy_after is None:
        delta = None
    else:
        delta = busy_after - busy_before
    return status, payload, elapsed_ms, delta
|
||||||
|
|
||||||
|
|
||||||
|
def apply_proof(row: ServiceRow, delta: int | None) -> None:
    """Record the measured counter delta on ``row`` and flag missing proof.

    ``row.proof_ok`` becomes true only for a strictly positive ``delta``.
    Otherwise the row's fallback count is incremented and a warning is added:
    ``"missing_sysfs_counter"`` when ``delta`` is ``None``,
    ``"no_positive_sysfs_delta"`` when it is zero or negative.
    """
    counter_available = delta is not None
    row.npu_delta_us = delta
    row.proof_ok = bool(counter_available and delta > 0)
    if row.proof_ok:
        return

    row.fallbacks += 1
    if counter_available:
        row.warnings.append("no_positive_sysfs_delta")
    else:
        row.warnings.append("missing_sysfs_counter")
|
||||||
|
|
||||||
|
|
||||||
|
def mark_skipped_fallback(row: ServiceRow, reason: str) -> None:
    """Count a skipped or unloaded proof probe as a fallback on ``row``.

    Intended for proof-capable rows whose bounded smoke test was disabled or
    skipped to avoid side effects such as cold-loading. Rows that are
    health-only by design should not be passed here, so their fallback count
    stays at zero.

    Args:
        row: Row whose ``fallbacks`` counter is incremented.
        reason: Warning string appended to ``row.warnings``.
    """
    row.fallbacks = row.fallbacks + 1
    row.warnings.append(reason)
|
||||||
|
|
||||||
|
|
||||||
|
def probe_embeddings(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the embeddings service and, if healthy, run one embedding request.

    Args:
        timeout: Timeout in seconds for the health check and the probe request.
        busy_path: Counter file sampled around the probe.
        post_json: Callable used to send the probe; injectable for tests.

    Returns:
        The populated row. When the health check fails the row is returned
        as built by ``health_row`` without running the probe.
    """
    row, _ = health_row("embeddings", EMBED_HEALTH_URL, timeout)
    if not row.reachable:
        return row

    request_body = {"input": "non-private npu utilization digest probe", "model": "bge-base-en-v1.5-int8-ov"}

    def _send() -> tuple[int, dict[str, Any]]:
        return post_json(EMBED_URL, request_body, timeout)

    status, data, elapsed, delta = measure_probe(_send, "embeddings", busy_path)
    vectors = data.get("data")
    reported_delta = data.get("npu_busy_delta_us")

    row.probe_ran = True
    row.calls = 1
    row.items = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    # The probe counts as successful only with HTTP 200 and a "data" key.
    row.reachable = status == 200 and "data" in data
    row.embedding_count = len(vectors) if isinstance(vectors, list) else 0
    row.embedding_dim = data.get("embedding_dim")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)

    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def probe_rerank(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the rerank service and, if healthy, rerank two fixed documents.

    Args:
        timeout: Timeout in seconds for the health check and the probe request.
        busy_path: Counter file sampled around the probe.
        post_json: Callable used to send the probe; injectable for tests.

    Returns:
        The populated row. ``avg_ms`` prefers the service-reported
        ``duration_ms`` and falls back to the locally measured time when that
        value is missing, falsy, or not convertible to ``float``.
    """
    row, _ = health_row("rerank", RERANK_HEALTH_URL, timeout)
    if not row.reachable:
        return row

    docs = ["Intel NPU accelerates OpenVINO inference.", "Bananas ripen on a kitchen counter."]
    payload = {"query": "OpenVINO NPU inference", "documents": docs, "top_k": 2, "return_documents": False}
    status, data, elapsed, delta = measure_probe(lambda: post_json(RERANK_URL, payload, timeout), "rerank", busy_path)

    row.probe_ran = True
    row.calls = 1
    row.docs = len(docs)
    try:
        row.avg_ms = float(data.get("duration_ms") or elapsed)
    except (TypeError, ValueError):
        # A malformed duration from the service must not abort the whole digest.
        row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and data.get("ok", True) is not False
    reported_delta = data.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)

    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def probe_classifier(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the classifier and, if healthy, classify one fixed event in dry-run mode.

    Args:
        timeout: Timeout in seconds for the health check and the probe request.
        busy_path: Counter file sampled around the probe.
        post_json: Callable used to send the probe; injectable for tests.

    Returns:
        The populated row, including escalate/suppress flags, recommendation,
        confidence, and authority-violation count derived from the response.
    """
    row, _ = health_row("classifier", CLASSIFIER_HEALTH_URL, timeout, mode="dry_run")
    if not row.reachable:
        return row

    request_body = {
        "id": "npu-digest-probe",
        "text": "Non-private cron event: backup completed successfully, no user action required.",
        "options": {"dry_run": True, "include_evidence": False},
    }

    def _send() -> tuple[int, dict[str, Any]]:
        return post_json(CLASSIFIER_URL, request_body, timeout)

    status, data, elapsed, delta = measure_probe(_send, "classifier", busy_path)

    row.probe_ran = True
    row.calls = 1
    row.events = 1
    row.avg_ms = elapsed
    row.mode = "dry_run"
    row.dry_run = True
    row.reachable = status == 200 and "error" not in data

    # Take the first integer-valued delta key the response offers, in this order.
    row.response_delta_us = None
    for key in ("sysfs_npu_busy_delta_us", "npu_busy_delta_us"):
        candidate = data.get(key)
        if isinstance(candidate, int):
            row.response_delta_us = candidate
            break

    labels: dict[str, Any] = data.get("labels") if isinstance(data.get("labels"), dict) else {}
    action: dict[str, Any] = data.get("action") if isinstance(data.get("action"), dict) else {}

    wants_escalate = action.get("escalate") or labels.get("action_required") or labels.get("tool_needed")
    wants_suppress = action.get("suppress") or labels.get("no_op") or labels.get("duplicate")
    row.escalate = int(bool(wants_escalate))
    row.suppress = int(bool(wants_suppress))

    # Prefer the service's own recommendation; otherwise derive one from the flags.
    derived = "escalate" if row.escalate else "suppress" if row.suppress else "log"
    row.recommendation = extract_recommendation(data) or derived
    row.confidence = extract_confidence(data)
    row.confidence_bucket = confidence_bucket(row.confidence)
    row.authority_violations = count_authority_violations(data)
    if row.authority_violations:
        row.warnings.append("authority_violation")
    row.items = len(labels)
    apply_proof(row, delta)

    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def write_tone_wav(path: Path, seconds: float = 0.35, sample_rate: int = 16000) -> None:
    """Write a mono, 16-bit PCM WAV file containing a 440 Hz sine tone.

    Args:
        path: Destination file; created or overwritten.
        seconds: Tone length; the frame count is ``int(seconds * sample_rate)``.
        sample_rate: Frames per second written to the WAV header.

    The samples are generated once and written with a single ``writeframes``
    call instead of one ``writeframesraw`` call per sample; the resulting file
    content is the same.
    """
    frames = int(seconds * sample_rate)
    with wave.open(str(path), "wb") as wav:
        wav.setnchannels(1)
        wav.setsampwidth(2)
        wav.setframerate(sample_rate)
        # Amplitude 9000 keeps the signal well inside the signed 16-bit range.
        pcm = b"".join(
            int(9000 * math.sin(2 * math.pi * 440 * (i / sample_rate))).to_bytes(2, byteorder="little", signed=True)
            for i in range(frames)
        )
        wav.writeframes(pcm)
|
||||||
|
|
||||||
|
|
||||||
|
def post_multipart_file(url: str, file_path: Path, timeout: float) -> tuple[int, dict[str, Any]]:
    """POST ``file_path`` as a multipart/form-data upload and return ``(status, json_object)``.

    The form carries ``model=whisper``, ``response_format=json`` and the file
    bytes under the ``file`` field with filename ``npu-digest.wav``.

    Args:
        url: Endpoint to post to.
        file_path: File whose bytes are uploaded as ``audio/wav``.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``(http_status, response)``, matching the other HTTP helpers in this file:
        - HTTP error responses keep their real status code and, when parsable,
          their JSON body (otherwise ``{"error": "http_error"}``);
        - a JSON body that is not an object becomes ``{"error": "non_object_json"}``;
        - any other failure yields ``(0, {"error": <exception class name>})``.
    """

    def _as_object(text: str) -> dict[str, Any]:
        # Callers use ``data.get(...)``; never hand them a list/str/number.
        parsed = json.loads(text or "{}")
        return parsed if isinstance(parsed, dict) else {"error": "non_object_json"}

    boundary = "----npu-digest-" + uuid.uuid4().hex
    file_bytes = file_path.read_bytes()
    parts = [
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nwhisper\r\n".encode(),
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"response_format\"\r\n\r\njson\r\n".encode(),
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"file\"; filename=\"npu-digest.wav\"\r\nContent-Type: audio/wav\r\n\r\n".encode(),
        file_bytes,
        f"\r\n--{boundary}--\r\n".encode(),
    ]
    req = urllib.request.Request(url, data=b"".join(parts), headers={"Content-Type": f"multipart/form-data; boundary={boundary}"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return int(resp.status), _as_object(resp.read(1024 * 1024).decode("utf-8", "replace"))
    except urllib.error.HTTPError as exc:
        # Preserve the real status code and error body instead of collapsing to 0.
        try:
            return int(exc.code), _as_object(exc.read(1024 * 1024).decode("utf-8", "replace"))
        except Exception:
            return int(exc.code), {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
|
||||||
|
|
||||||
|
|
||||||
|
def probe_whisper(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH) -> ServiceRow:
    """Health-check the whisper service and optionally transcribe a generated tone.

    Args:
        timeout: Timeout in seconds for the health check and the upload.
        include_smoke: When false, a healthy service is recorded as
            health-only and the skipped probe is counted as a fallback.
        busy_path: Counter file sampled around the probe.

    Returns:
        The populated row.
    """
    row, _ = health_row("whisper", WHISPER_HEALTH_URL, timeout)
    row.jobs = 0

    if not row.reachable:
        return row
    if not include_smoke:
        row.mode = "health_only"
        row.reason = "smoke_disabled"
        mark_skipped_fallback(row, "skipped")
        return row

    # The probe audio lives in a temporary directory that is removed afterwards.
    with tempfile.TemporaryDirectory(prefix="npu-digest-whisper-") as tmp:
        wav_path = Path(tmp) / "probe.wav"
        write_tone_wav(wav_path)

        def _upload() -> tuple[int, dict[str, Any]]:
            return post_multipart_file(WHISPER_URL, wav_path, timeout)

        status, data, elapsed, delta = measure_probe(_upload, "whisper", busy_path)

    reported_rate = data.get("sample_rate")
    reported_delta = data.get("npu_busy_delta_us")

    row.probe_ran = True
    row.calls = 1
    row.jobs = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and "error" not in data
    row.text_len = len(str(data.get("text") or ""))
    row.sample_rate = reported_rate if isinstance(reported_rate, int) else None
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)

    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def probe_genai(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the genai service and optionally run a tiny generation request.

    The generation probe is skipped, and counted as a fallback, when
    ``include_smoke`` is false or when the health payload reports
    ``loaded: false``; the recorded reason is ``"skipped_cold_load"`` in the
    latter case and ``"smoke_disabled"`` otherwise.

    Args:
        timeout: Timeout in seconds for the health check and the probe request.
        include_smoke: Whether the generation probe may run.
        busy_path: Counter file sampled around the probe.
        post_json: Callable used to send the probe; injectable for tests.

    Returns:
        The populated row. A failed probe records the ``"probe_http_failed"``
        warning and a short ``error`` string, matching the other probes.
    """
    row, health = health_row("genai", GENAI_HEALTH_URL, timeout)
    # ``loaded`` stays None when the health payload does not report it.
    row.loaded = bool(health.get("loaded")) if isinstance(health, dict) and "loaded" in health else None
    row.jobs = 0
    if not row.reachable:
        return row

    if not include_smoke or row.loaded is False:
        row.mode = "loaded=false" if row.loaded is False else "health_only"
        row.reason = "skipped_cold_load" if row.loaded is False else "smoke_disabled"
        mark_skipped_fallback(row, row.reason)
        return row

    payload = {"prompt": "Say pong.", "max_new_tokens": 8}
    status, data, elapsed, delta = measure_probe(lambda: post_json(GENAI_GENERATE_URL, payload, timeout), "genai", busy_path)

    row.probe_ran = True
    row.calls = 1
    row.jobs = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and "error" not in data
    apply_proof(row, delta)

    if not row.reachable:
        # Previously a failed generation left no warning or error on the row.
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def doc_triage_sample_path() -> Path | None:
    """Locate an existing synthetic invoice sample for the doc-triage probe.

    Two locations are tried in order: a fixed absolute path and a path
    relative to the grandparent directory of this file. A candidate qualifies
    only if both the PNG and its sibling ``.png.txt`` file exist.

    Returns:
        The first qualifying PNG path, or ``None`` when neither qualifies.
    """
    candidates = (
        Path("/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png"),
        Path(__file__).resolve().parents[1] / "openvino-doc-image-triage-npu" / "samples" / "synthetic_invoice.png",
    )
    usable = (c for c in candidates if c.exists() and c.with_suffix(".png.txt").exists())
    return next(usable, None)
|
||||||
|
|
||||||
|
|
||||||
|
def probe_doc_triage(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the doc-triage service and optionally triage one synthetic invoice.

    The probe uses an on-disk sample when ``doc_triage_sample_path`` finds
    one; otherwise it writes a 1x1 PNG plus a ``.png.txt`` sidecar into a
    temporary directory and uses that. In both cases the request restricts
    ``allowed_roots`` to the sample's own directory.

    Args:
        timeout: Timeout in seconds for the health check and the probe request.
        include_smoke: When false, a healthy service is recorded as
            health-only and the skipped probe is counted as a fallback.
        busy_path: Counter file sampled around the probe.
        post_json: Callable used to send the probe; injectable for tests.

    Returns:
        The populated row.
    """
    row, _ = health_row("doc_triage", DOC_TRIAGE_HEALTH_URL, timeout, gate="closed:private-root")
    row.files = 0

    if not row.reachable:
        return row
    if not include_smoke:
        row.mode = "health_only"
        row.reason = "smoke_disabled"
        mark_skipped_fallback(row, "skipped")
        return row

    def _triage(sample_path: Path, allowed_root: Path) -> tuple[int, dict[str, Any], float, int | None]:
        # Build the request for one sample and measure the call.
        body = {"path": str(sample_path), "options": {"allowed_roots": [str(allowed_root)], "include_ocr_text": False, "use_embeddings": True}}
        return measure_probe(lambda: post_json(DOC_TRIAGE_URL, body, timeout), "doc_triage", busy_path)

    sample = doc_triage_sample_path()
    if sample is None:
        # No sample on disk: synthesize one that exists only for the duration of the call.
        with tempfile.TemporaryDirectory(prefix="npu-digest-doc-") as tmp:
            root = Path(tmp).resolve()
            sample = root / "synthetic-invoice.png"
            sample.write_bytes(base64.b64decode("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII="))
            sample.with_suffix(".png.txt").write_text("Synthetic invoice. Amount due $12.34 by 2026-06-30. No private data.\n")
            status, data, elapsed, delta = _triage(sample, root)
    else:
        status, data, elapsed, delta = _triage(sample, sample.parent.resolve())

    row.probe_ran = True
    row.calls = 1
    row.files = 1
    row.avg_ms = elapsed
    row.mode = "NPU-via-embedding-service"
    row.allowed_roots_count = 1
    row.reachable = status == 200 and data.get("ok", True) is not False

    # Walk result -> pages[0] -> needs_attention -> embedding, tolerating wrong shapes.
    result = data.get("result")
    pages = result.get("pages") if isinstance(result, dict) else None
    embedding: dict[str, Any] = {}
    if isinstance(pages, list) and pages and isinstance(pages[0], dict):
        attention = pages[0].get("needs_attention")
        nested = attention.get("embedding") if isinstance(attention, dict) else None
        if isinstance(nested, dict):
            embedding = nested

    reported_delta = embedding.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)

    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def build_summary(rows: list[ServiceRow], artifact_path: str | None, counter_delta_us: int | None, started_at: str) -> dict[str, Any]:
    """Aggregate per-service rows into the digest's summary record.

    Args:
        rows: Rows produced by the probes and health checks.
        artifact_path: Path of the written artifact, or ``None``.
        counter_delta_us: Counter delta measured across the whole run.
        started_at: Timestamp string stored as ``"timestamp"``.

    Returns:
        A dict with ``"type": "summary"`` plus totals, per-service maps,
        recommendation/confidence tallies, and warning counts. Zero-valued
        entries are dropped from the confidence and recommendation tallies.
    """
    proof_rows = [r for r in rows if r.probe_ran and r.proof_ok is not None]
    rec_tally = {"escalate": 0, "suppress": 0}
    conf_tally: dict[str, int] = {"low": 0, "medium": 0, "high": 0, "unknown": 0}
    warning_tally: dict[str, int] = {}
    violation_total = 0

    for row in rows:
        rec = (row.recommendation or "").lower()
        if rec in rec_tally:
            rec_tally[rec] += 1
        else:
            # No countable recommendation string: fall back to the numeric flags.
            rec_tally["escalate"] += row.escalate or 0
            rec_tally["suppress"] += row.suppress or 0

        bucket = row.confidence_bucket
        if bucket:
            conf_tally[bucket] = conf_tally.get(bucket, 0) + 1
        elif row.recommendation or row.escalate is not None or row.suppress is not None:
            # The row carried a decision but no confidence bucket.
            conf_tally["unknown"] += 1

        violation_total += row.authority_violations or 0
        for name in row.warnings:
            warning_tally[name] = warning_tally.get(name, 0) + 1

    summary: dict[str, Any] = {}
    summary["type"] = "summary"
    summary["timestamp"] = started_at
    summary["counter"] = str(BUSY_PATH)
    summary["delta_us"] = counter_delta_us
    summary["services_ok"] = sum(1 for r in rows if r.reachable)
    summary["services_total"] = len(rows)
    summary["proof_ok"] = sum(1 for r in proof_rows if r.proof_ok)
    summary["proof_total"] = len(proof_rows)
    summary["fallbacks"] = sum(r.fallbacks for r in rows)
    summary["fallbacks_by_service"] = {r.service: r.fallbacks for r in rows if r.fallbacks}
    summary["request_counts_by_service"] = {r.service: r.calls for r in rows if r.calls}
    summary["npu_busy_delta_us_by_service"] = {r.service: r.npu_delta_us for r in rows if r.npu_delta_us is not None}
    summary["confidence_distribution"] = {k: v for k, v in conf_tally.items() if v}
    summary["recommendation_counts"] = {k: v for k, v in rec_tally.items() if v}
    summary["authority_violations"] = violation_total
    summary["gates_closed"] = sum(1 for r in rows if str(r.gate).startswith("closed:"))
    summary["warnings"] = warning_tally
    summary["artifact"] = artifact_path
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def render_text(summary: dict[str, Any], rows: list[ServiceRow]) -> str:
    """Render the summary and rows as the human-readable text digest.

    Args:
        summary: Summary mapping; ``timestamp``, ``counter``, ``services_ok``,
            ``services_total``, ``proof_ok``, ``proof_total``, ``fallbacks``,
            ``authority_violations`` and ``gates_closed`` are required keys.
        rows: Per-service rows, one output line each.

    Returns:
        Newline-joined text: three header lines, optional recommendation and
        confidence lines, one line per row, a ``fallbacks:`` line built from
        the summary's warning counts, and an optional ``artifact:`` line.
    """

    def _flag(value: Any) -> str:
        # Booleans are printed lowercase ("true"/"false").
        return str(value).lower()

    plain = "{}".format

    out = [
        f"NPU utilization digest {summary['timestamp']}",
        f"counter={summary['counter']} delta_us={summary.get('delta_us')}",
        f"services_ok={summary['services_ok']}/{summary['services_total']} proof_ok={summary['proof_ok']}/{summary['proof_total']} fallbacks={summary['fallbacks']} authority_violations={summary['authority_violations']} gates_closed={summary['gates_closed']}",
    ]

    for prefix, key in (("recommendations: ", "recommendation_counts"), ("confidence: ", "confidence_distribution")):
        tally = summary.get(key) or {}
        if tally:
            out.append(prefix + " ".join(f"{k}={v}" for k, v in sorted(tally.items())))

    for r in rows:
        parts = [f"- {r.service}:", f"ok={_flag(r.reachable)}"]
        if r.calls:
            parts.append(f"calls={r.calls}")

        # (label, value, formatter) in output order; emitted only when the value is not None.
        optional = (
            ("jobs", r.jobs, plain),
            ("events", r.events, plain),
            ("files", r.files, plain),
            ("docs", r.docs, plain),
            ("avg_ms", r.avg_ms, plain),
            ("npu_delta_us", r.npu_delta_us, plain),
            ("proof", r.proof_ok, _flag),
            ("dry_run", r.dry_run, _flag),
            ("suppress", r.suppress, plain),
            ("escalate", r.escalate, plain),
            ("recommendation", r.recommendation, plain),
            ("confidence", r.confidence_bucket, plain),
            ("authority_violations", r.authority_violations, plain),
            ("loaded", r.loaded, _flag),
            ("allowed_roots", r.allowed_roots_count, plain),
            ("text_len", r.text_len, plain),
        )
        parts.extend(f"{label}={fmt(value)}" for label, value, fmt in optional if value is not None)

        if r.mode:
            parts.append(f"mode={r.mode}")
        if r.gate != "none":
            parts.append(f"gate={r.gate}")
        if r.reason:
            parts.append(f"reason={r.reason}")
        if r.warnings:
            # Warnings are de-duplicated and sorted for stable output.
            parts.append("warnings=" + ",".join(sorted(set(r.warnings))))
        out.append(" ".join(parts))

    warning_counts = summary.get("warnings") or {}
    if warning_counts:
        out.append("fallbacks: " + " ".join(f"{k}={v}" for k, v in sorted(warning_counts.items())))
    else:
        out.append("fallbacks: none")

    if summary.get("artifact"):
        out.append(f"artifact: {summary['artifact']}")
    return "\n".join(out)
|
||||||
|
|
||||||
|
|
||||||
|
def write_jsonl(summary: dict[str, Any], rows: list[ServiceRow], out_dir: Path) -> Path:
    """Write the summary followed by one compact record per row as a JSONL file.

    The file name is the summary timestamp with ``:``, ``+`` and ``-``
    removed, plus the ``.jsonl`` extension. ``out_dir`` is created if missing.

    Args:
        summary: Summary mapping; must contain ``"timestamp"``.
        rows: Rows serialized via ``compact_dict``.
        out_dir: Directory that receives the file.

    Returns:
        Path of the written file.
    """

    def _dump(record: Any) -> str:
        return json.dumps(record, sort_keys=True, separators=(",", ":"))

    out_dir.mkdir(parents=True, exist_ok=True)

    stamp = summary["timestamp"]
    for ch in (":", "+", "-"):
        stamp = stamp.replace(ch, "")
    path = out_dir / f"{stamp}.jsonl"

    with path.open("w", encoding="utf-8") as f:
        f.write(_dump(summary) + "\n")
        f.writelines(_dump(compact_dict(row)) + "\n" for row in rows)
    return path
|
||||||
|
|
||||||
|
|
||||||
|
def str_bool(value: str) -> bool:
    """Parse a command-line boolean word, case-insensitively.

    Accepts ``1/true/yes/y/on`` as true and ``0/false/no/n/off`` as false.

    Raises:
        argparse.ArgumentTypeError: For any other input.
    """
    token = value.lower()
    vocabulary = (
        (True, ("1", "true", "yes", "y", "on")),
        (False, ("0", "false", "no", "n", "off")),
    )
    for verdict, words in vocabulary:
        if token in words:
            return verdict
    raise argparse.ArgumentTypeError("expected true or false")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the digest's command-line options.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The parsed namespace.
    """
    parser = argparse.ArgumentParser(description="Compact NPU utilization digest")
    add = parser.add_argument

    add("--format", choices=("text", "jsonl"), default="text")
    add("--out", default=str(DEFAULT_OUT_DIR))
    add("--timeout-s", type=float, default=8.0)

    # Smoke toggles take an explicit true/false word; the genai smoke is off by default.
    smoke_defaults = (
        ("--include-whisper-smoke", True),
        ("--include-genai-smoke", False),
        ("--include-doc-triage-smoke", True),
    )
    for flag, enabled in smoke_defaults:
        add(flag, type=str_bool, default=enabled)

    add("--no-write", action="store_true")
    add("--strict-proof", action="store_true", help="exit nonzero if a proof-required probe ran without positive sysfs delta")
    add("--verbose", action="store_true")
    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def run(args: argparse.Namespace) -> tuple[dict[str, Any], list[ServiceRow]]:
    """Run every probe and health check once and build the summary.

    Args:
        args: Parsed options; reads ``timeout_s`` and the three
            ``include_*_smoke`` switches.

    Returns:
        ``(summary, rows)``. The summary's ``artifact`` is ``None`` here.
    """
    started_at = dt.datetime.now().astimezone().replace(microsecond=0).isoformat()
    timeout = args.timeout_s
    busy_before = read_busy(BUSY_PATH)

    rows: list[ServiceRow] = []
    rows.append(probe_embeddings(timeout))
    rows.append(probe_rerank(timeout))
    rows.append(probe_whisper(timeout, args.include_whisper_smoke))
    rows.append(probe_classifier(timeout))
    rows.append(probe_genai(timeout, args.include_genai_smoke))
    rows.append(probe_doc_triage(timeout, args.include_doc_triage_smoke))

    # These services are only health-checked, never probed.
    health_only = (
        ("rag_endpoint", RAG_ENDPOINT_HEALTH_URL, "closed:vector-mutation"),
        ("rag_health", RAG_HEALTH_URL, "none"),
        ("advisory_gateway", ADVISORY_HEALTH_URL, "closed:advisory-post"),
    )
    for service, url, gate in health_only:
        row, _ = health_row(service, url, timeout, gate=gate)
        rows.append(row)

    busy_after = read_busy(BUSY_PATH)
    if busy_before is None or busy_after is None:
        total_delta = None
    else:
        total_delta = busy_after - busy_before

    summary = build_summary(rows, artifact_path=None, counter_delta_us=total_delta, started_at=started_at)
    return summary, rows
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: run the digest, persist it, and print it.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``2`` when ``--strict-proof`` is set and some probe ran with
        ``proof_ok`` false; otherwise ``0``.
    """

    def _line(record: Any) -> str:
        return json.dumps(record, sort_keys=True, separators=(",", ":"))

    args = parse_args(argv)
    summary, rows = run(args)

    if not args.no_write:
        artifact = write_jsonl(summary, rows, Path(args.out).expanduser())
        summary["artifact"] = str(artifact)
        # rewrite with artifact path included in the summary line
        records = [_line(summary)] + [_line(compact_dict(r)) for r in rows]
        artifact.write_text("\n".join(records) + "\n")

    if args.format == "jsonl":
        print(_line(summary))
        for row in rows:
            print(_line(compact_dict(row)))
    else:
        print(render_text(summary, rows))

    proof_failed = any(r.probe_ran and r.proof_ok is False for r in rows)
    if args.strict_proof and proof_failed:
        return 2
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Executable
+339
@@ -0,0 +1,339 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Local-file voice/audio NPU advisory pipeline.
|
||||||
|
|
||||||
|
Side-effect-free first slice:
|
||||||
|
local audio file -> Whisper NPU -> classifier NPU -> advisory gate
|
||||||
|
|
||||||
|
No platform fetching, outbound sends, Obsidian/memory/vector writes, service
|
||||||
|
restarts, or live Atlas/Hermes routing changes are performed by this script.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import ipaddress
|
||||||
|
import json
|
||||||
|
import mimetypes
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
import wave
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
# Default loopback endpoints for the two services this pipeline posts to.
DEFAULT_WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"
DEFAULT_CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"

# Counter file read before/after calls to compute a busy-time delta.
NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")

# Lower-case file suffixes accepted as audio input.
AUDIO_EXTENSIONS = {
    ".wav",
    ".ogg",
    ".oga",
    ".opus",
    ".mp3",
    ".m4a",
    ".mp4",
    ".webm",
    ".flac",
}

# Whole-word, case-insensitive match for a fixed list of action-related words.
ACTION_MARKERS = re.compile(
    r"\b(remind|todo|to-do|task|follow[- ]?up|schedule|call|email|send|draft|inspect|check|fix|review|question|ask)\b",
    re.IGNORECASE,
)
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineError(RuntimeError):
    """Pipeline failure carrying an exit status and structured details.

    Attributes are set in ``__init__``: ``status`` (defaults to ``1``) and
    ``details`` (a dict; a missing or empty argument becomes a new empty dict).
    """

    def __init__(self, message: str, *, status: int = 1, details: dict[str, Any] | None = None):
        RuntimeError.__init__(self, message)
        self.status = status
        self.details = details if details else {}
|
||||||
|
|
||||||
|
|
||||||
|
def validate_loopback_endpoint(url: str, *, label: str) -> str:
    """Return ``url`` when it targets an explicit local HTTP(S) endpoint.

    The pipeline reads local audio and posts transcripts/audio bytes, so
    endpoint overrides must not be able to exfiltrate data to remote hosts.
    The policy is intentionally narrow: ``localhost``, an IPv4 loopback
    address, or IPv6 ``::1`` only. A trailing dot and letter case in the host
    are ignored.

    Args:
        url: Endpoint URL to check.
        label: Prefix used in error messages (for example ``"whisper"``).

    Returns:
        ``url`` unchanged.

    Raises:
        PipelineError: ``{label}_url_invalid`` when the URL cannot be parsed,
            ``{label}_url_scheme_not_allowed``, ``{label}_url_missing_host``,
            or ``{label}_url_host_not_loopback``.
    """
    allowed_hosts = ["localhost", "127.0.0.0/8", "::1"]

    try:
        parsed = urllib.parse.urlparse(url)
        host = parsed.hostname
    except ValueError as exc:
        # urlparse rejects malformed authorities (e.g. an unterminated "[" for
        # IPv6); report that as a pipeline error rather than a bare ValueError.
        raise PipelineError(f"{label}_url_invalid") from exc

    if parsed.scheme not in {"http", "https"}:
        raise PipelineError(
            f"{label}_url_scheme_not_allowed",
            details={"url_host": host or "", "allowed_schemes": ["http", "https"]},
        )
    if not host:
        raise PipelineError(f"{label}_url_missing_host")

    normalized = host.rstrip(".").lower()
    if normalized == "localhost":
        return url

    # Anything other than the literal "localhost" must be a loopback IP literal.
    try:
        address = ipaddress.ip_address(normalized)
    except ValueError as exc:
        raise PipelineError(
            f"{label}_url_host_not_loopback",
            details={"url_host": host, "allowed_hosts": allowed_hosts},
        ) from exc
    if not address.is_loopback:
        raise PipelineError(
            f"{label}_url_host_not_loopback",
            details={"url_host": host, "allowed_hosts": allowed_hosts},
        )
    return url
|
||||||
|
|
||||||
|
|
||||||
|
def read_npu_busy_us(path: Path = NPU_BUSY_PATH) -> int | None:
    """Read the integer stored in ``path``.

    Returns:
        The parsed value, or ``None`` when the file cannot be read
        (``OSError``) or its stripped content is not an integer (``ValueError``).
    """
    try:
        text = path.read_text()
        value = int(text.strip())
    except (OSError, ValueError):
        return None
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def delta_us(before: int | None, after: int | None) -> int | None:
    """Return ``after - before`` clamped to be non-negative.

    Returns ``None`` when either reading is ``None``.
    """
    have_both = before is not None and after is not None
    if not have_both:
        return None
    difference = after - before
    return difference if difference > 0 else 0
|
||||||
|
|
||||||
|
|
||||||
|
def encode_multipart(fields: dict[str, str], files: dict[str, tuple[str, bytes, str]]) -> tuple[bytes, str]:
    """Encode text fields and files as a ``multipart/form-data`` request body.

    Args:
        fields: Mapping of field name to text value.
        files: Mapping of field name to ``(filename, data, content_type)``.

    Returns:
        ``(body, content_type_header)`` where the header value carries the
        randomly generated boundary.

    Field names and filenames are placed inside quoted header parameters, so
    ``"``, CR and LF in them are percent-escaped (``%22``, ``%0D``, ``%0A``).
    Without this, a file name containing a quote or a line break would corrupt
    the part headers or inject additional ones. CR/LF are also stripped from
    each file's content type.
    """

    def _param(value: str) -> str:
        # Escape the characters that would terminate or break a quoted parameter.
        return str(value).replace('"', "%22").replace("\r", "%0D").replace("\n", "%0A")

    boundary = "----npu-voice-audio-" + uuid.uuid4().hex
    delimiter = f"--{boundary}\r\n".encode()
    parts: list[bytes] = []

    for name, value in fields.items():
        parts.append(delimiter)
        parts.append(f'Content-Disposition: form-data; name="{_param(name)}"\r\n\r\n'.encode())
        parts.append(str(value).encode())
        parts.append(b"\r\n")

    for name, (filename, data, content_type) in files.items():
        header_type = str(content_type).replace("\r", "").replace("\n", "")
        parts.append(delimiter)
        parts.append(f'Content-Disposition: form-data; name="{_param(name)}"; filename="{_param(filename)}"\r\n'.encode())
        parts.append(f"Content-Type: {header_type}\r\n\r\n".encode())
        parts.append(data)
        parts.append(b"\r\n")

    parts.append(f"--{boundary}--\r\n".encode())
    return b"".join(parts), f"multipart/form-data; boundary={boundary}"
|
||||||
|
|
||||||
|
|
||||||
|
def post_json(url: str, payload: dict[str, Any], *, timeout: int) -> dict[str, Any]:
    """POST ``payload`` as JSON to the classifier endpoint and return the decoded object.

    Args:
        url: Endpoint URL; must pass ``validate_loopback_endpoint``.
        payload: Mapping serialized with ``json.dumps``.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The response body decoded as a JSON object.

    Raises:
        PipelineError: ``classifier_http_<code>`` for HTTP error responses
            (with a 300-character body preview in ``details``),
            ``classifier_request_failed: ...`` for transport, timeout,
            text-decoding, or JSON-decoding failures, and
            ``classifier_response_not_object`` when the body is valid JSON but
            not an object. Endpoint validation errors propagate unchanged.
    """
    url = validate_loopback_endpoint(url, label="classifier")
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            parsed = json.loads(resp.read().decode())
    except urllib.error.HTTPError as exc:
        body = exc.read().decode(errors="replace")[:300]
        raise PipelineError(f"classifier_http_{exc.code}", details={"body_preview": body}) from exc
    except (OSError, ValueError) as exc:
        # OSError covers URLError, timeouts and connection resets; ValueError
        # covers JSONDecodeError and UnicodeDecodeError from a non-UTF-8 body.
        raise PipelineError(f"classifier_request_failed: {exc}") from exc

    if not isinstance(parsed, dict):
        # The declared return type is a mapping; reject lists, strings, numbers.
        raise PipelineError("classifier_response_not_object", details={"json_type": type(parsed).__name__})
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def post_whisper(url: str, audio_path: Path, audio_data: bytes, language: str, *, timeout: int) -> dict[str, Any]:
    """Upload audio bytes to the whisper endpoint and return the decoded JSON object.

    The multipart form carries ``model=whisper-1``, the given ``language``,
    ``response_format=json`` and the audio under the ``file`` field. The file
    part's content type is guessed from ``audio_path``'s name, defaulting to
    ``application/octet-stream``.

    Args:
        url: Endpoint URL; must pass ``validate_loopback_endpoint``.
        audio_path: Used only for its file name and content-type guess.
        audio_data: Bytes to upload.
        language: Value of the ``language`` form field.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The response body decoded as a JSON object.

    Raises:
        PipelineError: ``whisper_http_<code>`` for HTTP error responses (with
            a 300-character body preview in ``details``),
            ``whisper_request_failed: ...`` for transport, timeout,
            text-decoding, or JSON-decoding failures, and
            ``whisper_response_not_object`` when the body is valid JSON but
            not an object. Endpoint validation errors propagate unchanged.
    """
    url = validate_loopback_endpoint(url, label="whisper")
    content_type = mimetypes.guess_type(audio_path.name)[0] or "application/octet-stream"
    body, multipart_type = encode_multipart(
        {"model": "whisper-1", "language": language, "response_format": "json"},
        {"file": (audio_path.name, audio_data, content_type)},
    )
    req = urllib.request.Request(url, data=body, headers={"Content-Type": multipart_type}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            parsed = json.loads(resp.read().decode())
    except urllib.error.HTTPError as exc:
        preview = exc.read().decode(errors="replace")[:300]
        raise PipelineError(f"whisper_http_{exc.code}", details={"body_preview": preview}) from exc
    except (OSError, ValueError) as exc:
        # OSError covers URLError, timeouts and connection resets; ValueError
        # covers JSONDecodeError and UnicodeDecodeError from a non-UTF-8 body.
        raise PipelineError(f"whisper_request_failed: {exc}") from exc

    if not isinstance(parsed, dict):
        # The declared return type is a mapping; reject lists, strings, numbers.
        raise PipelineError("whisper_response_not_object", details={"json_type": type(parsed).__name__})
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def validate_audio_path(path_text: str, *, max_bytes: int, max_audio_seconds: float | None) -> tuple[Path, int]:
    """Check that ``path_text`` names an acceptable local audio file.

    The checks run in this order: absolute path, not a symlink, exists, is a
    regular file, extension is in ``AUDIO_EXTENSIONS``, non-empty, within
    ``max_bytes`` and, for ``.wav`` files only, duration within
    ``max_audio_seconds`` (skipped when that limit is ``None``).

    Args:
        path_text: Path as supplied by the caller; ``~`` is expanded.
        max_bytes: Largest accepted file size in bytes.
        max_audio_seconds: Largest accepted WAV duration in seconds, or ``None``
            to skip the duration check.

    Returns:
        ``(path, size)`` where ``size`` is the file size in bytes.

    Raises:
        PipelineError: With a short error code describing the first failed check.
    """
    path = Path(path_text).expanduser()
    if not path.is_absolute():
        raise PipelineError("audio_path_must_be_absolute")
    if path.is_symlink():
        raise PipelineError("audio_path_must_not_be_symlink")
    if not path.exists():
        raise PipelineError("audio_path_not_found")
    if not path.is_file():
        raise PipelineError("audio_path_not_file")
    suffix = path.suffix.lower()
    if suffix not in AUDIO_EXTENSIONS:
        raise PipelineError("unsupported_audio_extension", details={"extension": suffix})
    size = path.stat().st_size
    if size <= 0:
        raise PipelineError("audio_file_empty")
    if size > max_bytes:
        raise PipelineError("audio_file_too_large", details={"bytes": size, "max_bytes": max_bytes})
    if max_audio_seconds is not None and suffix == ".wav":
        try:
            with wave.open(str(path), "rb") as wav:
                frame_count = wav.getnframes()
                frame_rate = wav.getframerate()
        except (wave.Error, EOFError) as exc:
            # wave.open raises EOFError (not wave.Error) for truncated headers.
            raise PipelineError(f"wav_decode_failed: {exc}") from exc
        if frame_rate <= 0:
            # A header declaring a zero sample rate would otherwise divide by zero below.
            raise PipelineError("wav_decode_failed: invalid frame rate")
        duration = frame_count / float(frame_rate)
        if duration > max_audio_seconds:
            raise PipelineError(
                "audio_duration_too_long",
                details={"duration_seconds": round(duration, 3), "max_audio_seconds": max_audio_seconds},
            )
    return path, size
|
||||||
|
|
||||||
|
|
||||||
|
def extract_transcript(payload: dict[str, Any]) -> str:
    """Pull the transcript text out of a Whisper-style response.

    The first truthy value among the ``text``, ``transcript`` and
    ``transcription`` keys is used. If none is present and ``segments`` is a
    list, the ``text`` of each dict segment is joined with single spaces.

    Args:
        payload: Decoded JSON response from the transcription service.

    Returns:
        The transcript with surrounding whitespace removed, or ``""`` when
        nothing usable is found.
    """
    text = payload.get("text") or payload.get("transcript") or payload.get("transcription")
    segments = payload.get("segments")
    if not text and isinstance(segments, list):
        pieces = []
        for seg in segments:
            if not isinstance(seg, dict):
                continue
            # Skip missing/None/empty text so it is not rendered as the literal "None",
            # and strip each piece so the join does not produce doubled spaces.
            piece = str(seg.get("text") or "").strip()
            if piece:
                pieces.append(piece)
        text = " ".join(pieces)
    return str(text or "").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def label_value(labels: dict[str, Any], key: str, default: Any = None) -> Any:
    """Look up ``key`` in ``labels``, unwrapping a ``{"value": ...}`` envelope if present.

    Args:
        labels: Mapping of label names to either plain values or dicts that
            carry the actual value under a ``"value"`` key.
        key: Label name to read.
        default: Returned when ``key`` is absent from ``labels``.

    Returns:
        The inner ``"value"`` when the entry is a dict containing that key,
        otherwise the entry itself (or ``default`` when missing).
    """
    entry = labels.get(key, default)
    is_wrapped = isinstance(entry, dict) and "value" in entry
    return entry["value"] if is_wrapped else entry
|
||||||
|
|
||||||
|
|
||||||
|
def compact_labels(classifier_payload: dict[str, Any]) -> dict[str, Any]:
    """Reduce a classifier response to the four labels used downstream.

    Args:
        classifier_payload: Decoded classifier response; its ``labels`` entry is
            used when it is a dict and treated as empty otherwise.

    Returns:
        A dict with ``workflow_category``, ``tool_needed`` (bool), ``urgency``
        (defaulting to ``"normal"`` when absent) and
        ``safety_confirmation_required`` (bool), each read via ``label_value``.
    """
    candidate = classifier_payload.get("labels")
    labels: dict[str, Any] = candidate if isinstance(candidate, dict) else {}

    summary: dict[str, Any] = {}
    summary["workflow_category"] = label_value(labels, "workflow_category")
    summary["tool_needed"] = bool(label_value(labels, "tool_needed", False))
    summary["urgency"] = label_value(labels, "urgency", "normal")
    summary["safety_confirmation_required"] = bool(label_value(labels, "safety_confirmation_required", False))
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def classify_text(
    *,
    classifier_url: str,
    item_id: str,
    source: str,
    title: str,
    transcript: str,
    max_transcript_chars: int,
    dry_run: bool,
    timeout: int,
) -> tuple[dict[str, Any], int | None]:
    """Send a bounded transcript to the classifier and measure the NPU counter around the call.

    Args:
        classifier_url: Endpoint handed to ``post_json``.
        item_id: Identifier sent as the request ``id``.
        source: Source label, included in both the prompt text and the context.
        title: Optional title; a ``Title:`` line is added only when non-empty.
        transcript: Full transcript; only the first ``max_transcript_chars``
            characters are sent.
        max_transcript_chars: Slice bound applied to ``transcript``.
        dry_run: Forwarded as ``options.dry_run``.
        timeout: Request timeout in seconds.

    Returns:
        ``(response, delta)`` where ``response`` is the classifier reply and
        ``delta`` is ``delta_us`` of the ``read_npu_busy_us`` readings taken
        immediately before and after the request.
    """
    clipped = transcript[:max_transcript_chars]
    title_line = f"Title: {title}\n" if title else ""
    prompt = (
        "Voice memo transcript summary candidate.\n"
        f"Source: {source}\n"
        f"{title_line}Transcript:\n{clipped}"
    )
    request_body = {
        "id": item_id,
        "text": prompt,
        "context": {"source": source, "media": "audio"},
        "options": {"include_evidence": False, "dry_run": dry_run},
    }
    # Counter readings must bracket the request itself.
    busy_before = read_npu_busy_us()
    response = post_json(classifier_url, request_body, timeout=timeout)
    busy_after = read_npu_busy_us()
    return response, delta_us(busy_before, busy_after)
|
||||||
|
|
||||||
|
|
||||||
|
def decide_gate(transcript: str, labels: dict[str, Any], whisper_proven: bool, classifier_proven: bool) -> tuple[bool, str, str]:
    """Decide the advisory gate for a classified transcript.

    Args:
        transcript: Transcript text, searched with ``ACTION_MARKERS``.
        labels: Compact labels (``tool_needed``, ``urgency``,
            ``safety_confirmation_required``).
        whisper_proven: Whether the Whisper stage showed NPU activity.
        classifier_proven: Whether the classifier stage showed NPU activity.

    Returns:
        ``(action_worthy, atlas_gate, next_gate)``. Missing NPU proof is
        checked first, then the safety-confirmation label, then whether the
        item is action-worthy at all.
    """
    urgency = str(labels.get("urgency") or "normal").lower()
    needs_confirmation = bool(labels.get("safety_confirmation_required"))

    # The regex is consulted only when neither label already marks the item as actionable.
    if labels.get("tool_needed") or urgency in {"high", "critical"}:
        action_worthy = True
    else:
        action_worthy = bool(ACTION_MARKERS.search(transcript))

    if not (whisper_proven and classifier_proven):
        return action_worthy, "blocked_missing_npu_proof", "npu_proof_required"
    if needs_confirmation:
        return action_worthy, "blocked_safety_confirmation_required", "human_approval_required"

    gate = "advisory_only_not_sent" if action_worthy else "suppressed_not_action_worthy"
    return action_worthy, gate, "dry_run_no_side_effects"
|
||||||
|
|
||||||
|
|
||||||
|
def _npu_delta_us(payload: dict[str, Any], key: str) -> int:
    """Read an integer counter from a service response, treating missing or malformed values as 0."""
    try:
        return int(payload.get(key) or 0)
    except (TypeError, ValueError, OverflowError):
        # A malformed proof value counts as "no proof" rather than crashing the run.
        return 0


def run_pipeline(args: argparse.Namespace) -> dict[str, Any]:
    """Run one local audio file through Whisper and the classifier and build the report.

    Steps: validate both endpoints and the audio path, read the file, transcribe
    it, classify the transcript, derive NPU-proof flags from the reported and
    observed busy counters, and compute the gate decision.

    Args:
        args: Parsed command-line namespace (see ``build_parser``).
            ``whisper_url`` and ``classifier_url`` are overwritten with their
            validated values.

    Returns:
        A JSON-serialisable report dict. ``transcript``, ``transcript_preview``
        and ``raw`` are included only when the corresponding flags are set.

    Raises:
        PipelineError: On validation, read, request or empty-transcript failures.
    """
    args.whisper_url = validate_loopback_endpoint(args.whisper_url, label="whisper")
    args.classifier_url = validate_loopback_endpoint(args.classifier_url, label="classifier")
    audio_path, _ = validate_audio_path(
        args.audio,
        max_bytes=args.max_bytes,
        max_audio_seconds=args.max_audio_seconds,
    )
    try:
        audio_data = audio_path.read_bytes()
    except OSError as exc:
        raise PipelineError(f"audio_read_failed: {exc}") from exc
    if len(audio_data) > args.max_bytes:
        # The file may have grown between validation and read; enforce the cap on what was read.
        raise PipelineError("audio_file_too_large", details={"bytes": len(audio_data), "max_bytes": args.max_bytes})
    item_id = args.id or f"voice-audio-{int(time.time())}"

    # Counter readings must bracket the Whisper request itself.
    whisper_before = read_npu_busy_us()
    whisper_payload = post_whisper(args.whisper_url, audio_path, audio_data, args.language, timeout=args.timeout)
    whisper_after = read_npu_busy_us()
    whisper_sysfs_delta = delta_us(whisper_before, whisper_after)
    transcript = extract_transcript(whisper_payload)
    if not transcript:
        raise PipelineError("whisper_empty_transcript")

    whisper_response_delta = _npu_delta_us(whisper_payload, "npu_busy_delta_us")
    # A locally observed delta of None is tolerated; a non-positive observed delta is not.
    whisper_proven = whisper_response_delta > 0 and (whisper_sysfs_delta is None or whisper_sysfs_delta > 0)

    classifier_payload, classifier_sysfs_observed = classify_text(
        classifier_url=args.classifier_url,
        item_id=item_id,
        source=args.source,
        title=args.title or "",
        transcript=transcript,
        max_transcript_chars=args.max_transcript_chars,
        dry_run=args.dry_run,
        timeout=args.timeout,
    )
    labels = compact_labels(classifier_payload)
    classifier_response_delta = _npu_delta_us(classifier_payload, "npu_busy_delta_us")
    classifier_response_sysfs_delta = _npu_delta_us(classifier_payload, "sysfs_npu_busy_delta_us")
    classifier_proven = (
        classifier_response_delta > 0
        and classifier_response_sysfs_delta > 0
        and (classifier_sysfs_observed is None or classifier_sysfs_observed > 0)
    )

    action_worthy, atlas_gate, next_gate = decide_gate(transcript, labels, whisper_proven, classifier_proven)

    output: dict[str, Any] = {
        "ok": True,
        "id": item_id,
        "source": args.source,
        "transcript_chars": len(transcript),
        "action_worthy": action_worthy,
        "atlas_gate": atlas_gate,
        "next_gate": next_gate,
        "whisper_npu_delta_us": whisper_response_delta,
        "whisper_sysfs_delta_us": whisper_sysfs_delta,
        "classifier_npu_delta_us": classifier_response_delta,
        "classifier_sysfs_delta_us": classifier_response_sysfs_delta,
        "classifier_observed_sysfs_delta_us": classifier_sysfs_observed,
        "labels": labels,
        "external_sends": 0,
        "writes": 0,
    }
    if args.include_transcript:
        output["transcript"] = transcript
    if args.include_transcript_preview_chars > 0:
        output["transcript_preview"] = transcript[: args.include_transcript_preview_chars]
    if args.include_raw:
        output["raw"] = {"whisper": whisper_payload, "classifier": classifier_payload}
    return output
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Construct the command-line parser for the audio pipeline script.

    Returns:
        An ``argparse.ArgumentParser`` with the input, endpoint, output and
        limit options. ``--dry-run`` and ``--no-dry-run`` share the ``dry_run``
        destination, which defaults to ``True``.
    """
    ap = argparse.ArgumentParser(
        description="Run local-file audio through NPU Whisper and NPU classifier in dry-run advisory mode."
    )
    add = ap.add_argument

    # Input selection and classifier context.
    add(
        "--audio",
        required=True,
        help="Absolute path to a local audio file; no URL/platform fetching is performed.",
    )
    add("--id", default="", help="Optional stable item id for classifier correlation.")
    add(
        "--source",
        default="local_file",
        choices=["local_file", "manual_smoke", "local_voice_memo", "meeting_snippet", "staged_telegram", "staged_discord"],
        help="Local/staged source label only.",
    )
    add("--title", default="", help="Optional short local title for classifier context.")
    add("--language", default="en")

    # Service endpoints.
    add("--whisper-url", default=DEFAULT_WHISPER_URL)
    add("--classifier-url", default=DEFAULT_CLASSIFIER_URL)

    # Dry-run toggle pair writing to the same destination.
    add(
        "--dry-run",
        dest="dry_run",
        action="store_true",
        default=True,
        help="Keep classifier in dry-run advisory mode (default).",
    )
    add(
        "--no-dry-run",
        dest="dry_run",
        action="store_false",
        help="Send dry_run=false to classifier; this script still performs no side effects.",
    )

    # Output shaping.
    add("--json", action="store_true", help="Emit compact JSON; default is JSON for machine-safe handoff.")
    add("--include-transcript", action="store_true", help="Include full transcript in output; off by default.")
    add(
        "--include-transcript-preview-chars",
        type=int,
        default=0,
        help="Include a bounded transcript preview; default 0.",
    )
    add(
        "--include-raw",
        action="store_true",
        help="Include raw service responses for one-off local debugging; off by default.",
    )

    # Limits.
    add("--max-bytes", type=int, default=25 * 1024 * 1024)
    add(
        "--max-audio-seconds",
        type=float,
        default=300.0,
        help="Enforced for WAV inputs; other codecs remain size-capped.",
    )
    add("--max-transcript-chars", type=int, default=6000)
    add("--timeout", type=int, default=300)
    return ap
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse arguments, run the pipeline, print JSON.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` after printing the report to stdout, or the ``status`` of the
        ``PipelineError`` after printing an error object to stderr.
    """
    args = build_parser().parse_args(argv)
    try:
        report = run_pipeline(args)
    except PipelineError as exc:
        failure = {"ok": False, "error": str(exc), "external_sends": 0, "writes": 0, **exc.details}
        print(json.dumps(failure, ensure_ascii=False, sort_keys=True), file=sys.stderr)
        return exc.status
    print(json.dumps(report, ensure_ascii=False, sort_keys=True))
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())
|
||||||
@@ -21,14 +21,32 @@ import os
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from urllib.parse import parse_qs, urlparse
|
from urllib.parse import parse_qs, urlparse
|
||||||
|
from urllib import request, error
|
||||||
|
|
||||||
PORT = int(os.environ.get("PORT", 18810))
|
PORT = int(os.environ.get("PORT", 18810))
|
||||||
REINDEX_TIMEOUT = int(os.environ.get("REINDEX_TIMEOUT", "1800"))
|
REINDEX_TIMEOUT = int(os.environ.get("REINDEX_TIMEOUT", "1800"))
|
||||||
RAG_COLLECTION = os.environ.get("RAG_COLLECTION", "obsidian").strip() or "obsidian"
|
RAG_COLLECTION = os.environ.get("RAG_COLLECTION", "obsidian").strip() or "obsidian"
|
||||||
RAG_EMBED_MODEL = os.environ.get("RAG_EMBED_MODEL", "nomic-embed-text").strip() or "nomic-embed-text"
|
RAG_EMBED_MODEL = os.environ.get("RAG_EMBED_MODEL", "nomic-embed-text").strip() or "nomic-embed-text"
|
||||||
OLLAMA_BASE_URL = (os.environ.get("OLLAMA_BASE_URL") or "http://127.0.0.1:18807").rstrip("/")
|
OLLAMA_BASE_URL = (os.environ.get("OLLAMA_BASE_URL") or "http://127.0.0.1:18807").rstrip("/")
|
||||||
|
def _rerank_env_flag(name: str, fallback: str) -> bool:
    """Interpret an environment variable as a boolean switch (1/true/yes/on)."""
    raw = os.environ.get(name) or fallback
    return raw.strip().lower() in {"1", "true", "yes", "on"}


def _rerank_env_int(name: str, fallback: str) -> int:
    """Read an integer environment variable, clamped to a minimum of 1."""
    return max(1, int(os.environ.get(name) or fallback))


# Second-stage reranking configuration, read once at import time.
RAG_RERANK_ENABLED = _rerank_env_flag("RAG_RERANK_ENABLED", "false")
RAG_RERANK_URL = (os.environ.get("RAG_RERANK_URL") or "http://127.0.0.1:18818/rerank").strip()
RAG_RERANK_INITIAL_K = _rerank_env_int("RAG_RERANK_INITIAL_K", "20")
RAG_RERANK_TOP_K = _rerank_env_int("RAG_RERANK_TOP_K", "5")
RAG_RERANK_TIMEOUT_MS = _rerank_env_int("RAG_RERANK_TIMEOUT_MS", "3000")
RAG_RERANK_REQUIRE_NPU_PROOF = _rerank_env_flag("RAG_RERANK_REQUIRE_NPU_PROOF", "true")
|
||||||
|
|
||||||
REINDEX_SCRIPT = str(
|
REINDEX_SCRIPT = str(
|
||||||
Path.home()
|
Path.home()
|
||||||
@@ -102,12 +120,125 @@ def get_status() -> dict:
|
|||||||
return {"error": str(e)}
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
def _result_text(result: dict) -> str:
|
||||||
|
"""Return the text field sent to the reranker without changing response shape."""
|
||||||
|
return str(result.get("text") or result.get("content") or "")
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_rerank(query: str, results: list[dict], final_k: int) -> tuple[list[dict], dict]:
    """Optionally rerank semantic results, falling back to vector order on any error.

    Args:
        query: Search query forwarded to the reranker.
        results: Vector-search hits in their original order.
        final_k: Maximum number of results to return.

    Returns:
        ``(results, metadata)``. On success ``results`` holds copies of the
        original hits in reranker order, each with ``rerank_score`` and
        ``rerank_rank`` added. On any failure (transport error, malformed
        response, missing NPU proof when required, no usable matches) the first
        ``final_k`` original hits are returned and ``metadata`` carries ``error``.
    """
    # Local import: http.client errors (e.g. IncompleteRead) are not OSError subclasses.
    from http.client import HTTPException

    metadata = {
        "enabled": RAG_RERANK_ENABLED,
        "attempted": False,
        "ok": False,
        "url": RAG_RERANK_URL,
        "initial_k": len(results),
        "top_k": final_k,
    }
    if not RAG_RERANK_ENABLED:
        metadata["ok"] = True
        metadata["reason"] = "disabled"
        return results[:final_k], metadata
    if not results:
        metadata["ok"] = True
        metadata["reason"] = "no_results"
        return [], metadata

    metadata["attempted"] = True
    documents = []
    # Position in `documents` -> index in `results`; they differ once text-less hits are skipped.
    doc_to_result: list[int] = []
    for idx, item in enumerate(results):
        text = _result_text(item)
        if not text:
            continue
        documents.append(
            {
                "id": str(item.get("id") or idx),
                "text": text,
                "metadata": {
                    "index": idx,
                    "path": item.get("path"),
                    "source": item.get("source"),
                    "chunk": item.get("chunk"),
                },
            }
        )
        doc_to_result.append(idx)
    if not documents:
        metadata["ok"] = True
        metadata["reason"] = "no_text_documents"
        return results[:final_k], metadata

    started = time.monotonic()
    try:
        body = json.dumps(
            {
                "query": query,
                "documents": documents,
                "top_k": final_k,
                "return_documents": False,
            }
        ).encode("utf-8")
        req = request.Request(
            RAG_RERANK_URL,
            data=body,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with request.urlopen(req, timeout=RAG_RERANK_TIMEOUT_MS / 1000.0) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except (OSError, ValueError, TypeError, HTTPException) as exc:
        # OSError covers URLError/HTTPError/TimeoutError; ValueError covers JSON and
        # UTF-8 decoding errors; TypeError covers a non-serialisable request body.
        metadata["duration_ms"] = round((time.monotonic() - started) * 1000, 2)
        metadata["error"] = f"{type(exc).__name__}: {exc}"
        return results[:final_k], metadata

    metadata["duration_ms"] = round((time.monotonic() - started) * 1000, 2)
    if not isinstance(payload, dict):
        metadata["error"] = "reranker returned a non-object response"
        return results[:final_k], metadata

    metadata["ok"] = bool(payload.get("ok", True))
    metadata["model"] = payload.get("model")
    metadata["device"] = payload.get("device")
    metadata["npu_busy_delta_us"] = payload.get("npu_busy_delta_us")
    metadata["require_npu_proof"] = RAG_RERANK_REQUIRE_NPU_PROOF
    metadata["input_count"] = payload.get("input_count")
    ranked = payload.get("results") or []
    if not isinstance(ranked, list):
        ranked = []
    try:
        npu_delta = int(payload.get("npu_busy_delta_us") or 0)
    except (TypeError, ValueError, OverflowError):
        # A malformed proof value counts as "no proof" rather than raising.
        npu_delta = 0
    if RAG_RERANK_REQUIRE_NPU_PROOF and npu_delta <= 0:
        metadata["ok"] = False
        metadata["error"] = "reranker response lacked positive npu_busy_delta_us"
        return results[:final_k], metadata
    if not metadata["ok"] or not ranked:
        metadata["error"] = payload.get("error") or "reranker returned no ranked results"
        return results[:final_k], metadata

    by_id = {str(item.get("id") or idx): idx for idx, item in enumerate(results)}
    reranked = []
    used: set[int] = set()
    for rank, ranked_item in enumerate(ranked):
        if not isinstance(ranked_item, dict):
            continue
        source_idx = None
        if "id" in ranked_item:
            source_idx = by_id.get(str(ranked_item.get("id")))
        if source_idx is None:
            position = ranked_item.get("index")
            # NOTE(review): "index" is taken to be the position within the submitted
            # `documents` list (the only ordering the reranker sees) — confirm against
            # the reranker service. This equals the `results` index when nothing was skipped.
            if isinstance(position, int) and not isinstance(position, bool) and 0 <= position < len(doc_to_result):
                source_idx = doc_to_result[position]
        if source_idx is None or source_idx in used:
            # Unknown reference, or a hit the reranker already listed once.
            continue
        used.add(source_idx)
        merged = dict(results[source_idx])
        merged["rerank_score"] = ranked_item.get("score")
        merged["rerank_rank"] = rank + 1
        reranked.append(merged)
        if len(reranked) >= final_k:
            break
    if not reranked:
        metadata["ok"] = False
        metadata["error"] = "reranker result IDs did not match search results"
        return results[:final_k], metadata
    return reranked, metadata
|
||||||
|
|
||||||
|
|
||||||
def run_semantic_search(query: str, top_k: int = 5) -> dict:
|
def run_semantic_search(query: str, top_k: int = 5) -> dict:
|
||||||
"""Query the local Obsidian Chroma index via the rag-search script."""
|
"""Query the local Obsidian Chroma index via the rag-search script."""
|
||||||
query = (query or "").strip()
|
query = (query or "").strip()
|
||||||
if not query:
|
if not query:
|
||||||
return {"ok": False, "error": "query is required", "results": []}
|
return {"ok": False, "error": "query is required", "results": []}
|
||||||
top_k = max(1, min(int(top_k or 5), 20))
|
top_k = max(1, min(int(top_k or 5), 20))
|
||||||
|
search_k = max(top_k, min(RAG_RERANK_INITIAL_K, 100)) if RAG_RERANK_ENABLED else top_k
|
||||||
|
final_k = min(top_k, RAG_RERANK_TOP_K) if RAG_RERANK_ENABLED else top_k
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
env.setdefault("RAG_COLLECTION", RAG_COLLECTION)
|
env.setdefault("RAG_COLLECTION", RAG_COLLECTION)
|
||||||
env.setdefault("RAG_EMBED_MODEL", RAG_EMBED_MODEL)
|
env.setdefault("RAG_EMBED_MODEL", RAG_EMBED_MODEL)
|
||||||
@@ -119,7 +250,7 @@ def run_semantic_search(query: str, top_k: int = 5) -> dict:
|
|||||||
"--index",
|
"--index",
|
||||||
RAG_COLLECTION,
|
RAG_COLLECTION,
|
||||||
"--top-k",
|
"--top-k",
|
||||||
str(top_k),
|
str(search_k),
|
||||||
"--raw",
|
"--raw",
|
||||||
query,
|
query,
|
||||||
],
|
],
|
||||||
@@ -133,17 +264,27 @@ def run_semantic_search(query: str, top_k: int = 5) -> dict:
|
|||||||
"ok": False,
|
"ok": False,
|
||||||
"query": query,
|
"query": query,
|
||||||
"top_k": top_k,
|
"top_k": top_k,
|
||||||
|
"search_k": search_k,
|
||||||
"error": result.stderr.strip()[-2000:] or result.stdout.strip()[-2000:],
|
"error": result.stderr.strip()[-2000:] or result.stdout.strip()[-2000:],
|
||||||
"results": [],
|
"results": [],
|
||||||
|
"rerank": {
|
||||||
|
"enabled": RAG_RERANK_ENABLED,
|
||||||
|
"attempted": False,
|
||||||
|
"ok": False,
|
||||||
|
"error": "vector search failed before rerank",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
payload = json.loads(result.stdout)
|
payload = json.loads(result.stdout)
|
||||||
results = payload.get("results") or []
|
results = payload.get("results") or []
|
||||||
|
results, rerank_meta = _apply_rerank(query, results, final_k)
|
||||||
return {
|
return {
|
||||||
"ok": True,
|
"ok": True,
|
||||||
"query": query,
|
"query": query,
|
||||||
"index": payload.get("index", RAG_COLLECTION),
|
"index": payload.get("index", RAG_COLLECTION),
|
||||||
"top_k": top_k,
|
"top_k": top_k,
|
||||||
|
"search_k": search_k,
|
||||||
"result_count": len(results),
|
"result_count": len(results),
|
||||||
|
"rerank": rerank_meta,
|
||||||
"results": results,
|
"results": results,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"updatedAt": "2026-05-14T00:04:59.343Z",
|
"updatedAt": "2026-06-05T19:59:54.879Z",
|
||||||
"createdAt": "2026-05-13T21:40:33.847Z",
|
"createdAt": "2026-05-13T21:40:33.847Z",
|
||||||
"id": "PlZywwqL8MRNEAN6",
|
"id": "PlZywwqL8MRNEAN6",
|
||||||
"name": "Evening Digest",
|
"name": "Evening Digest",
|
||||||
@@ -56,7 +56,9 @@
|
|||||||
"id": "UPAHgUJVRqZQceL4",
|
"id": "UPAHgUJVRqZQceL4",
|
||||||
"name": "n8n Public API (Failure Digest)"
|
"name": "n8n Public API (Failure Digest)"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"continueOnFail": true,
|
||||||
|
"alwaysOutputData": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"parameters": {
|
"parameters": {
|
||||||
@@ -86,7 +88,9 @@
|
|||||||
"id": "UPAHgUJVRqZQceL4",
|
"id": "UPAHgUJVRqZQceL4",
|
||||||
"name": "n8n Public API (Failure Digest)"
|
"name": "n8n Public API (Failure Digest)"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"continueOnFail": true,
|
||||||
|
"alwaysOutputData": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"parameters": {
|
"parameters": {
|
||||||
@@ -138,7 +142,9 @@
|
|||||||
"id": "465Swz2b71O2KRAK",
|
"id": "465Swz2b71O2KRAK",
|
||||||
"name": "Obsidian Local REST API"
|
"name": "Obsidian Local REST API"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"continueOnFail": true,
|
||||||
|
"alwaysOutputData": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"parameters": {
|
"parameters": {
|
||||||
@@ -275,7 +281,9 @@
|
|||||||
"id": "465Swz2b71O2KRAK",
|
"id": "465Swz2b71O2KRAK",
|
||||||
"name": "Obsidian Local REST API"
|
"name": "Obsidian Local REST API"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"continueOnFail": true,
|
||||||
|
"alwaysOutputData": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"connections": {
|
"connections": {
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
[{"updatedAt":"2026-05-14T21:36:33.045Z","createdAt":"2026-05-14T21:36:33.045Z","id":"PCtD3PuQjzKLyEEE","name":"Obsidian Health + Reindex","description":null,"active":true,"isArchived":false,"nodes":[{"parameters":{},"id":"f9152036-4ee6-48cf-9f71-fd59ce617c52","name":"Manual Trigger","type":"n8n-nodes-base.manualTrigger","typeVersion":1,"position":[0,0]},{"parameters":{"rule":{"interval":[{"field":"hours","hoursInterval":1}]}},"id":"7845e784-c35b-4912-9d72-2463a06d95d2","name":"Hourly Health Schedule","type":"n8n-nodes-base.scheduleTrigger","typeVersion":1.2,"position":[0,180]},{"parameters":{"url":"http://172.19.0.1:27123/","options":{"timeout":10000}},"id":"4976f00c-3539-4d3a-a87d-f7f3ac1adf19","name":"Check Obsidian REST","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[280,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18810/reindex","options":{"timeout":300000}},"id":"8abf0596-3af6-4d56-b4d0-5284f13998ae","name":"Trigger Obsidian Reindex","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[560,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18814/check","options":{"timeout":240000}},"id":"248b4109-2d60-43bc-b598-cb766edde11f","name":"Run RAG Embedding Check","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[840,80],"continueOnFail":true},{"parameters":{"jsCode":"\nconst now = new Date().toISOString();\nconst reindex = $('Trigger Obsidian Reindex').first().json;\nconst rag = $('Run RAG Embedding Check').first().json;\nconst rest = $('Check Obsidian REST').first().json;\nconst ok = Boolean(rest.status === 'OK' || rest.manifest || rest.statusCode) && Boolean(rag.ok !== false) && Boolean(reindex.ok !== false);\nconst body = `# Obsidian Automation Health\n\nUpdated: ${now}\n\n## Status\n\n- Overall: ${ok ? 
'OK' : 'Needs attention'}\n- Obsidian REST: ${rest.status || rest.statusCode || 'responded'}\n- Reindex trigger: ${JSON.stringify(reindex).slice(0, 500)}\n- RAG/embedding check: ${JSON.stringify(rag).slice(0, 1000)}\n\nThis note is automatically overwritten by n8n.\n`;\nreturn [{ json: { ok, path: 'Resources/Obsidian Automation Health.md', body } }];\n"},"id":"e67008ad-0d9e-4546-a180-3d4223b8d05c","name":"Build Health Note","type":"n8n-nodes-base.code","typeVersion":2,"position":[1120,80]},{"parameters":{"method":"PUT","url":"={{'http://172.19.0.1:27123/vault/' + encodeURIComponent($json.path).replace(/%2F/g, '/')}}","sendHeaders":true,"headerParameters":{"parameters":[{"name":"Content-Type","value":"text/markdown"}]},"sendBody":true,"contentType":"raw","rawContentType":"text/markdown","body":"={{$json.body}}","options":{"timeout":30000},"authentication":"genericCredentialType","genericAuthType":"httpHeaderAuth"},"id":"d86d8942-966a-48fd-ad99-cf23408f2ae4","name":"Write Health Note","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[1400,80],"credentials":{"httpHeaderAuth":{"id":"465Swz2b71O2KRAK","name":"Obsidian Local REST API"}}}],"connections":{"Manual Trigger":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Hourly Health Schedule":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Check Obsidian REST":{"main":[[{"node":"Trigger Obsidian Reindex","type":"main","index":0}]]},"Trigger Obsidian Reindex":{"main":[[{"node":"Run RAG Embedding Check","type":"main","index":0}]]},"Run RAG Embedding Check":{"main":[[{"node":"Build Health Note","type":"main","index":0}]]},"Build Health Note":{"main":[[{"node":"Write Health Note","type":"main","index":0}]]}},"settings":{"executionOrder":"v1","callerPolicy":"workflowsFromSameOwner","availableInMCP":false},"staticData":{"node:Hourly Health 
Schedule":{"recurrenceRules":[]}},"meta":null,"pinData":null,"versionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","activeVersionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","versionCounter":4,"triggerCount":1,"tags":[],"shared":[{"updatedAt":"2026-05-14T21:36:33.056Z","createdAt":"2026-05-14T21:36:33.056Z","role":"workflow:owner","workflowId":"PCtD3PuQjzKLyEEE","projectId":"WGdp8QunI1tHpjXa","project":{"updatedAt":"2026-03-11T21:08:10.005Z","createdAt":"2026-03-11T21:05:11.541Z","id":"WGdp8QunI1tHpjXa","name":"will will <will@wills-portal.com>","type":"personal","icon":null,"description":null,"creatorId":"5ad50ead-6e6a-4d12-ab5b-e5db15835bb5"}}],"versionMetadata":{"name":null,"description":null}}]
|
[{"updatedAt":"2026-06-05T20:17:39.529Z","createdAt":"2026-05-14T21:36:33.045Z","id":"PCtD3PuQjzKLyEEE","name":"Obsidian Health + Reindex","description":null,"active":true,"isArchived":false,"nodes":[{"parameters":{},"id":"f9152036-4ee6-48cf-9f71-fd59ce617c52","name":"Manual Trigger","type":"n8n-nodes-base.manualTrigger","typeVersion":1,"position":[0,0]},{"parameters":{"rule":{"interval":[{"field":"hours","hoursInterval":1}]}},"id":"7845e784-c35b-4912-9d72-2463a06d95d2","name":"Hourly Health Schedule","type":"n8n-nodes-base.scheduleTrigger","typeVersion":1.2,"position":[0,180]},{"parameters":{"url":"http://172.19.0.1:27123/","options":{"timeout":10000}},"id":"4976f00c-3539-4d3a-a87d-f7f3ac1adf19","name":"Check Obsidian REST","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[280,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18810/reindex","options":{"timeout":300000}},"id":"8abf0596-3af6-4d56-b4d0-5284f13998ae","name":"Trigger Obsidian Reindex","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[560,80],"continueOnFail":true},{"parameters":{"method":"POST","url":"http://172.19.0.1:18814/check","options":{"timeout":240000}},"id":"248b4109-2d60-43bc-b598-cb766edde11f","name":"Run RAG Embedding Check","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[840,80],"continueOnFail":true},{"parameters":{"jsCode":"\nconst now = new Date().toISOString();\nconst reindex = $('Trigger Obsidian Reindex').first().json;\nconst rag = $('Run RAG Embedding Check').first().json;\nconst rest = $('Check Obsidian REST').first().json;\nconst ok = Boolean(rest.status === 'OK' || rest.manifest || rest.statusCode) && Boolean(rag.ok !== false) && Boolean(reindex.ok !== false);\nconst body = `# Obsidian Automation Health\n\nUpdated: ${now}\n\n## Status\n\n- Overall: ${ok ? 
'OK' : 'Needs attention'}\n- Obsidian REST: ${rest.status || rest.statusCode || 'responded'}\n- Reindex trigger: ${JSON.stringify(reindex).slice(0, 500)}\n- RAG/embedding check: ${JSON.stringify(rag).slice(0, 1000)}\n\nThis note is automatically overwritten by n8n.\n`;\nreturn [{ json: { ok, path: 'Resources/Obsidian Automation Health.md', body } }];\n"},"id":"e67008ad-0d9e-4546-a180-3d4223b8d05c","name":"Build Health Note","type":"n8n-nodes-base.code","typeVersion":2,"position":[1120,80]},{"parameters":{"method":"PUT","url":"={{'http://172.19.0.1:27123/vault/' + encodeURIComponent($json.path).replace(/%2F/g, '/')}}","sendHeaders":true,"headerParameters":{"parameters":[{"name":"Content-Type","value":"text/markdown"}]},"sendBody":true,"contentType":"raw","rawContentType":"text/markdown","body":"={{$json.body}}","options":{"timeout":30000},"authentication":"genericCredentialType","genericAuthType":"httpHeaderAuth"},"id":"d86d8942-966a-48fd-ad99-cf23408f2ae4","name":"Write Health Note","type":"n8n-nodes-base.httpRequest","typeVersion":4.2,"position":[1400,80],"credentials":{"httpHeaderAuth":{"id":"465Swz2b71O2KRAK","name":"Obsidian Local REST API"}},"continueOnFail":true,"alwaysOutputData":true}],"connections":{"Manual Trigger":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Hourly Health Schedule":{"main":[[{"node":"Check Obsidian REST","type":"main","index":0}]]},"Check Obsidian REST":{"main":[[{"node":"Trigger Obsidian Reindex","type":"main","index":0}]]},"Trigger Obsidian Reindex":{"main":[[{"node":"Run RAG Embedding Check","type":"main","index":0}]]},"Run RAG Embedding Check":{"main":[[{"node":"Build Health Note","type":"main","index":0}]]},"Build Health Note":{"main":[[{"node":"Write Health Note","type":"main","index":0}]]}},"settings":{"executionOrder":"v1","callerPolicy":"workflowsFromSameOwner","availableInMCP":false},"staticData":{"node:Hourly Health 
Schedule":{"recurrenceRules":[]}},"meta":null,"pinData":null,"versionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","activeVersionId":"2de2a0d3-ab17-47b5-b2ee-a9c5c20969cd","versionCounter":4,"triggerCount":1,"tags":[],"shared":[{"updatedAt":"2026-05-14T21:36:33.056Z","createdAt":"2026-05-14T21:36:33.056Z","role":"workflow:owner","workflowId":"PCtD3PuQjzKLyEEE","projectId":"WGdp8QunI1tHpjXa","project":{"updatedAt":"2026-03-11T21:08:10.005Z","createdAt":"2026-03-11T21:05:11.541Z","id":"WGdp8QunI1tHpjXa","name":"will will <will@wills-portal.com>","type":"personal","icon":null,"description":null,"creatorId":"5ad50ead-6e6a-4d12-ab5b-e5db15835bb5"}}],"versionMetadata":{"name":null,"description":null}}]
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -11,6 +11,14 @@ Environment=PORT=18810
|
|||||||
Environment=RAG_COLLECTION=obsidian_bge_npu
|
Environment=RAG_COLLECTION=obsidian_bge_npu
|
||||||
Environment=RAG_EMBED_MODEL=bge-base-en-v1.5-int8-ov
|
Environment=RAG_EMBED_MODEL=bge-base-en-v1.5-int8-ov
|
||||||
Environment=OLLAMA_BASE_URL=http://127.0.0.1:18817
|
Environment=OLLAMA_BASE_URL=http://127.0.0.1:18817
|
||||||
|
# Request-time second-stage reranking. The :18810 handler keeps vector-order
|
||||||
|
# fallback on reranker timeout/error or missing positive NPU proof.
|
||||||
|
Environment=RAG_RERANK_ENABLED=true
|
||||||
|
Environment=RAG_RERANK_URL=http://127.0.0.1:18818/rerank
|
||||||
|
Environment=RAG_RERANK_INITIAL_K=20
|
||||||
|
Environment=RAG_RERANK_TOP_K=5
|
||||||
|
Environment=RAG_RERANK_TIMEOUT_MS=1500
|
||||||
|
Environment=RAG_RERANK_REQUIRE_NPU_PROOF=true
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=default.target
|
WantedBy=default.target
|
||||||
|
|||||||
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"collapse-filter": true,
|
||||||
|
"search": "",
|
||||||
|
"showTags": false,
|
||||||
|
"showAttachments": false,
|
||||||
|
"hideUnresolved": false,
|
||||||
|
"showOrphans": true,
|
||||||
|
"collapse-color-groups": true,
|
||||||
|
"colorGroups": [],
|
||||||
|
"collapse-display": true,
|
||||||
|
"showArrow": false,
|
||||||
|
"textFadeMultiplier": 0,
|
||||||
|
"nodeSizeMultiplier": 1,
|
||||||
|
"lineSizeMultiplier": 1,
|
||||||
|
"collapse-forces": true,
|
||||||
|
"centerStrength": 0.518713248970312,
|
||||||
|
"repelStrength": 10,
|
||||||
|
"linkStrength": 1,
|
||||||
|
"linkDistance": 250,
|
||||||
|
"scale": 0.9999999999999999,
|
||||||
|
"close": true
|
||||||
|
}
|
||||||
swarm-common/obsidian-vault/will/will-shared-zap/.obsidian/plugins/obsidian-local-rest-api/data.json
Vendored
+13
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"port": 27124,
|
||||||
|
"insecurePort": 27123,
|
||||||
|
"enableInsecureServer": true,
|
||||||
|
"apiKey": "698cfc8b00b93c41480e7e1cb84d77b75176be87507256a5fae9a5b53b5a20cb",
|
||||||
|
"crypto": {
|
||||||
|
"cert": "-----BEGIN CERTIFICATE-----\r\nMIIDRTCCAi2gAwIBAgIBATANBgkqhkiG9w0BAQsFADAiMSAwHgYDVQQDExdPYnNp\r\nZGlhbiBMb2NhbCBSRVNUIEFQSTAeFw0yNjAzMTcxOTU5MjJaFw0yNzAzMTcxOTU5\r\nMjJaMCIxIDAeBgNVBAMTF09ic2lkaWFuIExvY2FsIFJFU1QgQVBJMIIBIjANBgkq\r\nhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAokD5oRVV46TXbRgzEQ1zIrOnu29eKL4Q\r\nyqpNV6Lx6mPyuJyMhcvaPhIf1AFmgOwVFqwae7BSLEqIPKJKLq4Z64WwJXIRdNVF\r\nXAX+r5OrumQObCxXIivBr5T4RHnUpkN9V9LNUzKNbHL2KNX/fooEKy5IhMI9Dh07\r\nV08zFrV0IU5JUjScWSSoaZheOXhnigRBYDz8phvS0PpF8hsCL9tdTqUpMh/weRTi\r\nr71wovgn1ijmF6mJM61gAlK53zG+DWdjEpEUZVEjvsA/5LnEjWPeR0y7NRYqqxg2\r\nQ/VqjJ6PC5aR/dRf2u8Z2rRKaW91dmpTGqRxaIRmnhVmp4FcyTuRJwIDAQABo4GF\r\nMIGCMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgLEMDsGA1UdJQQ0MDIG\r\nCCsGAQUFBwMBBggrBgEFBQcDAgYIKwYBBQUHAwMGCCsGAQUFBwMEBggrBgEFBQcD\r\nCDARBglghkgBhvhCAQEEBAMCAPcwDwYDVR0RBAgwBocEfwAAATANBgkqhkiG9w0B\r\nAQsFAAOCAQEAbUWN+bPEI3k/CPZ6u6blFT1bs+siP1hysTlvRv4vN7CydZLwla3x\r\nocH4uIqwoPIb1Rpi3jPHpqSMiWBNvymK5TPGWmqS2/O6ivF/8AbTvA0YSpMVzIMb\r\n9caOm+wJtz1vsgdf1vy+USOnUtpWm9Sep/6S08Af3J7wS+sOJIWHHD4hlwEk1mpq\r\nxKLpXCm+vH8VuqQ3vSkVBbb4jOVishmO8Yxc+u+xWVpWXxJhaxIFO8MZbC4bbEDR\r\nN6ujylUI6+EF6nFb0SnaD0JDwPNw7ljTP8cB2loAXz2W7rhAiuZU1kjCiQBwWXc0\r\nkJqeYb+EhkCkDCKKCz0zv5xIas04MrxeDg==\r\n-----END CERTIFICATE-----\r\n",
|
||||||
|
"privateKey": "-----BEGIN RSA PRIVATE KEY-----\r\nMIIEogIBAAKCAQEAokD5oRVV46TXbRgzEQ1zIrOnu29eKL4QyqpNV6Lx6mPyuJyM\r\nhcvaPhIf1AFmgOwVFqwae7BSLEqIPKJKLq4Z64WwJXIRdNVFXAX+r5OrumQObCxX\r\nIivBr5T4RHnUpkN9V9LNUzKNbHL2KNX/fooEKy5IhMI9Dh07V08zFrV0IU5JUjSc\r\nWSSoaZheOXhnigRBYDz8phvS0PpF8hsCL9tdTqUpMh/weRTir71wovgn1ijmF6mJ\r\nM61gAlK53zG+DWdjEpEUZVEjvsA/5LnEjWPeR0y7NRYqqxg2Q/VqjJ6PC5aR/dRf\r\n2u8Z2rRKaW91dmpTGqRxaIRmnhVmp4FcyTuRJwIDAQABAoIBACf8umjUIMRHMl5t\r\nGdzIg0kYnKxpcHu7B9liqkgAXP2Gn2GXF1y8Fi+4+MYfiDsas8HQLYCxPjczMSs8\r\nVer2NmYgnv5DhADWtM7OnWt5CdgYY6OOM/U0cnoKzTrXCazmMiRsS/UGnusM1BTR\r\nVLPDYO6ha/boBfMOCjtkxfMBSjsQszS3GVtNsv+LCeY4fYH/tj7LnC9KSaAEDyLB\r\n+Fl6RMp/h11yWC2RLrWMUE/2tRVmz60VJgOphjTLkLtJXsHTOaWKOzg/ZFQaxtLX\r\n5oVGSAnGe0CD5QP8ImSVxnZoErHbA8B7AtsL028pQxy3zrr+5eXbKA7ZQKmjb4yM\r\nVx08I9kCgYEAzSL/9uqxvFinZfAB78g0lFZvO78jjfEWm8/upv+9L2nsrcnSM79T\r\n+M0hOndR8S1Dy0DmavYov2atuXRV3JIlWiNFUi1EM7OCe15GGqgVb3ADpZziJkil\r\nsKrb40rCLEePbp4nmLE7LhYh8TOclXRL8HySuEm74v46uUh2xJJ1m38CgYEAynv+\r\nYgPtuv+4QoAHImO7BQVmVUZg+56NksYLPnf+0ukhOHMoVFTc9KbM12Q5qiHzTBYX\r\n49AErlQ2apMZqcAD40dwdH3Dv9w2gGqLYp2o8mkGc2sPb12SNTIizsbzvTLW24zF\r\njdMs2y5d5ZHNVhfi8yAFmWuaV9lC1P8OVywwflkCgYB/x5h3vxO9hd+oQMuECEqw\r\nR/L73YERLqbtoaVAAzdeLHYQfxHfyANPjL9xAthZCeAb4K5m3DTfnN8EEXJWdfas\r\nYiIRIT9FkUDrBftXKXJIuxaad9HrFP+Yv6U+vNec62pt9jgmBegeOg0kiQi1k/6l\r\nq4NdJhjSOZhsx7WrlquJkwKBgEdaBjwX0lARCKc2Yk02A5MzYeou0MIaDx1neFLd\r\nCgjcaf5wZgfBl9MGbCyCfud66zcmmeiHRv7/YeWQTHzK0xPl+rSyFKapPPNnmBJ6\r\nKCyz4bgOQ/Qkbv8b2bQv23gSUDAxnPPrNGVQI3pgNJFf/XNbF14G0u6d+rT/49fI\r\nFJaRAoGAT7QjSLPBbK+jm1n295LObZjLGEuuiIH9PBKDx1mbcSJkwx4QUpqgr6tT\r\nOchHvloOEBXKf0P5UWOGFJL2UcXnKL5st8D8vQrX8WFZ+ER1dMDyTl/0ly4mhQEH\r\nhN3sxn/PMztYMMCQm94cwQxZQqvLZa7dL/1x8vhm7jSRIfLmZE0=\r\n-----END RSA PRIVATE KEY-----\r\n",
|
||||||
|
"publicKey": "-----BEGIN PUBLIC KEY-----\r\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAokD5oRVV46TXbRgzEQ1z\r\nIrOnu29eKL4QyqpNV6Lx6mPyuJyMhcvaPhIf1AFmgOwVFqwae7BSLEqIPKJKLq4Z\r\n64WwJXIRdNVFXAX+r5OrumQObCxXIivBr5T4RHnUpkN9V9LNUzKNbHL2KNX/fooE\r\nKy5IhMI9Dh07V08zFrV0IU5JUjScWSSoaZheOXhnigRBYDz8phvS0PpF8hsCL9td\r\nTqUpMh/weRTir71wovgn1ijmF6mJM61gAlK53zG+DWdjEpEUZVEjvsA/5LnEjWPe\r\nR0y7NRYqqxg2Q/VqjJ6PC5aR/dRf2u8Z2rRKaW91dmpTGqRxaIRmnhVmp4FcyTuR\r\nJwIDAQAB\r\n-----END PUBLIC KEY-----\r\n"
|
||||||
|
},
|
||||||
|
"enableSecureServer": true,
|
||||||
|
"bindingHost": "0.0.0.0"
|
||||||
|
}
|
||||||
Vendored
+58819
File diff suppressed because one or more lines are too long
+10
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"id": "obsidian-local-rest-api",
|
||||||
|
"name": "Local REST API",
|
||||||
|
"version": "3.4.6",
|
||||||
|
"minAppVersion": "0.12.0",
|
||||||
|
"description": "Get, change or otherwise interact with your notes in Obsidian via a REST API.",
|
||||||
|
"author": "Adam Coddington",
|
||||||
|
"authorUrl": "https://coddingtonbear.net/",
|
||||||
|
"isDesktopOnly": true
|
||||||
|
}
|
||||||
+47
@@ -0,0 +1,47 @@
|
|||||||
|
/* Sets all the text color to red! */
|
||||||
|
|
||||||
|
div.obsidian-local-rest-api-settings div.api-key-display {
|
||||||
|
margin-bottom: 20px;
|
||||||
|
}
|
||||||
|
div.obsidian-local-rest-api-settings div.api-key-display pre {
|
||||||
|
font-size: 0.8em;
|
||||||
|
padding: 10px 20px;
|
||||||
|
background-color: var(--background-modifier-cover);
|
||||||
|
font-family: monospace;
|
||||||
|
user-select: all;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.obsidian-local-rest-api-settings div.setting-item-control {
|
||||||
|
min-width: 50%;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.obsidian-local-rest-api-settings textarea {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.obsidian-local-rest-api-settings div.certificate-expired {
|
||||||
|
padding: 10px 20px;
|
||||||
|
border: 2px solid #ff0000;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.obsidian-local-rest-api-settings div.certificate-expiring-soon {
|
||||||
|
padding: 10px 20px;
|
||||||
|
border: 2px solid #ffff00;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.obsidian-local-rest-api-settings div.certificate-regeneration-recommended {
|
||||||
|
padding: 10px 20px;
|
||||||
|
border: 2px solid #ffff00;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.obsidian-local-rest-api-settings table.api-urls tr {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.obsidian-local-rest-api-settings table.api-urls th, div.obsidian-local-rest-api-settings table.api-urls td {
|
||||||
|
padding: 5px 25px;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.obsidian-local-rest-api-settings table.api-urls tr.disabled td.name, div.obsidian-local-rest-api-settings table.api-urls tr.disabled td.url {
|
||||||
|
text-decoration: line-through;
|
||||||
|
}
|
||||||
@@ -0,0 +1,238 @@
|
|||||||
|
{
|
||||||
|
"main": {
|
||||||
|
"id": "3deecfae849ca8d4",
|
||||||
|
"type": "split",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"id": "bfbbaa82fdc8e552",
|
||||||
|
"type": "tabs",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"id": "91d4ead9052f8b83",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "empty",
|
||||||
|
"state": {},
|
||||||
|
"icon": "lucide-file",
|
||||||
|
"title": "New tab"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"direction": "vertical"
|
||||||
|
},
|
||||||
|
"left": {
|
||||||
|
"id": "28c8862873c84ac7",
|
||||||
|
"type": "split",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"id": "db366f44e3369007",
|
||||||
|
"type": "tabs",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"id": "83702dd4b091f767",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "file-explorer",
|
||||||
|
"state": {
|
||||||
|
"sortOrder": "alphabetical",
|
||||||
|
"autoReveal": true
|
||||||
|
},
|
||||||
|
"icon": "lucide-folder-closed",
|
||||||
|
"title": "Files"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "16fe402f7461b5c4",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "search",
|
||||||
|
"state": {
|
||||||
|
"query": "",
|
||||||
|
"matchingCase": false,
|
||||||
|
"explainSearch": false,
|
||||||
|
"collapseAll": false,
|
||||||
|
"extraContext": false,
|
||||||
|
"sortOrder": "alphabetical"
|
||||||
|
},
|
||||||
|
"icon": "lucide-search",
|
||||||
|
"title": "Search"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "9517f62d1aba2d93",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "bookmarks",
|
||||||
|
"state": {},
|
||||||
|
"icon": "lucide-bookmark",
|
||||||
|
"title": "Bookmarks"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "1c968d6bfe211541",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "notebook-navigator",
|
||||||
|
"state": {},
|
||||||
|
"icon": "notebook-navigator",
|
||||||
|
"title": "Notebook Navigator"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"direction": "horizontal",
|
||||||
|
"width": 321.5
|
||||||
|
},
|
||||||
|
"right": {
|
||||||
|
"id": "c2bbb286ef2dc629",
|
||||||
|
"type": "split",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"id": "f48263853996d79f",
|
||||||
|
"type": "tabs",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"id": "16df20b009c624f4",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "backlink",
|
||||||
|
"state": {
|
||||||
|
"file": "Welcome.md",
|
||||||
|
"collapseAll": false,
|
||||||
|
"extraContext": false,
|
||||||
|
"sortOrder": "alphabetical",
|
||||||
|
"showSearch": false,
|
||||||
|
"searchQuery": "",
|
||||||
|
"backlinkCollapsed": false,
|
||||||
|
"unlinkedCollapsed": true
|
||||||
|
},
|
||||||
|
"icon": "links-coming-in",
|
||||||
|
"title": "Backlinks for Welcome"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "5f7e30b0fc7fe373",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "outgoing-link",
|
||||||
|
"state": {
|
||||||
|
"file": "Welcome.md",
|
||||||
|
"linksCollapsed": false,
|
||||||
|
"unlinkedCollapsed": true
|
||||||
|
},
|
||||||
|
"icon": "links-going-out",
|
||||||
|
"title": "Outgoing links from Welcome"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "0de07aca9c62fd2b",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "tag",
|
||||||
|
"state": {
|
||||||
|
"sortOrder": "frequency",
|
||||||
|
"useHierarchy": true,
|
||||||
|
"showSearch": false,
|
||||||
|
"searchQuery": ""
|
||||||
|
},
|
||||||
|
"icon": "lucide-tags",
|
||||||
|
"title": "Tags"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "92a53d80f80daaef",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "all-properties",
|
||||||
|
"state": {
|
||||||
|
"sortOrder": "frequency",
|
||||||
|
"showSearch": false,
|
||||||
|
"searchQuery": ""
|
||||||
|
},
|
||||||
|
"icon": "lucide-archive",
|
||||||
|
"title": "All properties"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "eefa8a89837d21b5",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "outline",
|
||||||
|
"state": {
|
||||||
|
"file": "Welcome.md",
|
||||||
|
"followCursor": false,
|
||||||
|
"showSearch": false,
|
||||||
|
"searchQuery": ""
|
||||||
|
},
|
||||||
|
"icon": "lucide-list",
|
||||||
|
"title": "Outline of Welcome"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"direction": "horizontal",
|
||||||
|
"width": 300,
|
||||||
|
"collapsed": true
|
||||||
|
},
|
||||||
|
"left-ribbon": {
|
||||||
|
"hiddenItems": {
|
||||||
|
"switcher:Open quick switcher": false,
|
||||||
|
"graph:Open graph view": false,
|
||||||
|
"canvas:Create new canvas": false,
|
||||||
|
"daily-notes:Open today's daily note": false,
|
||||||
|
"templates:Insert template": false,
|
||||||
|
"command-palette:Open command palette": false,
|
||||||
|
"bases:Create new base": false,
|
||||||
|
"table-editor-obsidian:Advanced Tables Toolbar": false,
|
||||||
|
"notebook-navigator:Notebook Navigator": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"active": "83702dd4b091f767",
|
||||||
|
"lastOpenFiles": [
|
||||||
|
"Weekend Activity Ideas.md",
|
||||||
|
"Vault Conventions.md",
|
||||||
|
"Templates/Atlas Artifacts/test-report.md",
|
||||||
|
"Templates/Atlas Artifacts/status-report.md",
|
||||||
|
"Templates/Atlas Artifacts/runbook.md",
|
||||||
|
"Templates/Atlas Artifacts/reviewer-checklist.md",
|
||||||
|
"Templates/Atlas Artifacts/postmortem.md",
|
||||||
|
"Templates/Atlas Artifacts/implementation-plan.md",
|
||||||
|
"Templates/Atlas Artifacts/diagram.md",
|
||||||
|
"Templates/Atlas Artifacts/decision-log.md",
|
||||||
|
"Templates/Atlas Artifacts/agent-audit-event.md",
|
||||||
|
"Templates/Atlas Artifacts/README.md",
|
||||||
|
"Templates/Runbook.md",
|
||||||
|
"Templates/Project.md",
|
||||||
|
"Templates/Person.md",
|
||||||
|
"Templates/Meeting.md",
|
||||||
|
"Templates/Kanban Task Graph Templates.md",
|
||||||
|
"Templates/Diary Weekly Review.md",
|
||||||
|
"Templates/Diary Daily.md",
|
||||||
|
"Templates/Decision.md",
|
||||||
|
"Templates/Daily Note.md",
|
||||||
|
"Templates/Context Pack.md",
|
||||||
|
"Templates/Atlas Artifacts",
|
||||||
|
"Templates",
|
||||||
|
"Runbooks/Runbooks Home.md",
|
||||||
|
"Runbooks/Promote Session Output to Notes.md",
|
||||||
|
"Runbooks/Atlas Kanban Durable Project Workflow.md",
|
||||||
|
"Runbooks/Atlas Event-Driven Automation.md",
|
||||||
|
"Projects/Atlas Capability Upgrade Program/Reports/Status",
|
||||||
|
"Projects/Atlas Capability Upgrade Program/Reports",
|
||||||
|
"Projects/Atlas Capability Upgrade Program/Plans",
|
||||||
|
"Projects/Atlas Capability Upgrade Program",
|
||||||
|
"Projects/Atlas",
|
||||||
|
"Projects",
|
||||||
|
"People",
|
||||||
|
"Meetings",
|
||||||
|
"Infrastructure/Architecture - Service Topology.canvas",
|
||||||
|
"Infrastructure/Architecture - Overview.canvas",
|
||||||
|
"Infrastructure/Architecture - Master.canvas",
|
||||||
|
"Infrastructure/Architecture - Automation Flow.canvas",
|
||||||
|
"Untitled.canvas"
|
||||||
|
]
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user