72434c8bc3
Roll up confidence, recommendation, authority, fallback, and service-level metrics, including v1 authority-flag handling.
713 lines
31 KiB
Python
Executable File
713 lines
31 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Compact, read-only NPU/OpenVINO utilization digest.
|
|
|
|
Default behavior is safe for on-demand or scheduled runs: health checks plus
|
|
bounded synthetic probes, one compact JSONL artifact, and no service restarts,
|
|
routing changes, advisory POSTs, vector mutations, outbound sends, or private
|
|
root broadening.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import base64
|
|
import datetime as dt
|
|
import json
|
|
import math
|
|
import os
|
|
import tempfile
|
|
import time
|
|
import urllib.error
|
|
import urllib.parse
|
|
import urllib.request
|
|
import uuid
|
|
import wave
|
|
from dataclasses import asdict, dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any, Callable
|
|
|
|
# sysfs counter file that read_busy() parses as an integer; it is sampled
# before and after each probe (microseconds, going by the file name).
BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
# Default directory for the JSONL digest artifacts (overridable with --out).
DEFAULT_OUT_DIR = Path("/home/will/.local/state/npu-utilization/digests")

# Loopback endpoints of the probed services: one health URL per service plus
# the URL that receives the synthetic probe request.
EMBED_URL = "http://127.0.0.1:18817/v1/embeddings"
EMBED_HEALTH_URL = "http://127.0.0.1:18817/healthz"
RERANK_URL = "http://127.0.0.1:18818/rerank"
RERANK_HEALTH_URL = "http://127.0.0.1:18818/readyz"
WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"
WHISPER_HEALTH_URL = "http://127.0.0.1:18816/health"
CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
CLASSIFIER_HEALTH_URL = "http://127.0.0.1:18819/healthz"
GENAI_HEALTH_URL = "http://127.0.0.1:18820/healthz"
GENAI_GENERATE_URL = "http://127.0.0.1:18820/v1/generate"
DOC_TRIAGE_URL = "http://127.0.0.1:18829/triage"
DOC_TRIAGE_HEALTH_URL = "http://127.0.0.1:18829/healthz"
# The remaining services are only health-checked by run(); no probe request
# is sent to them. The advisory gateway is addressed at 172.19.0.1 rather
# than loopback.
RAG_ENDPOINT_HEALTH_URL = "http://127.0.0.1:18810/healthz"
RAG_HEALTH_URL = "http://127.0.0.1:18814/healthz"
ADVISORY_HEALTH_URL = "http://172.19.0.1:18830/healthz"
|
|
|
|
|
|
@dataclass
class ServiceRow:
    """One per-service record of the digest.

    Every probe/health helper in this file returns a ``ServiceRow``. Fields
    left at ``None`` (and empty lists) are dropped by ``compact_dict`` when
    the row is serialized, so optional metrics only appear for the services
    that set them.
    """

    # Record discriminator in the JSONL output (the summary uses "summary").
    type: str = "service"
    # Short service name, e.g. "embeddings" or "rag_health".
    service: str = ""
    # Health check passed and, when a probe ran, the probe call succeeded too.
    reachable: bool = False
    # True once a synthetic probe request has been issued.
    probe_ran: bool = False
    # Set by apply_proof(): True only for a positive sysfs busy delta;
    # None means no proof was attempted for this row.
    proof_ok: bool | None = None
    # Number of probe requests issued (the probes set this to 1).
    calls: int = 0
    # Item count; embeddings sets 1, the classifier sets the label count.
    items: int = 0
    # Probe latency in milliseconds (measured, or reported by the service).
    avg_ms: float | None = None
    # sysfs busy-counter delta measured around the probe.
    npu_delta_us: int | None = None
    # Busy delta self-reported in the service response, when it is an int.
    response_delta_us: int | None = None
    # Operating mode label, e.g. "unavailable", "health_only", "NPU", "dry_run".
    mode: str = "unavailable"
    # Incremented for unavailability, a failed proof, or a skipped smoke probe.
    fallbacks: int = 0
    # Short warning codes collected while building the row.
    warnings: list[str] = field(default_factory=list)
    # "none", or "closed:<capability>" for a capability that is kept closed;
    # build_summary() counts values starting with "closed:".
    gate: str = "none"
    # Service-specific optional counters (whisper/genai jobs, classifier
    # events, doc-triage files, rerank docs).
    jobs: int | None = None
    events: int | None = None
    files: int | None = None
    docs: int | None = None
    # Whisper probe: length of the transcribed text and reported sample rate.
    text_len: int | None = None
    sample_rate: int | None = None
    # Embeddings probe: number of returned vectors and reported dimension.
    embedding_count: int | None = None
    embedding_dim: int | None = None
    # Classifier probe: dry-run marker and 0/1 suppress/escalate indicators.
    dry_run: bool | None = None
    suppress: int | None = None
    escalate: int | None = None
    # Classifier probe: recommendation label, clamped confidence and bucket.
    recommendation: str | None = None
    confidence: float | None = None
    confidence_bucket: str | None = None
    # Result of count_authority_violations() for the classifier response.
    authority_violations: int | None = None
    # GenAI health: value of the "loaded" flag, when the health payload has one.
    loaded: bool | None = None
    # Doc triage: number of allowed roots sent with the probe request.
    allowed_roots_count: int | None = None
    # Why a probe was skipped, e.g. "smoke_disabled" or "skipped_cold_load".
    reason: str | None = None
    # Short error text (truncated to 80 characters by the helpers that set it).
    error: str | None = None
|
|
|
|
|
|
def compact_dict(obj: Any) -> dict[str, Any]:
    """Return *obj* as a dict without its ``None`` and empty-list values.

    Dataclass instances are converted with ``dataclasses.asdict``; anything
    else is passed to ``dict()``. Other falsy values (``0``, ``False``,
    ``""``) are kept.
    """
    if hasattr(obj, "__dataclass_fields__"):
        source = asdict(obj)
    else:
        source = dict(obj)
    compact: dict[str, Any] = {}
    for key, value in source.items():
        if value is not None and value != []:
            compact[key] = value
    return compact
|
|
|
|
|
|
# Lower-case action names that count_authority_violations() accepts in
# "allowed_actions" / "actual_action" without counting a violation.
AUTHORITY_SAFE_ACTIONS = {
    "", "none", "log", "observe", "dry_run", "recommend", "suppress", "escalate",
    "record_metric", "compare_with_expected_label", "include_in_digest",
    "open_review_ticket_candidate", "recommend_human_review",
}
# Canonical authority flag names. A truthy value stored under any of these
# in a response's "authority_flags" mapping counts as one violation.
AUTHORITY_FLAG_KEYS = {
    "advisory_post",
    "atlas_routing",
    "broad_private_scan",
    "delivery_send",
    "gateway_restart",
    "live_routing",
    "memory_write",
    "outbound_send",
    "private_root_scan",
    "service_restart",
    "tool_execution",
    "vector_mutation",
}
# Alternate spellings ("can_*" / "may_*") of the flags, mapped to their
# canonical name in AUTHORITY_FLAG_KEYS before the membership check.
AUTHORITY_FLAG_ALIASES = {
    "can_route_atlas": "atlas_routing",
    "can_write_memory": "memory_write",
    "can_execute_tools": "tool_execution",
    "can_restart_services": "service_restart",
    "can_send_outbound": "outbound_send",
    "can_scan_private_roots": "private_root_scan",
    "can_mutate_vector_store": "vector_mutation",
    "can_post_advisory_event": "advisory_post",
    "can_change_gateway_config": "gateway_restart",
    "may_route": "atlas_routing",
    "may_write_memory": "memory_write",
    "may_execute_tools": "tool_execution",
    "may_restart_services": "service_restart",
    "may_send_external": "outbound_send",
    "may_process_private_dirs": "private_root_scan",
    "may_mutate_vector_db": "vector_mutation",
    "may_change_live_config": "gateway_restart",
}
|
|
|
|
|
|
def confidence_bucket(confidence: float | None) -> str | None:
    """Map a confidence value to ``"high"``, ``"medium"`` or ``"low"``.

    Values of at least 0.8 are "high", at least 0.5 are "medium", everything
    else is "low". ``None`` is passed through unchanged.
    """
    if confidence is None:
        return None
    # Ordered from the highest floor down; the first floor reached wins.
    for floor, label in ((0.8, "high"), (0.5, "medium")):
        if confidence >= floor:
            return label
    return "low"
|
|
|
|
|
|
def coerce_confidence(value: Any) -> float | None:
    """Coerce *value* to a confidence in ``[0.0, 1.0]``, or ``None``.

    Accepts ints, floats and numeric strings and clamps them into range.
    Booleans, non-numeric strings, NaN and every other type yield ``None``.

    NaN is rejected explicitly: ``min(1.0, nan)`` evaluates to ``1.0``, so a
    plain clamp would report NaN as full confidence. Integers are clamped
    before the float conversion so arbitrarily large JSON integers cannot
    raise ``OverflowError``.
    """
    if isinstance(value, bool):
        # bool is an int subclass; True/False are label flags, not scores.
        return None
    if isinstance(value, str):
        try:
            number = float(value)
        except ValueError:
            return None
    elif isinstance(value, int):
        number = float(max(0, min(1, value)))
    elif isinstance(value, float):
        number = float(value)
    else:
        return None
    if math.isnan(number):
        return None
    return max(0.0, min(1.0, number))
|
|
|
|
|
|
def extract_confidence(payload: dict[str, Any]) -> float | None:
    """Return the confidence carried by a classifier-style payload.

    A usable top-level ``"confidence"`` wins. Otherwise every entry of the
    ``"labels"`` mapping is inspected: a dict entry contributes the first of
    its ``confidence`` / ``score`` / ``probability`` keys that is present,
    any other entry is coerced directly. The highest usable score is
    returned, or ``None`` when there is none.
    """
    top_level = coerce_confidence(payload.get("confidence"))
    if top_level is not None:
        return top_level

    label_map = payload.get("labels")
    if not isinstance(label_map, dict):
        return None

    candidates: list[float] = []
    for entry in label_map.values():
        if isinstance(entry, dict):
            # Only the first key that is present is consulted, even when its
            # value turns out not to be a usable number.
            present = [name for name in ("confidence", "score", "probability") if name in entry]
            coerced = coerce_confidence(entry[present[0]]) if present else None
        else:
            coerced = coerce_confidence(entry)
        if coerced is not None:
            candidates.append(coerced)
    return max(candidates, default=None)
|
|
|
|
|
|
def extract_recommendation(payload: dict[str, Any]) -> str | None:
    """Return a short recommendation label from *payload*, or ``None``.

    The first non-empty string among the top-level ``recommendation``,
    ``classification`` and ``input_class`` keys is used. Failing that, the
    ``action`` mapping's ``recommendation`` (or ``type``) is stringified.
    The result is truncated to 48 characters.
    """
    top_level = (payload.get(name) for name in ("recommendation", "classification", "input_class"))
    for candidate in top_level:
        if isinstance(candidate, str) and candidate:
            return candidate[:48]

    nested = payload.get("action")
    if not isinstance(nested, dict):
        return None
    fallback = nested.get("recommendation") or nested.get("type")
    if not fallback:
        return None
    return str(fallback)[:48]
|
|
|
|
|
|
def count_authority_violations(payload: dict[str, Any]) -> int:
    """Count advisory response hints that would exceed read-only/dry-run authority.

    Supports both legacy compact payloads and `npu_advisory_decision_v1`.
    Three parts of the payload are inspected:

    * ``authority_flags``: each truthy flag whose (alias-resolved) name is a
      known authority flag counts once.
    * ``allowed_actions``: each entry that is not a safe action counts once.
    * ``actual_action``: counts at most once. An object-shaped value is a
      violation when it was performed, lists side effects, or has a kind
      that is neither safe nor a no-op report; a plain value is a violation
      when it is non-empty and not a safe action.
    """
    total = 0

    flag_map = payload.get("authority_flags")
    if isinstance(flag_map, dict):
        for name, enabled in flag_map.items():
            if AUTHORITY_FLAG_ALIASES.get(name, name) in AUTHORITY_FLAG_KEYS and bool(enabled):
                total += 1

    allowed_list = payload.get("allowed_actions")
    if isinstance(allowed_list, list):
        total += sum(1 for item in allowed_list if str(item).lower() not in AUTHORITY_SAFE_ACTIONS)

    actual = payload.get("actual_action")
    if isinstance(actual, dict):
        kind = str(actual.get("kind") or "none").lower()
        # Object-shaped no-op reports are acceptable in addition to the safe set.
        noop_kinds = AUTHORITY_SAFE_ACTIONS | {"recorded_metric", "dry_run_reported"}
        exceeded = (
            bool(actual.get("performed"))
            or bool(actual.get("side_effects"))
            or kind not in noop_kinds
        )
    else:
        name = str(actual or "").lower()
        exceeded = bool(name) and name not in AUTHORITY_SAFE_ACTIONS
    if exceeded:
        total += 1
    return total
|
|
|
|
|
|
def read_busy(path: Path = BUSY_PATH) -> int | None:
    """Read the integer stored in *path*, or ``None`` if that fails.

    Any failure (missing file, permission error, non-integer content) is
    deliberately reduced to ``None`` so a missing counter never aborts the
    digest.
    """
    try:
        raw = path.read_text()
        return int(raw.strip())
    except Exception:
        return None
|
|
|
|
|
|
def safe_error(exc: BaseException) -> str:
    """Return only the exception's class name.

    The message is intentionally left out (presumably so that exception
    details never reach the digest output).
    """
    return exc.__class__.__name__
|
|
|
|
|
|
def http_get_json(url: str, timeout: float) -> tuple[int, dict[str, Any]]:
    """GET *url* and return ``(status, json_object)`` without ever raising
    for network or decoding problems.

    * Success: the HTTP status and the decoded JSON object (an empty body
      counts as ``{}``). At most 1 MiB of the body is read.
    * HTTP error status: the status code with the decoded JSON error body,
      or ``{"error": "http_error"}`` when that body is not a JSON object.
    * Anything else (connection failure, timeout, undecodable body): status
      ``0`` and ``{"error": <exception class name>}``.

    A body that is valid JSON but not an object (list, string, number) is
    reported as ``(0, {"error": "non_object_json"})``, like an undecodable
    body. Callers use ``.get`` on the payload, so returning it unchanged
    would crash them.
    """
    try:
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            status = int(resp.status)
            body = resp.read(1024 * 1024).decode("utf-8", "replace")
        parsed = json.loads(body or "{}")
    except urllib.error.HTTPError as exc:
        try:
            parsed = json.loads(exc.read(1024 * 1024).decode("utf-8", "replace") or "{}")
        except Exception:
            parsed = None
        if isinstance(parsed, dict):
            return int(exc.code), parsed
        return int(exc.code), {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
    if not isinstance(parsed, dict):
        return 0, {"error": "non_object_json"}
    return status, parsed
|
|
|
|
|
|
def http_post_json(url: str, payload: dict[str, Any], timeout: float) -> tuple[int, dict[str, Any]]:
    """POST *payload* as JSON to *url* and return ``(status, json_object)``.

    * Success: the HTTP status and the decoded JSON object (an empty body
      counts as ``{}``). At most 2 MiB of the body is read.
    * HTTP error status: the status code with the decoded JSON error body
      (at most 1 MiB read), or ``{"error": "http_error"}`` when that body is
      not a JSON object.
    * Anything else (connection failure, timeout, undecodable body): status
      ``0`` and ``{"error": <exception class name>}``.

    A body that is valid JSON but not an object is reported as
    ``(0, {"error": "non_object_json"})``, because callers use ``.get`` on
    the payload. Serializing *payload* happens before the request, so a
    non-serializable payload still raises.
    """
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    try:
        # Built inside the try (as in http_get_json) so a malformed URL is
        # reported as an error payload instead of raising.
        req = urllib.request.Request(
            url,
            data=body,
            headers={"Content-Type": "application/json", "Accept": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            status = int(resp.status)
            data = resp.read(2 * 1024 * 1024).decode("utf-8", "replace")
        parsed = json.loads(data or "{}")
    except urllib.error.HTTPError as exc:
        try:
            parsed = json.loads(exc.read(1024 * 1024).decode("utf-8", "replace") or "{}")
        except Exception:
            parsed = None
        if isinstance(parsed, dict):
            return int(exc.code), parsed
        return int(exc.code), {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
    if not isinstance(parsed, dict):
        return 0, {"error": "non_object_json"}
    return status, parsed
|
|
|
|
|
|
def health_row(service: str, url: str, timeout: float, gate: str = "none", mode: str = "health_only") -> tuple[ServiceRow, dict[str, Any]]:
    """Health-check *url* and return ``(row, health_payload)``.

    The service counts as healthy when the response status is 200 and the
    payload does not carry ``"ok": false``. A healthy row gets *mode*; an
    unhealthy one is marked ``"unavailable"``, records one fallback, the
    ``"unavailable"`` warning and a short error (at most 80 characters)
    taken from the payload's ``error`` / ``ready_error`` or the HTTP status.
    """
    status, payload = http_get_json(url, timeout)
    healthy = status == 200 and payload.get("ok", True) is not False
    if healthy:
        return ServiceRow(service=service, reachable=True, mode=mode, gate=gate), payload

    detail = payload.get("error") or payload.get("ready_error") or f"http_{status}"
    row = ServiceRow(
        service=service,
        reachable=False,
        mode="unavailable",
        gate=gate,
        fallbacks=1,
        warnings=["unavailable"],
        error=str(detail)[:80],
    )
    return row, payload
|
|
|
|
|
|
def measure_probe(fn: Callable[[], tuple[int, dict[str, Any]]], timeout_label: str, busy_path: Path = BUSY_PATH) -> tuple[int, dict[str, Any], float, int | None]:
    """Run *fn* once and measure its wall time and the busy-counter delta.

    Returns ``(status, payload, elapsed_ms, delta)``: the first two come from
    *fn*, ``elapsed_ms`` is rounded to three decimals, and ``delta`` is the
    difference of the counter read before and after the call (``None`` when
    either read failed). *timeout_label* is accepted but not used here.
    """
    busy_before = read_busy(busy_path)
    t0 = time.perf_counter()
    status, payload = fn()
    t1 = time.perf_counter()
    busy_after = read_busy(busy_path)

    elapsed_ms = round((t1 - t0) * 1000, 3)
    if busy_before is None or busy_after is None:
        delta = None
    else:
        delta = busy_after - busy_before
    return status, payload, elapsed_ms, delta
|
|
|
|
|
|
def apply_proof(row: ServiceRow, delta: int | None) -> None:
    """Store the measured counter *delta* on *row* and judge the proof.

    Only a strictly positive delta is a proof. Otherwise one fallback is
    recorded together with a warning: ``"missing_sysfs_counter"`` when no
    delta could be measured, ``"no_positive_sysfs_delta"`` when it was
    measured but not positive.
    """
    row.npu_delta_us = delta
    if delta is not None and delta > 0:
        row.proof_ok = True
        return
    row.proof_ok = False
    row.fallbacks += 1
    row.warnings.append("missing_sysfs_counter" if delta is None else "no_positive_sysfs_delta")
|
|
|
|
|
|
def mark_skipped_fallback(row: ServiceRow, reason: str) -> None:
    """Count a skipped or unloaded proof as one fallback on *row*.

    Meant for proof-capable rows whose bounded smoke probe was disabled or
    skipped (for example to avoid cold-loading a model). Rows that are
    health-only by design should not be passed here, so their fallback
    count stays at zero. *reason* is appended to the row's warnings.
    """
    row.warnings.append(reason)
    row.fallbacks = row.fallbacks + 1
|
|
|
|
|
|
def probe_embeddings(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the embeddings service and send one synthetic request.

    Returns the health row unchanged when the service is unavailable.
    Otherwise one fixed, non-private sentence is embedded and the row
    records latency, the number of returned vectors, the embedding
    dimension, the service-reported busy delta and the sysfs proof.

    ``embedding_dim`` is taken from the response's ``embedding_dim`` only
    when that is an integer; otherwise it is derived from the length of the
    first returned vector, so responses without an explicit dimension are
    still measured.

    *post_json* can be substituted (e.g. in tests) for the HTTP POST helper.
    """
    row, _ = health_row("embeddings", EMBED_HEALTH_URL, timeout)
    if not row.reachable:
        return row

    payload = {"input": "non-private npu utilization digest probe", "model": "bge-base-en-v1.5-int8-ov"}
    status, data, elapsed, delta = measure_probe(lambda: post_json(EMBED_URL, payload, timeout), "embeddings", busy_path)

    row.probe_ran = True
    row.calls = 1
    row.items = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and "data" in data

    vectors = data.get("data")
    if not isinstance(vectors, list):
        vectors = []
    row.embedding_count = len(vectors)

    reported_dim = data.get("embedding_dim")
    if isinstance(reported_dim, int) and not isinstance(reported_dim, bool):
        row.embedding_dim = reported_dim
    elif vectors and isinstance(vectors[0], dict) and isinstance(vectors[0].get("embedding"), list):
        # No usable explicit dimension: measure the first vector instead.
        row.embedding_dim = len(vectors[0]["embedding"])
    else:
        row.embedding_dim = None

    reported_delta = data.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None

    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
|
|
|
|
|
def probe_rerank(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the rerank service and rerank two synthetic documents.

    Returns the health row unchanged when the service is unavailable.
    Otherwise the row records the document count, latency, the
    service-reported busy delta and the sysfs proof.

    Latency is the service's own ``duration_ms`` when that is a usable
    number, and the locally measured wall time otherwise. A malformed
    ``duration_ms`` therefore no longer aborts the digest.

    *post_json* can be substituted (e.g. in tests) for the HTTP POST helper.
    """
    row, _ = health_row("rerank", RERANK_HEALTH_URL, timeout)
    if not row.reachable:
        return row

    docs = ["Intel NPU accelerates OpenVINO inference.", "Bananas ripen on a kitchen counter."]
    payload = {"query": "OpenVINO NPU inference", "documents": docs, "top_k": 2, "return_documents": False}
    status, data, elapsed, delta = measure_probe(lambda: post_json(RERANK_URL, payload, timeout), "rerank", busy_path)

    row.probe_ran = True
    row.calls = 1
    row.docs = len(docs)
    try:
        row.avg_ms = float(data.get("duration_ms") or elapsed)
    except (TypeError, ValueError, OverflowError):
        # duration_ms was present but not numeric; use our own measurement.
        row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and data.get("ok", True) is not False

    reported_delta = data.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None

    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
|
|
|
|
|
def probe_classifier(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the classifier and classify one synthetic event in dry-run.

    Returns the health row unchanged when the service is unavailable.
    Otherwise a fixed, non-private event is posted with ``dry_run`` enabled
    and the row records latency, the escalate/suppress indicators, the
    recommendation, confidence and its bucket, the number of authority
    violations found in the response, the label count and the sysfs proof.

    *post_json* can be substituted (e.g. in tests) for the HTTP POST helper.
    """
    row, _ = health_row("classifier", CLASSIFIER_HEALTH_URL, timeout, mode="dry_run")
    if not row.reachable:
        return row

    payload = {
        "id": "npu-digest-probe",
        "text": "Non-private cron event: backup completed successfully, no user action required.",
        "options": {"dry_run": True, "include_evidence": False},
    }
    status, data, elapsed, delta = measure_probe(lambda: post_json(CLASSIFIER_URL, payload, timeout), "classifier", busy_path)

    row.probe_ran = True
    row.calls = 1
    row.events = 1
    row.avg_ms = elapsed
    row.mode = "dry_run"
    row.dry_run = True
    row.reachable = status == 200 and "error" not in data

    # The first integer found under either key is the service-reported delta.
    row.response_delta_us = None
    for delta_key in ("sysfs_npu_busy_delta_us", "npu_busy_delta_us"):
        if isinstance(data.get(delta_key), int):
            row.response_delta_us = data.get(delta_key)
            break

    labels: dict[str, Any] = data.get("labels") if isinstance(data.get("labels"), dict) else {}
    action: dict[str, Any] = data.get("action") if isinstance(data.get("action"), dict) else {}

    wants_escalate = any((action.get("escalate"), labels.get("action_required"), labels.get("tool_needed")))
    wants_suppress = any((action.get("suppress"), labels.get("no_op"), labels.get("duplicate")))
    row.escalate = int(wants_escalate)
    row.suppress = int(wants_suppress)

    # Without an explicit recommendation, derive one from the indicators.
    if row.escalate:
        derived = "escalate"
    elif row.suppress:
        derived = "suppress"
    else:
        derived = "log"
    row.recommendation = extract_recommendation(data) or derived

    row.confidence = extract_confidence(data)
    row.confidence_bucket = confidence_bucket(row.confidence)

    row.authority_violations = count_authority_violations(data)
    if row.authority_violations:
        row.warnings.append("authority_violation")

    row.items = len(labels)
    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
|
|
|
|
|
def write_tone_wav(path: Path, seconds: float = 0.35, sample_rate: int = 16000) -> None:
    """Write a mono, 16-bit PCM WAV file containing a 440 Hz sine tone.

    The file holds ``int(seconds * sample_rate)`` frames with a peak
    amplitude of 9000, stored as little-endian signed samples.
    """
    total = int(seconds * sample_rate)
    with wave.open(str(path), "wb") as wav:
        wav.setnchannels(1)
        wav.setsampwidth(2)
        wav.setframerate(sample_rate)
        pcm = b"".join(
            int(9000 * math.sin(2 * math.pi * 440 * (n / sample_rate))).to_bytes(2, byteorder="little", signed=True)
            for n in range(total)
        )
        wav.writeframesraw(pcm)
|
|
|
|
|
|
def post_multipart_file(url: str, file_path: Path, timeout: float) -> tuple[int, dict[str, Any]]:
    """POST *file_path* as ``multipart/form-data`` and return ``(status, json_object)``.

    The form carries ``model=whisper``, ``response_format=json`` and the
    file as ``npu-digest.wav`` (``audio/wav``). Result conventions match
    ``http_post_json``:

    * Success: the HTTP status and the decoded JSON object (at most 1 MiB
      of the body is read; an empty body counts as ``{}``).
    * HTTP error status: the real status code with the decoded JSON error
      body, or ``{"error": "http_error"}`` when that body is not a JSON
      object. Previously every HTTP error collapsed to status ``0``.
    * Anything else: status ``0`` and ``{"error": <exception class name>}``.

    A body that is valid JSON but not an object is reported as
    ``(0, {"error": "non_object_json"})``, because callers use ``.get`` on
    the payload. Reading the file happens before the request, so a missing
    file still raises.
    """
    boundary = "----npu-digest-" + uuid.uuid4().hex
    file_bytes = file_path.read_bytes()
    parts = [
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nwhisper\r\n".encode(),
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"response_format\"\r\n\r\njson\r\n".encode(),
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"file\"; filename=\"npu-digest.wav\"\r\nContent-Type: audio/wav\r\n\r\n".encode(),
        file_bytes,
        f"\r\n--{boundary}--\r\n".encode(),
    ]
    req = urllib.request.Request(url, data=b"".join(parts), headers={"Content-Type": f"multipart/form-data; boundary={boundary}"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            status = int(resp.status)
            raw = resp.read(1024 * 1024).decode("utf-8", "replace")
        parsed = json.loads(raw or "{}")
    except urllib.error.HTTPError as exc:
        try:
            parsed = json.loads(exc.read(1024 * 1024).decode("utf-8", "replace") or "{}")
        except Exception:
            parsed = None
        if isinstance(parsed, dict):
            return int(exc.code), parsed
        return int(exc.code), {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}
    if not isinstance(parsed, dict):
        return 0, {"error": "non_object_json"}
    return status, parsed
|
|
|
|
|
|
def probe_whisper(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH) -> ServiceRow:
    """Health-check the whisper service and optionally transcribe a test tone.

    An unavailable service returns the health row with ``jobs = 0``. When
    the service is healthy but *include_smoke* is false, the row is marked
    ``health_only`` / ``smoke_disabled`` and one skipped fallback is
    recorded. Otherwise a short generated tone is uploaded from a temporary
    directory and the row records latency, transcript length, sample rate,
    the service-reported busy delta and the sysfs proof.
    """
    row, _ = health_row("whisper", WHISPER_HEALTH_URL, timeout)
    row.jobs = 0
    if not row.reachable:
        return row
    if not include_smoke:
        row.mode = "health_only"
        row.reason = "smoke_disabled"
        mark_skipped_fallback(row, "skipped")
        return row

    # The tone file only has to exist for the duration of the upload.
    with tempfile.TemporaryDirectory(prefix="npu-digest-whisper-") as scratch:
        tone = Path(scratch) / "probe.wav"
        write_tone_wav(tone)
        status, data, elapsed, delta = measure_probe(lambda: post_multipart_file(WHISPER_URL, tone, timeout), "whisper", busy_path)

    row.probe_ran = True
    row.calls = 1
    row.jobs = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and "error" not in data
    row.text_len = len(str(data.get("text") or ""))

    reported_rate = data.get("sample_rate")
    row.sample_rate = reported_rate if isinstance(reported_rate, int) else None
    reported_delta = data.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None

    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
|
|
|
|
|
def probe_genai(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the GenAI service and optionally run a tiny generation.

    ``row.loaded`` mirrors the health payload's ``loaded`` flag when present.
    An unavailable service returns the health row. The generation probe is
    skipped, with one skipped fallback, when *include_smoke* is false or
    when the model is reported as not loaded, so the probe never triggers a
    cold load. Otherwise a short prompt is generated and the row records
    latency, the service-reported busy delta and the sysfs proof.

    Like the other probes, a failed probe call adds the
    ``"probe_http_failed"`` warning and a short error to the row.

    *post_json* can be substituted (e.g. in tests) for the HTTP POST helper.
    """
    row, health = health_row("genai", GENAI_HEALTH_URL, timeout)
    row.loaded = bool(health.get("loaded")) if isinstance(health, dict) and "loaded" in health else None
    row.jobs = 0
    if not row.reachable:
        return row
    if not include_smoke or row.loaded is False:
        row.mode = "loaded=false" if row.loaded is False else "health_only"
        row.reason = "skipped_cold_load" if row.loaded is False else "smoke_disabled"
        mark_skipped_fallback(row, row.reason)
        return row

    payload = {"prompt": "Say pong.", "max_new_tokens": 8}
    status, data, elapsed, delta = measure_probe(lambda: post_json(GENAI_GENERATE_URL, payload, timeout), "genai", busy_path)

    row.probe_ran = True
    row.calls = 1
    row.jobs = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and "error" not in data

    reported_delta = data.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None

    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
|
|
|
|
|
def doc_triage_sample_path() -> Path | None:
    """Locate the bundled synthetic invoice image for the doc-triage probe.

    Two locations are tried in order: a fixed absolute path and a path
    relative to the grandparent directory of this file. A candidate only
    qualifies when both the image and its ``.png.txt`` companion exist.
    Returns ``None`` when neither location qualifies.
    """
    here = Path(__file__).resolve()
    candidates = (
        Path("/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png"),
        here.parents[1] / "openvino-doc-image-triage-npu" / "samples" / "synthetic_invoice.png",
    )
    return next(
        (image for image in candidates if image.exists() and image.with_suffix(".png.txt").exists()),
        None,
    )
|
|
|
|
|
|
def probe_doc_triage(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the doc-triage service and optionally triage one synthetic file.

    An unavailable service returns the health row with ``files = 0``. When
    the service is healthy but *include_smoke* is false, the row is marked
    ``health_only`` / ``smoke_disabled`` and one skipped fallback is
    recorded. Otherwise a single synthetic invoice is triaged: the bundled
    sample if present, else a 1x1 PNG plus text companion written to a
    temporary directory. The request's only allowed root is the directory
    holding that file.

    *post_json* can be substituted (e.g. in tests) for the HTTP POST helper.
    """
    row, _ = health_row("doc_triage", DOC_TRIAGE_HEALTH_URL, timeout, gate="closed:private-root")
    row.files = 0
    if not row.reachable:
        return row
    if not include_smoke:
        row.mode = "health_only"
        row.reason = "smoke_disabled"
        mark_skipped_fallback(row, "skipped")
        return row

    def triage(image: Path, allowed_root: Path) -> tuple[int, dict[str, Any], float, int | None]:
        # One measured triage request whose only allowed root is allowed_root.
        request = {"path": str(image), "options": {"allowed_roots": [str(allowed_root)], "include_ocr_text": False, "use_embeddings": True}}
        return measure_probe(lambda: post_json(DOC_TRIAGE_URL, request, timeout), "doc_triage", busy_path)

    bundled = doc_triage_sample_path()
    if bundled is not None:
        status, data, elapsed, delta = triage(bundled, bundled.parent.resolve())
    else:
        # No bundled sample: fabricate one that lives only for this request.
        with tempfile.TemporaryDirectory(prefix="npu-digest-doc-") as tmp:
            scratch = Path(tmp).resolve()
            image = scratch / "synthetic-invoice.png"
            image.write_bytes(base64.b64decode("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII="))
            image.with_suffix(".png.txt").write_text("Synthetic invoice. Amount due $12.34 by 2026-06-30. No private data.\n")
            status, data, elapsed, delta = triage(image, scratch)

    row.probe_ran = True
    row.calls = 1
    row.files = 1
    row.avg_ms = elapsed
    row.mode = "NPU-via-embedding-service"
    row.allowed_roots_count = 1
    row.reachable = status == 200 and data.get("ok", True) is not False

    # The service-reported delta sits at
    # result.pages[0].needs_attention.embedding.npu_busy_delta_us.
    result = data.get("result")
    pages = result.get("pages") if isinstance(result, dict) else None
    first_page = pages[0] if isinstance(pages, list) and pages else None
    attention = first_page.get("needs_attention") if isinstance(first_page, dict) else None
    embedding = attention.get("embedding") if isinstance(attention, dict) else None
    if not isinstance(embedding, dict):
        embedding = {}
    reported_delta = embedding.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None

    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row
|
|
|
|
|
|
def build_summary(rows: list[ServiceRow], artifact_path: str | None, counter_delta_us: int | None, started_at: str) -> dict[str, Any]:
    """Roll the per-service *rows* up into the digest's summary record.

    The summary carries overall and per-service counts (reachable services,
    proofs, fallbacks, requests, busy deltas), the recommendation counts,
    the confidence distribution, the authority-violation total, the number
    of closed gates and a tally of warning codes. Zero-valued entries are
    dropped from the recommendation counts and confidence distribution.
    """
    proven = [r for r in rows if r.probe_ran and r.proof_ok is not None]

    rec_totals = {"escalate": 0, "suppress": 0}
    buckets: dict[str, int] = {"low": 0, "medium": 0, "high": 0, "unknown": 0}
    warning_totals: dict[str, int] = {}
    violations = 0

    for item in rows:
        label = (item.recommendation or "").lower()
        if label in rec_totals:
            rec_totals[label] += 1
        else:
            # Other labels still contribute through the row's own indicators.
            rec_totals["escalate"] += item.escalate or 0
            rec_totals["suppress"] += item.suppress or 0

        bucket = item.confidence_bucket
        if bucket:
            buckets[bucket] = buckets.get(bucket, 0) + 1
        elif item.recommendation or item.escalate is not None or item.suppress is not None:
            # A decision was made but no confidence could be extracted.
            buckets["unknown"] += 1

        violations += item.authority_violations or 0
        for code in item.warnings:
            warning_totals[code] = warning_totals.get(code, 0) + 1

    return {
        "type": "summary",
        "timestamp": started_at,
        "counter": str(BUSY_PATH),
        "delta_us": counter_delta_us,
        "services_ok": len([r for r in rows if r.reachable]),
        "services_total": len(rows),
        "proof_ok": len([r for r in proven if r.proof_ok]),
        "proof_total": len(proven),
        "fallbacks": sum(r.fallbacks for r in rows),
        "fallbacks_by_service": {r.service: r.fallbacks for r in rows if r.fallbacks},
        "request_counts_by_service": {r.service: r.calls for r in rows if r.calls},
        "npu_busy_delta_us_by_service": {r.service: r.npu_delta_us for r in rows if r.npu_delta_us is not None},
        "confidence_distribution": {name: count for name, count in buckets.items() if count},
        "recommendation_counts": {name: count for name, count in rec_totals.items() if count},
        "authority_violations": violations,
        "gates_closed": len([r for r in rows if str(r.gate).startswith("closed:")]),
        "warnings": warning_totals,
        "artifact": artifact_path,
    }
|
|
|
|
|
|
def render_text(summary: dict[str, Any], rows: list[ServiceRow]) -> str:
    """Render the digest as human-readable text.

    Output: three header lines, optional ``recommendations:`` and
    ``confidence:`` lines, one ``- <service>: key=value ...`` line per row
    (unset fields are omitted), a ``fallbacks:`` line listing the warning
    counts, and an ``artifact:`` line when the summary names one.
    """
    out = [
        f"NPU utilization digest {summary['timestamp']}",
        f"counter={summary['counter']} delta_us={summary.get('delta_us')}",
        f"services_ok={summary['services_ok']}/{summary['services_total']} proof_ok={summary['proof_ok']}/{summary['proof_total']} fallbacks={summary['fallbacks']} authority_violations={summary['authority_violations']} gates_closed={summary['gates_closed']}",
    ]
    for heading, key in (("recommendations", "recommendation_counts"), ("confidence", "confidence_distribution")):
        counts = summary.get(key) or {}
        if counts:
            out.append(f"{heading}: " + " ".join(f"{k}={v}" for k, v in sorted(counts.items())))

    # (output label, row attribute, render as lower-case boolean), in output
    # order; each is printed only when the attribute is not None.
    optional_fields = (
        ("jobs", "jobs", False),
        ("events", "events", False),
        ("files", "files", False),
        ("docs", "docs", False),
        ("avg_ms", "avg_ms", False),
        ("npu_delta_us", "npu_delta_us", False),
        ("proof", "proof_ok", True),
        ("dry_run", "dry_run", True),
        ("suppress", "suppress", False),
        ("escalate", "escalate", False),
        ("recommendation", "recommendation", False),
        ("confidence", "confidence_bucket", False),
        ("authority_violations", "authority_violations", False),
        ("loaded", "loaded", True),
        ("allowed_roots", "allowed_roots_count", False),
        ("text_len", "text_len", False),
    )
    for r in rows:
        tokens = [f"- {r.service}:", f"ok={str(r.reachable).lower()}"]
        if r.calls:
            tokens.append(f"calls={r.calls}")
        for label, attr, as_flag in optional_fields:
            value = getattr(r, attr)
            if value is None:
                continue
            tokens.append(f"{label}={str(value).lower() if as_flag else value}")
        if r.mode:
            tokens.append(f"mode={r.mode}")
        if r.gate != "none":
            tokens.append(f"gate={r.gate}")
        if r.reason:
            tokens.append(f"reason={r.reason}")
        if r.warnings:
            tokens.append("warnings=" + ",".join(sorted(set(r.warnings))))
        out.append(" ".join(tokens))

    warning_counts = summary.get("warnings") or {}
    if warning_counts:
        out.append("fallbacks: " + " ".join(f"{k}={v}" for k, v in sorted(warning_counts.items())))
    else:
        out.append("fallbacks: none")
    if summary.get("artifact"):
        out.append(f"artifact: {summary['artifact']}")
    return "\n".join(out)
|
|
|
|
|
|
def write_jsonl(summary: dict[str, Any], rows: list[ServiceRow], out_dir: Path) -> Path:
    """Write the digest to ``<out_dir>/<stamp>.jsonl`` and return that path.

    ``<stamp>`` is the summary timestamp with ``:``, ``+`` and ``-``
    removed. The first line is the summary; each following line is one
    compacted row. *out_dir* is created if needed, and an existing file
    with the same stamp is overwritten.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    stamp = summary["timestamp"].translate(str.maketrans("", "", ":+-"))
    target = out_dir / f"{stamp}.jsonl"

    def encode(record: dict[str, Any]) -> str:
        # Compact, key-sorted JSON: one record per line.
        return json.dumps(record, sort_keys=True, separators=(",", ":")) + "\n"

    with target.open("w", encoding="utf-8") as handle:
        handle.write(encode(summary))
        handle.writelines(encode(compact_dict(row)) for row in rows)
    return target
|
|
|
|
|
|
def str_bool(value: str) -> bool:
    """Parse a command-line boolean (argparse ``type=`` callable).

    Case-insensitively accepts ``1/true/yes/y/on`` and ``0/false/no/n/off``.

    Raises:
        argparse.ArgumentTypeError: for any other text.
    """
    truth_table = {
        **dict.fromkeys(("1", "true", "yes", "y", "on"), True),
        **dict.fromkeys(("0", "false", "no", "n", "off"), False),
    }
    outcome = truth_table.get(value.lower())
    if outcome is None:
        raise argparse.ArgumentTypeError("expected true or false")
    return outcome
|
|
|
|
|
|
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the digest's command-line options.

    *argv* defaults to the process arguments. The whisper and doc-triage
    smoke probes are enabled by default; the GenAI smoke probe is opt-in.
    """
    parser = argparse.ArgumentParser(description="Compact NPU utilization digest")
    add = parser.add_argument

    add("--format", choices=("text", "jsonl"), default="text")
    add("--out", default=str(DEFAULT_OUT_DIR))
    add("--timeout-s", type=float, default=8.0)
    smoke_defaults = (
        ("--include-whisper-smoke", True),
        ("--include-genai-smoke", False),
        ("--include-doc-triage-smoke", True),
    )
    for flag, enabled in smoke_defaults:
        add(flag, type=str_bool, default=enabled)
    add("--no-write", action="store_true")
    add("--strict-proof", action="store_true", help="exit nonzero if a proof-required probe ran without positive sysfs delta")
    add("--verbose", action="store_true")

    return parser.parse_args(argv)
|
|
|
|
|
|
def run(args: argparse.Namespace) -> tuple[dict[str, Any], list[ServiceRow]]:
    """Run every probe and health check once and build the summary.

    Probes run in a fixed order (embeddings, rerank, whisper, classifier,
    genai, doc triage), followed by three health-only checks. The busy
    counter is read before and after the whole run to give the overall
    delta. Returns ``(summary, rows)``; the summary's artifact path is
    still ``None`` at this point.
    """
    started_at = dt.datetime.now().astimezone().replace(microsecond=0).isoformat()
    timeout = args.timeout_s
    busy_start = read_busy(BUSY_PATH)

    rows: list[ServiceRow] = []
    rows.append(probe_embeddings(timeout))
    rows.append(probe_rerank(timeout))
    rows.append(probe_whisper(timeout, args.include_whisper_smoke))
    rows.append(probe_classifier(timeout))
    rows.append(probe_genai(timeout, args.include_genai_smoke))
    rows.append(probe_doc_triage(timeout, args.include_doc_triage_smoke))

    # Services that are only health-checked, with the gate recorded for each.
    health_only = (
        ("rag_endpoint", RAG_ENDPOINT_HEALTH_URL, "closed:vector-mutation"),
        ("rag_health", RAG_HEALTH_URL, "none"),
        ("advisory_gateway", ADVISORY_HEALTH_URL, "closed:advisory-post"),
    )
    for service, url, gate in health_only:
        row, _ = health_row(service, url, timeout, gate=gate)
        rows.append(row)

    busy_end = read_busy(BUSY_PATH)
    if busy_start is None or busy_end is None:
        overall_delta = None
    else:
        overall_delta = busy_end - busy_start

    summary = build_summary(rows, artifact_path=None, counter_delta_us=overall_delta, started_at=started_at)
    return summary, rows
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point; returns the process exit status.

    Runs the digest, writes the JSONL artifact unless ``--no-write`` is
    given, and prints the digest as text or JSONL. Returns ``2`` when
    ``--strict-proof`` is set and a probe that ran failed its proof,
    otherwise ``0``.
    """
    args = parse_args(argv)
    summary, rows = run(args)

    def encode(record: dict[str, Any]) -> str:
        return json.dumps(record, sort_keys=True, separators=(",", ":"))

    if not args.no_write:
        artifact = write_jsonl(summary, rows, Path(args.out).expanduser())
        summary["artifact"] = str(artifact)
        # The path is only known after the first write, so the file is
        # rewritten with the artifact path included in the summary line.
        serialized = [encode(summary)]
        serialized.extend(encode(compact_dict(r)) for r in rows)
        artifact.write_text("\n".join(serialized) + "\n")

    if args.format == "jsonl":
        print(encode(summary))
        for row in rows:
            print(encode(compact_dict(row)))
    else:
        print(render_text(summary, rows))

    if args.strict_proof and any(r.probe_ran and r.proof_ok is False for r in rows):
        return 2
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|