swarm-master/scripts/npu-utilization-digest.py

#!/usr/bin/env python3
"""Compact, read-only NPU/OpenVINO utilization digest.

Default behavior is safe for on-demand or scheduled runs: health checks plus
bounded synthetic probes, one compact JSONL artifact, and no service restarts,
routing changes, advisory POSTs, vector mutations, outbound sends, or private
root broadening.
"""
from __future__ import annotations

import argparse
import base64
import datetime as dt
import json
import math
import os
import tempfile
import time
import urllib.error
import urllib.parse
import urllib.request
import uuid
import wave
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any, Callable

# sysfs file sampled before and after each probe; read_busy() parses its
# contents as an integer. The name indicates cumulative NPU busy time in
# microseconds.
BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
# Default directory for JSONL digest artifacts (overridable with --out).
DEFAULT_OUT_DIR = Path("/home/will/.local/state/npu-utilization/digests")

# Local service endpoints. Each service has a health URL; services that also
# get a bounded synthetic probe have a second URL that the probe POSTs to.
EMBED_URL = "http://127.0.0.1:18817/v1/embeddings"
EMBED_HEALTH_URL = "http://127.0.0.1:18817/healthz"
RERANK_URL = "http://127.0.0.1:18818/rerank"
RERANK_HEALTH_URL = "http://127.0.0.1:18818/readyz"
WHISPER_URL = "http://127.0.0.1:18816/v1/audio/transcriptions"
WHISPER_HEALTH_URL = "http://127.0.0.1:18816/health"
CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
CLASSIFIER_HEALTH_URL = "http://127.0.0.1:18819/healthz"
GENAI_HEALTH_URL = "http://127.0.0.1:18820/healthz"
GENAI_GENERATE_URL = "http://127.0.0.1:18820/v1/generate"
DOC_TRIAGE_URL = "http://127.0.0.1:18829/triage"
DOC_TRIAGE_HEALTH_URL = "http://127.0.0.1:18829/healthz"
# The remaining services are only health-checked by run(); nothing is POSTed.
RAG_ENDPOINT_HEALTH_URL = "http://127.0.0.1:18810/healthz"
RAG_HEALTH_URL = "http://127.0.0.1:18814/healthz"
ADVISORY_HEALTH_URL = "http://172.19.0.1:18830/healthz"


@dataclass
class ServiceRow:
    """One per-service record of the digest.

    The defaults describe an unreachable service that was never probed.
    Fields that stay ``None`` (and empty lists) are omitted by
    ``compact_dict`` when the row is serialized to JSONL.
    """

    # Record discriminator; the summary record uses "summary" instead.
    type: str = "service"
    service: str = ""
    # Health check passed and, if a probe ran, its response was accepted.
    reachable: bool = False
    # True once a synthetic probe request was actually sent.
    probe_ran: bool = False
    # Whether the sysfs busy counter increased across the probe (None: no probe).
    proof_ok: bool | None = None
    calls: int = 0
    items: int = 0
    avg_ms: float | None = None
    # Busy-counter delta measured locally around the probe.
    npu_delta_us: int | None = None
    # Busy-counter delta reported by the service in its own response, if any.
    response_delta_us: int | None = None
    # Free-form label, e.g. "NPU", "dry_run", "health_only", "unavailable".
    mode: str = "unavailable"
    # Incremented for unavailable services, missing proof, and skipped smokes.
    fallbacks: int = 0
    warnings: list[str] = field(default_factory=list)
    # "none" or "closed:<what>"; build_summary counts the "closed:" gates.
    gate: str = "none"
    # Probe-specific details below; None means the field does not apply.
    jobs: int | None = None
    events: int | None = None
    files: int | None = None
    docs: int | None = None
    text_len: int | None = None
    sample_rate: int | None = None
    embedding_count: int | None = None
    embedding_dim: int | None = None
    dry_run: bool | None = None
    suppress: int | None = None
    escalate: int | None = None
    recommendation: str | None = None
    confidence: float | None = None
    confidence_bucket: str | None = None
    authority_violations: int | None = None
    loaded: bool | None = None
    allowed_roots_count: int | None = None
    # Why a probe was skipped (e.g. "smoke_disabled", "skipped_cold_load").
    reason: str | None = None
    # Short error text (truncated to 80 characters by the code that sets it).
    error: str | None = None


def compact_dict(obj: Any) -> dict[str, Any]:
    """Flatten *obj* into a plain dict without ``None`` or empty-list values.

    Dataclass instances are expanded with ``asdict``; anything else is handed
    to ``dict()``. Other falsy values (``0``, ``""``, ``False``) are kept.
    """
    if hasattr(obj, "__dataclass_fields__"):
        fields_map = asdict(obj)
    else:
        fields_map = dict(obj)
    compact: dict[str, Any] = {}
    for key, value in fields_map.items():
        if value is not None and value != []:
            compact[key] = value
    return compact


# Lower-cased action names that count_authority_violations() accepts without
# counting a violation (the empty string covers an absent action).
AUTHORITY_SAFE_ACTIONS = {
    "", "none", "log", "observe", "dry_run", "recommend", "suppress", "escalate",
    "record_metric", "compare_with_expected_label", "include_in_digest",
    "open_review_ticket_candidate", "recommend_human_review",
}
# Canonical names of authority flags; a truthy value for any of these in a
# response's "authority_flags" object is counted as a violation.
AUTHORITY_FLAG_KEYS = {
    "advisory_post",
    "atlas_routing",
    "broad_private_scan",
    "delivery_send",
    "gateway_restart",
    "live_routing",
    "memory_write",
    "outbound_send",
    "private_root_scan",
    "service_restart",
    "tool_execution",
    "vector_mutation",
}
# Alternative spellings ("can_*" / "may_*") mapped onto the canonical flag
# names above before the membership check.
AUTHORITY_FLAG_ALIASES = {
    "can_route_atlas": "atlas_routing",
    "can_write_memory": "memory_write",
    "can_execute_tools": "tool_execution",
    "can_restart_services": "service_restart",
    "can_send_outbound": "outbound_send",
    "can_scan_private_roots": "private_root_scan",
    "can_mutate_vector_store": "vector_mutation",
    "can_post_advisory_event": "advisory_post",
    "can_change_gateway_config": "gateway_restart",
    "may_route": "atlas_routing",
    "may_write_memory": "memory_write",
    "may_execute_tools": "tool_execution",
    "may_restart_services": "service_restart",
    "may_send_external": "outbound_send",
    "may_process_private_dirs": "private_root_scan",
    "may_mutate_vector_db": "vector_mutation",
    "may_change_live_config": "gateway_restart",
}


def confidence_bucket(confidence: float | None) -> str | None:
    if confidence is None:
        return None
    if confidence >= 0.8:
        return "high"
    if confidence >= 0.5:
        return "medium"
    return "low"


def coerce_confidence(value: Any) -> float | None:
    if isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        return max(0.0, min(1.0, float(value)))
    if isinstance(value, str):
        try:
            return max(0.0, min(1.0, float(value)))
        except ValueError:
            return None
    return None


def extract_confidence(payload: dict[str, Any]) -> float | None:
    """Return the confidence carried by a classifier-style response.

    A usable top-level ``"confidence"`` wins. Otherwise every entry of the
    ``"labels"`` object is inspected: a dict entry contributes the first of
    its ``confidence`` / ``score`` / ``probability`` keys that is present, any
    other entry is coerced directly. The highest coerced score is returned,
    or ``None`` when nothing usable is found.
    """
    top_level = coerce_confidence(payload.get("confidence"))
    if top_level is not None:
        return top_level

    labels = payload.get("labels")
    if not isinstance(labels, dict):
        return None

    best: float | None = None
    for entry in labels.values():
        if isinstance(entry, dict):
            # Only the first key that is present is consulted, even if its
            # value turns out not to be a usable number.
            present = next((name for name in ("confidence", "score", "probability") if name in entry), None)
            candidate = coerce_confidence(entry.get(present)) if present is not None else None
        else:
            candidate = coerce_confidence(entry)
        if candidate is not None and (best is None or candidate > best):
            best = candidate
    return best


def extract_recommendation(payload: dict[str, Any]) -> str | None:
    for key in ("recommendation", "classification", "input_class"):
        value = payload.get(key)
        if isinstance(value, str) and value:
            return value[:48]
    raw_action = payload.get("action")
    action: dict[str, Any] = raw_action if isinstance(raw_action, dict) else {}
    value = action.get("recommendation") or action.get("type")
    return str(value)[:48] if value else None


def count_authority_violations(payload: dict[str, Any]) -> int:
    """Count hints in an advisory response that exceed read-only authority.

    Handles both legacy compact payloads and `npu_advisory_decision_v1`.
    Three parts of the payload are inspected:

    * ``authority_flags``: each truthy flag whose (alias-resolved) name is a
      known authority flag counts once.
    * ``allowed_actions``: each entry outside the safe-action set counts once.
    * ``actual_action``: counts once if it is an object that reports
      ``performed``, lists ``side_effects`` or has a non-safe ``kind``, or if
      it is a non-empty scalar outside the safe-action set.
    """
    total = 0

    flags = payload.get("authority_flags")
    if isinstance(flags, dict):
        total += sum(
            1
            for name, enabled in flags.items()
            if AUTHORITY_FLAG_ALIASES.get(name, name) in AUTHORITY_FLAG_KEYS and bool(enabled)
        )

    allowed = payload.get("allowed_actions")
    if isinstance(allowed, list):
        total += sum(1 for entry in allowed if str(entry).lower() not in AUTHORITY_SAFE_ACTIONS)

    actual = payload.get("actual_action")
    if isinstance(actual, dict):
        was_performed = bool(actual.get("performed"))
        effects = actual.get("side_effects") or []
        kind = str(actual.get("kind") or "none").lower()
        # Object-shaped no-op reports use two extra kinds beyond the safe set.
        harmless_kinds = AUTHORITY_SAFE_ACTIONS | {"recorded_metric", "dry_run_reported"}
        if was_performed or effects or kind not in harmless_kinds:
            total += 1
    else:
        label = str(actual or "").lower()
        if label and label not in AUTHORITY_SAFE_ACTIONS:
            total += 1
    return total


def read_busy(path: Path = BUSY_PATH) -> int | None:
    """Return the integer stored in *path*, or ``None`` on any failure.

    Best effort: a missing file, unreadable file or non-integer content all
    yield ``None`` rather than an exception.
    """
    try:
        raw = path.read_text()
        value = int(raw.strip())
    except Exception:
        return None
    return value


def safe_error(exc: BaseException) -> str:
    """Return the class name of *exc*; the exception message is not included."""
    return exc.__class__.__name__


def http_get_json(url: str, timeout: float) -> tuple[int, dict[str, Any]]:
    """GET *url* and return ``(status, payload)`` without raising.

    At most 1 MiB of the body is read and parsed as JSON (an empty body is
    treated as ``{}``). The payload is always a dict: a body that is valid
    JSON but not an object becomes ``{"error": "non_object_json"}`` so callers
    can safely use ``.get()``.

    Returns:
        ``(http_status, payload)`` for any HTTP response, including error
        statuses (whose unparsable body becomes ``{"error": "http_error"}``);
        ``(0, {"error": <exception class name>})`` when no usable response was
        obtained (connection failure, timeout, invalid JSON, ...).
    """

    def _as_object(text: str) -> dict[str, Any]:
        # json.loads may legally return a list, string, number, bool or None.
        parsed = json.loads(text or "{}")
        return parsed if isinstance(parsed, dict) else {"error": "non_object_json"}

    try:
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            body = resp.read(1024 * 1024).decode("utf-8", "replace")
            return int(resp.status), _as_object(body)
    except urllib.error.HTTPError as exc:
        # Error statuses still carry a body worth reporting when it parses.
        try:
            body = exc.read(1024 * 1024).decode("utf-8", "replace")
            return int(exc.code), _as_object(body)
        except Exception:
            return int(exc.code), {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}


def http_post_json(url: str, payload: dict[str, Any], timeout: float) -> tuple[int, dict[str, Any]]:
    """POST *payload* as JSON to *url* and return ``(status, response)`` without raising on I/O errors.

    At most 2 MiB of a successful response (1 MiB of an error response) is
    read and parsed as JSON; an empty body is treated as ``{}``. The response
    is always a dict: a body that is valid JSON but not an object becomes
    ``{"error": "non_object_json"}`` so callers can safely use ``.get()``.

    Returns:
        ``(http_status, response)`` for any HTTP response, including error
        statuses (whose unparsable body becomes ``{"error": "http_error"}``);
        ``(0, {"error": <exception class name>})`` when no usable response was
        obtained (connection failure, timeout, invalid JSON, ...).
    """

    def _as_object(text: str) -> dict[str, Any]:
        # json.loads may legally return a list, string, number, bool or None.
        parsed = json.loads(text or "{}")
        return parsed if isinstance(parsed, dict) else {"error": "non_object_json"}

    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    req = urllib.request.Request(url, data=body, headers={"Content-Type": "application/json", "Accept": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = resp.read(2 * 1024 * 1024).decode("utf-8", "replace")
            return int(resp.status), _as_object(data)
    except urllib.error.HTTPError as exc:
        # Error statuses still carry a body worth reporting when it parses.
        try:
            data = exc.read(1024 * 1024).decode("utf-8", "replace")
            return int(exc.code), _as_object(data)
        except Exception:
            return int(exc.code), {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}


def health_row(service: str, url: str, timeout: float, gate: str = "none", mode: str = "health_only") -> tuple[ServiceRow, dict[str, Any]]:
    """Health-check *url* and build the initial row for *service*.

    The service is considered healthy when the GET returns HTTP 200 and the
    payload does not carry ``"ok": false``. An unhealthy service gets mode
    ``"unavailable"``, one fallback, an ``"unavailable"`` warning and a short
    error string taken from the payload (or ``http_<status>``).

    Returns:
        ``(row, health_payload)``.
    """
    status, payload = http_get_json(url, timeout)
    healthy = status == 200 and payload.get("ok", True) is not False
    if healthy:
        return ServiceRow(service=service, reachable=True, mode=mode, gate=gate), payload

    detail = payload.get("error") or payload.get("ready_error") or f"http_{status}"
    down = ServiceRow(
        service=service,
        reachable=False,
        mode="unavailable",
        gate=gate,
        fallbacks=1,
        warnings=["unavailable"],
        error=str(detail)[:80],
    )
    return down, payload


def measure_probe(fn: Callable[[], tuple[int, dict[str, Any]]], timeout_label: str, busy_path: Path = BUSY_PATH) -> tuple[int, dict[str, Any], float, int | None]:
    """Run *fn* once, timing it and sampling the busy counter around it.

    Args:
        fn: Zero-argument callable returning ``(status, payload)``.
        timeout_label: Accepted for the callers' benefit; not used here.
        busy_path: Counter file sampled before and after the call.

    Returns:
        ``(status, payload, elapsed_ms, delta)`` where ``elapsed_ms`` is
        rounded to three decimals and ``delta`` is the counter difference, or
        ``None`` if either sample could not be read.
    """
    busy_before = read_busy(busy_path)
    t0 = time.perf_counter()
    status, payload = fn()
    elapsed_ms = round((time.perf_counter() - t0) * 1000, 3)
    busy_after = read_busy(busy_path)
    if busy_before is None or busy_after is None:
        delta = None
    else:
        delta = busy_after - busy_before
    return status, payload, elapsed_ms, delta


def apply_proof(row: ServiceRow, delta: int | None) -> None:
    """Record the measured counter delta on *row* and judge the proof.

    The proof holds only for a strictly positive delta. Otherwise the row
    gains one fallback and a warning naming the cause: a missing counter
    (``delta is None``) or a counter that did not advance.
    """
    row.npu_delta_us = delta
    if delta is not None and delta > 0:
        row.proof_ok = True
        return
    row.proof_ok = False
    row.fallbacks += 1
    row.warnings.append("missing_sysfs_counter" if delta is None else "no_positive_sysfs_delta")


def mark_skipped_fallback(row: ServiceRow, reason: str) -> None:
    """Count a skipped or unloaded proof probe as one fallback on *row*.

    Intended for proof-capable rows whose bounded smoke was disabled or
    skipped (for instance to avoid a cold load). Rows that are health-only by
    design should not go through this helper, so their fallbacks stay at zero.
    *reason* is appended to the row's warnings.
    """
    row.warnings.append(reason)
    row.fallbacks = row.fallbacks + 1


def probe_embeddings(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the embedding service and embed one fixed synthetic sentence.

    Args:
        timeout: Per-request timeout in seconds.
        busy_path: Counter file sampled around the probe.
        post_json: Callable used for the POST (replaceable by callers).

    Returns:
        The service row. If the health check fails the row is returned
        without a probe; otherwise it carries timing, embedding count and
        dimension, the counter proof, and a ``probe_http_failed`` warning when
        the response was not HTTP 200 with a ``"data"`` key.
    """
    row, _health = health_row("embeddings", EMBED_HEALTH_URL, timeout)
    if not row.reachable:
        return row

    request_body = {"input": "non-private npu utilization digest probe", "model": "bge-base-en-v1.5-int8-ov"}

    def _send() -> tuple[int, dict[str, Any]]:
        return post_json(EMBED_URL, request_body, timeout)

    status, data, elapsed, delta = measure_probe(_send, "embeddings", busy_path)
    vectors = data.get("data")
    reported_delta = data.get("npu_busy_delta_us")

    row.probe_ran = True
    row.calls = 1
    row.items = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and "data" in data
    row.embedding_count = len(vectors) if isinstance(vectors, list) else 0
    row.embedding_dim = data.get("embedding_dim")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)
    if row.reachable:
        return row
    row.warnings.append("probe_http_failed")
    row.error = str(data.get("error") or f"http_{status}")[:80]
    return row


def probe_rerank(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the reranker and rank two fixed synthetic documents.

    Args:
        timeout: Per-request timeout in seconds.
        busy_path: Counter file sampled around the probe.
        post_json: Callable used for the POST (replaceable by callers).

    Returns:
        The service row. If the health check fails the row is returned
        without a probe; otherwise it carries timing, document count, the
        counter proof, and a ``probe_http_failed`` warning when the response
        was not HTTP 200 or carried ``"ok": false``.
    """
    row, _ = health_row("rerank", RERANK_HEALTH_URL, timeout)
    if not row.reachable:
        return row
    docs = ["Intel NPU accelerates OpenVINO inference.", "Bananas ripen on a kitchen counter."]
    payload = {"query": "OpenVINO NPU inference", "documents": docs, "top_k": 2, "return_documents": False}
    status, data, elapsed, delta = measure_probe(lambda: post_json(RERANK_URL, payload, timeout), "rerank", busy_path)
    row.probe_ran = True
    row.calls = 1
    row.docs = len(docs)
    # Prefer the service-reported duration, but never let a malformed value
    # (e.g. "n/a" or a nested object) abort the whole digest run.
    try:
        row.avg_ms = float(data.get("duration_ms") or elapsed)
    except (TypeError, ValueError):
        row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and data.get("ok", True) is not False
    row.response_delta_us = data.get("npu_busy_delta_us") if isinstance(data.get("npu_busy_delta_us"), int) else None
    apply_proof(row, delta)
    if not row.reachable:
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row


def probe_classifier(timeout: float, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the classifier and classify one synthetic event in dry-run mode.

    Args:
        timeout: Per-request timeout in seconds.
        busy_path: Counter file sampled around the probe.
        post_json: Callable used for the POST (replaceable by callers).

    Returns:
        The service row. If the health check fails the row is returned
        without a probe; otherwise it carries timing, escalate/suppress
        flags, a recommendation (defaulting to "escalate", "suppress" or
        "log"), confidence, the authority-violation count, the counter proof,
        and a ``probe_http_failed`` warning when the response was not HTTP 200
        or contained an ``"error"`` key.
    """
    row, _health = health_row("classifier", CLASSIFIER_HEALTH_URL, timeout, mode="dry_run")
    if not row.reachable:
        return row

    request_body = {
        "id": "npu-digest-probe",
        "text": "Non-private cron event: backup completed successfully, no user action required.",
        "options": {"dry_run": True, "include_evidence": False},
    }

    def _send() -> tuple[int, dict[str, Any]]:
        return post_json(CLASSIFIER_URL, request_body, timeout)

    status, data, elapsed, delta = measure_probe(_send, "classifier", busy_path)

    labels = data.get("labels")
    if not isinstance(labels, dict):
        labels = {}
    action = data.get("action")
    if not isinstance(action, dict):
        action = {}

    # The service may report its own counter delta under either key.
    reported_delta = None
    for counter_key in ("sysfs_npu_busy_delta_us", "npu_busy_delta_us"):
        candidate = data.get(counter_key)
        if isinstance(candidate, int):
            reported_delta = candidate
            break

    wants_escalation = action.get("escalate") or labels.get("action_required") or labels.get("tool_needed")
    wants_suppression = action.get("suppress") or labels.get("no_op") or labels.get("duplicate")

    row.probe_ran = True
    row.calls = 1
    row.events = 1
    row.avg_ms = elapsed
    row.mode = "dry_run"
    row.dry_run = True
    row.reachable = status == 200 and "error" not in data
    row.response_delta_us = reported_delta
    row.escalate = int(bool(wants_escalation))
    row.suppress = int(bool(wants_suppression))
    if row.escalate:
        default_recommendation = "escalate"
    elif row.suppress:
        default_recommendation = "suppress"
    else:
        default_recommendation = "log"
    row.recommendation = extract_recommendation(data) or default_recommendation
    row.confidence = extract_confidence(data)
    row.confidence_bucket = confidence_bucket(row.confidence)
    row.authority_violations = count_authority_violations(data)
    if row.authority_violations:
        row.warnings.append("authority_violation")
    row.items = len(labels)
    apply_proof(row, delta)
    if row.reachable:
        return row
    row.warnings.append("probe_http_failed")
    row.error = str(data.get("error") or f"http_{status}")[:80]
    return row


def write_tone_wav(path: Path, seconds: float = 0.35, sample_rate: int = 16000) -> None:
    """Write a mono, 16-bit little-endian PCM WAV file holding a 440 Hz sine tone.

    The tone has a peak amplitude of 9000 and ``int(seconds * sample_rate)``
    frames. Samples are assembled in memory and written in one call.
    """
    frame_total = int(seconds * sample_rate)
    pcm = bytearray()
    for index in range(frame_total):
        sample = int(9000 * math.sin(2 * math.pi * 440 * (index / sample_rate)))
        pcm += sample.to_bytes(2, byteorder="little", signed=True)
    with wave.open(str(path), "wb") as out:
        out.setnchannels(1)
        out.setsampwidth(2)
        out.setframerate(sample_rate)
        out.writeframes(bytes(pcm))


def post_multipart_file(url: str, file_path: Path, timeout: float) -> tuple[int, dict[str, Any]]:
    """POST *file_path* to *url* as ``multipart/form-data`` and return ``(status, response)``.

    The form carries ``model=whisper``, ``response_format=json`` and the file
    itself as ``file`` (filename ``npu-digest.wav``, type ``audio/wav``). At
    most 1 MiB of the response is read and parsed as JSON; the response is
    always a dict (a non-object JSON body becomes
    ``{"error": "non_object_json"}``).

    Returns:
        ``(http_status, response)`` for any HTTP response, including error
        statuses (whose unparsable body becomes ``{"error": "http_error"}``),
        matching ``http_get_json`` / ``http_post_json``;
        ``(0, {"error": <exception class name>})`` when no usable response was
        obtained.

    Raises:
        OSError: if *file_path* cannot be read (this happens before the
            request is attempted).
    """

    def _as_object(text: str) -> dict[str, Any]:
        # json.loads may legally return a list, string, number, bool or None.
        parsed = json.loads(text or "{}")
        return parsed if isinstance(parsed, dict) else {"error": "non_object_json"}

    boundary = "----npu-digest-" + uuid.uuid4().hex
    file_bytes = file_path.read_bytes()
    parts = [
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nwhisper\r\n".encode(),
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"response_format\"\r\n\r\njson\r\n".encode(),
        f"--{boundary}\r\nContent-Disposition: form-data; name=\"file\"; filename=\"npu-digest.wav\"\r\nContent-Type: audio/wav\r\n\r\n".encode(),
        file_bytes,
        f"\r\n--{boundary}--\r\n".encode(),
    ]
    req = urllib.request.Request(url, data=b"".join(parts), headers={"Content-Type": f"multipart/form-data; boundary={boundary}"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            text = resp.read(1024 * 1024).decode("utf-8", "replace")
            return int(resp.status), _as_object(text)
    except urllib.error.HTTPError as exc:
        # Keep the real status code instead of collapsing it to 0.
        try:
            text = exc.read(1024 * 1024).decode("utf-8", "replace")
            return int(exc.code), _as_object(text)
        except Exception:
            return int(exc.code), {"error": "http_error"}
    except Exception as exc:
        return 0, {"error": safe_error(exc)}


def probe_whisper(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH) -> ServiceRow:
    """Health-check the transcription service and optionally transcribe a short tone.

    Args:
        timeout: Per-request timeout in seconds.
        include_smoke: When false, a reachable service is reported as
            ``health_only`` with reason ``smoke_disabled`` and one fallback.
        busy_path: Counter file sampled around the probe.

    Returns:
        The service row. With the smoke enabled it carries timing, the
        transcript length, the reported sample rate, the counter proof, and a
        ``probe_http_failed`` warning when the response was not HTTP 200 or
        contained an ``"error"`` key.
    """
    row, _health = health_row("whisper", WHISPER_HEALTH_URL, timeout)
    row.jobs = 0
    if not row.reachable:
        return row
    if not include_smoke:
        row.mode = "health_only"
        row.reason = "smoke_disabled"
        mark_skipped_fallback(row, "skipped")
        return row

    # The WAV only needs to exist for the duration of the upload.
    with tempfile.TemporaryDirectory(prefix="npu-digest-whisper-") as scratch:
        wav_path = Path(scratch) / "probe.wav"
        write_tone_wav(wav_path)

        def _send() -> tuple[int, dict[str, Any]]:
            return post_multipart_file(WHISPER_URL, wav_path, timeout)

        status, data, elapsed, delta = measure_probe(_send, "whisper", busy_path)

    reported_rate = data.get("sample_rate")
    reported_delta = data.get("npu_busy_delta_us")

    row.probe_ran = True
    row.calls = 1
    row.jobs = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and "error" not in data
    row.text_len = len(str(data.get("text") or ""))
    row.sample_rate = reported_rate if isinstance(reported_rate, int) else None
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None
    apply_proof(row, delta)
    if row.reachable:
        return row
    row.warnings.append("probe_http_failed")
    row.error = str(data.get("error") or f"http_{status}")[:80]
    return row


def probe_genai(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the GenAI service and optionally run a tiny generation.

    The generation is skipped (and counted as a fallback) when the smoke is
    disabled or when the health payload reports ``loaded: false``.

    Args:
        timeout: Per-request timeout in seconds.
        include_smoke: Whether the generation request may be sent.
        busy_path: Counter file sampled around the probe.
        post_json: Callable used for the POST (replaceable by callers).

    Returns:
        The service row. After a smoke it carries timing, the counter proof
        and, like the other probes, a ``probe_http_failed`` warning plus a
        short error when the response was not HTTP 200 or contained an
        ``"error"`` key.
    """
    row, health = health_row("genai", GENAI_HEALTH_URL, timeout)
    row.loaded = bool(health.get("loaded")) if isinstance(health, dict) and "loaded" in health else None
    row.jobs = 0
    if not row.reachable:
        return row
    if not include_smoke or row.loaded is False:
        row.mode = "loaded=false" if row.loaded is False else "health_only"
        row.reason = "skipped_cold_load" if row.loaded is False else "smoke_disabled"
        mark_skipped_fallback(row, row.reason)
        return row
    payload = {"prompt": "Say pong.", "max_new_tokens": 8}
    status, data, elapsed, delta = measure_probe(lambda: post_json(GENAI_GENERATE_URL, payload, timeout), "genai", busy_path)
    row.probe_ran = True
    row.calls = 1
    row.jobs = 1
    row.avg_ms = elapsed
    row.mode = "NPU"
    row.reachable = status == 200 and "error" not in data
    apply_proof(row, delta)
    if not row.reachable:
        # Same failure reporting as every other probe in this file.
        row.warnings.append("probe_http_failed")
        row.error = str(data.get("error") or f"http_{status}")[:80]
    return row


def doc_triage_sample_path() -> Path | None:
    candidates = [
        Path("/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png"),
        Path(__file__).resolve().parents[1] / "openvino-doc-image-triage-npu" / "samples" / "synthetic_invoice.png",
    ]
    for candidate in candidates:
        if candidate.exists() and candidate.with_suffix(".png.txt").exists():
            return candidate
    return None


def probe_doc_triage(timeout: float, include_smoke: bool, busy_path: Path = BUSY_PATH, post_json: Callable[..., tuple[int, dict[str, Any]]] = http_post_json) -> ServiceRow:
    """Health-check the document-triage service and optionally triage one synthetic image.

    The image is the bundled sample if ``doc_triage_sample_path`` finds one;
    otherwise a 1x1 PNG with a text sidecar is created in a temporary
    directory. In both cases the request's ``allowed_roots`` contains only
    the directory holding that image.

    Args:
        timeout: Per-request timeout in seconds.
        include_smoke: When false, a reachable service is reported as
            ``health_only`` with reason ``smoke_disabled`` and one fallback.
        busy_path: Counter file sampled around the probe.
        post_json: Callable used for the POST (replaceable by callers).

    Returns:
        The service row (gate ``closed:private-root``). After a smoke it
        carries timing, the counter proof, any delta reported under the first
        page's ``needs_attention.embedding``, and a ``probe_http_failed``
        warning when the response was not HTTP 200 or carried ``"ok": false``.
    """
    row, _health = health_row("doc_triage", DOC_TRIAGE_HEALTH_URL, timeout, gate="closed:private-root")
    row.files = 0
    if not row.reachable:
        return row
    if not include_smoke:
        row.mode = "health_only"
        row.reason = "smoke_disabled"
        mark_skipped_fallback(row, "skipped")
        return row

    def _triage(image: Path, allowed_root: Path) -> tuple[int, dict[str, Any], float, int | None]:
        body = {"path": str(image), "options": {"allowed_roots": [str(allowed_root)], "include_ocr_text": False, "use_embeddings": True}}
        return measure_probe(lambda: post_json(DOC_TRIAGE_URL, body, timeout), "doc_triage", busy_path)

    bundled = doc_triage_sample_path()
    if bundled is None:
        # No bundled sample: fabricate one that lives only as long as the request.
        with tempfile.TemporaryDirectory(prefix="npu-digest-doc-") as scratch:
            scratch_root = Path(scratch).resolve()
            image = scratch_root / "synthetic-invoice.png"
            image.write_bytes(base64.b64decode("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII="))
            image.with_suffix(".png.txt").write_text("Synthetic invoice. Amount due $12.34 by 2026-06-30. No private data.\n")
            status, data, elapsed, delta = _triage(image, scratch_root)
    else:
        status, data, elapsed, delta = _triage(bundled, bundled.parent.resolve())

    row.probe_ran = True
    row.calls = 1
    row.files = 1
    row.avg_ms = elapsed
    row.mode = "NPU-via-embedding-service"
    row.allowed_roots_count = 1
    row.reachable = status == 200 and data.get("ok", True) is not False

    # Walk result -> pages[0] -> needs_attention -> embedding, tolerating any
    # missing or wrongly-typed level.
    embedding: dict[str, Any] = {}
    result = data.get("result")
    pages = result.get("pages") if isinstance(result, dict) else None
    if isinstance(pages, list) and pages and isinstance(pages[0], dict):
        attention = pages[0].get("needs_attention")
        nested = attention.get("embedding") if isinstance(attention, dict) else None
        if isinstance(nested, dict):
            embedding = nested
    reported_delta = embedding.get("npu_busy_delta_us")
    row.response_delta_us = reported_delta if isinstance(reported_delta, int) else None

    apply_proof(row, delta)
    if row.reachable:
        return row
    row.warnings.append("probe_http_failed")
    row.error = str(data.get("error") or f"http_{status}")[:80]
    return row


def build_summary(rows: list[ServiceRow], artifact_path: str | None, counter_delta_us: int | None, started_at: str) -> dict[str, Any]:
    """Aggregate the per-service rows into the digest's summary record.

    Args:
        rows: All service rows of this run.
        artifact_path: Path of the written artifact, if known.
        counter_delta_us: Counter delta measured across the whole run.
        started_at: Timestamp string stored as ``timestamp``.

    Returns:
        A dict with ``type`` "summary": totals for reachable services, proofs,
        fallbacks, closed gates and authority violations, per-service
        breakdowns, recommendation and confidence tallies (zero entries
        dropped), and a count per warning string.
    """
    proof_rows = [row for row in rows if row.probe_ran and row.proof_ok is not None]

    rec_totals = {"escalate": 0, "suppress": 0}
    buckets: dict[str, int] = {"low": 0, "medium": 0, "high": 0, "unknown": 0}
    warning_totals: dict[str, int] = {}
    violation_total = 0
    for row in rows:
        label = (row.recommendation or "").lower()
        if label in rec_totals:
            rec_totals[label] += 1
        else:
            # Other labels fall back to the row's own escalate/suppress flags.
            rec_totals["escalate"] += row.escalate or 0
            rec_totals["suppress"] += row.suppress or 0

        bucket = row.confidence_bucket
        if bucket:
            buckets[bucket] = buckets.get(bucket, 0) + 1
        elif row.recommendation or row.escalate is not None or row.suppress is not None:
            # A decision was made but no confidence came with it.
            buckets["unknown"] += 1

        violation_total += row.authority_violations or 0
        for name in row.warnings:
            warning_totals[name] = warning_totals.get(name, 0) + 1

    return {
        "type": "summary",
        "timestamp": started_at,
        "counter": str(BUSY_PATH),
        "delta_us": counter_delta_us,
        "services_ok": len([row for row in rows if row.reachable]),
        "services_total": len(rows),
        "proof_ok": len([row for row in proof_rows if row.proof_ok]),
        "proof_total": len(proof_rows),
        "fallbacks": sum(row.fallbacks for row in rows),
        "fallbacks_by_service": {row.service: row.fallbacks for row in rows if row.fallbacks},
        "request_counts_by_service": {row.service: row.calls for row in rows if row.calls},
        "npu_busy_delta_us_by_service": {row.service: row.npu_delta_us for row in rows if row.npu_delta_us is not None},
        "confidence_distribution": {name: count for name, count in buckets.items() if count},
        "recommendation_counts": {name: count for name, count in rec_totals.items() if count},
        "authority_violations": violation_total,
        "gates_closed": len([row for row in rows if str(row.gate).startswith("closed:")]),
        "warnings": warning_totals,
        "artifact": artifact_path,
    }


def render_text(summary: dict[str, Any], rows: list[ServiceRow]) -> str:
    """Format the digest as plain text.

    The output has three header lines, optional recommendation and confidence
    tallies, one ``- <service>: ...`` line per row listing only the fields
    that are set, a ``fallbacks:`` line with the warning counts, and the
    artifact path when the summary has one.
    """

    def _pairs(mapping: dict[str, Any]) -> str:
        return " ".join(f"{key}={mapping[key]}" for key in sorted(mapping))

    out = [
        f"NPU utilization digest {summary['timestamp']}",
        f"counter={summary['counter']} delta_us={summary.get('delta_us')}",
        " ".join(
            (
                f"services_ok={summary['services_ok']}/{summary['services_total']}",
                f"proof_ok={summary['proof_ok']}/{summary['proof_total']}",
                f"fallbacks={summary['fallbacks']}",
                f"authority_violations={summary['authority_violations']}",
                f"gates_closed={summary['gates_closed']}",
            )
        ),
    ]
    for heading, summary_key in (("recommendations: ", "recommendation_counts"), ("confidence: ", "confidence_distribution")):
        tally = summary.get(summary_key) or {}
        if tally:
            out.append(heading + _pairs(tally))

    # (output label, row attribute, render as lowercase boolean); each field
    # is shown only when it is not None, in exactly this order.
    optional_fields = (
        ("jobs", "jobs", False),
        ("events", "events", False),
        ("files", "files", False),
        ("docs", "docs", False),
        ("avg_ms", "avg_ms", False),
        ("npu_delta_us", "npu_delta_us", False),
        ("proof", "proof_ok", True),
        ("dry_run", "dry_run", True),
        ("suppress", "suppress", False),
        ("escalate", "escalate", False),
        ("recommendation", "recommendation", False),
        ("confidence", "confidence_bucket", False),
        ("authority_violations", "authority_violations", False),
        ("loaded", "loaded", True),
        ("allowed_roots", "allowed_roots_count", False),
        ("text_len", "text_len", False),
    )
    for row in rows:
        tokens = [f"- {row.service}:", f"ok={str(row.reachable).lower()}"]
        if row.calls:
            tokens.append(f"calls={row.calls}")
        for label, attr, as_lower_bool in optional_fields:
            value = getattr(row, attr)
            if value is None:
                continue
            shown = str(value).lower() if as_lower_bool else value
            tokens.append(f"{label}={shown}")
        if row.mode:
            tokens.append(f"mode={row.mode}")
        if row.gate != "none":
            tokens.append(f"gate={row.gate}")
        if row.reason:
            tokens.append(f"reason={row.reason}")
        if row.warnings:
            tokens.append("warnings=" + ",".join(sorted(set(row.warnings))))
        out.append(" ".join(tokens))

    warning_totals = summary.get("warnings") or {}
    if warning_totals:
        out.append("fallbacks: " + _pairs(warning_totals))
    else:
        out.append("fallbacks: none")
    artifact = summary.get("artifact")
    if artifact:
        out.append(f"artifact: {summary['artifact']}")
    return "\n".join(out)


def write_jsonl(summary: dict[str, Any], rows: list[ServiceRow], out_dir: Path) -> Path:
    """Write the digest as compact JSONL and return the file's path.

    *out_dir* is created if needed. The file is named after
    ``summary["timestamp"]`` with every ``:``, ``+`` and ``-`` removed, and
    holds the summary on the first line followed by one compacted row per
    line, each with sorted keys.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    stamp = summary["timestamp"].translate(str.maketrans("", "", ":+-"))
    target = out_dir / f"{stamp}.jsonl"

    def _line(record: dict[str, Any]) -> str:
        return json.dumps(record, sort_keys=True, separators=(",", ":")) + "\n"

    with target.open("w", encoding="utf-8") as handle:
        handle.write(_line(summary))
        handle.writelines(_line(compact_dict(row)) for row in rows)
    return target


def str_bool(value: str) -> bool:
    """Parse a command-line boolean word (case-insensitive).

    Accepts ``1/true/yes/y/on`` and ``0/false/no/n/off``.

    Raises:
        argparse.ArgumentTypeError: for any other input.
    """
    words = {
        **dict.fromkeys(("1", "true", "yes", "y", "on"), True),
        **dict.fromkeys(("0", "false", "no", "n", "off"), False),
    }
    outcome = words.get(value.lower())
    if outcome is None:
        raise argparse.ArgumentTypeError("expected true or false")
    return outcome


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the digest's command-line options.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        Namespace with ``format``, ``out``, ``timeout_s``, the three
        ``include_*_smoke`` booleans, ``no_write``, ``strict_proof`` and
        ``verbose``.
    """
    cli = argparse.ArgumentParser(description="Compact NPU utilization digest")
    cli.add_argument("--format", choices=("text", "jsonl"), default="text")
    cli.add_argument("--out", default=str(DEFAULT_OUT_DIR))
    cli.add_argument("--timeout-s", type=float, default=8.0)
    # Smoke toggles take an explicit true/false word; only GenAI is off by default.
    smoke_defaults = (
        ("--include-whisper-smoke", True),
        ("--include-genai-smoke", False),
        ("--include-doc-triage-smoke", True),
    )
    for flag, enabled in smoke_defaults:
        cli.add_argument(flag, type=str_bool, default=enabled)
    cli.add_argument("--no-write", action="store_true")
    cli.add_argument(
        "--strict-proof",
        action="store_true",
        help="exit nonzero if a proof-required probe ran without positive sysfs delta",
    )
    cli.add_argument("--verbose", action="store_true")
    return cli.parse_args(argv)


def run(args: argparse.Namespace) -> tuple[dict[str, Any], list[ServiceRow]]:
    """Execute every probe and health check once and build the summary.

    Probes run in a fixed order (embeddings, rerank, whisper, classifier,
    genai, doc triage), followed by three health-only checks. The busy
    counter is sampled before the first and after the last of them to give
    the run-wide delta.

    Returns:
        ``(summary, rows)``; the summary's ``artifact`` is ``None`` here.
    """
    started_at = dt.datetime.now().astimezone().replace(microsecond=0).isoformat()
    timeout = args.timeout_s
    busy_start = read_busy(BUSY_PATH)

    rows: list[ServiceRow] = []
    rows.append(probe_embeddings(timeout))
    rows.append(probe_rerank(timeout))
    rows.append(probe_whisper(timeout, args.include_whisper_smoke))
    rows.append(probe_classifier(timeout))
    rows.append(probe_genai(timeout, args.include_genai_smoke))
    rows.append(probe_doc_triage(timeout, args.include_doc_triage_smoke))

    health_only = (
        ("rag_endpoint", RAG_ENDPOINT_HEALTH_URL, "closed:vector-mutation"),
        ("rag_health", RAG_HEALTH_URL, "none"),
        ("advisory_gateway", ADVISORY_HEALTH_URL, "closed:advisory-post"),
    )
    for service, url, gate in health_only:
        row, _payload = health_row(service, url, timeout, gate=gate)
        rows.append(row)

    busy_end = read_busy(BUSY_PATH)
    if busy_start is None or busy_end is None:
        total_delta = None
    else:
        total_delta = busy_end - busy_start
    summary = build_summary(rows, artifact_path=None, counter_delta_us=total_delta, started_at=started_at)
    return summary, rows


def main(argv: list[str] | None = None) -> int:
    """Run the digest, write the artifact unless disabled, and print the result.

    Args:
        argv: Argument list; ``None`` lets argparse use ``sys.argv``.

    Returns:
        ``2`` when ``--strict-proof`` is set and some probe ran without a
        positive counter delta, otherwise ``0``.
    """
    args = parse_args(argv)
    summary, rows = run(args)
    if not args.no_write:
        out_dir = Path(args.out).expanduser()
        # The artifact path is only known once write_jsonl has chosen it, so
        # write once to learn it, then write again so the summary line
        # records it. The file name depends only on summary["timestamp"], so
        # the second call overwrites the same file using the same serializer
        # and encoding.
        summary["artifact"] = str(write_jsonl(summary, rows, out_dir))
        write_jsonl(summary, rows, out_dir)
    if args.format == "jsonl":
        print(json.dumps(summary, sort_keys=True, separators=(",", ":")))
        for row in rows:
            print(json.dumps(compact_dict(row), sort_keys=True, separators=(",", ":")))
    else:
        print(render_text(summary, rows))
    if args.strict_proof and any(r.probe_ran and r.proof_ok is False for r in rows):
        return 2
    return 0

# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())