feat(npu): add advisory metrics to utilization digest

Roll up confidence, recommendation, authority, fallback, and service-level metrics, including v1 authority-flag handling.
This commit is contained in:
William Valentin
2026-06-06 15:30:31 -07:00
parent dae2a57124
commit 72434c8bc3
3 changed files with 254 additions and 4 deletions
+177 -1
View File
@@ -72,6 +72,10 @@ class ServiceRow:
dry_run: bool | None = None
suppress: int | None = None
escalate: int | None = None
recommendation: str | None = None
confidence: float | None = None
confidence_bucket: str | None = None
authority_violations: int | None = None
loaded: bool | None = None
allowed_roots_count: int | None = None
reason: str | None = None
@@ -83,6 +87,136 @@ def compact_dict(obj: Any) -> dict[str, Any]:
return {k: v for k, v in data.items() if v is not None and v != []}
# Action names that count_authority_violations() accepts without recording a
# violation. Candidates are lower-cased before lookup; the empty string is
# the value used when no action was supplied.
AUTHORITY_SAFE_ACTIONS = {
    "",
    "compare_with_expected_label",
    "dry_run",
    "escalate",
    "include_in_digest",
    "log",
    "none",
    "observe",
    "open_review_ticket_candidate",
    "recommend",
    "recommend_human_review",
    "record_metric",
    "suppress",
}
# Canonical authority-flag names. count_authority_violations() counts one
# violation for every truthy entry of payload["authority_flags"] whose
# (alias-resolved) name appears here. Entries are grouped by name similarity
# only; a set has no ordering.
AUTHORITY_FLAG_KEYS = {
    # routing
    "atlas_routing",
    "live_routing",
    # sending / posting
    "advisory_post",
    "delivery_send",
    "outbound_send",
    # writes and execution
    "memory_write",
    "tool_execution",
    "vector_mutation",
    # private scans
    "broad_private_scan",
    "private_root_scan",
    # restarts
    "gateway_restart",
    "service_restart",
}
# Maps alternative flag spellings ("can_*" and "may_*") onto the canonical
# names in AUTHORITY_FLAG_KEYS. Both spellings of a flag are kept next to each
# other so a missing counterpart is easy to spot.
AUTHORITY_FLAG_ALIASES = {
    # -> atlas_routing
    "can_route_atlas": "atlas_routing",
    "may_route": "atlas_routing",
    # -> memory_write
    "can_write_memory": "memory_write",
    "may_write_memory": "memory_write",
    # -> tool_execution
    "can_execute_tools": "tool_execution",
    "may_execute_tools": "tool_execution",
    # -> service_restart
    "can_restart_services": "service_restart",
    "may_restart_services": "service_restart",
    # -> outbound_send
    "can_send_outbound": "outbound_send",
    "may_send_external": "outbound_send",
    # -> private_root_scan
    "can_scan_private_roots": "private_root_scan",
    "may_process_private_dirs": "private_root_scan",
    # -> vector_mutation
    "can_mutate_vector_store": "vector_mutation",
    "may_mutate_vector_db": "vector_mutation",
    # -> advisory_post (only a "can_*" spelling is defined)
    "can_post_advisory_event": "advisory_post",
    # -> gateway_restart
    "can_change_gateway_config": "gateway_restart",
    "may_change_live_config": "gateway_restart",
}
def confidence_bucket(confidence: float | None) -> str | None:
if confidence is None:
return None
if confidence >= 0.8:
return "high"
if confidence >= 0.5:
return "medium"
return "low"
def coerce_confidence(value: Any) -> float | None:
if isinstance(value, bool):
return None
if isinstance(value, (int, float)):
return max(0.0, min(1.0, float(value)))
if isinstance(value, str):
try:
return max(0.0, min(1.0, float(value)))
except ValueError:
return None
return None
def extract_confidence(payload: dict[str, Any]) -> float | None:
    """Return the confidence carried by an advisory payload, if any.

    A usable top-level ``"confidence"`` value wins. Otherwise every value in
    ``payload["labels"]`` is scored and the highest score is returned.

    Args:
        payload: Decoded response body. ``"labels"`` is only consulted when it
            is a dict.

    Returns:
        A score in ``[0.0, 1.0]`` as produced by ``coerce_confidence``, or
        ``None`` when the payload holds no usable score.
    """
    top_level = coerce_confidence(payload.get("confidence"))
    if top_level is not None:
        return top_level

    labels = payload.get("labels")
    if not isinstance(labels, dict):
        return None

    scores: list[float] = []
    for entry in labels.values():
        if isinstance(entry, dict):
            # Take the first score-like key that holds a usable number, so a
            # placeholder such as {"confidence": None} cannot hide a valid
            # "score" or "probability" next to it.
            candidates = (
                coerce_confidence(entry.get(score_key))
                for score_key in ("confidence", "score", "probability")
            )
            score = next((c for c in candidates if c is not None), None)
        else:
            score = coerce_confidence(entry)
        if score is not None:
            scores.append(score)
    return max(scores) if scores else None
def extract_recommendation(payload: dict[str, Any]) -> str | None:
for key in ("recommendation", "classification", "input_class"):
value = payload.get(key)
if isinstance(value, str) and value:
return value[:48]
raw_action = payload.get("action")
action: dict[str, Any] = raw_action if isinstance(raw_action, dict) else {}
value = action.get("recommendation") or action.get("type")
return str(value)[:48] if value else None
def count_authority_violations(payload: dict[str, Any]) -> int:
    """Count advisory response hints that would exceed read-only/dry-run authority.

    Handles both legacy compact payloads and `npu_advisory_decision_v1`.
    Schema-safe allowed actions and object-shaped no-op actual actions are not
    violations; every true live-authority flag is.

    Three parts of the payload are tallied independently:

    * ``authority_flags`` (dict): one violation per truthy flag whose name,
      after alias resolution, is in ``AUTHORITY_FLAG_KEYS``.
    * ``allowed_actions`` (list): one violation per entry whose lower-cased
      string form is not in ``AUTHORITY_SAFE_ACTIONS``.
    * ``actual_action``: at most one violation. A dict counts when it reports
      ``performed``, has ``side_effects``, or has an unsafe ``kind``; any
      other value counts when its lower-cased string form is non-empty and
      not a safe action.

    Args:
        payload: Decoded response body.

    Returns:
        Total number of violations found.
    """
    flags = payload.get("authority_flags")
    flag_hits = 0
    if isinstance(flags, dict):
        flag_hits = sum(
            1
            for name, enabled in flags.items()
            if AUTHORITY_FLAG_ALIASES.get(name, name) in AUTHORITY_FLAG_KEYS
            and bool(enabled)
        )

    allowed = payload.get("allowed_actions")
    allowed_hits = 0
    if isinstance(allowed, list):
        allowed_hits = sum(
            1
            for item in allowed
            if str(item).lower() not in AUTHORITY_SAFE_ACTIONS
        )

    actual = payload.get("actual_action")
    if isinstance(actual, dict):
        # Object form: a missing or falsy "kind" is treated as "none", and two
        # extra kind names are accepted on top of the safe actions.
        kind = str(actual.get("kind") or "none").lower()
        safe_kinds = AUTHORITY_SAFE_ACTIONS | {"recorded_metric", "dry_run_reported"}
        actual_bad = (
            bool(actual.get("performed"))
            or bool(actual.get("side_effects"))
            or kind not in safe_kinds
        )
    else:
        # Any non-dict form: compare its string form against the safe actions.
        label = str(actual or "").lower()
        actual_bad = bool(label) and label not in AUTHORITY_SAFE_ACTIONS

    return flag_hits + allowed_hits + int(actual_bad)
def read_busy(path: Path = BUSY_PATH) -> int | None:
try:
return int(path.read_text().strip())
@@ -234,6 +368,12 @@ def probe_classifier(timeout: float, busy_path: Path = BUSY_PATH, post_json: Cal
action: dict[str, Any] = raw_action if isinstance(raw_action, dict) else {}
row.escalate = int(bool(action.get("escalate") or labels.get("action_required") or labels.get("tool_needed")))
row.suppress = int(bool(action.get("suppress") or labels.get("no_op") or labels.get("duplicate")))
row.recommendation = extract_recommendation(data) or ("escalate" if row.escalate else "suppress" if row.suppress else "log")
row.confidence = extract_confidence(data)
row.confidence_bucket = confidence_bucket(row.confidence)
row.authority_violations = count_authority_violations(data)
if row.authority_violations:
row.warnings.append("authority_violation")
row.items = len(labels)
apply_proof(row, delta)
if not row.reachable:
@@ -387,10 +527,28 @@ def build_summary(rows: list[ServiceRow], artifact_path: str | None, counter_del
proof_ok = sum(1 for r in proof_rows if r.proof_ok)
gates_closed = sum(1 for r in rows if str(r.gate).startswith("closed:"))
fallbacks = sum(r.fallbacks for r in rows)
request_counts_by_service = {r.service: r.calls for r in rows if r.calls}
npu_busy_delta_us_by_service = {r.service: r.npu_delta_us for r in rows if r.npu_delta_us is not None}
fallbacks_by_service = {r.service: r.fallbacks for r in rows if r.fallbacks}
recommendation_counts = {"escalate": 0, "suppress": 0}
confidence_distribution: dict[str, int] = {"low": 0, "medium": 0, "high": 0, "unknown": 0}
authority_violations = 0
warnings: dict[str, int] = {}
for row in rows:
recommendation = (row.recommendation or "").lower()
if recommendation in recommendation_counts:
recommendation_counts[recommendation] += 1
else:
recommendation_counts["escalate"] += row.escalate or 0
recommendation_counts["suppress"] += row.suppress or 0
if row.confidence_bucket:
confidence_distribution[row.confidence_bucket] = confidence_distribution.get(row.confidence_bucket, 0) + 1
elif row.recommendation or row.escalate is not None or row.suppress is not None:
confidence_distribution["unknown"] += 1
authority_violations += row.authority_violations or 0
for warning in row.warnings:
warnings[warning] = warnings.get(warning, 0) + 1
confidence_distribution = {k: v for k, v in confidence_distribution.items() if v}
return {
"type": "summary",
"timestamp": started_at,
@@ -401,6 +559,12 @@ def build_summary(rows: list[ServiceRow], artifact_path: str | None, counter_del
"proof_ok": proof_ok,
"proof_total": len(proof_rows),
"fallbacks": fallbacks,
"fallbacks_by_service": fallbacks_by_service,
"request_counts_by_service": request_counts_by_service,
"npu_busy_delta_us_by_service": npu_busy_delta_us_by_service,
"confidence_distribution": confidence_distribution,
"recommendation_counts": {k: v for k, v in recommendation_counts.items() if v},
"authority_violations": authority_violations,
"gates_closed": gates_closed,
"warnings": warnings,
"artifact": artifact_path,
@@ -411,8 +575,14 @@ def render_text(summary: dict[str, Any], rows: list[ServiceRow]) -> str:
lines = [
f"NPU utilization digest {summary['timestamp']}",
f"counter={summary['counter']} delta_us={summary.get('delta_us')}",
f"services_ok={summary['services_ok']}/{summary['services_total']} proof_ok={summary['proof_ok']}/{summary['proof_total']} fallbacks={summary['fallbacks']} gates_closed={summary['gates_closed']}",
f"services_ok={summary['services_ok']}/{summary['services_total']} proof_ok={summary['proof_ok']}/{summary['proof_total']} fallbacks={summary['fallbacks']} authority_violations={summary['authority_violations']} gates_closed={summary['gates_closed']}",
]
rec_counts = summary.get("recommendation_counts") or {}
if rec_counts:
lines.append("recommendations: " + " ".join(f"{k}={v}" for k, v in sorted(rec_counts.items())))
conf_dist = summary.get("confidence_distribution") or {}
if conf_dist:
lines.append("confidence: " + " ".join(f"{k}={v}" for k, v in sorted(conf_dist.items())))
for r in rows:
parts = [f"- {r.service}:", f"ok={str(r.reachable).lower()}"]
if r.calls:
@@ -437,6 +607,12 @@ def render_text(summary: dict[str, Any], rows: list[ServiceRow]) -> str:
parts.append(f"suppress={r.suppress}")
if r.escalate is not None:
parts.append(f"escalate={r.escalate}")
if r.recommendation is not None:
parts.append(f"recommendation={r.recommendation}")
if r.confidence_bucket is not None:
parts.append(f"confidence={r.confidence_bucket}")
if r.authority_violations is not None:
parts.append(f"authority_violations={r.authority_violations}")
if r.loaded is not None:
parts.append(f"loaded={str(r.loaded).lower()}")
if r.allowed_roots_count is not None: