feat(npu): add advisory metrics to utilization digest
Roll up confidence, recommendation, authority, fallback, and service-level metrics, including v1 authority-flag handling.
This commit is contained in:
@@ -72,6 +72,10 @@ class ServiceRow:
|
||||
dry_run: bool | None = None
|
||||
suppress: int | None = None
|
||||
escalate: int | None = None
|
||||
recommendation: str | None = None
|
||||
confidence: float | None = None
|
||||
confidence_bucket: str | None = None
|
||||
authority_violations: int | None = None
|
||||
loaded: bool | None = None
|
||||
allowed_roots_count: int | None = None
|
||||
reason: str | None = None
|
||||
@@ -83,6 +87,136 @@ def compact_dict(obj: Any) -> dict[str, Any]:
|
||||
return {k: v for k, v in data.items() if v is not None and v != []}
|
||||
|
||||
|
||||
# Action names that count_authority_violations() accepts without flagging.
# The empty string and "none" cover payloads that report no action at all.
AUTHORITY_SAFE_ACTIONS: set[str] = {
    "",
    "none",
    "log",
    "observe",
    "dry_run",
    "recommend",
    "suppress",
    "escalate",
    "record_metric",
    "compare_with_expected_label",
    "include_in_digest",
    "open_review_ticket_candidate",
    "recommend_human_review",
}

# Canonical authority flag names; a truthy value for any of them in a
# payload's "authority_flags" mapping is counted as a violation.
AUTHORITY_FLAG_KEYS: set[str] = {
    "advisory_post",
    "atlas_routing",
    "broad_private_scan",
    "delivery_send",
    "gateway_restart",
    "live_routing",
    "memory_write",
    "outbound_send",
    "private_root_scan",
    "service_restart",
    "tool_execution",
    "vector_mutation",
}

# Alternative flag spellings mapped onto their canonical name in
# AUTHORITY_FLAG_KEYS.
AUTHORITY_FLAG_ALIASES: dict[str, str] = {
    # "can_*" spellings
    "can_route_atlas": "atlas_routing",
    "can_write_memory": "memory_write",
    "can_execute_tools": "tool_execution",
    "can_restart_services": "service_restart",
    "can_send_outbound": "outbound_send",
    "can_scan_private_roots": "private_root_scan",
    "can_mutate_vector_store": "vector_mutation",
    "can_post_advisory_event": "advisory_post",
    "can_change_gateway_config": "gateway_restart",
    # "may_*" spellings
    "may_route": "atlas_routing",
    "may_write_memory": "memory_write",
    "may_execute_tools": "tool_execution",
    "may_restart_services": "service_restart",
    "may_send_external": "outbound_send",
    "may_process_private_dirs": "private_root_scan",
    "may_mutate_vector_db": "vector_mutation",
    "may_change_live_config": "gateway_restart",
}
|
||||
|
||||
|
||||
def confidence_bucket(confidence: float | None) -> str | None:
|
||||
if confidence is None:
|
||||
return None
|
||||
if confidence >= 0.8:
|
||||
return "high"
|
||||
if confidence >= 0.5:
|
||||
return "medium"
|
||||
return "low"
|
||||
|
||||
|
||||
def coerce_confidence(value: Any) -> float | None:
|
||||
if isinstance(value, bool):
|
||||
return None
|
||||
if isinstance(value, (int, float)):
|
||||
return max(0.0, min(1.0, float(value)))
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
return max(0.0, min(1.0, float(value)))
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def extract_confidence(payload: dict[str, Any]) -> float | None:
    """Return the payload's confidence in ``[0.0, 1.0]``, or ``None`` if absent.

    A usable top-level ``"confidence"`` value wins. Otherwise every entry of
    the ``"labels"`` mapping is inspected and the highest usable score is
    returned. A label may be a bare value, or a dict carrying
    ``"confidence"``, ``"score"`` or ``"probability"``; those keys are tried
    in that order and the first one that coerces to a number is used.
    """
    direct = coerce_confidence(payload.get("confidence"))
    if direct is not None:
        return direct

    labels = payload.get("labels")
    if not isinstance(labels, dict):
        return None

    scores: list[float] = []
    for label in labels.values():
        if isinstance(label, dict):
            # A missing key yields None, which coerce_confidence rejects, so
            # an unusable "confidence" falls through to "score"/"probability".
            candidates = [label.get(key) for key in ("confidence", "score", "probability")]
        else:
            candidates = [label]
        for candidate in candidates:
            score = coerce_confidence(candidate)
            if score is not None:
                scores.append(score)
                break
    return max(scores, default=None)
|
||||
|
||||
|
||||
def extract_recommendation(payload: dict[str, Any]) -> str | None:
|
||||
for key in ("recommendation", "classification", "input_class"):
|
||||
value = payload.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
return value[:48]
|
||||
raw_action = payload.get("action")
|
||||
action: dict[str, Any] = raw_action if isinstance(raw_action, dict) else {}
|
||||
value = action.get("recommendation") or action.get("type")
|
||||
return str(value)[:48] if value else None
|
||||
|
||||
|
||||
def count_authority_violations(payload: dict[str, Any]) -> int:
    """Tally hints in an advisory response that go beyond read-only/dry-run authority.

    Handles both legacy compact payloads and `npu_advisory_decision_v1`.
    Three parts of the payload contribute to the total:

    * ``authority_flags``: one violation per truthy flag whose name, after
      alias resolution, is a known authority flag.
    * ``allowed_actions``: one violation per listed action that is not in the
      safe-action set.
    * ``actual_action``: at most one violation. A dict counts when it reports
      ``performed``, any ``side_effects``, or an unsafe ``kind``; any other
      value counts when its lowercase string form is non-empty and unsafe.

    Schema-safe allowed actions and object-shaped no-op actual actions are
    therefore never counted.
    """
    flag_map = payload.get("authority_flags")
    if not isinstance(flag_map, dict):
        flag_map = {}
    flag_hits = sum(
        1
        for name, enabled in flag_map.items()
        if AUTHORITY_FLAG_ALIASES.get(name, name) in AUTHORITY_FLAG_KEYS and bool(enabled)
    )

    permitted = payload.get("allowed_actions")
    if not isinstance(permitted, list):
        permitted = []
    allowed_hits = sum(
        1 for entry in permitted if str(entry).lower() not in AUTHORITY_SAFE_ACTIONS
    )

    outcome = payload.get("actual_action")
    if isinstance(outcome, dict):
        did_something = bool(outcome.get("performed"))
        has_side_effects = bool(outcome.get("side_effects"))
        # A missing or empty kind is treated as "none".
        kind = str(outcome.get("kind") or "none").lower()
        unsafe_outcome = (
            did_something
            or has_side_effects
            or kind not in AUTHORITY_SAFE_ACTIONS | {"recorded_metric", "dry_run_reported"}
        )
    else:
        reported = str(outcome or "").lower()
        unsafe_outcome = bool(reported) and reported not in AUTHORITY_SAFE_ACTIONS

    return flag_hits + allowed_hits + int(unsafe_outcome)
|
||||
|
||||
|
||||
def read_busy(path: Path = BUSY_PATH) -> int | None:
|
||||
try:
|
||||
return int(path.read_text().strip())
|
||||
@@ -234,6 +368,12 @@ def probe_classifier(timeout: float, busy_path: Path = BUSY_PATH, post_json: Cal
|
||||
action: dict[str, Any] = raw_action if isinstance(raw_action, dict) else {}
|
||||
row.escalate = int(bool(action.get("escalate") or labels.get("action_required") or labels.get("tool_needed")))
|
||||
row.suppress = int(bool(action.get("suppress") or labels.get("no_op") or labels.get("duplicate")))
|
||||
row.recommendation = extract_recommendation(data) or ("escalate" if row.escalate else "suppress" if row.suppress else "log")
|
||||
row.confidence = extract_confidence(data)
|
||||
row.confidence_bucket = confidence_bucket(row.confidence)
|
||||
row.authority_violations = count_authority_violations(data)
|
||||
if row.authority_violations:
|
||||
row.warnings.append("authority_violation")
|
||||
row.items = len(labels)
|
||||
apply_proof(row, delta)
|
||||
if not row.reachable:
|
||||
@@ -387,10 +527,28 @@ def build_summary(rows: list[ServiceRow], artifact_path: str | None, counter_del
|
||||
proof_ok = sum(1 for r in proof_rows if r.proof_ok)
|
||||
gates_closed = sum(1 for r in rows if str(r.gate).startswith("closed:"))
|
||||
fallbacks = sum(r.fallbacks for r in rows)
|
||||
request_counts_by_service = {r.service: r.calls for r in rows if r.calls}
|
||||
npu_busy_delta_us_by_service = {r.service: r.npu_delta_us for r in rows if r.npu_delta_us is not None}
|
||||
fallbacks_by_service = {r.service: r.fallbacks for r in rows if r.fallbacks}
|
||||
recommendation_counts = {"escalate": 0, "suppress": 0}
|
||||
confidence_distribution: dict[str, int] = {"low": 0, "medium": 0, "high": 0, "unknown": 0}
|
||||
authority_violations = 0
|
||||
warnings: dict[str, int] = {}
|
||||
for row in rows:
|
||||
recommendation = (row.recommendation or "").lower()
|
||||
if recommendation in recommendation_counts:
|
||||
recommendation_counts[recommendation] += 1
|
||||
else:
|
||||
recommendation_counts["escalate"] += row.escalate or 0
|
||||
recommendation_counts["suppress"] += row.suppress or 0
|
||||
if row.confidence_bucket:
|
||||
confidence_distribution[row.confidence_bucket] = confidence_distribution.get(row.confidence_bucket, 0) + 1
|
||||
elif row.recommendation or row.escalate is not None or row.suppress is not None:
|
||||
confidence_distribution["unknown"] += 1
|
||||
authority_violations += row.authority_violations or 0
|
||||
for warning in row.warnings:
|
||||
warnings[warning] = warnings.get(warning, 0) + 1
|
||||
confidence_distribution = {k: v for k, v in confidence_distribution.items() if v}
|
||||
return {
|
||||
"type": "summary",
|
||||
"timestamp": started_at,
|
||||
@@ -401,6 +559,12 @@ def build_summary(rows: list[ServiceRow], artifact_path: str | None, counter_del
|
||||
"proof_ok": proof_ok,
|
||||
"proof_total": len(proof_rows),
|
||||
"fallbacks": fallbacks,
|
||||
"fallbacks_by_service": fallbacks_by_service,
|
||||
"request_counts_by_service": request_counts_by_service,
|
||||
"npu_busy_delta_us_by_service": npu_busy_delta_us_by_service,
|
||||
"confidence_distribution": confidence_distribution,
|
||||
"recommendation_counts": {k: v for k, v in recommendation_counts.items() if v},
|
||||
"authority_violations": authority_violations,
|
||||
"gates_closed": gates_closed,
|
||||
"warnings": warnings,
|
||||
"artifact": artifact_path,
|
||||
@@ -411,8 +575,14 @@ def render_text(summary: dict[str, Any], rows: list[ServiceRow]) -> str:
|
||||
lines = [
|
||||
f"NPU utilization digest {summary['timestamp']}",
|
||||
f"counter={summary['counter']} delta_us={summary.get('delta_us')}",
|
||||
f"services_ok={summary['services_ok']}/{summary['services_total']} proof_ok={summary['proof_ok']}/{summary['proof_total']} fallbacks={summary['fallbacks']} gates_closed={summary['gates_closed']}",
|
||||
f"services_ok={summary['services_ok']}/{summary['services_total']} proof_ok={summary['proof_ok']}/{summary['proof_total']} fallbacks={summary['fallbacks']} authority_violations={summary['authority_violations']} gates_closed={summary['gates_closed']}",
|
||||
]
|
||||
rec_counts = summary.get("recommendation_counts") or {}
|
||||
if rec_counts:
|
||||
lines.append("recommendations: " + " ".join(f"{k}={v}" for k, v in sorted(rec_counts.items())))
|
||||
conf_dist = summary.get("confidence_distribution") or {}
|
||||
if conf_dist:
|
||||
lines.append("confidence: " + " ".join(f"{k}={v}" for k, v in sorted(conf_dist.items())))
|
||||
for r in rows:
|
||||
parts = [f"- {r.service}:", f"ok={str(r.reachable).lower()}"]
|
||||
if r.calls:
|
||||
@@ -437,6 +607,12 @@ def render_text(summary: dict[str, Any], rows: list[ServiceRow]) -> str:
|
||||
parts.append(f"suppress={r.suppress}")
|
||||
if r.escalate is not None:
|
||||
parts.append(f"escalate={r.escalate}")
|
||||
if r.recommendation is not None:
|
||||
parts.append(f"recommendation={r.recommendation}")
|
||||
if r.confidence_bucket is not None:
|
||||
parts.append(f"confidence={r.confidence_bucket}")
|
||||
if r.authority_violations is not None:
|
||||
parts.append(f"authority_violations={r.authority_violations}")
|
||||
if r.loaded is not None:
|
||||
parts.append(f"loaded={str(r.loaded).lower()}")
|
||||
if r.allowed_roots_count is not None:
|
||||
|
||||
Reference in New Issue
Block a user