feat(npu): add advisory metrics to utilization digest

Roll up confidence, recommendation, authority, fallback, and service-level metrics, including v1 authority-flag handling.
This commit is contained in:
William Valentin
2026-06-06 15:30:31 -07:00
parent dae2a57124
commit 72434c8bc3
3 changed files with 254 additions and 4 deletions
+76 -2
View File
@@ -67,7 +67,15 @@ def test_classifier_dry_run_payload(tmp_path, monkeypatch):
def fake_post(url, payload, timeout):
seen.update(payload)
busy.write_text("35")
return 200, {"labels": {"tool_needed": True, "duplicate": False}, "npu_busy_delta_us": 25}
return 200, {
"labels": {"tool_needed": True, "duplicate": False},
"recommendation": "escalate",
"confidence": 0.84,
"authority_flags": {"tool_execution": False, "memory_write": False},
"allowed_actions": ["log", "recommend"],
"actual_action": "dry_run",
"npu_busy_delta_us": 25,
}
monkeypatch.setattr(digest, "health_row", fake_health)
row = digest.probe_classifier(1, busy_path=busy, post_json=fake_post)
@@ -75,6 +83,10 @@ def test_classifier_dry_run_payload(tmp_path, monkeypatch):
assert seen["options"]["include_evidence"] is False
assert row.escalate == 1
assert row.suppress == 0
assert row.recommendation == "escalate"
assert row.confidence == 0.84
assert row.confidence_bucket == "high"
assert row.authority_violations == 0
assert row.proof_ok is True
@@ -145,15 +157,77 @@ def test_disabled_proof_smokes_count_as_fallbacks(monkeypatch):
def test_jsonl_shape(tmp_path):
    """Check the layout of the JSONL digest written by ``digest.write_jsonl``.

    A single ``embeddings`` service row (one call, NPU delta of 1) is
    summarised and written under ``tmp_path``.  The first JSON line must be
    the summary record carrying the per-service request-count and NPU
    busy-delta rollups; the second must be the service record itself.
    """
    # The fixture is built exactly once.  ``calls=1`` is required for the
    # ``request_counts_by_service`` assertion below; an earlier duplicate
    # assignment without it was dead code and has been dropped.
    rows = [
        digest.ServiceRow(
            service="embeddings",
            reachable=True,
            probe_ran=True,
            proof_ok=True,
            calls=1,
            npu_delta_us=1,
        )
    ]
    summary = digest.build_summary(rows, None, 1, "2026-06-05T14:20:00-07:00")
    path = digest.write_jsonl(summary, rows, tmp_path)

    # One JSON document per line: summary first, then the service row.
    lines = [json.loads(line) for line in path.read_text().splitlines()]
    assert lines[0]["type"] == "summary"
    assert lines[0]["request_counts_by_service"] == {"embeddings": 1}
    assert lines[0]["npu_busy_delta_us_by_service"] == {"embeddings": 1}
    assert lines[1]["type"] == "service"
    assert lines[1]["service"] == "embeddings"
def test_summary_observability_rollups_and_text():
    """Verify per-service rollups in the summary and their text rendering.

    Three rows are summarised: a classifier row carrying a recommendation and
    a high-confidence bucket, a doc_triage row with one fallback, and an
    advisory_gateway row with one authority violation and no calls.  The
    expected rollups below list only the services/buckets the assertions
    require to be present.
    """
    classifier_row = digest.ServiceRow(
        service="classifier",
        reachable=True,
        calls=1,
        npu_delta_us=25,
        fallbacks=0,
        escalate=1,
        suppress=0,
        recommendation="escalate",
        confidence=0.84,
        confidence_bucket="high",
        authority_violations=0,
    )
    doc_triage_row = digest.ServiceRow(
        service="doc_triage",
        reachable=True,
        calls=1,
        npu_delta_us=7,
        fallbacks=1,
        warnings=["no_positive_sysfs_delta"],
    )
    gateway_row = digest.ServiceRow(
        service="advisory_gateway",
        reachable=True,
        gate="closed:advisory-post",
        authority_violations=1,
        warnings=["authority_violation"],
    )
    rows = [classifier_row, doc_triage_row, gateway_row]

    summary = digest.build_summary(rows, None, 32, "2026-06-05T14:20:00-07:00")

    # Expected value for each summary key, checked in declaration order.
    expected_rollups = {
        "request_counts_by_service": {"classifier": 1, "doc_triage": 1},
        "npu_busy_delta_us_by_service": {"classifier": 25, "doc_triage": 7},
        "fallbacks_by_service": {"doc_triage": 1},
        "confidence_distribution": {"high": 1},
        "recommendation_counts": {"escalate": 1},
        "authority_violations": 1,
    }
    for key, expected in expected_rollups.items():
        assert summary[key] == expected

    # The human-readable digest must surface the same rollups.
    rendered = digest.render_text(summary, rows)
    for fragment in (
        "authority_violations=1",
        "recommendations: escalate=1",
        "confidence: high=1",
    ):
        assert fragment in rendered
def test_authority_violation_detection():
    """A payload with three authority problems must count three violations.

    NOTE(review): presumably one violation each for the enabled
    ``tool_execution`` flag, the ``service_restart`` allowed action, and the
    ``outbound_send`` actual action -- confirm against
    ``digest.count_authority_violations``.
    """
    payload = {
        "authority_flags": {"tool_execution": True, "memory_write": False},
        "allowed_actions": ["log", "service_restart"],
        "actual_action": "outbound_send",
    }

    violations = digest.count_authority_violations(payload)

    assert violations == 3
def test_v1_authority_violation_detection():
    """Exercise violation counting on a v1-style authority-flag payload.

    With every ``can_*`` flag False (and ``requires_human_approval`` /
    ``advisory_only`` True) the payload must count zero violations.  Flipping
    only ``can_execute_tools`` to True must count exactly one.
    """
    safe_flags = {
        "can_route_atlas": False,
        "can_write_memory": False,
        "can_execute_tools": False,
        "can_restart_services": False,
        "can_send_outbound": False,
        "can_scan_private_roots": False,
        "can_mutate_vector_store": False,
        "can_post_advisory_event": False,
        "can_change_gateway_config": False,
        "requires_human_approval": True,
        "advisory_only": True,
    }
    safe_payload = {
        "authority_flags": safe_flags,
        "allowed_actions": [
            "record_metric",
            "compare_with_expected_label",
            "include_in_digest",
            "recommend_human_review",
        ],
        "actual_action": {
            "kind": "dry_run_reported",
            "performed": False,
            "performed_by": "harness",
            "side_effects": [],
        },
    }
    assert digest.count_authority_violations(safe_payload) == 0

    # Shallow copy with a fresh flags dict so the safe payload stays untouched.
    unsafe = {
        **safe_payload,
        "authority_flags": {**safe_flags, "can_execute_tools": True},
    }
    assert digest.count_authority_violations(unsafe) == 1
def test_recommendation_only_and_zero_confidence_rollups():
    """A confidence of 0.0 and a bare recommendation must still be rolled up.

    ``extract_confidence`` is expected to return the label's ``confidence``
    value of 0.0 for a payload whose label also carries a ``score`` of 0.9.
    A row with only a recommendation, a 0.0 confidence, and a "low" bucket
    must appear in both summary rollups.
    """
    payload = {
        "labels": {"no_op": {"confidence": 0.0, "score": 0.9}},
        "recommendation": "suppress",
    }
    extracted = digest.extract_confidence(payload)
    assert extracted == 0.0

    suppress_row = digest.ServiceRow(
        service="classifier",
        reachable=True,
        recommendation="suppress",
        confidence=0.0,
        confidence_bucket="low",
    )
    summary = digest.build_summary(
        [suppress_row], None, None, "2026-06-05T14:20:00-07:00"
    )

    recommendation_counts = summary["recommendation_counts"]
    confidence_distribution = summary["confidence_distribution"]
    assert recommendation_counts == {"suppress": 1}
    assert confidence_distribution == {"low": 1}
def test_exit_codes(monkeypatch):
rows = [digest.ServiceRow(service="embeddings", reachable=True, probe_ran=True, proof_ok=False, warnings=["no_positive_sysfs_delta"])]
summary = digest.build_summary(rows, None, 0, "2026-06-05T14:20:00-07:00")