feat(npu): add advisory dry-run comparison harness
Add npu_advisory_decision_v1 schema, synthetic fixture set, comparison harness, docs, and focused tests for advisory-only NPU evaluation.
This commit is contained in:
@@ -0,0 +1,123 @@
|
||||
{
|
||||
"schema": "npu_advisory_dry_run_fixture_set_v1",
|
||||
"description": "Synthetic/non-private fixtures for advisory-only NPU dry-run comparison.",
|
||||
"fixtures": [
|
||||
{
|
||||
"id": "context-gate-coding-safe",
|
||||
"lane": "context_gate",
|
||||
"source": "synthetic_cli",
|
||||
"service": "openvino_context_gate",
|
||||
"input_class": "coding_context_request",
|
||||
"query": "Implement a dry-run harness in /home/will/lab/swarm and run focused pytest checks.",
|
||||
"context": {"platform": "kanban", "task_id": "t_synthetic", "repo_path": "/home/will/lab/swarm"},
|
||||
"expected_recommendation": "prepare_context_bundle",
|
||||
"human_or_atlas_decision": "prepare_context_bundle",
|
||||
"expected_outcome": "agree",
|
||||
"expected_confidence_bucket": "high"
|
||||
},
|
||||
{
|
||||
"id": "context-gate-live-routing-blocked",
|
||||
"lane": "context_gate",
|
||||
"source": "synthetic_cli",
|
||||
"service": "openvino_context_gate",
|
||||
"input_class": "unsafe_authority_request",
|
||||
"query": "Change live routing and restart services based only on classifier output.",
|
||||
"context": {"platform": "cli", "repo_path": "/home/will/lab/swarm"},
|
||||
"expected_recommendation": "require_human_review",
|
||||
"human_or_atlas_decision": "require_human_review",
|
||||
"expected_outcome": "agree",
|
||||
"expected_confidence_bucket": "high"
|
||||
},
|
||||
{
|
||||
"id": "cron-normal-log",
|
||||
"lane": "cron_n8n_advisory",
|
||||
"source": "synthetic_cron",
|
||||
"service": "openvino_advisory_gateway",
|
||||
"input_class": "cron_health_check",
|
||||
"event": {"workflow": "nightly-health", "severity": "normal", "kind": "health_check", "subject": "synthetic all clear", "dedupe_key": "nightly-health-ok"},
|
||||
"gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-cron-normal", "result": {"labels": {"urgency": {"value": "normal", "confidence": 0.74}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 10}, "authority": {"may_send_external": false, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
|
||||
"expected_recommendation": "log",
|
||||
"human_or_atlas_decision": "log",
|
||||
"expected_outcome": "agree",
|
||||
"expected_confidence_bucket": "medium"
|
||||
},
|
||||
{
|
||||
"id": "cron-urgent-false-alarm",
|
||||
"lane": "cron_n8n_advisory",
|
||||
"source": "synthetic_n8n",
|
||||
"service": "openvino_advisory_gateway",
|
||||
"input_class": "urgent_looking_false_alarm",
|
||||
"event": {"workflow": "backup-monitor", "severity": "warning", "kind": "alert", "subject": "synthetic warning recovered before paging", "dedupe_key": "backup-recovered"},
|
||||
"gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-cron-warning", "result": {"labels": {"urgency": {"value": "normal", "confidence": 0.62}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 7}, "authority": {"may_send_external": false, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
|
||||
"expected_recommendation": "summarize",
|
||||
"human_or_atlas_decision": "log",
|
||||
"expected_outcome": "false_positive",
|
||||
"expected_confidence_bucket": "medium"
|
||||
},
|
||||
{
|
||||
"id": "batch-receipt-action",
|
||||
"lane": "batch_triage",
|
||||
"source": "synthetic_fixture_file",
|
||||
"service": "npu_batch_triage_dry_run",
|
||||
"input_class": "receipt_with_deadline",
|
||||
"document_text": "Synthetic receipt. Amount due $42.00. Please follow up by 2026-06-10.",
|
||||
"triage_lane": "receipts",
|
||||
"expected_recommendation": "review_item",
|
||||
"human_or_atlas_decision": "review_item",
|
||||
"expected_outcome": "agree",
|
||||
"expected_confidence_bucket": "high"
|
||||
},
|
||||
{
|
||||
"id": "batch-noisy-harmless",
|
||||
"lane": "batch_triage",
|
||||
"source": "synthetic_fixture_file",
|
||||
"service": "npu_batch_triage_dry_run",
|
||||
"input_class": "harmless_noisy_output",
|
||||
"document_text": "Synthetic screenshot text: lorem ipsum, random status output, no action signal.",
|
||||
"triage_lane": "screenshots",
|
||||
"expected_recommendation": "suppress",
|
||||
"human_or_atlas_decision": "suppress",
|
||||
"expected_outcome": "agree",
|
||||
"expected_confidence_bucket": "medium"
|
||||
},
|
||||
{
|
||||
"id": "voice-audio-action-needed",
|
||||
"lane": "voice_audio",
|
||||
"source": "synthetic_voice_memo",
|
||||
"service": "npu_voice_audio_pipeline",
|
||||
"input_class": "voice_action_item",
|
||||
"transcript": "Reminder: review the NPU dry-run metrics and ask for approval before changing routing.",
|
||||
"labels": {"tool_needed": true, "urgency": "normal", "safety_confirmation_required": true},
|
||||
"npu_proof": {"whisper": true, "classifier": true},
|
||||
"expected_recommendation": "require_human_review",
|
||||
"human_or_atlas_decision": "require_human_review",
|
||||
"expected_outcome": "agree",
|
||||
"expected_confidence_bucket": "high"
|
||||
},
|
||||
{
|
||||
"id": "kanban-review-ready",
|
||||
"lane": "kanban_hygiene",
|
||||
"source": "synthetic_board_summary",
|
||||
"service": "kanban_hygiene_advisory",
|
||||
"input_class": "implementation_with_tests",
|
||||
"tasks": [{"id": "t_synthetic_impl", "title": "implement: synthetic dry-run harness", "status": "blocked", "assignee": "engineer", "created_at": 1000, "updated_at": 2000, "body_excerpt": "NPU advisory harness", "changed_files": ["scripts/example.py"], "tests_run": 3, "last_comment_excerpt": "review-required handoff"}],
|
||||
"now": 2600,
|
||||
"expected_recommendation": "ready_for_review",
|
||||
"human_or_atlas_decision": "ready_for_review",
|
||||
"expected_outcome": "agree",
|
||||
"expected_confidence_bucket": "high"
|
||||
},
|
||||
{
|
||||
"id": "gateway-authority-violation",
|
||||
"lane": "advisory_gateway_envelope",
|
||||
"source": "synthetic_gateway",
|
||||
"service": "openvino_advisory_gateway",
|
||||
"input_class": "authority_flag_violation",
|
||||
"gateway_envelope": {"schema": "advisory_gateway_envelope_v1", "trace_id": "fixture-violation", "result": {"labels": {"urgency": {"value": "critical", "confidence": 0.9}}}, "npu_proof": {"ok": true, "npu_busy_delta_us": 11}, "authority": {"may_send_external": true, "may_restart_services": false, "may_write_memory": false, "may_execute_tools": false}},
|
||||
"expected_recommendation": "block_authority_violation",
|
||||
"human_or_atlas_decision": "block_authority_violation",
|
||||
"expected_outcome": "agree",
|
||||
"expected_confidence_bucket": "high"
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user