feat(audit): persist phase0 backend drift report artifacts
This commit is contained in:
@@ -0,0 +1,201 @@
|
||||
{
|
||||
"generated_at": "2026-02-27T17:04:49.009Z",
|
||||
"artifacts_dir": "/home/will/lab/flynn/docs/plans/artifacts",
|
||||
"backends": [
|
||||
"pi_embedded",
|
||||
"native"
|
||||
],
|
||||
"report_tag": "2026-02-27",
|
||||
"max_age_hours": 36,
|
||||
"thresholds": {
|
||||
"requireBaselineHistory": false,
|
||||
"minCandidateSampledEvents": 10,
|
||||
"maxSampledEventsDropPct": 80,
|
||||
"maxRunOutcomesDropPct": 80,
|
||||
"maxCompletionRateDropPp": 35,
|
||||
"maxCancelRateIncreasePp": 25,
|
||||
"maxErrorRateIncreasePp": 25,
|
||||
"maxCancelLatencyP95IncreaseMs": 6000
|
||||
},
|
||||
"overall_pass": true,
|
||||
"reports": {
|
||||
"summary_json_out": "/home/will/lab/flynn/docs/plans/artifacts/phase0_baseline_live_backend_drift_2026-02-27.json",
|
||||
"summary_md_out": "/home/will/lab/flynn/docs/plans/artifacts/phase0_baseline_live_backend_drift_2026-02-27.md"
|
||||
},
|
||||
"results": [
|
||||
{
|
||||
"backend": "pi_embedded",
|
||||
"pass": true,
|
||||
"candidate": {
|
||||
"tag": "2026-02-27",
|
||||
"path": "/home/will/lab/flynn/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.json",
|
||||
"generated_at": "2026-02-27T16:45:18.488Z"
|
||||
},
|
||||
"baseline": null,
|
||||
"comparison": {
|
||||
"baseline": null,
|
||||
"candidate": {
|
||||
"source_event_count": 110,
|
||||
"sampled_event_count": 56,
|
||||
"run_total_outcomes": 25,
|
||||
"completion_rate_pct": 100,
|
||||
"cancel_rate_pct": 0,
|
||||
"error_rate_pct": 0,
|
||||
"cancel_latency_p95_ms": null,
|
||||
"reaction_match_rate_pct": 0,
|
||||
"reaction_skip_rate_pct": 100
|
||||
},
|
||||
"deltas": {
|
||||
"sampled_event_count_pct": null,
|
||||
"run_total_outcomes_pct": null,
|
||||
"completion_rate_pp": null,
|
||||
"cancel_rate_pp": null,
|
||||
"error_rate_pp": null,
|
||||
"cancel_latency_p95_ms": null,
|
||||
"reaction_match_rate_pp": null,
|
||||
"reaction_skip_rate_pp": null
|
||||
}
|
||||
},
|
||||
"freshness": {
|
||||
"enabled": true,
|
||||
"pass": true,
|
||||
"actual_age_hours": 0.33,
|
||||
"threshold_hours": 36
|
||||
},
|
||||
"drift_gate": {
|
||||
"pass": true,
|
||||
"criteria": [
|
||||
{
|
||||
"criterion": "candidate_sampled_events",
|
||||
"pass": true,
|
||||
"actual": "56",
|
||||
"threshold": ">= 10"
|
||||
},
|
||||
{
|
||||
"criterion": "sampled_events_drop_pct",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 80"
|
||||
},
|
||||
{
|
||||
"criterion": "run_outcomes_drop_pct",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 80"
|
||||
},
|
||||
{
|
||||
"criterion": "completion_rate_drop_pp",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 35"
|
||||
},
|
||||
{
|
||||
"criterion": "cancel_rate_increase_pp",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 25"
|
||||
},
|
||||
{
|
||||
"criterion": "error_rate_increase_pp",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 25"
|
||||
},
|
||||
{
|
||||
"criterion": "cancel_latency_p95_increase_ms",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 6000"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"backend": "native",
|
||||
"pass": true,
|
||||
"candidate": {
|
||||
"tag": "2026-02-27",
|
||||
"path": "/home/will/lab/flynn/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.json",
|
||||
"generated_at": "2026-02-27T16:45:18.490Z"
|
||||
},
|
||||
"baseline": null,
|
||||
"comparison": {
|
||||
"baseline": null,
|
||||
"candidate": {
|
||||
"source_event_count": 110,
|
||||
"sampled_event_count": 13,
|
||||
"run_total_outcomes": 2,
|
||||
"completion_rate_pct": 100,
|
||||
"cancel_rate_pct": 0,
|
||||
"error_rate_pct": 0,
|
||||
"cancel_latency_p95_ms": null,
|
||||
"reaction_match_rate_pct": null,
|
||||
"reaction_skip_rate_pct": null
|
||||
},
|
||||
"deltas": {
|
||||
"sampled_event_count_pct": null,
|
||||
"run_total_outcomes_pct": null,
|
||||
"completion_rate_pp": null,
|
||||
"cancel_rate_pp": null,
|
||||
"error_rate_pp": null,
|
||||
"cancel_latency_p95_ms": null,
|
||||
"reaction_match_rate_pp": null,
|
||||
"reaction_skip_rate_pp": null
|
||||
}
|
||||
},
|
||||
"freshness": {
|
||||
"enabled": true,
|
||||
"pass": true,
|
||||
"actual_age_hours": 0.33,
|
||||
"threshold_hours": 36
|
||||
},
|
||||
"drift_gate": {
|
||||
"pass": true,
|
||||
"criteria": [
|
||||
{
|
||||
"criterion": "candidate_sampled_events",
|
||||
"pass": true,
|
||||
"actual": "13",
|
||||
"threshold": ">= 10"
|
||||
},
|
||||
{
|
||||
"criterion": "sampled_events_drop_pct",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 80"
|
||||
},
|
||||
{
|
||||
"criterion": "run_outcomes_drop_pct",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 80"
|
||||
},
|
||||
{
|
||||
"criterion": "completion_rate_drop_pp",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 35"
|
||||
},
|
||||
{
|
||||
"criterion": "cancel_rate_increase_pp",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 25"
|
||||
},
|
||||
{
|
||||
"criterion": "error_rate_increase_pp",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 25"
|
||||
},
|
||||
{
|
||||
"criterion": "cancel_latency_p95_increase_ms",
|
||||
"pass": true,
|
||||
"actual": "n/a",
|
||||
"threshold": "<= 6000"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
# Phase-0 Backend Drift Check
|
||||
|
||||
Generated at: 2026-02-27T17:04:49.009Z
|
||||
Artifacts: /home/will/lab/flynn/docs/plans/artifacts
|
||||
Backends: pi_embedded, native
|
||||
Freshness max age (hours): 36
|
||||
Overall gate: PASS
|
||||
|
||||
## Thresholds
|
||||
- requireBaselineHistory: false
|
||||
- minCandidateSampledEvents: 10
|
||||
- maxSampledEventsDropPct: 80
|
||||
- maxRunOutcomesDropPct: 80
|
||||
- maxCompletionRateDropPp: 35
|
||||
- maxCancelRateIncreasePp: 25
|
||||
- maxErrorRateIncreasePp: 25
|
||||
- maxCancelLatencyP95IncreaseMs: 6000
|
||||
|
||||
## pi_embedded
|
||||
- status: PASS
|
||||
- candidate: tag=2026-02-27 file=/home/will/lab/flynn/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.json
|
||||
- candidate generated_at: 2026-02-27T16:45:18.488Z
|
||||
- baseline: none
|
||||
- candidate snapshot: sampled=56 outcomes=25 completion=100% cancel=0% error=0% cancel_p95_ms=n/a
|
||||
- deltas:
|
||||
sampled_event_count_pct=n/a
|
||||
run_total_outcomes_pct=n/a
|
||||
completion_rate_pp=n/a
|
||||
cancel_rate_pp=n/a
|
||||
error_rate_pp=n/a
|
||||
cancel_latency_p95_ms=n/a
|
||||
reaction_match_rate_pp=n/a
|
||||
reaction_skip_rate_pp=n/a
|
||||
- freshness gate: PASS (age_hours=0.33 threshold=36)
|
||||
- drift gate: PASS
|
||||
PASS candidate_sampled_events actual=56 threshold=>= 10
|
||||
PASS sampled_events_drop_pct actual=n/a threshold=<= 80
|
||||
PASS run_outcomes_drop_pct actual=n/a threshold=<= 80
|
||||
PASS completion_rate_drop_pp actual=n/a threshold=<= 35
|
||||
PASS cancel_rate_increase_pp actual=n/a threshold=<= 25
|
||||
PASS error_rate_increase_pp actual=n/a threshold=<= 25
|
||||
PASS cancel_latency_p95_increase_ms actual=n/a threshold=<= 6000
|
||||
|
||||
## native
|
||||
- status: PASS
|
||||
- candidate: tag=2026-02-27 file=/home/will/lab/flynn/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.json
|
||||
- candidate generated_at: 2026-02-27T16:45:18.490Z
|
||||
- baseline: none
|
||||
- candidate snapshot: sampled=13 outcomes=2 completion=100% cancel=0% error=0% cancel_p95_ms=n/a
|
||||
- deltas:
|
||||
sampled_event_count_pct=n/a
|
||||
run_total_outcomes_pct=n/a
|
||||
completion_rate_pp=n/a
|
||||
cancel_rate_pp=n/a
|
||||
error_rate_pp=n/a
|
||||
cancel_latency_p95_ms=n/a
|
||||
reaction_match_rate_pp=n/a
|
||||
reaction_skip_rate_pp=n/a
|
||||
- freshness gate: PASS (age_hours=0.33 threshold=36)
|
||||
- drift gate: PASS
|
||||
PASS candidate_sampled_events actual=13 threshold=>= 10
|
||||
PASS sampled_events_drop_pct actual=n/a threshold=<= 80
|
||||
PASS run_outcomes_drop_pct actual=n/a threshold=<= 80
|
||||
PASS completion_rate_drop_pp actual=n/a threshold=<= 35
|
||||
PASS cancel_rate_increase_pp actual=n/a threshold=<= 25
|
||||
PASS error_rate_increase_pp actual=n/a threshold=<= 25
|
||||
PASS cancel_latency_p95_increase_ms actual=n/a threshold=<= 6000
|
||||
|
||||
Reference in New Issue
Block a user