202 lines
5.6 KiB
JSON
202 lines
5.6 KiB
JSON
{
|
|
"generated_at": "2026-02-27T17:36:02.803Z",
|
|
"artifacts_dir": "/home/will/lab/flynn/docs/plans/artifacts",
|
|
"backends": [
|
|
"pi_embedded",
|
|
"native"
|
|
],
|
|
"report_tag": "2026-02-27",
|
|
"max_age_hours": 36,
|
|
"thresholds": {
|
|
"requireBaselineHistory": false,
|
|
"minCandidateSampledEvents": 10,
|
|
"maxSampledEventsDropPct": 80,
|
|
"maxRunOutcomesDropPct": 80,
|
|
"maxCompletionRateDropPp": 35,
|
|
"maxCancelRateIncreasePp": 25,
|
|
"maxErrorRateIncreasePp": 25,
|
|
"maxCancelLatencyP95IncreaseMs": 6000
|
|
},
|
|
"overall_pass": true,
|
|
"reports": {
|
|
"summary_json_out": "/home/will/lab/flynn/docs/plans/artifacts/phase0_baseline_live_backend_drift_2026-02-27.json",
|
|
"summary_md_out": "/home/will/lab/flynn/docs/plans/artifacts/phase0_baseline_live_backend_drift_2026-02-27.md"
|
|
},
|
|
"results": [
|
|
{
|
|
"backend": "pi_embedded",
|
|
"pass": true,
|
|
"candidate": {
|
|
"tag": "2026-02-27",
|
|
"path": "/home/will/lab/flynn/docs/plans/artifacts/phase0_baseline_live_backend_pi_embedded_2026-02-27.json",
|
|
"generated_at": "2026-02-27T17:36:02.214Z"
|
|
},
|
|
"baseline": null,
|
|
"comparison": {
|
|
"baseline": null,
|
|
"candidate": {
|
|
"source_event_count": 115,
|
|
"sampled_event_count": 59,
|
|
"run_total_outcomes": 26,
|
|
"completion_rate_pct": 100,
|
|
"cancel_rate_pct": 0,
|
|
"error_rate_pct": 0,
|
|
"cancel_latency_p95_ms": null,
|
|
"reaction_match_rate_pct": 0,
|
|
"reaction_skip_rate_pct": 100
|
|
},
|
|
"deltas": {
|
|
"sampled_event_count_pct": null,
|
|
"run_total_outcomes_pct": null,
|
|
"completion_rate_pp": null,
|
|
"cancel_rate_pp": null,
|
|
"error_rate_pp": null,
|
|
"cancel_latency_p95_ms": null,
|
|
"reaction_match_rate_pp": null,
|
|
"reaction_skip_rate_pp": null
|
|
}
|
|
},
|
|
"freshness": {
|
|
"enabled": true,
|
|
"pass": true,
|
|
"actual_age_hours": 0,
|
|
"threshold_hours": 36
|
|
},
|
|
"drift_gate": {
|
|
"pass": true,
|
|
"criteria": [
|
|
{
|
|
"criterion": "candidate_sampled_events",
|
|
"pass": true,
|
|
"actual": "59",
|
|
"threshold": ">= 10"
|
|
},
|
|
{
|
|
"criterion": "sampled_events_drop_pct",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 80"
|
|
},
|
|
{
|
|
"criterion": "run_outcomes_drop_pct",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 80"
|
|
},
|
|
{
|
|
"criterion": "completion_rate_drop_pp",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 35"
|
|
},
|
|
{
|
|
"criterion": "cancel_rate_increase_pp",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 25"
|
|
},
|
|
{
|
|
"criterion": "error_rate_increase_pp",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 25"
|
|
},
|
|
{
|
|
"criterion": "cancel_latency_p95_increase_ms",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 6000"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"backend": "native",
|
|
"pass": true,
|
|
"candidate": {
|
|
"tag": "2026-02-27",
|
|
"path": "/home/will/lab/flynn/docs/plans/artifacts/phase0_baseline_live_backend_native_2026-02-27.json",
|
|
"generated_at": "2026-02-27T17:36:02.514Z"
|
|
},
|
|
"baseline": null,
|
|
"comparison": {
|
|
"baseline": null,
|
|
"candidate": {
|
|
"source_event_count": 115,
|
|
"sampled_event_count": 15,
|
|
"run_total_outcomes": 2,
|
|
"completion_rate_pct": 100,
|
|
"cancel_rate_pct": 0,
|
|
"error_rate_pct": 0,
|
|
"cancel_latency_p95_ms": null,
|
|
"reaction_match_rate_pct": 0,
|
|
"reaction_skip_rate_pct": 100
|
|
},
|
|
"deltas": {
|
|
"sampled_event_count_pct": null,
|
|
"run_total_outcomes_pct": null,
|
|
"completion_rate_pp": null,
|
|
"cancel_rate_pp": null,
|
|
"error_rate_pp": null,
|
|
"cancel_latency_p95_ms": null,
|
|
"reaction_match_rate_pp": null,
|
|
"reaction_skip_rate_pp": null
|
|
}
|
|
},
|
|
"freshness": {
|
|
"enabled": true,
|
|
"pass": true,
|
|
"actual_age_hours": 0,
|
|
"threshold_hours": 36
|
|
},
|
|
"drift_gate": {
|
|
"pass": true,
|
|
"criteria": [
|
|
{
|
|
"criterion": "candidate_sampled_events",
|
|
"pass": true,
|
|
"actual": "15",
|
|
"threshold": ">= 10"
|
|
},
|
|
{
|
|
"criterion": "sampled_events_drop_pct",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 80"
|
|
},
|
|
{
|
|
"criterion": "run_outcomes_drop_pct",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 80"
|
|
},
|
|
{
|
|
"criterion": "completion_rate_drop_pp",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 35"
|
|
},
|
|
{
|
|
"criterion": "cancel_rate_increase_pp",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 25"
|
|
},
|
|
{
|
|
"criterion": "error_rate_increase_pp",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 25"
|
|
},
|
|
{
|
|
"criterion": "cancel_latency_p95_increase_ms",
|
|
"pass": true,
|
|
"actual": "n/a",
|
|
"threshold": "<= 6000"
|
|
}
|
|
]
|
|
}
|
|
}
|
|
]
|
|
}
|