docs(eval): enforce min-sample gates in canary artifacts and decision log

This commit is contained in:
William Valentin
2026-02-23 22:34:54 -08:00
parent 34d1562ce8
commit 2d42f65b9f
7 changed files with 76 additions and 16 deletions
@@ -1,5 +1,5 @@
{
"generated_at": "2026-02-24T06:28:12.014Z",
"generated_at": "2026-02-24T06:33:31.111Z",
"event_count": 45,
"filters": {},
"options": {
@@ -73,12 +73,12 @@
},
"fallback_categories": [
{
"category": "loaded pi module does not expose a supported session factory (expected one of: c",
"category": "empty_assistant_text",
"count": 1,
"pct": 50
},
{
"category": "pi agent runtime produced no assistant text",
"category": "pi_module_interface",
"count": 1,
"pct": 50
}
@@ -97,6 +97,24 @@
"gate": {
"pass": false,
"criteria": [
{
"criterion": "Minimum target routes",
"pass": true,
"actual": "8",
"threshold": ">= 8"
},
{
"criterion": "Minimum baseline routes",
"pass": true,
"actual": "2",
"threshold": ">= 2"
},
{
"criterion": "Minimum target external attempts",
"pass": true,
"actual": "8",
"threshold": ">= 8"
},
{
"criterion": "Completion rate delta (target - baseline)",
"pass": true,
@@ -32,8 +32,8 @@
| Category | Count | Percent |
| --- | ---: | ---: |
| loaded pi module does not expose a supported session factory (expected one of: c | 1 | 50.00% |
| pi agent runtime produced no assistant text | 1 | 50.00% |
| empty_assistant_text | 1 | 50.00% |
| pi_module_interface | 1 | 50.00% |
## Top Fallback Reasons
@@ -43,6 +43,9 @@
## Gate Evaluation
- Gate result: HOLD
- [x] Minimum target routes: actual=8, threshold=>= 8
- [x] Minimum baseline routes: actual=2, threshold=>= 2
- [x] Minimum target external attempts: actual=8, threshold=>= 8
- [x] Completion rate delta (target - baseline): actual=0.00pp, threshold=>= -2.00pp
- [ ] P50 latency delta (target - baseline): actual=259ms, threshold=<= 250ms
- [ ] P95 latency delta (target - baseline): actual=5695ms, threshold=<= 700ms
@@ -1,5 +1,5 @@
{
"generated_at": "2026-02-24T06:30:22.439Z",
"generated_at": "2026-02-24T06:33:37.931Z",
"event_count": 24,
"filters": {
"since_ms": 1771913640000
@@ -71,6 +71,24 @@
"gate": {
"pass": false,
"criteria": [
{
"criterion": "Minimum target routes",
"pass": false,
"actual": "6",
"threshold": ">= 8"
},
{
"criterion": "Minimum baseline routes",
"pass": false,
"actual": "0",
"threshold": ">= 2"
},
{
"criterion": "Minimum target external attempts",
"pass": false,
"actual": "6",
"threshold": ">= 8"
},
{
"criterion": "Completion rate delta (target - baseline)",
"pass": false,
@@ -40,6 +40,9 @@
## Gate Evaluation
- Gate result: HOLD
- [ ] Minimum target routes: actual=6, threshold=>= 8
- [ ] Minimum baseline routes: actual=0, threshold=>= 2
- [ ] Minimum target external attempts: actual=6, threshold=>= 8
- [ ] Completion rate delta (target - baseline): actual=n/a, threshold=>= -2.00pp
- [ ] P50 latency delta (target - baseline): actual=n/a, threshold=<= 250ms
- [ ] P95 latency delta (target - baseline): actual=n/a, threshold=<= 700ms