docs(eval): enforce min-sample gates in canary artifacts and decision log
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"generated_at": "2026-02-24T06:28:12.014Z",
|
||||
"generated_at": "2026-02-24T06:33:31.111Z",
|
||||
"event_count": 45,
|
||||
"filters": {},
|
||||
"options": {
|
||||
@@ -73,12 +73,12 @@
|
||||
},
|
||||
"fallback_categories": [
|
||||
{
|
||||
"category": "loaded pi module does not expose a supported session factory (expected one of: c",
|
||||
"category": "empty_assistant_text",
|
||||
"count": 1,
|
||||
"pct": 50
|
||||
},
|
||||
{
|
||||
"category": "pi agent runtime produced no assistant text",
|
||||
"category": "pi_module_interface",
|
||||
"count": 1,
|
||||
"pct": 50
|
||||
}
|
||||
@@ -97,6 +97,24 @@
|
||||
"gate": {
|
||||
"pass": false,
|
||||
"criteria": [
|
||||
{
|
||||
"criterion": "Minimum target routes",
|
||||
"pass": true,
|
||||
"actual": "8",
|
||||
"threshold": ">= 8"
|
||||
},
|
||||
{
|
||||
"criterion": "Minimum baseline routes",
|
||||
"pass": true,
|
||||
"actual": "2",
|
||||
"threshold": ">= 2"
|
||||
},
|
||||
{
|
||||
"criterion": "Minimum target external attempts",
|
||||
"pass": true,
|
||||
"actual": "8",
|
||||
"threshold": ">= 8"
|
||||
},
|
||||
{
|
||||
"criterion": "Completion rate delta (target - baseline)",
|
||||
"pass": true,
|
||||
|
||||
@@ -32,8 +32,8 @@
|
||||
|
||||
| Category | Count | Percent |
|
||||
| --- | ---: | ---: |
|
||||
| loaded pi module does not expose a supported session factory (expected one of: c | 1 | 50.00% |
|
||||
| pi agent runtime produced no assistant text | 1 | 50.00% |
|
||||
| empty_assistant_text | 1 | 50.00% |
|
||||
| pi_module_interface | 1 | 50.00% |
|
||||
|
||||
## Top Fallback Reasons
|
||||
|
||||
@@ -43,6 +43,9 @@
|
||||
## Gate Evaluation
|
||||
|
||||
- Gate result: HOLD
|
||||
- [x] Minimum target routes: actual=8, threshold=>= 8
|
||||
- [x] Minimum baseline routes: actual=2, threshold=>= 2
|
||||
- [x] Minimum target external attempts: actual=8, threshold=>= 8
|
||||
- [x] Completion rate delta (target - baseline): actual=0.00pp, threshold=>= -2.00pp
|
||||
- [ ] P50 latency delta (target - baseline): actual=259ms, threshold=<= 250ms
|
||||
- [ ] P95 latency delta (target - baseline): actual=5695ms, threshold=<= 700ms
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"generated_at": "2026-02-24T06:30:22.439Z",
|
||||
"generated_at": "2026-02-24T06:33:37.931Z",
|
||||
"event_count": 24,
|
||||
"filters": {
|
||||
"since_ms": 1771913640000
|
||||
@@ -71,6 +71,24 @@
|
||||
"gate": {
|
||||
"pass": false,
|
||||
"criteria": [
|
||||
{
|
||||
"criterion": "Minimum target routes",
|
||||
"pass": false,
|
||||
"actual": "6",
|
||||
"threshold": ">= 8"
|
||||
},
|
||||
{
|
||||
"criterion": "Minimum baseline routes",
|
||||
"pass": false,
|
||||
"actual": "0",
|
||||
"threshold": ">= 2"
|
||||
},
|
||||
{
|
||||
"criterion": "Minimum target external attempts",
|
||||
"pass": false,
|
||||
"actual": "6",
|
||||
"threshold": ">= 8"
|
||||
},
|
||||
{
|
||||
"criterion": "Completion rate delta (target - baseline)",
|
||||
"pass": false,
|
||||
|
||||
@@ -40,6 +40,9 @@
|
||||
## Gate Evaluation
|
||||
|
||||
- Gate result: HOLD
|
||||
- [ ] Minimum target routes: actual=6, threshold=>= 8
|
||||
- [ ] Minimum baseline routes: actual=0, threshold=>= 2
|
||||
- [ ] Minimum target external attempts: actual=6, threshold=>= 8
|
||||
- [ ] Completion rate delta (target - baseline): actual=n/a, threshold=>= -2.00pp
|
||||
- [ ] P50 latency delta (target - baseline): actual=n/a, threshold=<= 250ms
|
||||
- [ ] P95 latency delta (target - baseline): actual=n/a, threshold=<= 700ms
|
||||
|
||||
Reference in New Issue
Block a user