docs(eval): record pi canary window A results and hold decision
This commit is contained in:
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"generated_at": "2026-02-24T06:28:12.014Z",
|
||||
"event_count": 45,
|
||||
"filters": {},
|
||||
"options": {
|
||||
"targetBackend": "pi_embedded",
|
||||
"baselineBackend": "native",
|
||||
"sessionIds": [
|
||||
"telegram:8367012007"
|
||||
]
|
||||
},
|
||||
"summary": {
|
||||
"route_stats": {
|
||||
"total": 10,
|
||||
"by_backend": {
|
||||
"native": 2,
|
||||
"pi_embedded": 8
|
||||
},
|
||||
"by_source": {
|
||||
"agent_override": 10
|
||||
},
|
||||
"forced_native_guards": {}
|
||||
},
|
||||
"target": {
|
||||
"backend": "pi_embedded",
|
||||
"routes": 8,
|
||||
"completed_turns": 8,
|
||||
"incomplete_turns": 0,
|
||||
"completion_rate_pct": 100,
|
||||
"e2e_latency_ms": {
|
||||
"count": 8,
|
||||
"avg_ms": 4615,
|
||||
"p50_ms": 3240,
|
||||
"p95_ms": 8776,
|
||||
"min_ms": 1859,
|
||||
"max_ms": 9381
|
||||
}
|
||||
},
|
||||
"baseline": {
|
||||
"backend": "native",
|
||||
"routes": 2,
|
||||
"completed_turns": 2,
|
||||
"incomplete_turns": 0,
|
||||
"completion_rate_pct": 100,
|
||||
"e2e_latency_ms": {
|
||||
"count": 2,
|
||||
"avg_ms": 2981,
|
||||
"p50_ms": 2981,
|
||||
"p95_ms": 3081,
|
||||
"min_ms": 2870,
|
||||
"max_ms": 3092
|
||||
}
|
||||
},
|
||||
"target_external_attempts": {
|
||||
"attempts": 8,
|
||||
"successes": 6,
|
||||
"fallbacks": 2,
|
||||
"unresolved_attempts": 0,
|
||||
"success_rate_pct": 75,
|
||||
"attempt_latency_ms": {
|
||||
"count": 8,
|
||||
"avg_ms": 3961,
|
||||
"p50_ms": 2636,
|
||||
"p95_ms": 8766,
|
||||
"min_ms": 135,
|
||||
"max_ms": 9371
|
||||
}
|
||||
},
|
||||
"comparison": {
|
||||
"completion_rate_delta_pp": 0,
|
||||
"p50_latency_delta_ms": 259,
|
||||
"p95_latency_delta_ms": 5695
|
||||
},
|
||||
"fallback_categories": [
|
||||
{
|
||||
"category": "loaded pi module does not expose a supported session factory (expected one of: c",
|
||||
"count": 1,
|
||||
"pct": 50
|
||||
},
|
||||
{
|
||||
"category": "pi agent runtime produced no assistant text",
|
||||
"count": 1,
|
||||
"pct": 50
|
||||
}
|
||||
],
|
||||
"fallback_top_reasons": [
|
||||
{
|
||||
"reason": "Loaded Pi module does not expose a supported session factory (expected one of: createAgentSession, createSession, createPiSession, createAge",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"reason": "Pi Agent runtime produced no assistant text",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"gate": {
|
||||
"pass": false,
|
||||
"criteria": [
|
||||
{
|
||||
"criterion": "Completion rate delta (target - baseline)",
|
||||
"pass": true,
|
||||
"actual": "0.00pp",
|
||||
"threshold": ">= -2.00pp"
|
||||
},
|
||||
{
|
||||
"criterion": "P50 latency delta (target - baseline)",
|
||||
"pass": false,
|
||||
"actual": "259ms",
|
||||
"threshold": "<= 250ms"
|
||||
},
|
||||
{
|
||||
"criterion": "P95 latency delta (target - baseline)",
|
||||
"pass": false,
|
||||
"actual": "5695ms",
|
||||
"threshold": "<= 700ms"
|
||||
},
|
||||
{
|
||||
"criterion": "Fallback rate (target external attempts)",
|
||||
"pass": false,
|
||||
"actual": "25.00%",
|
||||
"threshold": "<= 5.00%"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user