feat(audit): add pi canary summary analyzer and cli script

This commit is contained in:
William Valentin
2026-02-23 22:26:29 -08:00
parent 1dfa6ce2b4
commit afddd1ba7a
4 changed files with 1041 additions and 1 deletions
+213
View File
@@ -0,0 +1,213 @@
import { describe, expect, it } from 'vitest';
import type { AuditEvent } from './types.js';
import {
evaluateBackendCanaryGate,
renderBackendCanaryMarkdown,
summarizeBackendCanary,
} from './backendCanarySummary.js';
/**
 * Builds an audit event fixture for the tests in this file.
 *
 * Every fixture is emitted at `info` level; only the timestamp, the event
 * type, and the payload vary between call sites.
 *
 * @param timestamp - Event time; the fixtures below use small integers whose
 *   differences serve as latencies.
 * @param event_type - Audit event discriminator (e.g. `backend.route`).
 * @param event - Free-form event payload, stored as-is.
 * @returns The assembled audit event.
 */
function makeEvent(
  timestamp: number,
  event_type: AuditEvent['event_type'],
  event: Record<string, unknown>,
): AuditEvent {
  const auditEvent: AuditEvent = { timestamp, level: 'info', event_type, event };
  return auditEvent;
}
describe('summarizeBackendCanary', () => {
  // Identity of the canary conversation shared by both tests.
  const CANARY_SESSION = 'telegram:canary';
  const CANARY_SENDER = '8367012007';

  /** Route decision that sends the canary session to the pi_embedded backend. */
  const canaryRoute = (timestamp: number): AuditEvent =>
    makeEvent(timestamp, 'backend.route', {
      session_id: CANARY_SESSION,
      channel: 'telegram',
      sender: CANARY_SENDER,
      selected_backend: 'pi_embedded',
      source: 'agent_override',
    });

  /** Route decision that keeps a session on the native backend. */
  const nativeRoute = (timestamp: number, session_id: string, sender: string): AuditEvent =>
    makeEvent(timestamp, 'backend.route', {
      session_id,
      channel: 'telegram',
      sender,
      selected_backend: 'native',
      source: 'native',
    });

  /** Assistant reply recorded for a session. */
  const assistantMessage = (
    timestamp: number,
    session_id: string,
    content_length: number,
  ): AuditEvent =>
    makeEvent(timestamp, 'session.message', {
      session_id,
      role: 'assistant',
      content_length,
    });

  it('computes route, reliability, latency, and fallback summaries', () => {
    const events: AuditEvent[] = [
      // Canary turn 1: pi_embedded answers after 120 ms, reply lands at +140 ms.
      canaryRoute(1000),
      makeEvent(1120, 'backend.success', {
        session_id: CANARY_SESSION,
        channel: 'telegram',
        sender: CANARY_SENDER,
        backend: 'pi_embedded',
        duration_ms: 120,
        response_length: 50,
      }),
      assistantMessage(1140, CANARY_SESSION, 50),
      // Canary turn 2: pi_embedded falls back to native after 300 ms, reply lands at +340 ms.
      canaryRoute(2000),
      makeEvent(2300, 'backend.fallback', {
        session_id: CANARY_SESSION,
        channel: 'telegram',
        sender: CANARY_SENDER,
        from_backend: 'pi_embedded',
        to_backend: 'native',
        reason: 'request timed out waiting for backend process',
        duration_ms: 300,
      }),
      assistantMessage(2340, CANARY_SESSION, 80),
      // Control turn: routed straight to native, reply lands at +80 ms.
      nativeRoute(3000, 'telegram:control', '123'),
      assistantMessage(3080, 'telegram:control', 25),
    ];

    const summary = summarizeBackendCanary(events, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
    });
    const { route_stats, target, baseline, comparison } = summary;
    const externalAttempts = summary.target_external_attempts;

    // Routing: two canary routes plus one control route.
    expect(route_stats.total).toBe(3);
    expect(route_stats.by_backend.pi_embedded).toBe(2);
    expect(route_stats.by_backend.native).toBe(1);

    // Target turns take 140 ms and 340 ms from route to assistant message;
    // 240 / 330 are consistent with linearly interpolated p50 / p95
    // (presumably how the summarizer computes percentiles - not visible here).
    expect(target.routes).toBe(2);
    expect(target.completed_turns).toBe(2);
    expect(target.completion_rate_pct).toBe(100);
    expect(target.e2e_latency_ms?.p50_ms).toBe(240);
    expect(target.e2e_latency_ms?.p95_ms).toBe(330);

    // Baseline: the single control turn takes 80 ms.
    expect(baseline.routes).toBe(1);
    expect(baseline.completion_rate_pct).toBe(100);
    expect(baseline.e2e_latency_ms?.p50_ms).toBe(80);

    // Backend attempts on the target: one success (120 ms), one fallback (300 ms).
    expect(externalAttempts?.attempts).toBe(2);
    expect(externalAttempts?.successes).toBe(1);
    expect(externalAttempts?.fallbacks).toBe(1);
    expect(externalAttempts?.success_rate_pct).toBe(50);
    expect(externalAttempts?.attempt_latency_ms?.p50_ms).toBe(210);

    // Deltas match target minus baseline: 240 - 80 and 330 - 80.
    expect(comparison.p50_latency_delta_ms).toBe(160);
    expect(comparison.p95_latency_delta_ms).toBe(250);

    // The only fallback reason mentions a timeout and is expected in the 'timeout' category.
    expect(summary.fallback_categories).toEqual([
      { category: 'timeout', count: 1, pct: 100 },
    ]);
    expect(summary.fallback_top_reasons[0]?.reason).toContain('request timed out');
  });

  it('filters routes by session id', () => {
    const events: AuditEvent[] = [
      canaryRoute(1000),
      assistantMessage(1100, CANARY_SESSION, 10),
      nativeRoute(2000, 'telegram:other', '9999'),
      assistantMessage(2100, 'telegram:other', 10),
    ];

    // Restricting to the canary session should leave only its single route.
    const { route_stats, target, baseline } = summarizeBackendCanary(events, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
      sessionIds: [CANARY_SESSION],
    });

    expect(route_stats.total).toBe(1);
    expect(target.routes).toBe(1);
    expect(baseline.routes).toBe(0);
  });
});
describe('evaluateBackendCanaryGate', () => {
  it('evaluates configured pass/fail thresholds', () => {
    // Target turn on pi_embedded: backend succeeds in 200 ms, reply lands 250 ms after the route.
    const targetTurn: AuditEvent[] = [
      makeEvent(1000, 'backend.route', {
        session_id: 's1',
        channel: 'telegram',
        sender: '1',
        selected_backend: 'pi_embedded',
        source: 'agent_override',
      }),
      makeEvent(1200, 'backend.success', {
        session_id: 's1',
        channel: 'telegram',
        sender: '1',
        backend: 'pi_embedded',
        duration_ms: 200,
        response_length: 10,
      }),
      makeEvent(1250, 'session.message', {
        session_id: 's1',
        role: 'assistant',
        content_length: 20,
      }),
    ];
    // Baseline turn on native: reply lands 50 ms after the route.
    const baselineTurn: AuditEvent[] = [
      makeEvent(2000, 'backend.route', {
        session_id: 's2',
        channel: 'telegram',
        sender: '2',
        selected_backend: 'native',
        source: 'native',
      }),
      makeEvent(2050, 'session.message', {
        session_id: 's2',
        role: 'assistant',
        content_length: 20,
      }),
    ];
    const events: AuditEvent[] = [...targetTurn, ...baselineTurn];

    const summary = summarizeBackendCanary(events, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
    });

    // Four thresholds; the fixture has no fallbacks and a 200 ms gap between
    // target (250 ms) and baseline (50 ms) turn latency, so presumably every
    // criterion is within its limit.
    const thresholds = {
      maxCompletionRateDropPp: 0,
      maxP50LatencyIncreaseMs: 300,
      maxP95LatencyIncreaseMs: 300,
      maxFallbackRatePct: 5,
    };
    const gate = evaluateBackendCanaryGate(summary, thresholds);

    expect(gate.pass).toBe(true);
    expect(gate.criteria).toHaveLength(4);

    // The rendered report must carry its title and the gate verdict.
    const markdown = renderBackendCanaryMarkdown(
      summary,
      {
        targetBackend: 'pi_embedded',
        baselineBackend: 'native',
      },
      gate,
    );
    for (const snippet of ['Pi Embedded Canary Summary', 'Gate result: PASS']) {
      expect(markdown).toContain(snippet);
    }
  });
});