import { describe, expect, it } from 'vitest';
import type { AuditEvent } from './types.js';
import {
  evaluateBackendCanaryGate,
  renderBackendCanaryMarkdown,
  summarizeBackendCanary,
} from './backendCanarySummary.js';

/**
 * Builds an info-level audit event fixture.
 *
 * @param timestamp - Value stored in the event's `timestamp` field.
 * @param event_type - One of the `AuditEvent` event types (e.g. `backend.route`).
 * @param event - Event payload, stored as-is in the `event` field.
 * @returns An `AuditEvent` whose `level` is always `'info'`.
 */
function makeEvent(
  timestamp: number,
  event_type: AuditEvent['event_type'],
  // `Record` is generic and needs both type arguments to compile under tsc.
  // NOTE(review): assumes AuditEvent['event'] accepts Record<string, unknown> — confirm in ./types.js.
  event: Record<string, unknown>,
): AuditEvent {
  return {
    timestamp,
    level: 'info',
    event_type,
    event,
  };
}

describe('summarizeBackendCanary', () => {
  it('computes route, reliability, latency, and fallback summaries', () => {
    const events: AuditEvent[] = [
      // Turn 1: routed to the target backend, which reports success.
      makeEvent(1000, 'backend.route', {
        session_id: 'telegram:canary',
        channel: 'telegram',
        sender: '8367012007',
        selected_backend: 'pi_embedded',
        source: 'agent_override',
      }),
      makeEvent(1120, 'backend.success', {
        session_id: 'telegram:canary',
        channel: 'telegram',
        sender: '8367012007',
        backend: 'pi_embedded',
        duration_ms: 120,
        response_length: 50,
      }),
      makeEvent(1140, 'session.message', {
        session_id: 'telegram:canary',
        role: 'assistant',
        content_length: 50,
      }),
      // Turn 2: routed to the target backend, which falls back to native.
      makeEvent(2000, 'backend.route', {
        session_id: 'telegram:canary',
        channel: 'telegram',
        sender: '8367012007',
        selected_backend: 'pi_embedded',
        source: 'agent_override',
      }),
      makeEvent(2300, 'backend.fallback', {
        session_id: 'telegram:canary',
        channel: 'telegram',
        sender: '8367012007',
        from_backend: 'pi_embedded',
        to_backend: 'native',
        reason: 'request timed out waiting for backend process',
        duration_ms: 300,
      }),
      makeEvent(2340, 'session.message', {
        session_id: 'telegram:canary',
        role: 'assistant',
        content_length: 80,
      }),
      // Turn 3: a separate session routed straight to the baseline backend.
      makeEvent(3000, 'backend.route', {
        session_id: 'telegram:control',
        channel: 'telegram',
        sender: '123',
        selected_backend: 'native',
        source: 'native',
      }),
      makeEvent(3080, 'session.message', {
        session_id: 'telegram:control',
        role: 'assistant',
        content_length: 25,
      }),
    ];

    const summary = summarizeBackendCanary(events, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
    });

    // Route counts.
    expect(summary.route_stats.total).toBe(3);
    expect(summary.route_stats.by_backend.pi_embedded).toBe(2);
    expect(summary.route_stats.by_backend.native).toBe(1);

    // Target backend. The route -> assistant message gaps in the fixtures are
    // 140 ms and 340 ms; the expected percentiles match interpolating between
    // those two samples (presumably how the summary derives them).
    expect(summary.target.routes).toBe(2);
    expect(summary.target.completed_turns).toBe(2);
    expect(summary.target.completion_rate_pct).toBe(100);
    expect(summary.target.e2e_latency_ms?.p50_ms).toBe(240);
    expect(summary.target.e2e_latency_ms?.p95_ms).toBe(330);

    // Baseline backend: a single route with an 80 ms gap.
    expect(summary.baseline.routes).toBe(1);
    expect(summary.baseline.completion_rate_pct).toBe(100);
    expect(summary.baseline.e2e_latency_ms?.p50_ms).toBe(80);

    // External attempts on the target: one success (120 ms), one fallback (300 ms).
    expect(summary.target_external_attempts?.attempts).toBe(2);
    expect(summary.target_external_attempts?.successes).toBe(1);
    expect(summary.target_external_attempts?.fallbacks).toBe(1);
    expect(summary.target_external_attempts?.success_rate_pct).toBe(50);
    expect(summary.target_external_attempts?.attempt_latency_ms?.p50_ms).toBe(210);

    // Target minus baseline latency.
    expect(summary.comparison.p50_latency_delta_ms).toBe(160);
    expect(summary.comparison.p95_latency_delta_ms).toBe(250);

    // Fallback classification.
    expect(summary.fallback_categories).toEqual([
      { category: 'timeout', count: 1, pct: 100 },
    ]);
    expect(summary.fallback_top_reasons[0]?.reason).toContain('request timed out');
  });

  it('filters routes by session id', () => {
    const events: AuditEvent[] = [
      makeEvent(1000, 'backend.route', {
        session_id: 'telegram:canary',
        channel: 'telegram',
        sender: '8367012007',
        selected_backend: 'pi_embedded',
        source: 'agent_override',
      }),
      makeEvent(1100, 'session.message', {
        session_id: 'telegram:canary',
        role: 'assistant',
        content_length: 10,
      }),
      makeEvent(2000, 'backend.route', {
        session_id: 'telegram:other',
        channel: 'telegram',
        sender: '9999',
        selected_backend: 'native',
        source: 'native',
      }),
      makeEvent(2100, 'session.message', {
        session_id: 'telegram:other',
        role: 'assistant',
        content_length: 10,
      }),
    ];

    // Only the canary session is requested, so the native route in
    // 'telegram:other' is expected to be left out of every count.
    const summary = summarizeBackendCanary(events, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
      sessionIds: ['telegram:canary'],
    });

    expect(summary.route_stats.total).toBe(1);
    expect(summary.target.routes).toBe(1);
    expect(summary.baseline.routes).toBe(0);
  });

  it('classifies pi-specific fallback reasons into stable categories', () => {
    const events: AuditEvent[] = [
      // First fallback: the loaded module lacks a supported session factory.
      makeEvent(1000, 'backend.route', {
        session_id: 'telegram:canary',
        channel: 'telegram',
        sender: '8367012007',
        selected_backend: 'pi_embedded',
        source: 'agent_override',
      }),
      makeEvent(1100, 'backend.fallback', {
        session_id: 'telegram:canary',
        channel: 'telegram',
        sender: '8367012007',
        from_backend: 'pi_embedded',
        to_backend: 'native',
        reason: 'Loaded Pi module does not expose a supported session factory (expected one of: createAgentSession)',
        duration_ms: 100,
      }),
      makeEvent(1200, 'session.message', {
        session_id: 'telegram:canary',
        role: 'assistant',
        content_length: 20,
      }),
      // Second fallback: the runtime returned no assistant text.
      makeEvent(1300, 'backend.route', {
        session_id: 'telegram:canary',
        channel: 'telegram',
        sender: '8367012007',
        selected_backend: 'pi_embedded',
        source: 'agent_override',
      }),
      makeEvent(1400, 'backend.fallback', {
        session_id: 'telegram:canary',
        channel: 'telegram',
        sender: '8367012007',
        from_backend: 'pi_embedded',
        to_backend: 'native',
        reason: 'Pi Agent runtime produced no assistant text',
        duration_ms: 120,
      }),
      makeEvent(1500, 'session.message', {
        session_id: 'telegram:canary',
        role: 'assistant',
        content_length: 20,
      }),
    ];

    const summary = summarizeBackendCanary(events, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
    });

    // toEqual on an array is order-sensitive: the categories must come back in
    // exactly this order, which differs from the order the events occurred in.
    expect(summary.fallback_categories).toEqual([
      { category: 'empty_assistant_text', count: 1, pct: 50 },
      { category: 'pi_module_interface', count: 1, pct: 50 },
    ]);
  });

  it('tracks forced-native guard reasons', () => {
    const events: AuditEvent[] = [
      makeEvent(1000, 'backend.route', {
        session_id: 's1',
        channel: 'telegram',
        sender: '1',
        selected_backend: 'native',
        source: 'forced_native_guard',
        guard_reason: 'pi_no_tools_mode',
      }),
      makeEvent(1010, 'session.message', {
        session_id: 's1',
        role: 'assistant',
        content_length: 10,
      }),
      makeEvent(2000, 'backend.route', {
        session_id: 's2',
        channel: 'telegram',
        sender: '2',
        selected_backend: 'native',
        source: 'forced_native_guard',
        guard_reason: 'capability_query',
      }),
      makeEvent(2010, 'session.message', {
        session_id: 's2',
        role: 'assistant',
        content_length: 10,
      }),
    ];

    const summary = summarizeBackendCanary(events, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
    });

    expect(summary.route_stats.forced_native_guards.pi_no_tools_mode).toBe(1);
    expect(summary.route_stats.forced_native_guards.capability_query).toBe(1);

    // The rendered report should surface the guard section and its reasons.
    const markdown = renderBackendCanaryMarkdown(summary, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
    });

    expect(markdown).toContain('Forced Native Guards');
    expect(markdown).toContain('pi_no_tools_mode');
  });
});

describe('evaluateBackendCanaryGate', () => {
  it('evaluates configured pass/fail thresholds', () => {
    const events: AuditEvent[] = [
      makeEvent(1000, 'backend.route', {
        session_id: 's1',
        channel: 'telegram',
        sender: '1',
        selected_backend: 'pi_embedded',
        source: 'agent_override',
      }),
      makeEvent(1200, 'backend.success', {
        session_id: 's1',
        channel: 'telegram',
        sender: '1',
        backend: 'pi_embedded',
        duration_ms: 200,
        response_length: 10,
      }),
      makeEvent(1250, 'session.message', {
        session_id: 's1',
        role: 'assistant',
        content_length: 20,
      }),
      makeEvent(2000, 'backend.route', {
        session_id: 's2',
        channel: 'telegram',
        sender: '2',
        selected_backend: 'native',
        source: 'native',
      }),
      makeEvent(2050, 'session.message', {
        session_id: 's2',
        role: 'assistant',
        content_length: 20,
      }),
    ];

    const summary = summarizeBackendCanary(events, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
    });

    // Four thresholds are configured and four criteria are expected back.
    const gate = evaluateBackendCanaryGate(summary, {
      maxCompletionRateDropPp: 0,
      maxP50LatencyIncreaseMs: 300,
      maxP95LatencyIncreaseMs: 300,
      maxFallbackRatePct: 5,
    });

    expect(gate.pass).toBe(true);
    expect(gate.criteria).toHaveLength(4);

    // Passing the gate as the third argument should add its verdict to the report.
    const markdown = renderBackendCanaryMarkdown(
      summary,
      {
        targetBackend: 'pi_embedded',
        baselineBackend: 'native',
      },
      gate,
    );

    expect(markdown).toContain('Pi Embedded Canary Summary');
    expect(markdown).toContain('Gate result: PASS');
  });

  it('fails gate when minimum sample thresholds are not met', () => {
    // A single successful target turn and no baseline traffic at all.
    const events: AuditEvent[] = [
      makeEvent(1000, 'backend.route', {
        session_id: 's1',
        channel: 'telegram',
        sender: '1',
        selected_backend: 'pi_embedded',
        source: 'agent_override',
      }),
      makeEvent(1100, 'backend.success', {
        session_id: 's1',
        channel: 'telegram',
        sender: '1',
        backend: 'pi_embedded',
        duration_ms: 100,
        response_length: 8,
      }),
      makeEvent(1200, 'session.message', {
        session_id: 's1',
        role: 'assistant',
        content_length: 20,
      }),
    ];

    const summary = summarizeBackendCanary(events, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
    });

    const gate = evaluateBackendCanaryGate(summary, {
      minTargetRoutes: 3,
      minBaselineRoutes: 2,
      minTargetAttempts: 3,
      maxFallbackRatePct: 5,
    });

    expect(gate.pass).toBe(false);
    expect(gate.criteria).toHaveLength(4);

    // Only the three minimum-sample criteria are inspected; the fourth
    // (fallback rate) is covered by the length check above.
    expect(gate.criteria[0]).toEqual(
      expect.objectContaining({
        criterion: 'Minimum target routes',
        pass: false,
        actual: '1',
      }),
    );
    expect(gate.criteria[1]).toEqual(
      expect.objectContaining({
        criterion: 'Minimum baseline routes',
        pass: false,
        actual: '0',
      }),
    );
    expect(gate.criteria[2]).toEqual(
      expect.objectContaining({
        criterion: 'Minimum target external attempts',
        pass: false,
        actual: '1',
      }),
    );
  });

  it('evaluates guard coverage thresholds', () => {
    // Two guard reasons appear in the events; 'capability_query' never does.
    const events: AuditEvent[] = [
      makeEvent(1000, 'backend.route', {
        session_id: 's1',
        channel: 'telegram',
        sender: '1',
        selected_backend: 'native',
        source: 'forced_native_guard',
        guard_reason: 'pi_no_tools_mode',
      }),
      makeEvent(1010, 'session.message', {
        session_id: 's1',
        role: 'assistant',
        content_length: 20,
      }),
      makeEvent(2000, 'backend.route', {
        session_id: 's2',
        channel: 'telegram',
        sender: '2',
        selected_backend: 'native',
        source: 'forced_native_guard',
        guard_reason: 'attachments_present',
      }),
      makeEvent(2010, 'session.message', {
        session_id: 's2',
        role: 'assistant',
        content_length: 20,
      }),
    ];

    const summary = summarizeBackendCanary(events, {
      targetBackend: 'pi_embedded',
      baselineBackend: 'native',
    });

    const gate = evaluateBackendCanaryGate(summary, {
      minGuardPiNoToolsCount: 1,
      minGuardCapabilityQueryCount: 1,
      minGuardAttachmentsPresentCount: 1,
    });

    // One unmet guard minimum is enough to fail the whole gate.
    expect(gate.pass).toBe(false);
    expect(gate.criteria).toEqual([
      expect.objectContaining({
        criterion: 'Minimum pi_no_tools_mode guard hits',
        pass: true,
        actual: '1',
      }),
      expect.objectContaining({
        criterion: 'Minimum capability_query guard hits',
        pass: false,
        actual: '0',
      }),
      expect.objectContaining({
        criterion: 'Minimum attachments_present guard hits',
        pass: true,
        actual: '1',
      }),
    ]);
  });
});