diff --git a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md index 0bcf8d6..36b00c8 100644 --- a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md +++ b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md @@ -119,6 +119,8 @@ Extend in-memory gateway metrics with baseline counters: ## Ticket 0.4 — Baseline Summary Tooling +Status: completed (2026-02-25) + ### Scope Add or extend report tooling to summarize phase-0 telemetry slices: diff --git a/docs/plans/state.json b/docs/plans/state.json index 9ae3b40..6ae00aa 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -48,6 +48,21 @@ ], "test_status": "pnpm test:run src/gateway/metrics.test.ts src/daemon/routing.test.ts src/gateway/handlers/agent.test.ts passing" }, + "phase0-ticket-0.4-baseline-summary-tooling": { + "status": "completed", + "date": "2026-02-25", + "updated": "2026-02-25", + "summary": "Implemented Phase 0 Ticket 0.4 by adding a phase-0 baseline summarizer for run outcomes, cancel latency, and reaction decision rates, with a CLI script for markdown/json outputs.", + "files_modified": [ + "src/audit/phase0BaselineSummary.ts", + "src/audit/phase0BaselineSummary.test.ts", + "scripts/summarize-phase0-baseline.ts", + "package.json", + "docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/audit/phase0BaselineSummary.test.ts passing" + }, "phase0-instrumentation-ticket-checklist": { "status": "completed", "date": "2026-02-25", @@ -6661,7 +6676,8 @@ "deeper_surfaces_phase0_ticket_01": "completed — audit schema/logger now capture run lifecycle and reaction decision baseline events (`run.state`, `run.cancel`, `reaction.match`, `reaction.skip`) with regression test coverage", "deeper_surfaces_phase0_ticket_02": "completed — gateway + daemon routing emit run lifecycle/cancel telemetry and reaction match/skip audit events with filter summaries and cancellation latency, plus focused tests", "deeper_surfaces_phase0_ticket_03": "completed — gateway metrics now track run-state outcomes, cancel latency samples, and reaction decision counters with routing/gateway emitters", - "next_up": "Implement Ticket 0.4 from docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", + "deeper_surfaces_phase0_ticket_04": "completed — added phase-0 baseline summary tooling for run outcomes, cancel latency, and reaction decisions with markdown/json CLI output", + "next_up": "Implement Ticket 0.5 from docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", "pi_embedded_canary_spike": "completed — added optional pi_embedded backend adapter, canary-safe no-tools routing guard, backend success/fallback latency audit events, and docs/diagram updates while native remains default", "pi_embedded_evaluation_phase": "completed — final decision rollback (applied in runtime config): Window A failed latency/fallback gates (p50 +259ms, p95 +5695ms, fallback 25%, categories: pi_module_interface/empty_assistant_text); Window B remained sample-insufficient; controlled probes verified guard coverage (pi_no_tools_mode/capability_query/attachments_present each hit once)", "pi_embedded_manual_mode": "completed — added persisted runtime backend controls for manual Pi activation/deactivation (`/runtime` preferred, `/backend` alias; `status`, `activate pi`, `deactivate pi`, `use config`) while keeping config-driven default routing", diff --git a/package.json b/package.json index 1eee38c..ca7051b 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,7 @@ "config:profiles:generate": "node scripts/generate-config-profiles.mjs", "config:profiles:check": "node scripts/generate-config-profiles.mjs --check", "audit:backend-canary": "node --import tsx/esm scripts/summarize-backend-canary.ts", + "audit:phase0-baseline": "node --import tsx/esm scripts/summarize-phase0-baseline.ts", "audit:backend-canary:probes": "node --import tsx/esm scripts/run-pi-canary-guard-probes.ts" }, "keywords": [ diff --git a/scripts/summarize-phase0-baseline.ts b/scripts/summarize-phase0-baseline.ts new file mode 100644 index 0000000..9133e4e --- /dev/null +++ b/scripts/summarize-phase0-baseline.ts @@ -0,0 +1,169 @@ +#!/usr/bin/env node + +import { writeFile } from 'node:fs/promises'; +import { parseArgs } from 'node:util'; +import { queryAuditLogs } from '../src/audit/export.js'; +import { + renderPhase0BaselineMarkdown, + summarizePhase0Baseline, + type Phase0BaselineSummaryOptions, +} from '../src/audit/phase0BaselineSummary.js'; + +const DEFAULT_EVENT_TYPES = ['run.state', 'run.cancel', 'reaction.match', 'reaction.skip'] as const; + +function usage(): string { + return [ + 'Usage: node --import tsx/esm scripts/summarize-phase0-baseline.ts --audit [options]', + '', + 'Options:', + ' --audit Path to audit.log (required)', + ' --since Start time filter', + ' --until End time filter', + ' --session Restrict to session IDs', + ' --channel Restrict to channels', + ' --sender Restrict to senders', + ' --source Restrict to sources', + ' --max-sessions Limit session rows in output (default: 20)', + ' --max-channels Limit channel rows in output (default: 20)', + ' --max-skip-reasons Limit skip reason rows in output (default: 10)', + ' --format Output format (default: markdown)', + ' --out Write output to file instead of stdout', + ].join('\n'); +} + +function parseTime(value: string | undefined, flag: string): number | undefined { + if (!value) { + return undefined; + } + if (/^\d+$/.test(value)) { + const asNumber = Number(value); + if (Number.isFinite(asNumber)) { + return asNumber; + } + } + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) { + throw new Error(`Invalid ${flag} value "${value}". Use ISO-8601 or epoch milliseconds.`); + } + return parsed; +} + +function parseCsv(value: string | undefined): string[] | undefined { + if (!value) { + return undefined; + } + const values = value + .split(',') + .map((item) => item.trim()) + .filter((item) => item.length > 0); + return values.length > 0 ? values : undefined; +} + +function parseOptionalNumber(raw: string | undefined, flag: string): number | undefined { + if (!raw) { + return undefined; + } + const parsed = Number(raw); + if (!Number.isFinite(parsed)) { + throw new Error(`Invalid ${flag} value "${raw}". Expected a number.`); + } + return parsed; +} + +function parseSources(raw: string | undefined): Array<'gateway' | 'channel'> | undefined { + const values = parseCsv(raw); + if (!values) { + return undefined; + } + const parsed: Array<'gateway' | 'channel'> = []; + for (const value of values) { + if (value === 'gateway' || value === 'channel') { + parsed.push(value); + continue; + } + throw new Error(`Invalid source "${value}".`); + } + return parsed; +} + +async function main(): Promise { + const { values } = parseArgs({ + options: { + audit: { type: 'string' }, + since: { type: 'string' }, + until: { type: 'string' }, + session: { type: 'string' }, + channel: { type: 'string' }, + sender: { type: 'string' }, + source: { type: 'string' }, + 'max-sessions': { type: 'string' }, + 'max-channels': { type: 'string' }, + 'max-skip-reasons': { type: 'string' }, + format: { type: 'string' }, + out: { type: 'string' }, + help: { type: 'boolean', short: 'h' }, + }, + strict: true, + allowPositionals: false, + }); + + if (values.help) { + process.stdout.write(`${usage()}\n`); + return; + } + + if (!values.audit) { + throw new Error('--audit is required.'); + } + + const format = values.format ?? 'markdown'; + if (format !== 'markdown' && format !== 'json') { + throw new Error(`Invalid --format value "${format}".`); + } + + const summaryOptions: Phase0BaselineSummaryOptions = { + sessionIds: parseCsv(values.session), + channels: parseCsv(values.channel), + senders: parseCsv(values.sender), + sources: parseSources(values.source), + maxSessions: parseOptionalNumber(values['max-sessions'], '--max-sessions') ?? 20, + maxChannels: parseOptionalNumber(values['max-channels'], '--max-channels') ?? 20, + maxSkipReasons: parseOptionalNumber(values['max-skip-reasons'], '--max-skip-reasons') ?? 10, + }; + + const startTime = parseTime(values.since, '--since'); + const endTime = parseTime(values.until, '--until'); + + const events = await queryAuditLogs(values.audit, { + start_time: startTime, + end_time: endTime, + event_types: [...DEFAULT_EVENT_TYPES], + }); + + const summary = summarizePhase0Baseline(events, summaryOptions); + + const output = format === 'json' + ? JSON.stringify({ + generated_at: new Date().toISOString(), + event_count: events.length, + filters: { + since_ms: startTime, + until_ms: endTime, + }, + options: summaryOptions, + summary, + }, null, 2) + : renderPhase0BaselineMarkdown(summary, summaryOptions); + + if (values.out) { + await writeFile(values.out, `${output}\n`, 'utf-8'); + } else { + process.stdout.write(`${output}\n`); + } +} + +main().catch((error) => { + const message = error instanceof Error ? error.message : String(error); + process.stderr.write(`${message}\n\n${usage()}\n`); + process.exitCode = 1; +}); diff --git a/src/audit/phase0BaselineSummary.test.ts b/src/audit/phase0BaselineSummary.test.ts new file mode 100644 index 0000000..10ea549 --- /dev/null +++ b/src/audit/phase0BaselineSummary.test.ts @@ -0,0 +1,195 @@ +import { describe, expect, it } from 'vitest'; +import type { AuditEvent } from './types.js'; +import { renderPhase0BaselineMarkdown, summarizePhase0Baseline } from './phase0BaselineSummary.js'; + +function makeEvent( + timestamp: number, + event_type: AuditEvent['event_type'], + event: Record, +): AuditEvent { + return { + timestamp, + level: 'info', + event_type, + event, + }; +} + +describe('summarizePhase0Baseline', () => { + it('summarizes run outcomes, cancel latency, and reaction decisions', () => { + const events: AuditEvent[] = [ + makeEvent(1000, 'run.state', { + session_id: 'telegram:s1', + channel: 'telegram', + sender: 'u1', + source: 'channel', + state: 'start', + }), + makeEvent(1200, 'run.state', { + session_id: 'telegram:s1', + channel: 'telegram', + sender: 'u1', + source: 'channel', + state: 'complete', + }), + makeEvent(2000, 'run.state', { + session_id: 'discord:s2', + channel: 'discord', + sender: 'u2', + source: 'gateway', + state: 'start', + }), + makeEvent(2400, 'run.state', { + session_id: 'discord:s2', + channel: 'discord', + sender: 'u2', + source: 'gateway', + state: 'error', + }), + makeEvent(3000, 'run.state', { + session_id: 'telegram:s3', + channel: 'telegram', + sender: 'u3', + source: 'channel', + state: 'start', + }), + makeEvent(3200, 'run.state', { + session_id: 'telegram:s3', + channel: 'telegram', + sender: 'u3', + source: 'channel', + state: 'cancelled', + }), + makeEvent(3300, 'run.state', { + session_id: 'telegram:s3', + channel: 'telegram', + sender: 'u3', + source: 'channel', + state: 'cancel_requested', + }), + makeEvent(3500, 'run.cancel', { + session_id: 'telegram:s3', + channel: 'telegram', + sender: 'u3', + source: 'channel', + requested: true, + acknowledged: true, + latency_ms: 120, + }), + makeEvent(3600, 'run.cancel', { + session_id: 'discord:s2', + channel: 'discord', + sender: 'u2', + source: 'gateway', + requested: true, + acknowledged: false, + latency_ms: 300, + }), + makeEvent(3700, 'reaction.match', { + session_id: 'telegram:s1', + channel: 'telegram', + sender: 'u1', + source: 'channel', + rule_name: 'boss-email', + }), + makeEvent(3800, 'reaction.skip', { + session_id: 'telegram:s1', + channel: 'telegram', + sender: 'u1', + source: 'channel', + reason: 'no_match', + candidate_count: 1, + }), + makeEvent(3900, 'reaction.skip', { + session_id: 'discord:s2', + channel: 'discord', + sender: 'u2', + source: 'gateway', + reason: 'no_rules', + candidate_count: 0, + }), + ]; + + const summary = summarizePhase0Baseline(events); + + expect(summary.event_counts.run_state).toBe(7); + expect(summary.run_outcomes.overall.total_outcomes).toBe(3); + expect(summary.run_outcomes.overall.complete).toBe(1); + expect(summary.run_outcomes.overall.cancelled).toBe(1); + expect(summary.run_outcomes.overall.error).toBe(1); + expect(summary.run_outcomes.overall.cancel_requested).toBe(1); + expect(summary.run_outcomes.overall.start).toBe(3); + + const telegram = summary.run_outcomes.by_channel.find((row) => row.key === 'telegram'); + expect(telegram?.stats.total_outcomes).toBe(2); + const discord = summary.run_outcomes.by_channel.find((row) => row.key === 'discord'); + expect(discord?.stats.total_outcomes).toBe(1); + + const cancelStats = summary.cancel_latency_ms; + expect(cancelStats?.count).toBe(2); + expect(cancelStats?.p50_ms).toBe(210); + expect(cancelStats?.p95_ms).toBe(291); + + expect(summary.reactions.matched).toBe(1); + expect(summary.reactions.skipped).toBe(2); + expect(summary.reactions.match_rate_pct).toBe(33.33); + + expect(summary.reactions.skip_reasons).toEqual([ + { reason: 'no_match', count: 1, pct: 50 }, + { reason: 'no_rules', count: 1, pct: 50 }, + ]); + }); + + it('filters by channel', () => { + const events: AuditEvent[] = [ + makeEvent(1000, 'run.state', { + session_id: 'telegram:s1', + channel: 'telegram', + sender: 'u1', + source: 'channel', + state: 'complete', + }), + makeEvent(1100, 'run.state', { + session_id: 'discord:s2', + channel: 'discord', + sender: 'u2', + source: 'gateway', + state: 'error', + }), + ]; + + const summary = summarizePhase0Baseline(events, { channels: ['telegram'] }); + expect(summary.run_outcomes.overall.total_outcomes).toBe(1); + expect(summary.run_outcomes.by_channel).toHaveLength(1); + expect(summary.run_outcomes.by_channel[0]?.key).toBe('telegram'); + }); +}); + +describe('renderPhase0BaselineMarkdown', () => { + it('renders key sections', () => { + const events: AuditEvent[] = [ + makeEvent(1000, 'run.state', { + session_id: 'telegram:s1', + channel: 'telegram', + sender: 'u1', + source: 'channel', + state: 'complete', + }), + makeEvent(1200, 'reaction.skip', { + session_id: 'telegram:s1', + channel: 'telegram', + sender: 'u1', + source: 'channel', + reason: 'no_match', + candidate_count: 1, + }), + ]; + + const summary = summarizePhase0Baseline(events); + const markdown = renderPhase0BaselineMarkdown(summary); + expect(markdown).toContain('Phase 0 Baseline Telemetry Summary'); + expect(markdown).toContain('Run Outcomes (Overall)'); + expect(markdown).toContain('Reaction Decisions'); + expect(markdown).toContain('no_match'); + }); +}); diff --git a/src/audit/phase0BaselineSummary.ts b/src/audit/phase0BaselineSummary.ts new file mode 100644 index 0000000..711fc6a --- /dev/null +++ b/src/audit/phase0BaselineSummary.ts @@ -0,0 +1,477 @@ +import type { AuditEvent } from './types.js'; + +export type AuditSource = 'gateway' | 'channel'; +export type RunState = 'start' | 'complete' | 'cancel_requested' | 'cancelled' | 'error'; + +export interface Phase0BaselineSummaryOptions { + sessionIds?: string[]; + channels?: string[]; + senders?: string[]; + sources?: AuditSource[]; + maxSessions?: number; + maxChannels?: number; + maxSkipReasons?: number; +} + +export interface RunOutcomeStats { + total_outcomes: number; + complete: number; + cancelled: number; + error: number; + cancel_requested: number; + start: number; + completion_rate_pct: number | null; + cancel_rate_pct: number | null; + error_rate_pct: number | null; +} + +export interface RunOutcomeGroup { + key: string; + stats: RunOutcomeStats; +} + +export interface CancelLatencyStats { + count: number; + avg_ms: number; + p50_ms: number; + p95_ms: number; + min_ms: number; + max_ms: number; +} + +export interface ReactionSkipReasonStats { + reason: string; + count: number; + pct: number; +} + +export interface ReactionSummary { + matched: number; + skipped: number; + total: number; + match_rate_pct: number | null; + skip_rate_pct: number | null; + skip_reasons: ReactionSkipReasonStats[]; +} + +export interface Phase0BaselineSummary { + event_counts: { + run_state: number; + run_cancel: number; + reaction_match: number; + reaction_skip: number; + }; + run_outcomes: { + overall: RunOutcomeStats; + by_channel: RunOutcomeGroup[]; + by_session: RunOutcomeGroup[]; + }; + cancel_latency_ms: CancelLatencyStats | null; + reactions: ReactionSummary; +} + +interface RunCounter { + start: number; + complete: number; + cancel_requested: number; + cancelled: number; + error: number; +} + +function createRunCounter(): RunCounter { + return { + start: 0, + complete: 0, + cancel_requested: 0, + cancelled: 0, + error: 0, + }; +} + +function toRecord(value: unknown): Record { + return (value && typeof value === 'object') ? value as Record : {}; +} + +function readString(value: unknown): string | undefined { + return typeof value === 'string' ? value : undefined; +} + +function readNumber(value: unknown): number | undefined { + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + +function isRunState(value: unknown): value is RunState { + return value === 'start' + || value === 'complete' + || value === 'cancel_requested' + || value === 'cancelled' + || value === 'error'; +} + +function isAuditSource(value: unknown): value is AuditSource { + return value === 'gateway' || value === 'channel'; +} + +function toPct(part: number, whole: number): number | null { + if (whole <= 0) { + return null; + } + return Math.round((part / whole) * 10000) / 100; +} + +function percentile(sortedAscending: number[], pct: number): number { + if (sortedAscending.length === 0) { + return 0; + } + if (sortedAscending.length === 1) { + return sortedAscending[0]; + } + const clampedPct = Math.max(0, Math.min(100, pct)); + const position = (clampedPct / 100) * (sortedAscending.length - 1); + const lowerIndex = Math.floor(position); + const upperIndex = Math.ceil(position); + if (lowerIndex === upperIndex) { + return sortedAscending[lowerIndex] ?? 0; + } + const lower = sortedAscending[lowerIndex] ?? 0; + const upper = sortedAscending[upperIndex] ?? 0; + const weight = position - lowerIndex; + return lower + ((upper - lower) * weight); +} + +function computeLatencyStats(samples: number[]): CancelLatencyStats | null { + if (samples.length === 0) { + return null; + } + + const sorted = [...samples].sort((a, b) => a - b); + const total = sorted.reduce((sum, value) => sum + value, 0); + + return { + count: sorted.length, + avg_ms: Math.round(total / sorted.length), + p50_ms: Math.round(percentile(sorted, 50)), + p95_ms: Math.round(percentile(sorted, 95)), + min_ms: sorted[0] ?? 0, + max_ms: sorted[sorted.length - 1] ?? 0, + }; +} + +function buildRunOutcomeStats(counter: RunCounter): RunOutcomeStats { + const totalOutcomes = counter.complete + counter.cancelled + counter.error; + return { + total_outcomes: totalOutcomes, + complete: counter.complete, + cancelled: counter.cancelled, + error: counter.error, + cancel_requested: counter.cancel_requested, + start: counter.start, + completion_rate_pct: toPct(counter.complete, totalOutcomes), + cancel_rate_pct: toPct(counter.cancelled, totalOutcomes), + error_rate_pct: toPct(counter.error, totalOutcomes), + }; +} + +function sortGroups(groups: Map, limit?: number): RunOutcomeGroup[] { + const rows = [...groups.entries()] + .map(([key, counter]) => ({ key, stats: buildRunOutcomeStats(counter) })) + .sort((a, b) => { + const delta = b.stats.total_outcomes - a.stats.total_outcomes; + if (delta !== 0) { + return delta; + } + return a.key.localeCompare(b.key); + }); + + if (typeof limit === 'number' && Number.isFinite(limit) && limit > 0) { + return rows.slice(0, Math.floor(limit)); + } + return rows; +} + +function sortSkipReasons(reasons: Map, limit?: number): ReactionSkipReasonStats[] { + const total = [...reasons.values()].reduce((sum, value) => sum + value, 0); + const rows = [...reasons.entries()] + .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])) + .map(([reason, count]) => ({ + reason, + count, + pct: total > 0 ? Math.round((count / total) * 10000) / 100 : 0, + })); + + if (typeof limit === 'number' && Number.isFinite(limit) && limit > 0) { + return rows.slice(0, Math.floor(limit)); + } + return rows; +} + +function shouldInclude( + payload: Record, + filters: { + session: Set; + channel: Set; + sender: Set; + source: Set; + }, +): { sessionId: string | undefined; channel: string | undefined; sender: string | undefined; source: AuditSource | undefined; include: boolean } { + const sessionId = readString(payload.session_id); + const channel = readString(payload.channel); + const sender = readString(payload.sender); + const sourceRaw = readString(payload.source); + const source = isAuditSource(sourceRaw) ? sourceRaw : undefined; + + if (!channel || !sender || !source) { + return { sessionId, channel, sender, source, include: false }; + } + + if (filters.session.size > 0 && (!sessionId || !filters.session.has(sessionId))) { + return { sessionId, channel, sender, source, include: false }; + } + if (filters.channel.size > 0 && !filters.channel.has(channel)) { + return { sessionId, channel, sender, source, include: false }; + } + if (filters.sender.size > 0 && !filters.sender.has(sender)) { + return { sessionId, channel, sender, source, include: false }; + } + if (filters.source.size > 0 && !filters.source.has(source)) { + return { sessionId, channel, sender, source, include: false }; + } + + return { sessionId, channel, sender, source, include: true }; +} + +export function summarizePhase0Baseline( + events: AuditEvent[], + options: Phase0BaselineSummaryOptions = {}, +): Phase0BaselineSummary { + const sessionFilter = new Set(options.sessionIds ?? []); + const channelFilter = new Set(options.channels ?? []); + const senderFilter = new Set(options.senders ?? []); + const sourceFilter = new Set(options.sources ?? []); + + const filters = { + session: sessionFilter, + channel: channelFilter, + sender: senderFilter, + source: sourceFilter, + }; + + const runTotals = createRunCounter(); + const runByChannel = new Map(); + const runBySession = new Map(); + const cancelLatencies: number[] = []; + const skipReasons = new Map(); + + const eventCounts = { + run_state: 0, + run_cancel: 0, + reaction_match: 0, + reaction_skip: 0, + }; + + let reactionMatched = 0; + let reactionSkipped = 0; + + for (const event of events) { + const payload = toRecord(event.event); + + if (event.event_type === 'run.state') { + const state = readString(payload.state); + if (!isRunState(state)) { + continue; + } + + const filterResult = shouldInclude(payload, filters); + if (!filterResult.include) { + continue; + } + eventCounts.run_state += 1; + + runTotals[state] += 1; + + const channelCounter = runByChannel.get(filterResult.channel!) ?? createRunCounter(); + channelCounter[state] += 1; + runByChannel.set(filterResult.channel!, channelCounter); + + if (filterResult.sessionId) { + const sessionCounter = runBySession.get(filterResult.sessionId) ?? createRunCounter(); + sessionCounter[state] += 1; + runBySession.set(filterResult.sessionId, sessionCounter); + } + continue; + } + + if (event.event_type === 'run.cancel') { + const filterResult = shouldInclude(payload, filters); + if (!filterResult.include) { + continue; + } + eventCounts.run_cancel += 1; + const latency = readNumber(payload.latency_ms); + if (typeof latency === 'number' && latency >= 0) { + cancelLatencies.push(latency); + } + continue; + } + + if (event.event_type === 'reaction.match') { + const filterResult = shouldInclude(payload, filters); + if (!filterResult.include) { + continue; + } + eventCounts.reaction_match += 1; + reactionMatched += 1; + continue; + } + + if (event.event_type === 'reaction.skip') { + const filterResult = shouldInclude(payload, filters); + if (!filterResult.include) { + continue; + } + eventCounts.reaction_skip += 1; + reactionSkipped += 1; + const reason = readString(payload.reason) ?? 'unknown'; + skipReasons.set(reason, (skipReasons.get(reason) ?? 0) + 1); + } + } + + const totalReactions = reactionMatched + reactionSkipped; + + return { + event_counts: eventCounts, + run_outcomes: { + overall: buildRunOutcomeStats(runTotals), + by_channel: sortGroups(runByChannel, options.maxChannels), + by_session: sortGroups(runBySession, options.maxSessions), + }, + cancel_latency_ms: computeLatencyStats(cancelLatencies), + reactions: { + matched: reactionMatched, + skipped: reactionSkipped, + total: totalReactions, + match_rate_pct: toPct(reactionMatched, totalReactions), + skip_rate_pct: toPct(reactionSkipped, totalReactions), + skip_reasons: sortSkipReasons(skipReasons, options.maxSkipReasons), + }, + }; +} + +function formatPct(value: number | null): string { + return value === null ? 'n/a' : `${value.toFixed(2)}%`; +} + +export function renderPhase0BaselineMarkdown( + summary: Phase0BaselineSummary, + options: Phase0BaselineSummaryOptions = {}, +): string { + const lines: string[] = []; + + lines.push('# Phase 0 Baseline Telemetry Summary'); + lines.push(''); + lines.push(`- Run state events: ${summary.event_counts.run_state}`); + lines.push(`- Run cancel events: ${summary.event_counts.run_cancel}`); + lines.push(`- Reaction matches: ${summary.event_counts.reaction_match}`); + lines.push(`- Reaction skips: ${summary.event_counts.reaction_skip}`); + lines.push(''); + + if (options.sessionIds?.length) { + lines.push(`- Sessions: ${options.sessionIds.join(', ')}`); + } + if (options.channels?.length) { + lines.push(`- Channels: ${options.channels.join(', ')}`); + } + if (options.senders?.length) { + lines.push(`- Senders: ${options.senders.join(', ')}`); + } + if (options.sources?.length) { + lines.push(`- Sources: ${options.sources.join(', ')}`); + } + if ( + (options.sessionIds?.length ?? 0) > 0 + || (options.channels?.length ?? 0) > 0 + || (options.senders?.length ?? 0) > 0 + || (options.sources?.length ?? 0) > 0 + ) { + lines.push(''); + } + + lines.push('## Run Outcomes (Overall)'); + lines.push(''); + lines.push(`- Total outcomes: ${summary.run_outcomes.overall.total_outcomes}`); + lines.push(`- Complete: ${summary.run_outcomes.overall.complete} (${formatPct(summary.run_outcomes.overall.completion_rate_pct)})`); + lines.push(`- Cancelled: ${summary.run_outcomes.overall.cancelled} (${formatPct(summary.run_outcomes.overall.cancel_rate_pct)})`); + lines.push(`- Errors: ${summary.run_outcomes.overall.error} (${formatPct(summary.run_outcomes.overall.error_rate_pct)})`); + lines.push(`- Cancel requested: ${summary.run_outcomes.overall.cancel_requested}`); + lines.push(`- Starts: ${summary.run_outcomes.overall.start}`); + lines.push(''); + + lines.push('## Run Outcomes by Channel'); + lines.push(''); + lines.push('| Channel | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts |'); + lines.push('| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |'); + if (summary.run_outcomes.by_channel.length === 0) { + lines.push('| _none_ | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 0 |'); + } else { + for (const row of summary.run_outcomes.by_channel) { + lines.push( + `| ${row.key} | ${row.stats.total_outcomes} | ${row.stats.complete} | ${row.stats.cancelled} | ${row.stats.error} | ` + + `${formatPct(row.stats.completion_rate_pct)} | ${formatPct(row.stats.cancel_rate_pct)} | ${formatPct(row.stats.error_rate_pct)} | ` + + `${row.stats.cancel_requested} | ${row.stats.start} |`, + ); + } + } + lines.push(''); + + lines.push('## Run Outcomes by Session'); + lines.push(''); + lines.push('| Session | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts |'); + lines.push('| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |'); + if (summary.run_outcomes.by_session.length === 0) { + lines.push('| _none_ | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 0 |'); + } else { + for (const row of summary.run_outcomes.by_session) { + lines.push( + `| ${row.key} | ${row.stats.total_outcomes} | ${row.stats.complete} | ${row.stats.cancelled} | ${row.stats.error} | ` + + `${formatPct(row.stats.completion_rate_pct)} | ${formatPct(row.stats.cancel_rate_pct)} | ${formatPct(row.stats.error_rate_pct)} | ` + + `${row.stats.cancel_requested} | ${row.stats.start} |`, + ); + } + } + lines.push(''); + + lines.push('## Cancel Latency'); + lines.push(''); + if (!summary.cancel_latency_ms) { + lines.push('- No cancel latency samples.'); + } else { + const stats = summary.cancel_latency_ms; + lines.push(`- Count: ${stats.count}`); + lines.push(`- Avg: ${stats.avg_ms}ms`); + lines.push(`- P50: ${stats.p50_ms}ms`); + lines.push(`- P95: ${stats.p95_ms}ms`); + lines.push(`- Min: ${stats.min_ms}ms`); + lines.push(`- Max: ${stats.max_ms}ms`); + } + lines.push(''); + + lines.push('## Reaction Decisions'); + lines.push(''); + lines.push(`- Matched: ${summary.reactions.matched} (${formatPct(summary.reactions.match_rate_pct)})`); + lines.push(`- Skipped: ${summary.reactions.skipped} (${formatPct(summary.reactions.skip_rate_pct)})`); + lines.push(''); + lines.push('### Skip Reasons'); + lines.push(''); + lines.push('| Reason | Count | Percent |'); + lines.push('| --- | ---: | ---: |'); + if (summary.reactions.skip_reasons.length === 0) { + lines.push('| _none_ | 0 | 0.00% |'); + } else { + for (const reason of summary.reactions.skip_reasons) { + lines.push(`| ${reason.reason} | ${reason.count} | ${reason.pct.toFixed(2)}% |`); + } + } + lines.push(''); + + return lines.join('\n'); +}