feat(audit): add phase-0 baseline summary tooling

Diagrams reviewed: docs/architecture/AGENT_DIAGRAM.md, docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md, docs/api/PROTOCOL.md (no changes required).
This commit is contained in:
William Valentin
2026-02-25 09:14:38 -08:00
parent 2311826092
commit 0b8f7c7299
6 changed files with 861 additions and 1 deletions
+195
View File
@@ -0,0 +1,195 @@
import { describe, expect, it } from 'vitest';
import type { AuditEvent } from './types.js';
import { renderPhase0BaselineMarkdown, summarizePhase0Baseline } from './phase0BaselineSummary.js';
/**
 * Test helper: wraps a payload in an audit event envelope.
 *
 * @param timestamp - Value stored in the event's `timestamp` field.
 * @param event_type - Value stored in the event's `event_type` field.
 * @param event - Payload object, stored by reference (not copied).
 * @returns An audit event whose `level` is always `'info'`.
 */
function makeEvent(
  timestamp: number,
  event_type: AuditEvent['event_type'],
  event: Record<string, unknown>,
): AuditEvent {
  const auditEvent: AuditEvent = { timestamp, level: 'info', event_type, event };
  return auditEvent;
}
describe('summarizePhase0Baseline', () => {
  // Identity fields shared by every event of one session. They are spread
  // first so each payload keeps the key order of a hand-written literal.
  const telegramS1 = { session_id: 'telegram:s1', channel: 'telegram', sender: 'u1', source: 'channel' };
  const discordS2 = { session_id: 'discord:s2', channel: 'discord', sender: 'u2', source: 'gateway' };
  const telegramS3 = { session_id: 'telegram:s3', channel: 'telegram', sender: 'u3', source: 'channel' };

  it('summarizes run outcomes, cancel latency, and reaction decisions', () => {
    const events: AuditEvent[] = [
      // Session s1: start -> complete.
      makeEvent(1000, 'run.state', { ...telegramS1, state: 'start' }),
      makeEvent(1200, 'run.state', { ...telegramS1, state: 'complete' }),
      // Session s2: start -> error.
      makeEvent(2000, 'run.state', { ...discordS2, state: 'start' }),
      makeEvent(2400, 'run.state', { ...discordS2, state: 'error' }),
      // Session s3: start, cancelled, and a cancel request.
      makeEvent(3000, 'run.state', { ...telegramS3, state: 'start' }),
      makeEvent(3200, 'run.state', { ...telegramS3, state: 'cancelled' }),
      makeEvent(3300, 'run.state', { ...telegramS3, state: 'cancel_requested' }),
      // Two cancel latency samples: 120ms and 300ms.
      makeEvent(3500, 'run.cancel', {
        ...telegramS3,
        requested: true,
        acknowledged: true,
        latency_ms: 120,
      }),
      makeEvent(3600, 'run.cancel', {
        ...discordS2,
        requested: true,
        acknowledged: false,
        latency_ms: 300,
      }),
      // One reaction match and two skips with different reasons.
      makeEvent(3700, 'reaction.match', { ...telegramS1, rule_name: 'boss-email' }),
      makeEvent(3800, 'reaction.skip', { ...telegramS1, reason: 'no_match', candidate_count: 1 }),
      makeEvent(3900, 'reaction.skip', { ...discordS2, reason: 'no_rules', candidate_count: 0 }),
    ];

    const summary = summarizePhase0Baseline(events);
    const { overall, by_channel: byChannel } = summary.run_outcomes;

    expect(summary.event_counts.run_state).toBe(7);
    expect(overall.total_outcomes).toBe(3);
    expect(overall.complete).toBe(1);
    expect(overall.cancelled).toBe(1);
    expect(overall.error).toBe(1);
    expect(overall.cancel_requested).toBe(1);
    expect(overall.start).toBe(3);

    const outcomesFor = (channel: string) =>
      byChannel.find((row) => row.key === channel)?.stats.total_outcomes;
    expect(outcomesFor('telegram')).toBe(2);
    expect(outcomesFor('discord')).toBe(1);

    // With samples [120, 300], the interpolated p50 is 210 and p95 is 291.
    const latency = summary.cancel_latency_ms;
    expect(latency?.count).toBe(2);
    expect(latency?.p50_ms).toBe(210);
    expect(latency?.p95_ms).toBe(291);

    const { reactions } = summary;
    expect(reactions.matched).toBe(1);
    expect(reactions.skipped).toBe(2);
    expect(reactions.match_rate_pct).toBe(33.33);
    expect(reactions.skip_reasons).toEqual([
      { reason: 'no_match', count: 1, pct: 50 },
      { reason: 'no_rules', count: 1, pct: 50 },
    ]);
  });

  it('filters by channel', () => {
    const events: AuditEvent[] = [
      makeEvent(1000, 'run.state', { ...telegramS1, state: 'complete' }),
      makeEvent(1100, 'run.state', { ...discordS2, state: 'error' }),
    ];

    const { run_outcomes: outcomes } = summarizePhase0Baseline(events, { channels: ['telegram'] });

    expect(outcomes.overall.total_outcomes).toBe(1);
    expect(outcomes.by_channel).toHaveLength(1);
    expect(outcomes.by_channel[0]?.key).toBe('telegram');
  });
});
describe('renderPhase0BaselineMarkdown', () => {
  it('renders key sections', () => {
    // Both events belong to the same telegram session.
    const identity = { session_id: 'telegram:s1', channel: 'telegram', sender: 'u1', source: 'channel' };
    const events: AuditEvent[] = [
      makeEvent(1000, 'run.state', { ...identity, state: 'complete' }),
      makeEvent(1200, 'reaction.skip', { ...identity, reason: 'no_match', candidate_count: 1 }),
    ];

    const markdown = renderPhase0BaselineMarkdown(summarizePhase0Baseline(events));

    const expectedFragments = [
      'Phase 0 Baseline Telemetry Summary',
      'Run Outcomes (Overall)',
      'Reaction Decisions',
      'no_match',
    ];
    for (const fragment of expectedFragments) {
      expect(markdown).toContain(fragment);
    }
  });
});
+477
View File
@@ -0,0 +1,477 @@
import type { AuditEvent } from './types.js';
/** Accepted values of an event payload's `source` field; other values cause the event to be ignored. */
export type AuditSource = 'gateway' | 'channel';
/** Accepted values of a `run.state` payload's `state` field; other values cause the event to be ignored. */
export type RunState = 'start' | 'complete' | 'cancel_requested' | 'cancelled' | 'error';
/**
 * Optional filters and row limits for the Phase 0 baseline summary.
 * An omitted or empty filter list places no restriction on that field.
 */
export interface Phase0BaselineSummaryOptions {
/** Keep only events whose payload `session_id` is one of these values. */
sessionIds?: string[];
/** Keep only events whose payload `channel` is one of these values. */
channels?: string[];
/** Keep only events whose payload `sender` is one of these values. */
senders?: string[];
/** Keep only events whose payload `source` is one of these values. */
sources?: AuditSource[];
/** Row cap for `by_session`: a finite value of at least 1 keeps the first floor(n) rows; zero, negative, non-finite, or omitted values leave the list uncapped. */
maxSessions?: number;
/** Row cap for `by_channel`, interpreted the same way as `maxSessions`. */
maxChannels?: number;
/** Row cap for `skip_reasons`, interpreted the same way as `maxSessions`. */
maxSkipReasons?: number;
}
/** Counts of `run.state` events per state, plus outcome rates derived from them. */
export interface RunOutcomeStats {
/** Sum of `complete`, `cancelled` and `error`; `start` and `cancel_requested` are not outcomes. */
total_outcomes: number;
/** Number of `complete` state events. */
complete: number;
/** Number of `cancelled` state events. */
cancelled: number;
/** Number of `error` state events. */
error: number;
/** Number of `cancel_requested` state events. */
cancel_requested: number;
/** Number of `start` state events. */
start: number;
/** `complete` as a percentage of `total_outcomes`, rounded to two decimals; null when `total_outcomes` is 0. */
completion_rate_pct: number | null;
/** `cancelled` as a percentage of `total_outcomes`, rounded to two decimals; null when `total_outcomes` is 0. */
cancel_rate_pct: number | null;
/** `error` as a percentage of `total_outcomes`, rounded to two decimals; null when `total_outcomes` is 0. */
error_rate_pct: number | null;
}
/** Run outcome statistics for one group (a single channel or a single session). */
export interface RunOutcomeGroup {
/** Group identifier: the payload `channel` for channel rows, the payload `session_id` for session rows. */
key: string;
/** Statistics accumulated from the `run.state` events of this group. */
stats: RunOutcomeStats;
}
/** Distribution of the `latency_ms` samples collected from `run.cancel` events. */
export interface CancelLatencyStats {
/** Number of samples. */
count: number;
/** Arithmetic mean, rounded to the nearest integer. */
avg_ms: number;
/** 50th percentile (linear interpolation between neighbouring samples), rounded to the nearest integer. */
p50_ms: number;
/** 95th percentile (linear interpolation between neighbouring samples), rounded to the nearest integer. */
p95_ms: number;
/** Smallest sample, unrounded. */
min_ms: number;
/** Largest sample, unrounded. */
max_ms: number;
}
/** How often one skip reason occurred among the counted `reaction.skip` events. */
export interface ReactionSkipReasonStats {
/** Payload `reason` string, or `'unknown'` when the payload carried no string reason. */
reason: string;
/** Number of skip events with this reason. */
count: number;
/** Share of all counted skip events (computed before any row cap is applied), as a percentage rounded to two decimals. */
pct: number;
}
/** Aggregate of `reaction.match` and `reaction.skip` events. */
export interface ReactionSummary {
/** Number of counted `reaction.match` events. */
matched: number;
/** Number of counted `reaction.skip` events. */
skipped: number;
/** `matched + skipped`. */
total: number;
/** `matched` as a percentage of `total`, rounded to two decimals; null when `total` is 0. */
match_rate_pct: number | null;
/** `skipped` as a percentage of `total`, rounded to two decimals; null when `total` is 0. */
skip_rate_pct: number | null;
/** Skip reasons ordered by descending count, ties broken by reason name. */
skip_reasons: ReactionSkipReasonStats[];
}
/** Result of summarizing a list of audit events for the Phase 0 baseline. */
export interface Phase0BaselineSummary {
/** Number of events of each type that passed validation and the active filters. */
event_counts: {
run_state: number;
run_cancel: number;
reaction_match: number;
reaction_skip: number;
};
/** Run outcome statistics, overall and broken down by channel and by session. */
run_outcomes: {
overall: RunOutcomeStats;
/** Rows ordered by descending `total_outcomes`, ties broken by key. */
by_channel: RunOutcomeGroup[];
/** Same ordering as `by_channel`; events without a `session_id` contribute no row here. */
by_session: RunOutcomeGroup[];
};
/** Cancel latency distribution, or null when no usable latency sample was seen. */
cancel_latency_ms: CancelLatencyStats | null;
/** Reaction match/skip aggregate. */
reactions: ReactionSummary;
}
/** Mutable tally of `run.state` events, with one field per accepted state value. */
interface RunCounter {
start: number;
complete: number;
cancel_requested: number;
cancelled: number;
error: number;
}
/**
 * Creates a fresh run-state tally with every count at zero.
 *
 * @returns A new object on every call, so callers can mutate it freely.
 */
function createRunCounter(): RunCounter {
  const zeroed: RunCounter = { start: 0, complete: 0, cancel_requested: 0, cancelled: 0, error: 0 };
  return zeroed;
}
/**
 * Views an arbitrary value as a string-keyed record.
 *
 * @param value - Any value, typically an event payload.
 * @returns The value itself when it is a non-null object (arrays included),
 *   otherwise a new empty object.
 */
function toRecord(value: unknown): Record<string, unknown> {
  if (typeof value !== 'object' || value === null) {
    return {};
  }
  return value as Record<string, unknown>;
}
/**
 * Narrows an unknown value to a string.
 *
 * @returns The value when it is a primitive string (the empty string
 *   included), otherwise `undefined`.
 */
function readString(value: unknown): string | undefined {
  if (typeof value !== 'string') {
    return undefined;
  }
  return value;
}
/**
 * Narrows an unknown value to a finite number.
 *
 * @returns The value when it is a number other than NaN or +/-Infinity,
 *   otherwise `undefined`. Numeric strings are not converted.
 */
function readNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
/**
 * Type guard for the accepted run states.
 *
 * @returns True only when `value` is strictly equal to one of `'start'`,
 *   `'complete'`, `'cancel_requested'`, `'cancelled'` or `'error'`.
 */
function isRunState(value: unknown): value is RunState {
  switch (value) {
    case 'start':
    case 'complete':
    case 'cancel_requested':
    case 'cancelled':
    case 'error':
      return true;
    default:
      return false;
  }
}
/**
 * Type guard for the accepted audit sources.
 *
 * @returns True only when `value` is strictly equal to `'gateway'` or `'channel'`.
 */
function isAuditSource(value: unknown): value is AuditSource {
  if (value === 'gateway') {
    return true;
  }
  return value === 'channel';
}
/**
 * Expresses `part` as a percentage of `whole`, rounded to two decimals.
 *
 * @param part - Numerator.
 * @param whole - Denominator.
 * @returns The rounded percentage, or null when `whole` is zero or negative.
 */
function toPct(part: number, whole: number): number | null {
  // Scale by 10000 before rounding so two decimal places survive the
  // division by 100.
  const basisPoints = Math.round((part / whole) * 10000);
  return whole <= 0 ? null : basisPoints / 100;
}
/**
 * Computes a percentile of pre-sorted samples using linear interpolation
 * between the two nearest ranks.
 *
 * @param sortedAscending - Samples already sorted in ascending order.
 * @param pct - Requested percentile; values outside 0..100 are clamped.
 * @returns 0 for an empty list, the only sample for a one-element list,
 *   otherwise the interpolated value.
 */
function percentile(sortedAscending: number[], pct: number): number {
  const size = sortedAscending.length;
  if (size === 0) {
    return 0;
  }
  if (size === 1) {
    return sortedAscending[0];
  }

  const boundedPct = Math.min(100, Math.max(0, pct));
  // Fractional rank in [0, size - 1]; its neighbours are the samples to blend.
  const rank = (boundedPct / 100) * (size - 1);
  const below = Math.floor(rank);
  const above = Math.ceil(rank);

  const lowerSample = sortedAscending[below] ?? 0;
  if (below === above) {
    return lowerSample;
  }
  const upperSample = sortedAscending[above] ?? 0;
  return lowerSample + ((upperSample - lowerSample) * (rank - below));
}
/**
 * Summarizes latency samples into count, mean, percentiles and extremes.
 *
 * @param samples - Latency values in milliseconds, in any order; the array
 *   is not modified.
 * @returns Null for an empty list. Otherwise the statistics, with the mean
 *   and both percentiles rounded to integers and min/max left unrounded.
 */
function computeLatencyStats(samples: number[]): CancelLatencyStats | null {
  const count = samples.length;
  if (count === 0) {
    return null;
  }

  // Sort a copy so the caller's array keeps its order.
  const ordered = samples.slice().sort((left, right) => left - right);
  let sum = 0;
  for (const sample of ordered) {
    sum += sample;
  }

  return {
    count,
    avg_ms: Math.round(sum / count),
    p50_ms: Math.round(percentile(ordered, 50)),
    p95_ms: Math.round(percentile(ordered, 95)),
    min_ms: ordered[0] ?? 0,
    max_ms: ordered[count - 1] ?? 0,
  };
}
/**
 * Converts a raw run-state tally into the published statistics shape.
 *
 * @param counter - Per-state event counts.
 * @returns The counts plus completion, cancel and error rates. The rates use
 *   `complete + cancelled + error` as the denominator.
 */
function buildRunOutcomeStats(counter: RunCounter): RunOutcomeStats {
  const { start, complete, cancel_requested, cancelled, error } = counter;
  // Only terminal states count as outcomes.
  const outcomes = complete + cancelled + error;
  return {
    total_outcomes: outcomes,
    complete,
    cancelled,
    error,
    cancel_requested,
    start,
    completion_rate_pct: toPct(complete, outcomes),
    cancel_rate_pct: toPct(cancelled, outcomes),
    error_rate_pct: toPct(error, outcomes),
  };
}
/**
 * Turns per-key run tallies into statistics rows, ordered by descending
 * `total_outcomes` with ties broken by key (`localeCompare`).
 *
 * @param groups - Tallies keyed by channel or session id.
 * @param limit - Optional row cap. A finite value whose floor is at least 1
 *   keeps that many leading rows; anything else (omitted, zero, negative,
 *   non-finite, or a fraction below 1) leaves the list uncapped.
 * @returns The sorted, optionally truncated rows.
 */
function sortGroups(groups: Map<string, RunCounter>, limit?: number): RunOutcomeGroup[] {
  const rows: RunOutcomeGroup[] = [];
  for (const [key, counter] of groups) {
    rows.push({ key, stats: buildRunOutcomeStats(counter) });
  }
  rows.sort((a, b) => {
    const delta = b.stats.total_outcomes - a.stats.total_outcomes;
    return delta !== 0 ? delta : a.key.localeCompare(b.key);
  });

  // Floor before testing the cap: a fractional limit such as 0.5 used to pass
  // the `> 0` check and then slice to zero rows, although 0 means "no limit".
  const cap = typeof limit === 'number' && Number.isFinite(limit) ? Math.floor(limit) : 0;
  return cap >= 1 ? rows.slice(0, cap) : rows;
}
/**
 * Turns skip-reason counts into rows ordered by descending count, with ties
 * broken by reason name (`localeCompare`).
 *
 * @param reasons - Number of skip events per reason.
 * @param limit - Optional row cap. A finite value whose floor is at least 1
 *   keeps that many leading rows; anything else (omitted, zero, negative,
 *   non-finite, or a fraction below 1) leaves the list uncapped.
 * @returns The rows. Each `pct` is the reason's share of ALL skip events
 *   (computed before truncation), rounded to two decimals.
 */
function sortSkipReasons(reasons: Map<string, number>, limit?: number): ReactionSkipReasonStats[] {
  let total = 0;
  for (const count of reasons.values()) {
    total += count;
  }

  const rows = [...reasons.entries()]
    .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
    .map(([reason, count]) => ({
      reason,
      count,
      pct: total > 0 ? Math.round((count / total) * 10000) / 100 : 0,
    }));

  // Floor before testing the cap: a fractional limit such as 0.5 used to pass
  // the `> 0` check and then slice to zero rows, although 0 means "no limit".
  const cap = typeof limit === 'number' && Number.isFinite(limit) ? Math.floor(limit) : 0;
  return cap >= 1 ? rows.slice(0, cap) : rows;
}
/**
 * Reads the identity fields of an event payload and decides whether the
 * event passes the active filters.
 *
 * An event is included only when its `channel` and `sender` are non-empty
 * strings, its `source` is an accepted audit source, and every non-empty
 * filter set contains the corresponding field. An empty filter set places no
 * restriction on its field.
 *
 * @param payload - Event payload as a string-keyed record.
 * @param filters - Allowed values per field.
 * @returns The extracted fields (undefined where missing or invalid) and the
 *   `include` verdict.
 */
function shouldInclude(
  payload: Record<string, unknown>,
  filters: {
    session: Set<string>;
    channel: Set<string>;
    sender: Set<string>;
    source: Set<string>;
  },
): { sessionId: string | undefined; channel: string | undefined; sender: string | undefined; source: AuditSource | undefined; include: boolean } {
  const sessionId = readString(payload.session_id);
  const channel = readString(payload.channel);
  const sender = readString(payload.sender);
  const rawSource = readString(payload.source);
  const source = isAuditSource(rawSource) ? rawSource : undefined;

  // A filter passes when it is empty or holds the (non-empty) candidate.
  const passes = (allowed: Set<string>, candidate: string | undefined): boolean =>
    allowed.size === 0
    || (candidate !== undefined && candidate !== '' && allowed.has(candidate));

  const hasIdentity = Boolean(channel) && Boolean(sender) && source !== undefined;
  const include = hasIdentity
    && passes(filters.session, sessionId)
    && passes(filters.channel, channel)
    && passes(filters.sender, sender)
    && passes(filters.source, source);

  return { sessionId, channel, sender, source, include };
}
/**
 * Aggregates audit events into the Phase 0 baseline summary.
 *
 * Only `run.state`, `run.cancel`, `reaction.match` and `reaction.skip`
 * events are considered. Each must pass the identity and filter check, and
 * `run.state` events must also carry an accepted state. Everything else is
 * ignored.
 *
 * @param events - Audit events to summarize, in any order.
 * @param options - Optional filters and row caps.
 * @returns Event counts, run outcome statistics (overall, per channel, per
 *   session), cancel latency statistics and reaction statistics.
 */
export function summarizePhase0Baseline(
  events: AuditEvent[],
  options: Phase0BaselineSummaryOptions = {},
): Phase0BaselineSummary {
  const filters = {
    session: new Set(options.sessionIds ?? []),
    channel: new Set(options.channels ?? []),
    sender: new Set(options.senders ?? []),
    source: new Set(options.sources ?? []),
  };

  const overallRuns = createRunCounter();
  const runsByChannel = new Map<string, RunCounter>();
  const runsBySession = new Map<string, RunCounter>();
  const latencySamples: number[] = [];
  const skipReasonCounts = new Map<string, number>();
  const eventCounts = {
    run_state: 0,
    run_cancel: 0,
    reaction_match: 0,
    reaction_skip: 0,
  };
  let matchedReactions = 0;
  let skippedReactions = 0;

  // Increments the tally for `key`, creating it on first sight.
  const bump = (table: Map<string, RunCounter>, key: string, state: keyof RunCounter): void => {
    let counter = table.get(key);
    if (counter === undefined) {
      counter = createRunCounter();
      table.set(key, counter);
    }
    counter[state] += 1;
  };

  for (const event of events) {
    const payload = toRecord(event.event);

    switch (event.event_type) {
      case 'run.state': {
        const state = readString(payload.state);
        const scope = shouldInclude(payload, filters);
        // `include` already implies a non-empty channel; the extra check only
        // narrows the type.
        if (!isRunState(state) || !scope.include || !scope.channel) {
          break;
        }
        eventCounts.run_state += 1;
        overallRuns[state] += 1;
        bump(runsByChannel, scope.channel, state);
        if (scope.sessionId) {
          bump(runsBySession, scope.sessionId, state);
        }
        break;
      }
      case 'run.cancel': {
        if (!shouldInclude(payload, filters).include) {
          break;
        }
        eventCounts.run_cancel += 1;
        const latency = readNumber(payload.latency_ms);
        // Negative or missing latencies are counted as events but not sampled.
        if (latency !== undefined && latency >= 0) {
          latencySamples.push(latency);
        }
        break;
      }
      case 'reaction.match': {
        if (!shouldInclude(payload, filters).include) {
          break;
        }
        eventCounts.reaction_match += 1;
        matchedReactions += 1;
        break;
      }
      case 'reaction.skip': {
        if (!shouldInclude(payload, filters).include) {
          break;
        }
        eventCounts.reaction_skip += 1;
        skippedReactions += 1;
        const reason = readString(payload.reason) ?? 'unknown';
        skipReasonCounts.set(reason, (skipReasonCounts.get(reason) ?? 0) + 1);
        break;
      }
      default:
        break;
    }
  }

  const reactionTotal = matchedReactions + skippedReactions;
  return {
    event_counts: eventCounts,
    run_outcomes: {
      overall: buildRunOutcomeStats(overallRuns),
      by_channel: sortGroups(runsByChannel, options.maxChannels),
      by_session: sortGroups(runsBySession, options.maxSessions),
    },
    cancel_latency_ms: computeLatencyStats(latencySamples),
    reactions: {
      matched: matchedReactions,
      skipped: skippedReactions,
      total: reactionTotal,
      match_rate_pct: toPct(matchedReactions, reactionTotal),
      skip_rate_pct: toPct(skippedReactions, reactionTotal),
      skip_reasons: sortSkipReasons(skipReasonCounts, options.maxSkipReasons),
    },
  };
}
/**
 * Formats a percentage for display.
 *
 * @returns `'n/a'` for null, otherwise the value with two decimals and a `%` suffix.
 */
function formatPct(value: number | null): string {
  return value === null ? 'n/a' : `${value.toFixed(2)}%`;
}

/**
 * Makes free-form text safe to place inside a Markdown table cell.
 *
 * Backslashes and pipes are backslash-escaped so they cannot end the cell or
 * swallow the following delimiter, and line breaks become spaces so the row
 * stays on one line. Text without those characters is returned unchanged.
 */
function escapeTableCell(text: string): string {
  return text.replace(/[\\|]/g, '\\$&').replace(/\r\n|\r|\n/g, ' ');
}

/**
 * Renders the header, separator and body rows of a run-outcome table.
 *
 * @param label - Heading of the first column (`Channel` or `Session`).
 * @param rows - Group rows to render; an empty list yields a single `_none_` row.
 * @returns The table as individual lines, without surrounding blank lines.
 */
function renderRunOutcomeTable(
  label: string,
  rows: Phase0BaselineSummary['run_outcomes']['by_channel'],
): string[] {
  const table = [
    `| ${label} | Outcomes | Complete | Cancelled | Error | Complete % | Cancel % | Error % | Cancel Req | Starts |`,
    '| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |',
  ];
  if (rows.length === 0) {
    table.push('| _none_ | 0 | 0 | 0 | 0 | n/a | n/a | n/a | 0 | 0 |');
    return table;
  }
  for (const { key, stats } of rows) {
    table.push(
      `| ${escapeTableCell(key)} | ${stats.total_outcomes} | ${stats.complete} | ${stats.cancelled} | ${stats.error} | `
        + `${formatPct(stats.completion_rate_pct)} | ${formatPct(stats.cancel_rate_pct)} | ${formatPct(stats.error_rate_pct)} | `
        + `${stats.cancel_requested} | ${stats.start} |`,
    );
  }
  return table;
}

/**
 * Renders a Phase 0 baseline summary as a Markdown report.
 *
 * The report contains, in order: event counts, the active filters (if any),
 * overall run outcomes, run outcomes by channel and by session, cancel
 * latency, and reaction decisions with a skip-reason table. Channel names,
 * session ids and skip reasons come from event payloads, so they are escaped
 * before being placed in table cells.
 *
 * @param summary - Summary to render.
 * @param options - Only the four filter lists are read, to print which
 *   filters were active; the row caps are not re-applied here.
 * @returns The report as a single string with `\n` line breaks and a
 *   trailing newline.
 */
export function renderPhase0BaselineMarkdown(
  summary: Phase0BaselineSummary,
  options: Phase0BaselineSummaryOptions = {},
): string {
  const { event_counts: counts, run_outcomes: outcomes, reactions } = summary;
  const overall = outcomes.overall;
  const lines: string[] = [];

  lines.push('# Phase 0 Baseline Telemetry Summary');
  lines.push('');
  lines.push(`- Run state events: ${counts.run_state}`);
  lines.push(`- Run cancel events: ${counts.run_cancel}`);
  lines.push(`- Reaction matches: ${counts.reaction_match}`);
  lines.push(`- Reaction skips: ${counts.reaction_skip}`);
  lines.push('');

  // Active filters: one bullet per non-empty list, then a blank line if any
  // bullet was printed.
  const filterLines: string[] = [];
  if (options.sessionIds?.length) {
    filterLines.push(`- Sessions: ${options.sessionIds.join(', ')}`);
  }
  if (options.channels?.length) {
    filterLines.push(`- Channels: ${options.channels.join(', ')}`);
  }
  if (options.senders?.length) {
    filterLines.push(`- Senders: ${options.senders.join(', ')}`);
  }
  if (options.sources?.length) {
    filterLines.push(`- Sources: ${options.sources.join(', ')}`);
  }
  if (filterLines.length > 0) {
    lines.push(...filterLines, '');
  }

  lines.push('## Run Outcomes (Overall)');
  lines.push('');
  lines.push(`- Total outcomes: ${overall.total_outcomes}`);
  lines.push(`- Complete: ${overall.complete} (${formatPct(overall.completion_rate_pct)})`);
  lines.push(`- Cancelled: ${overall.cancelled} (${formatPct(overall.cancel_rate_pct)})`);
  lines.push(`- Errors: ${overall.error} (${formatPct(overall.error_rate_pct)})`);
  lines.push(`- Cancel requested: ${overall.cancel_requested}`);
  lines.push(`- Starts: ${overall.start}`);
  lines.push('');

  lines.push('## Run Outcomes by Channel');
  lines.push('');
  lines.push(...renderRunOutcomeTable('Channel', outcomes.by_channel));
  lines.push('');

  lines.push('## Run Outcomes by Session');
  lines.push('');
  lines.push(...renderRunOutcomeTable('Session', outcomes.by_session));
  lines.push('');

  lines.push('## Cancel Latency');
  lines.push('');
  const latency = summary.cancel_latency_ms;
  if (!latency) {
    lines.push('- No cancel latency samples.');
  } else {
    lines.push(`- Count: ${latency.count}`);
    lines.push(`- Avg: ${latency.avg_ms}ms`);
    lines.push(`- P50: ${latency.p50_ms}ms`);
    lines.push(`- P95: ${latency.p95_ms}ms`);
    lines.push(`- Min: ${latency.min_ms}ms`);
    lines.push(`- Max: ${latency.max_ms}ms`);
  }
  lines.push('');

  lines.push('## Reaction Decisions');
  lines.push('');
  lines.push(`- Matched: ${reactions.matched} (${formatPct(reactions.match_rate_pct)})`);
  lines.push(`- Skipped: ${reactions.skipped} (${formatPct(reactions.skip_rate_pct)})`);
  lines.push('');
  lines.push('### Skip Reasons');
  lines.push('');
  lines.push('| Reason | Count | Percent |');
  lines.push('| --- | ---: | ---: |');
  if (reactions.skip_reasons.length === 0) {
    lines.push('| _none_ | 0 | 0.00% |');
  } else {
    for (const row of reactions.skip_reasons) {
      lines.push(`| ${escapeTableCell(row.reason)} | ${row.count} | ${row.pct.toFixed(2)}% |`);
    }
  }
  lines.push('');

  return lines.join('\n');
}