feat(audit): replace probe baseline workflow with live anonymized capture

This commit is contained in:
William Valentin
2026-02-26 23:41:13 -08:00
parent c96aca5f1d
commit 4b07a1f166
13 changed files with 968 additions and 10 deletions
+79
View File
@@ -0,0 +1,79 @@
import { describe, expect, it } from 'vitest';
import type { AuditEvent } from './types.js';
import { capturePhase0LiveBaselineEvents } from './phase0LiveBaseline.js';
/**
 * Test fixture builder: wraps a payload in an info-level audit event.
 *
 * @param timestamp - Value stored as the event's `timestamp`.
 * @param eventType - Value stored as the event's `event_type`.
 * @param payload - Object stored (by reference) as the event's `event` body.
 * @returns The assembled audit event.
 */
function event(
  timestamp: number,
  eventType: AuditEvent['event_type'],
  payload: Record<string, unknown>,
): AuditEvent {
  const fixture: AuditEvent = {
    timestamp,
    level: 'info',
    event_type: eventType,
    event: payload,
  };
  return fixture;
}
// Exercises the filtering and identifier-hashing behavior of capturePhase0LiveBaselineEvents.
describe('capturePhase0LiveBaselineEvents', () => {
  it('filters to phase-0 event types and applies channel/source/session filters', () => {
    // Fixtures are deliberately out of timestamp order; only the timestamp-1
    // event is expected to pass every filter configured below.
    const candidates: AuditEvent[] = [
      // Event type is not one of the captured phase-0 types.
      event(3, 'session.message', { session_id: 's0', channel: 'cron', sender: 'bot', source: 'channel' }),
      // Session id contains the excluded "probe" substring.
      event(2, 'reaction.skip', { session_id: 'probe-1', channel: 'cron', sender: 'bot', source: 'channel' }),
      // Expected sole survivor.
      event(1, 'run.state', { session_id: 's1', channel: 'cron', sender: 'bot', source: 'channel', state: 'start' }),
      // Channel is not in the allow-list.
      event(4, 'run.state', { session_id: 's2', channel: 'telegram', sender: 'u1', source: 'channel', state: 'start' }),
      // Source is not in the allow-list.
      event(5, 'run.cancel', { session_id: 's3', channel: 'cron', sender: 'bot', source: 'gateway', latency_ms: 120 }),
    ];

    const captured = capturePhase0LiveBaselineEvents(candidates, {
      channels: ['cron'],
      sources: ['channel'],
      excludeSessionSubstrings: ['probe'],
      anonymizeIdentifiers: false,
    });

    expect(captured).toHaveLength(1);
    const survivor = captured[0];
    expect(survivor.event_type).toBe('run.state');
    expect(survivor.timestamp).toBe(1);
    expect(survivor.event.session_id).toBe('s1');
  });

  it('anonymizes session/sender/request/lane identifiers deterministically', () => {
    // Both events share a session and sender but carry distinct request and lane ids.
    const sharedFields = {
      session_id: 'gmail:user@example.com',
      sender: 'user@example.com',
      channel: 'gmail',
      source: 'channel',
    };
    const candidates: AuditEvent[] = [
      event(1, 'run.state', { ...sharedFields, request_id: 'req-1', lane_id: 'lane-1', state: 'start' }),
      event(2, 'run.state', { ...sharedFields, request_id: 'req-2', lane_id: 'lane-2', state: 'complete' }),
    ];

    const captured = capturePhase0LiveBaselineEvents(candidates, {
      anonymizeIdentifiers: true,
    });
    const first = captured[0].event;
    const second = captured[1].event;

    // Every identifier is replaced by a prefixed 12-hex-character token.
    expect(first.session_id).toMatch(/^session_[0-9a-f]{12}$/);
    expect(first.sender).toMatch(/^sender_[0-9a-f]{12}$/);
    expect(first.request_id).toMatch(/^request_[0-9a-f]{12}$/);
    expect(first.lane_id).toMatch(/^lane_[0-9a-f]{12}$/);

    // Equal inputs map to equal tokens; different inputs stay distinguishable.
    expect(first.session_id).toBe(second.session_id);
    expect(first.sender).toBe(second.sender);
    expect(first.request_id).not.toBe(second.request_id);
    expect(first.lane_id).not.toBe(second.lane_id);
  });
});
+106
View File
@@ -0,0 +1,106 @@
import { createHash } from 'node:crypto';
import type { AuditEvent, AuditEventType } from './types.js';
import type { AuditSource } from './phase0BaselineSummary.js';
/**
 * Audit event types kept by the phase-0 live baseline capture.
 * Events whose `event_type` is not listed here are dropped by
 * `capturePhase0LiveBaselineEvents` before any other filter is applied.
 */
const PHASE0_BASELINE_EVENT_TYPES: readonly AuditEventType[] = [
'run.state',
'run.cancel',
'reaction.match',
'reaction.skip',
];
/** Filtering and anonymization options for `capturePhase0LiveBaselineEvents`. */
export interface CapturePhase0LiveBaselineOptions {
/**
 * Keep only events whose payload `channel` is one of these values (exact match).
 * Empty strings are ignored; an omitted or effectively empty list disables the filter.
 */
channels?: string[];
/**
 * Keep only events whose payload `source` is one of these values.
 * An omitted or empty list disables the filter.
 */
sources?: AuditSource[];
/**
 * Drop events whose payload `session_id` contains any of these substrings.
 * Entries are trimmed and compared case-insensitively; blank entries are ignored.
 * Events without a string `session_id` are never dropped by this filter.
 */
excludeSessionSubstrings?: string[];
/**
 * Whether identifier fields in the payload are replaced by hashed tokens.
 * Defaults to `true` when omitted.
 */
anonymizeIdentifiers?: boolean;
}
/**
 * Looks up `key` in `payload` and returns it only when it holds a string.
 *
 * @param payload - Record to read from.
 * @param key - Property name to look up.
 * @returns The string value (possibly empty), or `undefined` when the
 *   property is absent or holds any non-string value.
 */
function readStringField(payload: Record<string, unknown>, key: string): string | undefined {
  const candidate = payload[key];
  if (typeof candidate !== 'string') {
    return undefined;
  }
  return candidate;
}
/**
 * Normalizes an arbitrary value into a payload record.
 *
 * @param value - Candidate payload of unknown shape.
 * @returns A shallow copy of `value`'s own enumerable properties when it is a
 *   non-null object (arrays included, keyed by index); otherwise an empty object.
 */
function toPayload(value: unknown): Record<string, unknown> {
  if (typeof value !== 'object' || value === null) {
    return {};
  }
  return { ...(value as Record<string, unknown>) };
}
/**
 * Derives a stable pseudonymous token for an identifier.
 *
 * The token is `<prefix>_` followed by the first 12 hex characters of the
 * SHA-256 digest of `value`, so equal values always map to equal tokens.
 * The prefix labels the token but is not part of the hashed input.
 *
 * NOTE(review): the digest is unsalted and truncated; low-entropy inputs such
 * as e-mail addresses could be recovered by dictionary attack — confirm this
 * is acceptable for the captured baseline.
 *
 * @param prefix - Label placed before the digest.
 * @param value - Identifier to hash.
 * @returns The prefixed, truncated hex digest.
 */
function hashIdentifier(prefix: string, value: string): string {
  const digest = createHash('sha256').update(value).digest('hex').slice(0, 12);
  return `${prefix}_${digest}`;
}

/** Payload keys that hold identifiers, paired with the prefix of their hashed token. */
const ANONYMIZED_IDENTIFIER_FIELDS: ReadonlyArray<readonly [string, string]> = [
  ['session_id', 'session'],
  ['sender', 'sender'],
  ['request_id', 'request'],
  ['lane_id', 'lane'],
];

/**
 * Returns a shallow copy of `payload` with its identifier fields replaced by
 * hashed tokens. The input object is not modified.
 *
 * Non-empty strings are hashed as-is. Numeric and bigint identifiers are
 * hashed via their decimal string form so they are not written out in clear.
 * Empty strings, missing fields, and values of any other type are left
 * untouched.
 *
 * @param payload - Event payload to anonymize.
 * @returns The anonymized copy.
 */
function anonymizePayloadIdentifiers(payload: Record<string, unknown>): Record<string, unknown> {
  const next = { ...payload };
  for (const [key, prefix] of ANONYMIZED_IDENTIFIER_FIELDS) {
    const raw = next[key];
    let text: string | undefined;
    if (typeof raw === 'string') {
      text = raw;
    } else if (typeof raw === 'number' || typeof raw === 'bigint') {
      // Numeric ids would otherwise bypass anonymization entirely.
      text = String(raw);
    }
    if (text !== undefined && text.length > 0) {
      next[key] = hashIdentifier(prefix, text);
    }
  }
  return next;
}
/**
 * Selects the phase-0 baseline events from an audit log and optionally
 * anonymizes their identifiers.
 *
 * An event is kept when its `event_type` is one of the phase-0 baseline types
 * and its payload passes the channel, source and session-exclusion filters
 * configured in `options`. Each kept event is returned as a shallow copy whose
 * `event` payload is itself a shallow copy (anonymized unless
 * `options.anonymizeIdentifiers` is `false`).
 *
 * @param events - Audit events to scan; the array and its events are not modified.
 * @param options - Filters and anonymization toggle; all optional.
 * @returns A new array of the kept events, sorted by ascending `timestamp`.
 */
export function capturePhase0LiveBaselineEvents(
  events: AuditEvent[],
  options: CapturePhase0LiveBaselineOptions = {},
): AuditEvent[] {
  // Allow-lists: an empty set means "do not filter on this field".
  const allowedChannels = new Set<string>();
  for (const name of options.channels ?? []) {
    if (name.length > 0) {
      allowedChannels.add(name);
    }
  }
  const allowedSources = new Set(options.sources ?? []);

  // Session needles are normalized once so matching is case-insensitive.
  const blockedNeedles: string[] = [];
  for (const raw of options.excludeSessionSubstrings ?? []) {
    const needle = raw.trim().toLowerCase();
    if (needle.length > 0) {
      blockedNeedles.push(needle);
    }
  }

  const shouldAnonymize = options.anonymizeIdentifiers ?? true;

  const passesFilters = (fields: Record<string, unknown>): boolean => {
    const channel = readStringField(fields, 'channel');
    if (allowedChannels.size > 0 && !(channel && allowedChannels.has(channel))) {
      return false;
    }

    const source = readStringField(fields, 'source');
    if (allowedSources.size > 0 && !(source && allowedSources.has(source as AuditSource))) {
      return false;
    }

    const sessionId = readStringField(fields, 'session_id');
    if (sessionId) {
      const haystack = sessionId.toLowerCase();
      if (blockedNeedles.some((needle) => haystack.includes(needle))) {
        return false;
      }
    }
    return true;
  };

  const captured: AuditEvent[] = [];
  for (const entry of events) {
    if (!PHASE0_BASELINE_EVENT_TYPES.includes(entry.event_type)) {
      continue;
    }
    const fields = toPayload(entry.event);
    if (!passesFilters(fields)) {
      continue;
    }
    captured.push({
      ...entry,
      event: shouldAnonymize ? anonymizePayloadIdentifiers(fields) : fields,
    });
  }

  // Sorting the freshly built array leaves the caller's input order untouched.
  captured.sort((left, right) => left.timestamp - right.timestamp);
  return captured;
}