feat(audit): replace probe baseline workflow with live anonymized capture
This commit is contained in:
@@ -0,0 +1,79 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import type { AuditEvent } from './types.js';
|
||||
import { capturePhase0LiveBaselineEvents } from './phase0LiveBaseline.js';
|
||||
|
||||
/**
 * Test fixture factory: wraps a payload in an `AuditEvent` envelope.
 *
 * @param timestamp - Value stored as the event's `timestamp`.
 * @param eventType - Value stored as the event's `event_type`.
 * @param payload - Object stored (by reference) as the event's `event` body.
 * @returns An audit event whose `level` is always `'info'`.
 */
function event(
  timestamp: number,
  eventType: AuditEvent['event_type'],
  payload: Record<string, unknown>,
): AuditEvent {
  const fixture: AuditEvent = {
    timestamp: timestamp,
    level: 'info',
    event_type: eventType,
    event: payload,
  };
  return fixture;
}
|
||||
|
||||
// Behavioural spec for capturePhase0LiveBaselineEvents: filtering and identifier anonymization.
describe('capturePhase0LiveBaselineEvents', () => {
  it('filters to phase-0 event types and applies channel/source/session filters', () => {
    // One fixture per rejection reason, plus the single event expected to survive (s1).
    const fixtures: AuditEvent[] = [
      // Rejected: 'session.message' is not a phase-0 baseline event type.
      event(3, 'session.message', { session_id: 's0', channel: 'cron', sender: 'bot', source: 'channel' }),
      // Rejected: session id contains the excluded substring 'probe'.
      event(2, 'reaction.skip', { session_id: 'probe-1', channel: 'cron', sender: 'bot', source: 'channel' }),
      // Kept: matches the event type, channel and source filters.
      event(1, 'run.state', { session_id: 's1', channel: 'cron', sender: 'bot', source: 'channel', state: 'start' }),
      // Rejected: channel is 'telegram', not 'cron'.
      event(4, 'run.state', { session_id: 's2', channel: 'telegram', sender: 'u1', source: 'channel', state: 'start' }),
      // Rejected: source is 'gateway', not 'channel'.
      event(5, 'run.cancel', { session_id: 's3', channel: 'cron', sender: 'bot', source: 'gateway', latency_ms: 120 }),
    ];

    const kept = capturePhase0LiveBaselineEvents(fixtures, {
      channels: ['cron'],
      sources: ['channel'],
      excludeSessionSubstrings: ['probe'],
      anonymizeIdentifiers: false,
    });

    expect(kept).toHaveLength(1);

    const survivor = kept[0];
    expect(survivor.event_type).toBe('run.state');
    expect(survivor.timestamp).toBe(1);
    // Anonymization is off, so the raw session id must come through untouched.
    expect(survivor.event.session_id).toBe('s1');
  });

  it('anonymizes session/sender/request/lane identifiers deterministically', () => {
    // Both events share session and sender but differ in request and lane ids.
    const gmailRunState = (
      timestamp: number,
      requestId: string,
      laneId: string,
      state: string,
    ): AuditEvent =>
      event(timestamp, 'run.state', {
        session_id: 'gmail:user@example.com',
        sender: 'user@example.com',
        request_id: requestId,
        lane_id: laneId,
        channel: 'gmail',
        source: 'channel',
        state,
      });

    const fixtures: AuditEvent[] = [
      gmailRunState(1, 'req-1', 'lane-1', 'start'),
      gmailRunState(2, 'req-2', 'lane-2', 'complete'),
    ];

    const anonymized = capturePhase0LiveBaselineEvents(fixtures, {
      anonymizeIdentifiers: true,
    });

    const [first, second] = [anonymized[0].event, anonymized[1].event];

    // Every identifier is replaced by `<kind>_<12 lowercase hex chars>`.
    const expectedShapes: Array<[string, RegExp]> = [
      ['session_id', /^session_[0-9a-f]{12}$/],
      ['sender', /^sender_[0-9a-f]{12}$/],
      ['request_id', /^request_[0-9a-f]{12}$/],
      ['lane_id', /^lane_[0-9a-f]{12}$/],
    ];
    for (const [field, shape] of expectedShapes) {
      expect(first[field]).toMatch(shape);
    }

    // Equal inputs map to equal pseudonyms...
    expect(first.session_id).toBe(second.session_id);
    expect(first.sender).toBe(second.sender);
    // ...and distinct inputs stay distinguishable.
    expect(first.request_id).not.toBe(second.request_id);
    expect(first.lane_id).not.toBe(second.lane_id);
  });
});
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
import { createHash } from 'node:crypto';
|
||||
import type { AuditEvent, AuditEventType } from './types.js';
|
||||
import type { AuditSource } from './phase0BaselineSummary.js';
|
||||
|
||||
/**
 * Audit event types that belong to the phase-0 baseline.
 * Events of any other type are dropped by `capturePhase0LiveBaselineEvents`.
 */
const PHASE0_BASELINE_EVENT_TYPES: readonly AuditEventType[] = [
  // Run lifecycle
  'run.state',
  'run.cancel',
  // Reaction handling
  'reaction.match',
  'reaction.skip',
];
|
||||
|
||||
/** Filtering and anonymization settings for `capturePhase0LiveBaselineEvents`. */
export interface CapturePhase0LiveBaselineOptions {
  /**
   * Keep only events whose payload `channel` is one of these values.
   * Empty strings are ignored; an omitted or empty list disables channel filtering.
   */
  channels?: string[];

  /**
   * Keep only events whose payload `source` is one of these values.
   * An omitted or empty list disables source filtering.
   */
  sources?: AuditSource[];

  /**
   * Drop events whose payload `session_id` contains any of these substrings.
   * Entries are trimmed and compared case-insensitively; blank entries are ignored.
   */
  excludeSessionSubstrings?: string[];

  /**
   * Replace `session_id`, `sender`, `request_id` and `lane_id` with hashed
   * pseudonyms. Treated as `true` when omitted.
   */
  anonymizeIdentifiers?: boolean;
}
|
||||
|
||||
/**
 * Reads `payload[key]` if, and only if, it holds a string.
 *
 * @param payload - Object to read from.
 * @param key - Property name to look up.
 * @returns The string value (possibly empty), or `undefined` when the property
 *   is missing or holds a non-string value.
 */
function readStringField(payload: Record<string, unknown>, key: string): string | undefined {
  const candidate = payload[key];
  if (typeof candidate !== 'string') {
    return undefined;
  }
  return candidate;
}
|
||||
|
||||
/**
 * Coerces an arbitrary value into a fresh, shallow-copied payload object.
 *
 * @param value - Candidate payload of unknown shape.
 * @returns A new object holding the own enumerable properties of `value` when
 *   it is a non-null object; otherwise an empty object.
 */
function toPayload(value: unknown): Record<string, unknown> {
  // `typeof null` is 'object', so null has to be ruled out explicitly.
  if (value === null || typeof value !== 'object') {
    return {};
  }
  return { ...(value as Record<string, unknown>) };
}
|
||||
|
||||
/**
 * Derives a deterministic pseudonym for an identifier.
 *
 * @param prefix - Label placed in front of the digest (e.g. `'session'`).
 * @param value - Raw identifier to hash.
 * @returns `<prefix>_<first 12 hex characters of the SHA-256 digest of value>`.
 */
function hashIdentifier(prefix: string, value: string): string {
  const hasher = createHash('sha256');
  hasher.update(value);
  const shortDigest = hasher.digest('hex').substring(0, 12);
  return prefix + '_' + shortDigest;
}
|
||||
|
||||
/**
 * Returns a shallow copy of `payload` in which the identifier fields
 * `session_id`, `sender`, `request_id` and `lane_id` are replaced by hashed
 * pseudonyms.
 *
 * A field is rewritten only when it holds a non-empty string; missing,
 * non-string and empty-string values are copied through unchanged. All other
 * fields are left as they are.
 *
 * @param payload - Event payload to anonymize; not mutated.
 * @returns The anonymized copy.
 */
function anonymizePayloadIdentifiers(payload: Record<string, unknown>): Record<string, unknown> {
  // [payload field, pseudonym prefix]
  const identifierFields: ReadonlyArray<readonly [string, string]> = [
    ['session_id', 'session'],
    ['sender', 'sender'],
    ['request_id', 'request'],
    ['lane_id', 'lane'],
  ];

  const anonymized = { ...payload };
  for (const [field, prefix] of identifierFields) {
    const raw = readStringField(anonymized, field);
    if (raw) {
      anonymized[field] = hashIdentifier(prefix, raw);
    }
  }
  return anonymized;
}
|
||||
|
||||
/**
 * Extracts the phase-0 baseline events from an audit log, applying optional
 * channel/source/session filters and identifier anonymization.
 *
 * An event is kept when all of the following hold:
 * - its `event_type` is listed in `PHASE0_BASELINE_EVENT_TYPES`;
 * - `options.channels` is effectively empty, or the payload's string `channel`
 *   is one of the listed channels;
 * - `options.sources` is empty, or the payload's string `source` is one of the
 *   listed sources;
 * - the payload's `session_id` (if it is a non-empty string) contains none of
 *   `options.excludeSessionSubstrings`, compared case-insensitively.
 *
 * Channel entries and exclusion substrings are trimmed before use, and entries
 * that are blank after trimming are discarded, so a stray `' '` never turns
 * into an active filter.
 *
 * @param events - Audit events to scan. Neither the array nor its elements are
 *   mutated; each kept event is shallow-copied with a fresh payload object.
 * @param options - Filter and anonymization settings. `anonymizeIdentifiers`
 *   is treated as `true` when omitted.
 * @returns A new array of the kept events, sorted by ascending `timestamp`.
 */
export function capturePhase0LiveBaselineEvents(
  events: readonly AuditEvent[],
  options: CapturePhase0LiveBaselineOptions = {},
): AuditEvent[] {
  // Trim before the emptiness check: a whitespace-only entry would otherwise
  // switch the channel filter on and reject every event.
  const channelFilter = new Set(
    (options.channels ?? [])
      .map((value) => value.trim())
      .filter((value) => value.length > 0),
  );
  const sourceFilter = new Set(options.sources ?? []);
  const excludeSessionSubstrings = (options.excludeSessionSubstrings ?? [])
    .map((value) => value.trim().toLowerCase())
    .filter((value) => value.length > 0);
  const anonymizeIdentifiers = options.anonymizeIdentifiers ?? true;

  const filtered: AuditEvent[] = [];

  for (const event of events) {
    if (!PHASE0_BASELINE_EVENT_TYPES.includes(event.event_type)) {
      continue;
    }

    // Work on a shallow copy so the caller's payload object is never touched.
    const payload = toPayload(event.event);
    const channel = readStringField(payload, 'channel');
    const source = readStringField(payload, 'source');
    const sessionId = readStringField(payload, 'session_id');

    if (channelFilter.size > 0 && (!channel || !channelFilter.has(channel))) {
      continue;
    }
    if (sourceFilter.size > 0 && (!source || !sourceFilter.has(source as AuditSource))) {
      continue;
    }

    // Lower-case once per event rather than once per needle. Exclusion is
    // checked against the raw id, before any anonymization.
    const loweredSessionId = sessionId?.toLowerCase();
    if (
      loweredSessionId
      && excludeSessionSubstrings.some((needle) => loweredSessionId.includes(needle))
    ) {
      continue;
    }

    filtered.push({
      ...event,
      event: anonymizeIdentifiers ? anonymizePayloadIdentifiers(payload) : payload,
    });
  }

  // `filtered` is local, so the in-place sort cannot affect the caller's array.
  return filtered.sort((a, b) => a.timestamp - b.timestamp);
}
|
||||
|
||||
Reference in New Issue
Block a user