feat(audit): replace probe baseline workflow with live anonymized capture

This commit is contained in:
William Valentin
2026-02-26 23:41:13 -08:00
parent c96aca5f1d
commit 4b07a1f166
13 changed files with 968 additions and 10 deletions
+217
View File
@@ -0,0 +1,217 @@
#!/usr/bin/env node
import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { parseArgs } from 'node:util';
import { queryAuditLogs } from '../src/audit/export.js';
import { capturePhase0LiveBaselineEvents } from '../src/audit/phase0LiveBaseline.js';
import {
renderPhase0BaselineMarkdown,
summarizePhase0Baseline,
type AuditSource,
type Phase0BaselineSummaryOptions,
} from '../src/audit/phase0BaselineSummary.js';
// Audit event types this script asks queryAuditLogs for (passed as `event_types` in main).
const DEFAULT_EVENT_TYPES = ['run.state', 'run.cancel', 'reaction.match', 'reaction.skip'] as const;
/**
 * Builds the command-line help text for this script.
 *
 * The text is printed to stdout for `--help` and appended to error output on
 * failure, so it lists every flag the argument parser accepts, including
 * `-h/--help` itself.
 *
 * @returns The multi-line usage text, without a trailing newline.
 */
function usage(): string {
  return [
    'Usage: node --import tsx/esm scripts/capture-phase0-live-baseline.ts [options]',
    '',
    'Options:',
    ' --audit <path> Source audit log path (default: ~/.local/share/flynn/audit.log)',
    ' --since <ISO-8601|epoch_ms> Start time filter',
    ' --until <ISO-8601|epoch_ms> End time filter',
    ' --channel <name[,name...]> Restrict sample to channels',
    ' --source <gateway|channel[,..]> Restrict sample to sources',
    ' --exclude-session-substring <text[,..]> Exclude sessions containing any substring (default: probe)',
    ' --raw-identifiers Keep raw session/sender/request IDs (default: anonymized)',
    ' --tag <YYYY-MM-DD> Output file tag (default: current date UTC)',
    ' --sample-out <path> Output JSONL sample path override',
    ' --summary-json-out <path> Output summary JSON path override',
    ' --summary-md-out <path> Output summary markdown path override',
    ' --max-sessions <number> Limit session rows in output (default: 20)',
    ' --max-channels <number> Limit channel rows in output (default: 20)',
    ' --max-skip-reasons <number> Limit skip reason rows in output (default: 10)',
    // The parser accepts -h/--help, so the help text must advertise it too.
    ' -h, --help Show this usage text and exit',
  ].join('\n');
}
/**
 * Expands a leading `~` (the current user's home) into an absolute path.
 *
 * Only the bare `~` and the `~/...` forms are expanded. Other inputs —
 * including `~otheruser/...`, which refers to a different account — are
 * returned unchanged, as is everything when `HOME` is unset or empty.
 *
 * @param pathValue Path as typed by the user.
 * @returns The path with the home prefix resolved against `process.env.HOME`.
 */
function expandHomePath(pathValue: string): string {
  if (pathValue !== '~' && !pathValue.startsWith('~/')) {
    return pathValue;
  }
  const home = process.env.HOME;
  if (!home) {
    return pathValue;
  }
  // Prefix the remainder with "." so it stays relative: resolve() discards
  // every earlier segment once it sees an absolute one, and "~/x".slice(1)
  // is the absolute "/x", which would drop the home directory entirely.
  return resolve(home, `.${pathValue.slice(1)}`);
}
/**
 * Replaces a leading home-directory prefix with `~` for display.
 *
 * The prefix is only collapsed on a path-segment boundary, so with
 * `HOME=/home/u` the path `/home/user2/x` is left untouched instead of
 * becoming `~ser2/x`. Trailing separators on `HOME` are ignored.
 *
 * @param pathValue Path to shorten.
 * @returns The path with the home prefix replaced by `~`, or the input
 *   unchanged when it is not inside `HOME` or `HOME` is unset/empty.
 */
function collapseHomePath(pathValue: string): string {
  // Strip trailing separators so "/home/u/" and "/home/u" behave the same.
  const home = process.env.HOME?.replace(/[\\/]+$/, '');
  if (!home) {
    return pathValue;
  }
  if (pathValue === home) {
    return '~';
  }
  if (!pathValue.startsWith(home)) {
    return pathValue;
  }
  // Require a separator right after the prefix so sibling directories that
  // merely share leading characters with HOME are not rewritten.
  const boundary = pathValue.charAt(home.length);
  return boundary === '/' || boundary === '\\' ? `~${pathValue.slice(home.length)}` : pathValue;
}
/**
 * Converts a `--since` / `--until` style argument into epoch milliseconds.
 *
 * An all-digit value is taken as epoch milliseconds as-is; anything else is
 * handed to `Date.parse`.
 *
 * @param value Raw flag value; `undefined` or empty means "not provided".
 * @param flag Flag name used in the error message.
 * @returns Epoch milliseconds, or `undefined` when no value was provided.
 * @throws Error when the value is neither a usable digit string nor parseable by `Date.parse`.
 */
function parseTime(value: string | undefined, flag: string): number | undefined {
  if (value === undefined || value.length === 0) {
    return undefined;
  }

  // Digit-only input is epoch milliseconds; a non-finite result (absurdly
  // long digit strings) falls through to the date parser below.
  const epochCandidate = /^\d+$/.test(value) ? Number(value) : Number.NaN;
  if (Number.isFinite(epochCandidate)) {
    return epochCandidate;
  }

  const fromDateString = Date.parse(value);
  if (Number.isFinite(fromDateString)) {
    return fromDateString;
  }
  throw new Error(`Invalid ${flag} value "${value}". Use ISO-8601 or epoch milliseconds.`);
}
/**
 * Splits a comma-separated flag value into trimmed, non-empty items.
 *
 * @param value Raw flag value; `undefined` or empty means "not provided".
 * @returns The items in input order, or `undefined` when nothing usable remains.
 */
function parseCsv(value: string | undefined): string[] | undefined {
  if (value === undefined || value.length === 0) {
    return undefined;
  }

  const items: string[] = [];
  for (const piece of value.split(',')) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) {
      items.push(trimmed);
    }
  }
  return items.length === 0 ? undefined : items;
}
/**
 * Parses the `--source` flag into a list of audit sources.
 *
 * @param raw Comma-separated source names, or `undefined` when the flag is absent.
 * @returns The sources in input order, or `undefined` when none were given.
 * @throws Error on the first entry that is neither `gateway` nor `channel`.
 */
function parseSources(raw: string | undefined): AuditSource[] | undefined {
  const requested = parseCsv(raw);
  if (requested === undefined) {
    return undefined;
  }

  return requested.map((candidate): AuditSource => {
    switch (candidate) {
      case 'gateway':
      case 'channel':
        return candidate;
      default:
        throw new Error(`Invalid source "${candidate}".`);
    }
  });
}
/**
 * Parses an optional numeric flag value.
 *
 * @param raw Raw flag value; `undefined` or the empty string means "not provided".
 * @param flag Flag name used in the error message.
 * @returns The parsed finite number, or `undefined` when no value was provided.
 * @throws Error when the value is blank or does not convert to a finite number.
 */
function parseOptionalNumber(raw: string | undefined, flag: string): number | undefined {
  if (!raw) {
    return undefined;
  }
  // Number() coerces whitespace-only strings to 0, which would silently turn
  // a blank flag into a limit of zero; treat that input as invalid instead.
  const parsed = raw.trim().length === 0 ? Number.NaN : Number(raw);
  if (!Number.isFinite(parsed)) {
    throw new Error(`Invalid ${flag} value "${raw}". Expected a number.`);
  }
  return parsed;
}
/**
 * Produces the default output tag: the current UTC date.
 *
 * @returns The leading `YYYY-MM-DD` portion of the current time's ISO-8601 string.
 */
function isoDateTagNow(): string {
  const nowIso = new Date().toISOString();
  // toISOString() is always UTC; keep only the date part.
  return nowIso.substring(0, 'YYYY-MM-DD'.length);
}
/**
 * Writes UTF-8 text to a file, creating the parent directory chain first.
 *
 * @param pathValue Destination file path.
 * @param contents Text to write.
 */
async function writeTextFile(pathValue: string, contents: string): Promise<void> {
  const parentDir = dirname(pathValue);
  // recursive: true creates missing ancestors and tolerates an existing directory.
  await mkdir(parentDir, { recursive: true });
  await writeFile(pathValue, contents, { encoding: 'utf8' });
}
/**
 * CLI entry point: queries the audit log, captures the baseline sample,
 * summarizes it, and writes three artifacts (JSONL sample, summary JSON,
 * summary markdown).
 *
 * With `--help` it prints the usage text and returns without reading or
 * writing anything. Argument and parsing errors propagate to the caller.
 */
async function main(): Promise<void> {
  const parsedArgs = parseArgs({
    options: {
      audit: { type: 'string' },
      since: { type: 'string' },
      until: { type: 'string' },
      channel: { type: 'string' },
      source: { type: 'string' },
      'exclude-session-substring': { type: 'string' },
      'raw-identifiers': { type: 'boolean' },
      tag: { type: 'string' },
      'sample-out': { type: 'string' },
      'summary-json-out': { type: 'string' },
      'summary-md-out': { type: 'string' },
      'max-sessions': { type: 'string' },
      'max-channels': { type: 'string' },
      'max-skip-reasons': { type: 'string' },
      help: { type: 'boolean', short: 'h' },
    },
    strict: true,
    allowPositionals: false,
  });
  const flags = parsedArgs.values;

  if (flags.help) {
    process.stdout.write(`${usage()}\n`);
    return;
  }

  // Resolve input and output locations; explicit overrides win over tag-derived defaults.
  const auditPath = expandHomePath(flags.audit ?? '~/.local/share/flynn/audit.log');
  const tag = flags.tag ?? isoDateTagNow();
  const defaultArtifact = (extension: string): string =>
    `docs/plans/artifacts/phase0_baseline_live_${tag}.${extension}`;
  const sampleOut = flags['sample-out'] ?? defaultArtifact('jsonl');
  const summaryJsonOut = flags['summary-json-out'] ?? defaultArtifact('json');
  const summaryMdOut = flags['summary-md-out'] ?? defaultArtifact('md');

  // Parse filters in a fixed order so the first invalid flag is the one reported.
  const channels = parseCsv(flags.channel);
  const sources = parseSources(flags.source);
  const excludeSessionSubstrings = parseCsv(flags['exclude-session-substring']) ?? ['probe'];
  const startTime = parseTime(flags.since, '--since');
  const endTime = parseTime(flags.until, '--until');

  const maxSessions = parseOptionalNumber(flags['max-sessions'], '--max-sessions') ?? 20;
  const maxChannels = parseOptionalNumber(flags['max-channels'], '--max-channels') ?? 20;
  const maxSkipReasons = parseOptionalNumber(flags['max-skip-reasons'], '--max-skip-reasons') ?? 10;
  const summaryOptions: Phase0BaselineSummaryOptions = {
    channels,
    sources,
    maxSessions,
    maxChannels,
    maxSkipReasons,
  };

  // Identifiers are anonymized unless --raw-identifiers was passed.
  const anonymizeIdentifiers = !flags['raw-identifiers'];

  const sourceEvents = await queryAuditLogs(auditPath, {
    start_time: startTime,
    end_time: endTime,
    event_types: [...DEFAULT_EVENT_TYPES],
  });
  const sampledEvents = capturePhase0LiveBaselineEvents(sourceEvents, {
    channels,
    sources,
    excludeSessionSubstrings,
    anonymizeIdentifiers,
  });

  const summary = summarizePhase0Baseline(sampledEvents, summaryOptions);
  const markdown = renderPhase0BaselineMarkdown(summary, summaryOptions);

  // One JSON document per line; an empty sample yields an empty file.
  const jsonlBody = sampledEvents.map((entry) => JSON.stringify(entry)).join('\n');
  const sampleContents = jsonlBody.length > 0 ? `${jsonlBody}\n` : '';

  const summaryDocument = {
    generated_at: new Date().toISOString(),
    // Stored with the home prefix collapsed to "~".
    source_audit_path: collapseHomePath(auditPath),
    source_event_count: sourceEvents.length,
    sampled_event_count: sampledEvents.length,
    filters: {
      since_ms: startTime,
      until_ms: endTime,
      channels,
      sources,
      exclude_session_substrings: excludeSessionSubstrings,
      anonymized_identifiers: anonymizeIdentifiers,
    },
    options: summaryOptions,
    summary,
  };
  const summaryJson = JSON.stringify(summaryDocument, null, 2);

  // Written one after another so a failure stops before later artifacts.
  const artifacts: ReadonlyArray<readonly [string, string]> = [
    [sampleOut, sampleContents],
    [summaryJsonOut, `${summaryJson}\n`],
    [summaryMdOut, `${markdown}\n`],
  ];
  for (const [destination, contents] of artifacts) {
    await writeTextFile(destination, contents);
  }

  process.stdout.write(`Captured ${sampledEvents.length} events from ${sourceEvents.length} source events.\n`);
  process.stdout.write(`- sample: ${sampleOut}\n`);
  process.stdout.write(`- summary json: ${summaryJsonOut}\n`);
  process.stdout.write(`- summary md: ${summaryMdOut}\n`);
}
// Run the CLI; on any failure print the message followed by the usage text
// to stderr and set a non-zero exit code.
main().catch((failure: unknown) => {
  const detail = failure instanceof Error ? failure.message : String(failure);
  process.stderr.write([detail, '', usage(), ''].join('\n'));
  process.exitCode = 1;
});