feat(audit): add rolling phase0 artifact retention tooling

This commit is contained in:
William Valentin
2026-02-27 10:20:14 -08:00
parent 149adb1c85
commit 134fa60af1
10 changed files with 420 additions and 5 deletions
@@ -0,0 +1,88 @@
import { describe, expect, it } from 'vitest';
import {
collectRollingPhase0ArtifactFiles,
planRollingPhase0ArtifactRetention,
} from './phase0BaselineArtifactRetention.js';
// Specs for the rolling phase-0 artifact retention helpers: file collection
// (which names count as rolling artifacts) and retention planning (which tags
// are kept or pruned per family).
describe('phase0BaselineArtifactRetention', () => {
// Only names carrying a full YYYY-MM-DD-HHMMSS tag are rolling artifacts.
// The fixture mixes five such names (one per family) with date-only names and
// an unrelated file, all of which must be ignored.
it('collects only rolling-tag phase-0 artifact files', () => {
const rows = collectRollingPhase0ArtifactFiles([
'phase0_baseline_live_2026-02-27-010203.json',
'phase0_baseline_live_gateway_2026-02-27-010203.md',
'phase0_baseline_live_backend_pi_embedded_2026-02-27-010203.jsonl',
'phase0_baseline_live_backend_native_2026-02-27-010203.json',
'phase0_baseline_live_backend_drift_2026-02-27-010203.md',
'phase0_baseline_live_2026-02-27.json',
'phase0_baseline_live_gateway_2026-02-27.jsonl',
'phase0_baseline_2026-02-25.md',
'phase0_baseline_live_backend_pi_embedded_2026-02-27.md',
'not_a_phase0_file.txt',
]);
// Exactly the first five fixture entries qualify, one per family.
expect(rows).toHaveLength(5);
expect(rows.map((row) => row.family).sort()).toEqual([
'backend_drift',
'backend_native',
'backend_pi_embedded',
'channel',
'gateway',
]);
});
// Every family has two tags (010203 and 020304). With a keep limit of 1 the
// newer 020304 tag survives and every file of the older tag is pruned.
it('keeps most recent rolling tags per family and prunes older ones', () => {
const files = [
'phase0_baseline_live_2026-02-27-010203.json',
'phase0_baseline_live_2026-02-27-010203.jsonl',
'phase0_baseline_live_2026-02-27-020304.json',
'phase0_baseline_live_2026-02-27-020304.md',
'phase0_baseline_live_gateway_2026-02-27-010203.json',
'phase0_baseline_live_gateway_2026-02-27-020304.json',
'phase0_baseline_live_backend_pi_embedded_2026-02-27-010203.json',
'phase0_baseline_live_backend_pi_embedded_2026-02-27-020304.json',
'phase0_baseline_live_backend_native_2026-02-27-010203.json',
'phase0_baseline_live_backend_native_2026-02-27-020304.json',
'phase0_baseline_live_backend_drift_2026-02-27-010203.json',
'phase0_baseline_live_backend_drift_2026-02-27-020304.json',
'phase0_baseline_live_2026-02-27.json',
];
const plan = planRollingPhase0ArtifactRetention(files, 1);
// Counts are per distinct tag, not per file: the channel family has four
// files but only two tags.
expect(plan.families).toEqual([
{ family: 'channel', total_tags: 2, keep_tags: 1, remove_tags: 1 },
{ family: 'gateway', total_tags: 2, keep_tags: 1, remove_tags: 1 },
{ family: 'backend_pi_embedded', total_tags: 2, keep_tags: 1, remove_tags: 1 },
{ family: 'backend_native', total_tags: 2, keep_tags: 1, remove_tags: 1 },
{ family: 'backend_drift', total_tags: 2, keep_tags: 1, remove_tags: 1 },
]);
// All files sharing the pruned tag are removed together, whatever their
// extension.
const removeSet = new Set(plan.remove.map((row) => row.file_name));
expect(removeSet.has('phase0_baseline_live_2026-02-27-010203.json')).toBe(true);
expect(removeSet.has('phase0_baseline_live_2026-02-27-010203.jsonl')).toBe(true);
expect(removeSet.has('phase0_baseline_live_gateway_2026-02-27-010203.json')).toBe(true);
expect(removeSet.has('phase0_baseline_live_backend_pi_embedded_2026-02-27-010203.json')).toBe(true);
expect(removeSet.has('phase0_baseline_live_backend_native_2026-02-27-010203.json')).toBe(true);
expect(removeSet.has('phase0_baseline_live_backend_drift_2026-02-27-010203.json')).toBe(true);
const keepSet = new Set(plan.keep.map((row) => row.file_name));
// The date-only file is not a rolling artifact, so it must not show up in
// the keep list.
expect(keepSet.has('phase0_baseline_live_2026-02-27.json')).toBe(false);
expect(keepSet.has('phase0_baseline_live_2026-02-27-020304.json')).toBe(true);
expect(keepSet.has('phase0_baseline_live_2026-02-27-020304.md')).toBe(true);
});
// A keep limit of 0 is valid and schedules every rolling artifact for removal.
it('supports zero keep limit', () => {
const plan = planRollingPhase0ArtifactRetention([
'phase0_baseline_live_2026-02-27-010203.json',
'phase0_baseline_live_gateway_2026-02-27-010203.json',
], 0);
expect(plan.keep).toHaveLength(0);
expect(plan.remove.map((row) => row.file_name).sort()).toEqual([
'phase0_baseline_live_2026-02-27-010203.json',
'phase0_baseline_live_gateway_2026-02-27-010203.json',
]);
});
// Negative limits are rejected with an error message that names the parameter.
it('rejects negative keep limit', () => {
expect(() => planRollingPhase0ArtifactRetention([], -1)).toThrow('keepPerFamily');
});
});
@@ -0,0 +1,189 @@
/**
 * Artifact families recognised by the rolling retention planner.
 *
 * Each member has a matching file-name pattern in `FAMILY_PATTERNS`; retention
 * limits are applied to every family independently.
 */
export type Phase0RollingArtifactFamily =
| 'channel'
| 'gateway'
| 'backend_pi_embedded'
| 'backend_native'
| 'backend_drift';
/** A file name that was recognised as a rolling phase-0 artifact. */
export interface Phase0RollingArtifactFile {
/** The file name exactly as it was supplied by the caller. */
file_name: string;
/** Family whose file-name pattern matched `file_name`. */
family: Phase0RollingArtifactFamily;
/** The `YYYY-MM-DD-HHMMSS` tag captured from the file name. */
tag: string;
/** The tag interpreted as a UTC instant, in milliseconds since the Unix epoch. */
tag_timestamp_ms: number;
}
/**
 * Result of planning retention: which rolling artifact files to keep, which to
 * remove, and a per-family summary of tag counts.
 */
export interface Phase0RollingArtifactRetentionPlan {
/** Files whose tag is retained; ordered by family, then newest tag first, then file name. */
keep: Phase0RollingArtifactFile[];
/** Files whose tag is pruned; same ordering as `keep`. */
remove: Phase0RollingArtifactFile[];
/** One summary row per family, in `FAMILY_PATTERNS` order (present even when a family has no files). */
families: Array<{
family: Phase0RollingArtifactFamily;
/** Number of distinct tags seen for the family. */
total_tags: number;
/** Number of distinct tags retained (at most the keep limit). */
keep_tags: number;
/** Number of distinct tags pruned. */
remove_tags: number;
}>;
}
/**
 * Matches a complete rolling tag of the form `YYYY-MM-DD-HHMMSS`.
 * Capture groups: 1 = year, 2 = month, 3 = day, 4 = six-digit time (HHMMSS).
 */
const ROLLING_TAG_PATTERN = /^(\d{4})-(\d{2})-(\d{2})-(\d{6})$/;
/**
 * File-name pattern for each artifact family. Capture group 1 is the rolling
 * tag; group 2 is the file extension.
 *
 * The array order is significant: it is the order in which patterns are tried
 * when parsing a file name and the order of the `families` rows in a plan.
 * Date-only names (no HHMMSS part) deliberately match none of these patterns.
 */
const FAMILY_PATTERNS: Array<{ family: Phase0RollingArtifactFamily; pattern: RegExp }> = [
{
family: 'channel',
// The channel family has no infix: the tag follows `phase0_baseline_live_` directly.
pattern: /^phase0_baseline_live_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
},
{
family: 'gateway',
pattern: /^phase0_baseline_live_gateway_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
},
{
family: 'backend_pi_embedded',
pattern: /^phase0_baseline_live_backend_pi_embedded_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
},
{
family: 'backend_native',
pattern: /^phase0_baseline_live_backend_native_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
},
{
family: 'backend_drift',
// NOTE(review): unlike the other families this one does not accept `.jsonl`,
// presumably because drift reports are never emitted as JSONL — confirm.
pattern: /^phase0_baseline_live_backend_drift_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|md)$/,
},
];
/**
 * Converts a rolling tag (`YYYY-MM-DD-HHMMSS`) into a UTC epoch timestamp.
 *
 * The tag must name a real calendar instant. `Date.UTC` silently normalises
 * out-of-range fields (for example February 30 becomes March 2, and hour 24
 * becomes the next day) and remaps years 0-99 to 1900-1999, so the date is
 * built with explicit UTC setters and then read back: any field that does not
 * round-trip means the tag was not a valid date/time and it is rejected.
 *
 * @param tag - Tag text without any file-name prefix or extension.
 * @returns Milliseconds since the Unix epoch (UTC), or `undefined` when the
 *   tag is malformed or does not denote a valid calendar date and time.
 */
function parseRollingTagTimestampMs(tag: string): number | undefined {
  const match = ROLLING_TAG_PATTERN.exec(tag);
  if (!match) {
    return undefined;
  }

  const hhmmss = match[4] ?? '';
  if (hhmmss.length !== 6) {
    return undefined;
  }

  const year = Number(match[1]);
  const month = Number(match[2]);
  const day = Number(match[3]);
  const hour = Number(hhmmss.slice(0, 2));
  const minute = Number(hhmmss.slice(2, 4));
  const second = Number(hhmmss.slice(4, 6));
  if (![year, month, day, hour, minute, second].every((part) => Number.isInteger(part))) {
    return undefined;
  }

  // Start from the epoch and set all date fields at once so that no
  // intermediate state can overflow; setUTCFullYear takes the year literally.
  const date = new Date(0);
  date.setUTCFullYear(year, month - 1, day);
  date.setUTCHours(hour, minute, second, 0);

  // Reject anything the Date object had to normalise (month 13, day 0,
  // Feb 29 in a non-leap year, hour 24, minute/second 60, ...).
  const roundTrips =
    date.getUTCFullYear() === year &&
    date.getUTCMonth() === month - 1 &&
    date.getUTCDate() === day &&
    date.getUTCHours() === hour &&
    date.getUTCMinutes() === minute &&
    date.getUTCSeconds() === second;
  if (!roundTrips) {
    return undefined;
  }

  const timestampMs = date.getTime();
  return Number.isFinite(timestampMs) ? timestampMs : undefined;
}
/**
 * Classifies a single file name as a rolling phase-0 artifact.
 *
 * Family patterns are tried in `FAMILY_PATTERNS` order. A pattern whose
 * captured tag cannot be converted to a timestamp is skipped and the next
 * pattern is tried.
 *
 * @param fileName - Bare file name to classify.
 * @returns The parsed artifact row, or `undefined` when no family pattern
 *   yields a usable tag.
 */
function parseRollingArtifactFile(fileName: string): Phase0RollingArtifactFile | undefined {
  for (const { family, pattern } of FAMILY_PATTERNS) {
    const hit = pattern.exec(fileName);
    if (hit === null) {
      continue;
    }

    const tag = hit[1] ?? '';
    const tagTimestampMs = parseRollingTagTimestampMs(tag);
    if (typeof tagTimestampMs === 'number') {
      return { file_name: fileName, family, tag, tag_timestamp_ms: tagTimestampMs };
    }
  }
  return undefined;
}
/**
 * Comparator that orders entries newest tag first.
 *
 * Entries are compared by timestamp (descending); equal timestamps fall back
 * to a descending comparison of the tag text so the order stays deterministic.
 *
 * @returns A negative number when `a` sorts before `b`, positive when after,
 *   and 0 when both timestamp and tag are equal.
 */
function sortByTagTimeDesc(a: { tag_timestamp_ms: number; tag: string }, b: { tag_timestamp_ms: number; tag: string }): number {
  const timeDelta = b.tag_timestamp_ms - a.tag_timestamp_ms;
  return timeDelta !== 0 ? timeDelta : b.tag.localeCompare(a.tag);
}
/**
 * Picks out the rolling phase-0 artifacts from a list of file names.
 *
 * Names that do not parse as a rolling artifact are dropped; the remaining
 * rows keep the order of the input.
 *
 * @param fileNames - Bare file names to inspect.
 * @returns One parsed row per recognised file name.
 */
export function collectRollingPhase0ArtifactFiles(fileNames: string[]): Phase0RollingArtifactFile[] {
  return fileNames.flatMap((fileName) => {
    const row = parseRollingArtifactFile(fileName);
    return row ? [row] : [];
  });
}
/**
 * Plans which rolling phase-0 artifact files to keep and which to remove.
 *
 * Files are grouped by family and then by tag. Within each family the newest
 * `keepPerFamily` tags are retained and every file belonging to an older tag is
 * scheduled for removal. File names that are not rolling artifacts are ignored
 * and appear in neither list. Nothing is deleted here; the function only
 * computes the plan.
 *
 * @param fileNames - Bare file names to consider.
 * @param keepPerFamily - Number of most recent tags to retain per family;
 *   fractional values are rounded down.
 * @returns The keep/remove lists (sorted by family, newest tag first, then file
 *   name) and one summary row per family in `FAMILY_PATTERNS` order.
 * @throws Error when `keepPerFamily` is negative or not a finite number.
 */
export function planRollingPhase0ArtifactRetention(
  fileNames: string[],
  keepPerFamily: number,
): Phase0RollingArtifactRetentionPlan {
  const limitIsValid = Number.isFinite(keepPerFamily) && keepPerFamily >= 0;
  if (!limitIsValid) {
    throw new Error('keepPerFamily must be greater than or equal to 0.');
  }
  const maxTagsKept = Math.floor(keepPerFamily);
  const artifacts = collectRollingPhase0ArtifactFiles(fileNames);

  // Output ordering: family name, then newest tag first, then file name.
  const compareFiles = (a: Phase0RollingArtifactFile, b: Phase0RollingArtifactFile): number => {
    const byFamily = a.family.localeCompare(b.family);
    if (byFamily !== 0) {
      return byFamily;
    }
    const byTag = sortByTagTimeDesc(a, b);
    return byTag !== 0 ? byTag : a.file_name.localeCompare(b.file_name);
  };

  const plan: Phase0RollingArtifactRetentionPlan = { keep: [], remove: [], families: [] };

  for (const { family } of FAMILY_PATTERNS) {
    const members = artifacts.filter((artifact) => artifact.family === family);

    // Distinct tags of this family, each with the largest timestamp seen for it.
    const timestampByTag = new Map<string, number>();
    for (const member of members) {
      const known = timestampByTag.get(member.tag);
      timestampByTag.set(
        member.tag,
        known === undefined ? member.tag_timestamp_ms : Math.max(known, member.tag_timestamp_ms),
      );
    }

    const newestFirst = Array.from(timestampByTag, ([tag, tag_timestamp_ms]) => ({ tag, tag_timestamp_ms })).sort(
      sortByTagTimeDesc,
    );
    const retainedTags = new Set(newestFirst.slice(0, maxTagsKept).map((entry) => entry.tag));

    // A file follows its tag: every file of a retained tag is kept.
    for (const member of members) {
      const bucket = retainedTags.has(member.tag) ? plan.keep : plan.remove;
      bucket.push(member);
    }

    const totalTags = newestFirst.length;
    plan.families.push({
      family,
      total_tags: totalTags,
      keep_tags: Math.min(totalTags, maxTagsKept),
      remove_tags: Math.max(0, totalTags - maxTagsKept),
    });
  }

  plan.keep.sort(compareFiles);
  plan.remove.sort(compareFiles);
  return plan;
}