feat(audit): add rolling phase0 artifact retention tooling

This commit is contained in:
William Valentin
2026-02-27 10:20:14 -08:00
parent 149adb1c85
commit 134fa60af1
10 changed files with 420 additions and 5 deletions
+1
View File
@@ -1654,6 +1654,7 @@ Cadence scheduling (example: every 6 hours via host cron) with rolling timestamp
``` ```
`audit:phase0-baseline:live*` scripts now default to the current UTC date tag when `--tag` is omitted. `audit:phase0-baseline:live*` scripts now default to the current UTC date tag when `--tag` is omitted.
Use `audit:phase0-baseline:live:refresh:drift:rolling` when you want each cadence run to keep a distinct tag (`YYYY-MM-DD-HHMMSS`) so drift checks compare against a recent prior snapshot immediately. Use `audit:phase0-baseline:live:refresh:drift:rolling` when you want each cadence run to keep a distinct tag (`YYYY-MM-DD-HHMMSS`) so drift checks compare against a recent prior snapshot immediately.
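Use `audit:phase0-baseline:live:prune` for dry-run retention planning, and `audit:phase0-baseline:live:prune:apply` to delete older rolling-tag (`YYYY-MM-DD-HHMMSS`) artifacts while keeping the newest snapshots per artifact family (8 tags by default; adjust with `--keep-per-family <num>`). Only rolling-tag artifacts are considered; date-only (`YYYY-MM-DD`) artifacts are not part of the retention plan.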
Gateway-origin windows can be captured separately (for example when validating cancel paths): Gateway-origin windows can be captured separately (for example when validating cancel paths):
```bash ```bash
+1 -1
View File
@@ -23,7 +23,7 @@ The gateway provides:
- **HTTP Server**: Serves static dashboard and handles webhook endpoints - **HTTP Server**: Serves static dashboard and handles webhook endpoints
- **Node Capability Negotiation**: Optional companion-node role/capability registration - **Node Capability Negotiation**: Optional companion-node role/capability registration
Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap <path|->`), release-bundle export (`--export-release-bundle <dir>` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle <dir>` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template <dir>`), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of all live windows (channel + gateway + backend-scoped), `pnpm audit:phase0-baseline:live:drift` for backend artifact freshness/drift gates (writing `phase0_baseline_live_backend_drift_<tag>.md/.json` reports), and `pnpm audit:phase0-baseline:live:refresh:drift:rolling` for cadence runs that stamp each capture with a unique UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so drift comparisons can immediately use a prior snapshot. These scripts default to current UTC-date tags unless `--tag` is explicitly provided. Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. 
Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap <path|->`), release-bundle export (`--export-release-bundle <dir>` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle <dir>` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template <dir>`), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of all live windows (channel + gateway + backend-scoped), `pnpm audit:phase0-baseline:live:drift` for backend artifact freshness/drift gates (writing `phase0_baseline_live_backend_drift_<tag>.md/.json` reports), `pnpm audit:phase0-baseline:live:refresh:drift:rolling` for cadence runs that stamp each capture with a unique UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so drift comparisons can immediately use a prior snapshot, and `pnpm audit:phase0-baseline:live:prune` / `pnpm audit:phase0-baseline:live:prune:apply` for rolling-tag artifact retention management. These scripts default to current UTC-date tags unless `--tag` is explicitly provided.
### Execution Model (Sessions + Per-Session Queue) ### Execution Model (Sessions + Per-Session Queue)
+1
View File
@@ -172,6 +172,7 @@ Gateway streaming UX signals:
- `pnpm audit:phase0-baseline:live:refresh` runs channel + gateway + backend-scoped (`pi_embedded` and `native`) capture commands in one cadence step. - `pnpm audit:phase0-baseline:live:refresh` runs channel + gateway + backend-scoped (`pi_embedded` and `native`) capture commands in one cadence step.
- `pnpm audit:phase0-baseline:live:drift` evaluates backend-scoped artifact freshness/drift gates and writes `docs/plans/artifacts/phase0_baseline_live_backend_drift_<UTC-date>.md/.json`; `pnpm audit:phase0-baseline:live:refresh:drift` runs capture + drift checks in one cadence step. - `pnpm audit:phase0-baseline:live:drift` evaluates backend-scoped artifact freshness/drift gates and writes `docs/plans/artifacts/phase0_baseline_live_backend_drift_<UTC-date>.md/.json`; `pnpm audit:phase0-baseline:live:refresh:drift` runs capture + drift checks in one cadence step.
- `pnpm audit:phase0-baseline:live:refresh:drift:rolling` runs the same full refresh+drift flow with a shared UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so each cadence run keeps distinct backend/drift artifacts for immediate baseline-vs-prior comparisons. - `pnpm audit:phase0-baseline:live:refresh:drift:rolling` runs the same full refresh+drift flow with a shared UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so each cadence run keeps distinct backend/drift artifacts for immediate baseline-vs-prior comparisons.
- `pnpm audit:phase0-baseline:live:prune` provides dry-run retention planning for rolling-tag artifacts; `pnpm audit:phase0-baseline:live:prune:apply` deletes older rolling snapshots while keeping the newest tags per artifact family.
- `audit:phase0-baseline:live*` scripts are cadence-safe by default (UTC-date tags auto-generated unless explicitly overridden). - `audit:phase0-baseline:live*` scripts are cadence-safe by default (UTC-date tags auto-generated unless explicitly overridden).
- Canvas artifacts are persisted by the gateway so session UI surfaces can recover after daemon restarts. - Canvas artifacts are persisted by the gateway so session UI surfaces can recover after daemon restarts.
- TTS synthesis uses an ordered provider chain with health cooldown tracking; if all providers fail, replies degrade to text-only without dropping the response. - TTS synthesis uses an ordered provider chain with health cooldown tracking; if all providers fail, replies degrade to text-only without dropping the response.
@@ -37,6 +37,7 @@ If you only want the protocol surface, see `docs/api/PROTOCOL.md`.
- `pnpm audit:phase0-baseline:live:refresh` runs channel + gateway + backend-scoped (`pi_embedded` and `native`) capture paths in one command. - `pnpm audit:phase0-baseline:live:refresh` runs channel + gateway + backend-scoped (`pi_embedded` and `native`) capture paths in one command.
- `pnpm audit:phase0-baseline:live:drift` checks backend-scoped artifact freshness/drift gates and writes `phase0_baseline_live_backend_drift_<UTC-date>.md/.json`; `pnpm audit:phase0-baseline:live:refresh:drift` chains refresh + drift checks for scheduled cadence runs. - `pnpm audit:phase0-baseline:live:drift` checks backend-scoped artifact freshness/drift gates and writes `phase0_baseline_live_backend_drift_<UTC-date>.md/.json`; `pnpm audit:phase0-baseline:live:refresh:drift` chains refresh + drift checks for scheduled cadence runs.
- `pnpm audit:phase0-baseline:live:refresh:drift:rolling` performs the same chain using one UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) across channel/gateway/backend/drift outputs so each cadence run preserves a distinct comparison point. - `pnpm audit:phase0-baseline:live:refresh:drift:rolling` performs the same chain using one UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) across channel/gateway/backend/drift outputs so each cadence run preserves a distinct comparison point.
- `pnpm audit:phase0-baseline:live:prune` (dry-run) and `pnpm audit:phase0-baseline:live:prune:apply` (delete) manage retention of rolling-tag artifacts to control artifact growth while preserving newest snapshots per family.
- `audit:phase0-baseline:live*` package scripts now omit fixed tags so scheduled runs automatically roll to current UTC-date artifact tags. - `audit:phase0-baseline:live*` package scripts now omit fixed tags so scheduled runs automatically roll to current UTC-date artifact tags.
- Companion CLI supports one-shot shell bootstrap metadata for live sessions (`--app-version`/`--status-text`, `--latitude`/`--longitude`, `--push-token`) so desktop/mobile wrappers can initialize node status/location/push in a single launch flow. - Companion CLI supports one-shot shell bootstrap metadata for live sessions (`--app-version`/`--status-text`, `--latitude`/`--longitude`, `--push-token`) so desktop/mobile wrappers can initialize node status/location/push in a single launch flow.
- Canvas artifacts are persisted per session under the gateway data directory for UI recovery across restarts. - Canvas artifacts are persisted per session under the gateway data directory for UI recovery across restarts.
@@ -203,7 +203,7 @@ Phase 0 is complete when:
2. A baseline summary artifact is generated and committed under `docs/plans/artifacts/`. 2. A baseline summary artifact is generated and committed under `docs/plans/artifacts/`.
3. No user-visible response behavior changed compared to pre-phase baseline. 3. No user-visible response behavior changed compared to pre-phase baseline.
Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), all live windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (channel + gateway + backend-scoped `pi`/`native`; scheduling example included in README), backend artifact freshness/drift checks are now available via `pnpm audit:phase0-baseline:live:drift` (or chained with `pnpm audit:phase0-baseline:live:refresh:drift`) with drift report artifacts written to `docs/plans/artifacts/phase0_baseline_live_backend_drift_<tag>.{md,json}`, and cadence runs can now preserve distinct timestamped comparison points via `pnpm audit:phase0-baseline:live:refresh:drift:rolling`. Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. 
Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), all live windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (channel + gateway + backend-scoped `pi`/`native`; scheduling example included in README), backend artifact freshness/drift checks are now available via `pnpm audit:phase0-baseline:live:drift` (or chained with `pnpm audit:phase0-baseline:live:refresh:drift`) with drift report artifacts written to `docs/plans/artifacts/phase0_baseline_live_backend_drift_<tag>.{md,json}`, cadence runs can preserve distinct timestamped comparison points via `pnpm audit:phase0-baseline:live:refresh:drift:rolling`, and rolling-tag retention can be managed via `pnpm audit:phase0-baseline:live:prune` (dry-run) / `pnpm audit:phase0-baseline:live:prune:apply`.
## Subagent Model Assignment Plan ## Subagent Model Assignment Plan
+22 -3
View File
@@ -294,6 +294,25 @@
], ],
"test_status": "pnpm audit:phase0-baseline:live:refresh:drift:rolling + pnpm test:run src/audit/phase0BaselineDrift.test.ts + pnpm typecheck passing" "test_status": "pnpm audit:phase0-baseline:live:refresh:drift:rolling + pnpm test:run src/audit/phase0BaselineDrift.test.ts + pnpm typecheck passing"
}, },
"phase0-live-baseline-rolling-artifact-retention": {
"status": "completed",
"date": "2026-02-27",
"updated": "2026-02-27",
"summary": "Added rolling-tag artifact retention tooling for phase-0 cadence snapshots: a tested retention planner (`phase0BaselineArtifactRetention.ts`) plus prune CLI (`prune-phase0-baseline-artifacts.ts`) with dry-run/apply package scripts to keep newest tags per family while controlling artifact growth.",
"files_modified": [
"src/audit/phase0BaselineArtifactRetention.ts",
"src/audit/phase0BaselineArtifactRetention.test.ts",
"scripts/prune-phase0-baseline-artifacts.ts",
"package.json",
"README.md",
"docs/api/PROTOCOL.md",
"docs/architecture/AGENT_DIAGRAM.md",
"docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md",
"docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md",
"docs/plans/state.json"
],
"test_status": "pnpm test:run src/audit/phase0BaselineArtifactRetention.test.ts + pnpm audit:phase0-baseline:live:prune + pnpm typecheck passing"
},
"phase0-instrumentation-ticket-checklist": { "phase0-instrumentation-ticket-checklist": {
"status": "completed", "status": "completed",
"date": "2026-02-25", "date": "2026-02-25",
@@ -7443,7 +7462,7 @@
} }
}, },
"overall_progress": { "overall_progress": {
"total_test_count": 2590, "total_test_count": 2591,
"all_tests_passing": true, "all_tests_passing": true,
"p0_completion": "3/3 (100%)", "p0_completion": "3/3 (100%)",
"p1_completion": "4/4 (100%)", "p1_completion": "4/4 (100%)",
@@ -7479,8 +7498,8 @@
"deeper_surfaces_phase0_ticket_02": "completed — gateway + daemon routing emit run lifecycle/cancel telemetry and reaction match/skip audit events with filter summaries and cancellation latency, plus focused tests", "deeper_surfaces_phase0_ticket_02": "completed — gateway + daemon routing emit run lifecycle/cancel telemetry and reaction match/skip audit events with filter summaries and cancellation latency, plus focused tests",
"deeper_surfaces_phase0_ticket_03": "completed — gateway metrics now track run-state outcomes, cancel latency samples, and reaction decision counters with routing/gateway emitters", "deeper_surfaces_phase0_ticket_03": "completed — gateway metrics now track run-state outcomes, cancel latency samples, and reaction decision counters with routing/gateway emitters",
"deeper_surfaces_phase0_ticket_04": "completed — added phase-0 baseline summary tooling for run outcomes, cancel latency, and reaction decisions with markdown/json CLI output", "deeper_surfaces_phase0_ticket_04": "completed — added phase-0 baseline summary tooling for run outcomes, cancel latency, and reaction decisions with markdown/json CLI output",
"deeper_surfaces_phase0_ticket_05": "completed — documented phase-0 telemetry fields/workflow, refreshed architecture/protocol docs, generated anonymized live baseline artifacts for channel/gateway/backend-scoped (pi/native) windows, added backend freshness/drift gates with persisted drift reports, and added rolling timestamp-tag cadence runs for immediate baseline-vs-prior drift comparisons", "deeper_surfaces_phase0_ticket_05": "completed — documented phase-0 telemetry fields/workflow, refreshed architecture/protocol docs, generated anonymized live baseline artifacts for channel/gateway/backend-scoped (pi/native) windows, added backend freshness/drift gates with persisted drift reports, added rolling timestamp-tag cadence runs for immediate baseline-vs-prior drift comparisons, and added rolling artifact retention tooling (`live:prune`)",
"next_up": "Run scheduled `pnpm audit:phase0-baseline:live:refresh:drift:rolling` in each active environment for at least one full cadence cycle, then tighten drift thresholds based on observed variance before additional run-control/reaction semantic changes.", "next_up": "Run scheduled `pnpm audit:phase0-baseline:live:refresh:drift:rolling` in each active environment for at least one full cadence cycle, then apply/tune retention via `pnpm audit:phase0-baseline:live:prune(:apply)` and tighten drift thresholds based on observed variance before additional run-control/reaction semantic changes.",
"pi_embedded_canary_spike": "completed — added optional pi_embedded backend adapter, canary-safe no-tools routing guard, backend success/fallback latency audit events, and docs/diagram updates while native remains default", "pi_embedded_canary_spike": "completed — added optional pi_embedded backend adapter, canary-safe no-tools routing guard, backend success/fallback latency audit events, and docs/diagram updates while native remains default",
"pi_embedded_evaluation_phase": "completed — final decision rollback (applied in runtime config): Window A failed latency/fallback gates (p50 +259ms, p95 +5695ms, fallback 25%, categories: pi_module_interface/empty_assistant_text); Window B remained sample-insufficient; controlled probes verified guard coverage (pi_no_tools_mode/capability_query/attachments_present each hit once)", "pi_embedded_evaluation_phase": "completed — final decision rollback (applied in runtime config): Window A failed latency/fallback gates (p50 +259ms, p95 +5695ms, fallback 25%, categories: pi_module_interface/empty_assistant_text); Window B remained sample-insufficient; controlled probes verified guard coverage (pi_no_tools_mode/capability_query/attachments_present each hit once)",
"pi_embedded_manual_mode": "completed — added persisted runtime backend controls for manual Pi activation/deactivation (`/runtime` preferred, `/backend` alias; `status`, `activate pi`, `deactivate pi`, `use config`) while keeping config-driven default routing", "pi_embedded_manual_mode": "completed — added persisted runtime backend controls for manual Pi activation/deactivation (`/runtime` preferred, `/backend` alias; `status`, `activate pi`, `deactivate pi`, `use config`) while keeping config-driven default routing",
+2
View File
@@ -30,6 +30,8 @@
"audit:phase0-baseline:live:drift": "node --import tsx/esm scripts/check-phase0-baseline-backend-drift.ts --artifacts-dir docs/plans/artifacts --backend pi_embedded,native --max-age-hours 36 --min-candidate-sampled-events 10 --max-sampled-events-drop-pct 80 --max-run-outcomes-drop-pct 80 --max-completion-rate-drop-pp 35 --max-cancel-rate-increase-pp 25 --max-error-rate-increase-pp 25 --max-cancel-latency-p95-increase-ms 6000 --write-default-artifacts", "audit:phase0-baseline:live:drift": "node --import tsx/esm scripts/check-phase0-baseline-backend-drift.ts --artifacts-dir docs/plans/artifacts --backend pi_embedded,native --max-age-hours 36 --min-candidate-sampled-events 10 --max-sampled-events-drop-pct 80 --max-run-outcomes-drop-pct 80 --max-completion-rate-drop-pp 35 --max-cancel-rate-increase-pp 25 --max-error-rate-increase-pp 25 --max-cancel-latency-p95-increase-ms 6000 --write-default-artifacts",
"audit:phase0-baseline:live:refresh:drift": "pnpm audit:phase0-baseline:live:refresh && pnpm audit:phase0-baseline:live:drift", "audit:phase0-baseline:live:refresh:drift": "pnpm audit:phase0-baseline:live:refresh && pnpm audit:phase0-baseline:live:drift",
"audit:phase0-baseline:live:refresh:drift:rolling": "TAG=$(date -u +%F-%H%M%S) && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --exclude-session-substring probe --tag \"$TAG\" && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source gateway --auto-gateway-cancel-window --tag \"$TAG\" && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --backend pi_embedded --exclude-session-substring probe --tag \"$TAG\" && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --backend native --exclude-session-substring probe --tag \"$TAG\" && node --import tsx/esm scripts/check-phase0-baseline-backend-drift.ts --artifacts-dir docs/plans/artifacts --backend pi_embedded,native --max-age-hours 36 --min-candidate-sampled-events 10 --max-sampled-events-drop-pct 80 --max-run-outcomes-drop-pct 80 --max-completion-rate-drop-pp 35 --max-cancel-rate-increase-pp 25 --max-error-rate-increase-pp 25 --max-cancel-latency-p95-increase-ms 6000 --write-default-artifacts --tag \"$TAG\" --report-tag \"$TAG\"", "audit:phase0-baseline:live:refresh:drift:rolling": "TAG=$(date -u +%F-%H%M%S) && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --exclude-session-substring probe --tag \"$TAG\" && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source gateway --auto-gateway-cancel-window --tag \"$TAG\" && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --backend pi_embedded --exclude-session-substring probe --tag \"$TAG\" && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --backend native 
--exclude-session-substring probe --tag \"$TAG\" && node --import tsx/esm scripts/check-phase0-baseline-backend-drift.ts --artifacts-dir docs/plans/artifacts --backend pi_embedded,native --max-age-hours 36 --min-candidate-sampled-events 10 --max-sampled-events-drop-pct 80 --max-run-outcomes-drop-pct 80 --max-completion-rate-drop-pp 35 --max-cancel-rate-increase-pp 25 --max-error-rate-increase-pp 25 --max-cancel-latency-p95-increase-ms 6000 --write-default-artifacts --tag \"$TAG\" --report-tag \"$TAG\"",
"audit:phase0-baseline:live:prune": "node --import tsx/esm scripts/prune-phase0-baseline-artifacts.ts --artifacts-dir docs/plans/artifacts --keep-per-family 8",
"audit:phase0-baseline:live:prune:apply": "node --import tsx/esm scripts/prune-phase0-baseline-artifacts.ts --artifacts-dir docs/plans/artifacts --keep-per-family 8 --apply",
"audit:backend-canary:probes": "node --import tsx/esm scripts/run-pi-canary-guard-probes.ts", "audit:backend-canary:probes": "node --import tsx/esm scripts/run-pi-canary-guard-probes.ts",
"companion:bundle": "node --import tsx/esm scripts/build-companion-release-bundle.ts", "companion:bundle": "node --import tsx/esm scripts/build-companion-release-bundle.ts",
"companion:reference-apps": "node --import tsx/esm scripts/export-companion-reference-apps.ts", "companion:reference-apps": "node --import tsx/esm scripts/export-companion-reference-apps.ts",
+114
View File
@@ -0,0 +1,114 @@
#!/usr/bin/env node
import { readdir, rm } from 'node:fs/promises';
import { resolve } from 'node:path';
import { parseArgs } from 'node:util';
import {
planRollingPhase0ArtifactRetention,
type Phase0RollingArtifactRetentionPlan,
} from '../src/audit/phase0BaselineArtifactRetention.js';
/**
 * Builds the command-line help text for the prune script.
 *
 * @returns The multi-line usage message without a trailing newline; callers
 *   append their own line terminator when printing.
 */
function usage(): string {
  const synopsis = 'Usage: node --import tsx/esm scripts/prune-phase0-baseline-artifacts.ts [options]';
  const optionRows = [
    ' --artifacts-dir <path> Artifacts directory (default: docs/plans/artifacts)',
    ' --keep-per-family <num> Keep newest rolling tags per family (default: 8)',
    ' --apply Apply deletions (default: dry-run)',
    ' --format <text|json> Output format (default: text)',
    ' --help Show usage',
  ];
  // Synopsis, one blank line, the "Options:" heading, then one row per flag.
  return `${synopsis}\n\nOptions:\n${optionRows.join('\n')}`;
}
/**
 * Parses an optional numeric CLI flag value.
 *
 * @param raw - Raw flag value as provided on the command line; `undefined` or
 *   an empty string means the flag was not supplied.
 * @param flag - Flag name (for example `--keep-per-family`) used in the error message.
 * @returns The parsed finite number, or `undefined` when no value was supplied.
 * @throws Error when the value is blank (whitespace only) or is not a finite number.
 */
function parseOptionalNumber(raw: string | undefined, flag: string): number | undefined {
  if (!raw) {
    return undefined;
  }
  // `Number('   ')` evaluates to 0, so a whitespace-only value would silently
  // become a keep limit of 0 (delete everything). Treat it as invalid instead.
  const parsed = raw.trim() === '' ? Number.NaN : Number(raw);
  if (!Number.isFinite(parsed)) {
    throw new Error(`Invalid ${flag} value "${raw}". Expected a number.`);
  }
  return parsed;
}
/**
 * Renders a retention plan as a Markdown-flavoured text report.
 *
 * @param plan - Retention plan whose `keep`, `remove` and `families` entries are summarised.
 * @param artifactsDir - Artifacts directory shown in the report header.
 * @param keepPerFamily - Keep limit shown in the report header.
 * @param apply - Whether deletions were applied (`apply`) or only planned (`dry-run`).
 * @returns The report text without a trailing newline.
 */
function renderText(plan: Phase0RollingArtifactRetentionPlan, artifactsDir: string, keepPerFamily: number, apply: boolean): string {
  const modeLabel = apply ? 'apply' : 'dry-run';
  const familyRows = plan.families.map(
    (entry) => `- ${entry.family}: tags total=${entry.total_tags} keep=${entry.keep_tags} remove=${entry.remove_tags}`,
  );
  // An empty removal list is still reported explicitly so the section is never blank.
  const removalRows = plan.remove.length > 0
    ? plan.remove.map((entry) => `- ${entry.file_name}`)
    : ['- none'];

  return [
    '# Phase-0 Rolling Artifact Prune',
    '',
    `Artifacts dir: ${artifactsDir}`,
    `Keep per family: ${keepPerFamily}`,
    `Mode: ${modeLabel}`,
    `Keep files: ${plan.keep.length}`,
    `Remove files: ${plan.remove.length}`,
    '',
    '## Families',
    ...familyRows,
    '',
    '## Remove List',
    ...removalRows,
  ].join('\n');
}
/**
 * CLI entry point: parses flags, plans rolling-artifact retention for the
 * artifacts directory, optionally deletes the files marked for removal, and
 * prints the plan as text or JSON.
 *
 * @returns A promise that resolves once the report has been written to stdout.
 * @throws Error when a flag is unknown or invalid, or when a filesystem call rejects.
 */
async function main(): Promise<void> {
  const parsedArgs = parseArgs({
    options: {
      'artifacts-dir': { type: 'string' },
      'keep-per-family': { type: 'string' },
      apply: { type: 'boolean' },
      format: { type: 'string' },
      help: { type: 'boolean', short: 'h' },
    },
    strict: true,
    allowPositionals: false,
  });
  const flags = parsedArgs.values;

  if (flags.help) {
    process.stdout.write(`${usage()}\n`);
    return;
  }

  const artifactsDir = resolve(flags['artifacts-dir'] ?? 'docs/plans/artifacts');
  const keepPerFamily = parseOptionalNumber(flags['keep-per-family'], '--keep-per-family') ?? 8;
  const apply = Boolean(flags.apply);
  const format = flags.format ?? 'text';
  if (!['text', 'json'].includes(format)) {
    throw new Error(`Invalid --format value "${format}".`);
  }

  const directoryEntries = await readdir(artifactsDir);
  const plan = planRollingPhase0ArtifactRetention(directoryEntries, keepPerFamily);

  // Deletions run one at a time and only in apply mode; dry-run leaves the directory untouched.
  if (apply) {
    for (const { file_name: fileName } of plan.remove) {
      await rm(resolve(artifactsDir, fileName));
    }
  }

  // The report shows the keep limit rounded down to a whole number.
  const reportedKeepLimit = Math.floor(keepPerFamily);
  let report: string;
  if (format === 'json') {
    report = JSON.stringify({
      generated_at: new Date().toISOString(),
      artifacts_dir: artifactsDir,
      keep_per_family: reportedKeepLimit,
      apply,
      plan,
    }, null, 2);
  } else {
    report = renderText(plan, artifactsDir, reportedKeepLimit, apply);
  }
  process.stdout.write(`${report}\n`);
}
// Script runner: any failure from main() is reported on stderr together with
// the usage text, and the process exit code is set to 1.
main().catch((failure: unknown) => {
  const detail = failure instanceof Error ? failure.message : String(failure);
  process.stderr.write([detail, '', usage(), ''].join('\n'));
  process.exitCode = 1;
});
@@ -0,0 +1,88 @@
import { describe, expect, it } from 'vitest';
import {
collectRollingPhase0ArtifactFiles,
planRollingPhase0ArtifactRetention,
} from './phase0BaselineArtifactRetention.js';
// Unit tests for rolling-tag artifact collection and retention planning.
describe('phase0BaselineArtifactRetention', () => {
  it('collects only rolling-tag phase-0 artifact files', () => {
    // Files carrying a YYYY-MM-DD-HHMMSS tag, one per artifact family.
    const rollingTagged = [
      'phase0_baseline_live_2026-02-27-010203.json',
      'phase0_baseline_live_gateway_2026-02-27-010203.md',
      'phase0_baseline_live_backend_pi_embedded_2026-02-27-010203.jsonl',
      'phase0_baseline_live_backend_native_2026-02-27-010203.json',
      'phase0_baseline_live_backend_drift_2026-02-27-010203.md',
    ];
    // Date-only tags and unrelated names that must be ignored.
    const ignored = [
      'phase0_baseline_live_2026-02-27.json',
      'phase0_baseline_live_gateway_2026-02-27.jsonl',
      'phase0_baseline_2026-02-25.md',
      'phase0_baseline_live_backend_pi_embedded_2026-02-27.md',
      'not_a_phase0_file.txt',
    ];

    const collected = collectRollingPhase0ArtifactFiles([...rollingTagged, ...ignored]);

    expect(collected).toHaveLength(5);
    const collectedFamilies = collected.map((entry) => entry.family).sort();
    expect(collectedFamilies).toEqual([
      'backend_drift',
      'backend_native',
      'backend_pi_embedded',
      'channel',
      'gateway',
    ]);
  });

  it('keeps most recent rolling tags per family and prunes older ones', () => {
    const files = [
      'phase0_baseline_live_2026-02-27-010203.json',
      'phase0_baseline_live_2026-02-27-010203.jsonl',
      'phase0_baseline_live_2026-02-27-020304.json',
      'phase0_baseline_live_2026-02-27-020304.md',
      'phase0_baseline_live_gateway_2026-02-27-010203.json',
      'phase0_baseline_live_gateway_2026-02-27-020304.json',
      'phase0_baseline_live_backend_pi_embedded_2026-02-27-010203.json',
      'phase0_baseline_live_backend_pi_embedded_2026-02-27-020304.json',
      'phase0_baseline_live_backend_native_2026-02-27-010203.json',
      'phase0_baseline_live_backend_native_2026-02-27-020304.json',
      'phase0_baseline_live_backend_drift_2026-02-27-010203.json',
      'phase0_baseline_live_backend_drift_2026-02-27-020304.json',
      'phase0_baseline_live_2026-02-27.json',
    ];

    const plan = planRollingPhase0ArtifactRetention(files, 1);

    expect(plan.families).toEqual([
      { family: 'channel', total_tags: 2, keep_tags: 1, remove_tags: 1 },
      { family: 'gateway', total_tags: 2, keep_tags: 1, remove_tags: 1 },
      { family: 'backend_pi_embedded', total_tags: 2, keep_tags: 1, remove_tags: 1 },
      { family: 'backend_native', total_tags: 2, keep_tags: 1, remove_tags: 1 },
      { family: 'backend_drift', total_tags: 2, keep_tags: 1, remove_tags: 1 },
    ]);

    // Every file carrying the older tag must be scheduled for removal.
    const removedNames = new Set(plan.remove.map((entry) => entry.file_name));
    const expectedRemoved = [
      'phase0_baseline_live_2026-02-27-010203.json',
      'phase0_baseline_live_2026-02-27-010203.jsonl',
      'phase0_baseline_live_gateway_2026-02-27-010203.json',
      'phase0_baseline_live_backend_pi_embedded_2026-02-27-010203.json',
      'phase0_baseline_live_backend_native_2026-02-27-010203.json',
      'phase0_baseline_live_backend_drift_2026-02-27-010203.json',
    ];
    for (const name of expectedRemoved) {
      expect(removedNames.has(name)).toBe(true);
    }

    // The date-only file is outside the plan; both files of the newest tag are kept.
    const keptNames = new Set(plan.keep.map((entry) => entry.file_name));
    expect(keptNames.has('phase0_baseline_live_2026-02-27.json')).toBe(false);
    const expectedKept = [
      'phase0_baseline_live_2026-02-27-020304.json',
      'phase0_baseline_live_2026-02-27-020304.md',
    ];
    for (const name of expectedKept) {
      expect(keptNames.has(name)).toBe(true);
    }
  });

  it('supports zero keep limit', () => {
    const files = [
      'phase0_baseline_live_2026-02-27-010203.json',
      'phase0_baseline_live_gateway_2026-02-27-010203.json',
    ];

    const plan = planRollingPhase0ArtifactRetention(files, 0);

    expect(plan.keep).toHaveLength(0);
    const removedNames = plan.remove.map((entry) => entry.file_name).sort();
    expect(removedNames).toEqual([
      'phase0_baseline_live_2026-02-27-010203.json',
      'phase0_baseline_live_gateway_2026-02-27-010203.json',
    ]);
  });

  it('rejects negative keep limit', () => {
    const planWithNegativeLimit = () => planRollingPhase0ArtifactRetention([], -1);
    expect(planWithNegativeLimit).toThrow('keepPerFamily');
  });
});
@@ -0,0 +1,189 @@
/**
 * Artifact families recognised by rolling-tag retention. Each family
 * corresponds to one file-name prefix in `FAMILY_PATTERNS`
 * (for example `gateway` for `phase0_baseline_live_gateway_<tag>.*`).
 */
export type Phase0RollingArtifactFamily =
| 'channel'
| 'gateway'
| 'backend_pi_embedded'
| 'backend_native'
| 'backend_drift';
/** One artifact file whose name carries a rolling `YYYY-MM-DD-HHMMSS` tag. */
export interface Phase0RollingArtifactFile {
// File name exactly as it was passed in (no directory component is added here).
file_name: string;
// Family whose file-name pattern matched this file.
family: Phase0RollingArtifactFamily;
// The rolling tag captured from the file name.
tag: string;
// The tag converted to epoch milliseconds via Date.UTC.
tag_timestamp_ms: number;
}
/**
 * Result of retention planning: which rolling-tag files stay, which are
 * scheduled for removal, and a per-family tag summary.
 */
export interface Phase0RollingArtifactRetentionPlan {
// Rolling-tag files that are retained.
keep: Phase0RollingArtifactFile[];
// Rolling-tag files scheduled for removal (the prune CLI deletes these in apply mode).
remove: Phase0RollingArtifactFile[];
// One summary row per family, counting tags rather than individual files.
families: Array<{
family: Phase0RollingArtifactFamily;
total_tags: number;
keep_tags: number;
remove_tags: number;
}>;
}
const ROLLING_TAG_PATTERN = /^(\d{4})-(\d{2})-(\d{2})-(\d{6})$/;
/**
 * File-name pattern for each artifact family.
 *
 * Every pattern is anchored at both ends, captures the rolling tag in group 1 and
 * the extension in group 2. Because the tag digits must follow the family prefix
 * directly, date-only tags (`YYYY-MM-DD`) never match, and the `channel` pattern
 * cannot match the longer `gateway_` / `backend_*` names.
 */
const FAMILY_PATTERNS: Array<{ family: Phase0RollingArtifactFamily; pattern: RegExp }> = [
{
family: 'channel',
pattern: /^phase0_baseline_live_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
},
{
family: 'gateway',
pattern: /^phase0_baseline_live_gateway_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
},
{
family: 'backend_pi_embedded',
pattern: /^phase0_baseline_live_backend_pi_embedded_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
},
{
family: 'backend_native',
pattern: /^phase0_baseline_live_backend_native_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
},
{
family: 'backend_drift',
// Unlike the other families, only `.json` and `.md` are accepted here (no `.jsonl`).
pattern: /^phase0_baseline_live_backend_drift_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|md)$/,
},
];
/**
 * Converts a rolling tag (`YYYY-MM-DD-HHMMSS`) into a UTC timestamp.
 *
 * `Date.UTC` silently normalizes out-of-range fields (month 13 rolls into the next
 * year, February 30 becomes March 2, hour 25 becomes 01:00 the next day) and maps
 * years 0-99 to 1900-1999. To avoid ordering artifacts by an instant their tag does
 * not actually spell, the computed instant is read back and every field must match
 * the digits in the tag.
 *
 * @param tag - Candidate rolling tag.
 * @returns Milliseconds since the Unix epoch (UTC), or `undefined` when the tag does
 *   not match `ROLLING_TAG_PATTERN` or does not name a real calendar date and time.
 */
function parseRollingTagTimestampMs(tag: string): number | undefined {
  const match = ROLLING_TAG_PATTERN.exec(tag);
  if (!match) {
    return undefined;
  }
  const [, yearText = '', monthText = '', dayText = '', hhmmss = ''] = match;
  if (hhmmss.length !== 6) {
    return undefined;
  }
  const year = Number(yearText);
  const month = Number(monthText);
  const day = Number(dayText);
  const hour = Number(hhmmss.slice(0, 2));
  const minute = Number(hhmmss.slice(2, 4));
  const second = Number(hhmmss.slice(4, 6));

  const timestampMs = Date.UTC(year, month - 1, day, hour, minute, second);
  if (!Number.isFinite(timestampMs)) {
    return undefined;
  }

  // Round-trip check: any field that Date.UTC had to normalize will differ here.
  const normalized = new Date(timestampMs);
  const isExact =
    normalized.getUTCFullYear() === year &&
    normalized.getUTCMonth() === month - 1 &&
    normalized.getUTCDate() === day &&
    normalized.getUTCHours() === hour &&
    normalized.getUTCMinutes() === minute &&
    normalized.getUTCSeconds() === second;
  return isExact ? timestampMs : undefined;
}
/**
 * Classifies a file name as a rolling-tag artifact.
 *
 * The name is tried against each family pattern in declaration order; the first
 * pattern that matches and whose captured tag converts to a timestamp wins.
 *
 * @param fileName - Bare file name to classify.
 * @returns The parsed artifact descriptor, or `undefined` when no family pattern
 *   yields a usable tag.
 */
function parseRollingArtifactFile(fileName: string): Phase0RollingArtifactFile | undefined {
  for (const { family, pattern } of FAMILY_PATTERNS) {
    const hit = pattern.exec(fileName);
    if (hit) {
      const tag = hit[1] ?? '';
      const tagTimestampMs = parseRollingTagTimestampMs(tag);
      if (typeof tagTimestampMs === 'number') {
        return { file_name: fileName, family, tag, tag_timestamp_ms: tagTimestampMs };
      }
      // Matched the name shape but the tag is unusable: fall through to the next pattern.
    }
  }
  return undefined;
}
/**
 * Comparator that orders tagged rows newest-first.
 *
 * Rows are compared by `tag_timestamp_ms` in descending order; when the timestamps
 * are equal, the tag strings are compared in descending `localeCompare` order so the
 * result stays deterministic.
 *
 * @param a - Left-hand row.
 * @param b - Right-hand row.
 * @returns A negative number when `a` sorts first, positive when `b` sorts first,
 *   and 0 when timestamp and tag are both equal.
 */
function sortByTagTimeDesc(
  a: { tag_timestamp_ms: number; tag: string },
  b: { tag_timestamp_ms: number; tag: string },
): number {
  const timeGap = b.tag_timestamp_ms - a.tag_timestamp_ms;
  return timeGap === 0 ? b.tag.localeCompare(a.tag) : timeGap;
}
/**
 * Picks out the rolling-tag phase0 artifacts from a list of file names.
 *
 * Names that do not parse as a rolling-tag artifact are dropped; the remaining
 * entries keep their input order.
 *
 * @param fileNames - Bare file names to inspect.
 * @returns One descriptor per recognized file name.
 */
export function collectRollingPhase0ArtifactFiles(fileNames: string[]): Phase0RollingArtifactFile[] {
  return fileNames.flatMap((fileName) => {
    const artifact = parseRollingArtifactFile(fileName);
    return artifact ? [artifact] : [];
  });
}
/**
 * Builds a retention plan for rolling-tag phase0 artifacts.
 *
 * Within each family the distinct tags are ordered newest-first and the first
 * `keepPerFamily` tags are retained; every file carrying a retained tag is kept and
 * every other recognized file is marked for removal. File names that are not
 * rolling-tag artifacts appear in neither list. This function only plans; it does
 * not touch the file system.
 *
 * @param fileNames - Bare file names to consider.
 * @param keepPerFamily - Number of newest tags to retain per family. Fractional
 *   values are rounded down.
 * @returns Files to keep and to remove (each ordered by family, then newest tag
 *   first, then file name) plus a per-family tag summary in family declaration order.
 * @throws Error when `keepPerFamily` is negative or not a finite number.
 */
export function planRollingPhase0ArtifactRetention(
  fileNames: string[],
  keepPerFamily: number,
): Phase0RollingArtifactRetentionPlan {
  const limitIsValid = Number.isFinite(keepPerFamily) && keepPerFamily >= 0;
  if (!limitIsValid) {
    throw new Error('keepPerFamily must be greater than or equal to 0.');
  }
  const tagsToKeep = Math.floor(keepPerFamily);
  const artifacts = collectRollingPhase0ArtifactFiles(fileNames);

  const kept: Phase0RollingArtifactFile[] = [];
  const removed: Phase0RollingArtifactFile[] = [];
  const families: Phase0RollingArtifactRetentionPlan['families'] = [];

  for (const { family } of FAMILY_PATTERNS) {
    const members = artifacts.filter((artifact) => artifact.family === family);

    // Distinct tags of this family with their timestamps, in first-seen order.
    const timestampByTag = new Map<string, number>();
    for (const member of members) {
      const known = timestampByTag.get(member.tag);
      timestampByTag.set(
        member.tag,
        known === undefined ? member.tag_timestamp_ms : Math.max(known, member.tag_timestamp_ms),
      );
    }

    const newestFirst = Array.from(timestampByTag, ([tag, tag_timestamp_ms]) => ({ tag, tag_timestamp_ms }));
    newestFirst.sort(sortByTagTimeDesc);

    const retainedTags = new Set<string>();
    for (const { tag } of newestFirst.slice(0, tagsToKeep)) {
      retainedTags.add(tag);
    }

    // A tag is kept or removed as a unit: all of its files go the same way.
    for (const member of members) {
      (retainedTags.has(member.tag) ? kept : removed).push(member);
    }

    const totalTags = newestFirst.length;
    families.push({
      family,
      total_tags: totalTags,
      keep_tags: Math.min(totalTags, tagsToKeep),
      remove_tags: Math.max(0, totalTags - tagsToKeep),
    });
  }

  // Output order: family name, then newest tag first, then file name.
  const compareForOutput = (a: Phase0RollingArtifactFile, b: Phase0RollingArtifactFile): number => {
    const familyOrder = a.family.localeCompare(b.family);
    if (familyOrder !== 0) {
      return familyOrder;
    }
    const recencyOrder = sortByTagTimeDesc(a, b);
    return recencyOrder !== 0 ? recencyOrder : a.file_name.localeCompare(b.file_name);
  };

  return {
    keep: kept.slice().sort(compareForOutput),
    remove: removed.slice().sort(compareForOutput),
    families,
  };
}