diff --git a/README.md b/README.md index 9a80ae9..8abd026 100644 --- a/README.md +++ b/README.md @@ -1654,6 +1654,7 @@ Cadence scheduling (example: every 6 hours via host cron) with rolling timestamp ``` `audit:phase0-baseline:live*` scripts now default to the current UTC date tag when `--tag` is omitted. Use `audit:phase0-baseline:live:refresh:drift:rolling` when you want each cadence run to keep a distinct tag (`YYYY-MM-DD-HHMMSS`) so drift checks compare against a recent prior snapshot immediately. +Use `audit:phase0-baseline:live:prune` for dry-run retention planning, and `audit:phase0-baseline:live:prune:apply` to prune older rolling-tag artifacts while keeping the newest snapshots per family. Gateway-origin windows can be captured separately (for example when validating cancel paths): ```bash diff --git a/docs/api/PROTOCOL.md b/docs/api/PROTOCOL.md index 9087063..10061de 100644 --- a/docs/api/PROTOCOL.md +++ b/docs/api/PROTOCOL.md @@ -23,7 +23,7 @@ The gateway provides: - **HTTP Server**: Serves static dashboard and handles webhook endpoints - **Node Capability Negotiation**: Optional companion-node role/capability registration -Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap `), release-bundle export (`--export-release-bundle ` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle ` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template `), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. 
Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of all live windows (channel + gateway + backend-scoped), `pnpm audit:phase0-baseline:live:drift` for backend artifact freshness/drift gates (writing `phase0_baseline_live_backend_drift_.md/.json` reports), and `pnpm audit:phase0-baseline:live:refresh:drift:rolling` for cadence runs that stamp each capture with a unique UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so drift comparisons can immediately use a prior snapshot. These scripts default to current UTC-date tags unless `--tag` is explicitly provided. +Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap `), release-bundle export (`--export-release-bundle ` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle ` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template `), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. 
Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of all live windows (channel + gateway + backend-scoped), `pnpm audit:phase0-baseline:live:drift` for backend artifact freshness/drift gates (writing `phase0_baseline_live_backend_drift_.md/.json` reports), `pnpm audit:phase0-baseline:live:refresh:drift:rolling` for cadence runs that stamp each capture with a unique UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so drift comparisons can immediately use a prior snapshot, and `pnpm audit:phase0-baseline:live:prune` / `pnpm audit:phase0-baseline:live:prune:apply` for rolling-tag artifact retention management. These scripts default to current UTC-date tags unless `--tag` is explicitly provided. ### Execution Model (Sessions + Per-Session Queue) diff --git a/docs/architecture/AGENT_DIAGRAM.md b/docs/architecture/AGENT_DIAGRAM.md index 675289a..cf8ed16 100644 --- a/docs/architecture/AGENT_DIAGRAM.md +++ b/docs/architecture/AGENT_DIAGRAM.md @@ -172,6 +172,7 @@ Gateway streaming UX signals: - `pnpm audit:phase0-baseline:live:refresh` runs channel + gateway + backend-scoped (`pi_embedded` and `native`) capture commands in one cadence step. - `pnpm audit:phase0-baseline:live:drift` evaluates backend-scoped artifact freshness/drift gates and writes `docs/plans/artifacts/phase0_baseline_live_backend_drift_.md/.json`; `pnpm audit:phase0-baseline:live:refresh:drift` runs capture + drift checks in one cadence step. 
- `pnpm audit:phase0-baseline:live:refresh:drift:rolling` runs the same full refresh+drift flow with a shared UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so each cadence run keeps distinct backend/drift artifacts for immediate baseline-vs-prior comparisons. +- `pnpm audit:phase0-baseline:live:prune` provides dry-run retention planning for rolling-tag artifacts; `pnpm audit:phase0-baseline:live:prune:apply` deletes older rolling snapshots while keeping the newest tags per artifact family. - `audit:phase0-baseline:live*` scripts are cadence-safe by default (UTC-date tags auto-generated unless explicitly overridden). - Canvas artifacts are persisted by the gateway so session UI surfaces can recover after daemon restarts. - TTS synthesis uses an ordered provider chain with health cooldown tracking; if all providers fail, replies degrade to text-only without dropping the response. diff --git a/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md b/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md index e6cac7f..c4f3155 100644 --- a/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md +++ b/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md @@ -37,6 +37,7 @@ If you only want the protocol surface, see `docs/api/PROTOCOL.md`. - `pnpm audit:phase0-baseline:live:refresh` runs channel + gateway + backend-scoped (`pi_embedded` and `native`) capture paths in one command. - `pnpm audit:phase0-baseline:live:drift` checks backend-scoped artifact freshness/drift gates and writes `phase0_baseline_live_backend_drift_<tag>.md/.json`; `pnpm audit:phase0-baseline:live:refresh:drift` chains refresh + drift checks for scheduled cadence runs. - `pnpm audit:phase0-baseline:live:refresh:drift:rolling` performs the same chain using one UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) across channel/gateway/backend/drift outputs so each cadence run preserves a distinct comparison point. 
+- `pnpm audit:phase0-baseline:live:prune` (dry-run) and `pnpm audit:phase0-baseline:live:prune:apply` (delete) manage retention of rolling-tag artifacts to control artifact growth while preserving newest snapshots per family. - `audit:phase0-baseline:live*` package scripts now omit fixed tags so scheduled runs automatically roll to current UTC-date artifact tags. - Companion CLI supports one-shot shell bootstrap metadata for live sessions (`--app-version`/`--status-text`, `--latitude`/`--longitude`, `--push-token`) so desktop/mobile wrappers can initialize node status/location/push in a single launch flow. - Canvas artifacts are persisted per session under the gateway data directory for UI recovery across restarts. diff --git a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md index 35b683a..749e9be 100644 --- a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md +++ b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md @@ -203,7 +203,7 @@ Phase 0 is complete when: 2. A baseline summary artifact is generated and committed under `docs/plans/artifacts/`. 3. No user-visible response behavior changed compared to pre-phase baseline. -Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. 
Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), all live windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (channel + gateway + backend-scoped `pi`/`native`; scheduling example included in README), backend artifact freshness/drift checks are now available via `pnpm audit:phase0-baseline:live:drift` (or chained with `pnpm audit:phase0-baseline:live:refresh:drift`) with drift report artifacts written to `docs/plans/artifacts/phase0_baseline_live_backend_drift_.{md,json}`, and cadence runs can now preserve distinct timestamped comparison points via `pnpm audit:phase0-baseline:live:refresh:drift:rolling`. +Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), all live windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (channel + gateway + backend-scoped `pi`/`native`; scheduling example included in README), backend artifact freshness/drift checks are now available via `pnpm audit:phase0-baseline:live:drift` (or chained with `pnpm audit:phase0-baseline:live:refresh:drift`) with drift report artifacts written to `docs/plans/artifacts/phase0_baseline_live_backend_drift_.{md,json}`, cadence runs can preserve distinct timestamped comparison points via `pnpm audit:phase0-baseline:live:refresh:drift:rolling`, and rolling-tag retention can be managed via `pnpm audit:phase0-baseline:live:prune` (dry-run) / `pnpm audit:phase0-baseline:live:prune:apply`. 
## Subagent Model Assignment Plan diff --git a/docs/plans/state.json b/docs/plans/state.json index 660dfa6..eeb2198 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -294,6 +294,25 @@ ], "test_status": "pnpm audit:phase0-baseline:live:refresh:drift:rolling + pnpm test:run src/audit/phase0BaselineDrift.test.ts + pnpm typecheck passing" }, + "phase0-live-baseline-rolling-artifact-retention": { + "status": "completed", + "date": "2026-02-27", + "updated": "2026-02-27", + "summary": "Added rolling-tag artifact retention tooling for phase-0 cadence snapshots: a tested retention planner (`phase0BaselineArtifactRetention.ts`) plus prune CLI (`prune-phase0-baseline-artifacts.ts`) with dry-run/apply package scripts to keep newest tags per family while controlling artifact growth.", + "files_modified": [ + "src/audit/phase0BaselineArtifactRetention.ts", + "src/audit/phase0BaselineArtifactRetention.test.ts", + "scripts/prune-phase0-baseline-artifacts.ts", + "package.json", + "README.md", + "docs/api/PROTOCOL.md", + "docs/architecture/AGENT_DIAGRAM.md", + "docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md", + "docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/audit/phase0BaselineArtifactRetention.test.ts + pnpm audit:phase0-baseline:live:prune + pnpm typecheck passing" + }, "phase0-instrumentation-ticket-checklist": { "status": "completed", "date": "2026-02-25", @@ -7443,7 +7462,7 @@ } }, "overall_progress": { - "total_test_count": 2590, + "total_test_count": 2591, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", @@ -7479,8 +7498,8 @@ "deeper_surfaces_phase0_ticket_02": "completed — gateway + daemon routing emit run lifecycle/cancel telemetry and reaction match/skip audit events with filter summaries and cancellation latency, plus focused tests", "deeper_surfaces_phase0_ticket_03": "completed — gateway metrics now track run-state outcomes, 
cancel latency samples, and reaction decision counters with routing/gateway emitters", "deeper_surfaces_phase0_ticket_04": "completed — added phase-0 baseline summary tooling for run outcomes, cancel latency, and reaction decisions with markdown/json CLI output", - "deeper_surfaces_phase0_ticket_05": "completed — documented phase-0 telemetry fields/workflow, refreshed architecture/protocol docs, generated anonymized live baseline artifacts for channel/gateway/backend-scoped (pi/native) windows, added backend freshness/drift gates with persisted drift reports, and added rolling timestamp-tag cadence runs for immediate baseline-vs-prior drift comparisons", - "next_up": "Run scheduled `pnpm audit:phase0-baseline:live:refresh:drift:rolling` in each active environment for at least one full cadence cycle, then tighten drift thresholds based on observed variance before additional run-control/reaction semantic changes.", + "deeper_surfaces_phase0_ticket_05": "completed — documented phase-0 telemetry fields/workflow, refreshed architecture/protocol docs, generated anonymized live baseline artifacts for channel/gateway/backend-scoped (pi/native) windows, added backend freshness/drift gates with persisted drift reports, added rolling timestamp-tag cadence runs for immediate baseline-vs-prior drift comparisons, and added rolling artifact retention tooling (`live:prune`)", + "next_up": "Run scheduled `pnpm audit:phase0-baseline:live:refresh:drift:rolling` in each active environment for at least one full cadence cycle, then apply/tune retention via `pnpm audit:phase0-baseline:live:prune(:apply)` and tighten drift thresholds based on observed variance before additional run-control/reaction semantic changes.", "pi_embedded_canary_spike": "completed — added optional pi_embedded backend adapter, canary-safe no-tools routing guard, backend success/fallback latency audit events, and docs/diagram updates while native remains default", "pi_embedded_evaluation_phase": "completed — final 
decision rollback (applied in runtime config): Window A failed latency/fallback gates (p50 +259ms, p95 +5695ms, fallback 25%, categories: pi_module_interface/empty_assistant_text); Window B remained sample-insufficient; controlled probes verified guard coverage (pi_no_tools_mode/capability_query/attachments_present each hit once)", "pi_embedded_manual_mode": "completed — added persisted runtime backend controls for manual Pi activation/deactivation (`/runtime` preferred, `/backend` alias; `status`, `activate pi`, `deactivate pi`, `use config`) while keeping config-driven default routing", diff --git a/package.json b/package.json index 5da48ae..9d69a8f 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,8 @@ "audit:phase0-baseline:live:drift": "node --import tsx/esm scripts/check-phase0-baseline-backend-drift.ts --artifacts-dir docs/plans/artifacts --backend pi_embedded,native --max-age-hours 36 --min-candidate-sampled-events 10 --max-sampled-events-drop-pct 80 --max-run-outcomes-drop-pct 80 --max-completion-rate-drop-pp 35 --max-cancel-rate-increase-pp 25 --max-error-rate-increase-pp 25 --max-cancel-latency-p95-increase-ms 6000 --write-default-artifacts", "audit:phase0-baseline:live:refresh:drift": "pnpm audit:phase0-baseline:live:refresh && pnpm audit:phase0-baseline:live:drift", "audit:phase0-baseline:live:refresh:drift:rolling": "TAG=$(date -u +%F-%H%M%S) && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --exclude-session-substring probe --tag \"$TAG\" && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source gateway --auto-gateway-cancel-window --tag \"$TAG\" && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit ~/.local/share/flynn/audit.log --source channel --backend pi_embedded --exclude-session-substring probe --tag \"$TAG\" && node --import tsx/esm scripts/capture-phase0-live-baseline.ts --audit 
#!/usr/bin/env node

import { readdir, rm } from 'node:fs/promises';
import { resolve } from 'node:path';
import { parseArgs } from 'node:util';
import {
  planRollingPhase0ArtifactRetention,
  type Phase0RollingArtifactRetentionPlan,
} from '../src/audit/phase0BaselineArtifactRetention.js';

/** Retention depth used when `--keep-per-family` is not supplied. */
const DEFAULT_KEEP_PER_FAMILY = 8;

/**
 * Builds the CLI help text.
 *
 * @returns Multi-line usage string (no trailing newline).
 */
function usage(): string {
  return [
    'Usage: node --import tsx/esm scripts/prune-phase0-baseline-artifacts.ts [options]',
    '',
    'Options:',
    '  --artifacts-dir <path>     Artifacts directory (default: docs/plans/artifacts)',
    '  --keep-per-family <n>      Keep newest <n> rolling tags per family (default: 8)',
    '  --apply                    Apply deletions (default: dry-run)',
    '  --format <text|json>       Output format (default: text)',
    '  --help                     Show usage',
  ].join('\n');
}

/**
 * Parses an optional numeric flag value.
 *
 * @param raw - Raw flag value; `undefined` or `''` means "flag not provided".
 * @param flag - Flag name used in the error message.
 * @returns The parsed finite number, or `undefined` when the flag was not provided.
 * @throws Error when the value is blank or not a finite number.
 */
function parseOptionalNumber(raw: string | undefined, flag: string): number | undefined {
  if (!raw) {
    return undefined;
  }
  // `Number(' ')` is 0, which would silently turn a blank value into "keep nothing";
  // treat whitespace-only input as invalid instead.
  const trimmed = raw.trim();
  const parsed = trimmed === '' ? Number.NaN : Number(trimmed);
  if (!Number.isFinite(parsed)) {
    throw new Error(`Invalid ${flag} value "${raw}". Expected a number.`);
  }
  return parsed;
}

/**
 * Renders the retention plan as a Markdown-flavoured text report.
 *
 * @param plan - Plan returned by `planRollingPhase0ArtifactRetention`.
 * @param artifactsDir - Directory that was scanned (printed verbatim).
 * @param keepPerFamily - Effective keep limit (printed verbatim).
 * @param apply - Whether the run is in apply mode (`true`) or dry-run (`false`).
 * @returns The report text (no trailing newline).
 */
function renderText(
  plan: Phase0RollingArtifactRetentionPlan,
  artifactsDir: string,
  keepPerFamily: number,
  apply: boolean,
): string {
  const familyLines = plan.families.map(
    (row) => `- ${row.family}: tags total=${row.total_tags} keep=${row.keep_tags} remove=${row.remove_tags}`,
  );
  const removeLines = plan.remove.length === 0
    ? ['- none']
    : plan.remove.map((row) => `- ${row.file_name}`);

  return [
    '# Phase-0 Rolling Artifact Prune',
    '',
    `Artifacts dir: ${artifactsDir}`,
    `Keep per family: ${keepPerFamily}`,
    `Mode: ${apply ? 'apply' : 'dry-run'}`,
    `Keep files: ${plan.keep.length}`,
    `Remove files: ${plan.remove.length}`,
    '',
    '## Families',
    ...familyLines,
    '',
    '## Remove List',
    ...removeLines,
  ].join('\n');
}

/**
 * CLI entry point: parses flags, plans retention for the artifacts directory,
 * deletes the planned files when `--apply` is set, and prints the plan to stdout
 * as text or JSON.
 *
 * @throws Error on invalid flags or when the directory cannot be read / a file cannot be removed.
 */
async function main(): Promise<void> {
  const { values } = parseArgs({
    options: {
      'artifacts-dir': { type: 'string' },
      'keep-per-family': { type: 'string' },
      apply: { type: 'boolean' },
      format: { type: 'string' },
      help: { type: 'boolean', short: 'h' },
    },
    strict: true,
    allowPositionals: false,
  });

  if (values.help) {
    process.stdout.write(`${usage()}\n`);
    return;
  }

  const artifactsDir = resolve(values['artifacts-dir'] ?? 'docs/plans/artifacts');
  const requestedKeep =
    parseOptionalNumber(values['keep-per-family'], '--keep-per-family') ?? DEFAULT_KEEP_PER_FAMILY;
  // Reject negatives here so the message names the flag and nothing on disk is touched first.
  if (requestedKeep < 0) {
    throw new Error(
      `Invalid --keep-per-family value "${values['keep-per-family'] ?? ''}". Expected a number >= 0.`,
    );
  }
  // The planner floors fractional limits; floor once so the report shows the effective value.
  const keepPerFamily = Math.floor(requestedKeep);
  const apply = Boolean(values.apply);
  const format = values.format ?? 'text';

  if (format !== 'text' && format !== 'json') {
    throw new Error(`Invalid --format value "${format}".`);
  }

  const files = await readdir(artifactsDir);
  const plan = planRollingPhase0ArtifactRetention(files, keepPerFamily);

  if (apply) {
    for (const row of plan.remove) {
      // force: a file that disappeared since readdir (e.g. overlapping cadence runs)
      // is already in the desired state and must not abort the rest of the prune.
      await rm(resolve(artifactsDir, row.file_name), { force: true });
    }
  }

  if (format === 'json') {
    const report = {
      generated_at: new Date().toISOString(),
      artifacts_dir: artifactsDir,
      keep_per_family: keepPerFamily,
      apply,
      plan,
    };
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
  } else {
    process.stdout.write(`${renderText(plan, artifactsDir, keepPerFamily, apply)}\n`);
  }
}

// Report the failure plus usage on stderr and signal a non-zero exit without
// cutting off pending stdout writes.
main().catch((error: unknown) => {
  const message = error instanceof Error ? error.message : String(error);
  process.stderr.write(`${message}\n\n${usage()}\n`);
  process.exitCode = 1;
});
'phase0_baseline_live_backend_pi_embedded_2026-02-27.md', + 'not_a_phase0_file.txt', + ]); + + expect(rows).toHaveLength(5); + expect(rows.map((row) => row.family).sort()).toEqual([ + 'backend_drift', + 'backend_native', + 'backend_pi_embedded', + 'channel', + 'gateway', + ]); + }); + + it('keeps most recent rolling tags per family and prunes older ones', () => { + const files = [ + 'phase0_baseline_live_2026-02-27-010203.json', + 'phase0_baseline_live_2026-02-27-010203.jsonl', + 'phase0_baseline_live_2026-02-27-020304.json', + 'phase0_baseline_live_2026-02-27-020304.md', + 'phase0_baseline_live_gateway_2026-02-27-010203.json', + 'phase0_baseline_live_gateway_2026-02-27-020304.json', + 'phase0_baseline_live_backend_pi_embedded_2026-02-27-010203.json', + 'phase0_baseline_live_backend_pi_embedded_2026-02-27-020304.json', + 'phase0_baseline_live_backend_native_2026-02-27-010203.json', + 'phase0_baseline_live_backend_native_2026-02-27-020304.json', + 'phase0_baseline_live_backend_drift_2026-02-27-010203.json', + 'phase0_baseline_live_backend_drift_2026-02-27-020304.json', + 'phase0_baseline_live_2026-02-27.json', + ]; + + const plan = planRollingPhase0ArtifactRetention(files, 1); + expect(plan.families).toEqual([ + { family: 'channel', total_tags: 2, keep_tags: 1, remove_tags: 1 }, + { family: 'gateway', total_tags: 2, keep_tags: 1, remove_tags: 1 }, + { family: 'backend_pi_embedded', total_tags: 2, keep_tags: 1, remove_tags: 1 }, + { family: 'backend_native', total_tags: 2, keep_tags: 1, remove_tags: 1 }, + { family: 'backend_drift', total_tags: 2, keep_tags: 1, remove_tags: 1 }, + ]); + + const removeSet = new Set(plan.remove.map((row) => row.file_name)); + expect(removeSet.has('phase0_baseline_live_2026-02-27-010203.json')).toBe(true); + expect(removeSet.has('phase0_baseline_live_2026-02-27-010203.jsonl')).toBe(true); + expect(removeSet.has('phase0_baseline_live_gateway_2026-02-27-010203.json')).toBe(true); + 
/** Artifact families recognised by the rolling-tag retention planner. */
export type Phase0RollingArtifactFamily =
  | 'channel'
  | 'gateway'
  | 'backend_pi_embedded'
  | 'backend_native'
  | 'backend_drift';

/** One artifact file whose name carries a rolling `YYYY-MM-DD-HHMMSS` tag. */
export interface Phase0RollingArtifactFile {
  file_name: string;
  family: Phase0RollingArtifactFamily;
  /** The `YYYY-MM-DD-HHMMSS` tag extracted from the file name. */
  tag: string;
  /** The tag interpreted as a UTC instant, in epoch milliseconds. */
  tag_timestamp_ms: number;
}

/** Result of planning retention: which files stay, which go, and per-family tag counts. */
export interface Phase0RollingArtifactRetentionPlan {
  keep: Phase0RollingArtifactFile[];
  remove: Phase0RollingArtifactFile[];
  families: Array<{
    family: Phase0RollingArtifactFamily;
    total_tags: number;
    keep_tags: number;
    remove_tags: number;
  }>;
}

/** All files of one family that share a tag; they are kept or removed together. */
interface RollingTagGroup {
  tag: string;
  tag_timestamp_ms: number;
  files: Phase0RollingArtifactFile[];
}

const ROLLING_TAG_PATTERN = /^(\d{4})-(\d{2})-(\d{2})-(\d{2})(\d{2})(\d{2})$/;

// Order matters twice: it is the match order for file names and the row order of `plan.families`.
const FAMILY_PATTERNS: ReadonlyArray<{ family: Phase0RollingArtifactFamily; pattern: RegExp }> = [
  {
    family: 'channel',
    pattern: /^phase0_baseline_live_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
  },
  {
    family: 'gateway',
    pattern: /^phase0_baseline_live_gateway_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
  },
  {
    family: 'backend_pi_embedded',
    pattern: /^phase0_baseline_live_backend_pi_embedded_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
  },
  {
    family: 'backend_native',
    pattern: /^phase0_baseline_live_backend_native_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|jsonl|md)$/,
  },
  {
    family: 'backend_drift',
    pattern: /^phase0_baseline_live_backend_drift_(\d{4}-\d{2}-\d{2}-\d{6})\.(json|md)$/,
  },
];

/**
 * Converts a rolling tag (`YYYY-MM-DD-HHMMSS`) to a UTC epoch-millisecond timestamp.
 *
 * @param tag - Candidate tag text.
 * @returns The timestamp, or `undefined` when the tag is malformed or does not name a
 *   real calendar instant (e.g. month 13, Feb 30, hour 25).
 */
function parseRollingTagTimestampMs(tag: string): number | undefined {
  const match = ROLLING_TAG_PATTERN.exec(tag);
  if (!match) {
    return undefined;
  }

  const [
    year = Number.NaN,
    month = Number.NaN,
    day = Number.NaN,
    hour = Number.NaN,
    minute = Number.NaN,
    second = Number.NaN,
  ] = match.slice(1, 7).map(Number);

  const timestampMs = Date.UTC(year, month - 1, day, hour, minute, second);
  if (!Number.isFinite(timestampMs)) {
    return undefined;
  }

  // Date.UTC normalises overflow (month 13 -> next January, hour 25 -> next day) and maps
  // years 0-99 to 1900-1999. Only accept tags that round-trip unchanged, so a malformed
  // name is never ordered - or deleted - as if it were some other instant.
  const instant = new Date(timestampMs);
  const roundTrips =
    instant.getUTCFullYear() === year &&
    instant.getUTCMonth() === month - 1 &&
    instant.getUTCDate() === day &&
    instant.getUTCHours() === hour &&
    instant.getUTCMinutes() === minute &&
    instant.getUTCSeconds() === second;
  return roundTrips ? timestampMs : undefined;
}

/**
 * Classifies a single file name.
 *
 * @param fileName - Bare file name (no directory part).
 * @returns The parsed artifact descriptor, or `undefined` when the name is not a
 *   rolling-tag phase-0 artifact.
 */
function parseRollingArtifactFile(fileName: string): Phase0RollingArtifactFile | undefined {
  for (const { family, pattern } of FAMILY_PATTERNS) {
    const tag = pattern.exec(fileName)?.[1];
    if (tag === undefined) {
      continue;
    }
    const timestampMs = parseRollingTagTimestampMs(tag);
    if (timestampMs === undefined) {
      continue;
    }
    return { file_name: fileName, family, tag, tag_timestamp_ms: timestampMs };
  }
  return undefined;
}

/** Comparator: newest tag first; ties on time fall back to descending tag text. */
function sortByTagTimeDesc(
  a: { tag_timestamp_ms: number; tag: string },
  b: { tag_timestamp_ms: number; tag: string },
): number {
  const delta = b.tag_timestamp_ms - a.tag_timestamp_ms;
  return delta !== 0 ? delta : b.tag.localeCompare(a.tag);
}

/**
 * Filters a directory listing down to rolling-tag phase-0 artifacts.
 *
 * @param fileNames - Bare file names, in any order.
 * @returns One descriptor per recognised file, in input order. Date-only tags
 *   (`YYYY-MM-DD`) and unrelated files are ignored.
 */
export function collectRollingPhase0ArtifactFiles(fileNames: readonly string[]): Phase0RollingArtifactFile[] {
  const parsed: Phase0RollingArtifactFile[] = [];
  for (const fileName of fileNames) {
    const row = parseRollingArtifactFile(fileName);
    if (row) {
      parsed.push(row);
    }
  }
  return parsed;
}

/**
 * Plans which rolling-tag artifacts to keep and which to remove.
 *
 * Files are grouped by family and tag; within each family the `keepPerFamily` newest
 * tags are kept (with every file carrying that tag) and all older tags are removed.
 * Files that are not rolling-tag artifacts appear in neither list.
 *
 * @param fileNames - Bare file names from the artifacts directory.
 * @param keepPerFamily - Number of newest tags to keep per family; fractional values
 *   are floored, `0` removes every rolling-tag artifact.
 * @returns `keep` / `remove` lists sorted by family name, then newest tag first, then
 *   file name; `families` has one row per known family in declaration order.
 * @throws Error when `keepPerFamily` is negative or not finite.
 */
export function planRollingPhase0ArtifactRetention(
  fileNames: readonly string[],
  keepPerFamily: number,
): Phase0RollingArtifactRetentionPlan {
  if (!Number.isFinite(keepPerFamily) || keepPerFamily < 0) {
    throw new Error('keepPerFamily must be greater than or equal to 0.');
  }
  const keepLimit = Math.floor(keepPerFamily);

  // Single pass: family -> tag -> files sharing that tag.
  const groupsByFamily = new Map<Phase0RollingArtifactFamily, Map<string, RollingTagGroup>>();
  for (const row of collectRollingPhase0ArtifactFiles(fileNames)) {
    let groups = groupsByFamily.get(row.family);
    if (!groups) {
      groups = new Map<string, RollingTagGroup>();
      groupsByFamily.set(row.family, groups);
    }
    const group = groups.get(row.tag);
    if (group) {
      group.files.push(row);
    } else {
      groups.set(row.tag, { tag: row.tag, tag_timestamp_ms: row.tag_timestamp_ms, files: [row] });
    }
  }

  const keep: Phase0RollingArtifactFile[] = [];
  const remove: Phase0RollingArtifactFile[] = [];
  const families: Phase0RollingArtifactRetentionPlan['families'] = [];

  for (const { family } of FAMILY_PATTERNS) {
    const newestFirst = [...(groupsByFamily.get(family)?.values() ?? [])].sort(sortByTagTimeDesc);

    newestFirst.forEach((group, index) => {
      (index < keepLimit ? keep : remove).push(...group.files);
    });

    families.push({
      family,
      total_tags: newestFirst.length,
      keep_tags: Math.min(newestFirst.length, keepLimit),
      remove_tags: Math.max(0, newestFirst.length - keepLimit),
    });
  }

  // Deterministic report order: family name, newest tag first, then file name.
  const compareForReport = (a: Phase0RollingArtifactFile, b: Phase0RollingArtifactFile): number =>
    a.family.localeCompare(b.family) ||
    sortByTagTimeDesc(a, b) ||
    a.file_name.localeCompare(b.file_name);

  return {
    keep: keep.sort(compareForReport),
    remove: remove.sort(compareForReport),
    families,
  };
}