From 06998ac65dab10a9fe2162ec00aa9ee13eb23e42 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Fri, 27 Feb 2026 13:11:31 -0800 Subject: [PATCH] fix(audit): require integer rolling retention keep limits Validate keepPerFamily/--keep-per-family as non-negative integers, remove silent flooring, add regression coverage, and sync runbook/docs wording. --- README.md | 2 +- docs/api/PROTOCOL.md | 2 +- docs/architecture/AGENT_DIAGRAM.md | 2 +- .../architecture/GATEWAY_SESSIONS_AND_QUEUE.md | 2 +- ...-phase0-instrumentation-ticket-checklist.md | 2 +- docs/plans/state.json | 18 ++++++++++++++++++ scripts/prune-phase0-baseline-artifacts.ts | 16 +++++++++++----- .../phase0BaselineArtifactRetention.test.ts | 4 ++++ src/audit/phase0BaselineArtifactRetention.ts | 10 ++++++++-- 9 files changed, 46 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 428d3b3..d332cf9 100644 --- a/README.md +++ b/README.md @@ -1654,7 +1654,7 @@ Cadence scheduling (example: every 6 hours via host cron) with rolling timestamp ``` `audit:phase0-baseline:live*` scripts now default to the current UTC date tag when `--tag` is omitted. Use `audit:phase0-baseline:live:refresh:drift:rolling` when you want each cadence run to keep a distinct tag (`YYYY-MM-DD-HHMMSS`) so drift checks compare against a recent prior snapshot immediately. -Use `audit:phase0-baseline:live:prune` for dry-run retention planning, and `audit:phase0-baseline:live:prune:apply` to prune older rolling-tag artifacts while keeping the newest snapshots per family. Retention depth defaults to `8` tags per family and can be overridden via `KEEP_PER_FAMILY=`. Prune runs also write reports to `docs/plans/artifacts/phase0_baseline_live_prune_.{md,json}`, and retention now includes these rolling prune reports as a managed family. 
+Use `audit:phase0-baseline:live:prune` for dry-run retention planning, and `audit:phase0-baseline:live:prune:apply` to prune older rolling-tag artifacts while keeping the newest snapshots per family. Retention depth defaults to `8` tags per family and can be overridden via non-negative integer `KEEP_PER_FAMILY=`. Prune runs also write reports to `docs/plans/artifacts/phase0_baseline_live_prune_.{md,json}`, and retention now includes these rolling prune reports as a managed family. Both rolling commands accept `TAG=` override; `audit:phase0-baseline:live:refresh:drift:rolling:prune` now reuses the same rolling pipeline/tag and then applies prune retention for that exact tag. Gateway-origin windows can be captured separately (for example when validating cancel paths): diff --git a/docs/api/PROTOCOL.md b/docs/api/PROTOCOL.md index a42db52..77d90fe 100644 --- a/docs/api/PROTOCOL.md +++ b/docs/api/PROTOCOL.md @@ -23,7 +23,7 @@ The gateway provides: - **HTTP Server**: Serves static dashboard and handles webhook endpoints - **Node Capability Negotiation**: Optional companion-node role/capability registration -Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap `), release-bundle export (`--export-release-bundle ` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle ` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template `), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. 
Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of all live windows (channel + gateway + backend-scoped), `pnpm audit:phase0-baseline:live:drift` for backend artifact freshness/drift gates (writing `phase0_baseline_live_backend_drift_.md/.json` reports), `pnpm audit:phase0-baseline:live:refresh:drift:rolling` for cadence runs that stamp each capture with a unique UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so drift comparisons can immediately use a prior snapshot (or externally supplied `TAG`), `pnpm audit:phase0-baseline:live:prune` / `pnpm audit:phase0-baseline:live:prune:apply` for rolling-tag artifact retention management (writing `phase0_baseline_live_prune_.md/.json` reports and retaining those prune reports as part of managed rolling families), and `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` for one-command cadence refresh+drift+retention apply reusing the same rolling tag (`KEEP_PER_FAMILY` override supported for retention depth). These scripts default to current UTC-date tags unless `--tag` is explicitly provided. +Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. 
Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap `), release-bundle export (`--export-release-bundle ` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle ` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template `), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of all live windows (channel + gateway + backend-scoped), `pnpm audit:phase0-baseline:live:drift` for backend artifact freshness/drift gates (writing `phase0_baseline_live_backend_drift_.md/.json` reports), `pnpm audit:phase0-baseline:live:refresh:drift:rolling` for cadence runs that stamp each capture with a unique UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so drift comparisons can immediately use a prior snapshot (or externally supplied `TAG`), `pnpm audit:phase0-baseline:live:prune` / `pnpm audit:phase0-baseline:live:prune:apply` for rolling-tag artifact retention management (writing `phase0_baseline_live_prune_.md/.json` reports and retaining those prune reports as part of managed rolling families), and `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` for one-command cadence refresh+drift+retention apply reusing the same rolling tag (non-negative integer `KEEP_PER_FAMILY` override supported for retention depth). 
These scripts default to current UTC-date tags unless `--tag` is explicitly provided. ### Execution Model (Sessions + Per-Session Queue) diff --git a/docs/architecture/AGENT_DIAGRAM.md b/docs/architecture/AGENT_DIAGRAM.md index 69da14d..0c83eb2 100644 --- a/docs/architecture/AGENT_DIAGRAM.md +++ b/docs/architecture/AGENT_DIAGRAM.md @@ -173,7 +173,7 @@ Gateway streaming UX signals: - `pnpm audit:phase0-baseline:live:drift` evaluates backend-scoped artifact freshness/drift gates and writes `docs/plans/artifacts/phase0_baseline_live_backend_drift_.md/.json`; `pnpm audit:phase0-baseline:live:refresh:drift` runs capture + drift checks in one cadence step. - `pnpm audit:phase0-baseline:live:refresh:drift:rolling` runs the same full refresh+drift flow with a shared UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so each cadence run keeps distinct backend/drift artifacts for immediate baseline-vs-prior comparisons. - `pnpm audit:phase0-baseline:live:prune` provides dry-run retention planning for rolling-tag artifacts; `pnpm audit:phase0-baseline:live:prune:apply` deletes older rolling snapshots while keeping the newest tags per artifact family. -- `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` now reuses the rolling refresh+drift pipeline via shared `TAG` env wiring, then applies retention (`KEEP_PER_FAMILY`) and writes prune reports tagged to that same rolling run (`phase0_baseline_live_prune_.md/.json`). +- `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` now reuses the rolling refresh+drift pipeline via shared `TAG` env wiring, then applies retention (non-negative integer `KEEP_PER_FAMILY`) and writes prune reports tagged to that same rolling run (`phase0_baseline_live_prune_.md/.json`). - Rolling retention families now include cadence prune reports themselves (`phase0_baseline_live_prune_.md/.json`) to prevent unbounded prune-report growth. 
- `audit:phase0-baseline:live*` scripts are cadence-safe by default (UTC-date tags auto-generated unless explicitly overridden). - Canvas artifacts are persisted by the gateway so session UI surfaces can recover after daemon restarts. diff --git a/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md b/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md index 28762f4..9e96aa4 100644 --- a/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md +++ b/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md @@ -38,7 +38,7 @@ If you only want the protocol surface, see `docs/api/PROTOCOL.md`. - `pnpm audit:phase0-baseline:live:drift` checks backend-scoped artifact freshness/drift gates and writes `phase0_baseline_live_backend_drift_.md/.json`; `pnpm audit:phase0-baseline:live:refresh:drift` chains refresh + drift checks for scheduled cadence runs. - `pnpm audit:phase0-baseline:live:refresh:drift:rolling` performs the same chain using one UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) across channel/gateway/backend/drift outputs so each cadence run preserves a distinct comparison point. - `pnpm audit:phase0-baseline:live:prune` (dry-run) and `pnpm audit:phase0-baseline:live:prune:apply` (delete) manage retention of rolling-tag artifacts to control artifact growth while preserving newest snapshots per family. -- `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` combines rolling refresh+drift with retention apply for one-command cron scheduling using a shared `TAG`; adjust retention depth with `KEEP_PER_FAMILY` and use generated `phase0_baseline_live_prune_.md/.json` artifacts for retention audit traceability. +- `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` combines rolling refresh+drift with retention apply for one-command cron scheduling using a shared `TAG`; adjust retention depth with `KEEP_PER_FAMILY` (must be a non-negative integer) and use generated `phase0_baseline_live_prune_.md/.json` artifacts for retention audit traceability. 
- Retention management also covers rolling prune-report artifacts (`phase0_baseline_live_prune_.md/.json`) as a first-class family. - `audit:phase0-baseline:live*` package scripts now omit fixed tags so scheduled runs automatically roll to current UTC-date artifact tags. - Companion CLI supports one-shot shell bootstrap metadata for live sessions (`--app-version`/`--status-text`, `--latitude`/`--longitude`, `--push-token`) so desktop/mobile wrappers can initialize node status/location/push in a single launch flow. diff --git a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md index ef83710..3f5768f 100644 --- a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md +++ b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md @@ -203,7 +203,7 @@ Phase 0 is complete when: 2. A baseline summary artifact is generated and committed under `docs/plans/artifacts/`. 3. No user-visible response behavior changed compared to pre-phase baseline. -Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. 
Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), all live windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (channel + gateway + backend-scoped `pi`/`native`; scheduling example included in README), backend artifact freshness/drift checks are now available via `pnpm audit:phase0-baseline:live:drift` (or chained with `pnpm audit:phase0-baseline:live:refresh:drift`) with drift report artifacts written to `docs/plans/artifacts/phase0_baseline_live_backend_drift_.{md,json}`, cadence runs can preserve distinct timestamped comparison points via `pnpm audit:phase0-baseline:live:refresh:drift:rolling` (supports shared `TAG` override), rolling-tag retention can be managed via `pnpm audit:phase0-baseline:live:prune` (dry-run) / `pnpm audit:phase0-baseline:live:prune:apply` with prune report artifacts written to `phase0_baseline_live_prune_.{md,json}` (and retained as a managed rolling family), and one-command cadence scheduling is available via `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` (`KEEP_PER_FAMILY` optional override). +Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. 
Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), all live windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (channel + gateway + backend-scoped `pi`/`native`; scheduling example included in README), backend artifact freshness/drift checks are now available via `pnpm audit:phase0-baseline:live:drift` (or chained with `pnpm audit:phase0-baseline:live:refresh:drift`) with drift report artifacts written to `docs/plans/artifacts/phase0_baseline_live_backend_drift_.{md,json}`, cadence runs can preserve distinct timestamped comparison points via `pnpm audit:phase0-baseline:live:refresh:drift:rolling` (supports shared `TAG` override), rolling-tag retention can be managed via `pnpm audit:phase0-baseline:live:prune` (dry-run) / `pnpm audit:phase0-baseline:live:prune:apply` with prune report artifacts written to `phase0_baseline_live_prune_.{md,json}` (and retained as a managed rolling family), and one-command cadence scheduling is available via `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` (optional `KEEP_PER_FAMILY` override; must be a non-negative integer). 
## Subagent Model Assignment Plan diff --git a/docs/plans/state.json b/docs/plans/state.json index 9af7261..dc5ddbf 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -475,6 +475,24 @@ ], "test_status": "pnpm test:run src/audit/phase0BaselineSummary.test.ts + pnpm typecheck passing" }, + "phase0-live-baseline-retention-keep-integer-validation": { + "status": "completed", + "date": "2026-02-27", + "updated": "2026-02-27", + "summary": "Hardened rolling retention keep-limit handling by requiring non-negative integer `keepPerFamily` values in both planner and prune CLI (`--keep-per-family`), eliminating silent flooring of fractional values and surfacing invalid operator input early.", + "files_modified": [ + "src/audit/phase0BaselineArtifactRetention.ts", + "src/audit/phase0BaselineArtifactRetention.test.ts", + "scripts/prune-phase0-baseline-artifacts.ts", + "README.md", + "docs/api/PROTOCOL.md", + "docs/architecture/AGENT_DIAGRAM.md", + "docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md", + "docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/audit/phase0BaselineArtifactRetention.test.ts + pnpm typecheck passing" + }, "phase0-instrumentation-ticket-checklist": { "status": "completed", "date": "2026-02-25", diff --git a/scripts/prune-phase0-baseline-artifacts.ts b/scripts/prune-phase0-baseline-artifacts.ts index f0825d0..574e7db 100644 --- a/scripts/prune-phase0-baseline-artifacts.ts +++ b/scripts/prune-phase0-baseline-artifacts.ts @@ -29,13 +29,19 @@ function isoDateTagNow(): string { return new Date().toISOString().slice(0, 10); } -function parseOptionalNumber(raw: string | undefined, flag: string): number | undefined { +function parseOptionalInteger(raw: string | undefined, flag: string): number | undefined { if (!raw) { return undefined; } const parsed = Number(raw); if (!Number.isFinite(parsed)) { - throw new Error(`Invalid ${flag} value "${raw}". 
Expected a number.`); + throw new Error(`Invalid ${flag} value "${raw}". Expected an integer.`); + } + if (!Number.isInteger(parsed)) { + throw new Error(`Invalid ${flag} value "${raw}". Expected an integer.`); + } + if (parsed < 0) { + throw new Error(`${flag} must be greater than or equal to 0.`); } return parsed; } @@ -94,7 +100,7 @@ async function main(): Promise { } const artifactsDir = resolve(values['artifacts-dir'] ?? 'docs/plans/artifacts'); - const keepPerFamily = parseOptionalNumber(values['keep-per-family'], '--keep-per-family') ?? 8; + const keepPerFamily = parseOptionalInteger(values['keep-per-family'], '--keep-per-family') ?? 8; const apply = Boolean(values.apply); const format = values.format ?? 'text'; const reportTag = values['report-tag'] ?? isoDateTagNow(); @@ -128,7 +134,7 @@ async function main(): Promise { const payload = { generated_at: new Date().toISOString(), artifacts_dir: artifactsDir, - keep_per_family: Math.floor(keepPerFamily), + keep_per_family: keepPerFamily, apply, report_tag: reportTag, reports: { @@ -138,7 +144,7 @@ async function main(): Promise { plan, }; const jsonOutput = JSON.stringify(payload, null, 2); - const textOutput = renderText(plan, artifactsDir, Math.floor(keepPerFamily), apply); + const textOutput = renderText(plan, artifactsDir, keepPerFamily, apply); if (summaryJsonOut) { await writeTextFile(summaryJsonOut, jsonOutput); diff --git a/src/audit/phase0BaselineArtifactRetention.test.ts b/src/audit/phase0BaselineArtifactRetention.test.ts index fd34309..1072316 100644 --- a/src/audit/phase0BaselineArtifactRetention.test.ts +++ b/src/audit/phase0BaselineArtifactRetention.test.ts @@ -98,6 +98,10 @@ describe('phase0BaselineArtifactRetention', () => { expect(() => planRollingPhase0ArtifactRetention([], -1)).toThrow('keepPerFamily'); }); + it('rejects non-integer keep limit', () => { + expect(() => planRollingPhase0ArtifactRetention([], 1.5)).toThrow('keepPerFamily'); + }); + it('ignores malformed rolling tags with 
impossible date or time values', () => { const rows = collectRollingPhase0ArtifactFiles([ 'phase0_baseline_live_2026-13-27-010203.json', diff --git a/src/audit/phase0BaselineArtifactRetention.ts b/src/audit/phase0BaselineArtifactRetention.ts index 177165e..6f69869 100644 --- a/src/audit/phase0BaselineArtifactRetention.ts +++ b/src/audit/phase0BaselineArtifactRetention.ts @@ -142,11 +142,17 @@ export function planRollingPhase0ArtifactRetention( fileNames: string[], keepPerFamily: number, ): Phase0RollingArtifactRetentionPlan { - if (!Number.isFinite(keepPerFamily) || keepPerFamily < 0) { + if (!Number.isFinite(keepPerFamily)) { + throw new Error('keepPerFamily must be a finite integer greater than or equal to 0.'); + } + if (!Number.isInteger(keepPerFamily)) { + throw new Error('keepPerFamily must be an integer greater than or equal to 0.'); + } + if (keepPerFamily < 0) { throw new Error('keepPerFamily must be greater than or equal to 0.'); } - const keepLimit = Math.floor(keepPerFamily); + const keepLimit = keepPerFamily; const parsed = collectRollingPhase0ArtifactFiles(fileNames); const keep: Phase0RollingArtifactFile[] = [];