From 5b9bcbafeef81823c9000319df3ce6f4dadccec4 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Fri, 27 Feb 2026 13:25:35 -0800 Subject: [PATCH] fix(audit): validate phase0 artifact tag inputs Add shared artifact-tag normalization/validation and apply it to capture, drift, and prune scripts for --tag/--report-tag/--baseline-tag paths. Architecture diagrams reviewed; no flow changes required. --- README.md | 2 +- docs/api/PROTOCOL.md | 2 +- ...phase0-instrumentation-ticket-checklist.md | 2 +- docs/plans/state.json | 18 ++++++++++++++++ scripts/capture-phase0-live-baseline.ts | 3 ++- .../check-phase0-baseline-backend-drift.ts | 11 +++++++--- scripts/prune-phase0-baseline-artifacts.ts | 3 ++- src/audit/artifactTag.test.ts | 21 +++++++++++++++++++ src/audit/artifactTag.ts | 12 +++++++++++ 9 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 src/audit/artifactTag.test.ts create mode 100644 src/audit/artifactTag.ts diff --git a/README.md b/README.md index 8645eaa..f5e7d27 100644 --- a/README.md +++ b/README.md @@ -1656,7 +1656,7 @@ Cadence scheduling (example: every 6 hours via host cron) with rolling timestamp `audit:phase0-baseline:live*` scripts now default to the current UTC date tag when `--tag` is omitted. Use `audit:phase0-baseline:live:refresh:drift:rolling` when you want each cadence run to keep a distinct tag (`YYYY-MM-DD-HHMMSS`) so drift checks compare against a recent prior snapshot immediately. Use `audit:phase0-baseline:live:prune` for dry-run retention planning, and `audit:phase0-baseline:live:prune:apply` to prune older rolling-tag artifacts while keeping the newest snapshots per family. Retention depth defaults to `8` tags per family and can be overridden via non-negative integer `KEEP_PER_FAMILY=`. Prune runs also write reports to `docs/plans/artifacts/phase0_baseline_live_prune_.{md,json}`, and retention now includes these rolling prune reports as a managed family. 
-Both rolling commands accept `TAG=` override; `audit:phase0-baseline:live:refresh:drift:rolling:prune` now reuses the same rolling pipeline/tag and then applies prune retention for that exact tag. +Both rolling commands accept `TAG=` override (artifact tags must be simple filename-safe tokens of 1-64 characters using letters/numbers/`._-`, starting with a letter or number); `audit:phase0-baseline:live:refresh:drift:rolling:prune` now reuses the same rolling pipeline/tag and then applies prune retention for that exact tag. Gateway-origin windows can be captured separately (for example when validating cancel paths): ```bash diff --git a/docs/api/PROTOCOL.md b/docs/api/PROTOCOL.md index f00fcd7..5d1d9b6 100644 --- a/docs/api/PROTOCOL.md +++ b/docs/api/PROTOCOL.md @@ -23,7 +23,7 @@ The gateway provides: - **HTTP Server**: Serves static dashboard and handles webhook endpoints - **Node Capability Negotiation**: Optional companion-node role/capability registration -Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap `), release-bundle export (`--export-release-bundle ` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle ` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template `), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. 
Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of all live windows (channel + gateway + backend-scoped), `pnpm audit:phase0-baseline:live:drift` for backend artifact freshness/drift gates (including optional reaction-rate thresholds, writing `phase0_baseline_live_backend_drift_.md/.json` reports), `pnpm audit:phase0-baseline:live:refresh:drift:rolling` for cadence runs that stamp each capture with a unique UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so drift comparisons can immediately use a prior snapshot (or externally supplied `TAG`), `pnpm audit:phase0-baseline:live:prune` / `pnpm audit:phase0-baseline:live:prune:apply` for rolling-tag artifact retention management (writing `phase0_baseline_live_prune_.md/.json` reports and retaining those prune reports as part of managed rolling families), and `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` for one-command cadence refresh+drift+retention apply reusing the same rolling tag (non-negative integer `KEEP_PER_FAMILY` override supported for retention depth). These scripts default to current UTC-date tags unless `--tag` is explicitly provided. +Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. 
Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap `), release-bundle export (`--export-release-bundle ` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle ` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template `), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of all live windows (channel + gateway + backend-scoped), `pnpm audit:phase0-baseline:live:drift` for backend artifact freshness/drift gates (including optional reaction-rate thresholds, writing `phase0_baseline_live_backend_drift_.md/.json` reports), `pnpm audit:phase0-baseline:live:refresh:drift:rolling` for cadence runs that stamp each capture with a unique UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so drift comparisons can immediately use a prior snapshot (or externally supplied `TAG`), `pnpm audit:phase0-baseline:live:prune` / `pnpm audit:phase0-baseline:live:prune:apply` for rolling-tag artifact retention management (writing `phase0_baseline_live_prune_.md/.json` reports and retaining those prune reports as part of managed rolling families), and `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` for one-command cadence refresh+drift+retention apply reusing the same rolling tag (non-negative integer `KEEP_PER_FAMILY` override 
supported for retention depth). These scripts default to current UTC-date tags unless `--tag` is explicitly provided (artifact tags are constrained to 1-64 filename-safe characters from letters/numbers/`._-`, beginning with a letter or number). ### Execution Model (Sessions + Per-Session Queue) diff --git a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md index 19379ac..7a241c7 100644 --- a/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md +++ b/docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md @@ -203,7 +203,7 @@ Phase 0 is complete when: 2. A baseline summary artifact is generated and committed under `docs/plans/artifacts/`. 3. No user-visible response behavior changed compared to pre-phase baseline. -Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. 
Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), all live windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (channel + gateway + backend-scoped `pi`/`native`; scheduling example included in README), backend artifact freshness/drift checks are now available via `pnpm audit:phase0-baseline:live:drift` (or chained with `pnpm audit:phase0-baseline:live:refresh:drift`) with drift report artifacts written to `docs/plans/artifacts/phase0_baseline_live_backend_drift_.{md,json}` and optional reaction match/skip drift thresholds, cadence runs can preserve distinct timestamped comparison points via `pnpm audit:phase0-baseline:live:refresh:drift:rolling` (supports shared `TAG` override), rolling-tag retention can be managed via `pnpm audit:phase0-baseline:live:prune` (dry-run) / `pnpm audit:phase0-baseline:live:prune:apply` with prune report artifacts written to `phase0_baseline_live_prune_.{md,json}` (and retained as a managed rolling family), and one-command cadence scheduling is available via `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` (non-negative integer `KEEP_PER_FAMILY` optional override). +Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. 
Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), all live windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (channel + gateway + backend-scoped `pi`/`native`; scheduling example included in README), backend artifact freshness/drift checks are now available via `pnpm audit:phase0-baseline:live:drift` (or chained with `pnpm audit:phase0-baseline:live:refresh:drift`) with drift report artifacts written to `docs/plans/artifacts/phase0_baseline_live_backend_drift_.{md,json}` and optional reaction match/skip drift thresholds, cadence runs can preserve distinct timestamped comparison points via `pnpm audit:phase0-baseline:live:refresh:drift:rolling` (supports shared `TAG` override with filename-safe tag values), rolling-tag retention can be managed via `pnpm audit:phase0-baseline:live:prune` (dry-run) / `pnpm audit:phase0-baseline:live:prune:apply` with prune report artifacts written to `phase0_baseline_live_prune_.{md,json}` (and retained as a managed rolling family), and one-command cadence scheduling is available via `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` (non-negative integer `KEEP_PER_FAMILY` optional override). 
## Subagent Model Assignment Plan diff --git a/docs/plans/state.json b/docs/plans/state.json index da9e8f2..f11ba7e 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -552,6 +552,24 @@ ], "test_status": "pnpm test:run src/audit/phase0BaselineDrift.test.ts + pnpm typecheck passing" }, + "phase0-live-baseline-artifact-tag-validation-hardening": { + "status": "completed", + "date": "2026-02-27", + "updated": "2026-02-27", + "summary": "Added shared artifact-tag normalization/validation and applied it across phase-0 capture, drift, and prune scripts (`--tag`, `--report-tag`, `--baseline-tag`) to enforce filename-safe tags and block malformed path-like values.", + "files_modified": [ + "src/audit/artifactTag.ts", + "src/audit/artifactTag.test.ts", + "scripts/capture-phase0-live-baseline.ts", + "scripts/check-phase0-baseline-backend-drift.ts", + "scripts/prune-phase0-baseline-artifacts.ts", + "README.md", + "docs/api/PROTOCOL.md", + "docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/audit/artifactTag.test.ts src/audit/phase0BaselineDrift.test.ts + pnpm typecheck passing" + }, "phase0-instrumentation-ticket-checklist": { "status": "completed", "date": "2026-02-25", diff --git a/scripts/capture-phase0-live-baseline.ts b/scripts/capture-phase0-live-baseline.ts index 5d41103..c3075cc 100644 --- a/scripts/capture-phase0-live-baseline.ts +++ b/scripts/capture-phase0-live-baseline.ts @@ -4,6 +4,7 @@ import { mkdir, writeFile } from 'node:fs/promises'; import { dirname, resolve } from 'node:path'; import { parseArgs } from 'node:util'; import { queryAuditLogs } from '../src/audit/export.js'; +import { normalizeArtifactTag } from '../src/audit/artifactTag.js'; import { capturePhase0LiveBaselineEvents, type Phase0BackendTarget, @@ -196,7 +197,7 @@ async function main(): Promise { } const auditPath = expandHomePath(values.audit ?? 
'~/.local/share/flynn/audit.log'); - const tag = values.tag ?? isoDateTagNow(); + const tag = normalizeArtifactTag(values.tag ?? isoDateTagNow(), '--tag'); const channels = parseCsv(values.channel); let sources = parseSources(values.source); const backendTargets = parseBackendTargets(values.backend); diff --git a/scripts/check-phase0-baseline-backend-drift.ts b/scripts/check-phase0-baseline-backend-drift.ts index ba96ccd..7a1481b 100644 --- a/scripts/check-phase0-baseline-backend-drift.ts +++ b/scripts/check-phase0-baseline-backend-drift.ts @@ -3,6 +3,7 @@ import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises'; import { dirname, resolve } from 'node:path'; import { parseArgs } from 'node:util'; +import { normalizeArtifactTag } from '../src/audit/artifactTag.js'; import { comparePhase0BaselineDrift, evaluatePhase0BaselineDriftGate, @@ -366,10 +367,14 @@ async function main(): Promise { const artifactsDir = resolve(values['artifacts-dir'] ?? 'docs/plans/artifacts'); const backends = parseBackends(values.backend); - const candidateTag = values.tag; - const baselineTag = values['baseline-tag']; + const candidateTag = values.tag + ? normalizeArtifactTag(values.tag, '--tag') + : undefined; + const baselineTag = values['baseline-tag'] + ? normalizeArtifactTag(values['baseline-tag'], '--baseline-tag') + : undefined; const format = parseFormat(values.format); - const reportTag = values['report-tag'] ?? isoDateTagNow(); + const reportTag = normalizeArtifactTag(values['report-tag'] ?? 
isoDateTagNow(), '--report-tag'); const writeDefaultArtifacts = Boolean(values['write-default-artifacts']); const maxAgeHours = parseOptionalNumber(values['max-age-hours'], '--max-age-hours'); if (typeof maxAgeHours === 'number' && maxAgeHours < 0) { diff --git a/scripts/prune-phase0-baseline-artifacts.ts b/scripts/prune-phase0-baseline-artifacts.ts index 574e7db..fdf17ba 100644 --- a/scripts/prune-phase0-baseline-artifacts.ts +++ b/scripts/prune-phase0-baseline-artifacts.ts @@ -3,6 +3,7 @@ import { mkdir, readdir, rm, writeFile } from 'node:fs/promises'; import { dirname, resolve } from 'node:path'; import { parseArgs } from 'node:util'; +import { normalizeArtifactTag } from '../src/audit/artifactTag.js'; import { planRollingPhase0ArtifactRetention, type Phase0RollingArtifactRetentionPlan, @@ -103,7 +104,7 @@ async function main(): Promise { const keepPerFamily = parseOptionalInteger(values['keep-per-family'], '--keep-per-family') ?? 8; const apply = Boolean(values.apply); const format = values.format ?? 'text'; - const reportTag = values['report-tag'] ?? isoDateTagNow(); + const reportTag = normalizeArtifactTag(values['report-tag'] ?? 
isoDateTagNow(), '--report-tag'); const writeDefaultArtifacts = Boolean(values['write-default-artifacts']); if (format !== 'text' && format !== 'json') { diff --git a/src/audit/artifactTag.test.ts b/src/audit/artifactTag.test.ts new file mode 100644 index 0000000..5979992 --- /dev/null +++ b/src/audit/artifactTag.test.ts @@ -0,0 +1,21 @@ +import { describe, expect, it } from 'vitest'; +import { normalizeArtifactTag } from './artifactTag.js'; + +describe('normalizeArtifactTag', () => { + it('accepts common date and rolling timestamp tags', () => { + expect(normalizeArtifactTag('2026-02-27', '--tag')).toBe('2026-02-27'); + expect(normalizeArtifactTag('2026-02-27-193429', '--report-tag')).toBe('2026-02-27-193429'); + expect(normalizeArtifactTag('phase0_debug.1', '--tag')).toBe('phase0_debug.1'); + }); + + it('trims surrounding whitespace', () => { + expect(normalizeArtifactTag(' 2026-02-27 ', '--tag')).toBe('2026-02-27'); + }); + + it('rejects empty or invalid tags', () => { + expect(() => normalizeArtifactTag(' ', '--tag')).toThrow('--tag'); + expect(() => normalizeArtifactTag('../escape', '--tag')).toThrow('--tag'); + expect(() => normalizeArtifactTag('tag with spaces', '--tag')).toThrow('--tag'); + expect(() => normalizeArtifactTag('tag/slash', '--tag')).toThrow('--tag'); + }); +}); diff --git a/src/audit/artifactTag.ts b/src/audit/artifactTag.ts new file mode 100644 index 0000000..2e665bb --- /dev/null +++ b/src/audit/artifactTag.ts @@ -0,0 +1,12 @@ +const ARTIFACT_TAG_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$/; + +export function normalizeArtifactTag(raw: string, flagName: string): string { + const value = raw.trim(); + if (value.length === 0) { + throw new Error(`${flagName} cannot be empty.`); + } + if (!ARTIFACT_TAG_PATTERN.test(value)) { + throw new Error(`${flagName} contains invalid characters. Allowed: letters, numbers, ".", "_", "-".`); + } + return value; +}