fix(audit): validate phase0 artifact tag inputs

Add shared artifact-tag normalization/validation and apply it to capture, drift, and prune scripts for --tag/--report-tag/--baseline-tag paths. Architecture diagrams reviewed; no flow changes required.
This commit is contained in:
William Valentin
2026-02-27 13:25:35 -08:00
parent 98f954de0d
commit 5b9bcbafee
9 changed files with 66 additions and 8 deletions
+1 -1
View File
@@ -1656,7 +1656,7 @@ Cadence scheduling (example: every 6 hours via host cron) with rolling timestamp
`audit:phase0-baseline:live*` scripts now default to the current UTC date tag when `--tag` is omitted.
Use `audit:phase0-baseline:live:refresh:drift:rolling` when you want each cadence run to keep a distinct tag (`YYYY-MM-DD-HHMMSS`) so drift checks compare against a recent prior snapshot immediately.
Use `audit:phase0-baseline:live:prune` for dry-run retention planning, and `audit:phase0-baseline:live:prune:apply` to prune older rolling-tag artifacts while keeping the newest snapshots per family. Retention depth defaults to `8` tags per family and can be overridden via non-negative integer `KEEP_PER_FAMILY=<n>`. Prune runs also write reports to `docs/plans/artifacts/phase0_baseline_live_prune_<tag>.{md,json}`, and retention now includes these rolling prune reports as a managed family.
Both rolling commands accept `TAG=<YYYY-MM-DD-HHMMSS>` override; `audit:phase0-baseline:live:refresh:drift:rolling:prune` now reuses the same rolling pipeline/tag and then applies prune retention for that exact tag.
Both rolling commands accept `TAG=<YYYY-MM-DD-HHMMSS>` override (artifact tags must be simple filename-safe tokens of at most 64 characters that start with a letter or number and use only letters/numbers/`._-`); `audit:phase0-baseline:live:refresh:drift:rolling:prune` now reuses the same rolling pipeline/tag and then applies prune retention for that exact tag.
Gateway-origin windows can be captured separately (for example when validating cancel paths):
```bash
+1 -1
View File
@@ -23,7 +23,7 @@ The gateway provides:
- **HTTP Server**: Serves static dashboard and handles webhook endpoints
- **Node Capability Negotiation**: Optional companion-node role/capability registration
Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap <path|->`), release-bundle export (`--export-release-bundle <dir>` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle <dir>` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template <dir>`), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of all live windows (channel + gateway + backend-scoped), `pnpm audit:phase0-baseline:live:drift` for backend artifact freshness/drift gates (including optional reaction-rate thresholds, writing `phase0_baseline_live_backend_drift_<tag>.md/.json` reports), `pnpm audit:phase0-baseline:live:refresh:drift:rolling` for cadence runs that stamp each capture with a unique UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so drift comparisons can immediately use a prior snapshot (or externally supplied `TAG`), `pnpm audit:phase0-baseline:live:prune` / `pnpm audit:phase0-baseline:live:prune:apply` for rolling-tag artifact retention management (writing `phase0_baseline_live_prune_<tag>.md/.json` reports and retaining those prune reports as part of managed 
rolling families), and `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` for one-command cadence refresh+drift+retention apply reusing the same rolling tag (non-negative integer `KEEP_PER_FAMILY` override supported for retention depth). These scripts default to current UTC-date tags unless `--tag` is explicitly provided.
Operational note: onboarding (`flynn setup` / `flynn onboard`) now runs post-save live readiness checks (model/channel/memory/automation) and prints a guided first-success task flow. Companion CLI now also supports bootstrap-manifest export (`flynn companion --export-bootstrap <path|->`), release-bundle export (`--export-release-bundle <dir>` with optional `--signing-key`/`--signing-key-id` signature output), release-bundle verification (`--verify-release-bundle <dir>` with optional `--verify-signing-key`/`--verify-signing-key-id`/`--require-signature`), platform shell-template export (`--export-shell-template <dir>`), plus richer shell bootstrap flags for status/location/push (`--app-version`, `--latitude/--longitude`, `--push-token`, etc.) for desktop/mobile app packaging without changing JSON-RPC method/event shapes. Audit observability now includes live phase-0 baseline capture flows: `pnpm audit:phase0-baseline:live` for channel-origin windows, backend-scoped variants (`pnpm audit:phase0-baseline:live:pi` / `pnpm audit:phase0-baseline:live:native`) via `--backend`, `pnpm audit:phase0-baseline:live:gateway` (auto-detected cancel window) for gateway-origin windows, `pnpm audit:phase0-baseline:live:refresh` for one-shot refresh of all live windows (channel + gateway + backend-scoped), `pnpm audit:phase0-baseline:live:drift` for backend artifact freshness/drift gates (including optional reaction-rate thresholds, writing `phase0_baseline_live_backend_drift_<tag>.md/.json` reports), `pnpm audit:phase0-baseline:live:refresh:drift:rolling` for cadence runs that stamp each capture with a unique UTC timestamp tag (`YYYY-MM-DD-HHMMSS`) so drift comparisons can immediately use a prior snapshot (or externally supplied `TAG`), `pnpm audit:phase0-baseline:live:prune` / `pnpm audit:phase0-baseline:live:prune:apply` for rolling-tag artifact retention management (writing `phase0_baseline_live_prune_<tag>.md/.json` reports and retaining those prune reports as part of managed 
rolling families), and `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` for one-command cadence refresh+drift+retention apply reusing the same rolling tag (non-negative integer `KEEP_PER_FAMILY` override supported for retention depth). These scripts default to current UTC-date tags unless `--tag` is explicitly provided (artifact tags are constrained to at most 64 filename-safe characters — letters/numbers/`._-` — and must start with a letter or number).
### Execution Model (Sessions + Per-Session Queue)
@@ -203,7 +203,7 @@ Phase 0 is complete when:
2. A baseline summary artifact is generated and committed under `docs/plans/artifacts/`.
3. No user-visible response behavior changed compared to pre-phase baseline.
Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), all live windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (channel + gateway + backend-scoped `pi`/`native`; scheduling example included in README), backend artifact freshness/drift checks are now available via `pnpm audit:phase0-baseline:live:drift` (or chained with `pnpm audit:phase0-baseline:live:refresh:drift`) with drift report artifacts written to `docs/plans/artifacts/phase0_baseline_live_backend_drift_<tag>.{md,json}` and optional reaction match/skip drift thresholds, cadence runs can preserve distinct timestamped comparison points via `pnpm audit:phase0-baseline:live:refresh:drift:rolling` (supports shared `TAG` override), rolling-tag retention can be managed via `pnpm audit:phase0-baseline:live:prune` (dry-run) / `pnpm audit:phase0-baseline:live:prune:apply` with prune report artifacts written to `phase0_baseline_live_prune_<tag>.{md,json}` (and retained as a managed rolling family), and one-command cadence scheduling is available via `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` (non-negative integer `KEEP_PER_FAMILY` optional override).
Follow-up status (2026-02-27): live channel-session artifacts exist under `docs/plans/artifacts/phase0_baseline_live_2026-02-27.*` via `pnpm audit:phase0-baseline:live` (anonymized IDs), and a second gateway-origin live window (including `run.cancel` + `cancel_requested`/`cancelled`) exists under `docs/plans/artifacts/phase0_baseline_live_gateway_2026-02-27.*`. Gateway window refreshes can now run via `pnpm audit:phase0-baseline:live:gateway` (auto-selected cancel window), all live windows can be refreshed together with `pnpm audit:phase0-baseline:live:refresh` (channel + gateway + backend-scoped `pi`/`native`; scheduling example included in README), backend artifact freshness/drift checks are now available via `pnpm audit:phase0-baseline:live:drift` (or chained with `pnpm audit:phase0-baseline:live:refresh:drift`) with drift report artifacts written to `docs/plans/artifacts/phase0_baseline_live_backend_drift_<tag>.{md,json}` and optional reaction match/skip drift thresholds, cadence runs can preserve distinct timestamped comparison points via `pnpm audit:phase0-baseline:live:refresh:drift:rolling` (supports shared `TAG` override with filename-safe tag values), rolling-tag retention can be managed via `pnpm audit:phase0-baseline:live:prune` (dry-run) / `pnpm audit:phase0-baseline:live:prune:apply` with prune report artifacts written to `phase0_baseline_live_prune_<tag>.{md,json}` (and retained as a managed rolling family), and one-command cadence scheduling is available via `pnpm audit:phase0-baseline:live:refresh:drift:rolling:prune` (non-negative integer `KEEP_PER_FAMILY` optional override).
## Subagent Model Assignment Plan
+18
View File
@@ -552,6 +552,24 @@
],
"test_status": "pnpm test:run src/audit/phase0BaselineDrift.test.ts + pnpm typecheck passing"
},
"phase0-live-baseline-artifact-tag-validation-hardening": {
"status": "completed",
"date": "2026-02-27",
"updated": "2026-02-27",
"summary": "Added shared artifact-tag normalization/validation and applied it across phase-0 capture, drift, and prune scripts (`--tag`, `--report-tag`, `--baseline-tag`) to enforce filename-safe tags and block malformed path-like values.",
"files_modified": [
"src/audit/artifactTag.ts",
"src/audit/artifactTag.test.ts",
"scripts/capture-phase0-live-baseline.ts",
"scripts/check-phase0-baseline-backend-drift.ts",
"scripts/prune-phase0-baseline-artifacts.ts",
"README.md",
"docs/api/PROTOCOL.md",
"docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md",
"docs/plans/state.json"
],
"test_status": "pnpm test:run src/audit/artifactTag.test.ts src/audit/phase0BaselineDrift.test.ts + pnpm typecheck passing"
},
"phase0-instrumentation-ticket-checklist": {
"status": "completed",
"date": "2026-02-25",
+2 -1
View File
@@ -4,6 +4,7 @@ import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { parseArgs } from 'node:util';
import { queryAuditLogs } from '../src/audit/export.js';
import { normalizeArtifactTag } from '../src/audit/artifactTag.js';
import {
capturePhase0LiveBaselineEvents,
type Phase0BackendTarget,
@@ -196,7 +197,7 @@ async function main(): Promise<void> {
}
const auditPath = expandHomePath(values.audit ?? '~/.local/share/flynn/audit.log');
const tag = values.tag ?? isoDateTagNow();
const tag = normalizeArtifactTag(values.tag ?? isoDateTagNow(), '--tag');
const channels = parseCsv(values.channel);
let sources = parseSources(values.source);
const backendTargets = parseBackendTargets(values.backend);
@@ -3,6 +3,7 @@
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { parseArgs } from 'node:util';
import { normalizeArtifactTag } from '../src/audit/artifactTag.js';
import {
comparePhase0BaselineDrift,
evaluatePhase0BaselineDriftGate,
@@ -366,10 +367,14 @@ async function main(): Promise<void> {
const artifactsDir = resolve(values['artifacts-dir'] ?? 'docs/plans/artifacts');
const backends = parseBackends(values.backend);
const candidateTag = values.tag;
const baselineTag = values['baseline-tag'];
const candidateTag = values.tag
? normalizeArtifactTag(values.tag, '--tag')
: undefined;
const baselineTag = values['baseline-tag']
? normalizeArtifactTag(values['baseline-tag'], '--baseline-tag')
: undefined;
const format = parseFormat(values.format);
const reportTag = values['report-tag'] ?? isoDateTagNow();
const reportTag = normalizeArtifactTag(values['report-tag'] ?? isoDateTagNow(), '--report-tag');
const writeDefaultArtifacts = Boolean(values['write-default-artifacts']);
const maxAgeHours = parseOptionalNumber(values['max-age-hours'], '--max-age-hours');
if (typeof maxAgeHours === 'number' && maxAgeHours < 0) {
+2 -1
View File
@@ -3,6 +3,7 @@
import { mkdir, readdir, rm, writeFile } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { parseArgs } from 'node:util';
import { normalizeArtifactTag } from '../src/audit/artifactTag.js';
import {
planRollingPhase0ArtifactRetention,
type Phase0RollingArtifactRetentionPlan,
@@ -103,7 +104,7 @@ async function main(): Promise<void> {
const keepPerFamily = parseOptionalInteger(values['keep-per-family'], '--keep-per-family') ?? 8;
const apply = Boolean(values.apply);
const format = values.format ?? 'text';
const reportTag = values['report-tag'] ?? isoDateTagNow();
const reportTag = normalizeArtifactTag(values['report-tag'] ?? isoDateTagNow(), '--report-tag');
const writeDefaultArtifacts = Boolean(values['write-default-artifacts']);
if (format !== 'text' && format !== 'json') {
+21
View File
@@ -0,0 +1,21 @@
import { describe, expect, it } from 'vitest';
import { normalizeArtifactTag } from './artifactTag.js';
// Behavioural tests for normalizeArtifactTag: accepted tag shapes, whitespace
// trimming, and every rejection path (empty, bad characters, bad leading
// character, over-long). Rejections only assert that the flag name appears in
// the error so the exact wording stays free to change.
describe('normalizeArtifactTag', () => {
  it('accepts common date and rolling timestamp tags', () => {
    expect(normalizeArtifactTag('2026-02-27', '--tag')).toBe('2026-02-27');
    expect(normalizeArtifactTag('2026-02-27-193429', '--report-tag')).toBe('2026-02-27-193429');
    expect(normalizeArtifactTag('phase0_debug.1', '--tag')).toBe('phase0_debug.1');
  });

  it('trims surrounding whitespace', () => {
    expect(normalizeArtifactTag(' 2026-02-27 ', '--tag')).toBe('2026-02-27');
  });

  it('rejects empty or invalid tags', () => {
    expect(() => normalizeArtifactTag(' ', '--tag')).toThrow('--tag');
    expect(() => normalizeArtifactTag('../escape', '--tag')).toThrow('--tag');
    expect(() => normalizeArtifactTag('tag with spaces', '--tag')).toThrow('--tag');
    expect(() => normalizeArtifactTag('tag/slash', '--tag')).toThrow('--tag');
  });

  it('enforces the 64-character length limit', () => {
    const longestAllowed = 'a'.repeat(64);
    expect(normalizeArtifactTag(longestAllowed, '--tag')).toBe(longestAllowed);
    expect(() => normalizeArtifactTag('a'.repeat(65), '--tag')).toThrow('--tag');
  });

  it('rejects tags that do not start with a letter or number', () => {
    expect(() => normalizeArtifactTag('.hidden', '--tag')).toThrow('--tag');
    expect(() => normalizeArtifactTag('-dash', '--tag')).toThrow('--tag');
    expect(() => normalizeArtifactTag('_underscore', '--tag')).toThrow('--tag');
  });

  it('names the offending flag in the error', () => {
    expect(() => normalizeArtifactTag('bad/tag', '--baseline-tag')).toThrow('--baseline-tag');
    expect(() => normalizeArtifactTag('', '--report-tag')).toThrow('--report-tag');
  });
});
+12
View File
@@ -0,0 +1,12 @@
/** Maximum number of characters allowed in an artifact tag (after trimming). */
const ARTIFACT_TAG_MAX_LENGTH = 64;

/**
 * Full shape of a valid tag: one ASCII letter or digit, followed by up to
 * `ARTIFACT_TAG_MAX_LENGTH - 1` characters drawn from letters, digits, ".",
 * "_" and "-". Path separators and whitespace are therefore never accepted.
 */
const ARTIFACT_TAG_PATTERN = new RegExp(
  `^[A-Za-z0-9][A-Za-z0-9._-]{0,${ARTIFACT_TAG_MAX_LENGTH - 1}}$`,
);

/** Matches a tag whose first character is an ASCII letter or digit. */
const ARTIFACT_TAG_LEADING_PATTERN = /^[A-Za-z0-9]/;

/**
 * Trims and validates an artifact tag supplied on the command line.
 *
 * @param raw - Tag text as provided by the caller; surrounding whitespace is ignored.
 * @param flagName - Name of the originating flag (for example `--tag`), used as
 *   the prefix of every error message so the user can see which input was rejected.
 * @returns The trimmed tag, guaranteed to match `ARTIFACT_TAG_PATTERN`.
 * @throws Error when the trimmed tag is empty, longer than
 *   `ARTIFACT_TAG_MAX_LENGTH`, does not start with a letter or number, or
 *   contains a character outside letters, numbers, ".", "_" and "-".
 */
export function normalizeArtifactTag(raw: string, flagName: string): string {
  const value = raw.trim();
  if (value.length === 0) {
    throw new Error(`${flagName} cannot be empty.`);
  }
  // Length and leading-character problems get their own diagnostics; reporting
  // them as "invalid characters" is misleading when every character is allowed.
  if (value.length > ARTIFACT_TAG_MAX_LENGTH) {
    throw new Error(
      `${flagName} is too long (${value.length} characters). Maximum: ${ARTIFACT_TAG_MAX_LENGTH}.`,
    );
  }
  if (!ARTIFACT_TAG_LEADING_PATTERN.test(value)) {
    throw new Error(`${flagName} must start with a letter or number.`);
  }
  if (!ARTIFACT_TAG_PATTERN.test(value)) {
    throw new Error(`${flagName} contains invalid characters. Allowed: letters, numbers, ".", "_", "-".`);
  }
  return value;
}