#!/usr/bin/env node
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { parseArgs } from 'node:util';
import {
  comparePhase0BaselineDrift,
  evaluatePhase0BaselineDriftGate,
  renderPhase0BaselineDriftSnapshot,
  type Phase0BaselineArtifactDocument,
  type Phase0BaselineDriftComparison,
  type Phase0BaselineDriftGateThresholds,
} from '../src/audit/phase0BaselineDrift.js';
import type { Phase0BackendTarget } from '../src/audit/phase0LiveBaseline.js';

/** Formats the report can be rendered in for `--format` / `--out` / stdout. */
type OutputFormat = 'markdown' | 'json';

/** One backend artifact JSON file discovered in the artifacts directory. */
interface ArtifactRecord {
  backend: Phase0BackendTarget;
  /** Tag portion of the file name (everything after the backend name). */
  tag: string;
  /** Absolute path of the artifact file. */
  path: string;
  /** Raw `generated_at` value from the document, when it is a string. */
  generatedAtIso?: string;
  /** `generated_at` parsed with `Date.parse`; undefined when missing or unparseable. */
  generatedAtMs?: number;
  document: Phase0BaselineArtifactDocument;
}

/** Drift and freshness outcome for a single backend. */
interface BackendDriftResult {
  backend: Phase0BackendTarget;
  candidate: ArtifactRecord;
  baseline?: ArtifactRecord;
  comparison: Phase0BaselineDriftComparison;
  freshness: {
    enabled: boolean;
    pass: boolean;
    actual_age_hours: number | null;
    threshold_hours: number | null;
  };
  driftGate: ReturnType<typeof evaluatePhase0BaselineDriftGate>;
  /** True only when both the freshness gate and the drift gate pass. */
  pass: boolean;
}

const BACKEND_TARGETS: readonly Phase0BackendTarget[] = [
  'native',
  'claude_code',
  'opencode',
  'codex',
  'gemini',
  'pi_embedded',
];

// Capture group 1 is the backend, group 2 is the artifact tag.
const ARTIFACT_JSON_PATTERN =
  /^phase0_baseline_live_backend_(native|claude_code|opencode|codex|gemini|pi_embedded)_(.+)\.json$/;

const MS_PER_HOUR = 1000 * 60 * 60;

/** Builds the CLI help text that is printed for `--help` and after any error. */
function usage(): string {
  return [
    'Usage: node --import tsx/esm scripts/check-phase0-baseline-backend-drift.ts [options]',
    '',
    'Options:',
    ' --artifacts-dir <dir> Artifacts directory (default: docs/plans/artifacts)',
    ' --backend <csv> Backends to check (default: pi_embedded,native)',
    ' --tag <tag> Candidate artifact tag (default: latest available per backend)',
    ' --baseline-tag <tag> Baseline artifact tag (default: previous available per backend)',
    ' --max-age-hours <hours> Require candidate artifact freshness (optional)',
    ' --require-baseline-history Fail when no prior artifact exists',
    ' --report-tag <tag> Drift report tag (default: current UTC date)',
    ' --write-default-artifacts Write markdown/json drift reports under artifacts dir',
    ' --summary-json-out <path> Write JSON report to path',
    ' --summary-md-out <path> Write Markdown report to path',
    ' --format <markdown|json> Output format (default: markdown)',
    ' --out <path> Write output to file instead of stdout',
    ' -h, --help Show this help text',
    '',
    'Drift thresholds (optional):',
    ' --min-candidate-sampled-events <n>',
    ' --min-baseline-sampled-events <n>',
    ' --max-sampled-events-drop-pct <pct>',
    ' --max-run-outcomes-drop-pct <pct>',
    ' --max-completion-rate-drop-pp <pp>',
    ' --max-cancel-rate-increase-pp <pp>',
    ' --max-error-rate-increase-pp <pp>',
    ' --max-cancel-latency-p95-increase-ms <ms>',
    ' --max-reaction-match-rate-drop-pp <pp>',
    ' --max-reaction-skip-rate-increase-pp <pp>',
  ].join('\n');
}

/** Returns the current date as `YYYY-MM-DD` (the date part of the UTC ISO timestamp). */
function isoDateTagNow(): string {
  return new Date().toISOString().slice(0, 10);
}

/**
 * Splits a comma-separated option value into trimmed, non-empty items.
 *
 * @returns The items, or `undefined` when the value is missing or contains no items.
 */
function parseCsv(value: string | undefined): string[] | undefined {
  if (!value) {
    return undefined;
  }
  const values = value
    .split(',')
    .map((item) => item.trim())
    .filter((item) => item.length > 0);
  return values.length > 0 ? values : undefined;
}

/**
 * Parses an optional numeric option.
 *
 * @param raw  Raw option value.
 * @param flag Flag name used in error messages.
 * @returns The finite number, or `undefined` when the option is missing or blank.
 * @throws Error when the value is not a finite number.
 */
function parseOptionalNumber(raw: string | undefined, flag: string): number | undefined {
  // Blank values count as "not provided". Without the trim, Number(' ') would
  // silently turn a whitespace-only value into 0.
  if (raw === undefined || raw.trim().length === 0) {
    return undefined;
  }
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) {
    throw new Error(`Invalid ${flag} value "${raw}". Expected a number.`);
  }
  return parsed;
}

/**
 * Parses an optional non-negative integer option.
 *
 * @throws Error when the value is not a number, not an integer, or negative.
 */
function parseOptionalInteger(raw: string | undefined, flag: string): number | undefined {
  const parsed = parseOptionalNumber(raw, flag);
  if (parsed === undefined) {
    return undefined;
  }
  if (!Number.isInteger(parsed)) {
    throw new Error(`Invalid ${flag} value "${raw}". Expected an integer.`);
  }
  if (parsed < 0) {
    throw new Error(`${flag} must be greater than or equal to 0.`);
  }
  return parsed;
}

/**
 * Parses the `--backend` list, defaulting to `pi_embedded,native`.
 * Repeated backends are collapsed so each backend is checked once.
 *
 * @throws Error when an entry is not one of `BACKEND_TARGETS`.
 */
function parseBackends(raw: string | undefined): Phase0BackendTarget[] {
  const values = parseCsv(raw) ?? ['pi_embedded', 'native'];
  const parsed = new Set<Phase0BackendTarget>();
  for (const value of values) {
    if (!BACKEND_TARGETS.includes(value as Phase0BackendTarget)) {
      throw new Error(`Invalid backend "${value}".`);
    }
    parsed.add(value as Phase0BackendTarget);
  }
  return [...parsed];
}

/**
 * Validates the `--format` value, defaulting to `markdown`.
 *
 * @throws Error when the value is neither `markdown` nor `json`.
 */
function parseFormat(raw: string | undefined): OutputFormat {
  const format = raw ?? 'markdown';
  if (format !== 'markdown' && format !== 'json') {
    throw new Error(`Invalid --format value "${format}".`);
  }
  return format;
}

/**
 * Returns a copy of `records` ordered newest first by `generatedAtMs`.
 * Records without a timestamp are treated as timestamp 0; ties are broken by
 * descending tag.
 */
function sortRecordsDesc(records: ArtifactRecord[]): ArtifactRecord[] {
  return [...records].sort((a, b) => {
    const aTs = a.generatedAtMs ?? 0;
    const bTs = b.generatedAtMs ?? 0;
    if (aTs !== bTs) {
      return bTs - aTs;
    }
    return b.tag.localeCompare(a.tag);
  });
}

/**
 * Formats a delta rounded to two decimals with an explicit `+` for positive
 * values; returns `n/a` for null or non-finite input.
 */
function formatSignedNumber(value: number | null, suffix = ''): string {
  if (value === null || !Number.isFinite(value)) {
    return 'n/a';
  }
  const rounded = Math.round(value * 100) / 100;
  const sign = rounded > 0 ? '+' : '';
  return `${sign}${rounded}${suffix}`;
}

/** Formats an age in hours rounded to two decimals; `n/a` for null or non-finite input. */
function formatFreshnessHours(value: number | null): string {
  if (value === null || !Number.isFinite(value)) {
    return 'n/a';
  }
  return `${Math.round(value * 100) / 100}`;
}

/** Writes `output` plus a trailing newline to `pathValue`, creating parent directories. */
async function writeOutput(pathValue: string, output: string): Promise<void> {
  await mkdir(dirname(pathValue), { recursive: true });
  await writeFile(pathValue, `${output}\n`, 'utf8');
}

/**
 * Builds the drift gate thresholds from parsed CLI values.
 * Options that were not supplied stay `undefined`.
 *
 * @throws Error when a supplied threshold is not a valid number/integer.
 */
function buildThresholds(values: Record<string, unknown>): Phase0BaselineDriftGateThresholds {
  // Only string values are forwarded to the numeric parsers.
  const text = (key: string): string | undefined => {
    const value = values[key];
    return typeof value === 'string' ? value : undefined;
  };
  const optionalNumber = (key: string): number | undefined => parseOptionalNumber(text(key), `--${key}`);
  const optionalInteger = (key: string): number | undefined => parseOptionalInteger(text(key), `--${key}`);

  return {
    requireBaselineHistory: Boolean(values['require-baseline-history']),
    minCandidateSampledEvents: optionalInteger('min-candidate-sampled-events'),
    minBaselineSampledEvents: optionalInteger('min-baseline-sampled-events'),
    maxSampledEventsDropPct: optionalNumber('max-sampled-events-drop-pct'),
    maxRunOutcomesDropPct: optionalNumber('max-run-outcomes-drop-pct'),
    maxCompletionRateDropPp: optionalNumber('max-completion-rate-drop-pp'),
    maxCancelRateIncreasePp: optionalNumber('max-cancel-rate-increase-pp'),
    maxErrorRateIncreasePp: optionalNumber('max-error-rate-increase-pp'),
    maxCancelLatencyP95IncreaseMs: optionalNumber('max-cancel-latency-p95-increase-ms'),
    maxReactionMatchRateDropPp: optionalNumber('max-reaction-match-rate-drop-pp'),
    maxReactionSkipRateIncreasePp: optionalNumber('max-reaction-skip-rate-increase-pp'),
  };
}

/**
 * Reads and parses one artifact file into an `ArtifactRecord`.
 *
 * @throws Error naming the file when it is not valid JSON or not a JSON object.
 */
async function loadArtifactRecord(
  path: string,
  backend: Phase0BackendTarget,
  tag: string,
): Promise<ArtifactRecord> {
  const raw = await readFile(path, 'utf8');
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to parse artifact JSON "${path}": ${reason}`);
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(`Artifact "${path}" does not contain a JSON object.`);
  }
  // NOTE(review): only the top-level shape is checked here; the document's
  // fields are trusted to match Phase0BaselineArtifactDocument.
  const document = parsed as Phase0BaselineArtifactDocument;
  const generatedAtIso = typeof document.generated_at === 'string' ? document.generated_at : undefined;
  const generatedAtMs = generatedAtIso ? Date.parse(generatedAtIso) : NaN;
  return {
    backend,
    tag,
    path,
    generatedAtIso,
    generatedAtMs: Number.isFinite(generatedAtMs) ? generatedAtMs : undefined,
    document,
  };
}

/**
 * Loads every file in `artifactsDir` whose name matches `ARTIFACT_JSON_PATTERN`.
 * Other files are ignored. Records are returned in directory-listing order.
 */
async function readArtifactRecords(artifactsDir: string): Promise<ArtifactRecord[]> {
  const files = await readdir(artifactsDir);
  const pending: Promise<ArtifactRecord>[] = [];
  for (const file of files) {
    const match = ARTIFACT_JSON_PATTERN.exec(file);
    if (!match) {
      continue;
    }
    pending.push(loadArtifactRecord(resolve(artifactsDir, file), match[1] as Phase0BackendTarget, match[2] ?? ''));
  }
  return Promise.all(pending);
}

/**
 * Selects the candidate artifact: the one with `explicitTag` when given,
 * otherwise the newest record.
 *
 * @throws Error when the tag is not found or `records` is empty.
 */
function pickCandidate(records: ArtifactRecord[], explicitTag?: string): ArtifactRecord {
  if (explicitTag) {
    const match = records.find((record) => record.tag === explicitTag);
    if (!match) {
      throw new Error(`No artifact found for candidate tag "${explicitTag}".`);
    }
    return match;
  }
  const latest = sortRecordsDesc(records)[0];
  if (!latest) {
    throw new Error('No candidate artifact found.');
  }
  return latest;
}

/**
 * Selects the baseline artifact to compare the candidate against.
 *
 * With `explicitBaselineTag` the matching record is returned. Otherwise the
 * baseline is the record that immediately follows the candidate in
 * newest-first order, i.e. the previous artifact relative to the candidate.
 *
 * @returns The baseline, or `undefined` when no earlier artifact exists.
 * @throws Error when `explicitBaselineTag` is given but not found.
 */
function pickBaseline(
  records: ArtifactRecord[],
  candidate: ArtifactRecord,
  explicitBaselineTag?: string,
): ArtifactRecord | undefined {
  if (explicitBaselineTag) {
    const match = records.find((record) => record.tag === explicitBaselineTag);
    if (!match) {
      throw new Error(`No artifact found for baseline tag "${explicitBaselineTag}".`);
    }
    return match;
  }
  const sorted = sortRecordsDesc(records);
  const candidateIndex = sorted.findIndex((record) => record.path === candidate.path);
  if (candidateIndex === -1) {
    // The candidate is not part of `records`; fall back to the newest record.
    return sorted[0];
  }
  // Step past the candidate so an explicitly chosen older candidate is never
  // compared against a newer artifact.
  return sorted[candidateIndex + 1];
}

/**
 * Evaluates the freshness gate for a candidate artifact.
 *
 * The gate is disabled (and passes) when `maxAgeHours` is not set. When it is
 * enabled, a candidate without a parseable timestamp fails. Ages are clamped
 * at 0, so a timestamp later than `nowMs` counts as age 0.
 */
function evaluateFreshness(
  candidate: ArtifactRecord,
  maxAgeHours: number | undefined,
  nowMs: number,
): BackendDriftResult['freshness'] {
  if (typeof maxAgeHours !== 'number') {
    return { enabled: false, pass: true, actual_age_hours: null, threshold_hours: null };
  }
  if (typeof candidate.generatedAtMs !== 'number') {
    return { enabled: true, pass: false, actual_age_hours: null, threshold_hours: maxAgeHours };
  }
  const ageHours = Math.max(0, (nowMs - candidate.generatedAtMs) / MS_PER_HOUR);
  return {
    enabled: true,
    pass: ageHours <= maxAgeHours,
    actual_age_hours: Math.round(ageHours * 100) / 100,
    threshold_hours: maxAgeHours,
  };
}

/** Renders the drift check as a Markdown report with one section per backend. */
function renderMarkdown(
  artifactsDir: string,
  backends: Phase0BackendTarget[],
  thresholds: Phase0BaselineDriftGateThresholds,
  maxAgeHours: number | undefined,
  results: BackendDriftResult[],
  overallPass: boolean,
): string {
  const lines: string[] = [];
  lines.push('# Phase-0 Backend Drift Check');
  lines.push('');
  lines.push(`Generated at: ${new Date().toISOString()}`);
  lines.push(`Artifacts: ${artifactsDir}`);
  lines.push(`Backends: ${backends.join(', ')}`);
  if (typeof maxAgeHours === 'number') {
    lines.push(`Freshness max age (hours): ${maxAgeHours}`);
  } else {
    lines.push('Freshness max age (hours): disabled');
  }
  lines.push(`Overall gate: ${overallPass ? 'PASS' : 'FAIL'}`);
  lines.push('');

  // A `false` flag is not a configured threshold. buildThresholds always sets
  // requireBaselineHistory to a boolean, so without this filter the list is
  // never empty and the report-only line could never be printed.
  const thresholdEntries = Object.entries(thresholds).filter(
    ([, value]) => value !== undefined && value !== false,
  );
  lines.push('## Thresholds');
  if (thresholdEntries.length === 0) {
    lines.push('- none (report-only mode)');
  } else {
    for (const [key, value] of thresholdEntries) {
      lines.push(`- ${key}: ${String(value)}`);
    }
  }
  lines.push('');

  for (const result of results) {
    const { deltas } = result.comparison;
    lines.push(`## ${result.backend}`);
    lines.push(`- status: ${result.pass ? 'PASS' : 'FAIL'}`);
    lines.push(`- candidate: tag=${result.candidate.tag} file=${result.candidate.path}`);
    lines.push(`- candidate generated_at: ${result.candidate.generatedAtIso ?? 'n/a'}`);
    if (result.baseline) {
      lines.push(`- baseline: tag=${result.baseline.tag} file=${result.baseline.path}`);
      lines.push(`- baseline generated_at: ${result.baseline.generatedAtIso ?? 'n/a'}`);
    } else {
      lines.push('- baseline: none');
    }
    lines.push(`- candidate snapshot: ${renderPhase0BaselineDriftSnapshot(result.comparison.candidate)}`);
    if (result.comparison.baseline) {
      lines.push(`- baseline snapshot: ${renderPhase0BaselineDriftSnapshot(result.comparison.baseline)}`);
    }
    lines.push('- deltas:');
    lines.push(` sampled_event_count_pct=${formatSignedNumber(deltas.sampled_event_count_pct, '%')}`);
    lines.push(` run_total_outcomes_pct=${formatSignedNumber(deltas.run_total_outcomes_pct, '%')}`);
    lines.push(` completion_rate_pp=${formatSignedNumber(deltas.completion_rate_pp)}`);
    lines.push(` cancel_rate_pp=${formatSignedNumber(deltas.cancel_rate_pp)}`);
    lines.push(` error_rate_pp=${formatSignedNumber(deltas.error_rate_pp)}`);
    lines.push(` cancel_latency_p95_ms=${formatSignedNumber(deltas.cancel_latency_p95_ms)}`);
    lines.push(` reaction_match_rate_pp=${formatSignedNumber(deltas.reaction_match_rate_pp)}`);
    lines.push(` reaction_skip_rate_pp=${formatSignedNumber(deltas.reaction_skip_rate_pp)}`);
    lines.push(`- freshness gate: ${result.freshness.pass ? 'PASS' : 'FAIL'} (age_hours=${formatFreshnessHours(result.freshness.actual_age_hours)} threshold=${result.freshness.threshold_hours ?? 'n/a'})`);
    lines.push(`- drift gate: ${result.driftGate.pass ? 'PASS' : 'FAIL'}`);
    if (result.driftGate.criteria.length === 0) {
      lines.push(' criteria: none');
    } else {
      for (const criterion of result.driftGate.criteria) {
        lines.push(` ${criterion.pass ? 'PASS' : 'FAIL'} ${criterion.criterion} actual=${criterion.actual} threshold=${criterion.threshold}`);
      }
    }
    lines.push('');
  }
  return lines.join('\n');
}

/**
 * CLI entry point: parses options, compares each backend's candidate artifact
 * with its baseline, writes the requested reports, and sets exit code 1 when
 * any backend fails its freshness or drift gate.
 */
async function main(): Promise<void> {
  const { values } = parseArgs({
    options: {
      'artifacts-dir': { type: 'string' },
      backend: { type: 'string' },
      tag: { type: 'string' },
      'baseline-tag': { type: 'string' },
      'max-age-hours': { type: 'string' },
      'require-baseline-history': { type: 'boolean' },
      'report-tag': { type: 'string' },
      'write-default-artifacts': { type: 'boolean' },
      'summary-json-out': { type: 'string' },
      'summary-md-out': { type: 'string' },
      'min-candidate-sampled-events': { type: 'string' },
      'min-baseline-sampled-events': { type: 'string' },
      'max-sampled-events-drop-pct': { type: 'string' },
      'max-run-outcomes-drop-pct': { type: 'string' },
      'max-completion-rate-drop-pp': { type: 'string' },
      'max-cancel-rate-increase-pp': { type: 'string' },
      'max-error-rate-increase-pp': { type: 'string' },
      'max-cancel-latency-p95-increase-ms': { type: 'string' },
      'max-reaction-match-rate-drop-pp': { type: 'string' },
      'max-reaction-skip-rate-increase-pp': { type: 'string' },
      format: { type: 'string' },
      out: { type: 'string' },
      help: { type: 'boolean', short: 'h' },
    },
    strict: true,
    allowPositionals: false,
  });

  if (values.help) {
    process.stdout.write(`${usage()}\n`);
    return;
  }

  const artifactsDir = resolve(values['artifacts-dir'] ?? 'docs/plans/artifacts');
  const backends = parseBackends(values.backend);
  const candidateTag = values.tag;
  const baselineTag = values['baseline-tag'];
  const format = parseFormat(values.format);
  const reportTag = values['report-tag'] ?? isoDateTagNow();
  const writeDefaultArtifacts = Boolean(values['write-default-artifacts']);
  const maxAgeHours = parseOptionalNumber(values['max-age-hours'], '--max-age-hours');
  if (typeof maxAgeHours === 'number' && maxAgeHours < 0) {
    throw new Error('--max-age-hours must be >= 0.');
  }

  // Explicit output paths win; otherwise default report files are only
  // written when --write-default-artifacts is set.
  const defaultBaseName = resolve(artifactsDir, `phase0_baseline_live_backend_drift_${reportTag}`);
  const summaryJsonOut = values['summary-json-out']
    ? resolve(values['summary-json-out'])
    : writeDefaultArtifacts
      ? `${defaultBaseName}.json`
      : undefined;
  const summaryMdOut = values['summary-md-out']
    ? resolve(values['summary-md-out'])
    : writeDefaultArtifacts
      ? `${defaultBaseName}.md`
      : undefined;

  const thresholds = buildThresholds(values as Record<string, unknown>);
  const allRecords = await readArtifactRecords(artifactsDir);
  const nowMs = Date.now();

  const results: BackendDriftResult[] = [];
  for (const backend of backends) {
    const backendRecords = allRecords.filter((record) => record.backend === backend);
    if (backendRecords.length === 0) {
      throw new Error(`No backend artifact JSON files found for "${backend}" in ${artifactsDir}.`);
    }
    const candidate = pickCandidate(backendRecords, candidateTag);
    const baseline = pickBaseline(backendRecords, candidate, baselineTag);
    const comparison = comparePhase0BaselineDrift(candidate.document, baseline?.document);
    const driftGate = evaluatePhase0BaselineDriftGate(comparison, thresholds);
    const freshness = evaluateFreshness(candidate, maxAgeHours, nowMs);
    results.push({
      backend,
      candidate,
      baseline,
      comparison,
      freshness,
      driftGate,
      pass: freshness.pass && driftGate.pass,
    });
  }

  const overallPass = results.every((result) => result.pass);

  const jsonOutput = JSON.stringify(
    {
      generated_at: new Date().toISOString(),
      artifacts_dir: artifactsDir,
      backends,
      candidate_tag: candidateTag,
      baseline_tag: baselineTag,
      report_tag: reportTag,
      max_age_hours: maxAgeHours,
      thresholds,
      overall_pass: overallPass,
      reports: {
        summary_json_out: summaryJsonOut,
        summary_md_out: summaryMdOut,
      },
      results: results.map((result) => ({
        backend: result.backend,
        pass: result.pass,
        candidate: {
          tag: result.candidate.tag,
          path: result.candidate.path,
          generated_at: result.candidate.generatedAtIso,
        },
        baseline: result.baseline
          ? {
              tag: result.baseline.tag,
              path: result.baseline.path,
              generated_at: result.baseline.generatedAtIso,
            }
          : null,
        comparison: result.comparison,
        freshness: result.freshness,
        drift_gate: result.driftGate,
      })),
    },
    null,
    2,
  );
  const markdownOutput = renderMarkdown(artifactsDir, backends, thresholds, maxAgeHours, results, overallPass);
  const output = format === 'json' ? jsonOutput : markdownOutput;

  if (summaryJsonOut) {
    await writeOutput(summaryJsonOut, jsonOutput);
  }
  if (summaryMdOut) {
    await writeOutput(summaryMdOut, markdownOutput);
  }
  if (values.out) {
    await writeOutput(resolve(values.out), output);
  } else {
    process.stdout.write(`${output}\n`);
  }

  if (!overallPass) {
    process.exitCode = 1;
  }
}

// Any failure prints the message followed by the usage text and exits non-zero.
main().catch((error) => {
  const message = error instanceof Error ? error.message : String(error);
  process.stderr.write(`${message}\n\n${usage()}\n`);
  process.exitCode = 1;
});