bfa857f074
Normalize repeated --backend values in phase0 capture/drift scripts so backend lists are unique and deterministic. Architecture/protocol diagrams reviewed; no updates needed for this parsing-only change.
515 lines
19 KiB
JavaScript
515 lines
19 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
|
|
import { dirname, resolve } from 'node:path';
|
|
import { parseArgs } from 'node:util';
|
|
import { normalizeArtifactTag } from '../src/audit/artifactTag.js';
|
|
import {
|
|
comparePhase0BaselineDrift,
|
|
evaluatePhase0BaselineDriftGate,
|
|
renderPhase0BaselineDriftSnapshot,
|
|
type Phase0BaselineArtifactDocument,
|
|
type Phase0BaselineDriftComparison,
|
|
type Phase0BaselineDriftGateThresholds,
|
|
} from '../src/audit/phase0BaselineDrift.js';
|
|
import type { Phase0BackendTarget } from '../src/audit/phase0LiveBaseline.js';
|
|
|
|
/** Output formats accepted by `--format`; `parseFormat` falls back to `'markdown'` when the flag is omitted. */
type OutputFormat = 'markdown' | 'json';
|
|
|
|
/**
 * One baseline artifact JSON file discovered in the artifacts directory,
 * together with the metadata derived from its file name and contents.
 */
interface ArtifactRecord {
  /** Backend segment captured from the artifact file name. */
  backend: Phase0BackendTarget;

  /** Tag segment captured from the artifact file name (the part before `.json`). */
  tag: string;

  /** Artifact location, resolved against the artifacts directory. */
  path: string;

  /** The document's `generated_at` value, kept only when it is a string. */
  generatedAtIso?: string;

  /** `generatedAtIso` run through `Date.parse`; left unset when the result is not finite. */
  generatedAtMs?: number;

  /** The parsed JSON payload of the artifact file. */
  document: Phase0BaselineArtifactDocument;
}
|
|
|
|
/** Outcome of the optional `--max-age-hours` freshness check for a candidate artifact. */
interface BackendFreshnessVerdict {
  /** False when no maximum age was requested. */
  enabled: boolean;

  /** Whether the freshness check passed (always true when the check is disabled). */
  pass: boolean;

  /** Candidate age in hours, or null when it was not computed. */
  actual_age_hours: number | null;

  /** The requested maximum age in hours, or null when the check is disabled. */
  threshold_hours: number | null;
}

/** Everything computed for a single backend during one drift-check run. */
interface BackendDriftResult {
  /** Backend this result belongs to. */
  backend: Phase0BackendTarget;

  /** Artifact being evaluated. */
  candidate: ArtifactRecord;

  /** Artifact the candidate is compared against, when one is available. */
  baseline?: ArtifactRecord;

  /** Result of `comparePhase0BaselineDrift` for the candidate/baseline documents. */
  comparison: Phase0BaselineDriftComparison;

  /** Freshness check outcome. */
  freshness: BackendFreshnessVerdict;

  /** Result of `evaluatePhase0BaselineDriftGate` for `comparison`. */
  driftGate: ReturnType<typeof evaluatePhase0BaselineDriftGate>;

  /** True only when both the freshness check and the drift gate passed. */
  pass: boolean;
}
|
|
|
|
/** Backend names accepted by `--backend` and recognised in artifact file names. */
const BACKEND_TARGETS: readonly Phase0BackendTarget[] = [
  'native',
  'claude_code',
  'opencode',
  'codex',
  'gemini',
  'pi_embedded',
];

/**
 * Matches per-backend baseline artifact file names of the form
 * `phase0_baseline_live_backend_<backend>_<tag>.json`.
 *
 * Capture group 1 is the backend name and group 2 is the tag. The backend
 * alternation is generated from `BACKEND_TARGETS` so the two cannot drift
 * apart; the names contain only letters and underscores, so no regex
 * escaping is required.
 */
const ARTIFACT_JSON_PATTERN = new RegExp(
  `^phase0_baseline_live_backend_(${BACKEND_TARGETS.join('|')})_(.+)\\.json$`,
);
|
|
|
|
/**
 * Builds the multi-line help text for this script.
 *
 * @returns The synopsis, the general options and the optional drift-threshold
 *          flags, joined with newline characters (no trailing newline).
 */
function usage(): string {
  const synopsis = 'Usage: node --import tsx/esm scripts/check-phase0-baseline-backend-drift.ts [options]';

  const generalOptions: readonly string[] = [
    ' --artifacts-dir <path> Artifacts directory (default: docs/plans/artifacts)',
    ' --backend <name[,name...]> Backends to check (default: pi_embedded,native)',
    ' --tag <value> Candidate artifact tag (default: latest available per backend)',
    ' --baseline-tag <value> Baseline artifact tag (default: previous available per backend)',
    ' --max-age-hours <number> Require candidate artifact freshness (optional)',
    ' --require-baseline-history Fail when no prior artifact exists',
    ' --report-tag <YYYY-MM-DD> Drift report tag (default: current UTC date)',
    ' --write-default-artifacts Write markdown/json drift reports under artifacts dir',
    ' --summary-json-out <path> Write JSON report to path',
    ' --summary-md-out <path> Write Markdown report to path',
    ' --format <markdown|json> Output format (default: markdown)',
    ' --out <path> Write output to file instead of stdout',
  ];

  const thresholdOptions: readonly string[] = [
    ' --min-candidate-sampled-events <integer>',
    ' --min-baseline-sampled-events <integer>',
    ' --max-sampled-events-drop-pct <number>',
    ' --max-run-outcomes-drop-pct <number>',
    ' --max-completion-rate-drop-pp <number>',
    ' --max-cancel-rate-increase-pp <number>',
    ' --max-error-rate-increase-pp <number>',
    ' --max-cancel-latency-p95-increase-ms <number>',
    ' --max-reaction-match-rate-drop-pp <number>',
    ' --max-reaction-skip-rate-increase-pp <number>',
  ];

  const sections: string[] = [
    synopsis,
    '',
    'Options:',
    ...generalOptions,
    '',
    'Drift thresholds (optional):',
    ...thresholdOptions,
  ];
  return sections.join('\n');
}
|
|
|
|
/**
 * Produces the default report tag.
 *
 * @returns The first ten characters of the current instant's ISO-8601 (UTC)
 *          representation, i.e. `YYYY-MM-DD`.
 */
function isoDateTagNow(): string {
  const isoNow = new Date().toISOString();
  return isoNow.substring(0, 10);
}
|
|
|
|
/**
 * Splits a comma-separated flag value into trimmed, non-empty items.
 *
 * @param value Raw flag value, possibly absent.
 * @returns The items in their original order, or `undefined` when the input is
 *          absent/empty or contains no non-blank item.
 */
function parseCsv(value: string | undefined): string[] | undefined {
  if (!value) {
    return undefined;
  }

  const items: string[] = [];
  for (const piece of value.split(',')) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) {
      items.push(trimmed);
    }
  }

  return items.length === 0 ? undefined : items;
}
|
|
|
|
/**
 * Parses an optional numeric flag value.
 *
 * @param raw  Raw flag value; `undefined` or the empty string means "not provided".
 * @param flag Flag name used in error messages (for example `--max-age-hours`).
 * @returns The parsed finite number, or `undefined` when the flag was not provided.
 * @throws Error when the value is blank (whitespace only) or is not a finite number.
 */
function parseOptionalNumber(raw: string | undefined, flag: string): number | undefined {
  if (!raw) {
    return undefined;
  }

  // `Number('   ')` evaluates to 0, so a whitespace-only value would otherwise
  // be accepted silently as a zero threshold. Treat it as invalid input.
  const parsed = raw.trim().length === 0 ? Number.NaN : Number(raw);
  if (!Number.isFinite(parsed)) {
    throw new Error(`Invalid ${flag} value "${raw}". Expected a number.`);
  }
  return parsed;
}
|
|
|
|
/**
 * Parses an optional flag value that must be a non-negative integer.
 *
 * Numeric parsing is delegated to `parseOptionalNumber`; this function adds
 * the integer and sign checks on top.
 *
 * @param raw  Raw flag value, possibly absent.
 * @param flag Flag name used in error messages.
 * @returns The parsed integer, or `undefined` when `parseOptionalNumber` reports no value.
 * @throws Error when the value is not an integer or is below zero.
 */
function parseOptionalInteger(raw: string | undefined, flag: string): number | undefined {
  const candidate = parseOptionalNumber(raw, flag);

  if (candidate !== undefined) {
    if (!Number.isInteger(candidate)) {
      throw new Error(`Invalid ${flag} value "${raw}". Expected an integer.`);
    }
    if (candidate < 0) {
      throw new Error(`${flag} must be greater than or equal to 0.`);
    }
  }

  return candidate;
}
|
|
|
|
/**
 * Resolves the `--backend` flag into a validated, duplicate-free backend list.
 *
 * @param raw Comma-separated backend names; when absent or blank the default
 *            pair `pi_embedded`, `native` is used.
 * @returns Backends in order of first appearance, each listed once.
 * @throws Error on the first name that is not present in `BACKEND_TARGETS`.
 */
function parseBackends(raw: string | undefined): Phase0BackendTarget[] {
  const requested = parseCsv(raw) ?? ['pi_embedded', 'native'];

  // A Set iterates in insertion order, so repeated names collapse onto their
  // first occurrence without disturbing the order of the remaining entries.
  const unique = new Set<Phase0BackendTarget>();
  for (const name of requested) {
    const known = BACKEND_TARGETS.find((target) => target === name);
    if (known === undefined) {
      throw new Error(`Invalid backend "${name}".`);
    }
    unique.add(known);
  }

  return [...unique];
}
|
|
|
|
/**
 * Validates the `--format` flag.
 *
 * @param raw Raw flag value; `undefined` selects the `markdown` default.
 * @returns The requested output format.
 * @throws Error when the value is neither `markdown` nor `json`.
 */
function parseFormat(raw: string | undefined): OutputFormat {
  const requested = raw ?? 'markdown';

  switch (requested) {
    case 'markdown':
    case 'json':
      return requested;
    default:
      throw new Error(`Invalid --format value "${requested}".`);
  }
}
|
|
|
|
/**
 * Returns a copy of `records` ordered newest first.
 *
 * Records are ranked by `generatedAtMs` (a missing timestamp counts as 0);
 * ties are broken by comparing tags with `localeCompare`, larger tag first.
 * The input array is not modified.
 *
 * @param records Records to order.
 * @returns A new, sorted array.
 */
function sortRecordsDesc(records: ArtifactRecord[]): ArtifactRecord[] {
  const timestampOf = (record: ArtifactRecord): number => record.generatedAtMs ?? 0;

  const newestFirst = (left: ArtifactRecord, right: ArtifactRecord): number => {
    const leftMs = timestampOf(left);
    const rightMs = timestampOf(right);
    return leftMs === rightMs ? right.tag.localeCompare(left.tag) : rightMs - leftMs;
  };

  return records.slice().sort(newestFirst);
}
|
|
|
|
/**
 * Formats a delta for display: rounded to two decimals, with an explicit `+`
 * for positive values and an optional unit suffix.
 *
 * @param value  Delta to format; `null` and non-finite numbers render as `n/a`.
 * @param suffix Text appended after the number (for example `%`).
 * @returns The formatted text.
 */
function formatSignedNumber(value: number | null, suffix = ''): string {
  if (value !== null && Number.isFinite(value)) {
    const rounded = Math.round(value * 100) / 100;
    // Negative numbers already carry their own sign when stringified.
    const prefix = rounded > 0 ? '+' : '';
    return prefix + String(rounded) + suffix;
  }
  return 'n/a';
}
|
|
|
|
/**
 * Formats an artifact age in hours for the Markdown report.
 *
 * @param value Age in hours; `null` and non-finite numbers render as `n/a`.
 * @returns The age rounded to two decimals, as text.
 */
function formatFreshnessHours(value: number | null): string {
  const usable = value !== null && Number.isFinite(value);
  return usable ? String(Math.round(value * 100) / 100) : 'n/a';
}
|
|
|
|
/**
 * Writes `output` followed by a newline to `pathValue` as UTF-8, creating any
 * missing parent directories first.
 *
 * @param pathValue Destination file path.
 * @param output    Text to write (a trailing newline is appended).
 */
async function writeOutput(pathValue: string, output: string): Promise<void> {
  const parentDir = dirname(pathValue);
  await mkdir(parentDir, { recursive: true });

  const contents = output + '\n';
  await writeFile(pathValue, contents, { encoding: 'utf8' });
}
|
|
|
|
/**
 * Translates parsed CLI flag values into drift-gate thresholds.
 *
 * Each threshold flag is optional; flags that were not supplied yield
 * `undefined`, while `requireBaselineHistory` is always a boolean.
 *
 * @param values Parsed CLI values keyed by flag name (without the leading `--`).
 * @returns The thresholds object handed to the drift gate.
 * @throws Error when a supplied threshold value fails numeric/integer validation.
 */
function buildThresholds(values: Record<string, string | boolean | undefined>): Phase0BaselineDriftGateThresholds {
  // The flag shown in error messages is always the option key prefixed with `--`.
  const numberFlag = (name: string): number | undefined =>
    parseOptionalNumber(values[name] as string | undefined, `--${name}`);
  const integerFlag = (name: string): number | undefined =>
    parseOptionalInteger(values[name] as string | undefined, `--${name}`);

  return {
    requireBaselineHistory: Boolean(values['require-baseline-history']),
    minCandidateSampledEvents: integerFlag('min-candidate-sampled-events'),
    minBaselineSampledEvents: integerFlag('min-baseline-sampled-events'),
    maxSampledEventsDropPct: numberFlag('max-sampled-events-drop-pct'),
    maxRunOutcomesDropPct: numberFlag('max-run-outcomes-drop-pct'),
    maxCompletionRateDropPp: numberFlag('max-completion-rate-drop-pp'),
    maxCancelRateIncreasePp: numberFlag('max-cancel-rate-increase-pp'),
    maxErrorRateIncreasePp: numberFlag('max-error-rate-increase-pp'),
    maxCancelLatencyP95IncreaseMs: numberFlag('max-cancel-latency-p95-increase-ms'),
    maxReactionMatchRateDropPp: numberFlag('max-reaction-match-rate-drop-pp'),
    maxReactionSkipRateIncreasePp: numberFlag('max-reaction-skip-rate-increase-pp'),
  };
}
|
|
|
|
/**
 * Loads every per-backend baseline artifact found directly inside `artifactsDir`.
 *
 * Only file names matching `ARTIFACT_JSON_PATTERN` are considered; the backend
 * and tag are taken from the file name. Files are read one at a time in the
 * order returned by `readdir`.
 *
 * @param artifactsDir Directory to scan (not recursive).
 * @returns One record per matching file.
 * @throws Error when the directory or a file cannot be read, when a matching
 *         file is not valid JSON, or when its top-level value is not a JSON
 *         object. Parse/shape errors name the offending file.
 */
async function readArtifactRecords(artifactsDir: string): Promise<ArtifactRecord[]> {
  const files = await readdir(artifactsDir);
  const records: ArtifactRecord[] = [];

  for (const file of files) {
    const match = ARTIFACT_JSON_PATTERN.exec(file);
    if (!match) {
      continue;
    }

    const backend = match[1] as Phase0BackendTarget;
    const tag = match[2] ?? '';
    const path = resolve(artifactsDir, file);
    const raw = await readFile(path, 'utf8');

    // Report which artifact is corrupt instead of surfacing a bare SyntaxError.
    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to parse artifact JSON "${path}": ${reason}`);
    }

    // `null`, arrays and primitives are valid JSON but cannot be an artifact
    // document; rejecting them here avoids a TypeError on property access below.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      throw new Error(`Artifact "${path}" does not contain a JSON object.`);
    }

    const document = parsed as Phase0BaselineArtifactDocument;
    const generatedAtIso = typeof document.generated_at === 'string' ? document.generated_at : undefined;
    const generatedAtMs = generatedAtIso ? Date.parse(generatedAtIso) : NaN;

    records.push({
      backend,
      tag,
      path,
      generatedAtIso,
      // An unparseable timestamp is recorded as "unknown" rather than NaN.
      generatedAtMs: Number.isFinite(generatedAtMs) ? generatedAtMs : undefined,
      document,
    });
  }

  return records;
}
|
|
|
|
/**
 * Selects the artifact to evaluate.
 *
 * @param records     Artifacts belonging to a single backend.
 * @param explicitTag When given, the record with exactly this tag is required.
 * @returns The tagged record, or the first record in `sortRecordsDesc` order
 *          when no tag is given.
 * @throws Error when the requested tag is absent or `records` is empty.
 */
function pickCandidate(records: ArtifactRecord[], explicitTag?: string): ArtifactRecord {
  if (!explicitTag) {
    const [newest] = sortRecordsDesc(records);
    if (newest === undefined) {
      throw new Error('No candidate artifact found.');
    }
    return newest;
  }

  const tagged = records.find(({ tag }) => tag === explicitTag);
  if (tagged === undefined) {
    throw new Error(`No artifact found for candidate tag "${explicitTag}".`);
  }
  return tagged;
}
|
|
|
|
/**
 * Selects the artifact the candidate is compared against.
 *
 * With an explicit baseline tag, the record carrying that tag is required.
 * Otherwise the baseline is the record that immediately follows the candidate
 * in `sortRecordsDesc` (newest-first) order, i.e. the previous artifact. This
 * keeps the default meaningful when the candidate itself was chosen via an
 * explicit, older tag: a newer artifact is never used as its baseline.
 *
 * @param records             Artifacts belonging to a single backend.
 * @param candidate           The artifact under evaluation (identified by `path`).
 * @param explicitBaselineTag Optional tag of the baseline to use.
 * @returns The baseline record, or `undefined` when the candidate has no
 *          predecessor in `records`.
 * @throws Error when `explicitBaselineTag` is given but no record carries it.
 */
function pickBaseline(records: ArtifactRecord[], candidate: ArtifactRecord, explicitBaselineTag?: string): ArtifactRecord | undefined {
  if (explicitBaselineTag) {
    const match = records.find((record) => record.tag === explicitBaselineTag);
    if (!match) {
      throw new Error(`No artifact found for baseline tag "${explicitBaselineTag}".`);
    }
    return match;
  }

  const sorted = sortRecordsDesc(records);
  const candidateIndex = sorted.findIndex((record) => record.path === candidate.path);
  if (candidateIndex === -1) {
    // The candidate is not part of `records`; fall back to the newest record.
    return sorted[0];
  }
  // Out-of-range access yields undefined, which signals "no prior artifact".
  return sorted[candidateIndex + 1];
}
|
|
|
|
/**
 * Renders the drift-check outcome as a Markdown report.
 *
 * The report has a header (generation time, artifacts directory, backends,
 * freshness limit, overall verdict), a thresholds section listing every
 * threshold whose value is not `undefined`, and one section per backend
 * result. The returned text ends with a newline whenever it is built, because
 * every section closes with an empty line.
 *
 * @param artifactsDir Directory the artifacts were read from.
 * @param backends     Backends that were checked.
 * @param thresholds   Drift thresholds in effect.
 * @param maxAgeHours  Freshness limit in hours, or `undefined` when disabled.
 * @param results      Per-backend results, rendered in the given order.
 * @param overallPass  Overall gate verdict.
 * @returns The Markdown document.
 */
function renderMarkdown(
  artifactsDir: string,
  backends: Phase0BackendTarget[],
  thresholds: Phase0BaselineDriftGateThresholds,
  maxAgeHours: number | undefined,
  results: BackendDriftResult[],
  overallPass: boolean,
): string {
  const verdict = (ok: boolean): string => (ok ? 'PASS' : 'FAIL');

  const freshnessLimit = typeof maxAgeHours === 'number' ? `${maxAgeHours}` : 'disabled';
  const headerSection: string[] = [
    '# Phase-0 Backend Drift Check',
    '',
    `Generated at: ${new Date().toISOString()}`,
    `Artifacts: ${artifactsDir}`,
    `Backends: ${backends.join(', ')}`,
    `Freshness max age (hours): ${freshnessLimit}`,
    `Overall gate: ${verdict(overallPass)}`,
    '',
  ];

  const configured = Object.entries(thresholds).filter(([, value]) => value !== undefined);
  const thresholdSection: string[] = [
    '## Thresholds',
    ...(configured.length === 0
      ? ['- none (report-only mode)']
      : configured.map(([key, value]) => `- ${key}: ${String(value)}`)),
    '',
  ];

  const backendSections = results.flatMap((result): string[] => {
    const { candidate, baseline, comparison, freshness, driftGate } = result;
    const { deltas } = comparison;

    const section: string[] = [
      `## ${result.backend}`,
      `- status: ${verdict(result.pass)}`,
      `- candidate: tag=${candidate.tag} file=${candidate.path}`,
      `- candidate generated_at: ${candidate.generatedAtIso ?? 'n/a'}`,
    ];

    if (baseline) {
      section.push(
        `- baseline: tag=${baseline.tag} file=${baseline.path}`,
        `- baseline generated_at: ${baseline.generatedAtIso ?? 'n/a'}`,
      );
    } else {
      section.push('- baseline: none');
    }

    section.push(`- candidate snapshot: ${renderPhase0BaselineDriftSnapshot(comparison.candidate)}`);
    if (comparison.baseline) {
      section.push(`- baseline snapshot: ${renderPhase0BaselineDriftSnapshot(comparison.baseline)}`);
    }

    // Percentage deltas carry a '%' suffix; percentage-point and millisecond deltas are bare.
    const deltaRows: ReadonlyArray<readonly [string, string]> = [
      ['sampled_event_count_pct', formatSignedNumber(deltas.sampled_event_count_pct, '%')],
      ['run_total_outcomes_pct', formatSignedNumber(deltas.run_total_outcomes_pct, '%')],
      ['completion_rate_pp', formatSignedNumber(deltas.completion_rate_pp)],
      ['cancel_rate_pp', formatSignedNumber(deltas.cancel_rate_pp)],
      ['error_rate_pp', formatSignedNumber(deltas.error_rate_pp)],
      ['cancel_latency_p95_ms', formatSignedNumber(deltas.cancel_latency_p95_ms)],
      ['reaction_match_rate_pp', formatSignedNumber(deltas.reaction_match_rate_pp)],
      ['reaction_skip_rate_pp', formatSignedNumber(deltas.reaction_skip_rate_pp)],
    ];
    section.push('- deltas:');
    for (const [label, text] of deltaRows) {
      section.push(` ${label}=${text}`);
    }

    section.push(
      `- freshness gate: ${verdict(freshness.pass)} (age_hours=${formatFreshnessHours(freshness.actual_age_hours)} threshold=${freshness.threshold_hours ?? 'n/a'})`,
    );
    section.push(`- drift gate: ${verdict(driftGate.pass)}`);

    if (driftGate.criteria.length === 0) {
      section.push(' criteria: none');
    } else {
      for (const criterion of driftGate.criteria) {
        section.push(` ${verdict(criterion.pass)} ${criterion.criterion} actual=${criterion.actual} threshold=${criterion.threshold}`);
      }
    }

    section.push('');
    return section;
  });

  return [...headerSection, ...thresholdSection, ...backendSections].join('\n');
}
|
|
|
|
/**
 * Computes the freshness verdict for a candidate artifact.
 *
 * @param candidate   Artifact under evaluation.
 * @param maxAgeHours Maximum allowed age in hours; `undefined` disables the check.
 * @param nowMs       Reference "now" in epoch milliseconds.
 * @returns A disabled/passing verdict when no limit is set, a failing verdict
 *          when the candidate has no usable timestamp, otherwise the age-based
 *          verdict with the age rounded to two decimals.
 */
function evaluateCandidateFreshness(
  candidate: ArtifactRecord,
  maxAgeHours: number | undefined,
  nowMs: number,
): BackendDriftResult['freshness'] {
  if (typeof maxAgeHours !== 'number') {
    return {
      enabled: false,
      pass: true,
      actual_age_hours: null,
      threshold_hours: null,
    };
  }

  if (typeof candidate.generatedAtMs !== 'number') {
    return {
      enabled: true,
      pass: false,
      actual_age_hours: null,
      threshold_hours: maxAgeHours,
    };
  }

  const msPerHour = 1000 * 60 * 60;
  // Timestamps in the future are clamped to an age of zero.
  const ageHours = Math.max(0, (nowMs - candidate.generatedAtMs) / msPerHour);
  return {
    enabled: true,
    pass: ageHours <= maxAgeHours,
    actual_age_hours: Math.round(ageHours * 100) / 100,
    threshold_hours: maxAgeHours,
  };
}

/**
 * CLI entry point.
 *
 * Parses the command-line flags, loads the artifacts, evaluates the drift gate
 * and the optional freshness check for every requested backend, then emits the
 * report: to the summary files when requested, and to `--out` or stdout in the
 * selected format. Sets `process.exitCode` to 1 when any backend fails.
 * With `--help`, prints the usage text and returns without doing anything else.
 *
 * @throws Error on invalid flags or when a requested backend has no artifacts.
 */
async function main(): Promise<void> {
  const { values } = parseArgs({
    options: {
      'artifacts-dir': { type: 'string' },
      backend: { type: 'string' },
      tag: { type: 'string' },
      'baseline-tag': { type: 'string' },
      'max-age-hours': { type: 'string' },
      'require-baseline-history': { type: 'boolean' },
      'report-tag': { type: 'string' },
      'write-default-artifacts': { type: 'boolean' },
      'summary-json-out': { type: 'string' },
      'summary-md-out': { type: 'string' },
      'min-candidate-sampled-events': { type: 'string' },
      'min-baseline-sampled-events': { type: 'string' },
      'max-sampled-events-drop-pct': { type: 'string' },
      'max-run-outcomes-drop-pct': { type: 'string' },
      'max-completion-rate-drop-pp': { type: 'string' },
      'max-cancel-rate-increase-pp': { type: 'string' },
      'max-error-rate-increase-pp': { type: 'string' },
      'max-cancel-latency-p95-increase-ms': { type: 'string' },
      'max-reaction-match-rate-drop-pp': { type: 'string' },
      'max-reaction-skip-rate-increase-pp': { type: 'string' },
      format: { type: 'string' },
      out: { type: 'string' },
      help: { type: 'boolean', short: 'h' },
    },
    strict: true,
    allowPositionals: false,
  });

  if (values.help) {
    process.stdout.write(`${usage()}\n`);
    return;
  }

  // --- Flag interpretation (order matters: the first invalid flag wins) ---
  const optionalTag = (raw: string | undefined, flag: string) =>
    raw ? normalizeArtifactTag(raw, flag) : undefined;

  const artifactsDir = resolve(values['artifacts-dir'] ?? 'docs/plans/artifacts');
  const backends = parseBackends(values.backend);
  const candidateTag = optionalTag(values.tag, '--tag');
  const baselineTag = optionalTag(values['baseline-tag'], '--baseline-tag');
  const format = parseFormat(values.format);
  const reportTag = normalizeArtifactTag(values['report-tag'] ?? isoDateTagNow(), '--report-tag');
  const writeDefaultArtifacts = Boolean(values['write-default-artifacts']);

  const maxAgeHours = parseOptionalNumber(values['max-age-hours'], '--max-age-hours');
  if (maxAgeHours !== undefined && maxAgeHours < 0) {
    throw new Error('--max-age-hours must be >= 0.');
  }

  // An explicit path wins; otherwise the default name is used only when
  // --write-default-artifacts was given.
  const defaultBaseName = resolve(artifactsDir, `phase0_baseline_live_backend_drift_${reportTag}`);
  const reportPath = (explicit: string | undefined, extension: 'json' | 'md'): string | undefined => {
    if (explicit) {
      return resolve(explicit);
    }
    return writeDefaultArtifacts ? `${defaultBaseName}.${extension}` : undefined;
  };
  const summaryJsonOut = reportPath(values['summary-json-out'], 'json');
  const summaryMdOut = reportPath(values['summary-md-out'], 'md');

  const thresholds = buildThresholds(values as Record<string, string | boolean | undefined>);

  // --- Evaluation ---
  const allRecords = await readArtifactRecords(artifactsDir);
  const nowMs = Date.now();

  const evaluateBackend = (backend: Phase0BackendTarget): BackendDriftResult => {
    const backendRecords = allRecords.filter((record) => record.backend === backend);
    if (backendRecords.length === 0) {
      throw new Error(`No backend artifact JSON files found for "${backend}" in ${artifactsDir}.`);
    }

    const candidate = pickCandidate(backendRecords, candidateTag);
    const baseline = pickBaseline(backendRecords, candidate, baselineTag);
    const comparison = comparePhase0BaselineDrift(candidate.document, baseline?.document);
    const driftGate = evaluatePhase0BaselineDriftGate(comparison, thresholds);
    const freshness = evaluateCandidateFreshness(candidate, maxAgeHours, nowMs);

    return {
      backend,
      candidate,
      baseline,
      comparison,
      freshness,
      driftGate,
      pass: freshness.pass && driftGate.pass,
    };
  };

  const results: BackendDriftResult[] = backends.map(evaluateBackend);
  const overallPass = results.every((result) => result.pass);

  // --- Report rendering ---
  const describeRecord = (record: ArtifactRecord) => ({
    tag: record.tag,
    path: record.path,
    generated_at: record.generatedAtIso,
  });

  const jsonOutput = JSON.stringify(
    {
      generated_at: new Date().toISOString(),
      artifacts_dir: artifactsDir,
      backends,
      candidate_tag: candidateTag,
      baseline_tag: baselineTag,
      report_tag: reportTag,
      max_age_hours: maxAgeHours,
      thresholds,
      overall_pass: overallPass,
      reports: {
        summary_json_out: summaryJsonOut,
        summary_md_out: summaryMdOut,
      },
      results: results.map((result) => ({
        backend: result.backend,
        pass: result.pass,
        candidate: describeRecord(result.candidate),
        baseline: result.baseline ? describeRecord(result.baseline) : null,
        comparison: result.comparison,
        freshness: result.freshness,
        drift_gate: result.driftGate,
      })),
    },
    null,
    2,
  );
  const markdownOutput = renderMarkdown(artifactsDir, backends, thresholds, maxAgeHours, results, overallPass);
  const output = format === 'json' ? jsonOutput : markdownOutput;

  // --- Emission ---
  if (summaryJsonOut) {
    await writeOutput(summaryJsonOut, jsonOutput);
  }
  if (summaryMdOut) {
    await writeOutput(summaryMdOut, markdownOutput);
  }

  if (values.out) {
    await writeOutput(resolve(values.out), output);
  } else {
    process.stdout.write(`${output}\n`);
  }

  if (!overallPass) {
    process.exitCode = 1;
  }
}
|
|
|
|
// Run the CLI. Any failure is reported on stderr as the error message,
// a blank line and the usage text, and the process exit code is set to 1.
main().catch((reason: unknown) => {
  const detail = reason instanceof Error ? reason.message : String(reason);
  process.stderr.write(`${detail}\n\n${usage()}\n`);
  process.exitCode = 1;
});
|