From 7ae0fb51c213daf0cfced6fa0968bade16109936 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Thu, 12 Feb 2026 22:43:46 -0800 Subject: [PATCH] feat(skills): add rollout status governance and promotion policy checks --- docs/plans/state.json | 52 ++++- src/cli/skills.test.ts | 508 ++++++++++++++++++++++++++++++++++++++++- src/cli/skills.ts | 340 +++++++++++++++++++++++++++ 3 files changed, 897 insertions(+), 3 deletions(-) diff --git a/docs/plans/state.json b/docs/plans/state.json index bbff0a2..dfbbad6 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -1577,6 +1577,54 @@ "src/cli/skills.test.ts" ], "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint + pnpm build passing" + }, + "shell_runner_rollout_status_and_guardrails": { + "status": "completed", + "description": "Added `skills rollout-status` with phased recommendation output (`locked|guarded_observe|guarded_review|expand_candidate`), guardrail checks for execution/audit/allowlist posture, and audit-window telemetry summary including hashed-command coverage", + "files_modified": [ + "src/cli/skills.ts", + "src/cli/skills.test.ts" + ], + "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint + pnpm build passing" + }, + "shell_runner_governance_workflow_operationalization": { + "status": "completed", + "description": "Operationalized shell-runner allowlist governance by adding explicit config-backed ownership/review/promotion criteria (`skills.shell_runner_governance`) and wiring `skills rollout-status` to enforce owner presence when shell runner is enabled", + "files_modified": [ + "src/config/schema.ts", + "src/config/schema.test.ts", + "config/default.yaml", + "src/cli/skills.ts", + "src/cli/skills.test.ts" + ], + "test_status": "pnpm typecheck + pnpm test:run src/config/schema.test.ts src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing" + }, + 
"shell_runner_rollout_status_export_output": { + "status": "completed", + "description": "Extended `skills rollout-status` with `--out ` export support so governance and recommendation payloads can be saved as machine-readable JSON artifacts for review workflows", + "files_modified": [ + "src/cli/skills.ts", + "src/cli/skills.test.ts" + ], + "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing" + }, + "shell_runner_rollout_trend_snapshot": { + "status": "completed", + "description": "Added historical trend snapshots to `skills rollout-status` by comparing current and previous equal-duration windows, including deltas for failures, allowlist blocks, and hashed-command coverage in both console and JSON payloads", + "files_modified": [ + "src/cli/skills.ts", + "src/cli/skills.test.ts" + ], + "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing" + }, + "shell_runner_rollout_promotion_policy_checks": { + "status": "completed", + "description": "Added promotion-policy evaluation to `skills rollout-status` using governance thresholds (`review_cadence_days`, `promotion_min_success_rate`) and trend deltas, with structured blockers/recommendation in JSON and console output", + "files_modified": [ + "src/cli/skills.ts", + "src/cli/skills.test.ts" + ], + "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing" } } } @@ -1605,7 +1653,7 @@ }, "overall_progress": { - "total_test_count": 1575, + "total_test_count": 1586, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", @@ -1625,7 +1673,7 @@ "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram", "native_audio_support": "completed — smart routing for native audio 
(Gemini/OpenAI/GitHub) vs Whisper transcription fallback", "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening", - "next_up": "Skills infrastructure follow-up: define phased enablement criteria for shell runner (allowlist governance, telemetry review, and rollout guardrails) now that audit command strings are hashed" + "next_up": "Skills infrastructure follow-up: expose promotion-policy status as a dedicated machine-readable contract for automation consumers (e.g., CI gate or dashboard ingest) before broader shell-runner rollout" }, "soul_md_and_cron_create": { "date": "2026-02-11", diff --git a/src/cli/skills.test.ts b/src/cli/skills.test.ts index 6d53d61..45fb950 100644 --- a/src/cli/skills.test.ts +++ b/src/cli/skills.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect, vi } from 'vitest'; -import { mkdtempSync, mkdirSync, writeFileSync, existsSync, rmSync } from 'fs'; +import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from 'fs'; import { join } from 'path'; import { tmpdir } from 'os'; import { Command } from 'commander'; @@ -27,14 +27,21 @@ import { createShellSkillInstallerCommandRunner, checkCommandAgainstAllowlist, emitShellRunnerAuditEvents, + calculateShellRunnerHashCoveragePercent, + computeShellRunnerAuditTrendSnapshot, + evaluateShellRunnerPromotionPolicy, + evaluateShellRunnerRolloutGuardrails, hashSkillInstallerAuditCommand, + recommendShellRunnerRolloutPhase, sanitizeSkillInstallerAuditReason, + summarizeShellRunnerAuditWindow, resolveSkillInstallerCommandRunner, runSkillExecuteAction, runSkillInstallAction, registerSkillsCommand, } from './skills.js'; import type { Skill } from '../skills/index.js'; +import type { AuditEvent } from '../audit/types.js'; function buildSkill(overrides: Partial<Skill>): Skill { return { @@ -61,9 
+68,18 @@ function writeSkillsCliConfig( installationExecution?: 'disabled' | 'enabled'; allowShellRunner?: boolean; shellRunnerAllowlist?: string[]; + shellRunnerGovernanceOwner?: string; + auditEnabled?: boolean; + auditPath?: string; }, ): void { const allowlist = opts.shellRunnerAllowlist ?? []; + const auditLines = opts.auditPath + ? ['audit:', ` enabled: ${opts.auditEnabled ?? true}`, ` path: ${opts.auditPath}`] + : []; + const governanceOwnerLines = opts.shellRunnerGovernanceOwner + ? [' shell_runner_governance:', ` owner: '${opts.shellRunnerGovernanceOwner}'`] + : []; writeFileSync( configPath, [ @@ -78,6 +94,8 @@ function writeSkillsCliConfig( ` installation_execution: ${opts.installationExecution ?? 'disabled'}`, ` allow_shell_runner: ${opts.allowShellRunner ?? false}`, ` shell_runner_allowlist: [${allowlist.map((item) => `'${item}'`).join(', ')}]`, + ...governanceOwnerLines, + ...auditLines, ].join('\n'), 'utf-8', ); @@ -503,6 +521,333 @@ describe('skills CLI helpers', () => { expect(sanitizeSkillInstallerAuditReason('allowlist_blocked')).toBe('allowlist_blocked'); }); + it('reports shell runner rollout guardrail blockers', () => { + const guardrails = evaluateShellRunnerRolloutGuardrails( + { + installation_execution: 'disabled', + allow_shell_runner: false, + shell_runner_allowlist: ['*'], + shell_runner_governance: { + review_cadence_days: 7, + promotion_min_success_rate: 0.9, + }, + load: { watch: false, watch_debounce_ms: 250 }, + }, + false, + ); + + expect(guardrails.blockers).toEqual([ + 'skills.installation_execution must be enabled', + 'skills.allow_shell_runner must be true', + "skills.shell_runner_allowlist cannot include wildcard-only '*' patterns", + 'audit.enabled must be true for shell runner rollout review', + ]); + }); + + it('requires governance owner when shell runner is enabled', () => { + const guardrails = evaluateShellRunnerRolloutGuardrails( + { + installation_execution: 'enabled', + allow_shell_runner: true, + 
shell_runner_allowlist: ['npm install*'], + shell_runner_governance: { + review_cadence_days: 7, + promotion_min_success_rate: 0.9, + }, + load: { watch: false, watch_debounce_ms: 250 }, + }, + true, + ); + + expect(guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled'); + }); + + it('summarizes shell runner audit windows with hash coverage and failures', () => { + const events: AuditEvent[] = [ + { + timestamp: 1, + level: 'debug', + event_type: 'skills.installer.command_result', + event: { + skill_name: 'audit-skill', + phase: 'install', + installer_type: 'node', + command: 'sha256:abc', + status: 'succeeded', + reason: 'runner_reported_success', + }, + }, + { + timestamp: 2, + level: 'warn', + event_type: 'skills.installer.command_result', + event: { + skill_name: 'audit-skill', + phase: 'install', + installer_type: 'download', + command: 'download https://example.com/pkg.tgz', + status: 'failed', + reason: 'allowlist_blocked', + }, + }, + { + timestamp: 3, + level: 'warn', + event_type: 'skills.installer.execution_blocked', + event: { + skill_name: 'audit-skill', + phase: 'execute', + execution_requested: true, + execution_enabled: false, + reason: 'execution_policy_disabled', + attempted_command_count: 1, + }, + }, + ]; + + expect(summarizeShellRunnerAuditWindow(events)).toEqual({ + command_result_total: 2, + command_result_failed: 1, + allowlist_blocked: 1, + execution_blocked: 1, + hashed_command_count: 1, + unhashed_command_count: 1, + }); + }); + + it('calculates hash coverage percentage for shell runner summaries', () => { + expect( + calculateShellRunnerHashCoveragePercent({ + command_result_total: 0, + command_result_failed: 0, + allowlist_blocked: 0, + execution_blocked: 0, + hashed_command_count: 0, + unhashed_command_count: 0, + }), + ).toBe(0); + + expect( + calculateShellRunnerHashCoveragePercent({ + command_result_total: 4, + command_result_failed: 0, + allowlist_blocked: 0, + 
execution_blocked: 0, + hashed_command_count: 3, + unhashed_command_count: 1, + }), + ).toBe(75); + }); + + it('computes shell runner trend snapshot across current and previous windows', () => { + const now = 1_000_000; + const oneDay = 24 * 60 * 60 * 1000; + const window = 7 * oneDay; + const currentWindowStart = now - window; + + const events: AuditEvent[] = [ + { + timestamp: now - oneDay, + level: 'warn', + event_type: 'skills.installer.command_result', + event: { + skill_name: 'demo', + phase: 'install', + installer_type: 'node', + command: 'sha256:new-a', + status: 'failed', + reason: 'exit_code_1', + }, + }, + { + timestamp: now - oneDay * 2, + level: 'warn', + event_type: 'skills.installer.command_result', + event: { + skill_name: 'demo', + phase: 'install', + installer_type: 'node', + command: 'sha256:new-b', + status: 'failed', + reason: 'allowlist_blocked', + }, + }, + { + timestamp: now - window - oneDay, + level: 'warn', + event_type: 'skills.installer.command_result', + event: { + skill_name: 'demo', + phase: 'install', + installer_type: 'node', + command: 'sha256:old-a', + status: 'failed', + reason: 'allowlist_blocked', + }, + }, + { + timestamp: now - window - oneDay * 2, + level: 'info', + event_type: 'skills.installer.command_result', + event: { + skill_name: 'demo', + phase: 'install', + installer_type: 'node', + command: 'legacy-old-command', + status: 'succeeded', + reason: 'runner_reported_success', + }, + }, + ]; + + const trend = computeShellRunnerAuditTrendSnapshot({ + events, + currentWindowStartMs: currentWindowStart, + currentWindowEndMs: now, + }); + + expect(trend.current.command_result_failed).toBe(2); + expect(trend.previous.command_result_failed).toBe(1); + expect(trend.deltas.failures).toBe(1); + expect(trend.current.allowlist_blocked).toBe(1); + expect(trend.previous.allowlist_blocked).toBe(1); + expect(trend.deltas.allowlist_blocks).toBe(0); + expect(trend.deltas.hash_coverage_pct).toBe(50); + }); + + it('evaluates promotion 
policy with governance cadence and success thresholds', () => { + const policy = evaluateShellRunnerPromotionPolicy({ + trend: { + current: { + command_result_total: 4, + command_result_failed: 1, + allowlist_blocked: 0, + execution_blocked: 0, + hashed_command_count: 4, + unhashed_command_count: 0, + }, + previous: { + command_result_total: 4, + command_result_failed: 0, + allowlist_blocked: 0, + execution_blocked: 0, + hashed_command_count: 4, + unhashed_command_count: 0, + }, + deltas: { + failures: 1, + allowlist_blocks: 0, + hash_coverage_pct: 0, + }, + }, + reviewedWindowDays: 7, + governance: { + review_cadence_days: 7, + promotion_min_success_rate: 0.9, + }, + }); + + expect(policy.eligible).toBe(false); + expect(policy.recommendation).toBe('not_eligible'); + expect(policy.blockers).toContain('success rate 75.00% below minimum 90.00%'); + expect(policy.blockers).toContain('failures increased by 1 vs previous window'); + }); + + it('marks promotion policy eligible when thresholds and trends are healthy', () => { + const policy = evaluateShellRunnerPromotionPolicy({ + trend: { + current: { + command_result_total: 5, + command_result_failed: 0, + allowlist_blocked: 0, + execution_blocked: 0, + hashed_command_count: 5, + unhashed_command_count: 0, + }, + previous: { + command_result_total: 5, + command_result_failed: 1, + allowlist_blocked: 1, + execution_blocked: 0, + hashed_command_count: 4, + unhashed_command_count: 1, + }, + deltas: { + failures: -1, + allowlist_blocks: -1, + hash_coverage_pct: 20, + }, + }, + reviewedWindowDays: 7, + governance: { + review_cadence_days: 7, + promotion_min_success_rate: 0.9, + }, + }); + + expect(policy.eligible).toBe(true); + expect(policy.recommendation).toBe('eligible'); + expect(policy.blockers).toEqual([]); + }); + + it('recommends rollout phase from guardrails and audit summary', () => { + expect( + recommendShellRunnerRolloutPhase( + { blockers: ['skills.installation_execution must be enabled'] }, + { + 
command_result_total: 1, + command_result_failed: 0, + allowlist_blocked: 0, + execution_blocked: 0, + hashed_command_count: 1, + unhashed_command_count: 0, + }, + ), + ).toBe('locked'); + + expect( + recommendShellRunnerRolloutPhase( + { blockers: [] }, + { + command_result_total: 0, + command_result_failed: 0, + allowlist_blocked: 0, + execution_blocked: 0, + hashed_command_count: 0, + unhashed_command_count: 0, + }, + ), + ).toBe('guarded_observe'); + + expect( + recommendShellRunnerRolloutPhase( + { blockers: [] }, + { + command_result_total: 4, + command_result_failed: 1, + allowlist_blocked: 0, + execution_blocked: 0, + hashed_command_count: 4, + unhashed_command_count: 0, + }, + ), + ).toBe('guarded_review'); + + expect( + recommendShellRunnerRolloutPhase( + { blockers: [] }, + { + command_result_total: 3, + command_result_failed: 0, + allowlist_blocked: 0, + execution_blocked: 0, + hashed_command_count: 3, + unhashed_command_count: 0, + }, + ), + ).toBe('expand_candidate'); + }); + it('emits hashed command values for both successful and failed audit command results', () => { const logger = { skillsInstallerExecutionBlocked: vi.fn(), @@ -1732,4 +2077,165 @@ describe('skills CLI helpers', () => { process.exitCode = undefined; rmSync(root, { recursive: true, force: true }); }); + + it('skills rollout-status reports governance owner blocker in JSON output', async () => { + const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-')); + const configPath = join(root, 'config.yaml'); + const managedDir = join(root, 'managed'); + const bundledDir = join(root, 'bundled'); + const workspaceDir = join(root, 'workspace'); + const auditPath = join(root, 'audit.log'); + mkdirSync(managedDir, { recursive: true }); + mkdirSync(bundledDir, { recursive: true }); + mkdirSync(workspaceDir, { recursive: true }); + writeFileSync(auditPath, '', 'utf-8'); + writeSkillsCliConfig(configPath, { + managedDir, + bundledDir, + workspaceDir, + installationExecution: 'enabled', + 
allowShellRunner: true, + shellRunnerAllowlist: ['npm install*'], + auditPath, + }); + + const program = new Command(); + registerSkillsCommand(program); + + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined); + process.exitCode = undefined; + + await program.parseAsync(['skills', 'rollout-status', '--json', '-c', configPath], { from: 'user' }); + + const payload = JSON.parse(String(logSpy.mock.calls[0]?.[0])); + expect(payload.recommendation).toBe('locked'); + expect(payload.promotion_policy.recommendation).toBe('not_eligible'); + expect(payload.governance.owner).toBeNull(); + expect(payload.guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled'); + + logSpy.mockRestore(); + process.exitCode = undefined; + rmSync(root, { recursive: true, force: true }); + }); + + it('skills rollout-status writes JSON payload to output file', async () => { + const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-')); + const configPath = join(root, 'config.yaml'); + const managedDir = join(root, 'managed'); + const bundledDir = join(root, 'bundled'); + const workspaceDir = join(root, 'workspace'); + const auditPath = join(root, 'audit.log'); + const outputPath = join(root, 'rollout-status.json'); + mkdirSync(managedDir, { recursive: true }); + mkdirSync(bundledDir, { recursive: true }); + mkdirSync(workspaceDir, { recursive: true }); + writeFileSync(auditPath, '', 'utf-8'); + writeSkillsCliConfig(configPath, { + managedDir, + bundledDir, + workspaceDir, + installationExecution: 'enabled', + allowShellRunner: true, + shellRunnerAllowlist: ['npm install*'], + shellRunnerGovernanceOwner: 'skills-team', + auditPath, + }); + + const program = new Command(); + registerSkillsCommand(program); + + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined); + process.exitCode = undefined; + + await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '-c', 
configPath], { from: 'user' }); + + expect(existsSync(outputPath)).toBe(true); + const payload = JSON.parse(readFileSync(outputPath, 'utf-8')); + expect(payload.governance.owner).toBe('skills-team'); + expect(payload.recommendation).toBe('guarded_observe'); + expect(payload.trend.current.command_result_total).toBe(0); + expect(payload.promotion_policy.recommendation).toBe('not_eligible'); + + logSpy.mockRestore(); + process.exitCode = undefined; + rmSync(root, { recursive: true, force: true }); + }); + + it('skills rollout-status includes trend deltas across adjacent windows', async () => { + const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-')); + const configPath = join(root, 'config.yaml'); + const managedDir = join(root, 'managed'); + const bundledDir = join(root, 'bundled'); + const workspaceDir = join(root, 'workspace'); + const auditPath = join(root, 'audit.log'); + const outputPath = join(root, 'rollout-trend.json'); + mkdirSync(managedDir, { recursive: true }); + mkdirSync(bundledDir, { recursive: true }); + mkdirSync(workspaceDir, { recursive: true }); + + const now = Date.now(); + const oneDay = 24 * 60 * 60 * 1000; + const events = [ + { + timestamp: now - oneDay, + level: 'warn', + event_type: 'skills.installer.command_result', + event: { + skill_name: 'trend-skill', + phase: 'install', + installer_type: 'node', + command: 'sha256:new', + status: 'failed', + reason: 'allowlist_blocked', + }, + }, + { + timestamp: now - oneDay * 8, + level: 'info', + event_type: 'skills.installer.command_result', + event: { + skill_name: 'trend-skill', + phase: 'install', + installer_type: 'node', + command: 'legacy-prev', + status: 'succeeded', + reason: 'runner_reported_success', + }, + }, + ]; + writeFileSync(auditPath, `${events.map((event) => JSON.stringify(event)).join('\n')}\n`, 'utf-8'); + + writeSkillsCliConfig(configPath, { + managedDir, + bundledDir, + workspaceDir, + installationExecution: 'enabled', + allowShellRunner: true, + shellRunnerAllowlist: 
['npm install*'], + shellRunnerGovernanceOwner: 'skills-team', + auditPath, + }); + + const program = new Command(); + registerSkillsCommand(program); + + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined); + process.exitCode = undefined; + + await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '--days', '7', '-c', configPath], { + from: 'user', + }); + + const payload = JSON.parse(readFileSync(outputPath, 'utf-8')); + expect(payload.trend.current.command_result_total).toBe(1); + expect(payload.trend.previous.command_result_total).toBe(1); + expect(payload.trend.deltas.failures).toBe(1); + expect(payload.trend.deltas.allowlist_blocks).toBe(1); + expect(payload.promotion_policy.recommendation).toBe('not_eligible'); + expect(payload.promotion_policy.blockers).toContain('failures increased by 1 vs previous window'); + + logSpy.mockRestore(); + process.exitCode = undefined; + rmSync(root, { recursive: true, force: true }); + }); }); diff --git a/src/cli/skills.ts b/src/cli/skills.ts index 609f1d1..d84a788 100644 --- a/src/cli/skills.ts +++ b/src/cli/skills.ts @@ -2,8 +2,12 @@ import type { Command } from 'commander'; import { resolve } from 'path'; import { homedir } from 'os'; import { spawnSync } from 'child_process'; +import { writeFileSync } from 'fs'; import { createHash } from 'crypto'; import { auditLogger } from '../audit/index.js'; +import { queryAuditLogs } from '../audit/export.js'; +import type { AuditEvent } from '../audit/types.js'; +import type { Config } from '../config/schema.js'; import type { Skill } from '../skills/index.js'; import { loadAllSkills, SkillInstaller, buildInstallerPlan, loadSkill } from '../skills/index.js'; import { loadConfigSafe } from './shared.js'; @@ -92,6 +96,238 @@ export function sanitizeSkillInstallerAuditReason(reason: string): string { return reason; } +export interface ShellRunnerRolloutGuardrailStatus { + blockers: string[]; +} + +export interface 
ShellRunnerAuditWindowSummary { + command_result_total: number; + command_result_failed: number; + allowlist_blocked: number; + execution_blocked: number; + hashed_command_count: number; + unhashed_command_count: number; +} + +export interface ShellRunnerAuditTrendSnapshot { + current: ShellRunnerAuditWindowSummary; + previous: ShellRunnerAuditWindowSummary; + deltas: { + failures: number; + allowlist_blocks: number; + hash_coverage_pct: number; + }; +} + +export interface ShellRunnerPromotionPolicyStatus { + eligible: boolean; + recommendation: 'eligible' | 'not_eligible'; + cadence_days: number; + reviewed_window_days: number; + success_rate: number; + minimum_success_rate: number; + failures_delta: number; + allowlist_blocks_delta: number; + hash_coverage_delta_pct: number; + blockers: string[]; +} + +export type ShellRunnerRolloutRecommendation = 'locked' | 'guarded_observe' | 'guarded_review' | 'expand_candidate'; + +export function evaluateShellRunnerRolloutGuardrails( + skillsConfig: Config['skills'], + auditEnabled: boolean, +): ShellRunnerRolloutGuardrailStatus { + const blockers: string[] = []; + + if (skillsConfig.installation_execution !== 'enabled') { + blockers.push('skills.installation_execution must be enabled'); + } + + if (!skillsConfig.allow_shell_runner) { + blockers.push('skills.allow_shell_runner must be true'); + } + + if (skillsConfig.shell_runner_allowlist.length === 0) { + blockers.push('skills.shell_runner_allowlist must include at least one pattern'); + } + + if (skillsConfig.shell_runner_allowlist.some((pattern) => pattern.trim() === '*')) { + blockers.push("skills.shell_runner_allowlist cannot include wildcard-only '*' patterns"); + } + + if (skillsConfig.allow_shell_runner && !skillsConfig.shell_runner_governance.owner) { + blockers.push('skills.shell_runner_governance.owner must be set when shell runner is enabled'); + } + + if (!auditEnabled) { + blockers.push('audit.enabled must be true for shell runner rollout review'); + } + + 
return { blockers }; +} + +export function summarizeShellRunnerAuditWindow(events: AuditEvent[]): ShellRunnerAuditWindowSummary { + return events.reduce( + (summary, event) => { + if (event.event_type === 'skills.installer.command_result') { + summary.command_result_total += 1; + + const payload = event.event as Record<string, unknown>; + const status = typeof payload.status === 'string' ? payload.status : ''; + const reason = typeof payload.reason === 'string' ? payload.reason : ''; + const command = typeof payload.command === 'string' ? payload.command : ''; + + if (status === 'failed') { + summary.command_result_failed += 1; + } + + if (reason === 'allowlist_blocked') { + summary.allowlist_blocked += 1; + } + + if (command.startsWith('sha256:')) { + summary.hashed_command_count += 1; + } else { + summary.unhashed_command_count += 1; + } + + return summary; + } + + if (event.event_type === 'skills.installer.execution_blocked') { + summary.execution_blocked += 1; + } + + return summary; + }, + { + command_result_total: 0, + command_result_failed: 0, + allowlist_blocked: 0, + execution_blocked: 0, + hashed_command_count: 0, + unhashed_command_count: 0, + }, + ); +} + +export function calculateShellRunnerHashCoveragePercent(summary: ShellRunnerAuditWindowSummary): number { + const total = summary.hashed_command_count + summary.unhashed_command_count; + if (total === 0) { + return 0; + } + return (summary.hashed_command_count / total) * 100; +} + +export function computeShellRunnerAuditTrendSnapshot(args: { + events: AuditEvent[]; + currentWindowStartMs: number; + currentWindowEndMs: number; +}): ShellRunnerAuditTrendSnapshot { + const previousWindowStartMs = args.currentWindowStartMs - (args.currentWindowEndMs - args.currentWindowStartMs); + + const currentEvents = args.events.filter( + (event) => event.timestamp >= args.currentWindowStartMs && event.timestamp <= args.currentWindowEndMs, + ); + const previousEvents = args.events.filter( + (event) => event.timestamp >= 
previousWindowStartMs && event.timestamp < args.currentWindowStartMs, + ); + + const current = summarizeShellRunnerAuditWindow(currentEvents); + const previous = summarizeShellRunnerAuditWindow(previousEvents); + + return { + current, + previous, + deltas: { + failures: current.command_result_failed - previous.command_result_failed, + allowlist_blocks: current.allowlist_blocked - previous.allowlist_blocked, + hash_coverage_pct: + calculateShellRunnerHashCoveragePercent(current) - calculateShellRunnerHashCoveragePercent(previous), + }, + }; +} + +export function evaluateShellRunnerPromotionPolicy(args: { + trend: ShellRunnerAuditTrendSnapshot; + reviewedWindowDays: number; + governance: { + review_cadence_days: number; + promotion_min_success_rate: number; + }; +}): ShellRunnerPromotionPolicyStatus { + const blockers: string[] = []; + const successRate = + args.trend.current.command_result_total === 0 + ? 0 + : (args.trend.current.command_result_total - args.trend.current.command_result_failed) + / args.trend.current.command_result_total; + + if (args.reviewedWindowDays > args.governance.review_cadence_days) { + blockers.push( + `review window (${args.reviewedWindowDays}d) exceeds governance cadence (${args.governance.review_cadence_days}d)`, + ); + } + + if (args.trend.current.command_result_total === 0) { + blockers.push('no shell-runner command results in current window'); + } + + if (successRate < args.governance.promotion_min_success_rate) { + blockers.push( + `success rate ${(successRate * 100).toFixed(2)}% below minimum ${(args.governance.promotion_min_success_rate * 100).toFixed(2)}%`, + ); + } + + if (args.trend.deltas.failures > 0) { + blockers.push(`failures increased by ${args.trend.deltas.failures} vs previous window`); + } + + if (args.trend.deltas.allowlist_blocks > 0) { + blockers.push(`allowlist blocks increased by ${args.trend.deltas.allowlist_blocks} vs previous window`); + } + + return { + eligible: blockers.length === 0, + recommendation: 
blockers.length === 0 ? 'eligible' : 'not_eligible', + cadence_days: args.governance.review_cadence_days, + reviewed_window_days: args.reviewedWindowDays, + success_rate: successRate, + minimum_success_rate: args.governance.promotion_min_success_rate, + failures_delta: args.trend.deltas.failures, + allowlist_blocks_delta: args.trend.deltas.allowlist_blocks, + hash_coverage_delta_pct: args.trend.deltas.hash_coverage_pct, + blockers, + }; +} + +export function recommendShellRunnerRolloutPhase( + guardrails: ShellRunnerRolloutGuardrailStatus, + summary: ShellRunnerAuditWindowSummary, +): ShellRunnerRolloutRecommendation { + if (guardrails.blockers.length > 0) { + return 'locked'; + } + + if (summary.command_result_total === 0) { + return 'guarded_observe'; + } + + if (summary.unhashed_command_count > 0 || summary.command_result_failed > 0) { + return 'guarded_review'; + } + + return 'expand_candidate'; +} + +function expandHomePath(pathValue: string): string { + if (pathValue.startsWith('~/')) { + return resolve(homedir(), pathValue.slice(2)); + } + return resolve(pathValue); +} + interface SkillShellRunnerAuditLogger { skillsInstallerExecutionBlocked(event: { skill_name: string; @@ -1097,6 +1333,110 @@ export function registerSkillsCommand(program: Command): void { console.log(renderSkillInstallerPlan(view)); }); + skills + .command('rollout-status') + .description('Show shell runner rollout guardrails and audit review summary') + .option('--days <n>', 'Look back N days in audit logs (default: 7)', '7') + .option('--out <path>', 'Write rollout JSON payload to file') + .option('--json', 'Output as JSON') + .option('-c, --config <path>', 'Config file path') + .action(async (opts: { days?: string; out?: string; json?: boolean; config?: string }) => { + const loaded = loadConfigSafe(opts.config); + if (loaded.error || !loaded.config) { + console.error(loaded.error ?? 'Failed to load config'); + process.exitCode = 1; + return; + } + + const parsedDays = Number.parseInt(opts.days ?? 
'7', 10); + if (!Number.isFinite(parsedDays) || parsedDays <= 0) { + console.error('`--days` must be a positive integer.'); + process.exitCode = 1; + return; + } + + const guardrails = evaluateShellRunnerRolloutGuardrails(loaded.config.skills, loaded.config.audit.enabled); + const windowDurationMs = parsedDays * 24 * 60 * 60 * 1000; + const nowMs = Date.now(); + const currentWindowStartMs = nowMs - windowDurationMs; + const queryStartMs = nowMs - windowDurationMs * 2; + const auditPath = expandHomePath(loaded.config.audit.path); + const auditEvents = await queryAuditLogs(auditPath, { + start_time: queryStartMs, + event_types: ['skills.installer.command_result', 'skills.installer.execution_blocked'], + }); + const trend = computeShellRunnerAuditTrendSnapshot({ + events: auditEvents, + currentWindowStartMs, + currentWindowEndMs: nowMs, + }); + const recommendation = recommendShellRunnerRolloutPhase(guardrails, trend.current); + const governance = loaded.config.skills.shell_runner_governance; + const promotionPolicy = evaluateShellRunnerPromotionPolicy({ + trend, + reviewedWindowDays: parsedDays, + governance: { + review_cadence_days: governance.review_cadence_days, + promotion_min_success_rate: governance.promotion_min_success_rate, + }, + }); + const rolloutPayload = { + days: parsedDays, + guardrails, + summary: trend.current, + trend, + recommendation, + promotion_policy: promotionPolicy, + governance: { + owner: governance.owner ?? 
null, + review_cadence_days: governance.review_cadence_days, + promotion_min_success_rate: governance.promotion_min_success_rate, + }, + }; + + if (opts.out) { + writeFileSync(expandHomePath(opts.out), JSON.stringify(rolloutPayload, null, 2), 'utf-8'); + } + + if (opts.json) { + console.log(JSON.stringify(rolloutPayload, null, 2)); + return; + } + + console.log('Shell runner rollout status'); + console.log(`Window: ${parsedDays}d`); + console.log(`Recommendation: ${recommendation}`); + console.log(`Governance owner: ${governance.owner ?? '(unset)'}`); + console.log(`Governance review cadence (days): ${governance.review_cadence_days}`); + console.log(`Governance promotion min success rate: ${governance.promotion_min_success_rate}`); + console.log(`Guardrail blockers: ${guardrails.blockers.length}`); + if (guardrails.blockers.length > 0) { + for (const blocker of guardrails.blockers) { + console.log(`- ${blocker}`); + } + } + console.log(`Audited command results: ${trend.current.command_result_total}`); + console.log(`Audited failures: ${trend.current.command_result_failed}`); + console.log(`Allowlist blocks: ${trend.current.allowlist_blocked}`); + console.log(`Execution blocks: ${trend.current.execution_blocked}`); + console.log(`Hashed command payloads: ${trend.current.hashed_command_count}`); + console.log(`Unhashed command payloads: ${trend.current.unhashed_command_count}`); + console.log(`Failure delta vs previous window: ${trend.deltas.failures}`); + console.log(`Allowlist block delta vs previous window: ${trend.deltas.allowlist_blocks}`); + console.log(`Hash coverage delta vs previous window (%): ${trend.deltas.hash_coverage_pct.toFixed(2)}`); + console.log( + `Promotion policy: ${promotionPolicy.recommendation} (success ${(promotionPolicy.success_rate * 100).toFixed(2)}% / min ${(promotionPolicy.minimum_success_rate * 100).toFixed(2)}%)`, + ); + if (promotionPolicy.blockers.length > 0) { + for (const blocker of promotionPolicy.blockers) { + console.log(`- 
${blocker}`); + } + } + if (opts.out) { + console.log(`Wrote rollout payload: ${expandHomePath(opts.out)}`); + } + }); + skills .command('execute ') .description('Preview or execute installer steps for an installed skill')