feat(skills): add rollout status governance and promotion policy checks

2026-02-12 22:43:46 -08:00
parent 43b584257f
commit 7ae0fb51c2
3 changed files with 897 additions and 3 deletions
@@ -1577,6 +1577,54 @@
              "src/cli/skills.test.ts"
            ],
            "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint + pnpm build passing"
          },
          "shell_runner_rollout_status_and_guardrails": {
            "status": "completed",
            "description": "Added `skills rollout-status` with phased recommendation output (`locked|guarded_observe|guarded_review|expand_candidate`), guardrail checks for execution/audit/allowlist posture, and audit-window telemetry summary including hashed-command coverage",
            "files_modified": [
              "src/cli/skills.ts",
              "src/cli/skills.test.ts"
            ],
            "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint + pnpm build passing"
          },
          "shell_runner_governance_workflow_operationalization": {
            "status": "completed",
            "description": "Operationalized shell-runner allowlist governance by adding explicit config-backed ownership/review/promotion criteria (`skills.shell_runner_governance`) and wiring `skills rollout-status` to enforce owner presence when shell runner is enabled",
            "files_modified": [
              "src/config/schema.ts",
              "src/config/schema.test.ts",
              "config/default.yaml",
              "src/cli/skills.ts",
              "src/cli/skills.test.ts"
            ],
            "test_status": "pnpm typecheck + pnpm test:run src/config/schema.test.ts src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
          },
          "shell_runner_rollout_status_export_output": {
            "status": "completed",
            "description": "Extended `skills rollout-status` with `--out <path>` export support so governance and recommendation payloads can be saved as machine-readable JSON artifacts for review workflows",
            "files_modified": [
              "src/cli/skills.ts",
              "src/cli/skills.test.ts"
            ],
            "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
          },
          "shell_runner_rollout_trend_snapshot": {
            "status": "completed",
            "description": "Added historical trend snapshots to `skills rollout-status` by comparing current and previous equal-duration windows, including deltas for failures, allowlist blocks, and hashed-command coverage in both console and JSON payloads",
            "files_modified": [
              "src/cli/skills.ts",
              "src/cli/skills.test.ts"
            ],
            "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
          },
          "shell_runner_rollout_promotion_policy_checks": {
            "status": "completed",
            "description": "Added promotion-policy evaluation to `skills rollout-status` using governance thresholds (`review_cadence_days`, `promotion_min_success_rate`) and trend deltas, with structured blockers/recommendation in JSON and console output",
            "files_modified": [
              "src/cli/skills.ts",
              "src/cli/skills.test.ts"
            ],
            "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
          }
        }
      }
@@ -1605,7 +1653,7 @@
  },
  "overall_progress": {
-    "total_test_count": 1575,
+    "total_test_count": 1586,
    "all_tests_passing": true,
    "p0_completion": "3/3 (100%)",
    "p1_completion": "4/4 (100%)",
@@ -1625,7 +1673,7 @@
    "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
    "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
    "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
-    "next_up": "Skills infrastructure follow-up: define phased enablement criteria for shell runner (allowlist governance, telemetry review, and rollout guardrails) now that audit command strings are hashed"
+    "next_up": "Skills infrastructure follow-up: expose promotion-policy status as a dedicated machine-readable contract for automation consumers (e.g., CI gate or dashboard ingest) before broader shell-runner rollout"
  },
  "soul_md_and_cron_create": {
    "date": "2026-02-11",
@@ -1,5 +1,5 @@
 import { describe, it, expect, vi } from 'vitest';
-import { mkdtempSync, mkdirSync, writeFileSync, existsSync, rmSync } from 'fs';
+import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from 'fs';
 import { join } from 'path';
 import { tmpdir } from 'os';
 import { Command } from 'commander';
@@ -27,14 +27,21 @@ import {
  createShellSkillInstallerCommandRunner,
  checkCommandAgainstAllowlist,
  emitShellRunnerAuditEvents,
  calculateShellRunnerHashCoveragePercent,
  computeShellRunnerAuditTrendSnapshot,
  evaluateShellRunnerPromotionPolicy,
  evaluateShellRunnerRolloutGuardrails,
  hashSkillInstallerAuditCommand,
  recommendShellRunnerRolloutPhase,
  sanitizeSkillInstallerAuditReason,
  summarizeShellRunnerAuditWindow,
  resolveSkillInstallerCommandRunner,
  runSkillExecuteAction,
  runSkillInstallAction,
  registerSkillsCommand,
 } from './skills.js';
 import type { Skill } from '../skills/index.js';
 import type { AuditEvent } from '../audit/types.js';
 function buildSkill(overrides: Partial<Skill>): Skill {
  return {
@@ -61,9 +68,18 @@ function writeSkillsCliConfig(
    installationExecution?: 'disabled' | 'enabled';
    allowShellRunner?: boolean;
    shellRunnerAllowlist?: string[];
    shellRunnerGovernanceOwner?: string;
    auditEnabled?: boolean;
    auditPath?: string;
  },
 ): void {
  const allowlist = opts.shellRunnerAllowlist ?? [];
  const auditLines = opts.auditPath
    ? ['audit:', `  enabled: ${opts.auditEnabled ?? true}`, `  path: ${opts.auditPath}`]
    : [];
  const governanceOwnerLines = opts.shellRunnerGovernanceOwner
    ? ['  shell_runner_governance:', `    owner: '${opts.shellRunnerGovernanceOwner}'`]
    : [];
  writeFileSync(
    configPath,
    [
@@ -78,6 +94,8 @@ function writeSkillsCliConfig(
      `  installation_execution: ${opts.installationExecution ?? 'disabled'}`,
      `  allow_shell_runner: ${opts.allowShellRunner ?? false}`,
      `  shell_runner_allowlist: [${allowlist.map((item) => `'${item}'`).join(', ')}]`,
      ...governanceOwnerLines,
      ...auditLines,
    ].join('\n'),
    'utf-8',
  );
@@ -503,6 +521,333 @@ describe('skills CLI helpers', () => {
    expect(sanitizeSkillInstallerAuditReason('allowlist_blocked')).toBe('allowlist_blocked');
  });
  // Fixture violates four guardrails at once: execution disabled, shell runner off,
  // a wildcard-only allowlist entry, and audit disabled (second argument `false`).
  // The "empty allowlist" blocker is absent because the list is non-empty, and the
  // owner blocker is absent because that check only applies when allow_shell_runner is true.
  // `toEqual` pins the exact messages and the order in which they are reported.
  it('reports shell runner rollout guardrail blockers', () => {
    const guardrails = evaluateShellRunnerRolloutGuardrails(
      {
        installation_execution: 'disabled',
        allow_shell_runner: false,
        shell_runner_allowlist: ['*'],
        shell_runner_governance: {
          review_cadence_days: 7,
          promotion_min_success_rate: 0.9,
        },
        load: { watch: false, watch_debounce_ms: 250 },
      },
      false,
    );
    expect(guardrails.blockers).toEqual([
      'skills.installation_execution must be enabled',
      'skills.allow_shell_runner must be true',
      "skills.shell_runner_allowlist cannot include wildcard-only '*' patterns",
      'audit.enabled must be true for shell runner rollout review',
    ]);
  });
  // Otherwise-valid configuration (execution enabled, non-wildcard allowlist, audit on)
  // whose governance block has no `owner`; with the shell runner enabled this must
  // surface the owner blocker.
  it('requires governance owner when shell runner is enabled', () => {
    const guardrails = evaluateShellRunnerRolloutGuardrails(
      {
        installation_execution: 'enabled',
        allow_shell_runner: true,
        shell_runner_allowlist: ['npm install*'],
        shell_runner_governance: {
          review_cadence_days: 7,
          promotion_min_success_rate: 0.9,
        },
        load: { watch: false, watch_debounce_ms: 250 },
      },
      true,
    );
    expect(guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
  });
  // Three events cover every counter in the summary:
  //   1. a succeeded command result whose command is hashed (`sha256:` prefix),
  //   2. a failed, allowlist-blocked command result whose command is stored in plain text,
  //   3. an execution_blocked event, which is counted separately from command results.
  it('summarizes shell runner audit windows with hash coverage and failures', () => {
    const events: AuditEvent[] = [
      {
        timestamp: 1,
        level: 'debug',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'audit-skill',
          phase: 'install',
          installer_type: 'node',
          command: 'sha256:abc',
          status: 'succeeded',
          reason: 'runner_reported_success',
        },
      },
      {
        timestamp: 2,
        level: 'warn',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'audit-skill',
          phase: 'install',
          installer_type: 'download',
          command: 'download https://example.com/pkg.tgz',
          status: 'failed',
          reason: 'allowlist_blocked',
        },
      },
      {
        timestamp: 3,
        level: 'warn',
        event_type: 'skills.installer.execution_blocked',
        event: {
          skill_name: 'audit-skill',
          phase: 'execute',
          execution_requested: true,
          execution_enabled: false,
          reason: 'execution_policy_disabled',
          attempted_command_count: 1,
        },
      },
    ];
    expect(summarizeShellRunnerAuditWindow(events)).toEqual({
      command_result_total: 2,
      command_result_failed: 1,
      allowlist_blocked: 1,
      execution_blocked: 1,
      hashed_command_count: 1,
      unhashed_command_count: 1,
    });
  });
  // Two cases: an empty summary must yield 0 (no division by zero), and
  // 3 hashed out of 4 observed commands must yield 75.
  it('calculates hash coverage percentage for shell runner summaries', () => {
    expect(
      calculateShellRunnerHashCoveragePercent({
        command_result_total: 0,
        command_result_failed: 0,
        allowlist_blocked: 0,
        execution_blocked: 0,
        hashed_command_count: 0,
        unhashed_command_count: 0,
      }),
    ).toBe(0);
    expect(
      calculateShellRunnerHashCoveragePercent({
        command_result_total: 4,
        command_result_failed: 0,
        allowlist_blocked: 0,
        execution_blocked: 0,
        hashed_command_count: 3,
        unhashed_command_count: 1,
      }),
    ).toBe(75);
  });
  // Fixture layout relative to a 7-day current window ending at `now`:
  //   current window  - two failed results (one allowlist-blocked), both hashed  -> 100% hash coverage
  //   previous window - one failed allowlist-blocked hashed result and one succeeded
  //                     plain-text result                                        -> 50% hash coverage
  // Expected deltas are current minus previous: failures +1, allowlist blocks 0, coverage +50.
  it('computes shell runner trend snapshot across current and previous windows', () => {
    const now = 1_000_000;
    const oneDay = 24 * 60 * 60 * 1000;
    const window = 7 * oneDay;
    const currentWindowStart = now - window;
    const events: AuditEvent[] = [
      {
        timestamp: now - oneDay,
        level: 'warn',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'demo',
          phase: 'install',
          installer_type: 'node',
          command: 'sha256:new-a',
          status: 'failed',
          reason: 'exit_code_1',
        },
      },
      {
        timestamp: now - oneDay * 2,
        level: 'warn',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'demo',
          phase: 'install',
          installer_type: 'node',
          command: 'sha256:new-b',
          status: 'failed',
          reason: 'allowlist_blocked',
        },
      },
      {
        timestamp: now - window - oneDay,
        level: 'warn',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'demo',
          phase: 'install',
          installer_type: 'node',
          command: 'sha256:old-a',
          status: 'failed',
          reason: 'allowlist_blocked',
        },
      },
      {
        timestamp: now - window - oneDay * 2,
        level: 'info',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'demo',
          phase: 'install',
          installer_type: 'node',
          command: 'legacy-old-command',
          status: 'succeeded',
          reason: 'runner_reported_success',
        },
      },
    ];
    const trend = computeShellRunnerAuditTrendSnapshot({
      events,
      currentWindowStartMs: currentWindowStart,
      currentWindowEndMs: now,
    });
    expect(trend.current.command_result_failed).toBe(2);
    expect(trend.previous.command_result_failed).toBe(1);
    expect(trend.deltas.failures).toBe(1);
    expect(trend.current.allowlist_blocked).toBe(1);
    expect(trend.previous.allowlist_blocked).toBe(1);
    expect(trend.deltas.allowlist_blocks).toBe(0);
    expect(trend.deltas.hash_coverage_pct).toBe(50);
  });
  // Unhealthy trend: 1 failure out of 4 results gives a 75% success rate, below the
  // 0.9 minimum, and failures rose by 1 versus the previous window. Both conditions
  // must appear as blockers and the policy must be not eligible.
  it('evaluates promotion policy with governance cadence and success thresholds', () => {
    const policy = evaluateShellRunnerPromotionPolicy({
      trend: {
        current: {
          command_result_total: 4,
          command_result_failed: 1,
          allowlist_blocked: 0,
          execution_blocked: 0,
          hashed_command_count: 4,
          unhashed_command_count: 0,
        },
        previous: {
          command_result_total: 4,
          command_result_failed: 0,
          allowlist_blocked: 0,
          execution_blocked: 0,
          hashed_command_count: 4,
          unhashed_command_count: 0,
        },
        deltas: {
          failures: 1,
          allowlist_blocks: 0,
          hash_coverage_pct: 0,
        },
      },
      reviewedWindowDays: 7,
      governance: {
        review_cadence_days: 7,
        promotion_min_success_rate: 0.9,
      },
    });
    expect(policy.eligible).toBe(false);
    expect(policy.recommendation).toBe('not_eligible');
    expect(policy.blockers).toContain('success rate 75.00% below minimum 90.00%');
    expect(policy.blockers).toContain('failures increased by 1 vs previous window');
  });
  // Healthy trend: no failures in the current window (100% success), negative failure and
  // allowlist-block deltas, and a review window equal to the cadence. No blockers expected.
  it('marks promotion policy eligible when thresholds and trends are healthy', () => {
    const policy = evaluateShellRunnerPromotionPolicy({
      trend: {
        current: {
          command_result_total: 5,
          command_result_failed: 0,
          allowlist_blocked: 0,
          execution_blocked: 0,
          hashed_command_count: 5,
          unhashed_command_count: 0,
        },
        previous: {
          command_result_total: 5,
          command_result_failed: 1,
          allowlist_blocked: 1,
          execution_blocked: 0,
          hashed_command_count: 4,
          unhashed_command_count: 1,
        },
        deltas: {
          failures: -1,
          allowlist_blocks: -1,
          hash_coverage_pct: 20,
        },
      },
      reviewedWindowDays: 7,
      governance: {
        review_cadence_days: 7,
        promotion_min_success_rate: 0.9,
      },
    });
    expect(policy.eligible).toBe(true);
    expect(policy.recommendation).toBe('eligible');
    expect(policy.blockers).toEqual([]);
  });
  // Walks through each recommendation in turn, one `expect` per phase.
  it('recommends rollout phase from guardrails and audit summary', () => {
    // Any guardrail blocker locks the rollout, even when the audit summary is clean.
    expect(
      recommendShellRunnerRolloutPhase(
        { blockers: ['skills.installation_execution must be enabled'] },
        {
          command_result_total: 1,
          command_result_failed: 0,
          allowlist_blocked: 0,
          execution_blocked: 0,
          hashed_command_count: 1,
          unhashed_command_count: 0,
        },
      ),
    ).toBe('locked');
    // No blockers but no command results yet: keep observing.
    expect(
      recommendShellRunnerRolloutPhase(
        { blockers: [] },
        {
          command_result_total: 0,
          command_result_failed: 0,
          allowlist_blocked: 0,
          execution_blocked: 0,
          hashed_command_count: 0,
          unhashed_command_count: 0,
        },
      ),
    ).toBe('guarded_observe');
    // A failed command result in the window calls for review.
    expect(
      recommendShellRunnerRolloutPhase(
        { blockers: [] },
        {
          command_result_total: 4,
          command_result_failed: 1,
          allowlist_blocked: 0,
          execution_blocked: 0,
          hashed_command_count: 4,
          unhashed_command_count: 0,
        },
      ),
    ).toBe('guarded_review');
    // Results present, all hashed, none failed: candidate for expansion.
    expect(
      recommendShellRunnerRolloutPhase(
        { blockers: [] },
        {
          command_result_total: 3,
          command_result_failed: 0,
          allowlist_blocked: 0,
          execution_blocked: 0,
          hashed_command_count: 3,
          unhashed_command_count: 0,
        },
      ),
    ).toBe('expand_candidate');
  });
  it('emits hashed command values for both successful and failed audit command results', () => {
    const logger = {
      skillsInstallerExecutionBlocked: vi.fn(),
@@ -1732,4 +2077,165 @@ describe('skills CLI helpers', () => {
    process.exitCode = undefined;
    rmSync(root, { recursive: true, force: true });
  });
  // End-to-end check of `skills rollout-status --json` with the shell runner enabled but
  // no governance owner configured: the rollout must be locked and the owner blocker reported.
  // Cleanup lives in `finally` blocks so a failing assertion cannot leak the console spy,
  // a stale process.exitCode, or the temp directory into later tests.
  it('skills rollout-status reports governance owner blocker in JSON output', async () => {
    const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
    try {
      const configPath = join(root, 'config.yaml');
      const managedDir = join(root, 'managed');
      const bundledDir = join(root, 'bundled');
      const workspaceDir = join(root, 'workspace');
      const auditPath = join(root, 'audit.log');
      mkdirSync(managedDir, { recursive: true });
      mkdirSync(bundledDir, { recursive: true });
      mkdirSync(workspaceDir, { recursive: true });
      // Empty audit log: the command has no events to summarize.
      writeFileSync(auditPath, '', 'utf-8');
      writeSkillsCliConfig(configPath, {
        managedDir,
        bundledDir,
        workspaceDir,
        installationExecution: 'enabled',
        allowShellRunner: true,
        shellRunnerAllowlist: ['npm install*'],
        auditPath,
      });
      const program = new Command();
      registerSkillsCommand(program);
      const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
      try {
        process.exitCode = undefined;
        await program.parseAsync(['skills', 'rollout-status', '--json', '-c', configPath], { from: 'user' });
        // The JSON payload is the first thing the command prints.
        const payload = JSON.parse(String(logSpy.mock.calls[0]?.[0]));
        expect(payload.recommendation).toBe('locked');
        expect(payload.promotion_policy.recommendation).toBe('not_eligible');
        expect(payload.governance.owner).toBeNull();
        expect(payload.guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
      } finally {
        logSpy.mockRestore();
        process.exitCode = undefined;
      }
    } finally {
      rmSync(root, { recursive: true, force: true });
    }
  });
  // End-to-end check of `--out <path>`: with a governance owner set and an empty audit log,
  // the payload written to disk must report `guarded_observe` and an ineligible promotion policy.
  // Cleanup lives in `finally` blocks so a failing assertion cannot leak the console spy,
  // a stale process.exitCode, or the temp directory into later tests.
  it('skills rollout-status writes JSON payload to output file', async () => {
    const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
    try {
      const configPath = join(root, 'config.yaml');
      const managedDir = join(root, 'managed');
      const bundledDir = join(root, 'bundled');
      const workspaceDir = join(root, 'workspace');
      const auditPath = join(root, 'audit.log');
      const outputPath = join(root, 'rollout-status.json');
      mkdirSync(managedDir, { recursive: true });
      mkdirSync(bundledDir, { recursive: true });
      mkdirSync(workspaceDir, { recursive: true });
      writeFileSync(auditPath, '', 'utf-8');
      writeSkillsCliConfig(configPath, {
        managedDir,
        bundledDir,
        workspaceDir,
        installationExecution: 'enabled',
        allowShellRunner: true,
        shellRunnerAllowlist: ['npm install*'],
        shellRunnerGovernanceOwner: 'skills-team',
        auditPath,
      });
      const program = new Command();
      registerSkillsCommand(program);
      // Silence console output; this test reads the payload from the file instead.
      const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
      try {
        process.exitCode = undefined;
        await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '-c', configPath], { from: 'user' });
        expect(existsSync(outputPath)).toBe(true);
        const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
        expect(payload.governance.owner).toBe('skills-team');
        expect(payload.recommendation).toBe('guarded_observe');
        expect(payload.trend.current.command_result_total).toBe(0);
        expect(payload.promotion_policy.recommendation).toBe('not_eligible');
      } finally {
        logSpy.mockRestore();
        process.exitCode = undefined;
      }
    } finally {
      rmSync(root, { recursive: true, force: true });
    }
  });
  // End-to-end trend check with `--days 7`: one failed, allowlist-blocked result one day ago
  // (current window) and one succeeded result eight days ago (previous window). The exported
  // payload must show +1 failures, +1 allowlist blocks, and a matching promotion blocker.
  // Cleanup lives in `finally` blocks so a failing assertion cannot leak the console spy,
  // a stale process.exitCode, or the temp directory into later tests.
  it('skills rollout-status includes trend deltas across adjacent windows', async () => {
    const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
    try {
      const configPath = join(root, 'config.yaml');
      const managedDir = join(root, 'managed');
      const bundledDir = join(root, 'bundled');
      const workspaceDir = join(root, 'workspace');
      const auditPath = join(root, 'audit.log');
      const outputPath = join(root, 'rollout-trend.json');
      mkdirSync(managedDir, { recursive: true });
      mkdirSync(bundledDir, { recursive: true });
      mkdirSync(workspaceDir, { recursive: true });
      const now = Date.now();
      const oneDay = 24 * 60 * 60 * 1000;
      const events = [
        {
          timestamp: now - oneDay,
          level: 'warn',
          event_type: 'skills.installer.command_result',
          event: {
            skill_name: 'trend-skill',
            phase: 'install',
            installer_type: 'node',
            command: 'sha256:new',
            status: 'failed',
            reason: 'allowlist_blocked',
          },
        },
        {
          timestamp: now - oneDay * 8,
          level: 'info',
          event_type: 'skills.installer.command_result',
          event: {
            skill_name: 'trend-skill',
            phase: 'install',
            installer_type: 'node',
            command: 'legacy-prev',
            status: 'succeeded',
            reason: 'runner_reported_success',
          },
        },
      ];
      // The audit log is written as one JSON document per line.
      writeFileSync(auditPath, `${events.map((event) => JSON.stringify(event)).join('\n')}\n`, 'utf-8');
      writeSkillsCliConfig(configPath, {
        managedDir,
        bundledDir,
        workspaceDir,
        installationExecution: 'enabled',
        allowShellRunner: true,
        shellRunnerAllowlist: ['npm install*'],
        shellRunnerGovernanceOwner: 'skills-team',
        auditPath,
      });
      const program = new Command();
      registerSkillsCommand(program);
      // Silence console output; this test reads the payload from the file instead.
      const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
      try {
        process.exitCode = undefined;
        await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '--days', '7', '-c', configPath], {
          from: 'user',
        });
        const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
        expect(payload.trend.current.command_result_total).toBe(1);
        expect(payload.trend.previous.command_result_total).toBe(1);
        expect(payload.trend.deltas.failures).toBe(1);
        expect(payload.trend.deltas.allowlist_blocks).toBe(1);
        expect(payload.promotion_policy.recommendation).toBe('not_eligible');
        expect(payload.promotion_policy.blockers).toContain('failures increased by 1 vs previous window');
      } finally {
        logSpy.mockRestore();
        process.exitCode = undefined;
      }
    } finally {
      rmSync(root, { recursive: true, force: true });
    }
  });
 });
@@ -2,8 +2,12 @@ import type { Command } from 'commander';
 import { resolve } from 'path';
 import { homedir } from 'os';
 import { spawnSync } from 'child_process';
 import { writeFileSync } from 'fs';
 import { createHash } from 'crypto';
 import { auditLogger } from '../audit/index.js';
 import { queryAuditLogs } from '../audit/export.js';
 import type { AuditEvent } from '../audit/types.js';
 import type { Config } from '../config/schema.js';
 import type { Skill } from '../skills/index.js';
 import { loadAllSkills, SkillInstaller, buildInstallerPlan, loadSkill } from '../skills/index.js';
 import { loadConfigSafe } from './shared.js';
@@ -92,6 +96,238 @@ export function sanitizeSkillInstallerAuditReason(reason: string): string {
  return reason;
 }
 /** Outcome of the shell runner rollout guardrail evaluation. */
 export interface ShellRunnerRolloutGuardrailStatus {
  /** One human-readable message per unmet guardrail; empty when no guardrail is violated. */
  blockers: string[];
 }
 /** Counters aggregated from shell runner audit events in one review window. */
 export interface ShellRunnerAuditWindowSummary {
  /** Number of `skills.installer.command_result` events. */
  command_result_total: number;
  /** Command results whose `status` is `failed`. */
  command_result_failed: number;
  /** Command results whose `reason` is `allowlist_blocked`. */
  allowlist_blocked: number;
  /** Number of `skills.installer.execution_blocked` events. */
  execution_blocked: number;
  /** Command results whose `command` value starts with `sha256:`. */
  hashed_command_count: number;
  /** Command results whose `command` value lacks the `sha256:` prefix (including missing/non-string values). */
  unhashed_command_count: number;
 }
 /** Side-by-side summaries of the current review window and the equally long window before it. */
 export interface ShellRunnerAuditTrendSnapshot {
  /** Summary of the current window. */
  current: ShellRunnerAuditWindowSummary;
  /** Summary of the window immediately preceding the current one. */
  previous: ShellRunnerAuditWindowSummary;
  /** Differences computed as current minus previous; positive values mean an increase. */
  deltas: {
    /** Change in failed command results. */
    failures: number;
    /** Change in allowlist-blocked command results. */
    allowlist_blocks: number;
    /** Change in hashed-command coverage, in percentage points. */
    hash_coverage_pct: number;
  };
 }
 /** Result of checking a trend snapshot against the governance promotion thresholds. */
 export interface ShellRunnerPromotionPolicyStatus {
  /** True when `blockers` is empty. */
  eligible: boolean;
  /** String form of `eligible`. */
  recommendation: 'eligible' | 'not_eligible';
  /** Governance `review_cadence_days` the evaluation was run against. */
  cadence_days: number;
  /** Length, in days, of the window that was reviewed. */
  reviewed_window_days: number;
  /** Fraction (0..1) of current-window command results that did not fail; 0 when there are none. */
  success_rate: number;
  /** Governance `promotion_min_success_rate` threshold, as a fraction. */
  minimum_success_rate: number;
  /** Copy of the trend's `deltas.failures`. */
  failures_delta: number;
  /** Copy of the trend's `deltas.allowlist_blocks`. */
  allowlist_blocks_delta: number;
  /** Copy of the trend's `deltas.hash_coverage_pct`. */
  hash_coverage_delta_pct: number;
  /** Human-readable reasons promotion is not allowed. */
  blockers: string[];
 }
 /**
  * Rollout phase suggested by `recommendShellRunnerRolloutPhase`:
  * `locked` (guardrail blockers present), `guarded_observe` (no command results yet),
  * `guarded_review` (failed or unhashed command results seen), `expand_candidate` (otherwise).
  */
 export type ShellRunnerRolloutRecommendation = 'locked' | 'guarded_observe' | 'guarded_review' | 'expand_candidate';
 /**
  * Checks the skills configuration against the shell runner rollout guardrails.
  *
  * @param skillsConfig - The `skills` section of the loaded configuration.
  * @param auditEnabled - Whether audit logging is switched on.
  * @returns The messages of every violated guardrail, in the fixed order of the rule table below.
  */
 export function evaluateShellRunnerRolloutGuardrails(
  skillsConfig: Config['skills'],
  auditEnabled: boolean,
 ): ShellRunnerRolloutGuardrailStatus {
  const allowlist = skillsConfig.shell_runner_allowlist;
  // Each rule pairs "is this guardrail violated?" with the message to report.
  // Order matters: callers and tests see blockers in exactly this sequence.
  const rules: ReadonlyArray<readonly [boolean, string]> = [
    [skillsConfig.installation_execution !== 'enabled', 'skills.installation_execution must be enabled'],
    [!skillsConfig.allow_shell_runner, 'skills.allow_shell_runner must be true'],
    [allowlist.length === 0, 'skills.shell_runner_allowlist must include at least one pattern'],
    [
      allowlist.some((entry) => entry.trim() === '*'),
      "skills.shell_runner_allowlist cannot include wildcard-only '*' patterns",
    ],
    [
      // The owner is only required once the shell runner itself is enabled.
      Boolean(skillsConfig.allow_shell_runner) && !skillsConfig.shell_runner_governance.owner,
      'skills.shell_runner_governance.owner must be set when shell runner is enabled',
    ],
    [!auditEnabled, 'audit.enabled must be true for shell runner rollout review'],
  ];
  const blockers = rules.filter(([violated]) => violated).map(([, message]) => message);
  return { blockers };
 }
 /**
  * Tallies shell runner audit events into a window summary.
  *
  * Only `skills.installer.command_result` and `skills.installer.execution_blocked`
  * events contribute; every other event type is ignored.
  *
  * @param events - Audit events belonging to one window.
  * @returns Counters for totals, failures, allowlist blocks, execution blocks and hashed/unhashed commands.
  */
 export function summarizeShellRunnerAuditWindow(events: AuditEvent[]): ShellRunnerAuditWindowSummary {
  const totals: ShellRunnerAuditWindowSummary = {
    command_result_total: 0,
    command_result_failed: 0,
    allowlist_blocked: 0,
    execution_blocked: 0,
    hashed_command_count: 0,
    unhashed_command_count: 0,
  };
  // Payload fields are untyped here; anything that is not a string is treated as empty.
  const asText = (value: unknown): string => (typeof value === 'string' ? value : '');
  for (const entry of events) {
    switch (entry.event_type) {
      case 'skills.installer.command_result': {
        totals.command_result_total += 1;
        const details = entry.event as Record<string, unknown>;
        if (asText(details.status) === 'failed') {
          totals.command_result_failed += 1;
        }
        if (asText(details.reason) === 'allowlist_blocked') {
          totals.allowlist_blocked += 1;
        }
        // A command counts as hashed only when it carries the `sha256:` prefix.
        if (asText(details.command).startsWith('sha256:')) {
          totals.hashed_command_count += 1;
        } else {
          totals.unhashed_command_count += 1;
        }
        break;
      }
      case 'skills.installer.execution_blocked':
        totals.execution_blocked += 1;
        break;
      default:
        break;
    }
  }
  return totals;
 }
 /**
  * Computes the share of observed commands that were recorded in hashed form.
  *
  * @param summary - Window summary providing the hashed and unhashed command counts.
  * @returns A percentage in the range 0..100; 0 when no commands were observed.
  */
 export function calculateShellRunnerHashCoveragePercent(summary: ShellRunnerAuditWindowSummary): number {
  const { hashed_command_count: hashed, unhashed_command_count: unhashed } = summary;
  const observed = hashed + unhashed;
  // Guard the division: an empty window reports zero coverage rather than NaN.
  return observed === 0 ? 0 : (hashed / observed) * 100;
 }
 /**
  * Builds a trend snapshot comparing the current window with the equally long window before it.
  *
  * The current window is inclusive on both ends; the previous window includes its start
  * and excludes the current window's start, so no event is counted twice.
  *
  * @param args.events - Audit events covering both windows (events outside them are ignored).
  * @param args.currentWindowStartMs - Start of the current window, epoch milliseconds.
  * @param args.currentWindowEndMs - End of the current window, epoch milliseconds.
  * @returns Summaries for both windows plus current-minus-previous deltas.
  */
 export function computeShellRunnerAuditTrendSnapshot(args: {
  events: AuditEvent[];
  currentWindowStartMs: number;
  currentWindowEndMs: number;
 }): ShellRunnerAuditTrendSnapshot {
  const { events, currentWindowStartMs: currentStart, currentWindowEndMs: currentEnd } = args;
  // The previous window has the same duration and ends where the current one begins.
  const previousStart = currentStart - (currentEnd - currentStart);
  const inCurrent: AuditEvent[] = [];
  const inPrevious: AuditEvent[] = [];
  for (const entry of events) {
    const at = entry.timestamp;
    if (at >= currentStart && at <= currentEnd) {
      inCurrent.push(entry);
    } else if (at >= previousStart && at < currentStart) {
      inPrevious.push(entry);
    }
  }
  const current = summarizeShellRunnerAuditWindow(inCurrent);
  const previous = summarizeShellRunnerAuditWindow(inPrevious);
  const coverageDelta =
    calculateShellRunnerHashCoveragePercent(current) - calculateShellRunnerHashCoveragePercent(previous);
  return {
    current,
    previous,
    deltas: {
      failures: current.command_result_failed - previous.command_result_failed,
      allowlist_blocks: current.allowlist_blocked - previous.allowlist_blocked,
      hash_coverage_pct: coverageDelta,
    },
  };
 }
 /**
  * Evaluates whether the shell runner may be promoted, based on a trend snapshot and
  * the governance thresholds.
  *
  * Blockers are reported, in this order, when:
  *  - the reviewed window is longer than the governance review cadence;
  *  - the current window has no command results;
  *  - the current-window success rate is below the governance minimum
  *    (only checked when there are results, so an empty window is not also
  *    reported as a misleading "0.00% success rate");
  *  - failures increased versus the previous window;
  *  - allowlist blocks increased versus the previous window.
  *
  * @param args.trend - Current/previous window summaries with their deltas.
  * @param args.reviewedWindowDays - Length in days of the window under review.
  * @param args.governance - Thresholds: `review_cadence_days` and `promotion_min_success_rate` (fraction 0..1).
  * @returns Eligibility, the inputs echoed back, and the list of blockers.
  *   `success_rate` is reported as 0 when the current window has no command results.
  */
 export function evaluateShellRunnerPromotionPolicy(args: {
  trend: ShellRunnerAuditTrendSnapshot;
  reviewedWindowDays: number;
  governance: {
    review_cadence_days: number;
    promotion_min_success_rate: number;
  };
 }): ShellRunnerPromotionPolicyStatus {
  const { current, deltas } = args.trend;
  const { review_cadence_days: cadenceDays, promotion_min_success_rate: minimumSuccessRate } = args.governance;
  const hasResults = current.command_result_total !== 0;
  // With no results there is nothing to divide by; 0 is a placeholder, not a measured rate.
  const successRate = hasResults
    ? (current.command_result_total - current.command_result_failed) / current.command_result_total
    : 0;
  const blockers: string[] = [];
  if (args.reviewedWindowDays > cadenceDays) {
    blockers.push(`review window (${args.reviewedWindowDays}d) exceeds governance cadence (${cadenceDays}d)`);
  }
  if (!hasResults) {
    blockers.push('no shell-runner command results in current window');
  } else if (successRate < minimumSuccessRate) {
    // Only a measured rate is compared against the threshold.
    blockers.push(
      `success rate ${(successRate * 100).toFixed(2)}% below minimum ${(minimumSuccessRate * 100).toFixed(2)}%`,
    );
  }
  if (deltas.failures > 0) {
    blockers.push(`failures increased by ${deltas.failures} vs previous window`);
  }
  if (deltas.allowlist_blocks > 0) {
    blockers.push(`allowlist blocks increased by ${deltas.allowlist_blocks} vs previous window`);
  }
  const eligible = blockers.length === 0;
  return {
    eligible,
    recommendation: eligible ? 'eligible' : 'not_eligible',
    cadence_days: cadenceDays,
    reviewed_window_days: args.reviewedWindowDays,
    success_rate: successRate,
    minimum_success_rate: minimumSuccessRate,
    failures_delta: deltas.failures,
    allowlist_blocks_delta: deltas.allowlist_blocks,
    hash_coverage_delta_pct: deltas.hash_coverage_pct,
    blockers,
  };
 }
 /**
  * Maps guardrail status and an audit window summary to a rollout phase.
  *
  * @param guardrails - Result of the guardrail evaluation.
  * @param summary - Audit summary for the window under review.
  * @returns `locked` when any guardrail blocker exists; otherwise `guarded_observe` when no
  *   command results were recorded, `guarded_review` when any result failed or was stored
  *   unhashed, and `expand_candidate` in all remaining cases.
  */
 export function recommendShellRunnerRolloutPhase(
  guardrails: ShellRunnerRolloutGuardrailStatus,
  summary: ShellRunnerAuditWindowSummary,
 ): ShellRunnerRolloutRecommendation {
  // Guardrail blockers win over anything the audit data says.
  if (guardrails.blockers.length > 0) {
    return 'locked';
  }
  const nothingObserved = summary.command_result_total === 0;
  const needsReview = summary.unhashed_command_count > 0 || summary.command_result_failed > 0;
  if (nothingObserved) {
    return 'guarded_observe';
  }
  return needsReview ? 'guarded_review' : 'expand_candidate';
 }
 /**
  * Resolves a configured path to an absolute path, expanding a leading `~`.
  *
  * @param pathValue - Path as written in configuration.
  * @returns The home directory for a bare `~`; the home-relative path for `~/...`;
  *   otherwise `pathValue` resolved against the current working directory.
  */
 function expandHomePath(pathValue: string): string {
  // A bare `~` names the home directory itself; without this case it would be
  // resolved as a literal `~` entry under the current working directory.
  if (pathValue === '~') {
    return resolve(homedir());
  }
  if (pathValue.startsWith('~/')) {
    return resolve(homedir(), pathValue.slice(2));
  }
  return resolve(pathValue);
 }
 interface SkillShellRunnerAuditLogger {
  skillsInstallerExecutionBlocked(event: {
    skill_name: string;
@@ -1097,6 +1333,110 @@ export function registerSkillsCommand(program: Command): void {
      console.log(renderSkillInstallerPlan(view));
    });
  // `skills rollout-status`: reports shell-runner guardrail status, an audit
  // summary for the last N days with deltas against the preceding window, the
  // recommended rollout phase, and the governance promotion-policy verdict.
  // Output is human-readable by default, JSON with `--json`, and can also be
  // written to a file with `--out`.
  skills
    .command('rollout-status')
    .description('Show shell runner rollout guardrails and audit review summary')
    .option('--days <n>', 'Look back N days in audit logs (default: 7)', '7')
    .option('--out <path>', 'Write rollout JSON payload to file')
    .option('--json', 'Output as JSON')
    .option('-c, --config <path>', 'Config file path')
    .action(async (opts: { days?: string; out?: string; json?: boolean; config?: string }) => {
      const loaded = loadConfigSafe(opts.config);
      if (loaded.error || !loaded.config) {
        console.error(loaded.error ?? 'Failed to load config');
        process.exitCode = 1;
        return;
      }

      // Validate strictly: `Number.parseInt` alone would accept inputs such as
      // `7abc` or `1.5` by silently discarding everything after the leading digits.
      const rawDays = (opts.days ?? '7').trim();
      const parsedDays = /^\d+$/.test(rawDays) ? Number(rawDays) : Number.NaN;
      if (!Number.isSafeInteger(parsedDays) || parsedDays <= 0) {
        console.error('`--days` must be a positive integer.');
        process.exitCode = 1;
        return;
      }

      const guardrails = evaluateShellRunnerRolloutGuardrails(loaded.config.skills, loaded.config.audit.enabled);

      // Query twice the requested window; the extra, older half is presumably
      // what the trend snapshot uses as the "previous window" for its deltas.
      const windowDurationMs = parsedDays * 24 * 60 * 60 * 1000;
      const nowMs = Date.now();
      const currentWindowStartMs = nowMs - windowDurationMs;
      const queryStartMs = nowMs - windowDurationMs * 2;
      const auditPath = expandHomePath(loaded.config.audit.path);
      const auditEvents = await queryAuditLogs(auditPath, {
        start_time: queryStartMs,
        event_types: ['skills.installer.command_result', 'skills.installer.execution_blocked'],
      });
      const trend = computeShellRunnerAuditTrendSnapshot({
        events: auditEvents,
        currentWindowStartMs,
        currentWindowEndMs: nowMs,
      });

      const recommendation = recommendShellRunnerRolloutPhase(guardrails, trend.current);
      const governance = loaded.config.skills.shell_runner_governance;
      const promotionPolicy = evaluateShellRunnerPromotionPolicy({
        trend,
        reviewedWindowDays: parsedDays,
        governance: {
          review_cadence_days: governance.review_cadence_days,
          promotion_min_success_rate: governance.promotion_min_success_rate,
        },
      });

      const rolloutPayload = {
        days: parsedDays,
        guardrails,
        summary: trend.current,
        trend,
        recommendation,
        promotion_policy: promotionPolicy,
        governance: {
          owner: governance.owner ?? null,
          review_cadence_days: governance.review_cadence_days,
          promotion_min_success_rate: governance.promotion_min_success_rate,
        },
      };

      // Resolve the export target once so the write and the confirmation
      // message are guaranteed to refer to the same path.
      const outPath = opts.out ? expandHomePath(opts.out) : undefined;
      if (outPath !== undefined) {
        try {
          writeFileSync(outPath, JSON.stringify(rolloutPayload, null, 2), 'utf-8');
        } catch (error: unknown) {
          // Report the failure like other CLI errors instead of letting the
          // exception escape the async action.
          const reason = error instanceof Error ? error.message : String(error);
          console.error(`Failed to write rollout payload to ${outPath}: ${reason}`);
          process.exitCode = 1;
          return;
        }
      }

      if (opts.json) {
        // JSON mode prints only the payload (no "Wrote ..." line).
        console.log(JSON.stringify(rolloutPayload, null, 2));
        return;
      }

      console.log('Shell runner rollout status');
      console.log(`Window: ${parsedDays}d`);
      console.log(`Recommendation: ${recommendation}`);
      console.log(`Governance owner: ${governance.owner ?? '(unset)'}`);
      console.log(`Governance review cadence (days): ${governance.review_cadence_days}`);
      console.log(`Governance promotion min success rate: ${governance.promotion_min_success_rate}`);
      console.log(`Guardrail blockers: ${guardrails.blockers.length}`);
      for (const blocker of guardrails.blockers) {
        console.log(`- ${blocker}`);
      }
      console.log(`Audited command results: ${trend.current.command_result_total}`);
      console.log(`Audited failures: ${trend.current.command_result_failed}`);
      console.log(`Allowlist blocks: ${trend.current.allowlist_blocked}`);
      console.log(`Execution blocks: ${trend.current.execution_blocked}`);
      console.log(`Hashed command payloads: ${trend.current.hashed_command_count}`);
      console.log(`Unhashed command payloads: ${trend.current.unhashed_command_count}`);
      console.log(`Failure delta vs previous window: ${trend.deltas.failures}`);
      console.log(`Allowlist block delta vs previous window: ${trend.deltas.allowlist_blocks}`);
      console.log(`Hash coverage delta vs previous window (%): ${trend.deltas.hash_coverage_pct.toFixed(2)}`);
      console.log(
        `Promotion policy: ${promotionPolicy.recommendation} (success ${(promotionPolicy.success_rate * 100).toFixed(2)}% / min ${(promotionPolicy.minimum_success_rate * 100).toFixed(2)}%)`,
      );
      for (const blocker of promotionPolicy.blockers) {
        console.log(`- ${blocker}`);
      }
      if (outPath !== undefined) {
        console.log(`Wrote rollout payload: ${outPath}`);
      }
    });
  skills
    .command('execute <name>')
    .description('Preview or execute installer steps for an installed skill')