feat(skills): add rollout status governance and promotion policy checks

2026-02-12 22:43:46 -08:00
parent 43b584257f
commit 7ae0fb51c2
3 changed files with 897 additions and 3 deletions
@@ -1,5 +1,5 @@
 import { describe, it, expect, vi } from 'vitest';
-import { mkdtempSync, mkdirSync, writeFileSync, existsSync, rmSync } from 'fs';
+import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from 'fs';
 import { join } from 'path';
 import { tmpdir } from 'os';
 import { Command } from 'commander';
@@ -27,14 +27,21 @@ import {
  createShellSkillInstallerCommandRunner,
  checkCommandAgainstAllowlist,
  emitShellRunnerAuditEvents,
+  calculateShellRunnerHashCoveragePercent,
+  computeShellRunnerAuditTrendSnapshot,
+  evaluateShellRunnerPromotionPolicy,
+  evaluateShellRunnerRolloutGuardrails,
  hashSkillInstallerAuditCommand,
+  recommendShellRunnerRolloutPhase,
  sanitizeSkillInstallerAuditReason,
+  summarizeShellRunnerAuditWindow,
  resolveSkillInstallerCommandRunner,
  runSkillExecuteAction,
  runSkillInstallAction,
  registerSkillsCommand,
 } from './skills.js';
 import type { Skill } from '../skills/index.js';
+import type { AuditEvent } from '../audit/types.js';

 function buildSkill(overrides: Partial<Skill>): Skill {
  return {
@@ -61,9 +68,18 @@ function writeSkillsCliConfig(
    installationExecution?: 'disabled' | 'enabled';
    allowShellRunner?: boolean;
    shellRunnerAllowlist?: string[];
+    shellRunnerGovernanceOwner?: string;
+    auditEnabled?: boolean;
+    auditPath?: string;
  },
 ): void {
  const allowlist = opts.shellRunnerAllowlist ?? [];
+  const auditLines = opts.auditPath
+    ? ['audit:', `  enabled: ${opts.auditEnabled ?? true}`, `  path: ${opts.auditPath}`]
+    : [];
+  const governanceOwnerLines = opts.shellRunnerGovernanceOwner
+    ? ['  shell_runner_governance:', `    owner: '${opts.shellRunnerGovernanceOwner}'`]
+    : [];
  writeFileSync(
    configPath,
    [
@@ -78,6 +94,8 @@ function writeSkillsCliConfig(
      `  installation_execution: ${opts.installationExecution ?? 'disabled'}`,
      `  allow_shell_runner: ${opts.allowShellRunner ?? false}`,
      `  shell_runner_allowlist: [${allowlist.map((item) => `'${item}'`).join(', ')}]`,
+      ...governanceOwnerLines,
+      ...auditLines,
    ].join('\n'),
    'utf-8',
  );
@@ -503,6 +521,333 @@ describe('skills CLI helpers', () => {
    expect(sanitizeSkillInstallerAuditReason('allowlist_blocked')).toBe('allowlist_blocked');
  });

+  // A fully locked-down config must yield one blocker per misconfiguration, in exactly this order.
+  it('reports shell runner rollout guardrail blockers', () => {
+    type SkillsInput = Parameters<typeof evaluateShellRunnerRolloutGuardrails>[0];
+
+    // Execution disabled, shell runner off, and an allowlist consisting only of the '*' wildcard.
+    const lockedDownSkills: SkillsInput = {
+      installation_execution: 'disabled',
+      allow_shell_runner: false,
+      shell_runner_allowlist: ['*'],
+      shell_runner_governance: {
+        review_cadence_days: 7,
+        promotion_min_success_rate: 0.9,
+      },
+      load: { watch: false, watch_debounce_ms: 250 },
+    };
+    // NOTE(review): the second argument looks like the audit.enabled flag — confirm against skills.ts.
+    const auditFlag = false;
+
+    const { blockers } = evaluateShellRunnerRolloutGuardrails(lockedDownSkills, auditFlag);
+
+    expect(blockers).toEqual([
+      'skills.installation_execution must be enabled',
+      'skills.allow_shell_runner must be true',
+      "skills.shell_runner_allowlist cannot include wildcard-only '*' patterns",
+      'audit.enabled must be true for shell runner rollout review',
+    ]);
+  });
+
+  // Shell runner enabled with a governance section that has no owner must produce the owner blocker.
+  it('requires governance owner when shell runner is enabled', () => {
+    type SkillsInput = Parameters<typeof evaluateShellRunnerRolloutGuardrails>[0];
+
+    // Governance thresholds are present, but the `owner` field is deliberately left out.
+    const ownerlessSkills: SkillsInput = {
+      installation_execution: 'enabled',
+      allow_shell_runner: true,
+      shell_runner_allowlist: ['npm install*'],
+      shell_runner_governance: {
+        review_cadence_days: 7,
+        promotion_min_success_rate: 0.9,
+      },
+      load: { watch: false, watch_debounce_ms: 250 },
+    };
+    // NOTE(review): the second argument looks like the audit.enabled flag — confirm against skills.ts.
+    const auditFlag = true;
+
+    const { blockers } = evaluateShellRunnerRolloutGuardrails(ownerlessSkills, auditFlag);
+
+    expect(blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
+  });
+
+  // One fixture per counter: a hashed success, an unhashed allowlist failure, and a blocked execution.
+  it('summarizes shell runner audit windows with hash coverage and failures', () => {
+    // Command recorded as a `sha256:` value; expected to count as hashed.
+    const hashedSuccess: AuditEvent = {
+      timestamp: 1,
+      level: 'debug',
+      event_type: 'skills.installer.command_result',
+      event: {
+        skill_name: 'audit-skill',
+        phase: 'install',
+        installer_type: 'node',
+        command: 'sha256:abc',
+        status: 'succeeded',
+        reason: 'runner_reported_success',
+      },
+    };
+    // Raw command text plus an allowlist rejection; expected to count as failed, blocked and unhashed.
+    const unhashedAllowlistFailure: AuditEvent = {
+      timestamp: 2,
+      level: 'warn',
+      event_type: 'skills.installer.command_result',
+      event: {
+        skill_name: 'audit-skill',
+        phase: 'install',
+        installer_type: 'download',
+        command: 'download https://example.com/pkg.tgz',
+        status: 'failed',
+        reason: 'allowlist_blocked',
+      },
+    };
+    // A different event type: it feeds execution_blocked and not the command_result totals.
+    const blockedExecution: AuditEvent = {
+      timestamp: 3,
+      level: 'warn',
+      event_type: 'skills.installer.execution_blocked',
+      event: {
+        skill_name: 'audit-skill',
+        phase: 'execute',
+        execution_requested: true,
+        execution_enabled: false,
+        reason: 'execution_policy_disabled',
+        attempted_command_count: 1,
+      },
+    };
+
+    const summary = summarizeShellRunnerAuditWindow([hashedSuccess, unhashedAllowlistFailure, blockedExecution]);
+
+    expect(summary).toEqual({
+      command_result_total: 2,
+      command_result_failed: 1,
+      allowlist_blocked: 1,
+      execution_blocked: 1,
+      hashed_command_count: 1,
+      unhashed_command_count: 1,
+    });
+  });
+
+  // Covers the empty-window case (expected 0) and a 3-of-4 hashed window (expected 75).
+  it('calculates hash coverage percentage for shell runner summaries', () => {
+    type Summary = Parameters<typeof calculateShellRunnerHashCoveragePercent>[0];
+
+    // No command results at all.
+    const emptyWindow: Summary = {
+      command_result_total: 0,
+      command_result_failed: 0,
+      allowlist_blocked: 0,
+      execution_blocked: 0,
+      hashed_command_count: 0,
+      unhashed_command_count: 0,
+    };
+    // Four command results, three of them hashed.
+    const mostlyHashedWindow: Summary = {
+      command_result_total: 4,
+      command_result_failed: 0,
+      allowlist_blocked: 0,
+      execution_blocked: 0,
+      hashed_command_count: 3,
+      unhashed_command_count: 1,
+    };
+
+    const emptyCoverage = calculateShellRunnerHashCoveragePercent(emptyWindow);
+    expect(emptyCoverage).toBe(0);
+
+    const mostlyHashedCoverage = calculateShellRunnerHashCoveragePercent(mostlyHashedWindow);
+    expect(mostlyHashedCoverage).toBe(75);
+  });
+
+  // Two failures in the current 7-day window vs. one failure and one unhashed success before it.
+  it('computes shell runner trend snapshot across current and previous windows', () => {
+    const nowMs = 1_000_000;
+    const dayMs = 24 * 60 * 60 * 1000;
+    const windowMs = 7 * dayMs;
+    const windowStartMs = nowMs - windowMs;
+
+    // Events timestamped between windowStartMs and nowMs; both carry hashed commands.
+    const currentWindowEvents: AuditEvent[] = [
+      {
+        timestamp: nowMs - dayMs,
+        level: 'warn',
+        event_type: 'skills.installer.command_result',
+        event: {
+          skill_name: 'demo',
+          phase: 'install',
+          installer_type: 'node',
+          command: 'sha256:new-a',
+          status: 'failed',
+          reason: 'exit_code_1',
+        },
+      },
+      {
+        timestamp: nowMs - dayMs * 2,
+        level: 'warn',
+        event_type: 'skills.installer.command_result',
+        event: {
+          skill_name: 'demo',
+          phase: 'install',
+          installer_type: 'node',
+          command: 'sha256:new-b',
+          status: 'failed',
+          reason: 'allowlist_blocked',
+        },
+      },
+    ];
+
+    // Events timestamped one and two days before windowStartMs; one hashed failure, one raw-command success.
+    const previousWindowEvents: AuditEvent[] = [
+      {
+        timestamp: nowMs - windowMs - dayMs,
+        level: 'warn',
+        event_type: 'skills.installer.command_result',
+        event: {
+          skill_name: 'demo',
+          phase: 'install',
+          installer_type: 'node',
+          command: 'sha256:old-a',
+          status: 'failed',
+          reason: 'allowlist_blocked',
+        },
+      },
+      {
+        timestamp: nowMs - windowMs - dayMs * 2,
+        level: 'info',
+        event_type: 'skills.installer.command_result',
+        event: {
+          skill_name: 'demo',
+          phase: 'install',
+          installer_type: 'node',
+          command: 'legacy-old-command',
+          status: 'succeeded',
+          reason: 'runner_reported_success',
+        },
+      },
+    ];
+
+    const { current, previous, deltas } = computeShellRunnerAuditTrendSnapshot({
+      events: [...currentWindowEvents, ...previousWindowEvents],
+      currentWindowStartMs: windowStartMs,
+      currentWindowEndMs: nowMs,
+    });
+
+    // Failures: 2 now vs. 1 before.
+    expect(current.command_result_failed).toBe(2);
+    expect(previous.command_result_failed).toBe(1);
+    expect(deltas.failures).toBe(1);
+    // Allowlist blocks: 1 in each window, so no change.
+    expect(current.allowlist_blocked).toBe(1);
+    expect(previous.allowlist_blocked).toBe(1);
+    expect(deltas.allowlist_blocks).toBe(0);
+    // Hashed commands: 2 of 2 now vs. 1 of 2 before.
+    expect(deltas.hash_coverage_pct).toBe(50);
+  });
+
+  // One failure out of four (75% success) against a 0.9 minimum, with failures rising: not eligible.
+  it('evaluates promotion policy with governance cadence and success thresholds', () => {
+    type PolicyInput = Parameters<typeof evaluateShellRunnerPromotionPolicy>[0];
+
+    // Current window regresses from zero failures to one.
+    const regressingTrend: PolicyInput['trend'] = {
+      current: {
+        command_result_total: 4,
+        command_result_failed: 1,
+        allowlist_blocked: 0,
+        execution_blocked: 0,
+        hashed_command_count: 4,
+        unhashed_command_count: 0,
+      },
+      previous: {
+        command_result_total: 4,
+        command_result_failed: 0,
+        allowlist_blocked: 0,
+        execution_blocked: 0,
+        hashed_command_count: 4,
+        unhashed_command_count: 0,
+      },
+      deltas: {
+        failures: 1,
+        allowlist_blocks: 0,
+        hash_coverage_pct: 0,
+      },
+    };
+    const governance: PolicyInput['governance'] = {
+      review_cadence_days: 7,
+      promotion_min_success_rate: 0.9,
+    };
+
+    const { eligible, recommendation, blockers } = evaluateShellRunnerPromotionPolicy({
+      trend: regressingTrend,
+      reviewedWindowDays: 7,
+      governance,
+    });
+
+    expect(eligible).toBe(false);
+    expect(recommendation).toBe('not_eligible');
+    expect(blockers).toContain('success rate 75.00% below minimum 90.00%');
+    expect(blockers).toContain('failures increased by 1 vs previous window');
+  });
+
+  // No failures in the current window and every delta improving: eligible with no blockers.
+  it('marks promotion policy eligible when thresholds and trends are healthy', () => {
+    type PolicyInput = Parameters<typeof evaluateShellRunnerPromotionPolicy>[0];
+
+    // Failures and allowlist blocks both dropped from 1 to 0; hashed commands went from 4 of 5 to 5 of 5.
+    const improvingTrend: PolicyInput['trend'] = {
+      current: {
+        command_result_total: 5,
+        command_result_failed: 0,
+        allowlist_blocked: 0,
+        execution_blocked: 0,
+        hashed_command_count: 5,
+        unhashed_command_count: 0,
+      },
+      previous: {
+        command_result_total: 5,
+        command_result_failed: 1,
+        allowlist_blocked: 1,
+        execution_blocked: 0,
+        hashed_command_count: 4,
+        unhashed_command_count: 1,
+      },
+      deltas: {
+        failures: -1,
+        allowlist_blocks: -1,
+        hash_coverage_pct: 20,
+      },
+    };
+    const governance: PolicyInput['governance'] = {
+      review_cadence_days: 7,
+      promotion_min_success_rate: 0.9,
+    };
+
+    const { eligible, recommendation, blockers } = evaluateShellRunnerPromotionPolicy({
+      trend: improvingTrend,
+      reviewedWindowDays: 7,
+      governance,
+    });
+
+    expect(eligible).toBe(true);
+    expect(recommendation).toBe('eligible');
+    expect(blockers).toEqual([]);
+  });
+
+  // Walks the four phases: locked, guarded_observe, guarded_review, expand_candidate.
+  it('recommends rollout phase from guardrails and audit summary', () => {
+    type Summary = Parameters<typeof recommendShellRunnerRolloutPhase>[1];
+
+    // Every scenario here has all commands hashed and no allowlist or execution blocks;
+    // only the total and failure counts differ.
+    const fullyHashedSummary = (total: number, failed: number): Summary => ({
+      command_result_total: total,
+      command_result_failed: failed,
+      allowlist_blocked: 0,
+      execution_blocked: 0,
+      hashed_command_count: total,
+      unhashed_command_count: 0,
+    });
+
+    // An outstanding guardrail blocker gives 'locked', even with a clean audit summary.
+    const withBlocker = recommendShellRunnerRolloutPhase(
+      { blockers: ['skills.installation_execution must be enabled'] },
+      fullyHashedSummary(1, 0),
+    );
+    expect(withBlocker).toBe('locked');
+
+    // No blockers and no command results recorded.
+    const withoutData = recommendShellRunnerRolloutPhase({ blockers: [] }, fullyHashedSummary(0, 0));
+    expect(withoutData).toBe('guarded_observe');
+
+    // No blockers, but one of four command results failed.
+    const withFailure = recommendShellRunnerRolloutPhase({ blockers: [] }, fullyHashedSummary(4, 1));
+    expect(withFailure).toBe('guarded_review');
+
+    // No blockers and three clean command results.
+    const allClean = recommendShellRunnerRolloutPhase({ blockers: [] }, fullyHashedSummary(3, 0));
+    expect(allClean).toBe('expand_candidate');
+  });
+
  it('emits hashed command values for both successful and failed audit command results', () => {
    const logger = {
      skillsInstallerExecutionBlocked: vi.fn(),
@@ -1732,4 +2077,165 @@ describe('skills CLI helpers', () => {
    process.exitCode = undefined;
    rmSync(root, { recursive: true, force: true });
  });
+
+  // CLI round trip: shell runner enabled with no governance owner configured must report
+  // a 'locked' recommendation and the owner blocker in the JSON printed to stdout.
+  it('skills rollout-status reports governance owner blocker in JSON output', async () => {
+    const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
+    // try/finally so the temp directory is removed even when an assertion fails.
+    try {
+      const configPath = join(root, 'config.yaml');
+      const managedDir = join(root, 'managed');
+      const bundledDir = join(root, 'bundled');
+      const workspaceDir = join(root, 'workspace');
+      const auditPath = join(root, 'audit.log');
+      mkdirSync(managedDir, { recursive: true });
+      mkdirSync(bundledDir, { recursive: true });
+      mkdirSync(workspaceDir, { recursive: true });
+      // Empty audit log: there are no events to summarize.
+      writeFileSync(auditPath, '', 'utf-8');
+      // shellRunnerGovernanceOwner is intentionally omitted.
+      writeSkillsCliConfig(configPath, {
+        managedDir,
+        bundledDir,
+        workspaceDir,
+        installationExecution: 'enabled',
+        allowShellRunner: true,
+        shellRunnerAllowlist: ['npm install*'],
+        auditPath,
+      });
+
+      const program = new Command();
+      registerSkillsCommand(program);
+
+      const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
+      process.exitCode = undefined;
+      // Inner try/finally so console.log and process.exitCode are restored for later tests
+      // regardless of the outcome here.
+      try {
+        await program.parseAsync(['skills', 'rollout-status', '--json', '-c', configPath], { from: 'user' });
+
+        // Fail with a clear message instead of a JSON SyntaxError when nothing was printed.
+        expect(logSpy).toHaveBeenCalled();
+        const payload = JSON.parse(String(logSpy.mock.calls[0]?.[0]));
+        expect(payload.recommendation).toBe('locked');
+        expect(payload.promotion_policy.recommendation).toBe('not_eligible');
+        expect(payload.governance.owner).toBeNull();
+        expect(payload.guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
+      } finally {
+        logSpy.mockRestore();
+        process.exitCode = undefined;
+      }
+    } finally {
+      rmSync(root, { recursive: true, force: true });
+    }
+  });
+
+  // CLI round trip: with --out, the JSON payload is written to the given file. An owner is
+  // configured and the audit log is empty, so the expected recommendation is 'guarded_observe'.
+  it('skills rollout-status writes JSON payload to output file', async () => {
+    const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
+    // try/finally so the temp directory is removed even when an assertion fails.
+    try {
+      const configPath = join(root, 'config.yaml');
+      const managedDir = join(root, 'managed');
+      const bundledDir = join(root, 'bundled');
+      const workspaceDir = join(root, 'workspace');
+      const auditPath = join(root, 'audit.log');
+      const outputPath = join(root, 'rollout-status.json');
+      mkdirSync(managedDir, { recursive: true });
+      mkdirSync(bundledDir, { recursive: true });
+      mkdirSync(workspaceDir, { recursive: true });
+      // Empty audit log: the current window has zero command results.
+      writeFileSync(auditPath, '', 'utf-8');
+      writeSkillsCliConfig(configPath, {
+        managedDir,
+        bundledDir,
+        workspaceDir,
+        installationExecution: 'enabled',
+        allowShellRunner: true,
+        shellRunnerAllowlist: ['npm install*'],
+        shellRunnerGovernanceOwner: 'skills-team',
+        auditPath,
+      });
+
+      const program = new Command();
+      registerSkillsCommand(program);
+
+      const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
+      process.exitCode = undefined;
+      // Inner try/finally so console.log and process.exitCode are restored for later tests
+      // regardless of the outcome here.
+      try {
+        await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '-c', configPath], { from: 'user' });
+
+        expect(existsSync(outputPath)).toBe(true);
+        const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
+        expect(payload.governance.owner).toBe('skills-team');
+        expect(payload.recommendation).toBe('guarded_observe');
+        expect(payload.trend.current.command_result_total).toBe(0);
+        expect(payload.promotion_policy.recommendation).toBe('not_eligible');
+      } finally {
+        logSpy.mockRestore();
+        process.exitCode = undefined;
+      }
+    } finally {
+      rmSync(root, { recursive: true, force: true });
+    }
+  });
+
+  // CLI round trip: the audit log holds one failed, allowlist-blocked command from a day ago and
+  // one successful command from eight days ago; with --days 7 the payload must report one command
+  // result in each window and a failure delta of +1.
+  it('skills rollout-status includes trend deltas across adjacent windows', async () => {
+    const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
+    // try/finally so the temp directory is removed even when an assertion fails.
+    try {
+      const configPath = join(root, 'config.yaml');
+      const managedDir = join(root, 'managed');
+      const bundledDir = join(root, 'bundled');
+      const workspaceDir = join(root, 'workspace');
+      const auditPath = join(root, 'audit.log');
+      const outputPath = join(root, 'rollout-trend.json');
+      mkdirSync(managedDir, { recursive: true });
+      mkdirSync(bundledDir, { recursive: true });
+      mkdirSync(workspaceDir, { recursive: true });
+
+      // Timestamps are relative to the real clock because the CLI is invoked without a fixed "now".
+      const now = Date.now();
+      const oneDay = 24 * 60 * 60 * 1000;
+      const events = [
+        {
+          timestamp: now - oneDay,
+          level: 'warn',
+          event_type: 'skills.installer.command_result',
+          event: {
+            skill_name: 'trend-skill',
+            phase: 'install',
+            installer_type: 'node',
+            command: 'sha256:new',
+            status: 'failed',
+            reason: 'allowlist_blocked',
+          },
+        },
+        {
+          timestamp: now - oneDay * 8,
+          level: 'info',
+          event_type: 'skills.installer.command_result',
+          event: {
+            skill_name: 'trend-skill',
+            phase: 'install',
+            installer_type: 'node',
+            command: 'legacy-prev',
+            status: 'succeeded',
+            reason: 'runner_reported_success',
+          },
+        },
+      ];
+      // The audit log is written as one JSON object per line, newline-terminated.
+      writeFileSync(auditPath, `${events.map((event) => JSON.stringify(event)).join('\n')}\n`, 'utf-8');
+
+      writeSkillsCliConfig(configPath, {
+        managedDir,
+        bundledDir,
+        workspaceDir,
+        installationExecution: 'enabled',
+        allowShellRunner: true,
+        shellRunnerAllowlist: ['npm install*'],
+        shellRunnerGovernanceOwner: 'skills-team',
+        auditPath,
+      });
+
+      const program = new Command();
+      registerSkillsCommand(program);
+
+      const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
+      process.exitCode = undefined;
+      // Inner try/finally so console.log and process.exitCode are restored for later tests
+      // regardless of the outcome here.
+      try {
+        await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '--days', '7', '-c', configPath], {
+          from: 'user',
+        });
+
+        const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
+        expect(payload.trend.current.command_result_total).toBe(1);
+        expect(payload.trend.previous.command_result_total).toBe(1);
+        expect(payload.trend.deltas.failures).toBe(1);
+        expect(payload.trend.deltas.allowlist_blocks).toBe(1);
+        expect(payload.promotion_policy.recommendation).toBe('not_eligible');
+        expect(payload.promotion_policy.blockers).toContain('failures increased by 1 vs previous window');
+      } finally {
+        logSpy.mockRestore();
+        process.exitCode = undefined;
+      }
+    } finally {
+      rmSync(root, { recursive: true, force: true });
+    }
+  });
 });