feat(skills): add rollout status governance and promotion policy checks

This commit is contained in:
William Valentin
2026-02-12 22:43:46 -08:00
parent 43b584257f
commit 7ae0fb51c2
3 changed files with 897 additions and 3 deletions
+50 -2
View File
@@ -1577,6 +1577,54 @@
"src/cli/skills.test.ts"
],
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint + pnpm build passing"
},
"shell_runner_rollout_status_and_guardrails": {
"status": "completed",
"description": "Added `skills rollout-status` with phased recommendation output (`locked|guarded_observe|guarded_review|expand_candidate`), guardrail checks for execution/audit/allowlist posture, and audit-window telemetry summary including hashed-command coverage",
"files_modified": [
"src/cli/skills.ts",
"src/cli/skills.test.ts"
],
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint + pnpm build passing"
},
"shell_runner_governance_workflow_operationalization": {
"status": "completed",
"description": "Operationalized shell-runner allowlist governance by adding explicit config-backed ownership/review/promotion criteria (`skills.shell_runner_governance`) and wiring `skills rollout-status` to enforce owner presence when shell runner is enabled",
"files_modified": [
"src/config/schema.ts",
"src/config/schema.test.ts",
"config/default.yaml",
"src/cli/skills.ts",
"src/cli/skills.test.ts"
],
"test_status": "pnpm typecheck + pnpm test:run src/config/schema.test.ts src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
},
"shell_runner_rollout_status_export_output": {
"status": "completed",
"description": "Extended `skills rollout-status` with `--out <path>` export support so governance and recommendation payloads can be saved as machine-readable JSON artifacts for review workflows",
"files_modified": [
"src/cli/skills.ts",
"src/cli/skills.test.ts"
],
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
},
"shell_runner_rollout_trend_snapshot": {
"status": "completed",
"description": "Added historical trend snapshots to `skills rollout-status` by comparing current and previous equal-duration windows, including deltas for failures, allowlist blocks, and hashed-command coverage in both console and JSON payloads",
"files_modified": [
"src/cli/skills.ts",
"src/cli/skills.test.ts"
],
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
},
"shell_runner_rollout_promotion_policy_checks": {
"status": "completed",
"description": "Added promotion-policy evaluation to `skills rollout-status` using governance thresholds (`review_cadence_days`, `promotion_min_success_rate`) and trend deltas, with structured blockers/recommendation in JSON and console output",
"files_modified": [
"src/cli/skills.ts",
"src/cli/skills.test.ts"
],
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
}
}
}
@@ -1605,7 +1653,7 @@
},
"overall_progress": {
"total_test_count": 1575,
"total_test_count": 1586,
"all_tests_passing": true,
"p0_completion": "3/3 (100%)",
"p1_completion": "4/4 (100%)",
@@ -1625,7 +1673,7 @@
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
"remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
"next_up": "Skills infrastructure follow-up: define phased enablement criteria for shell runner (allowlist governance, telemetry review, and rollout guardrails) now that audit command strings are hashed"
"next_up": "Skills infrastructure follow-up: expose promotion-policy status as a dedicated machine-readable contract for automation consumers (e.g., CI gate or dashboard ingest) before broader shell-runner rollout"
},
"soul_md_and_cron_create": {
"date": "2026-02-11",
+507 -1
View File
@@ -1,5 +1,5 @@
import { describe, it, expect, vi } from 'vitest';
import { mkdtempSync, mkdirSync, writeFileSync, existsSync, rmSync } from 'fs';
import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { Command } from 'commander';
@@ -27,14 +27,21 @@ import {
createShellSkillInstallerCommandRunner,
checkCommandAgainstAllowlist,
emitShellRunnerAuditEvents,
calculateShellRunnerHashCoveragePercent,
computeShellRunnerAuditTrendSnapshot,
evaluateShellRunnerPromotionPolicy,
evaluateShellRunnerRolloutGuardrails,
hashSkillInstallerAuditCommand,
recommendShellRunnerRolloutPhase,
sanitizeSkillInstallerAuditReason,
summarizeShellRunnerAuditWindow,
resolveSkillInstallerCommandRunner,
runSkillExecuteAction,
runSkillInstallAction,
registerSkillsCommand,
} from './skills.js';
import type { Skill } from '../skills/index.js';
import type { AuditEvent } from '../audit/types.js';
function buildSkill(overrides: Partial<Skill>): Skill {
return {
@@ -61,9 +68,18 @@ function writeSkillsCliConfig(
installationExecution?: 'disabled' | 'enabled';
allowShellRunner?: boolean;
shellRunnerAllowlist?: string[];
shellRunnerGovernanceOwner?: string;
auditEnabled?: boolean;
auditPath?: string;
},
): void {
const allowlist = opts.shellRunnerAllowlist ?? [];
const auditLines = opts.auditPath
? ['audit:', ` enabled: ${opts.auditEnabled ?? true}`, ` path: ${opts.auditPath}`]
: [];
const governanceOwnerLines = opts.shellRunnerGovernanceOwner
? [' shell_runner_governance:', ` owner: '${opts.shellRunnerGovernanceOwner}'`]
: [];
writeFileSync(
configPath,
[
@@ -78,6 +94,8 @@ function writeSkillsCliConfig(
` installation_execution: ${opts.installationExecution ?? 'disabled'}`,
` allow_shell_runner: ${opts.allowShellRunner ?? false}`,
` shell_runner_allowlist: [${allowlist.map((item) => `'${item}'`).join(', ')}]`,
...governanceOwnerLines,
...auditLines,
].join('\n'),
'utf-8',
);
@@ -503,6 +521,333 @@ describe('skills CLI helpers', () => {
expect(sanitizeSkillInstallerAuditReason('allowlist_blocked')).toBe('allowlist_blocked');
});
// Worst-case posture: execution disabled, shell runner off, a bare '*' allowlist entry and
// audit disabled. Each of those conditions is expected to yield exactly one blocker, in the
// order asserted below.
it('reports shell runner rollout guardrail blockers', () => {
const guardrails = evaluateShellRunnerRolloutGuardrails(
{
installation_execution: 'disabled',
allow_shell_runner: false,
shell_runner_allowlist: ['*'],
// No owner is configured, yet no owner blocker is expected: allow_shell_runner is false here.
shell_runner_governance: {
review_cadence_days: 7,
promotion_min_success_rate: 0.9,
},
load: { watch: false, watch_debounce_ms: 250 },
},
// Second argument is auditEnabled.
false,
);
// toEqual pins both the set of blockers and their order.
expect(guardrails.blockers).toEqual([
'skills.installation_execution must be enabled',
'skills.allow_shell_runner must be true',
"skills.shell_runner_allowlist cannot include wildcard-only '*' patterns",
'audit.enabled must be true for shell runner rollout review',
]);
});
// Execution is enabled, the allowlist holds a specific pattern and audit is on, so the
// missing `shell_runner_governance.owner` is the condition under test.
it('requires governance owner when shell runner is enabled', () => {
const guardrails = evaluateShellRunnerRolloutGuardrails(
{
installation_execution: 'enabled',
allow_shell_runner: true,
shell_runner_allowlist: ['npm install*'],
// `owner` is intentionally omitted.
shell_runner_governance: {
review_cadence_days: 7,
promotion_min_success_rate: 0.9,
},
load: { watch: false, watch_debounce_ms: 250 },
},
// Second argument is auditEnabled.
true,
);
expect(guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
});
// Feeds three audit events (one hashed success, one unhashed allowlist-blocked failure, one
// execution_blocked event) through the summarizer and checks every counter.
it('summarizes shell runner audit windows with hash coverage and failures', () => {
const events: AuditEvent[] = [
// Successful command result whose command carries the 'sha256:' prefix (counts as hashed).
{
timestamp: 1,
level: 'debug',
event_type: 'skills.installer.command_result',
event: {
skill_name: 'audit-skill',
phase: 'install',
installer_type: 'node',
command: 'sha256:abc',
status: 'succeeded',
reason: 'runner_reported_success',
},
},
// Failed command result with a plain-text command (counts as unhashed) and an
// 'allowlist_blocked' reason.
{
timestamp: 2,
level: 'warn',
event_type: 'skills.installer.command_result',
event: {
skill_name: 'audit-skill',
phase: 'install',
installer_type: 'download',
command: 'download https://example.com/pkg.tgz',
status: 'failed',
reason: 'allowlist_blocked',
},
},
// Not a command result: only expected to increment execution_blocked.
{
timestamp: 3,
level: 'warn',
event_type: 'skills.installer.execution_blocked',
event: {
skill_name: 'audit-skill',
phase: 'execute',
execution_requested: true,
execution_enabled: false,
reason: 'execution_policy_disabled',
attempted_command_count: 1,
},
},
];
expect(summarizeShellRunnerAuditWindow(events)).toEqual({
command_result_total: 2,
command_result_failed: 1,
allowlist_blocked: 1,
execution_blocked: 1,
hashed_command_count: 1,
unhashed_command_count: 1,
});
});
// Covers the empty-summary case (expected 0) and a 3-of-4 hashed case (expected 75).
it('calculates hash coverage percentage for shell runner summaries', () => {
// No hashed or unhashed commands at all: coverage is reported as 0.
expect(
calculateShellRunnerHashCoveragePercent({
command_result_total: 0,
command_result_failed: 0,
allowlist_blocked: 0,
execution_blocked: 0,
hashed_command_count: 0,
unhashed_command_count: 0,
}),
).toBe(0);
// 3 hashed out of 4 observed commands.
expect(
calculateShellRunnerHashCoveragePercent({
command_result_total: 4,
command_result_failed: 0,
allowlist_blocked: 0,
execution_blocked: 0,
hashed_command_count: 3,
unhashed_command_count: 1,
}),
).toBe(75);
});
// Places two events in the current 7-day window and two in the equal-length window before it,
// then checks the per-window counters and the current-minus-previous deltas.
it('computes shell runner trend snapshot across current and previous windows', () => {
// Fixed synthetic clock; the window start ends up negative, which only matters as a number
// for the timestamp comparisons.
const now = 1_000_000;
const oneDay = 24 * 60 * 60 * 1000;
const window = 7 * oneDay;
const currentWindowStart = now - window;
const events: AuditEvent[] = [
// Current window: hashed failure with a non-allowlist reason.
{
timestamp: now - oneDay,
level: 'warn',
event_type: 'skills.installer.command_result',
event: {
skill_name: 'demo',
phase: 'install',
installer_type: 'node',
command: 'sha256:new-a',
status: 'failed',
reason: 'exit_code_1',
},
},
// Current window: hashed failure caused by an allowlist block.
{
timestamp: now - oneDay * 2,
level: 'warn',
event_type: 'skills.installer.command_result',
event: {
skill_name: 'demo',
phase: 'install',
installer_type: 'node',
command: 'sha256:new-b',
status: 'failed',
reason: 'allowlist_blocked',
},
},
// Previous window: hashed failure caused by an allowlist block.
{
timestamp: now - window - oneDay,
level: 'warn',
event_type: 'skills.installer.command_result',
event: {
skill_name: 'demo',
phase: 'install',
installer_type: 'node',
command: 'sha256:old-a',
status: 'failed',
reason: 'allowlist_blocked',
},
},
// Previous window: success with an unhashed command string.
{
timestamp: now - window - oneDay * 2,
level: 'info',
event_type: 'skills.installer.command_result',
event: {
skill_name: 'demo',
phase: 'install',
installer_type: 'node',
command: 'legacy-old-command',
status: 'succeeded',
reason: 'runner_reported_success',
},
},
];
const trend = computeShellRunnerAuditTrendSnapshot({
events,
currentWindowStartMs: currentWindowStart,
currentWindowEndMs: now,
});
// Failures: 2 current vs 1 previous.
expect(trend.current.command_result_failed).toBe(2);
expect(trend.previous.command_result_failed).toBe(1);
expect(trend.deltas.failures).toBe(1);
// Allowlist blocks: 1 in each window, so the delta is 0.
expect(trend.current.allowlist_blocked).toBe(1);
expect(trend.previous.allowlist_blocked).toBe(1);
expect(trend.deltas.allowlist_blocks).toBe(0);
// Hash coverage: 2/2 hashed now (100%) vs 1/2 hashed before (50%).
expect(trend.deltas.hash_coverage_pct).toBe(50);
});
// Unhealthy case: 3 of 4 commands succeeded (75%, below the 0.9 minimum) and failures rose
// by 1 against the previous window, so promotion must be blocked on both counts.
it('evaluates promotion policy with governance cadence and success thresholds', () => {
const policy = evaluateShellRunnerPromotionPolicy({
trend: {
current: {
command_result_total: 4,
command_result_failed: 1,
allowlist_blocked: 0,
execution_blocked: 0,
hashed_command_count: 4,
unhashed_command_count: 0,
},
previous: {
command_result_total: 4,
command_result_failed: 0,
allowlist_blocked: 0,
execution_blocked: 0,
hashed_command_count: 4,
unhashed_command_count: 0,
},
deltas: {
failures: 1,
allowlist_blocks: 0,
hash_coverage_pct: 0,
},
},
// Reviewed window equals the cadence, so no cadence blocker is involved.
reviewedWindowDays: 7,
governance: {
review_cadence_days: 7,
promotion_min_success_rate: 0.9,
},
});
expect(policy.eligible).toBe(false);
expect(policy.recommendation).toBe('not_eligible');
expect(policy.blockers).toContain('success rate 75.00% below minimum 90.00%');
expect(policy.blockers).toContain('failures increased by 1 vs previous window');
});
// Healthy case: 5 of 5 commands succeeded, the reviewed window matches the cadence, and both
// failure and allowlist-block deltas are negative, so no blocker is expected.
it('marks promotion policy eligible when thresholds and trends are healthy', () => {
const policy = evaluateShellRunnerPromotionPolicy({
trend: {
current: {
command_result_total: 5,
command_result_failed: 0,
allowlist_blocked: 0,
execution_blocked: 0,
hashed_command_count: 5,
unhashed_command_count: 0,
},
previous: {
command_result_total: 5,
command_result_failed: 1,
allowlist_blocked: 1,
execution_blocked: 0,
hashed_command_count: 4,
unhashed_command_count: 1,
},
deltas: {
failures: -1,
allowlist_blocks: -1,
hash_coverage_pct: 20,
},
},
reviewedWindowDays: 7,
governance: {
review_cadence_days: 7,
promotion_min_success_rate: 0.9,
},
});
expect(policy.eligible).toBe(true);
expect(policy.recommendation).toBe('eligible');
expect(policy.blockers).toEqual([]);
});
// Exercises each of the four rollout recommendations in turn.
it('recommends rollout phase from guardrails and audit summary', () => {
// Any guardrail blocker yields 'locked', even though the audit summary itself is clean.
expect(
recommendShellRunnerRolloutPhase(
{ blockers: ['skills.installation_execution must be enabled'] },
{
command_result_total: 1,
command_result_failed: 0,
allowlist_blocked: 0,
execution_blocked: 0,
hashed_command_count: 1,
unhashed_command_count: 0,
},
),
).toBe('locked');
// No blockers and no command results in the window yields 'guarded_observe'.
expect(
recommendShellRunnerRolloutPhase(
{ blockers: [] },
{
command_result_total: 0,
command_result_failed: 0,
allowlist_blocked: 0,
execution_blocked: 0,
hashed_command_count: 0,
unhashed_command_count: 0,
},
),
).toBe('guarded_observe');
// Results exist and one of them failed, which yields 'guarded_review'.
expect(
recommendShellRunnerRolloutPhase(
{ blockers: [] },
{
command_result_total: 4,
command_result_failed: 1,
allowlist_blocked: 0,
execution_blocked: 0,
hashed_command_count: 4,
unhashed_command_count: 0,
},
),
).toBe('guarded_review');
// Results exist, none failed and all are hashed, which yields 'expand_candidate'.
expect(
recommendShellRunnerRolloutPhase(
{ blockers: [] },
{
command_result_total: 3,
command_result_failed: 0,
allowlist_blocked: 0,
execution_blocked: 0,
hashed_command_count: 3,
unhashed_command_count: 0,
},
),
).toBe('expand_candidate');
});
it('emits hashed command values for both successful and failed audit command results', () => {
const logger = {
skillsInstallerExecutionBlocked: vi.fn(),
@@ -1732,4 +2077,165 @@ describe('skills CLI helpers', () => {
process.exitCode = undefined;
rmSync(root, { recursive: true, force: true });
});
// End-to-end check of `skills rollout-status --json` when the shell runner is enabled but no
// governance owner is configured: the payload must be locked and name the owner blocker.
it('skills rollout-status reports governance owner blocker in JSON output', async () => {
  const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
  try {
    const configPath = join(root, 'config.yaml');
    const managedDir = join(root, 'managed');
    const bundledDir = join(root, 'bundled');
    const workspaceDir = join(root, 'workspace');
    const auditPath = join(root, 'audit.log');
    mkdirSync(managedDir, { recursive: true });
    mkdirSync(bundledDir, { recursive: true });
    mkdirSync(workspaceDir, { recursive: true });
    // Empty audit log: the command has no events to summarize.
    writeFileSync(auditPath, '', 'utf-8');
    // shellRunnerGovernanceOwner is deliberately omitted.
    writeSkillsCliConfig(configPath, {
      managedDir,
      bundledDir,
      workspaceDir,
      installationExecution: 'enabled',
      allowShellRunner: true,
      shellRunnerAllowlist: ['npm install*'],
      auditPath,
    });
    const program = new Command();
    registerSkillsCommand(program);
    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
    process.exitCode = undefined;
    try {
      await program.parseAsync(['skills', 'rollout-status', '--json', '-c', configPath], { from: 'user' });
      // The JSON payload is the first (and only expected) console.log call.
      const payload = JSON.parse(String(logSpy.mock.calls[0]?.[0]));
      expect(payload.recommendation).toBe('locked');
      expect(payload.promotion_policy.recommendation).toBe('not_eligible');
      expect(payload.governance.owner).toBeNull();
      expect(payload.guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
    } finally {
      // Restore global state even when an assertion above throws, so a failure here cannot
      // leave console.log mocked or a stale exit code behind for later tests.
      logSpy.mockRestore();
      process.exitCode = undefined;
    }
  } finally {
    // Always remove the temp tree, including on failure.
    rmSync(root, { recursive: true, force: true });
  }
});
// End-to-end check of `skills rollout-status --json --out <path>`: with a configured owner and
// an empty audit log, the exported file must hold the payload in the 'guarded_observe' phase.
it('skills rollout-status writes JSON payload to output file', async () => {
  const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
  try {
    const configPath = join(root, 'config.yaml');
    const managedDir = join(root, 'managed');
    const bundledDir = join(root, 'bundled');
    const workspaceDir = join(root, 'workspace');
    const auditPath = join(root, 'audit.log');
    const outputPath = join(root, 'rollout-status.json');
    mkdirSync(managedDir, { recursive: true });
    mkdirSync(bundledDir, { recursive: true });
    mkdirSync(workspaceDir, { recursive: true });
    // Empty audit log: zero command results in the current window.
    writeFileSync(auditPath, '', 'utf-8');
    writeSkillsCliConfig(configPath, {
      managedDir,
      bundledDir,
      workspaceDir,
      installationExecution: 'enabled',
      allowShellRunner: true,
      shellRunnerAllowlist: ['npm install*'],
      shellRunnerGovernanceOwner: 'skills-team',
      auditPath,
    });
    const program = new Command();
    registerSkillsCommand(program);
    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
    process.exitCode = undefined;
    try {
      await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '-c', configPath], { from: 'user' });
      expect(existsSync(outputPath)).toBe(true);
      const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
      expect(payload.governance.owner).toBe('skills-team');
      expect(payload.recommendation).toBe('guarded_observe');
      expect(payload.trend.current.command_result_total).toBe(0);
      // No command results in the window, so promotion cannot be eligible.
      expect(payload.promotion_policy.recommendation).toBe('not_eligible');
    } finally {
      // Restore global state even when an assertion above throws.
      logSpy.mockRestore();
      process.exitCode = undefined;
    }
  } finally {
    // Always remove the temp tree, including on failure.
    rmSync(root, { recursive: true, force: true });
  }
});
// End-to-end check of the trend snapshot: one allowlist-blocked failure one day ago (current
// 7-day window) and one success eight days ago (previous window) must produce +1 deltas for
// failures and allowlist blocks, and block promotion.
it('skills rollout-status includes trend deltas across adjacent windows', async () => {
  const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
  try {
    const configPath = join(root, 'config.yaml');
    const managedDir = join(root, 'managed');
    const bundledDir = join(root, 'bundled');
    const workspaceDir = join(root, 'workspace');
    const auditPath = join(root, 'audit.log');
    const outputPath = join(root, 'rollout-trend.json');
    mkdirSync(managedDir, { recursive: true });
    mkdirSync(bundledDir, { recursive: true });
    mkdirSync(workspaceDir, { recursive: true });
    const now = Date.now();
    const oneDay = 24 * 60 * 60 * 1000;
    const events = [
      // Current window: hashed command that failed on the allowlist.
      {
        timestamp: now - oneDay,
        level: 'warn',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'trend-skill',
          phase: 'install',
          installer_type: 'node',
          command: 'sha256:new',
          status: 'failed',
          reason: 'allowlist_blocked',
        },
      },
      // Previous window (8 days back with --days 7): unhashed command that succeeded.
      {
        timestamp: now - oneDay * 8,
        level: 'info',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'trend-skill',
          phase: 'install',
          installer_type: 'node',
          command: 'legacy-prev',
          status: 'succeeded',
          reason: 'runner_reported_success',
        },
      },
    ];
    // The audit log is written as one JSON document per line.
    writeFileSync(auditPath, `${events.map((event) => JSON.stringify(event)).join('\n')}\n`, 'utf-8');
    writeSkillsCliConfig(configPath, {
      managedDir,
      bundledDir,
      workspaceDir,
      installationExecution: 'enabled',
      allowShellRunner: true,
      shellRunnerAllowlist: ['npm install*'],
      shellRunnerGovernanceOwner: 'skills-team',
      auditPath,
    });
    const program = new Command();
    registerSkillsCommand(program);
    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
    process.exitCode = undefined;
    try {
      await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '--days', '7', '-c', configPath], {
        from: 'user',
      });
      const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
      expect(payload.trend.current.command_result_total).toBe(1);
      expect(payload.trend.previous.command_result_total).toBe(1);
      expect(payload.trend.deltas.failures).toBe(1);
      expect(payload.trend.deltas.allowlist_blocks).toBe(1);
      expect(payload.promotion_policy.recommendation).toBe('not_eligible');
      expect(payload.promotion_policy.blockers).toContain('failures increased by 1 vs previous window');
    } finally {
      // Restore global state even when an assertion above throws.
      logSpy.mockRestore();
      process.exitCode = undefined;
    }
  } finally {
    // Always remove the temp tree, including on failure.
    rmSync(root, { recursive: true, force: true });
  }
});
});
+340
View File
@@ -2,8 +2,12 @@ import type { Command } from 'commander';
import { resolve } from 'path';
import { homedir } from 'os';
import { spawnSync } from 'child_process';
import { writeFileSync } from 'fs';
import { createHash } from 'crypto';
import { auditLogger } from '../audit/index.js';
import { queryAuditLogs } from '../audit/export.js';
import type { AuditEvent } from '../audit/types.js';
import type { Config } from '../config/schema.js';
import type { Skill } from '../skills/index.js';
import { loadAllSkills, SkillInstaller, buildInstallerPlan, loadSkill } from '../skills/index.js';
import { loadConfigSafe } from './shared.js';
@@ -92,6 +96,238 @@ export function sanitizeSkillInstallerAuditReason(reason: string): string {
return reason;
}
/** Outcome of the shell runner rollout guardrail evaluation. */
export interface ShellRunnerRolloutGuardrailStatus {
/** One human-readable message per unmet rollout precondition; empty when nothing blocks rollout. */
blockers: string[];
}
/** Counters derived from the skills-installer audit events of a single time window. */
export interface ShellRunnerAuditWindowSummary {
/** Number of `skills.installer.command_result` events. */
command_result_total: number;
/** Command results whose `status` is `'failed'`. */
command_result_failed: number;
/** Command results whose `reason` is `'allowlist_blocked'`. */
allowlist_blocked: number;
/** Number of `skills.installer.execution_blocked` events. */
execution_blocked: number;
/** Command results whose `command` value starts with `'sha256:'`. */
hashed_command_count: number;
/** Command results whose `command` value does not start with `'sha256:'`. */
unhashed_command_count: number;
}
/** Side-by-side summaries of the current window and the equal-length window preceding it. */
export interface ShellRunnerAuditTrendSnapshot {
/** Summary of the events inside the current window. */
current: ShellRunnerAuditWindowSummary;
/** Summary of the events inside the window immediately before the current one. */
previous: ShellRunnerAuditWindowSummary;
/** Current-minus-previous differences; positive values mean the figure grew. */
deltas: {
/** Change in failed command results. */
failures: number;
/** Change in allowlist-blocked command results. */
allowlist_blocks: number;
/** Change in hashed-command coverage, in percentage points. */
hash_coverage_pct: number;
};
}
/** Result of checking a trend snapshot against the governance promotion thresholds. */
export interface ShellRunnerPromotionPolicyStatus {
/** True only when `blockers` is empty. */
eligible: boolean;
/** String form of `eligible`. */
recommendation: 'eligible' | 'not_eligible';
/** Governance review cadence, in days, that the evaluation was run against. */
cadence_days: number;
/** Length, in days, of the window that was actually reviewed. */
reviewed_window_days: number;
/** Fraction (0..1) of current-window command results that did not fail; 0 when there were none. */
success_rate: number;
/** Minimum success fraction required by governance. */
minimum_success_rate: number;
/** Copy of the trend's failure delta. */
failures_delta: number;
/** Copy of the trend's allowlist-block delta. */
allowlist_blocks_delta: number;
/** Copy of the trend's hash-coverage delta, in percentage points. */
hash_coverage_delta_pct: number;
/** Human-readable reasons why promotion is not allowed; empty when eligible. */
blockers: string[];
}
/**
 * Rollout phase recommended for the shell runner:
 * - `locked`: at least one guardrail blocker is present.
 * - `guarded_observe`: no blockers, but no command results in the window yet.
 * - `guarded_review`: results exist and include failures or unhashed commands.
 * - `expand_candidate`: results exist, none failed and all are hashed.
 */
export type ShellRunnerRolloutRecommendation = 'locked' | 'guarded_observe' | 'guarded_review' | 'expand_candidate';
/**
 * Lists the configuration problems that currently block a shell runner rollout.
 *
 * Checks, in this order: installer execution is enabled, the shell runner is allowed, the
 * allowlist is non-empty, the allowlist has no bare `*` entry, a governance owner is set
 * (only when the shell runner is allowed), and audit logging is enabled.
 *
 * @param skillsConfig - The `skills` section of the loaded config.
 * @param auditEnabled - Whether audit logging is enabled.
 * @returns The blocker messages in check order; an empty list means no guardrail is violated.
 */
export function evaluateShellRunnerRolloutGuardrails(
  skillsConfig: Config['skills'],
  auditEnabled: boolean,
): ShellRunnerRolloutGuardrailStatus {
  const blockers: string[] = [];

  if (skillsConfig.installation_execution !== 'enabled') {
    blockers.push('skills.installation_execution must be enabled');
  }
  if (!skillsConfig.allow_shell_runner) {
    blockers.push('skills.allow_shell_runner must be true');
  }

  const allowlist = skillsConfig.shell_runner_allowlist;
  if (allowlist.length === 0) {
    blockers.push('skills.shell_runner_allowlist must include at least one pattern');
  }
  if (allowlist.some((pattern) => pattern.trim() === '*')) {
    blockers.push("skills.shell_runner_allowlist cannot include wildcard-only '*' patterns");
  }

  // A whitespace-only owner names nobody, so treat it the same as a missing owner.
  const owner = skillsConfig.shell_runner_governance.owner;
  const hasOwner = typeof owner === 'string' && owner.trim().length > 0;
  if (skillsConfig.allow_shell_runner && !hasOwner) {
    blockers.push('skills.shell_runner_governance.owner must be set when shell runner is enabled');
  }

  if (!auditEnabled) {
    blockers.push('audit.enabled must be true for shell runner rollout review');
  }
  return { blockers };
}
/**
 * Tallies skills-installer audit events into a window summary.
 *
 * `skills.installer.command_result` events feed the total, failed, allowlist-blocked and
 * hashed/unhashed counters; `skills.installer.execution_blocked` events feed only
 * `execution_blocked`. Every other event type is ignored.
 *
 * @param events - Audit events belonging to a single window.
 * @returns The counters for that window (all zero for an empty list).
 */
export function summarizeShellRunnerAuditWindow(events: AuditEvent[]): ShellRunnerAuditWindowSummary {
  const totals: ShellRunnerAuditWindowSummary = {
    command_result_total: 0,
    command_result_failed: 0,
    allowlist_blocked: 0,
    execution_blocked: 0,
    hashed_command_count: 0,
    unhashed_command_count: 0,
  };
  // Payload fields are untyped; anything that is not a string is read as ''.
  const readText = (value: unknown): string => (typeof value === 'string' ? value : '');

  for (const entry of events) {
    switch (entry.event_type) {
      case 'skills.installer.command_result': {
        totals.command_result_total += 1;
        const details = entry.event as Record<string, unknown>;
        const outcome = readText(details.status);
        const cause = readText(details.reason);
        const commandText = readText(details.command);
        if (outcome === 'failed') {
          totals.command_result_failed += 1;
        }
        if (cause === 'allowlist_blocked') {
          totals.allowlist_blocked += 1;
        }
        // Hashed audit commands carry a 'sha256:' prefix; everything else counts as unhashed.
        if (commandText.startsWith('sha256:')) {
          totals.hashed_command_count += 1;
        } else {
          totals.unhashed_command_count += 1;
        }
        break;
      }
      case 'skills.installer.execution_blocked':
        totals.execution_blocked += 1;
        break;
      default:
        break;
    }
  }
  return totals;
}
/**
 * Computes the share of audited commands that were recorded in hashed form.
 *
 * @param summary - Window summary providing the hashed and unhashed counts.
 * @returns A percentage in the range 0..100; 0 when no commands were observed at all.
 */
export function calculateShellRunnerHashCoveragePercent(summary: ShellRunnerAuditWindowSummary): number {
  const { hashed_command_count: hashed, unhashed_command_count: unhashed } = summary;
  const observed = hashed + unhashed;
  // Guard the empty window so the division below never produces NaN.
  return observed === 0 ? 0 : (hashed / observed) * 100;
}
/**
 * Builds a trend snapshot comparing the current window with the equal-length window before it.
 *
 * The current window is `[currentWindowStartMs, currentWindowEndMs]` (both ends inclusive).
 * The previous window has the same duration and ends just before `currentWindowStartMs`
 * (start inclusive, end exclusive). Events outside both windows are ignored.
 *
 * @param args.events - Audit events to partition by timestamp.
 * @param args.currentWindowStartMs - Start of the current window, in epoch milliseconds.
 * @param args.currentWindowEndMs - End of the current window, in epoch milliseconds.
 * @returns Both window summaries plus current-minus-previous deltas.
 */
export function computeShellRunnerAuditTrendSnapshot(args: {
  events: AuditEvent[];
  currentWindowStartMs: number;
  currentWindowEndMs: number;
}): ShellRunnerAuditTrendSnapshot {
  const { events, currentWindowStartMs: currentStart, currentWindowEndMs: currentEnd } = args;
  const previousStart = currentStart - (currentEnd - currentStart);

  // Single pass: the two windows are disjoint, so each event lands in at most one bucket.
  const inCurrent: AuditEvent[] = [];
  const inPrevious: AuditEvent[] = [];
  for (const entry of events) {
    const at = entry.timestamp;
    if (at >= currentStart && at <= currentEnd) {
      inCurrent.push(entry);
    } else if (at >= previousStart && at < currentStart) {
      inPrevious.push(entry);
    }
  }

  const current = summarizeShellRunnerAuditWindow(inCurrent);
  const previous = summarizeShellRunnerAuditWindow(inPrevious);
  const coverageNow = calculateShellRunnerHashCoveragePercent(current);
  const coverageBefore = calculateShellRunnerHashCoveragePercent(previous);

  return {
    current,
    previous,
    deltas: {
      failures: current.command_result_failed - previous.command_result_failed,
      allowlist_blocks: current.allowlist_blocked - previous.allowlist_blocked,
      hash_coverage_pct: coverageNow - coverageBefore,
    },
  };
}
/**
 * Decides whether the shell runner is eligible for promotion to a broader rollout.
 *
 * Promotion is blocked when any of the following holds:
 * - the reviewed window is longer than the governance review cadence;
 * - the current window contains no command results;
 * - the current success rate is below the governance minimum (checked only when results
 *   exist, so an empty window is not also reported as a "0.00%" success rate);
 * - failures increased compared with the previous window;
 * - allowlist blocks increased compared with the previous window.
 *
 * @param args.trend - Trend snapshot for the current and previous windows.
 * @param args.reviewedWindowDays - Length of the reviewed window, in days.
 * @param args.governance - Review cadence (days) and minimum success rate (fraction 0..1).
 * @returns The policy status; `success_rate` is 0 when the current window has no results.
 */
export function evaluateShellRunnerPromotionPolicy(args: {
  trend: ShellRunnerAuditTrendSnapshot;
  reviewedWindowDays: number;
  governance: {
    review_cadence_days: number;
    promotion_min_success_rate: number;
  };
}): ShellRunnerPromotionPolicyStatus {
  const { trend, reviewedWindowDays, governance } = args;
  const { command_result_total: total, command_result_failed: failed } = trend.current;
  const hasResults = total !== 0;
  const successRate = hasResults ? (total - failed) / total : 0;
  const blockers: string[] = [];

  if (reviewedWindowDays > governance.review_cadence_days) {
    blockers.push(
      `review window (${reviewedWindowDays}d) exceeds governance cadence (${governance.review_cadence_days}d)`,
    );
  }
  if (!hasResults) {
    // Without data the success rate is undefined; report the missing data, not a fake 0%.
    blockers.push('no shell-runner command results in current window');
  } else if (successRate < governance.promotion_min_success_rate) {
    blockers.push(
      `success rate ${(successRate * 100).toFixed(2)}% below minimum ${(governance.promotion_min_success_rate * 100).toFixed(2)}%`,
    );
  }
  if (trend.deltas.failures > 0) {
    blockers.push(`failures increased by ${trend.deltas.failures} vs previous window`);
  }
  if (trend.deltas.allowlist_blocks > 0) {
    blockers.push(`allowlist blocks increased by ${trend.deltas.allowlist_blocks} vs previous window`);
  }

  const eligible = blockers.length === 0;
  return {
    eligible,
    recommendation: eligible ? 'eligible' : 'not_eligible',
    cadence_days: governance.review_cadence_days,
    reviewed_window_days: reviewedWindowDays,
    success_rate: successRate,
    minimum_success_rate: governance.promotion_min_success_rate,
    failures_delta: trend.deltas.failures,
    allowlist_blocks_delta: trend.deltas.allowlist_blocks,
    hash_coverage_delta_pct: trend.deltas.hash_coverage_pct,
    blockers,
  };
}
/**
 * Maps guardrail status and audit telemetry to a rollout phase recommendation.
 *
 * @param guardrails - Guardrail evaluation; any blocker forces `'locked'`.
 * @param summary - Audit summary for the window under review.
 * @returns `'locked'` when blocked, `'guarded_observe'` when there are no command results,
 *   `'guarded_review'` when failures or unhashed commands were seen, otherwise
 *   `'expand_candidate'`.
 */
export function recommendShellRunnerRolloutPhase(
  guardrails: ShellRunnerRolloutGuardrailStatus,
  summary: ShellRunnerAuditWindowSummary,
): ShellRunnerRolloutRecommendation {
  const isBlocked = guardrails.blockers.length > 0;
  if (isBlocked) {
    return 'locked';
  }

  const { command_result_total: observed, command_result_failed: failed, unhashed_command_count: unhashed } = summary;
  if (observed === 0) {
    return 'guarded_observe';
  }

  const needsReview = unhashed > 0 || failed > 0;
  return needsReview ? 'guarded_review' : 'expand_candidate';
}
/**
 * Resolves a user-supplied path to an absolute path, expanding a leading home marker.
 *
 * `~` on its own and the `~/` prefix are replaced with the current user's home directory.
 * Any other value (including `~name/...`) is resolved against the current working directory.
 *
 * @param pathValue - Path as written in config or on the command line.
 * @returns The absolute, normalized path.
 */
function expandHomePath(pathValue: string): string {
  // A bare '~' means the home directory itself; without this case it would resolve to
  // '<cwd>/~'.
  if (pathValue === '~') {
    return resolve(homedir());
  }
  if (pathValue.startsWith('~/')) {
    return resolve(homedir(), pathValue.slice(2));
  }
  return resolve(pathValue);
}
interface SkillShellRunnerAuditLogger {
skillsInstallerExecutionBlocked(event: {
skill_name: string;
@@ -1097,6 +1333,110 @@ export function registerSkillsCommand(program: Command): void {
console.log(renderSkillInstallerPlan(view));
});
// `skills rollout-status`: reports shell runner guardrail blockers, audit telemetry for the
// last N days, trend deltas against the preceding N days, and the promotion-policy verdict.
// Output goes to the console (text or `--json`) and optionally to a JSON file (`--out`).
skills
  .command('rollout-status')
  .description('Show shell runner rollout guardrails and audit review summary')
  .option('--days <n>', 'Look back N days in audit logs (default: 7)', '7')
  .option('--out <path>', 'Write rollout JSON payload to file')
  .option('--json', 'Output as JSON')
  .option('-c, --config <path>', 'Config file path')
  .action(async (opts: { days?: string; out?: string; json?: boolean; config?: string }) => {
    const loaded = loadConfigSafe(opts.config);
    if (loaded.error || !loaded.config) {
      console.error(loaded.error ?? 'Failed to load config');
      process.exitCode = 1;
      return;
    }

    // Accept plain decimal digits only: Number.parseInt on its own would read "7abc" as 7
    // and "1.5" as 1, silently reviewing a window the operator did not ask for.
    const daysText = (opts.days ?? '7').trim();
    const parsedDays = /^\d+$/.test(daysText) ? Number.parseInt(daysText, 10) : Number.NaN;
    if (!Number.isSafeInteger(parsedDays) || parsedDays <= 0) {
      console.error('`--days` must be a positive integer.');
      process.exitCode = 1;
      return;
    }

    const guardrails = evaluateShellRunnerRolloutGuardrails(loaded.config.skills, loaded.config.audit.enabled);

    // Query twice the requested window so the previous, equal-length window is available
    // for the trend comparison.
    const windowDurationMs = parsedDays * 24 * 60 * 60 * 1000;
    const nowMs = Date.now();
    const currentWindowStartMs = nowMs - windowDurationMs;
    const queryStartMs = nowMs - windowDurationMs * 2;
    const auditPath = expandHomePath(loaded.config.audit.path);
    const auditEvents = await queryAuditLogs(auditPath, {
      start_time: queryStartMs,
      event_types: ['skills.installer.command_result', 'skills.installer.execution_blocked'],
    });
    const trend = computeShellRunnerAuditTrendSnapshot({
      events: auditEvents,
      currentWindowStartMs,
      currentWindowEndMs: nowMs,
    });
    const recommendation = recommendShellRunnerRolloutPhase(guardrails, trend.current);
    const governance = loaded.config.skills.shell_runner_governance;
    const promotionPolicy = evaluateShellRunnerPromotionPolicy({
      trend,
      reviewedWindowDays: parsedDays,
      governance: {
        review_cadence_days: governance.review_cadence_days,
        promotion_min_success_rate: governance.promotion_min_success_rate,
      },
    });

    const rolloutPayload = {
      days: parsedDays,
      guardrails,
      summary: trend.current,
      trend,
      recommendation,
      promotion_policy: promotionPolicy,
      governance: {
        owner: governance.owner ?? null,
        review_cadence_days: governance.review_cadence_days,
        promotion_min_success_rate: governance.promotion_min_success_rate,
      },
    };

    // Resolve the export path once so the write and the confirmation message agree.
    const outPath = opts.out ? expandHomePath(opts.out) : undefined;
    if (outPath) {
      try {
        writeFileSync(outPath, JSON.stringify(rolloutPayload, null, 2), 'utf-8');
      } catch (error: unknown) {
        // A missing directory or permission problem is an operator error: report it the
        // same way as the other failures of this command instead of throwing.
        const detail = error instanceof Error ? error.message : String(error);
        console.error(`Failed to write rollout payload to ${outPath}: ${detail}`);
        process.exitCode = 1;
        return;
      }
    }

    if (opts.json) {
      console.log(JSON.stringify(rolloutPayload, null, 2));
      return;
    }

    console.log('Shell runner rollout status');
    console.log(`Window: ${parsedDays}d`);
    console.log(`Recommendation: ${recommendation}`);
    console.log(`Governance owner: ${governance.owner ?? '(unset)'}`);
    console.log(`Governance review cadence (days): ${governance.review_cadence_days}`);
    console.log(`Governance promotion min success rate: ${governance.promotion_min_success_rate}`);
    console.log(`Guardrail blockers: ${guardrails.blockers.length}`);
    for (const blocker of guardrails.blockers) {
      console.log(`- ${blocker}`);
    }
    console.log(`Audited command results: ${trend.current.command_result_total}`);
    console.log(`Audited failures: ${trend.current.command_result_failed}`);
    console.log(`Allowlist blocks: ${trend.current.allowlist_blocked}`);
    console.log(`Execution blocks: ${trend.current.execution_blocked}`);
    console.log(`Hashed command payloads: ${trend.current.hashed_command_count}`);
    console.log(`Unhashed command payloads: ${trend.current.unhashed_command_count}`);
    console.log(`Failure delta vs previous window: ${trend.deltas.failures}`);
    console.log(`Allowlist block delta vs previous window: ${trend.deltas.allowlist_blocks}`);
    console.log(`Hash coverage delta vs previous window (%): ${trend.deltas.hash_coverage_pct.toFixed(2)}`);
    console.log(
      `Promotion policy: ${promotionPolicy.recommendation} (success ${(promotionPolicy.success_rate * 100).toFixed(2)}% / min ${(promotionPolicy.minimum_success_rate * 100).toFixed(2)}%)`,
    );
    for (const blocker of promotionPolicy.blockers) {
      console.log(`- ${blocker}`);
    }
    if (outPath) {
      console.log(`Wrote rollout payload: ${outPath}`);
    }
  });
skills
.command('execute <name>')
.description('Preview or execute installer steps for an installed skill')