feat(skills): add rollout status governance and promotion policy checks
This commit is contained in:
+507
-1
@@ -1,5 +1,5 @@
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, existsSync, rmSync } from 'fs';
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { Command } from 'commander';
|
||||
@@ -27,14 +27,21 @@ import {
|
||||
createShellSkillInstallerCommandRunner,
|
||||
checkCommandAgainstAllowlist,
|
||||
emitShellRunnerAuditEvents,
|
||||
calculateShellRunnerHashCoveragePercent,
|
||||
computeShellRunnerAuditTrendSnapshot,
|
||||
evaluateShellRunnerPromotionPolicy,
|
||||
evaluateShellRunnerRolloutGuardrails,
|
||||
hashSkillInstallerAuditCommand,
|
||||
recommendShellRunnerRolloutPhase,
|
||||
sanitizeSkillInstallerAuditReason,
|
||||
summarizeShellRunnerAuditWindow,
|
||||
resolveSkillInstallerCommandRunner,
|
||||
runSkillExecuteAction,
|
||||
runSkillInstallAction,
|
||||
registerSkillsCommand,
|
||||
} from './skills.js';
|
||||
import type { Skill } from '../skills/index.js';
|
||||
import type { AuditEvent } from '../audit/types.js';
|
||||
|
||||
function buildSkill(overrides: Partial<Skill>): Skill {
|
||||
return {
|
||||
@@ -61,9 +68,18 @@ function writeSkillsCliConfig(
|
||||
installationExecution?: 'disabled' | 'enabled';
|
||||
allowShellRunner?: boolean;
|
||||
shellRunnerAllowlist?: string[];
|
||||
shellRunnerGovernanceOwner?: string;
|
||||
auditEnabled?: boolean;
|
||||
auditPath?: string;
|
||||
},
|
||||
): void {
|
||||
const allowlist = opts.shellRunnerAllowlist ?? [];
|
||||
const auditLines = opts.auditPath
|
||||
? ['audit:', ` enabled: ${opts.auditEnabled ?? true}`, ` path: ${opts.auditPath}`]
|
||||
: [];
|
||||
const governanceOwnerLines = opts.shellRunnerGovernanceOwner
|
||||
? [' shell_runner_governance:', ` owner: '${opts.shellRunnerGovernanceOwner}'`]
|
||||
: [];
|
||||
writeFileSync(
|
||||
configPath,
|
||||
[
|
||||
@@ -78,6 +94,8 @@ function writeSkillsCliConfig(
|
||||
` installation_execution: ${opts.installationExecution ?? 'disabled'}`,
|
||||
` allow_shell_runner: ${opts.allowShellRunner ?? false}`,
|
||||
` shell_runner_allowlist: [${allowlist.map((item) => `'${item}'`).join(', ')}]`,
|
||||
...governanceOwnerLines,
|
||||
...auditLines,
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
@@ -503,6 +521,333 @@ describe('skills CLI helpers', () => {
|
||||
expect(sanitizeSkillInstallerAuditReason('allowlist_blocked')).toBe('allowlist_blocked');
|
||||
});
|
||||
|
||||
// A locked-down config with audit off should report the four blockers below, in
// evaluation order. The governance-owner blocker is absent because it is only
// checked when allow_shell_runner is true.
it('reports shell runner rollout guardrail blockers', () => {
  const lockedDownSkills: Parameters<typeof evaluateShellRunnerRolloutGuardrails>[0] = {
    installation_execution: 'disabled',
    allow_shell_runner: false,
    shell_runner_allowlist: ['*'],
    shell_runner_governance: {
      review_cadence_days: 7,
      promotion_min_success_rate: 0.9,
    },
    load: { watch: false, watch_debounce_ms: 250 },
  };
  const auditEnabled = false;

  const { blockers } = evaluateShellRunnerRolloutGuardrails(lockedDownSkills, auditEnabled);

  const expectedBlockers = [
    'skills.installation_execution must be enabled',
    'skills.allow_shell_runner must be true',
    "skills.shell_runner_allowlist cannot include wildcard-only '*' patterns",
    'audit.enabled must be true for shell runner rollout review',
  ];
  expect(blockers).toEqual(expectedBlockers);
});
|
||||
|
||||
// With the shell runner switched on and no governance owner configured, the
// owner blocker must be present even though every other guardrail passes.
it('requires governance owner when shell runner is enabled', () => {
  const ownerlessSkills: Parameters<typeof evaluateShellRunnerRolloutGuardrails>[0] = {
    installation_execution: 'enabled',
    allow_shell_runner: true,
    shell_runner_allowlist: ['npm install*'],
    shell_runner_governance: {
      review_cadence_days: 7,
      promotion_min_success_rate: 0.9,
    },
    load: { watch: false, watch_debounce_ms: 250 },
  };
  const auditEnabled = true;

  const { blockers } = evaluateShellRunnerRolloutGuardrails(ownerlessSkills, auditEnabled);

  expect(blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
});
|
||||
|
||||
// Feeds one hashed success, one unhashed allowlist-blocked failure and one
// execution_blocked event through the summarizer and checks every counter.
it('summarizes shell runner audit windows with hash coverage and failures', () => {
  const hashedSuccess: AuditEvent = {
    timestamp: 1,
    level: 'debug',
    event_type: 'skills.installer.command_result',
    event: {
      skill_name: 'audit-skill',
      phase: 'install',
      installer_type: 'node',
      command: 'sha256:abc',
      status: 'succeeded',
      reason: 'runner_reported_success',
    },
  };
  const unhashedAllowlistFailure: AuditEvent = {
    timestamp: 2,
    level: 'warn',
    event_type: 'skills.installer.command_result',
    event: {
      skill_name: 'audit-skill',
      phase: 'install',
      installer_type: 'download',
      command: 'download https://example.com/pkg.tgz',
      status: 'failed',
      reason: 'allowlist_blocked',
    },
  };
  const executionBlocked: AuditEvent = {
    timestamp: 3,
    level: 'warn',
    event_type: 'skills.installer.execution_blocked',
    event: {
      skill_name: 'audit-skill',
      phase: 'execute',
      execution_requested: true,
      execution_enabled: false,
      reason: 'execution_policy_disabled',
      attempted_command_count: 1,
    },
  };
  const events: AuditEvent[] = [hashedSuccess, unhashedAllowlistFailure, executionBlocked];

  const summary = summarizeShellRunnerAuditWindow(events);

  expect(summary).toEqual({
    command_result_total: 2,
    command_result_failed: 1,
    allowlist_blocked: 1,
    execution_blocked: 1,
    hashed_command_count: 1,
    unhashed_command_count: 1,
  });
});
|
||||
|
||||
// Coverage is hashed / (hashed + unhashed) as a percentage; an empty window reports 0.
it('calculates hash coverage percentage for shell runner summaries', () => {
  // Only the hashed/unhashed counters and the total vary between the two cases.
  const summaryWith = (total: number, hashed: number, unhashed: number) => ({
    command_result_total: total,
    command_result_failed: 0,
    allowlist_blocked: 0,
    execution_blocked: 0,
    hashed_command_count: hashed,
    unhashed_command_count: unhashed,
  });

  const emptyWindowCoverage = calculateShellRunnerHashCoveragePercent(summaryWith(0, 0, 0));
  expect(emptyWindowCoverage).toBe(0);

  const threeOfFourCoverage = calculateShellRunnerHashCoveragePercent(summaryWith(4, 3, 1));
  expect(threeOfFourCoverage).toBe(75);
});
|
||||
|
||||
// Two failures fall in the current 7-day window; one failure and one unhashed
// success fall in the window immediately before it. Deltas are current minus previous.
it('computes shell runner trend snapshot across current and previous windows', () => {
  const oneDay = 24 * 60 * 60 * 1000;
  const window = 7 * oneDay;
  const now = 1_000_000;
  const currentWindowStart = now - window;

  const currentExitCodeFailure: AuditEvent = {
    timestamp: now - oneDay,
    level: 'warn',
    event_type: 'skills.installer.command_result',
    event: {
      skill_name: 'demo',
      phase: 'install',
      installer_type: 'node',
      command: 'sha256:new-a',
      status: 'failed',
      reason: 'exit_code_1',
    },
  };
  const currentAllowlistFailure: AuditEvent = {
    timestamp: now - oneDay * 2,
    level: 'warn',
    event_type: 'skills.installer.command_result',
    event: {
      skill_name: 'demo',
      phase: 'install',
      installer_type: 'node',
      command: 'sha256:new-b',
      status: 'failed',
      reason: 'allowlist_blocked',
    },
  };
  const previousAllowlistFailure: AuditEvent = {
    timestamp: now - window - oneDay,
    level: 'warn',
    event_type: 'skills.installer.command_result',
    event: {
      skill_name: 'demo',
      phase: 'install',
      installer_type: 'node',
      command: 'sha256:old-a',
      status: 'failed',
      reason: 'allowlist_blocked',
    },
  };
  const previousUnhashedSuccess: AuditEvent = {
    timestamp: now - window - oneDay * 2,
    level: 'info',
    event_type: 'skills.installer.command_result',
    event: {
      skill_name: 'demo',
      phase: 'install',
      installer_type: 'node',
      command: 'legacy-old-command',
      status: 'succeeded',
      reason: 'runner_reported_success',
    },
  };
  const events: AuditEvent[] = [
    currentExitCodeFailure,
    currentAllowlistFailure,
    previousAllowlistFailure,
    previousUnhashedSuccess,
  ];

  const { current, previous, deltas } = computeShellRunnerAuditTrendSnapshot({
    events,
    currentWindowStartMs: currentWindowStart,
    currentWindowEndMs: now,
  });

  expect(current.command_result_failed).toBe(2);
  expect(previous.command_result_failed).toBe(1);
  expect(deltas.failures).toBe(1);
  expect(current.allowlist_blocked).toBe(1);
  expect(previous.allowlist_blocked).toBe(1);
  expect(deltas.allowlist_blocks).toBe(0);
  // Current window is 100% hashed, previous is 50% hashed.
  expect(deltas.hash_coverage_pct).toBe(50);
});
|
||||
|
||||
// 3 of 4 commands succeeded (75%) against a 90% minimum, and failures rose by
// one versus the previous window: both conditions must be reported as blockers.
it('evaluates promotion policy with governance cadence and success thresholds', () => {
  const governance = {
    review_cadence_days: 7,
    promotion_min_success_rate: 0.9,
  };
  const regressingTrend = {
    current: {
      command_result_total: 4,
      command_result_failed: 1,
      allowlist_blocked: 0,
      execution_blocked: 0,
      hashed_command_count: 4,
      unhashed_command_count: 0,
    },
    previous: {
      command_result_total: 4,
      command_result_failed: 0,
      allowlist_blocked: 0,
      execution_blocked: 0,
      hashed_command_count: 4,
      unhashed_command_count: 0,
    },
    deltas: {
      failures: 1,
      allowlist_blocks: 0,
      hash_coverage_pct: 0,
    },
  };

  const { eligible, recommendation, blockers } = evaluateShellRunnerPromotionPolicy({
    trend: regressingTrend,
    reviewedWindowDays: 7,
    governance,
  });

  expect(eligible).toBe(false);
  expect(recommendation).toBe('not_eligible');
  expect(blockers).toContain('success rate 75.00% below minimum 90.00%');
  expect(blockers).toContain('failures increased by 1 vs previous window');
});
|
||||
|
||||
// A clean current window (5/5 succeeded) that improved on the previous one and
// was reviewed within the cadence should produce no blockers at all.
it('marks promotion policy eligible when thresholds and trends are healthy', () => {
  const governance = {
    review_cadence_days: 7,
    promotion_min_success_rate: 0.9,
  };
  const improvingTrend = {
    current: {
      command_result_total: 5,
      command_result_failed: 0,
      allowlist_blocked: 0,
      execution_blocked: 0,
      hashed_command_count: 5,
      unhashed_command_count: 0,
    },
    previous: {
      command_result_total: 5,
      command_result_failed: 1,
      allowlist_blocked: 1,
      execution_blocked: 0,
      hashed_command_count: 4,
      unhashed_command_count: 1,
    },
    deltas: {
      failures: -1,
      allowlist_blocks: -1,
      hash_coverage_pct: 20,
    },
  };

  const { eligible, recommendation, blockers } = evaluateShellRunnerPromotionPolicy({
    trend: improvingTrend,
    reviewedWindowDays: 7,
    governance,
  });

  expect(eligible).toBe(true);
  expect(recommendation).toBe('eligible');
  expect(blockers).toEqual([]);
});
|
||||
|
||||
// Walks the four rollout phases: any guardrail blocker locks the rollout, no
// data means observe, a failure means review, and a clean window is an expand candidate.
it('recommends rollout phase from guardrails and audit summary', () => {
  // Every case uses fully hashed commands with no allowlist or execution blocks.
  const summaryOf = (total: number, failed: number) => ({
    command_result_total: total,
    command_result_failed: failed,
    allowlist_blocked: 0,
    execution_blocked: 0,
    hashed_command_count: total,
    unhashed_command_count: 0,
  });

  const cases = [
    {
      guardrails: { blockers: ['skills.installation_execution must be enabled'] },
      summary: summaryOf(1, 0),
      expected: 'locked',
    },
    { guardrails: { blockers: [] as string[] }, summary: summaryOf(0, 0), expected: 'guarded_observe' },
    { guardrails: { blockers: [] as string[] }, summary: summaryOf(4, 1), expected: 'guarded_review' },
    { guardrails: { blockers: [] as string[] }, summary: summaryOf(3, 0), expected: 'expand_candidate' },
  ];

  for (const { guardrails, summary, expected } of cases) {
    expect(recommendShellRunnerRolloutPhase(guardrails, summary)).toBe(expected);
  }
});
|
||||
|
||||
it('emits hashed command values for both successful and failed audit command results', () => {
|
||||
const logger = {
|
||||
skillsInstallerExecutionBlocked: vi.fn(),
|
||||
@@ -1732,4 +2077,165 @@ describe('skills CLI helpers', () => {
|
||||
process.exitCode = undefined;
|
||||
rmSync(root, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
// Runs `skills rollout-status --json` against a config that enables the shell
// runner without a governance owner and checks the JSON printed to console.log.
it('skills rollout-status reports governance owner blocker in JSON output', async () => {
  const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
  try {
    const configPath = join(root, 'config.yaml');
    const managedDir = join(root, 'managed');
    const bundledDir = join(root, 'bundled');
    const workspaceDir = join(root, 'workspace');
    const auditPath = join(root, 'audit.log');
    mkdirSync(managedDir, { recursive: true });
    mkdirSync(bundledDir, { recursive: true });
    mkdirSync(workspaceDir, { recursive: true });
    // Empty audit log: the command still runs, it just sees no events.
    writeFileSync(auditPath, '', 'utf-8');
    writeSkillsCliConfig(configPath, {
      managedDir,
      bundledDir,
      workspaceDir,
      installationExecution: 'enabled',
      allowShellRunner: true,
      shellRunnerAllowlist: ['npm install*'],
      auditPath,
    });

    const program = new Command();
    registerSkillsCommand(program);

    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
    process.exitCode = undefined;
    try {
      await program.parseAsync(['skills', 'rollout-status', '--json', '-c', configPath], { from: 'user' });

      const payload = JSON.parse(String(logSpy.mock.calls[0]?.[0]));
      expect(payload.recommendation).toBe('locked');
      expect(payload.promotion_policy.recommendation).toBe('not_eligible');
      expect(payload.governance.owner).toBeNull();
      expect(payload.guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
    } finally {
      // Restore global state even when an assertion above fails, so a failure
      // here cannot leak a mocked console.log or exit code into later tests.
      logSpy.mockRestore();
      process.exitCode = undefined;
    }
  } finally {
    // Always remove the temp directory, including on assertion failure.
    rmSync(root, { recursive: true, force: true });
  }
});
|
||||
|
||||
// Runs `skills rollout-status --json --out <file>` with a fully configured,
// owner-assigned shell runner and an empty audit log, then checks the file payload.
it('skills rollout-status writes JSON payload to output file', async () => {
  const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
  try {
    const configPath = join(root, 'config.yaml');
    const managedDir = join(root, 'managed');
    const bundledDir = join(root, 'bundled');
    const workspaceDir = join(root, 'workspace');
    const auditPath = join(root, 'audit.log');
    const outputPath = join(root, 'rollout-status.json');
    mkdirSync(managedDir, { recursive: true });
    mkdirSync(bundledDir, { recursive: true });
    mkdirSync(workspaceDir, { recursive: true });
    writeFileSync(auditPath, '', 'utf-8');
    writeSkillsCliConfig(configPath, {
      managedDir,
      bundledDir,
      workspaceDir,
      installationExecution: 'enabled',
      allowShellRunner: true,
      shellRunnerAllowlist: ['npm install*'],
      shellRunnerGovernanceOwner: 'skills-team',
      auditPath,
    });

    const program = new Command();
    registerSkillsCommand(program);

    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
    process.exitCode = undefined;
    try {
      await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '-c', configPath], { from: 'user' });

      expect(existsSync(outputPath)).toBe(true);
      const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
      expect(payload.governance.owner).toBe('skills-team');
      // No guardrail blockers and no audited commands yet: observe only.
      expect(payload.recommendation).toBe('guarded_observe');
      expect(payload.trend.current.command_result_total).toBe(0);
      expect(payload.promotion_policy.recommendation).toBe('not_eligible');
    } finally {
      // Restore global state even when an assertion above fails.
      logSpy.mockRestore();
      process.exitCode = undefined;
    }
  } finally {
    // Always remove the temp directory, including on assertion failure.
    rmSync(root, { recursive: true, force: true });
  }
});
|
||||
|
||||
// Seeds the audit log with one failure one day ago and one success eight days
// ago, then checks that `rollout-status --days 7` reports both windows and the
// resulting deltas.
it('skills rollout-status includes trend deltas across adjacent windows', async () => {
  const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
  try {
    const configPath = join(root, 'config.yaml');
    const managedDir = join(root, 'managed');
    const bundledDir = join(root, 'bundled');
    const workspaceDir = join(root, 'workspace');
    const auditPath = join(root, 'audit.log');
    const outputPath = join(root, 'rollout-trend.json');
    mkdirSync(managedDir, { recursive: true });
    mkdirSync(bundledDir, { recursive: true });
    mkdirSync(workspaceDir, { recursive: true });

    const now = Date.now();
    const oneDay = 24 * 60 * 60 * 1000;
    const events = [
      {
        timestamp: now - oneDay,
        level: 'warn',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'trend-skill',
          phase: 'install',
          installer_type: 'node',
          command: 'sha256:new',
          status: 'failed',
          reason: 'allowlist_blocked',
        },
      },
      {
        timestamp: now - oneDay * 8,
        level: 'info',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'trend-skill',
          phase: 'install',
          installer_type: 'node',
          command: 'legacy-prev',
          status: 'succeeded',
          reason: 'runner_reported_success',
        },
      },
    ];
    // One JSON document per line, with a trailing newline.
    writeFileSync(auditPath, `${events.map((event) => JSON.stringify(event)).join('\n')}\n`, 'utf-8');

    writeSkillsCliConfig(configPath, {
      managedDir,
      bundledDir,
      workspaceDir,
      installationExecution: 'enabled',
      allowShellRunner: true,
      shellRunnerAllowlist: ['npm install*'],
      shellRunnerGovernanceOwner: 'skills-team',
      auditPath,
    });

    const program = new Command();
    registerSkillsCommand(program);

    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
    process.exitCode = undefined;
    try {
      await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '--days', '7', '-c', configPath], {
        from: 'user',
      });

      const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
      expect(payload.trend.current.command_result_total).toBe(1);
      expect(payload.trend.previous.command_result_total).toBe(1);
      expect(payload.trend.deltas.failures).toBe(1);
      expect(payload.trend.deltas.allowlist_blocks).toBe(1);
      expect(payload.promotion_policy.recommendation).toBe('not_eligible');
      expect(payload.promotion_policy.blockers).toContain('failures increased by 1 vs previous window');
    } finally {
      // Restore global state even when an assertion above fails.
      logSpy.mockRestore();
      process.exitCode = undefined;
    }
  } finally {
    // Always remove the temp directory, including on assertion failure.
    rmSync(root, { recursive: true, force: true });
  }
});
|
||||
});
|
||||
|
||||
@@ -2,8 +2,12 @@ import type { Command } from 'commander';
|
||||
import { resolve } from 'path';
|
||||
import { homedir } from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
import { writeFileSync } from 'fs';
|
||||
import { createHash } from 'crypto';
|
||||
import { auditLogger } from '../audit/index.js';
|
||||
import { queryAuditLogs } from '../audit/export.js';
|
||||
import type { AuditEvent } from '../audit/types.js';
|
||||
import type { Config } from '../config/schema.js';
|
||||
import type { Skill } from '../skills/index.js';
|
||||
import { loadAllSkills, SkillInstaller, buildInstallerPlan, loadSkill } from '../skills/index.js';
|
||||
import { loadConfigSafe } from './shared.js';
|
||||
@@ -92,6 +96,238 @@ export function sanitizeSkillInstallerAuditReason(reason: string): string {
|
||||
return reason;
|
||||
}
|
||||
|
||||
/**
 * Outcome of the shell-runner rollout guardrail evaluation.
 */
export interface ShellRunnerRolloutGuardrailStatus {
  /** Human-readable reasons the rollout is blocked; empty when every guardrail passes. */
  blockers: string[];
}
|
||||
|
||||
/**
 * Counters aggregated from installer audit events for one review window.
 */
export interface ShellRunnerAuditWindowSummary {
  /** Number of `skills.installer.command_result` events. */
  command_result_total: number;
  /** Command results whose status is `failed`. */
  command_result_failed: number;
  /** Command results whose reason is `allowlist_blocked`. */
  allowlist_blocked: number;
  /** Number of `skills.installer.execution_blocked` events. */
  execution_blocked: number;
  /** Command results whose recorded command starts with `sha256:`. */
  hashed_command_count: number;
  /** Command results whose recorded command does not start with `sha256:`. */
  unhashed_command_count: number;
}
|
||||
|
||||
/**
 * Audit summaries for the current review window and the window before it,
 * together with the change between the two.
 */
export interface ShellRunnerAuditTrendSnapshot {
  /** Summary of the window under review. */
  current: ShellRunnerAuditWindowSummary;
  /** Summary of the preceding window. */
  previous: ShellRunnerAuditWindowSummary;
  /** Differences between the two windows. */
  deltas: {
    /** Change in failed command results. */
    failures: number;
    /** Change in allowlist-blocked command results. */
    allowlist_blocks: number;
    /** Change in hash coverage, in percentage points. */
    hash_coverage_pct: number;
  };
}
|
||||
|
||||
/**
 * Result of checking an audit trend against the governance promotion policy.
 */
export interface ShellRunnerPromotionPolicyStatus {
  /** True when no promotion blockers were found. */
  eligible: boolean;
  /** Same verdict as `eligible`, expressed as a label. */
  recommendation: 'eligible' | 'not_eligible';
  /** Governance review cadence, in days. */
  cadence_days: number;
  /** Length of the window that was reviewed, in days. */
  reviewed_window_days: number;
  /** Fraction of current-window command results that did not fail. */
  success_rate: number;
  /** Minimum success fraction required by governance. */
  minimum_success_rate: number;
  /** Change in failures versus the previous window. */
  failures_delta: number;
  /** Change in allowlist blocks versus the previous window. */
  allowlist_blocks_delta: number;
  /** Change in hash coverage versus the previous window, in percentage points. */
  hash_coverage_delta_pct: number;
  /** Human-readable reasons promotion is blocked; empty when eligible. */
  blockers: string[];
}
|
||||
|
||||
/** Rollout phase labels that the shell-runner rollout review can recommend. */
export type ShellRunnerRolloutRecommendation =
  | 'locked'
  | 'guarded_observe'
  | 'guarded_review'
  | 'expand_candidate';
|
||||
|
||||
/**
 * Lists the configuration problems that block a shell-runner rollout.
 *
 * @param skillsConfig - The `skills` section of the loaded config.
 * @param auditEnabled - Whether audit logging is enabled.
 * @returns The blocker messages, in the fixed order in which the checks are declared below.
 */
export function evaluateShellRunnerRolloutGuardrails(
  skillsConfig: Config['skills'],
  auditEnabled: boolean,
): ShellRunnerRolloutGuardrailStatus {
  const allowlist = skillsConfig.shell_runner_allowlist;

  // Each entry pairs "is this guardrail violated?" with the message to report.
  const checks: Array<[violated: boolean, message: string]> = [
    [skillsConfig.installation_execution !== 'enabled', 'skills.installation_execution must be enabled'],
    [!skillsConfig.allow_shell_runner, 'skills.allow_shell_runner must be true'],
    [allowlist.length === 0, 'skills.shell_runner_allowlist must include at least one pattern'],
    [
      allowlist.some((pattern) => pattern.trim() === '*'),
      "skills.shell_runner_allowlist cannot include wildcard-only '*' patterns",
    ],
    [
      // The owner is only required (and only read) once the shell runner is on.
      Boolean(skillsConfig.allow_shell_runner && !skillsConfig.shell_runner_governance.owner),
      'skills.shell_runner_governance.owner must be set when shell runner is enabled',
    ],
    [!auditEnabled, 'audit.enabled must be true for shell runner rollout review'],
  ];

  const blockers = checks.filter(([violated]) => violated).map(([, message]) => message);
  return { blockers };
}
|
||||
|
||||
/**
 * Aggregates installer audit events into per-window counters.
 *
 * Only `skills.installer.command_result` and `skills.installer.execution_blocked`
 * events contribute; every other event type is ignored.
 *
 * @param events - Audit events to summarize.
 * @returns A freshly created summary; all counters are zero for an empty input.
 */
export function summarizeShellRunnerAuditWindow(events: AuditEvent[]): ShellRunnerAuditWindowSummary {
  const totals: ShellRunnerAuditWindowSummary = {
    command_result_total: 0,
    command_result_failed: 0,
    allowlist_blocked: 0,
    execution_blocked: 0,
    hashed_command_count: 0,
    unhashed_command_count: 0,
  };

  // Payload fields that are missing or not strings are treated as empty text.
  const textOf = (value: unknown): string => (typeof value === 'string' ? value : '');

  for (const auditEvent of events) {
    switch (auditEvent.event_type) {
      case 'skills.installer.command_result': {
        totals.command_result_total += 1;

        const fields = auditEvent.event as Record<string, unknown>;
        const status = textOf(fields.status);
        const reason = textOf(fields.reason);
        const command = textOf(fields.command);

        if (status === 'failed') {
          totals.command_result_failed += 1;
        }
        if (reason === 'allowlist_blocked') {
          totals.allowlist_blocked += 1;
        }
        // A command counts as hashed only when it carries the sha256 prefix;
        // anything else, including a missing command, counts as unhashed.
        if (command.startsWith('sha256:')) {
          totals.hashed_command_count += 1;
        } else {
          totals.unhashed_command_count += 1;
        }
        break;
      }
      case 'skills.installer.execution_blocked':
        totals.execution_blocked += 1;
        break;
      default:
        break;
    }
  }

  return totals;
}
|
||||
|
||||
/**
 * Computes the percentage of audited commands that were recorded as hashes.
 *
 * @param summary - Window summary providing the hashed and unhashed counts.
 * @returns A value from 0 to 100 for non-negative counts; 0 when no commands were counted.
 */
export function calculateShellRunnerHashCoveragePercent(summary: ShellRunnerAuditWindowSummary): number {
  const { hashed_command_count: hashed, unhashed_command_count: unhashed } = summary;
  const observed = hashed + unhashed;
  // Avoid dividing by zero for an empty window.
  return observed === 0 ? 0 : (hashed / observed) * 100;
}
|
||||
|
||||
/**
 * Summarizes the current review window and the equally long window immediately
 * before it, and reports the change between them.
 *
 * The current window is `[currentWindowStartMs, currentWindowEndMs]` (both ends
 * inclusive). The previous window has the same duration and ends just before
 * `currentWindowStartMs` (start inclusive, end exclusive). Events outside both
 * windows are ignored.
 *
 * @param args.events - Audit events to partition by timestamp.
 * @param args.currentWindowStartMs - Start of the current window, in the same unit as event timestamps.
 * @param args.currentWindowEndMs - End of the current window, in the same unit as event timestamps.
 * @returns Both window summaries and deltas computed as current minus previous.
 */
export function computeShellRunnerAuditTrendSnapshot(args: {
  events: AuditEvent[];
  currentWindowStartMs: number;
  currentWindowEndMs: number;
}): ShellRunnerAuditTrendSnapshot {
  const { events, currentWindowStartMs: currentStart, currentWindowEndMs: currentEnd } = args;
  const windowLength = currentEnd - currentStart;
  const previousStart = currentStart - windowLength;

  // Single pass: route each event to the window it belongs to, preserving order.
  const inCurrent: AuditEvent[] = [];
  const inPrevious: AuditEvent[] = [];
  for (const auditEvent of events) {
    const at = auditEvent.timestamp;
    if (at >= currentStart && at <= currentEnd) {
      inCurrent.push(auditEvent);
    } else if (at >= previousStart && at < currentStart) {
      inPrevious.push(auditEvent);
    }
  }

  const current = summarizeShellRunnerAuditWindow(inCurrent);
  const previous = summarizeShellRunnerAuditWindow(inPrevious);
  const currentCoverage = calculateShellRunnerHashCoveragePercent(current);
  const previousCoverage = calculateShellRunnerHashCoveragePercent(previous);

  return {
    current,
    previous,
    deltas: {
      failures: current.command_result_failed - previous.command_result_failed,
      allowlist_blocks: current.allowlist_blocked - previous.allowlist_blocked,
      hash_coverage_pct: currentCoverage - previousCoverage,
    },
  };
}
|
||||
|
||||
/**
 * Checks an audit trend against the governance promotion policy.
 *
 * Promotion is blocked when any of the following holds:
 * - the reviewed window is longer than the governance review cadence;
 * - the current window contains no command results;
 * - the current success rate is below the configured minimum;
 * - failures increased versus the previous window;
 * - allowlist blocks increased versus the previous window.
 *
 * @param args.trend - Current/previous window summaries and their deltas.
 * @param args.reviewedWindowDays - Length of the reviewed window, in days.
 * @param args.governance - Review cadence (days) and minimum success rate (a fraction, e.g. 0.9).
 * @returns The verdict, the inputs it was based on, and the list of blockers.
 */
export function evaluateShellRunnerPromotionPolicy(args: {
  trend: ShellRunnerAuditTrendSnapshot;
  reviewedWindowDays: number;
  governance: {
    review_cadence_days: number;
    promotion_min_success_rate: number;
  };
}): ShellRunnerPromotionPolicyStatus {
  const { trend, reviewedWindowDays, governance } = args;
  const { command_result_total: total, command_result_failed: failed } = trend.current;
  const hasResults = total !== 0;
  // With no results the rate is undefined; it is reported as 0, as before.
  const successRate = hasResults ? (total - failed) / total : 0;

  const blockers: string[] = [];

  if (reviewedWindowDays > governance.review_cadence_days) {
    blockers.push(
      `review window (${reviewedWindowDays}d) exceeds governance cadence (${governance.review_cadence_days}d)`,
    );
  }

  if (!hasResults) {
    blockers.push('no shell-runner command results in current window');
  } else if (successRate < governance.promotion_min_success_rate) {
    // Only compare against the minimum when there is data: an empty window has
    // no meaningful rate, and "0.00% below minimum" would merely restate the
    // "no results" blocker in a misleading way.
    blockers.push(
      `success rate ${(successRate * 100).toFixed(2)}% below minimum ${(governance.promotion_min_success_rate * 100).toFixed(2)}%`,
    );
  }

  if (trend.deltas.failures > 0) {
    blockers.push(`failures increased by ${trend.deltas.failures} vs previous window`);
  }

  if (trend.deltas.allowlist_blocks > 0) {
    blockers.push(`allowlist blocks increased by ${trend.deltas.allowlist_blocks} vs previous window`);
  }

  const eligible = blockers.length === 0;
  return {
    eligible,
    recommendation: eligible ? 'eligible' : 'not_eligible',
    cadence_days: governance.review_cadence_days,
    reviewed_window_days: reviewedWindowDays,
    success_rate: successRate,
    minimum_success_rate: governance.promotion_min_success_rate,
    failures_delta: trend.deltas.failures,
    allowlist_blocks_delta: trend.deltas.allowlist_blocks,
    hash_coverage_delta_pct: trend.deltas.hash_coverage_pct,
    blockers,
  };
}
|
||||
|
||||
/**
 * Maps guardrail status and the current audit summary to a rollout phase.
 *
 * Checks are applied in order: any guardrail blocker gives `locked`; no command
 * results gives `guarded_observe`; any unhashed command or failure gives
 * `guarded_review`; otherwise `expand_candidate`.
 *
 * @param guardrails - Result of the guardrail evaluation.
 * @param summary - Audit summary for the window under review.
 * @returns The recommended rollout phase.
 */
export function recommendShellRunnerRolloutPhase(
  guardrails: ShellRunnerRolloutGuardrailStatus,
  summary: ShellRunnerAuditWindowSummary,
): ShellRunnerRolloutRecommendation {
  const isBlocked = guardrails.blockers.length > 0;
  const hasNoData = summary.command_result_total === 0;
  const isClean = summary.unhashed_command_count <= 0 && summary.command_result_failed <= 0;

  if (isBlocked) {
    return 'locked';
  }
  if (hasNoData) {
    return 'guarded_observe';
  }
  return isClean ? 'expand_candidate' : 'guarded_review';
}
|
||||
|
||||
/**
 * Resolves a user-supplied path to an absolute path, expanding a leading `~`.
 *
 * - `~` alone resolves to the current user's home directory.
 * - `~/rest` resolves to `rest` inside the home directory.
 * - Anything else is resolved against the current working directory.
 *
 * @param pathValue - Path as written in config or on the command line.
 * @returns The absolute, normalized path.
 */
function expandHomePath(pathValue: string): string {
  // A bare "~" previously fell through to resolve('~'), i.e. "<cwd>/~".
  if (pathValue === '~') {
    return resolve(homedir());
  }
  if (pathValue.startsWith('~/')) {
    return resolve(homedir(), pathValue.slice(2));
  }
  return resolve(pathValue);
}
|
||||
|
||||
interface SkillShellRunnerAuditLogger {
|
||||
skillsInstallerExecutionBlocked(event: {
|
||||
skill_name: string;
|
||||
@@ -1097,6 +1333,110 @@ export function registerSkillsCommand(program: Command): void {
|
||||
console.log(renderSkillInstallerPlan(view));
|
||||
});
|
||||
|
||||
// `skills rollout-status`: evaluates the shell-runner guardrails from config,
// summarizes installer audit events for the last N days and the N days before
// that, and prints (and optionally writes) the rollout recommendation.
skills
  .command('rollout-status')
  .description('Show shell runner rollout guardrails and audit review summary')
  .option('--days <n>', 'Look back N days in audit logs (default: 7)', '7')
  .option('--out <path>', 'Write rollout JSON payload to file')
  .option('--json', 'Output as JSON')
  .option('-c, --config <path>', 'Config file path')
  .action(async (opts: { days?: string; out?: string; json?: boolean; config?: string }) => {
    const loaded = loadConfigSafe(opts.config);
    if (loaded.error || !loaded.config) {
      console.error(loaded.error ?? 'Failed to load config');
      process.exitCode = 1;
      return;
    }

    // Number.parseInt stops at the first non-digit, so inputs such as "7days"
    // or "1.5" used to be accepted as 7 and 1. Require the whole value to be
    // digits so the check matches the error message.
    const rawDays = (opts.days ?? '7').trim();
    const parsedDays = /^\+?\d+$/.test(rawDays) ? Number.parseInt(rawDays, 10) : Number.NaN;
    if (!Number.isSafeInteger(parsedDays) || parsedDays <= 0) {
      console.error('`--days` must be a positive integer.');
      process.exitCode = 1;
      return;
    }

    const guardrails = evaluateShellRunnerRolloutGuardrails(loaded.config.skills, loaded.config.audit.enabled);

    // Query twice the requested window so the previous window is available for the trend.
    const windowDurationMs = parsedDays * 24 * 60 * 60 * 1000;
    const nowMs = Date.now();
    const currentWindowStartMs = nowMs - windowDurationMs;
    const queryStartMs = nowMs - windowDurationMs * 2;
    const auditPath = expandHomePath(loaded.config.audit.path);
    const auditEvents = await queryAuditLogs(auditPath, {
      start_time: queryStartMs,
      event_types: ['skills.installer.command_result', 'skills.installer.execution_blocked'],
    });
    const trend = computeShellRunnerAuditTrendSnapshot({
      events: auditEvents,
      currentWindowStartMs,
      currentWindowEndMs: nowMs,
    });

    const recommendation = recommendShellRunnerRolloutPhase(guardrails, trend.current);
    const governance = loaded.config.skills.shell_runner_governance;
    const promotionPolicy = evaluateShellRunnerPromotionPolicy({
      trend,
      reviewedWindowDays: parsedDays,
      governance: {
        review_cadence_days: governance.review_cadence_days,
        promotion_min_success_rate: governance.promotion_min_success_rate,
      },
    });

    const rolloutPayload = {
      days: parsedDays,
      guardrails,
      summary: trend.current,
      trend,
      recommendation,
      promotion_policy: promotionPolicy,
      governance: {
        // Serialize an unset owner as null rather than omitting the key.
        owner: governance.owner ?? null,
        review_cadence_days: governance.review_cadence_days,
        promotion_min_success_rate: governance.promotion_min_success_rate,
      },
    };

    // Expand the output path once; it is used for both the write and the report line.
    const outputPath = opts.out ? expandHomePath(opts.out) : undefined;
    if (outputPath) {
      writeFileSync(outputPath, JSON.stringify(rolloutPayload, null, 2), 'utf-8');
    }

    if (opts.json) {
      console.log(JSON.stringify(rolloutPayload, null, 2));
      return;
    }

    console.log('Shell runner rollout status');
    console.log(`Window: ${parsedDays}d`);
    console.log(`Recommendation: ${recommendation}`);
    console.log(`Governance owner: ${governance.owner ?? '(unset)'}`);
    console.log(`Governance review cadence (days): ${governance.review_cadence_days}`);
    console.log(`Governance promotion min success rate: ${governance.promotion_min_success_rate}`);
    console.log(`Guardrail blockers: ${guardrails.blockers.length}`);
    for (const blocker of guardrails.blockers) {
      console.log(`- ${blocker}`);
    }
    console.log(`Audited command results: ${trend.current.command_result_total}`);
    console.log(`Audited failures: ${trend.current.command_result_failed}`);
    console.log(`Allowlist blocks: ${trend.current.allowlist_blocked}`);
    console.log(`Execution blocks: ${trend.current.execution_blocked}`);
    console.log(`Hashed command payloads: ${trend.current.hashed_command_count}`);
    console.log(`Unhashed command payloads: ${trend.current.unhashed_command_count}`);
    console.log(`Failure delta vs previous window: ${trend.deltas.failures}`);
    console.log(`Allowlist block delta vs previous window: ${trend.deltas.allowlist_blocks}`);
    console.log(`Hash coverage delta vs previous window (%): ${trend.deltas.hash_coverage_pct.toFixed(2)}`);
    console.log(
      `Promotion policy: ${promotionPolicy.recommendation} (success ${(promotionPolicy.success_rate * 100).toFixed(2)}% / min ${(promotionPolicy.minimum_success_rate * 100).toFixed(2)}%)`,
    );
    for (const blocker of promotionPolicy.blockers) {
      console.log(`- ${blocker}`);
    }
    if (outputPath) {
      console.log(`Wrote rollout payload: ${outputPath}`);
    }
  });
|
||||
|
||||
skills
|
||||
.command('execute <name>')
|
||||
.description('Preview or execute installer steps for an installed skill')
|
||||
|
||||
Reference in New Issue
Block a user