feat(skills): add rollout status governance and promotion policy checks

This commit is contained in:
William Valentin
2026-02-12 22:43:46 -08:00
parent 43b584257f
commit 7ae0fb51c2
3 changed files with 897 additions and 3 deletions
+507 -1
View File
@@ -1,5 +1,5 @@
import { describe, it, expect, vi } from 'vitest';
import { mkdtempSync, mkdirSync, writeFileSync, existsSync, rmSync } from 'fs';
import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { Command } from 'commander';
@@ -27,14 +27,21 @@ import {
createShellSkillInstallerCommandRunner,
checkCommandAgainstAllowlist,
emitShellRunnerAuditEvents,
calculateShellRunnerHashCoveragePercent,
computeShellRunnerAuditTrendSnapshot,
evaluateShellRunnerPromotionPolicy,
evaluateShellRunnerRolloutGuardrails,
hashSkillInstallerAuditCommand,
recommendShellRunnerRolloutPhase,
sanitizeSkillInstallerAuditReason,
summarizeShellRunnerAuditWindow,
resolveSkillInstallerCommandRunner,
runSkillExecuteAction,
runSkillInstallAction,
registerSkillsCommand,
} from './skills.js';
import type { Skill } from '../skills/index.js';
import type { AuditEvent } from '../audit/types.js';
function buildSkill(overrides: Partial<Skill>): Skill {
return {
@@ -61,9 +68,18 @@ function writeSkillsCliConfig(
installationExecution?: 'disabled' | 'enabled';
allowShellRunner?: boolean;
shellRunnerAllowlist?: string[];
shellRunnerGovernanceOwner?: string;
auditEnabled?: boolean;
auditPath?: string;
},
): void {
const allowlist = opts.shellRunnerAllowlist ?? [];
const auditLines = opts.auditPath
? ['audit:', ` enabled: ${opts.auditEnabled ?? true}`, ` path: ${opts.auditPath}`]
: [];
const governanceOwnerLines = opts.shellRunnerGovernanceOwner
? [' shell_runner_governance:', ` owner: '${opts.shellRunnerGovernanceOwner}'`]
: [];
writeFileSync(
configPath,
[
@@ -78,6 +94,8 @@ function writeSkillsCliConfig(
` installation_execution: ${opts.installationExecution ?? 'disabled'}`,
` allow_shell_runner: ${opts.allowShellRunner ?? false}`,
` shell_runner_allowlist: [${allowlist.map((item) => `'${item}'`).join(', ')}]`,
...governanceOwnerLines,
...auditLines,
].join('\n'),
'utf-8',
);
@@ -503,6 +521,333 @@ describe('skills CLI helpers', () => {
expect(sanitizeSkillInstallerAuditReason('allowlist_blocked')).toBe('allowlist_blocked');
});
it('reports shell runner rollout guardrail blockers', () => {
  // Config with execution disabled, the shell runner disallowed, and a
  // wildcard-only allowlist. Typed from the helper's own signature so the
  // string literals keep their narrow types after being hoisted.
  const skillsConfig: Parameters<typeof evaluateShellRunnerRolloutGuardrails>[0] = {
    installation_execution: 'disabled',
    allow_shell_runner: false,
    shell_runner_allowlist: ['*'],
    shell_runner_governance: {
      review_cadence_days: 7,
      promotion_min_success_rate: 0.9,
    },
    load: { watch: false, watch_debounce_ms: 250 },
  };

  // Exact equality below pins both the messages and their order.
  const expectedBlockers = [
    'skills.installation_execution must be enabled',
    'skills.allow_shell_runner must be true',
    "skills.shell_runner_allowlist cannot include wildcard-only '*' patterns",
    'audit.enabled must be true for shell runner rollout review',
  ];

  // NOTE(review): the second argument looks like the audit-enabled flag
  // (passing `false` yields the audit.enabled blocker) — confirm against the
  // helper's signature.
  const { blockers } = evaluateShellRunnerRolloutGuardrails(skillsConfig, false);

  expect(blockers).toEqual(expectedBlockers);
});
it('requires governance owner when shell runner is enabled', () => {
  // Shell runner is switched on with a concrete allowlist, but the governance
  // section deliberately carries no `owner` field.
  const skillsConfig: Parameters<typeof evaluateShellRunnerRolloutGuardrails>[0] = {
    installation_execution: 'enabled',
    allow_shell_runner: true,
    shell_runner_allowlist: ['npm install*'],
    shell_runner_governance: {
      review_cadence_days: 7,
      promotion_min_success_rate: 0.9,
    },
    load: { watch: false, watch_debounce_ms: 250 },
  };
  const ownerBlocker = 'skills.shell_runner_governance.owner must be set when shell runner is enabled';

  const { blockers } = evaluateShellRunnerRolloutGuardrails(skillsConfig, true);

  // `toContain` rather than `toEqual`: only the owner blocker is asserted here.
  expect(blockers).toContain(ownerBlocker);
});
it('summarizes shell runner audit windows with hash coverage and failures', () => {
  // A successful command result whose command field carries a `sha256:` value.
  const succeededHashedResult: AuditEvent = {
    timestamp: 1,
    level: 'debug',
    event_type: 'skills.installer.command_result',
    event: {
      skill_name: 'audit-skill',
      phase: 'install',
      installer_type: 'node',
      command: 'sha256:abc',
      status: 'succeeded',
      reason: 'runner_reported_success',
    },
  };

  // A failed command result with a plain-text command and an allowlist reason.
  const failedPlainResult: AuditEvent = {
    timestamp: 2,
    level: 'warn',
    event_type: 'skills.installer.command_result',
    event: {
      skill_name: 'audit-skill',
      phase: 'install',
      installer_type: 'download',
      command: 'download https://example.com/pkg.tgz',
      status: 'failed',
      reason: 'allowlist_blocked',
    },
  };

  // An execution-blocked event; it is not a command result.
  const executionBlocked: AuditEvent = {
    timestamp: 3,
    level: 'warn',
    event_type: 'skills.installer.execution_blocked',
    event: {
      skill_name: 'audit-skill',
      phase: 'execute',
      execution_requested: true,
      execution_enabled: false,
      reason: 'execution_policy_disabled',
      attempted_command_count: 1,
    },
  };

  const summary = summarizeShellRunnerAuditWindow([
    succeededHashedResult,
    failedPlainResult,
    executionBlocked,
  ]);

  expect(summary).toEqual({
    command_result_total: 2,
    command_result_failed: 1,
    allowlist_blocked: 1,
    execution_blocked: 1,
    hashed_command_count: 1,
    unhashed_command_count: 1,
  });
});
it('calculates hash coverage percentage for shell runner summaries', () => {
  // Builds a summary that differs only in its hashed/unhashed counts; the
  // total is their sum and every failure/blocked counter stays at zero.
  const coverageFor = (hashed: number, unhashed: number) =>
    calculateShellRunnerHashCoveragePercent({
      command_result_total: hashed + unhashed,
      command_result_failed: 0,
      allowlist_blocked: 0,
      execution_blocked: 0,
      hashed_command_count: hashed,
      unhashed_command_count: unhashed,
    });

  // An empty window reports 0 rather than a division artefact.
  expect(coverageFor(0, 0)).toBe(0);
  // Three hashed commands out of four.
  expect(coverageFor(3, 1)).toBe(75);
});
it('computes shell runner trend snapshot across current and previous windows', () => {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const WINDOW_MS = 7 * DAY_MS;
  // NOTE(review): `windowEnd` is smaller than one window, so every event
  // timestamp below is negative. Harmless for plain arithmetic, but confirm
  // the helper never rejects negative timestamps.
  const windowEnd = 1_000_000;
  const windowStart = windowEnd - WINDOW_MS;

  // Two failures inside the current window, both with `sha256:` commands.
  const currentWindowEvents: AuditEvent[] = [
    {
      timestamp: windowEnd - DAY_MS,
      level: 'warn',
      event_type: 'skills.installer.command_result',
      event: {
        skill_name: 'demo',
        phase: 'install',
        installer_type: 'node',
        command: 'sha256:new-a',
        status: 'failed',
        reason: 'exit_code_1',
      },
    },
    {
      timestamp: windowEnd - DAY_MS * 2,
      level: 'warn',
      event_type: 'skills.installer.command_result',
      event: {
        skill_name: 'demo',
        phase: 'install',
        installer_type: 'node',
        command: 'sha256:new-b',
        status: 'failed',
        reason: 'allowlist_blocked',
      },
    },
  ];

  // One failure and one success before the current window starts; the success
  // uses a plain-text command.
  const previousWindowEvents: AuditEvent[] = [
    {
      timestamp: windowEnd - WINDOW_MS - DAY_MS,
      level: 'warn',
      event_type: 'skills.installer.command_result',
      event: {
        skill_name: 'demo',
        phase: 'install',
        installer_type: 'node',
        command: 'sha256:old-a',
        status: 'failed',
        reason: 'allowlist_blocked',
      },
    },
    {
      timestamp: windowEnd - WINDOW_MS - DAY_MS * 2,
      level: 'info',
      event_type: 'skills.installer.command_result',
      event: {
        skill_name: 'demo',
        phase: 'install',
        installer_type: 'node',
        command: 'legacy-old-command',
        status: 'succeeded',
        reason: 'runner_reported_success',
      },
    },
  ];

  const { current, previous, deltas } = computeShellRunnerAuditTrendSnapshot({
    events: [...currentWindowEvents, ...previousWindowEvents],
    currentWindowStartMs: windowStart,
    currentWindowEndMs: windowEnd,
  });

  // Failures: 2 now vs 1 before.
  expect(current.command_result_failed).toBe(2);
  expect(previous.command_result_failed).toBe(1);
  expect(deltas.failures).toBe(1);
  // Allowlist blocks: 1 in each window, so no change.
  expect(current.allowlist_blocked).toBe(1);
  expect(previous.allowlist_blocked).toBe(1);
  expect(deltas.allowlist_blocks).toBe(0);
  // Presumably 100% hashed now vs 50% before (one plain-text command) — the
  // asserted delta is 50 percentage points.
  expect(deltas.hash_coverage_pct).toBe(50);
});
it('evaluates promotion policy with governance cadence and success thresholds', () => {
  // Current window: one failure out of four results (75% success), one more
  // failure than the previous window.
  const regressingTrend = {
    current: {
      command_result_total: 4,
      command_result_failed: 1,
      allowlist_blocked: 0,
      execution_blocked: 0,
      hashed_command_count: 4,
      unhashed_command_count: 0,
    },
    previous: {
      command_result_total: 4,
      command_result_failed: 0,
      allowlist_blocked: 0,
      execution_blocked: 0,
      hashed_command_count: 4,
      unhashed_command_count: 0,
    },
    deltas: {
      failures: 1,
      allowlist_blocks: 0,
      hash_coverage_pct: 0,
    },
  };
  // Governance demands a 90% success rate on a 7-day review cadence.
  const governance = {
    review_cadence_days: 7,
    promotion_min_success_rate: 0.9,
  };

  const { eligible, recommendation, blockers } = evaluateShellRunnerPromotionPolicy({
    trend: regressingTrend,
    reviewedWindowDays: 7,
    governance,
  });

  expect(eligible).toBe(false);
  expect(recommendation).toBe('not_eligible');
  expect(blockers).toContain('success rate 75.00% below minimum 90.00%');
  expect(blockers).toContain('failures increased by 1 vs previous window');
});
it('marks promotion policy eligible when thresholds and trends are healthy', () => {
  // Current window: five results, no failures, every command hashed; all
  // deltas point in the improving direction relative to the previous window.
  const improvingTrend = {
    current: {
      command_result_total: 5,
      command_result_failed: 0,
      allowlist_blocked: 0,
      execution_blocked: 0,
      hashed_command_count: 5,
      unhashed_command_count: 0,
    },
    previous: {
      command_result_total: 5,
      command_result_failed: 1,
      allowlist_blocked: 1,
      execution_blocked: 0,
      hashed_command_count: 4,
      unhashed_command_count: 1,
    },
    deltas: {
      failures: -1,
      allowlist_blocks: -1,
      hash_coverage_pct: 20,
    },
  };
  const governance = {
    review_cadence_days: 7,
    promotion_min_success_rate: 0.9,
  };

  const { eligible, recommendation, blockers } = evaluateShellRunnerPromotionPolicy({
    trend: improvingTrend,
    reviewedWindowDays: 7,
    governance,
  });

  expect(eligible).toBe(true);
  expect(recommendation).toBe('eligible');
  // No blockers at all, not merely "none of the known ones".
  expect(blockers).toEqual([]);
});
it('recommends rollout phase from guardrails and audit summary', () => {
  // Every scenario below has all commands hashed and no allowlist/execution
  // blocks, so only the guardrail blockers, the result total, and the failure
  // count vary between calls.
  const phaseFor = (blockers: string[], total: number, failed: number) =>
    recommendShellRunnerRolloutPhase(
      { blockers },
      {
        command_result_total: total,
        command_result_failed: failed,
        allowlist_blocked: 0,
        execution_blocked: 0,
        hashed_command_count: total,
        unhashed_command_count: 0,
      },
    );

  // A guardrail blocker locks the rollout even though the audit data is clean.
  expect(phaseFor(['skills.installation_execution must be enabled'], 1, 0)).toBe('locked');
  // No blockers and no recorded command results.
  expect(phaseFor([], 0, 0)).toBe('guarded_observe');
  // No blockers, one failure among four results.
  expect(phaseFor([], 4, 1)).toBe('guarded_review');
  // No blockers, three results, no failures.
  expect(phaseFor([], 3, 0)).toBe('expand_candidate');
});
it('emits hashed command values for both successful and failed audit command results', () => {
const logger = {
skillsInstallerExecutionBlocked: vi.fn(),
@@ -1732,4 +2077,165 @@ describe('skills CLI helpers', () => {
process.exitCode = undefined;
rmSync(root, { recursive: true, force: true });
});
it('skills rollout-status reports governance owner blocker in JSON output', async () => {
  // Throwaway fixture root; removed in the outer `finally` so a failing
  // assertion cannot leave the directory behind.
  const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
  try {
    const configPath = join(root, 'config.yaml');
    const managedDir = join(root, 'managed');
    const bundledDir = join(root, 'bundled');
    const workspaceDir = join(root, 'workspace');
    const auditPath = join(root, 'audit.log');
    for (const dir of [managedDir, bundledDir, workspaceDir]) {
      mkdirSync(dir, { recursive: true });
    }
    // Empty audit log: the command has a file to read but no events in it.
    writeFileSync(auditPath, '', 'utf-8');
    // Shell runner enabled with an allowlist, but no governance owner set.
    writeSkillsCliConfig(configPath, {
      managedDir,
      bundledDir,
      workspaceDir,
      installationExecution: 'enabled',
      allowShellRunner: true,
      shellRunnerAllowlist: ['npm install*'],
      auditPath,
    });

    const program = new Command();
    registerSkillsCommand(program);

    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
    process.exitCode = undefined;
    try {
      await program.parseAsync(['skills', 'rollout-status', '--json', '-c', configPath], { from: 'user' });

      // Fail with a readable assertion instead of a JSON syntax error when
      // the command printed nothing.
      expect(logSpy).toHaveBeenCalled();
      const payload = JSON.parse(String(logSpy.mock.calls[0]?.[0]));
      expect(payload.recommendation).toBe('locked');
      expect(payload.promotion_policy.recommendation).toBe('not_eligible');
      expect(payload.governance.owner).toBeNull();
      expect(payload.guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
    } finally {
      // Always un-silence console.log and clear the exit code, even when an
      // assertion above throws, so later tests are not affected.
      logSpy.mockRestore();
      process.exitCode = undefined;
    }
  } finally {
    rmSync(root, { recursive: true, force: true });
  }
});
it('skills rollout-status writes JSON payload to output file', async () => {
  // Throwaway fixture root; removed in the outer `finally` so a failing
  // assertion cannot leave the directory behind.
  const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
  try {
    const configPath = join(root, 'config.yaml');
    const managedDir = join(root, 'managed');
    const bundledDir = join(root, 'bundled');
    const workspaceDir = join(root, 'workspace');
    const auditPath = join(root, 'audit.log');
    const outputPath = join(root, 'rollout-status.json');
    for (const dir of [managedDir, bundledDir, workspaceDir]) {
      mkdirSync(dir, { recursive: true });
    }
    // Empty audit log: no command results are available to the command.
    writeFileSync(auditPath, '', 'utf-8');
    // Fully configured shell runner, including a governance owner.
    writeSkillsCliConfig(configPath, {
      managedDir,
      bundledDir,
      workspaceDir,
      installationExecution: 'enabled',
      allowShellRunner: true,
      shellRunnerAllowlist: ['npm install*'],
      shellRunnerGovernanceOwner: 'skills-team',
      auditPath,
    });

    const program = new Command();
    registerSkillsCommand(program);

    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
    process.exitCode = undefined;
    try {
      await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '-c', configPath], { from: 'user' });

      // The payload is read back from the `--out` file, not from console.log.
      expect(existsSync(outputPath)).toBe(true);
      const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
      expect(payload.governance.owner).toBe('skills-team');
      expect(payload.recommendation).toBe('guarded_observe');
      expect(payload.trend.current.command_result_total).toBe(0);
      expect(payload.promotion_policy.recommendation).toBe('not_eligible');
    } finally {
      // Always un-silence console.log and clear the exit code, even when an
      // assertion above throws, so later tests are not affected.
      logSpy.mockRestore();
      process.exitCode = undefined;
    }
  } finally {
    rmSync(root, { recursive: true, force: true });
  }
});
it('skills rollout-status includes trend deltas across adjacent windows', async () => {
  // Throwaway fixture root; removed in the outer `finally` so a failing
  // assertion cannot leave the directory behind.
  const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
  try {
    const configPath = join(root, 'config.yaml');
    const managedDir = join(root, 'managed');
    const bundledDir = join(root, 'bundled');
    const workspaceDir = join(root, 'workspace');
    const auditPath = join(root, 'audit.log');
    const outputPath = join(root, 'rollout-trend.json');
    for (const dir of [managedDir, bundledDir, workspaceDir]) {
      mkdirSync(dir, { recursive: true });
    }

    const now = Date.now();
    const oneDay = 24 * 60 * 60 * 1000;
    // One allowlist-blocked failure one day ago and one success eight days
    // ago; with `--days 7` the assertions below expect one result in each of
    // the current and previous windows.
    const events = [
      {
        timestamp: now - oneDay,
        level: 'warn',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'trend-skill',
          phase: 'install',
          installer_type: 'node',
          command: 'sha256:new',
          status: 'failed',
          reason: 'allowlist_blocked',
        },
      },
      {
        timestamp: now - oneDay * 8,
        level: 'info',
        event_type: 'skills.installer.command_result',
        event: {
          skill_name: 'trend-skill',
          phase: 'install',
          installer_type: 'node',
          command: 'legacy-prev',
          status: 'succeeded',
          reason: 'runner_reported_success',
        },
      },
    ];
    // Audit log format used here: one JSON object per line, newline-terminated.
    writeFileSync(auditPath, `${events.map((event) => JSON.stringify(event)).join('\n')}\n`, 'utf-8');
    writeSkillsCliConfig(configPath, {
      managedDir,
      bundledDir,
      workspaceDir,
      installationExecution: 'enabled',
      allowShellRunner: true,
      shellRunnerAllowlist: ['npm install*'],
      shellRunnerGovernanceOwner: 'skills-team',
      auditPath,
    });

    const program = new Command();
    registerSkillsCommand(program);

    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
    process.exitCode = undefined;
    try {
      await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '--days', '7', '-c', configPath], {
        from: 'user',
      });

      const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
      expect(payload.trend.current.command_result_total).toBe(1);
      expect(payload.trend.previous.command_result_total).toBe(1);
      expect(payload.trend.deltas.failures).toBe(1);
      expect(payload.trend.deltas.allowlist_blocks).toBe(1);
      expect(payload.promotion_policy.recommendation).toBe('not_eligible');
      expect(payload.promotion_policy.blockers).toContain('failures increased by 1 vs previous window');
    } finally {
      // Always un-silence console.log and clear the exit code, even when an
      // assertion above throws, so later tests are not affected.
      logSpy.mockRestore();
      process.exitCode = undefined;
    }
  } finally {
    rmSync(root, { recursive: true, force: true });
  }
});
});