feat(skills): add rollout status governance and promotion policy checks
This commit is contained in:
+50
-2
@@ -1577,6 +1577,54 @@
|
|||||||
"src/cli/skills.test.ts"
|
"src/cli/skills.test.ts"
|
||||||
],
|
],
|
||||||
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint + pnpm build passing"
|
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint + pnpm build passing"
|
||||||
|
},
|
||||||
|
"shell_runner_rollout_status_and_guardrails": {
|
||||||
|
"status": "completed",
|
||||||
|
"description": "Added `skills rollout-status` with phased recommendation output (`locked|guarded_observe|guarded_review|expand_candidate`), guardrail checks for execution/audit/allowlist posture, and audit-window telemetry summary including hashed-command coverage",
|
||||||
|
"files_modified": [
|
||||||
|
"src/cli/skills.ts",
|
||||||
|
"src/cli/skills.test.ts"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint + pnpm build passing"
|
||||||
|
},
|
||||||
|
"shell_runner_governance_workflow_operationalization": {
|
||||||
|
"status": "completed",
|
||||||
|
"description": "Operationalized shell-runner allowlist governance by adding explicit config-backed ownership/review/promotion criteria (`skills.shell_runner_governance`) and wiring `skills rollout-status` to enforce owner presence when shell runner is enabled",
|
||||||
|
"files_modified": [
|
||||||
|
"src/config/schema.ts",
|
||||||
|
"src/config/schema.test.ts",
|
||||||
|
"config/default.yaml",
|
||||||
|
"src/cli/skills.ts",
|
||||||
|
"src/cli/skills.test.ts"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm typecheck + pnpm test:run src/config/schema.test.ts src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
|
||||||
|
},
|
||||||
|
"shell_runner_rollout_status_export_output": {
|
||||||
|
"status": "completed",
|
||||||
|
"description": "Extended `skills rollout-status` with `--out <path>` export support so governance and recommendation payloads can be saved as machine-readable JSON artifacts for review workflows",
|
||||||
|
"files_modified": [
|
||||||
|
"src/cli/skills.ts",
|
||||||
|
"src/cli/skills.test.ts"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
|
||||||
|
},
|
||||||
|
"shell_runner_rollout_trend_snapshot": {
|
||||||
|
"status": "completed",
|
||||||
|
"description": "Added historical trend snapshots to `skills rollout-status` by comparing current and previous equal-duration windows, including deltas for failures, allowlist blocks, and hashed-command coverage in both console and JSON payloads",
|
||||||
|
"files_modified": [
|
||||||
|
"src/cli/skills.ts",
|
||||||
|
"src/cli/skills.test.ts"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
|
||||||
|
},
|
||||||
|
"shell_runner_rollout_promotion_policy_checks": {
|
||||||
|
"status": "completed",
|
||||||
|
"description": "Added promotion-policy evaluation to `skills rollout-status` using governance thresholds (`review_cadence_days`, `promotion_min_success_rate`) and trend deltas, with structured blockers/recommendation in JSON and console output",
|
||||||
|
"files_modified": [
|
||||||
|
"src/cli/skills.ts",
|
||||||
|
"src/cli/skills.test.ts"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1605,7 +1653,7 @@
|
|||||||
},
|
},
|
||||||
|
|
||||||
"overall_progress": {
|
"overall_progress": {
|
||||||
"total_test_count": 1575,
|
"total_test_count": 1586,
|
||||||
"all_tests_passing": true,
|
"all_tests_passing": true,
|
||||||
"p0_completion": "3/3 (100%)",
|
"p0_completion": "3/3 (100%)",
|
||||||
"p1_completion": "4/4 (100%)",
|
"p1_completion": "4/4 (100%)",
|
||||||
@@ -1625,7 +1673,7 @@
|
|||||||
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
||||||
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
|
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
|
||||||
"remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
|
"remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
|
||||||
"next_up": "Skills infrastructure follow-up: define phased enablement criteria for shell runner (allowlist governance, telemetry review, and rollout guardrails) now that audit command strings are hashed"
|
"next_up": "Skills infrastructure follow-up: expose promotion-policy status as a dedicated machine-readable contract for automation consumers (e.g., CI gate or dashboard ingest) before broader shell-runner rollout"
|
||||||
},
|
},
|
||||||
"soul_md_and_cron_create": {
|
"soul_md_and_cron_create": {
|
||||||
"date": "2026-02-11",
|
"date": "2026-02-11",
|
||||||
|
|||||||
+507
-1
@@ -1,5 +1,5 @@
|
|||||||
import { describe, it, expect, vi } from 'vitest';
|
import { describe, it, expect, vi } from 'vitest';
|
||||||
import { mkdtempSync, mkdirSync, writeFileSync, existsSync, rmSync } from 'fs';
|
import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from 'fs';
|
||||||
import { join } from 'path';
|
import { join } from 'path';
|
||||||
import { tmpdir } from 'os';
|
import { tmpdir } from 'os';
|
||||||
import { Command } from 'commander';
|
import { Command } from 'commander';
|
||||||
@@ -27,14 +27,21 @@ import {
|
|||||||
createShellSkillInstallerCommandRunner,
|
createShellSkillInstallerCommandRunner,
|
||||||
checkCommandAgainstAllowlist,
|
checkCommandAgainstAllowlist,
|
||||||
emitShellRunnerAuditEvents,
|
emitShellRunnerAuditEvents,
|
||||||
|
calculateShellRunnerHashCoveragePercent,
|
||||||
|
computeShellRunnerAuditTrendSnapshot,
|
||||||
|
evaluateShellRunnerPromotionPolicy,
|
||||||
|
evaluateShellRunnerRolloutGuardrails,
|
||||||
hashSkillInstallerAuditCommand,
|
hashSkillInstallerAuditCommand,
|
||||||
|
recommendShellRunnerRolloutPhase,
|
||||||
sanitizeSkillInstallerAuditReason,
|
sanitizeSkillInstallerAuditReason,
|
||||||
|
summarizeShellRunnerAuditWindow,
|
||||||
resolveSkillInstallerCommandRunner,
|
resolveSkillInstallerCommandRunner,
|
||||||
runSkillExecuteAction,
|
runSkillExecuteAction,
|
||||||
runSkillInstallAction,
|
runSkillInstallAction,
|
||||||
registerSkillsCommand,
|
registerSkillsCommand,
|
||||||
} from './skills.js';
|
} from './skills.js';
|
||||||
import type { Skill } from '../skills/index.js';
|
import type { Skill } from '../skills/index.js';
|
||||||
|
import type { AuditEvent } from '../audit/types.js';
|
||||||
|
|
||||||
function buildSkill(overrides: Partial<Skill>): Skill {
|
function buildSkill(overrides: Partial<Skill>): Skill {
|
||||||
return {
|
return {
|
||||||
@@ -61,9 +68,18 @@ function writeSkillsCliConfig(
|
|||||||
installationExecution?: 'disabled' | 'enabled';
|
installationExecution?: 'disabled' | 'enabled';
|
||||||
allowShellRunner?: boolean;
|
allowShellRunner?: boolean;
|
||||||
shellRunnerAllowlist?: string[];
|
shellRunnerAllowlist?: string[];
|
||||||
|
shellRunnerGovernanceOwner?: string;
|
||||||
|
auditEnabled?: boolean;
|
||||||
|
auditPath?: string;
|
||||||
},
|
},
|
||||||
): void {
|
): void {
|
||||||
const allowlist = opts.shellRunnerAllowlist ?? [];
|
const allowlist = opts.shellRunnerAllowlist ?? [];
|
||||||
|
const auditLines = opts.auditPath
|
||||||
|
? ['audit:', ` enabled: ${opts.auditEnabled ?? true}`, ` path: ${opts.auditPath}`]
|
||||||
|
: [];
|
||||||
|
const governanceOwnerLines = opts.shellRunnerGovernanceOwner
|
||||||
|
? [' shell_runner_governance:', ` owner: '${opts.shellRunnerGovernanceOwner}'`]
|
||||||
|
: [];
|
||||||
writeFileSync(
|
writeFileSync(
|
||||||
configPath,
|
configPath,
|
||||||
[
|
[
|
||||||
@@ -78,6 +94,8 @@ function writeSkillsCliConfig(
|
|||||||
` installation_execution: ${opts.installationExecution ?? 'disabled'}`,
|
` installation_execution: ${opts.installationExecution ?? 'disabled'}`,
|
||||||
` allow_shell_runner: ${opts.allowShellRunner ?? false}`,
|
` allow_shell_runner: ${opts.allowShellRunner ?? false}`,
|
||||||
` shell_runner_allowlist: [${allowlist.map((item) => `'${item}'`).join(', ')}]`,
|
` shell_runner_allowlist: [${allowlist.map((item) => `'${item}'`).join(', ')}]`,
|
||||||
|
...governanceOwnerLines,
|
||||||
|
...auditLines,
|
||||||
].join('\n'),
|
].join('\n'),
|
||||||
'utf-8',
|
'utf-8',
|
||||||
);
|
);
|
||||||
@@ -503,6 +521,333 @@ describe('skills CLI helpers', () => {
|
|||||||
expect(sanitizeSkillInstallerAuditReason('allowlist_blocked')).toBe('allowlist_blocked');
|
expect(sanitizeSkillInstallerAuditReason('allowlist_blocked')).toBe('allowlist_blocked');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('reports shell runner rollout guardrail blockers', () => {
|
||||||
|
const guardrails = evaluateShellRunnerRolloutGuardrails(
|
||||||
|
{
|
||||||
|
installation_execution: 'disabled',
|
||||||
|
allow_shell_runner: false,
|
||||||
|
shell_runner_allowlist: ['*'],
|
||||||
|
shell_runner_governance: {
|
||||||
|
review_cadence_days: 7,
|
||||||
|
promotion_min_success_rate: 0.9,
|
||||||
|
},
|
||||||
|
load: { watch: false, watch_debounce_ms: 250 },
|
||||||
|
},
|
||||||
|
false,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(guardrails.blockers).toEqual([
|
||||||
|
'skills.installation_execution must be enabled',
|
||||||
|
'skills.allow_shell_runner must be true',
|
||||||
|
"skills.shell_runner_allowlist cannot include wildcard-only '*' patterns",
|
||||||
|
'audit.enabled must be true for shell runner rollout review',
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('requires governance owner when shell runner is enabled', () => {
|
||||||
|
const guardrails = evaluateShellRunnerRolloutGuardrails(
|
||||||
|
{
|
||||||
|
installation_execution: 'enabled',
|
||||||
|
allow_shell_runner: true,
|
||||||
|
shell_runner_allowlist: ['npm install*'],
|
||||||
|
shell_runner_governance: {
|
||||||
|
review_cadence_days: 7,
|
||||||
|
promotion_min_success_rate: 0.9,
|
||||||
|
},
|
||||||
|
load: { watch: false, watch_debounce_ms: 250 },
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('summarizes shell runner audit windows with hash coverage and failures', () => {
|
||||||
|
const events: AuditEvent[] = [
|
||||||
|
{
|
||||||
|
timestamp: 1,
|
||||||
|
level: 'debug',
|
||||||
|
event_type: 'skills.installer.command_result',
|
||||||
|
event: {
|
||||||
|
skill_name: 'audit-skill',
|
||||||
|
phase: 'install',
|
||||||
|
installer_type: 'node',
|
||||||
|
command: 'sha256:abc',
|
||||||
|
status: 'succeeded',
|
||||||
|
reason: 'runner_reported_success',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: 2,
|
||||||
|
level: 'warn',
|
||||||
|
event_type: 'skills.installer.command_result',
|
||||||
|
event: {
|
||||||
|
skill_name: 'audit-skill',
|
||||||
|
phase: 'install',
|
||||||
|
installer_type: 'download',
|
||||||
|
command: 'download https://example.com/pkg.tgz',
|
||||||
|
status: 'failed',
|
||||||
|
reason: 'allowlist_blocked',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: 3,
|
||||||
|
level: 'warn',
|
||||||
|
event_type: 'skills.installer.execution_blocked',
|
||||||
|
event: {
|
||||||
|
skill_name: 'audit-skill',
|
||||||
|
phase: 'execute',
|
||||||
|
execution_requested: true,
|
||||||
|
execution_enabled: false,
|
||||||
|
reason: 'execution_policy_disabled',
|
||||||
|
attempted_command_count: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
expect(summarizeShellRunnerAuditWindow(events)).toEqual({
|
||||||
|
command_result_total: 2,
|
||||||
|
command_result_failed: 1,
|
||||||
|
allowlist_blocked: 1,
|
||||||
|
execution_blocked: 1,
|
||||||
|
hashed_command_count: 1,
|
||||||
|
unhashed_command_count: 1,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('calculates hash coverage percentage for shell runner summaries', () => {
|
||||||
|
expect(
|
||||||
|
calculateShellRunnerHashCoveragePercent({
|
||||||
|
command_result_total: 0,
|
||||||
|
command_result_failed: 0,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 0,
|
||||||
|
unhashed_command_count: 0,
|
||||||
|
}),
|
||||||
|
).toBe(0);
|
||||||
|
|
||||||
|
expect(
|
||||||
|
calculateShellRunnerHashCoveragePercent({
|
||||||
|
command_result_total: 4,
|
||||||
|
command_result_failed: 0,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 3,
|
||||||
|
unhashed_command_count: 1,
|
||||||
|
}),
|
||||||
|
).toBe(75);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('computes shell runner trend snapshot across current and previous windows', () => {
|
||||||
|
const now = 1_000_000;
|
||||||
|
const oneDay = 24 * 60 * 60 * 1000;
|
||||||
|
const window = 7 * oneDay;
|
||||||
|
const currentWindowStart = now - window;
|
||||||
|
|
||||||
|
const events: AuditEvent[] = [
|
||||||
|
{
|
||||||
|
timestamp: now - oneDay,
|
||||||
|
level: 'warn',
|
||||||
|
event_type: 'skills.installer.command_result',
|
||||||
|
event: {
|
||||||
|
skill_name: 'demo',
|
||||||
|
phase: 'install',
|
||||||
|
installer_type: 'node',
|
||||||
|
command: 'sha256:new-a',
|
||||||
|
status: 'failed',
|
||||||
|
reason: 'exit_code_1',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: now - oneDay * 2,
|
||||||
|
level: 'warn',
|
||||||
|
event_type: 'skills.installer.command_result',
|
||||||
|
event: {
|
||||||
|
skill_name: 'demo',
|
||||||
|
phase: 'install',
|
||||||
|
installer_type: 'node',
|
||||||
|
command: 'sha256:new-b',
|
||||||
|
status: 'failed',
|
||||||
|
reason: 'allowlist_blocked',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: now - window - oneDay,
|
||||||
|
level: 'warn',
|
||||||
|
event_type: 'skills.installer.command_result',
|
||||||
|
event: {
|
||||||
|
skill_name: 'demo',
|
||||||
|
phase: 'install',
|
||||||
|
installer_type: 'node',
|
||||||
|
command: 'sha256:old-a',
|
||||||
|
status: 'failed',
|
||||||
|
reason: 'allowlist_blocked',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: now - window - oneDay * 2,
|
||||||
|
level: 'info',
|
||||||
|
event_type: 'skills.installer.command_result',
|
||||||
|
event: {
|
||||||
|
skill_name: 'demo',
|
||||||
|
phase: 'install',
|
||||||
|
installer_type: 'node',
|
||||||
|
command: 'legacy-old-command',
|
||||||
|
status: 'succeeded',
|
||||||
|
reason: 'runner_reported_success',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
const trend = computeShellRunnerAuditTrendSnapshot({
|
||||||
|
events,
|
||||||
|
currentWindowStartMs: currentWindowStart,
|
||||||
|
currentWindowEndMs: now,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(trend.current.command_result_failed).toBe(2);
|
||||||
|
expect(trend.previous.command_result_failed).toBe(1);
|
||||||
|
expect(trend.deltas.failures).toBe(1);
|
||||||
|
expect(trend.current.allowlist_blocked).toBe(1);
|
||||||
|
expect(trend.previous.allowlist_blocked).toBe(1);
|
||||||
|
expect(trend.deltas.allowlist_blocks).toBe(0);
|
||||||
|
expect(trend.deltas.hash_coverage_pct).toBe(50);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('evaluates promotion policy with governance cadence and success thresholds', () => {
|
||||||
|
const policy = evaluateShellRunnerPromotionPolicy({
|
||||||
|
trend: {
|
||||||
|
current: {
|
||||||
|
command_result_total: 4,
|
||||||
|
command_result_failed: 1,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 4,
|
||||||
|
unhashed_command_count: 0,
|
||||||
|
},
|
||||||
|
previous: {
|
||||||
|
command_result_total: 4,
|
||||||
|
command_result_failed: 0,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 4,
|
||||||
|
unhashed_command_count: 0,
|
||||||
|
},
|
||||||
|
deltas: {
|
||||||
|
failures: 1,
|
||||||
|
allowlist_blocks: 0,
|
||||||
|
hash_coverage_pct: 0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
reviewedWindowDays: 7,
|
||||||
|
governance: {
|
||||||
|
review_cadence_days: 7,
|
||||||
|
promotion_min_success_rate: 0.9,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(policy.eligible).toBe(false);
|
||||||
|
expect(policy.recommendation).toBe('not_eligible');
|
||||||
|
expect(policy.blockers).toContain('success rate 75.00% below minimum 90.00%');
|
||||||
|
expect(policy.blockers).toContain('failures increased by 1 vs previous window');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('marks promotion policy eligible when thresholds and trends are healthy', () => {
|
||||||
|
const policy = evaluateShellRunnerPromotionPolicy({
|
||||||
|
trend: {
|
||||||
|
current: {
|
||||||
|
command_result_total: 5,
|
||||||
|
command_result_failed: 0,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 5,
|
||||||
|
unhashed_command_count: 0,
|
||||||
|
},
|
||||||
|
previous: {
|
||||||
|
command_result_total: 5,
|
||||||
|
command_result_failed: 1,
|
||||||
|
allowlist_blocked: 1,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 4,
|
||||||
|
unhashed_command_count: 1,
|
||||||
|
},
|
||||||
|
deltas: {
|
||||||
|
failures: -1,
|
||||||
|
allowlist_blocks: -1,
|
||||||
|
hash_coverage_pct: 20,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
reviewedWindowDays: 7,
|
||||||
|
governance: {
|
||||||
|
review_cadence_days: 7,
|
||||||
|
promotion_min_success_rate: 0.9,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(policy.eligible).toBe(true);
|
||||||
|
expect(policy.recommendation).toBe('eligible');
|
||||||
|
expect(policy.blockers).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('recommends rollout phase from guardrails and audit summary', () => {
|
||||||
|
expect(
|
||||||
|
recommendShellRunnerRolloutPhase(
|
||||||
|
{ blockers: ['skills.installation_execution must be enabled'] },
|
||||||
|
{
|
||||||
|
command_result_total: 1,
|
||||||
|
command_result_failed: 0,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 1,
|
||||||
|
unhashed_command_count: 0,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
).toBe('locked');
|
||||||
|
|
||||||
|
expect(
|
||||||
|
recommendShellRunnerRolloutPhase(
|
||||||
|
{ blockers: [] },
|
||||||
|
{
|
||||||
|
command_result_total: 0,
|
||||||
|
command_result_failed: 0,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 0,
|
||||||
|
unhashed_command_count: 0,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
).toBe('guarded_observe');
|
||||||
|
|
||||||
|
expect(
|
||||||
|
recommendShellRunnerRolloutPhase(
|
||||||
|
{ blockers: [] },
|
||||||
|
{
|
||||||
|
command_result_total: 4,
|
||||||
|
command_result_failed: 1,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 4,
|
||||||
|
unhashed_command_count: 0,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
).toBe('guarded_review');
|
||||||
|
|
||||||
|
expect(
|
||||||
|
recommendShellRunnerRolloutPhase(
|
||||||
|
{ blockers: [] },
|
||||||
|
{
|
||||||
|
command_result_total: 3,
|
||||||
|
command_result_failed: 0,
|
||||||
|
allowlist_blocked: 0,
|
||||||
|
execution_blocked: 0,
|
||||||
|
hashed_command_count: 3,
|
||||||
|
unhashed_command_count: 0,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
).toBe('expand_candidate');
|
||||||
|
});
|
||||||
|
|
||||||
it('emits hashed command values for both successful and failed audit command results', () => {
|
it('emits hashed command values for both successful and failed audit command results', () => {
|
||||||
const logger = {
|
const logger = {
|
||||||
skillsInstallerExecutionBlocked: vi.fn(),
|
skillsInstallerExecutionBlocked: vi.fn(),
|
||||||
@@ -1732,4 +2077,165 @@ describe('skills CLI helpers', () => {
|
|||||||
process.exitCode = undefined;
|
process.exitCode = undefined;
|
||||||
rmSync(root, { recursive: true, force: true });
|
rmSync(root, { recursive: true, force: true });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('skills rollout-status reports governance owner blocker in JSON output', async () => {
|
||||||
|
const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
|
||||||
|
const configPath = join(root, 'config.yaml');
|
||||||
|
const managedDir = join(root, 'managed');
|
||||||
|
const bundledDir = join(root, 'bundled');
|
||||||
|
const workspaceDir = join(root, 'workspace');
|
||||||
|
const auditPath = join(root, 'audit.log');
|
||||||
|
mkdirSync(managedDir, { recursive: true });
|
||||||
|
mkdirSync(bundledDir, { recursive: true });
|
||||||
|
mkdirSync(workspaceDir, { recursive: true });
|
||||||
|
writeFileSync(auditPath, '', 'utf-8');
|
||||||
|
writeSkillsCliConfig(configPath, {
|
||||||
|
managedDir,
|
||||||
|
bundledDir,
|
||||||
|
workspaceDir,
|
||||||
|
installationExecution: 'enabled',
|
||||||
|
allowShellRunner: true,
|
||||||
|
shellRunnerAllowlist: ['npm install*'],
|
||||||
|
auditPath,
|
||||||
|
});
|
||||||
|
|
||||||
|
const program = new Command();
|
||||||
|
registerSkillsCommand(program);
|
||||||
|
|
||||||
|
const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
|
||||||
|
process.exitCode = undefined;
|
||||||
|
|
||||||
|
await program.parseAsync(['skills', 'rollout-status', '--json', '-c', configPath], { from: 'user' });
|
||||||
|
|
||||||
|
const payload = JSON.parse(String(logSpy.mock.calls[0]?.[0]));
|
||||||
|
expect(payload.recommendation).toBe('locked');
|
||||||
|
expect(payload.promotion_policy.recommendation).toBe('not_eligible');
|
||||||
|
expect(payload.governance.owner).toBeNull();
|
||||||
|
expect(payload.guardrails.blockers).toContain('skills.shell_runner_governance.owner must be set when shell runner is enabled');
|
||||||
|
|
||||||
|
logSpy.mockRestore();
|
||||||
|
process.exitCode = undefined;
|
||||||
|
rmSync(root, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('skills rollout-status writes JSON payload to output file', async () => {
|
||||||
|
const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
|
||||||
|
const configPath = join(root, 'config.yaml');
|
||||||
|
const managedDir = join(root, 'managed');
|
||||||
|
const bundledDir = join(root, 'bundled');
|
||||||
|
const workspaceDir = join(root, 'workspace');
|
||||||
|
const auditPath = join(root, 'audit.log');
|
||||||
|
const outputPath = join(root, 'rollout-status.json');
|
||||||
|
mkdirSync(managedDir, { recursive: true });
|
||||||
|
mkdirSync(bundledDir, { recursive: true });
|
||||||
|
mkdirSync(workspaceDir, { recursive: true });
|
||||||
|
writeFileSync(auditPath, '', 'utf-8');
|
||||||
|
writeSkillsCliConfig(configPath, {
|
||||||
|
managedDir,
|
||||||
|
bundledDir,
|
||||||
|
workspaceDir,
|
||||||
|
installationExecution: 'enabled',
|
||||||
|
allowShellRunner: true,
|
||||||
|
shellRunnerAllowlist: ['npm install*'],
|
||||||
|
shellRunnerGovernanceOwner: 'skills-team',
|
||||||
|
auditPath,
|
||||||
|
});
|
||||||
|
|
||||||
|
const program = new Command();
|
||||||
|
registerSkillsCommand(program);
|
||||||
|
|
||||||
|
const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
|
||||||
|
process.exitCode = undefined;
|
||||||
|
|
||||||
|
await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '-c', configPath], { from: 'user' });
|
||||||
|
|
||||||
|
expect(existsSync(outputPath)).toBe(true);
|
||||||
|
const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
|
||||||
|
expect(payload.governance.owner).toBe('skills-team');
|
||||||
|
expect(payload.recommendation).toBe('guarded_observe');
|
||||||
|
expect(payload.trend.current.command_result_total).toBe(0);
|
||||||
|
expect(payload.promotion_policy.recommendation).toBe('not_eligible');
|
||||||
|
|
||||||
|
logSpy.mockRestore();
|
||||||
|
process.exitCode = undefined;
|
||||||
|
rmSync(root, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('skills rollout-status includes trend deltas across adjacent windows', async () => {
|
||||||
|
const root = mkdtempSync(join(tmpdir(), 'flynn-skills-cli-'));
|
||||||
|
const configPath = join(root, 'config.yaml');
|
||||||
|
const managedDir = join(root, 'managed');
|
||||||
|
const bundledDir = join(root, 'bundled');
|
||||||
|
const workspaceDir = join(root, 'workspace');
|
||||||
|
const auditPath = join(root, 'audit.log');
|
||||||
|
const outputPath = join(root, 'rollout-trend.json');
|
||||||
|
mkdirSync(managedDir, { recursive: true });
|
||||||
|
mkdirSync(bundledDir, { recursive: true });
|
||||||
|
mkdirSync(workspaceDir, { recursive: true });
|
||||||
|
|
||||||
|
const now = Date.now();
|
||||||
|
const oneDay = 24 * 60 * 60 * 1000;
|
||||||
|
const events = [
|
||||||
|
{
|
||||||
|
timestamp: now - oneDay,
|
||||||
|
level: 'warn',
|
||||||
|
event_type: 'skills.installer.command_result',
|
||||||
|
event: {
|
||||||
|
skill_name: 'trend-skill',
|
||||||
|
phase: 'install',
|
||||||
|
installer_type: 'node',
|
||||||
|
command: 'sha256:new',
|
||||||
|
status: 'failed',
|
||||||
|
reason: 'allowlist_blocked',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: now - oneDay * 8,
|
||||||
|
level: 'info',
|
||||||
|
event_type: 'skills.installer.command_result',
|
||||||
|
event: {
|
||||||
|
skill_name: 'trend-skill',
|
||||||
|
phase: 'install',
|
||||||
|
installer_type: 'node',
|
||||||
|
command: 'legacy-prev',
|
||||||
|
status: 'succeeded',
|
||||||
|
reason: 'runner_reported_success',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
];
|
||||||
|
writeFileSync(auditPath, `${events.map((event) => JSON.stringify(event)).join('\n')}\n`, 'utf-8');
|
||||||
|
|
||||||
|
writeSkillsCliConfig(configPath, {
|
||||||
|
managedDir,
|
||||||
|
bundledDir,
|
||||||
|
workspaceDir,
|
||||||
|
installationExecution: 'enabled',
|
||||||
|
allowShellRunner: true,
|
||||||
|
shellRunnerAllowlist: ['npm install*'],
|
||||||
|
shellRunnerGovernanceOwner: 'skills-team',
|
||||||
|
auditPath,
|
||||||
|
});
|
||||||
|
|
||||||
|
const program = new Command();
|
||||||
|
registerSkillsCommand(program);
|
||||||
|
|
||||||
|
const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
|
||||||
|
process.exitCode = undefined;
|
||||||
|
|
||||||
|
await program.parseAsync(['skills', 'rollout-status', '--json', '--out', outputPath, '--days', '7', '-c', configPath], {
|
||||||
|
from: 'user',
|
||||||
|
});
|
||||||
|
|
||||||
|
const payload = JSON.parse(readFileSync(outputPath, 'utf-8'));
|
||||||
|
expect(payload.trend.current.command_result_total).toBe(1);
|
||||||
|
expect(payload.trend.previous.command_result_total).toBe(1);
|
||||||
|
expect(payload.trend.deltas.failures).toBe(1);
|
||||||
|
expect(payload.trend.deltas.allowlist_blocks).toBe(1);
|
||||||
|
expect(payload.promotion_policy.recommendation).toBe('not_eligible');
|
||||||
|
expect(payload.promotion_policy.blockers).toContain('failures increased by 1 vs previous window');
|
||||||
|
|
||||||
|
logSpy.mockRestore();
|
||||||
|
process.exitCode = undefined;
|
||||||
|
rmSync(root, { recursive: true, force: true });
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -2,8 +2,12 @@ import type { Command } from 'commander';
|
|||||||
import { resolve } from 'path';
|
import { resolve } from 'path';
|
||||||
import { homedir } from 'os';
|
import { homedir } from 'os';
|
||||||
import { spawnSync } from 'child_process';
|
import { spawnSync } from 'child_process';
|
||||||
|
import { writeFileSync } from 'fs';
|
||||||
import { createHash } from 'crypto';
|
import { createHash } from 'crypto';
|
||||||
import { auditLogger } from '../audit/index.js';
|
import { auditLogger } from '../audit/index.js';
|
||||||
|
import { queryAuditLogs } from '../audit/export.js';
|
||||||
|
import type { AuditEvent } from '../audit/types.js';
|
||||||
|
import type { Config } from '../config/schema.js';
|
||||||
import type { Skill } from '../skills/index.js';
|
import type { Skill } from '../skills/index.js';
|
||||||
import { loadAllSkills, SkillInstaller, buildInstallerPlan, loadSkill } from '../skills/index.js';
|
import { loadAllSkills, SkillInstaller, buildInstallerPlan, loadSkill } from '../skills/index.js';
|
||||||
import { loadConfigSafe } from './shared.js';
|
import { loadConfigSafe } from './shared.js';
|
||||||
@@ -92,6 +96,238 @@ export function sanitizeSkillInstallerAuditReason(reason: string): string {
|
|||||||
return reason;
|
return reason;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Outcome of evaluating the shell-runner rollout guardrails.
 */
export interface ShellRunnerRolloutGuardrailStatus {
  /** One message per failed guardrail; empty when no guardrail failed. */
  blockers: string[];
}
|
||||||
|
|
||||||
|
/**
 * Counters aggregated from the audit events of a single review window.
 */
export interface ShellRunnerAuditWindowSummary {
  /** Number of `skills.installer.command_result` events seen. */
  command_result_total: number;
  /** Command results whose `status` is `'failed'`. */
  command_result_failed: number;
  /** Command results whose `reason` is `'allowlist_blocked'`. */
  allowlist_blocked: number;
  /** Number of `skills.installer.execution_blocked` events seen. */
  execution_blocked: number;
  /** Command results whose `command` starts with `sha256:`. */
  hashed_command_count: number;
  /** Command results whose `command` does not start with `sha256:` (including missing commands). */
  unhashed_command_count: number;
}
|
||||||
|
|
||||||
|
/**
 * Side-by-side summaries of two audit windows together with the change between them.
 *
 * NOTE(review): the deltas are presumably "current minus previous" — confirm against
 * `computeShellRunnerAuditTrendSnapshot`.
 */
export interface ShellRunnerAuditTrendSnapshot {
  /** Summary of the window under review. */
  current: ShellRunnerAuditWindowSummary;
  /** Summary of the window it is compared against. */
  previous: ShellRunnerAuditWindowSummary;
  /** Change in the headline metrics between the two windows. */
  deltas: {
    /** Change in failed command results. */
    failures: number;
    /** Change in allowlist-blocked command results. */
    allowlist_blocks: number;
    /** Change in hashed-command coverage, in percentage points. */
    hash_coverage_pct: number;
  };
}
|
||||||
|
|
||||||
|
/**
 * Result of evaluating the shell-runner promotion policy.
 *
 * NOTE(review): the evaluator that populates this shape is not declared next to it; the
 * hedged field notes below are inferred from the field names and should be confirmed.
 */
export interface ShellRunnerPromotionPolicyStatus {
  /** Whether promotion is currently allowed. */
  eligible: boolean;
  /** Two-state verdict; presumably mirrors `eligible` — TODO confirm. */
  recommendation: 'eligible' | 'not_eligible';
  /** Presumably the configured `review_cadence_days` — TODO confirm. */
  cadence_days: number;
  /** Length, in days, of the audit window that was reviewed. */
  reviewed_window_days: number;
  /** Observed success rate; presumably a 0..1 fraction like the minimum — TODO confirm. */
  success_rate: number;
  /** Presumably the configured `promotion_min_success_rate` — TODO confirm. */
  minimum_success_rate: number;
  /** Change in failed command results between the compared windows. */
  failures_delta: number;
  /** Change in allowlist-blocked command results between the compared windows. */
  allowlist_blocks_delta: number;
  /** Change in hashed-command coverage, in percentage points. */
  hash_coverage_delta_pct: number;
  /** Reasons promotion is not allowed; presumably empty when `eligible` is true. */
  blockers: string[];
}
|
||||||
|
|
||||||
|
/** Rollout phases that can be recommended for the shell runner. */
export type ShellRunnerRolloutRecommendation =
  | 'locked'
  | 'guarded_observe'
  | 'guarded_review'
  | 'expand_candidate';
|
||||||
|
|
||||||
|
/**
 * Checks the skills configuration and audit posture against the shell-runner rollout guardrails.
 *
 * Each failed guardrail adds one message to `blockers`, always in this order: execution
 * policy, shell-runner opt-in, empty allowlist, wildcard-only allowlist entry, governance
 * owner, audit logging.
 *
 * @param skillsConfig - The `skills` section of the loaded configuration.
 * @param auditEnabled - Whether audit logging is enabled.
 * @returns The collected blockers; the list is empty when every guardrail passes.
 */
export function evaluateShellRunnerRolloutGuardrails(
  skillsConfig: Config['skills'],
  auditEnabled: boolean,
): ShellRunnerRolloutGuardrailStatus {
  const blockers: string[] = [];

  if (skillsConfig.installation_execution !== 'enabled') {
    blockers.push('skills.installation_execution must be enabled');
  }

  if (!skillsConfig.allow_shell_runner) {
    blockers.push('skills.allow_shell_runner must be true');
  }

  if (skillsConfig.shell_runner_allowlist.length === 0) {
    blockers.push('skills.shell_runner_allowlist must include at least one pattern');
  }

  // A bare '*' (ignoring surrounding whitespace) would allow every command.
  if (skillsConfig.shell_runner_allowlist.some((pattern) => pattern.trim() === '*')) {
    blockers.push("skills.shell_runner_allowlist cannot include wildcard-only '*' patterns");
  }

  // A whitespace-only owner names nobody, so it is treated the same as a missing owner.
  const governanceOwner = skillsConfig.shell_runner_governance.owner?.trim() ?? '';
  if (skillsConfig.allow_shell_runner && governanceOwner.length === 0) {
    blockers.push('skills.shell_runner_governance.owner must be set when shell runner is enabled');
  }

  if (!auditEnabled) {
    blockers.push('audit.enabled must be true for shell runner rollout review');
  }

  return { blockers };
}
|
||||||
|
|
||||||
|
/**
 * Tallies shell-runner installer audit events into a window summary.
 *
 * `skills.installer.command_result` events bump the total and, depending on
 * their payload, the failed / allowlist-blocked counters; each such event is
 * also counted as hashed (payload `command` starts with `sha256:`) or
 * unhashed (anything else, including a missing or non-string command).
 * `skills.installer.execution_blocked` events bump `execution_blocked`.
 * Events of any other type are ignored.
 *
 * @param events - Audit events to summarize.
 * @returns Counters for the given events; all zero for an empty list.
 */
export function summarizeShellRunnerAuditWindow(events: AuditEvent[]): ShellRunnerAuditWindowSummary {
  const totals: ShellRunnerAuditWindowSummary = {
    command_result_total: 0,
    command_result_failed: 0,
    allowlist_blocked: 0,
    execution_blocked: 0,
    hashed_command_count: 0,
    unhashed_command_count: 0,
  };

  // Non-string payload fields are treated as empty strings.
  const textOf = (value: unknown): string => (typeof value === 'string' ? value : '');

  for (const auditEvent of events) {
    switch (auditEvent.event_type) {
      case 'skills.installer.command_result': {
        totals.command_result_total += 1;

        const details = auditEvent.event as Record<string, unknown>;
        const outcome = textOf(details.status);
        const cause = textOf(details.reason);
        const commandText = textOf(details.command);

        if (outcome === 'failed') {
          totals.command_result_failed += 1;
        }
        if (cause === 'allowlist_blocked') {
          totals.allowlist_blocked += 1;
        }

        const isHashed = commandText.startsWith('sha256:');
        if (isHashed) {
          totals.hashed_command_count += 1;
        } else {
          totals.unhashed_command_count += 1;
        }
        break;
      }
      case 'skills.installer.execution_blocked':
        totals.execution_blocked += 1;
        break;
      default:
        break;
    }
  }

  return totals;
}
|
||||||
|
|
||||||
|
/**
 * Computes the share of audited commands whose payload was hashed.
 *
 * @param summary - Window summary providing hashed and unhashed command counts.
 * @returns Hashed commands as a percentage (0–100) of hashed + unhashed
 *   commands, or 0 when no commands were counted.
 */
export function calculateShellRunnerHashCoveragePercent(summary: ShellRunnerAuditWindowSummary): number {
  const { hashed_command_count: hashed, unhashed_command_count: unhashed } = summary;
  const observed = hashed + unhashed;
  return observed === 0 ? 0 : (hashed / observed) * 100;
}
|
||||||
|
|
||||||
|
/**
 * Summarizes the current audit window and the equally long window directly
 * before it, and reports the change between the two.
 *
 * An event belongs to the current window when its timestamp lies in
 * `[currentWindowStartMs, currentWindowEndMs]` (both ends inclusive), and to
 * the previous window when it lies in
 * `[currentWindowStartMs - windowLength, currentWindowStartMs)`.
 * Events outside both ranges are ignored.
 *
 * @param args.events - Audit events covering both windows.
 * @param args.currentWindowStartMs - Start of the current window.
 * @param args.currentWindowEndMs - End of the current window.
 * @returns Both window summaries plus `current - previous` deltas for
 *   failures, allowlist blocks and hash coverage (percentage points).
 */
export function computeShellRunnerAuditTrendSnapshot(args: {
  events: AuditEvent[];
  currentWindowStartMs: number;
  currentWindowEndMs: number;
}): ShellRunnerAuditTrendSnapshot {
  const { events, currentWindowStartMs: windowStart, currentWindowEndMs: windowEnd } = args;
  const previousStart = windowStart - (windowEnd - windowStart);

  // The two ranges cannot overlap (one requires >= windowStart, the other
  // < windowStart), so a single pass with else-if partitions the events.
  const inCurrent: AuditEvent[] = [];
  const inPrevious: AuditEvent[] = [];
  for (const entry of events) {
    const at = entry.timestamp;
    if (at >= windowStart && at <= windowEnd) {
      inCurrent.push(entry);
    } else if (at >= previousStart && at < windowStart) {
      inPrevious.push(entry);
    }
  }

  const current = summarizeShellRunnerAuditWindow(inCurrent);
  const previous = summarizeShellRunnerAuditWindow(inPrevious);
  const coverageNow = calculateShellRunnerHashCoveragePercent(current);
  const coverageBefore = calculateShellRunnerHashCoveragePercent(previous);

  return {
    current,
    previous,
    deltas: {
      failures: current.command_result_failed - previous.command_result_failed,
      allowlist_blocks: current.allowlist_blocked - previous.allowlist_blocked,
      hash_coverage_pct: coverageNow - coverageBefore,
    },
  };
}
|
||||||
|
|
||||||
|
/**
 * Decides whether the shell runner is eligible for promotion, based on the
 * audit trend and the governance thresholds.
 *
 * Blockers are collected, in this order, when:
 * 1. the reviewed window is longer than the governance review cadence;
 * 2. the current window contains no command results;
 * 3. the success rate is below the governance minimum;
 * 4. failures increased versus the previous window;
 * 5. allowlist blocks increased versus the previous window.
 *
 * The success rate is a fraction, `(total - failed) / total`, and is 0 when
 * the current window has no command results. In that case rule 3 is still
 * evaluated, so it fires in addition to rule 2 whenever the minimum is above 0.
 *
 * @param args.trend - Current/previous window summaries and their deltas.
 * @param args.reviewedWindowDays - Length of the reviewed window in days.
 * @param args.governance - Review cadence (days) and minimum success-rate fraction.
 * @returns The policy status; `eligible` is true only when no blocker was recorded.
 */
export function evaluateShellRunnerPromotionPolicy(args: {
  trend: ShellRunnerAuditTrendSnapshot;
  reviewedWindowDays: number;
  governance: {
    review_cadence_days: number;
    promotion_min_success_rate: number;
  };
}): ShellRunnerPromotionPolicyStatus {
  const { trend, reviewedWindowDays, governance } = args;
  const { command_result_total: totalResults, command_result_failed: failedResults } = trend.current;
  const { failures: failuresDelta, allowlist_blocks: allowlistBlocksDelta } = trend.deltas;
  const cadenceDays = governance.review_cadence_days;
  const minimumRate = governance.promotion_min_success_rate;
  const windowIsEmpty = totalResults === 0;

  let successRate = 0;
  if (!windowIsEmpty) {
    successRate = (totalResults - failedResults) / totalResults;
  }

  // Renders a fraction as a percentage with two decimals for the messages.
  const asPercent = (rate: number): string => (rate * 100).toFixed(2);

  const blockers: string[] = [];
  if (reviewedWindowDays > cadenceDays) {
    blockers.push(`review window (${reviewedWindowDays}d) exceeds governance cadence (${cadenceDays}d)`);
  }
  if (windowIsEmpty) {
    blockers.push('no shell-runner command results in current window');
  }
  if (successRate < minimumRate) {
    blockers.push(`success rate ${asPercent(successRate)}% below minimum ${asPercent(minimumRate)}%`);
  }
  if (failuresDelta > 0) {
    blockers.push(`failures increased by ${failuresDelta} vs previous window`);
  }
  if (allowlistBlocksDelta > 0) {
    blockers.push(`allowlist blocks increased by ${allowlistBlocksDelta} vs previous window`);
  }

  const eligible = blockers.length === 0;
  return {
    eligible,
    recommendation: eligible ? 'eligible' : 'not_eligible',
    cadence_days: cadenceDays,
    reviewed_window_days: reviewedWindowDays,
    success_rate: successRate,
    minimum_success_rate: minimumRate,
    failures_delta: failuresDelta,
    allowlist_blocks_delta: allowlistBlocksDelta,
    hash_coverage_delta_pct: trend.deltas.hash_coverage_pct,
    blockers,
  };
}
|
||||||
|
|
||||||
|
/**
 * Maps guardrail status and an audit-window summary to a rollout phase.
 *
 * Precedence: any guardrail blocker yields `locked`; otherwise no audited
 * command results yields `guarded_observe`; otherwise any unhashed command
 * or failed result yields `guarded_review`; otherwise `expand_candidate`.
 *
 * @param guardrails - Result of the guardrail evaluation.
 * @param summary - Audit summary for the window under review.
 * @returns The recommended rollout phase.
 */
export function recommendShellRunnerRolloutPhase(
  guardrails: ShellRunnerRolloutGuardrailStatus,
  summary: ShellRunnerAuditWindowSummary,
): ShellRunnerRolloutRecommendation {
  const hasBlockers = guardrails.blockers.length > 0;
  if (hasBlockers) {
    return 'locked';
  }

  const {
    command_result_total: audited,
    unhashed_command_count: unhashed,
    command_result_failed: failed,
  } = summary;
  if (audited === 0) {
    return 'guarded_observe';
  }

  const needsReview = unhashed > 0 || failed > 0;
  return needsReview ? 'guarded_review' : 'expand_candidate';
}
|
||||||
|
|
||||||
|
/**
 * Resolves a user-supplied path to an absolute path, expanding a leading
 * home-directory shorthand.
 *
 * - `~` alone resolves to the current user's home directory.
 * - `~/rest` resolves to `rest` inside the home directory.
 * - Anything else is resolved against the current working directory.
 *
 * Only the current user's `~` is supported; `~otheruser/...` is treated as
 * a regular relative path.
 *
 * @param pathValue - Path as written in config or on the command line.
 * @returns The absolute path.
 */
function expandHomePath(pathValue: string): string {
  // A bare "~" would otherwise fall through to resolve('~'), which yields
  // "<cwd>/~" rather than the home directory.
  if (pathValue === '~') {
    return resolve(homedir());
  }
  if (pathValue.startsWith('~/')) {
    return resolve(homedir(), pathValue.slice(2));
  }
  return resolve(pathValue);
}
|
||||||
|
|
||||||
interface SkillShellRunnerAuditLogger {
|
interface SkillShellRunnerAuditLogger {
|
||||||
skillsInstallerExecutionBlocked(event: {
|
skillsInstallerExecutionBlocked(event: {
|
||||||
skill_name: string;
|
skill_name: string;
|
||||||
@@ -1097,6 +1333,110 @@ export function registerSkillsCommand(program: Command): void {
|
|||||||
console.log(renderSkillInstallerPlan(view));
|
console.log(renderSkillInstallerPlan(view));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// `skills rollout-status`: reports shell-runner guardrail blockers, audit
// totals for the last N days, deltas against the preceding N days, and the
// promotion-policy verdict. Output is human-readable text by default, JSON
// with `--json`, and can additionally be written to a file with `--out`.
skills
  .command('rollout-status')
  .description('Show shell runner rollout guardrails and audit review summary')
  .option('--days <n>', 'Look back N days in audit logs (default: 7)', '7')
  .option('--out <path>', 'Write rollout JSON payload to file')
  .option('--json', 'Output as JSON')
  .option('-c, --config <path>', 'Config file path')
  .action(async (opts: { days?: string; out?: string; json?: boolean; config?: string }) => {
    const loaded = loadConfigSafe(opts.config);
    if (loaded.error || !loaded.config) {
      console.error(loaded.error ?? 'Failed to load config');
      process.exitCode = 1;
      return;
    }

    // Validate the whole string: Number.parseInt alone stops at the first
    // non-digit, so inputs such as "7days" or "1.9" would silently be
    // accepted as 7 and 1 despite the "positive integer" contract.
    const daysInput = (opts.days ?? '7').trim();
    const parsedDays = /^\+?\d+$/.test(daysInput) ? Number.parseInt(daysInput, 10) : Number.NaN;
    if (!Number.isSafeInteger(parsedDays) || parsedDays <= 0) {
      console.error('`--days` must be a positive integer.');
      process.exitCode = 1;
      return;
    }

    const guardrails = evaluateShellRunnerRolloutGuardrails(loaded.config.skills, loaded.config.audit.enabled);
    const windowDurationMs = parsedDays * 24 * 60 * 60 * 1000;
    const nowMs = Date.now();
    const currentWindowStartMs = nowMs - windowDurationMs;
    // Query two windows' worth of history so the previous window can be
    // compared with the current one.
    const queryStartMs = nowMs - windowDurationMs * 2;
    const auditPath = expandHomePath(loaded.config.audit.path);
    const auditEvents = await queryAuditLogs(auditPath, {
      start_time: queryStartMs,
      event_types: ['skills.installer.command_result', 'skills.installer.execution_blocked'],
    });
    const trend = computeShellRunnerAuditTrendSnapshot({
      events: auditEvents,
      currentWindowStartMs,
      currentWindowEndMs: nowMs,
    });
    const recommendation = recommendShellRunnerRolloutPhase(guardrails, trend.current);
    const governance = loaded.config.skills.shell_runner_governance;
    const promotionPolicy = evaluateShellRunnerPromotionPolicy({
      trend,
      reviewedWindowDays: parsedDays,
      governance: {
        review_cadence_days: governance.review_cadence_days,
        promotion_min_success_rate: governance.promotion_min_success_rate,
      },
    });
    const rolloutPayload = {
      days: parsedDays,
      guardrails,
      summary: trend.current,
      trend,
      recommendation,
      promotion_policy: promotionPolicy,
      governance: {
        owner: governance.owner ?? null,
        review_cadence_days: governance.review_cadence_days,
        promotion_min_success_rate: governance.promotion_min_success_rate,
      },
    };

    // Resolve the output path once; it is used for both the write and the
    // confirmation line below.
    const outPath = opts.out ? expandHomePath(opts.out) : undefined;
    if (outPath !== undefined) {
      writeFileSync(outPath, JSON.stringify(rolloutPayload, null, 2), 'utf-8');
    }

    if (opts.json) {
      // JSON mode prints only the payload so stdout stays machine-readable.
      console.log(JSON.stringify(rolloutPayload, null, 2));
      return;
    }

    console.log('Shell runner rollout status');
    console.log(`Window: ${parsedDays}d`);
    console.log(`Recommendation: ${recommendation}`);
    console.log(`Governance owner: ${governance.owner ?? '(unset)'}`);
    console.log(`Governance review cadence (days): ${governance.review_cadence_days}`);
    console.log(`Governance promotion min success rate: ${governance.promotion_min_success_rate}`);
    console.log(`Guardrail blockers: ${guardrails.blockers.length}`);
    for (const blocker of guardrails.blockers) {
      console.log(`- ${blocker}`);
    }
    console.log(`Audited command results: ${trend.current.command_result_total}`);
    console.log(`Audited failures: ${trend.current.command_result_failed}`);
    console.log(`Allowlist blocks: ${trend.current.allowlist_blocked}`);
    console.log(`Execution blocks: ${trend.current.execution_blocked}`);
    console.log(`Hashed command payloads: ${trend.current.hashed_command_count}`);
    console.log(`Unhashed command payloads: ${trend.current.unhashed_command_count}`);
    console.log(`Failure delta vs previous window: ${trend.deltas.failures}`);
    console.log(`Allowlist block delta vs previous window: ${trend.deltas.allowlist_blocks}`);
    console.log(`Hash coverage delta vs previous window (%): ${trend.deltas.hash_coverage_pct.toFixed(2)}`);
    console.log(
      `Promotion policy: ${promotionPolicy.recommendation} (success ${(promotionPolicy.success_rate * 100).toFixed(2)}% / min ${(promotionPolicy.minimum_success_rate * 100).toFixed(2)}%)`,
    );
    for (const blocker of promotionPolicy.blockers) {
      console.log(`- ${blocker}`);
    }
    if (outPath !== undefined) {
      console.log(`Wrote rollout payload: ${outPath}`);
    }
  });
|
||||||
|
|
||||||
skills
|
skills
|
||||||
.command('execute <name>')
|
.command('execute <name>')
|
||||||
.description('Preview or execute installer steps for an installed skill')
|
.description('Preview or execute installer steps for an installed skill')
|
||||||
|
|||||||
Reference in New Issue
Block a user