feat(skills): gate execution receipts with policy checks

2026-02-12 18:48:08 -08:00
parent 1159fac640
commit a983e01db7
3 changed files with 73 additions and 15 deletions
@@ -1415,6 +1415,15 @@
              "src/cli/skills.test.ts"
            ],
            "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
+          },
+          "installer_execution_policy_gate_checks": {
+            "status": "completed",
+            "description": "Added explicit execution-policy gate evaluation for install/stub modes and surfaced policy reasons (including confirmation_required) in no-op execution receipts, while keeping command execution disabled",
+            "files_modified": [
+              "src/cli/skills.ts",
+              "src/cli/skills.test.ts"
+            ],
+            "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
          }
        }
      }
@@ -1443,7 +1452,7 @@
  },

  "overall_progress": {
-    "total_test_count": 1535,
+    "total_test_count": 1537,
    "all_tests_passing": true,
    "p0_completion": "3/3 (100%)",
    "p1_completion": "4/4 (100%)",
@@ -1463,7 +1472,7 @@
    "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
    "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
    "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
-    "next_up": "Skills infrastructure Phase 3: add explicit execution-policy gate checks for future real installer command runners while preserving no-op default"
+    "next_up": "Skills infrastructure Phase 3: introduce a pluggable installer command runner interface behind the existing execution policy gates (default no-op)"
  },
  "soul_md_and_cron_create": {
    "date": "2026-02-11",
@@ -18,6 +18,7 @@ import {
  toSkillInstallerExecutionStubView,
  renderSkillInstallerExecutionStub,
  toSkillInstallerExecutionStubFromPreflight,
+  evaluateInstallerExecutionPolicy,
  runSkillInstallAction,
 } from './skills.js';
 import type { Skill } from '../skills/index.js';
@@ -279,6 +280,22 @@ describe('skills CLI helpers', () => {
    expect(view.wouldRun).toEqual(['download https://example.com/a.tgz -> /tmp/a.tgz']);
  });

+  it('marks install execution policy as confirmation_required when not confirmed', () => {
+    const policy = evaluateInstallerExecutionPolicy({ mode: 'install', confirmed: false });
+
+    expect(policy.confirmed).toBe(false);
+    expect(policy.execution_enabled).toBe(false);
+    expect(policy.reason).toBe('confirmation_required');
+  });
+
+  it('keeps execution policy disabled after confirmation', () => {
+    const policy = evaluateInstallerExecutionPolicy({ mode: 'install', confirmed: true });
+
+    expect(policy.confirmed).toBe(true);
+    expect(policy.execution_enabled).toBe(false);
+    expect(policy.reason).toBe('execution_disabled');
+  });
+
  it('summarizes refresh counts across status and tiers', () => {
    const summary = summarizeSkillsRefresh([
      buildSkill({ manifest: { name: 'a', description: 'a', version: '1.0.0', tier: 'bundled' } }),
@@ -413,7 +430,7 @@ describe('skills CLI helpers', () => {
    expect(payload.execution.mode).toBe('install');
    expect(payload.execution.execution_enabled).toBe(false);
    expect(payload.execution.executed).toEqual([]);
-    expect(payload.execution.reason).toBe('execution_disabled');
+    expect(payload.execution.reason).toBe('confirmation_required');

    logSpy.mockRestore();
    rmSync(root, { recursive: true, force: true });
@@ -45,12 +45,19 @@ export interface SkillInstallerExecutionStubView {
  confirmed: boolean;
  execution_enabled: boolean;
  executed: string[];
-  reason: 'execution_disabled';
+  reason: SkillInstallerExecutionReason;
  wouldRun: string[];
  skipped: SkillInstallerPlanView['skipped'];
 }

 export type SkillInstallActionMode = 'plan-only' | 'stub' | 'install';
+export type SkillInstallerExecutionReason = 'execution_disabled' | 'confirmation_required';
+
+export interface SkillInstallerExecutionPolicy {
+  confirmed: boolean;
+  execution_enabled: boolean;
+  reason: SkillInstallerExecutionReason;
+}

 export function toSkillListRows(skills: Skill[]): SkillListRow[] {
  return skills
@@ -212,14 +219,15 @@ export function renderSkillInstallPreflight(view: SkillInstallPreflightView): st

 export function toSkillInstallerExecutionStubView(skill: Skill): SkillInstallerExecutionStubView {
  const plan = toSkillInstallerPlanView(skill);
+  const policy = evaluateInstallerExecutionPolicy({ mode: 'stub', confirmed: false });
  return {
    skill: plan.skill,
    execution: 'stub',
    mode: 'stub',
-    confirmed: false,
-    execution_enabled: false,
+    confirmed: policy.confirmed,
+    execution_enabled: policy.execution_enabled,
    executed: [],
-    reason: 'execution_disabled',
+    reason: policy.reason,
    wouldRun: plan.steps.map((step) => step.command),
    skipped: plan.skipped,
  };
@@ -230,20 +238,39 @@ export function toSkillInstallerExecutionStubFromPreflight(
  options?: { mode?: SkillInstallActionMode; confirmed?: boolean },
 ): SkillInstallerExecutionStubView {
  const mode = options?.mode ?? 'stub';
-  const confirmed = options?.confirmed ?? false;
+  const policy = evaluateInstallerExecutionPolicy({ mode, confirmed: options?.confirmed ?? false });
  return {
    skill: preflight.skill,
    execution: 'stub',
    mode,
-    confirmed,
-    execution_enabled: false,
+    confirmed: policy.confirmed,
+    execution_enabled: policy.execution_enabled,
    executed: [],
-    reason: 'execution_disabled',
+    reason: policy.reason,
    wouldRun: preflight.steps.map((step) => step.command),
    skipped: preflight.skipped,
  };
 }

+export function evaluateInstallerExecutionPolicy(opts: {
+  mode: SkillInstallActionMode;
+  confirmed: boolean;
+}): SkillInstallerExecutionPolicy {
+  if (opts.mode === 'install' && !opts.confirmed) {
+    return {
+      confirmed: false,
+      execution_enabled: false,
+      reason: 'confirmation_required',
+    };
+  }
+
+  return {
+    confirmed: opts.confirmed,
+    execution_enabled: false,
+    reason: 'execution_disabled',
+  };
+}
+
 export function renderSkillInstallerExecutionStub(view: SkillInstallerExecutionStubView): string {
  const lines: string[] = [
    `Installer execution stub for '${view.skill.name}' (${view.skill.tier}, v${view.skill.version})`,
@@ -377,6 +404,8 @@ export function runSkillInstallAction(
    return { ok: false, error: result.error ?? `Failed to install skill from '${sourcePath}'.` };
  }

+  const installPolicy = evaluateInstallerExecutionPolicy({ mode: 'install', confirmed: opts.confirmed });
+
  const execution =
    preflight !== null
      ? toSkillInstallerExecutionStubFromPreflight(preflight, {
@@ -391,10 +420,10 @@ export function runSkillInstallAction(
        },
        execution: 'stub' as const,
        mode: 'install' as const,
-        confirmed: opts.confirmed,
-        execution_enabled: false,
+        confirmed: installPolicy.confirmed,
+        execution_enabled: installPolicy.execution_enabled,
        executed: [],
-        reason: 'execution_disabled' as const,
+        reason: installPolicy.reason,
        wouldRun: [],
        skipped: [],
      };
@@ -647,7 +676,10 @@ export function registerSkillsCommand(program: Command): void {
      }

      const view = toSkillInstallerExecutionStubView(skill);
-      view.confirmed = opts.confirm ?? false;
+      const policy = evaluateInstallerExecutionPolicy({ mode: 'stub', confirmed: opts.confirm ?? false });
+      view.confirmed = policy.confirmed;
+      view.execution_enabled = policy.execution_enabled;
+      view.reason = policy.reason;
      if (opts.json) {
        console.log(JSON.stringify(view, null, 2));
        return;