feat(skills): map runner outcomes into step receipts

2026-02-12 19:07:13 -08:00
parent 5e5d96523e
commit 3a1bac0891
3 changed files with 120 additions and 12 deletions
@@ -1442,6 +1442,15 @@
              "src/cli/skills.test.ts"
            ],
            "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
          },
          "installer_runner_terminal_status_mapping": {
            "status": "completed",
            "description": "Extended step result envelopes to support real-runner terminal statuses (`succeeded`/`failed`) and added runner-to-envelope mapping helpers while retaining execution-disabled defaults",
            "files_modified": [
              "src/cli/skills.ts",
              "src/cli/skills.test.ts"
            ],
            "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
          }
        }
      }
@@ -1470,7 +1479,7 @@
  },
  "overall_progress": {
-    "total_test_count": 1541,
+    "total_test_count": 1543,
    "all_tests_passing": true,
    "p0_completion": "3/3 (100%)",
    "p1_completion": "4/4 (100%)",
@@ -1490,7 +1499,7 @@
    "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
    "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
    "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
-    "next_up": "Skills infrastructure Phase 3: define real-runner-compatible per-step terminal statuses and map runner return values into structured execution envelopes (execution still policy-gated by default)"
+    "next_up": "Skills infrastructure Phase 3: add an optional concrete runner implementation (still opt-in) that emits command-level success/failure reasons into the existing execution envelope schema"
  },
  "soul_md_and_cron_create": {
    "date": "2026-02-11",
@@ -20,6 +20,7 @@ import {
  toSkillInstallerExecutionStubFromPreflight,
  evaluateInstallerExecutionPolicy,
  toInstallerExecutionStepEnvelopes,
  mergeInstallerExecutionResults,
  runInstallerCommandsWithPolicy,
  noOpSkillInstallerCommandRunner,
  runSkillInstallAction,
@@ -347,7 +348,7 @@ describe('skills CLI helpers', () => {
  it('does not invoke command runner when policy disables execution', () => {
    const runner = {
-      run: vi.fn((_commands: string[]) => ['should-not-run']),
+      run: vi.fn((_commands: string[]) => [{ command: 'should-not-run', status: 'succeeded' as const }]),
    };
    const executed = runInstallerCommandsWithPolicy(
@@ -362,7 +363,7 @@ describe('skills CLI helpers', () => {
  it('supports pluggable command runner when policy enables execution', () => {
    const runner = {
-      run: vi.fn((commands: string[]) => commands),
+      run: vi.fn((commands: string[]) => commands.map((command) => ({ command, status: 'succeeded' as const }))),
    };
    const executed = runInstallerCommandsWithPolicy(
@@ -371,10 +372,58 @@ describe('skills CLI helpers', () => {
      runner,
    );
-    expect(executed).toEqual(['brew install jq']);
+    expect(executed).toEqual([{ command: 'brew install jq', status: 'succeeded' }]);
    expect(runner.run).toHaveBeenCalledWith(['brew install jq']);
  });
  // Execution-enabled path: every attempted step should pick up the status and
  // reason that the runner reported for the identical command string.
  it('maps runner command results into structured per-step statuses', () => {
    const attempted = [
      { installer_type: 'brew', command: 'brew install jq' },
      { installer_type: 'node', command: 'pnpm add -g zx' },
    ];
    const results = mergeInstallerExecutionResults(
      attempted,
      // execution_enabled: true selects the runner-result branch. The merge
      // helper only reads `reason` when execution is disabled, so the
      // 'execution_disabled' value here has no effect on the outcome.
      { confirmed: true, execution_enabled: true, reason: 'execution_disabled' },
      [
        { command: 'brew install jq', status: 'succeeded', reason: 'ok' },
        { command: 'pnpm add -g zx', status: 'failed', reason: 'exit_code_1' },
      ],
    );
    // installer_type comes from the attempted step; status and reason come from
    // the runner result with the same command.
    expect(results).toEqual([
      {
        installer_type: 'brew',
        command: 'brew install jq',
        status: 'succeeded',
        reason: 'ok',
      },
      {
        installer_type: 'node',
        command: 'pnpm add -g zx',
        status: 'failed',
        reason: 'exit_code_1',
      },
    ]);
  });
  // Execution-enabled path with a silent runner: an attempted step that has no
  // matching runner result must surface as a failure rather than be dropped.
  it('marks attempted steps failed when runner does not report a result', () => {
    const results = mergeInstallerExecutionResults(
      [{ installer_type: 'brew', command: 'brew install jq' }],
      // `reason` is not consulted on the enabled path; only execution_enabled matters here.
      { confirmed: true, execution_enabled: true, reason: 'execution_disabled' },
      // Empty runner output: nothing to match against the attempted command.
      [],
    );
    expect(results).toEqual([
      {
        installer_type: 'brew',
        command: 'brew install jq',
        status: 'failed',
        reason: 'runner_no_result',
      },
    ]);
  });
  it('summarizes refresh counts across status and tiers', () => {
    const summary = summarizeSkillsRefresh([
      buildSkill({ manifest: { name: 'a', description: 'a', version: '1.0.0', tier: 'bundled' } }),
@@ -47,13 +47,14 @@ export interface SkillInstallerExecutionStubView {
  executed: string[];
  reason: SkillInstallerExecutionReason;
  attempted: Array<{ installer_type: string; command: string }>;
-  results: Array<{ installer_type: string; command: string; status: 'blocked' | 'skipped'; reason: SkillInstallerExecutionReason }>;
+  results: Array<{ installer_type: string; command: string; status: SkillInstallerStepStatus; reason: string }>;
  wouldRun: string[];
  skipped: SkillInstallerPlanView['skipped'];
 }
 /**
  * Mode selector for a skill install action.
  * NOTE(review): the meaning of each mode is defined by code outside this
  * excerpt — confirm against the install action before relying on it.
  */
 export type SkillInstallActionMode = 'plan-only' | 'stub' | 'install';
 /**
  * Reason codes attached to an execution stub. The policy's `reason` is
  * compared against 'confirmation_required' to choose between the 'blocked'
  * and 'skipped' step statuses.
  */
 export type SkillInstallerExecutionReason = 'execution_disabled' | 'confirmation_required';
 /**
  * Status of a single installer step.
  * - 'blocked': execution is disabled and the policy reason is 'confirmation_required'.
  * - 'skipped': execution is disabled for any other reason.
  * - 'succeeded' / 'failed': terminal statuses a command runner can report;
  *   'failed' is also assigned when the runner reports nothing for a step.
  */
 export type SkillInstallerStepStatus = 'blocked' | 'skipped' | 'succeeded' | 'failed';
 export interface SkillInstallerExecutionPolicy {
  confirmed: boolean;
@@ -62,11 +63,17 @@ export interface SkillInstallerExecutionPolicy {
 }
 export interface SkillInstallerCommandRunner {
-  run(commands: string[]): string[];
+  run(commands: string[]): SkillInstallerCommandRunResult[];
 }
 /** Outcome reported by a command runner for one command. */
 export interface SkillInstallerCommandRunResult {
  /**
   * Command string this result refers to. Results are matched to attempted
   * steps by exact equality on this string.
   */
  command: string;
  /** Terminal status reported by the runner. */
  status: 'succeeded' | 'failed';
  /**
   * Optional runner-supplied reason. When absent, mergeInstallerExecutionResults
   * substitutes 'runner_reported_success' or 'runner_reported_failure'.
   */
  reason?: string;
 }
 export const noOpSkillInstallerCommandRunner: SkillInstallerCommandRunner = {
-  run(_commands: string[]): string[] {
+  run(_commands: string[]): SkillInstallerCommandRunResult[] {
    return [];
  },
 };
@@ -83,8 +90,7 @@ export function toInstallerExecutionStepEnvelopes(
    command: step.command,
  }));
-  const status: SkillInstallerExecutionStubView['results'][number]['status'] =
+  const status: SkillInstallerStepStatus = policy.reason === 'confirmation_required' ? 'blocked' : 'skipped';
    policy.reason === 'confirmation_required' ? 'blocked' : 'skipped';
  const results = attempted.map((step) => ({
    installer_type: step.installer_type,
@@ -96,6 +102,44 @@ export function toInstallerExecutionStepEnvelopes(
  return { attempted, results };
 }
 /**
  * Builds the per-step result list for an install run by combining the
  * attempted steps with whatever the command runner reported.
  *
  * When `policy.execution_enabled` is false, runner output is ignored: every
  * step gets status `'blocked'` (policy reason `'confirmation_required'`) or
  * `'skipped'` (any other reason), and carries the policy reason.
  *
  * Otherwise each step is paired with the runner result whose `command` string
  * equals the step's command. If several results share a command, the last one
  * wins. A step with no result is reported as `'failed'` with reason
  * `'runner_no_result'`.
  *
  * @param attempted - Planned steps; the output preserves their order.
  * @param policy - Execution policy; only `execution_enabled` and `reason` are read.
  * @param commandResults - Results returned by the command runner.
  * @returns One entry per attempted step, in the same order as `attempted`.
  */
 export function mergeInstallerExecutionResults(
  attempted: SkillInstallerExecutionStubView['attempted'],
  policy: SkillInstallerExecutionPolicy,
  commandResults: SkillInstallerCommandRunResult[],
 ): SkillInstallerExecutionStubView['results'] {
  if (policy.execution_enabled) {
    // Index runner output by command string; later duplicates overwrite earlier ones.
    const reported = new Map<string, SkillInstallerCommandRunResult>();
    for (const entry of commandResults) {
      reported.set(entry.command, entry);
    }

    return attempted.map(({ installer_type, command }) => {
      const outcome = reported.get(command);
      if (outcome) {
        // Used only when the runner did not supply its own reason.
        const defaultReason =
          outcome.status === 'succeeded' ? 'runner_reported_success' : 'runner_reported_failure';
        return {
          installer_type,
          command,
          status: outcome.status,
          reason: outcome.reason ?? defaultReason,
        };
      }
      // The runner said nothing about this step, so report it as failed.
      return {
        installer_type,
        command,
        status: 'failed' as const,
        reason: 'runner_no_result',
      };
    });
  }

  // Execution is gated by policy: no command ran, so derive the status from the policy reason.
  let gatedStatus: SkillInstallerStepStatus = 'skipped';
  if (policy.reason === 'confirmation_required') {
    gatedStatus = 'blocked';
  }
  return attempted.map(({ installer_type, command }) => ({
    installer_type,
    command,
    status: gatedStatus,
    reason: policy.reason,
  }));
 }
 export function toSkillListRows(skills: Skill[]): SkillListRow[] {
  return skills
    .map((skill) => ({
@@ -318,7 +362,7 @@ export function runInstallerCommandsWithPolicy(
  commands: string[],
  policy: SkillInstallerExecutionPolicy,
  runner: SkillInstallerCommandRunner,
-): string[] {
+): SkillInstallerCommandRunResult[] {
  if (!policy.execution_enabled) {
    return [];
  }
@@ -490,11 +534,17 @@ export function runSkillInstallAction(
        skipped: [],
      };
-  execution.executed = runInstallerCommandsWithPolicy(
+  const commandResults = runInstallerCommandsWithPolicy(
    execution.wouldRun,
    installPolicy,
    opts.commandRunner ?? noOpSkillInstallerCommandRunner,
  );
  execution.executed = commandResults.map((result) => result.command);
  execution.results = mergeInstallerExecutionResults(
    execution.attempted,
    installPolicy,
    commandResults,
  );
  if (opts.asJson) {
    console.log(