From 3a1bac0891c79c853a5b1f5a73ea523c2fd1bb5e Mon Sep 17 00:00:00 2001 From: William Valentin Date: Thu, 12 Feb 2026 19:07:13 -0800 Subject: [PATCH] feat(skills): map runner outcomes into step receipts --- docs/plans/state.json | 13 +++++++-- src/cli/skills.test.ts | 55 ++++++++++++++++++++++++++++++++++-- src/cli/skills.ts | 64 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 120 insertions(+), 12 deletions(-) diff --git a/docs/plans/state.json b/docs/plans/state.json index 8ea4002..9235cb8 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -1442,6 +1442,15 @@ "src/cli/skills.test.ts" ], "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing" + }, + "installer_runner_terminal_status_mapping": { + "status": "completed", + "description": "Extended step result envelopes to support real-runner terminal statuses (`succeeded`/`failed`) and added runner-to-envelope mapping helpers while retaining execution-disabled defaults", + "files_modified": [ + "src/cli/skills.ts", + "src/cli/skills.test.ts" + ], + "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing" } } } @@ -1470,7 +1479,7 @@ }, "overall_progress": { - "total_test_count": 1541, + "total_test_count": 1543, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", @@ -1490,7 +1499,7 @@ "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram", "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback", "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening", - "next_up": "Skills infrastructure Phase 3: define real-runner-compatible per-step terminal statuses and map runner return values into structured execution envelopes (execution still policy-gated by default)" + "next_up": "Skills infrastructure Phase 3: add an optional concrete runner implementation (still opt-in) that emits command-level success/failure reasons into the existing execution envelope schema" }, "soul_md_and_cron_create": { "date": "2026-02-11", diff --git a/src/cli/skills.test.ts b/src/cli/skills.test.ts index 78f6d74..adef9e0 100644 --- a/src/cli/skills.test.ts +++ b/src/cli/skills.test.ts @@ -20,6 +20,7 @@ import { toSkillInstallerExecutionStubFromPreflight, evaluateInstallerExecutionPolicy, toInstallerExecutionStepEnvelopes, + mergeInstallerExecutionResults, runInstallerCommandsWithPolicy, noOpSkillInstallerCommandRunner, runSkillInstallAction, @@ -347,7 +348,7 @@ describe('skills CLI helpers', () => { it('does not invoke command runner when policy disables execution', () => { const runner = { - run: vi.fn((_commands: string[]) => ['should-not-run']), + run: vi.fn((_commands: string[]) => [{ command: 'should-not-run', status: 'succeeded' as const }]), }; const executed = runInstallerCommandsWithPolicy( @@ -362,7 +363,7 @@ describe('skills CLI helpers', () => { it('supports pluggable command runner when policy enables execution', () => { const runner = { - run: vi.fn((commands: string[]) => commands), + run: vi.fn((commands: string[]) => commands.map((command) => ({ command, status: 'succeeded' as const }))), }; const executed = runInstallerCommandsWithPolicy( @@ -371,10 +372,58 @@ describe('skills CLI helpers', () => { runner, ); - expect(executed).toEqual(['brew install jq']); + expect(executed).toEqual([{ command: 'brew install jq', status: 'succeeded' }]); expect(runner.run).toHaveBeenCalledWith(['brew install jq']); }); + it('maps runner command results into structured per-step statuses', () => { + const attempted = [ + { installer_type: 'brew', command: 'brew install jq' }, + { installer_type: 'node', command: 'pnpm add -g zx' }, + ]; + + const results = mergeInstallerExecutionResults( + attempted, + { confirmed: true, execution_enabled: true, reason: 'execution_disabled' }, + [ + { command: 'brew install jq', status: 'succeeded', reason: 'ok' }, + { command: 'pnpm add -g zx', status: 'failed', reason: 'exit_code_1' }, + ], + ); + + expect(results).toEqual([ + { + installer_type: 'brew', + command: 'brew install jq', + status: 'succeeded', + reason: 'ok', + }, + { + installer_type: 'node', + command: 'pnpm add -g zx', + status: 'failed', + reason: 'exit_code_1', + }, + ]); + }); + + it('marks attempted steps failed when runner does not report a result', () => { + const results = mergeInstallerExecutionResults( + [{ installer_type: 'brew', command: 'brew install jq' }], + { confirmed: true, execution_enabled: true, reason: 'execution_disabled' }, + [], + ); + + expect(results).toEqual([ + { + installer_type: 'brew', + command: 'brew install jq', + status: 'failed', + reason: 'runner_no_result', + }, + ]); + }); + it('summarizes refresh counts across status and tiers', () => { const summary = summarizeSkillsRefresh([ buildSkill({ manifest: { name: 'a', description: 'a', version: '1.0.0', tier: 'bundled' } }), diff --git a/src/cli/skills.ts b/src/cli/skills.ts index d2ec6c8..01d1ee0 100644 --- a/src/cli/skills.ts +++ b/src/cli/skills.ts @@ -47,13 +47,14 @@ export interface SkillInstallerExecutionStubView { executed: string[]; reason: SkillInstallerExecutionReason; attempted: Array<{ installer_type: string; command: string }>; - results: Array<{ installer_type: string; command: string; status: 'blocked' | 'skipped'; reason: SkillInstallerExecutionReason }>; + results: Array<{ installer_type: string; command: string; status: SkillInstallerStepStatus; reason: string }>; wouldRun: string[]; skipped: SkillInstallerPlanView['skipped']; } export type SkillInstallActionMode = 'plan-only' | 'stub' | 'install'; export type SkillInstallerExecutionReason = 'execution_disabled' | 'confirmation_required'; +export type SkillInstallerStepStatus = 'blocked' | 'skipped' | 'succeeded' | 'failed'; export interface SkillInstallerExecutionPolicy { confirmed: boolean; @@ -62,11 +63,17 @@ export interface SkillInstallerExecutionPolicy { } export interface SkillInstallerCommandRunner { - run(commands: string[]): string[]; + run(commands: string[]): SkillInstallerCommandRunResult[]; +} + +export interface SkillInstallerCommandRunResult { + command: string; + status: 'succeeded' | 'failed'; + reason?: string; } export const noOpSkillInstallerCommandRunner: SkillInstallerCommandRunner = { - run(_commands: string[]): string[] { + run(_commands: string[]): SkillInstallerCommandRunResult[] { return []; }, }; @@ -83,8 +90,7 @@ export function toInstallerExecutionStepEnvelopes( command: step.command, })); - const status: SkillInstallerExecutionStubView['results'][number]['status'] = - policy.reason === 'confirmation_required' ? 'blocked' : 'skipped'; + const status: SkillInstallerStepStatus = policy.reason === 'confirmation_required' ? 'blocked' : 'skipped'; const results = attempted.map((step) => ({ installer_type: step.installer_type, @@ -96,6 +102,44 @@ export function toInstallerExecutionStepEnvelopes( return { attempted, results }; } +export function mergeInstallerExecutionResults( + attempted: SkillInstallerExecutionStubView['attempted'], + policy: SkillInstallerExecutionPolicy, + commandResults: SkillInstallerCommandRunResult[], +): SkillInstallerExecutionStubView['results'] { + if (!policy.execution_enabled) { + const blockedStatus: SkillInstallerStepStatus = policy.reason === 'confirmation_required' ? 'blocked' : 'skipped'; + return attempted.map((step) => ({ + installer_type: step.installer_type, + command: step.command, + status: blockedStatus, + reason: policy.reason, + })); + } + + const resultByCommand = new Map(commandResults.map((result) => [result.command, result])); + return attempted.map((step) => { + const commandResult = resultByCommand.get(step.command); + if (!commandResult) { + return { + installer_type: step.installer_type, + command: step.command, + status: 'failed' as const, + reason: 'runner_no_result', + }; + } + + return { + installer_type: step.installer_type, + command: step.command, + status: commandResult.status, + reason: + commandResult.reason ?? + (commandResult.status === 'succeeded' ? 'runner_reported_success' : 'runner_reported_failure'), + }; + }); +} + export function toSkillListRows(skills: Skill[]): SkillListRow[] { return skills .map((skill) => ({ @@ -318,7 +362,7 @@ export function runInstallerCommandsWithPolicy( commands: string[], policy: SkillInstallerExecutionPolicy, runner: SkillInstallerCommandRunner, -): string[] { +): SkillInstallerCommandRunResult[] { if (!policy.execution_enabled) { return []; } @@ -490,11 +534,17 @@ export function runSkillInstallAction( skipped: [], }; - execution.executed = runInstallerCommandsWithPolicy( + const commandResults = runInstallerCommandsWithPolicy( execution.wouldRun, installPolicy, opts.commandRunner ?? noOpSkillInstallerCommandRunner, ); + execution.executed = commandResults.map((result) => result.command); + execution.results = mergeInstallerExecutionResults( + execution.attempted, + installPolicy, + commandResults, + ); if (opts.asJson) { console.log(