feat(skills): map runner outcomes into step receipts

This commit is contained in:
William Valentin
2026-02-12 19:07:13 -08:00
parent 5e5d96523e
commit 3a1bac0891
3 changed files with 120 additions and 12 deletions
+11 -2
View File
@@ -1442,6 +1442,15 @@
"src/cli/skills.test.ts" "src/cli/skills.test.ts"
], ],
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing" "test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
},
"installer_runner_terminal_status_mapping": {
"status": "completed",
"description": "Extended step result envelopes to support real-runner terminal statuses (`succeeded`/`failed`) and added runner-to-envelope mapping helpers while retaining execution-disabled defaults",
"files_modified": [
"src/cli/skills.ts",
"src/cli/skills.test.ts"
],
"test_status": "pnpm typecheck + pnpm test:run src/cli/skills.test.ts + pnpm test:run + pnpm lint (warnings only, 0 errors) + pnpm build passing"
} }
} }
} }
@@ -1470,7 +1479,7 @@
}, },
"overall_progress": { "overall_progress": {
"total_test_count": 1541, "total_test_count": 1543,
"all_tests_passing": true, "all_tests_passing": true,
"p0_completion": "3/3 (100%)", "p0_completion": "3/3 (100%)",
"p1_completion": "4/4 (100%)", "p1_completion": "4/4 (100%)",
@@ -1490,7 +1499,7 @@
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram", "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback", "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback",
"remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening", "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
"next_up": "Skills infrastructure Phase 3: define real-runner-compatible per-step terminal statuses and map runner return values into structured execution envelopes (execution still policy-gated by default)" "next_up": "Skills infrastructure Phase 3: add an optional concrete runner implementation (still opt-in) that emits command-level success/failure reasons into the existing execution envelope schema"
}, },
"soul_md_and_cron_create": { "soul_md_and_cron_create": {
"date": "2026-02-11", "date": "2026-02-11",
+52 -3
View File
@@ -20,6 +20,7 @@ import {
toSkillInstallerExecutionStubFromPreflight, toSkillInstallerExecutionStubFromPreflight,
evaluateInstallerExecutionPolicy, evaluateInstallerExecutionPolicy,
toInstallerExecutionStepEnvelopes, toInstallerExecutionStepEnvelopes,
mergeInstallerExecutionResults,
runInstallerCommandsWithPolicy, runInstallerCommandsWithPolicy,
noOpSkillInstallerCommandRunner, noOpSkillInstallerCommandRunner,
runSkillInstallAction, runSkillInstallAction,
@@ -347,7 +348,7 @@ describe('skills CLI helpers', () => {
it('does not invoke command runner when policy disables execution', () => { it('does not invoke command runner when policy disables execution', () => {
const runner = { const runner = {
run: vi.fn((_commands: string[]) => ['should-not-run']), run: vi.fn((_commands: string[]) => [{ command: 'should-not-run', status: 'succeeded' as const }]),
}; };
const executed = runInstallerCommandsWithPolicy( const executed = runInstallerCommandsWithPolicy(
@@ -362,7 +363,7 @@ describe('skills CLI helpers', () => {
it('supports pluggable command runner when policy enables execution', () => { it('supports pluggable command runner when policy enables execution', () => {
const runner = { const runner = {
run: vi.fn((commands: string[]) => commands), run: vi.fn((commands: string[]) => commands.map((command) => ({ command, status: 'succeeded' as const }))),
}; };
const executed = runInstallerCommandsWithPolicy( const executed = runInstallerCommandsWithPolicy(
@@ -371,10 +372,58 @@ describe('skills CLI helpers', () => {
runner, runner,
); );
expect(executed).toEqual(['brew install jq']); expect(executed).toEqual([{ command: 'brew install jq', status: 'succeeded' }]);
expect(runner.run).toHaveBeenCalledWith(['brew install jq']); expect(runner.run).toHaveBeenCalledWith(['brew install jq']);
}); });
it('maps runner command results into structured per-step statuses', () => {
  // Two planned steps: the runner reports the first as succeeded and the second as failed.
  const brewStep = { installer_type: 'brew', command: 'brew install jq' };
  const nodeStep = { installer_type: 'node', command: 'pnpm add -g zx' };

  const merged = mergeInstallerExecutionResults(
    [brewStep, nodeStep],
    { confirmed: true, execution_enabled: true, reason: 'execution_disabled' },
    [
      { command: brewStep.command, status: 'succeeded', reason: 'ok' },
      { command: nodeStep.command, status: 'failed', reason: 'exit_code_1' },
    ],
  );

  // Each envelope keeps the step's installer_type/command and adopts the runner's status and reason.
  expect(merged).toEqual([
    { ...brewStep, status: 'succeeded', reason: 'ok' },
    { ...nodeStep, status: 'failed', reason: 'exit_code_1' },
  ]);
});
it('marks attempted steps failed when runner does not report a result', () => {
  // Execution is enabled, yet the runner hands back nothing for the attempted step.
  const onlyStep = { installer_type: 'brew', command: 'brew install jq' };

  const merged = mergeInstallerExecutionResults(
    [onlyStep],
    { confirmed: true, execution_enabled: true, reason: 'execution_disabled' },
    [],
  );

  // The step must surface as failed with the dedicated reason.
  expect(merged).toEqual([{ ...onlyStep, status: 'failed', reason: 'runner_no_result' }]);
});
it('summarizes refresh counts across status and tiers', () => { it('summarizes refresh counts across status and tiers', () => {
const summary = summarizeSkillsRefresh([ const summary = summarizeSkillsRefresh([
buildSkill({ manifest: { name: 'a', description: 'a', version: '1.0.0', tier: 'bundled' } }), buildSkill({ manifest: { name: 'a', description: 'a', version: '1.0.0', tier: 'bundled' } }),
+57 -7
View File
@@ -47,13 +47,14 @@ export interface SkillInstallerExecutionStubView {
executed: string[]; executed: string[];
reason: SkillInstallerExecutionReason; reason: SkillInstallerExecutionReason;
attempted: Array<{ installer_type: string; command: string }>; attempted: Array<{ installer_type: string; command: string }>;
results: Array<{ installer_type: string; command: string; status: 'blocked' | 'skipped'; reason: SkillInstallerExecutionReason }>; results: Array<{ installer_type: string; command: string; status: SkillInstallerStepStatus; reason: string }>;
wouldRun: string[]; wouldRun: string[];
skipped: SkillInstallerPlanView['skipped']; skipped: SkillInstallerPlanView['skipped'];
} }
export type SkillInstallActionMode = 'plan-only' | 'stub' | 'install'; export type SkillInstallActionMode = 'plan-only' | 'stub' | 'install';
export type SkillInstallerExecutionReason = 'execution_disabled' | 'confirmation_required'; export type SkillInstallerExecutionReason = 'execution_disabled' | 'confirmation_required';
/**
 * Terminal status recorded for a single installer step.
 *
 * In `mergeInstallerExecutionResults`, `blocked` is used when execution is
 * disabled and the policy reason is `confirmation_required`, and `skipped`
 * for any other disabled-execution reason. `succeeded` and `failed` are
 * copied from the command runner's result; `failed` is also used when the
 * runner reports nothing for an attempted step.
 */
export type SkillInstallerStepStatus = 'blocked' | 'skipped' | 'succeeded' | 'failed';
export interface SkillInstallerExecutionPolicy { export interface SkillInstallerExecutionPolicy {
confirmed: boolean; confirmed: boolean;
@@ -62,11 +63,17 @@ export interface SkillInstallerExecutionPolicy {
} }
export interface SkillInstallerCommandRunner { export interface SkillInstallerCommandRunner {
run(commands: string[]): string[]; run(commands: string[]): SkillInstallerCommandRunResult[];
}
/**
 * One entry of the array returned by `SkillInstallerCommandRunner.run`,
 * describing the outcome of a single command.
 */
export interface SkillInstallerCommandRunResult {
/** Command string this result refers to; `mergeInstallerExecutionResults` looks results up by this value. */
command: string;
/** Outcome of the command as reported by the runner. */
status: 'succeeded' | 'failed';
/**
 * Optional runner-supplied reason. When omitted, `mergeInstallerExecutionResults`
 * substitutes `runner_reported_success` or `runner_reported_failure` according to `status`.
 */
reason?: string;
}
export const noOpSkillInstallerCommandRunner: SkillInstallerCommandRunner = { export const noOpSkillInstallerCommandRunner: SkillInstallerCommandRunner = {
run(_commands: string[]): string[] { run(_commands: string[]): SkillInstallerCommandRunResult[] {
return []; return [];
}, },
}; };
@@ -83,8 +90,7 @@ export function toInstallerExecutionStepEnvelopes(
command: step.command, command: step.command,
})); }));
const status: SkillInstallerExecutionStubView['results'][number]['status'] = const status: SkillInstallerStepStatus = policy.reason === 'confirmation_required' ? 'blocked' : 'skipped';
policy.reason === 'confirmation_required' ? 'blocked' : 'skipped';
const results = attempted.map((step) => ({ const results = attempted.map((step) => ({
installer_type: step.installer_type, installer_type: step.installer_type,
@@ -96,6 +102,44 @@ export function toInstallerExecutionStepEnvelopes(
return { attempted, results }; return { attempted, results };
} }
/**
 * Combines the attempted installer steps with the command runner's reported
 * outcomes to produce one result envelope per attempted step.
 *
 * - When `policy.execution_enabled` is false, nothing ran: every step is
 *   reported as `blocked` (policy reason `confirmation_required`) or
 *   `skipped` (any other reason), carrying `policy.reason`.
 * - Otherwise each step is matched to the runner results by command string.
 *   If the same command appears in several steps, reports for that command
 *   are handed out in the order the runner returned them; once they run out,
 *   the last report is reused for the remaining steps.
 * - A step with no report at all is marked `failed` with reason
 *   `runner_no_result`.
 *
 * @param attempted - Steps that were planned for execution, in plan order.
 * @param policy - Execution policy that gated the run.
 * @param commandResults - Results returned by the command runner.
 * @returns One result envelope per entry of `attempted`, in the same order.
 */
export function mergeInstallerExecutionResults(
  attempted: SkillInstallerExecutionStubView['attempted'],
  policy: SkillInstallerExecutionPolicy,
  commandResults: SkillInstallerCommandRunResult[],
): SkillInstallerExecutionStubView['results'] {
  if (!policy.execution_enabled) {
    const blockedStatus: SkillInstallerStepStatus = policy.reason === 'confirmation_required' ? 'blocked' : 'skipped';
    return attempted.map((step) => ({
      installer_type: step.installer_type,
      command: step.command,
      status: blockedStatus,
      reason: policy.reason,
    }));
  }

  // Group reports per command while preserving report order, so that repeated
  // commands do not overwrite one another (a plain command -> result map would
  // keep only the last report).
  const reportsByCommand = new Map<string, SkillInstallerCommandRunResult[]>();
  for (const result of commandResults) {
    const existing = reportsByCommand.get(result.command);
    if (existing) {
      existing.push(result);
    } else {
      reportsByCommand.set(result.command, [result]);
    }
  }

  // Tracks how many reports have already been handed out for each command.
  const handedOut = new Map<string, number>();

  return attempted.map((step): SkillInstallerExecutionStubView['results'][number] => {
    const reports = reportsByCommand.get(step.command) ?? [];
    const position = handedOut.get(step.command) ?? 0;
    handedOut.set(step.command, position + 1);

    // Clamp to the last report so that a single report still covers every
    // step sharing that command.
    const commandResult = reports[Math.min(position, reports.length - 1)];
    if (!commandResult) {
      return {
        installer_type: step.installer_type,
        command: step.command,
        status: 'failed',
        reason: 'runner_no_result',
      };
    }

    return {
      installer_type: step.installer_type,
      command: step.command,
      status: commandResult.status,
      reason:
        commandResult.reason ??
        (commandResult.status === 'succeeded' ? 'runner_reported_success' : 'runner_reported_failure'),
    };
  });
}
export function toSkillListRows(skills: Skill[]): SkillListRow[] { export function toSkillListRows(skills: Skill[]): SkillListRow[] {
return skills return skills
.map((skill) => ({ .map((skill) => ({
@@ -318,7 +362,7 @@ export function runInstallerCommandsWithPolicy(
commands: string[], commands: string[],
policy: SkillInstallerExecutionPolicy, policy: SkillInstallerExecutionPolicy,
runner: SkillInstallerCommandRunner, runner: SkillInstallerCommandRunner,
): string[] { ): SkillInstallerCommandRunResult[] {
if (!policy.execution_enabled) { if (!policy.execution_enabled) {
return []; return [];
} }
@@ -490,11 +534,17 @@ export function runSkillInstallAction(
skipped: [], skipped: [],
}; };
execution.executed = runInstallerCommandsWithPolicy( const commandResults = runInstallerCommandsWithPolicy(
execution.wouldRun, execution.wouldRun,
installPolicy, installPolicy,
opts.commandRunner ?? noOpSkillInstallerCommandRunner, opts.commandRunner ?? noOpSkillInstallerCommandRunner,
); );
execution.executed = commandResults.map((result) => result.command);
execution.results = mergeInstallerExecutionResults(
execution.attempted,
installPolicy,
commandResults,
);
if (opts.asJson) { if (opts.asJson) {
console.log( console.log(