test(tools): verify timeout abort prevents post-timeout side effects
This commit is contained in:
@@ -16,7 +16,7 @@ Scope: Production-risk-first audit of bugs, code improvements, and feature oppor
|
|||||||
- ✅ F-008 addressed: WhatsApp Chromium launch is now sandboxed by default; no-sandbox mode is behind explicit `whatsapp.no_sandbox: true` opt-in.
|
- ✅ F-008 addressed: WhatsApp Chromium launch is now sandboxed by default; no-sandbox mode is behind explicit `whatsapp.no_sandbox: true` opt-in.
|
||||||
- ✅ F-014 addressed: `ModelRouter.setOnTierChange` now preserves existing listeners instead of replacing them, removing destructive listener-setter behavior.
|
- ✅ F-014 addressed: `ModelRouter.setOnTierChange` now preserves existing listeners instead of replacing them, removing destructive listener-setter behavior.
|
||||||
- ✅ F-002 addressed: `config.patch` now supports durable persistence via atomic write + backup when daemon has a concrete config path, and response includes `persisted`/`persistError` so UI can distinguish runtime-only vs disk-persisted updates.
|
- ✅ F-002 addressed: `config.patch` now supports durable persistence via atomic write + backup when daemon has a concrete config path, and response includes `persisted`/`persistError` so UI can distinguish runtime-only vs disk-persisted updates.
|
||||||
- ◑ F-003 partially addressed: tool execution now has an `AbortSignal` contract and executor triggers abort on timeout; host `shell.exec`, sandbox docker exec, `process.start`, browser tools (`browser.navigate/click/type/content/eval/screenshot`), and web tools (`web.fetch`, `web.search`) now respond to cancellation. Remaining closure work is timeout-side-effect regression coverage.
|
- ✅ F-003 addressed: tool execution now has an `AbortSignal` contract, executor triggers abort on timeout, high-risk tools (`shell.exec`, sandbox docker exec, `process.start`, browser tools, `web.fetch`, `web.search`) respond to cancellation, and executor regression tests verify cancellable tools do not apply side effects after timeout.
|
||||||
- ✅ F-015 addressed: retry defaults no longer classify timeout-style failures as non-retryable, improving resilience for transient timeout conditions.
|
- ✅ F-015 addressed: retry defaults no longer classify timeout-style failures as non-retryable, improving resilience for transient timeout conditions.
|
||||||
|
|
||||||
## Executive Summary
|
## Executive Summary
|
||||||
@@ -116,6 +116,7 @@ Non-goals:
|
|||||||
Remediation update (2026-02-16):
|
Remediation update (2026-02-16):
|
||||||
- Abort propagation now covers executor -> context signal -> `process.start`, browser tools, and web fetch/search tools.
|
- Abort propagation now covers executor -> context signal -> `process.start`, browser tools, and web fetch/search tools.
|
||||||
- Added aborted-signal regression tests for these tool paths.
|
- Added aborted-signal regression tests for these tool paths.
|
||||||
|
- Added timeout regression coverage in `src/tools/executor.test.ts` to verify side effects are prevented after timeout-triggered abort for cancellable tools.
|
||||||
|
|
||||||
### F-004 Medium: Lint quality gate is broken and concentrated in key runtime files
|
### F-004 Medium: Lint quality gate is broken and concentrated in key runtime files
|
||||||
|
|
||||||
|
|||||||
@@ -2549,10 +2549,10 @@
|
|||||||
"test_status": "pnpm test:run src/gateway/handlers/handlers.test.ts src/config/persistence.test.ts + pnpm typecheck passing"
|
"test_status": "pnpm test:run src/gateway/handlers/handlers.test.ts src/config/persistence.test.ts + pnpm typecheck passing"
|
||||||
},
|
},
|
||||||
"audit-followup-tool-timeout-cancellation-contract": {
|
"audit-followup-tool-timeout-cancellation-contract": {
|
||||||
"status": "in_progress",
|
"status": "completed",
|
||||||
"date": "2026-02-16",
|
"date": "2026-02-16",
|
||||||
"updated": "2026-02-16",
|
"updated": "2026-02-16",
|
||||||
"summary": "Expanded timeout-cancellation hardening: executor AbortSignal propagation now reaches process.start, browser tools (navigate/screenshot/click/type/content/eval), and web tools (fetch/search), with pre-abort regression tests. Remaining work: add explicit post-timeout side-effect regression coverage.",
|
"summary": "Completed timeout-cancellation hardening: executor AbortSignal propagation reaches process.start, browser tools (navigate/screenshot/click/type/content/eval), and web tools (fetch/search); added pre-abort tests and timeout regression coverage that verifies cancellable tools do not apply side effects after timeout.",
|
||||||
"files_modified": [
|
"files_modified": [
|
||||||
"src/tools/types.ts",
|
"src/tools/types.ts",
|
||||||
"src/tools/executor.ts",
|
"src/tools/executor.ts",
|
||||||
@@ -2572,7 +2572,7 @@
|
|||||||
"docs/architecture/CONTRIBUTOR_MAP.md",
|
"docs/architecture/CONTRIBUTOR_MAP.md",
|
||||||
"docs/plans/analysis/2026-02-16-codebase-audit-report.md"
|
"docs/plans/analysis/2026-02-16-codebase-audit-report.md"
|
||||||
],
|
],
|
||||||
"test_status": "pnpm test:run src/tools/builtin/browser/tools.test.ts src/tools/builtin/web-fetch.test.ts src/tools/builtin/web-search.test.ts src/tools/builtin/process/manager.test.ts + pnpm typecheck passing"
|
"test_status": "pnpm test:run src/tools/executor.test.ts src/tools/builtin/browser/tools.test.ts src/tools/builtin/web-fetch.test.ts src/tools/builtin/web-search.test.ts src/tools/builtin/process/manager.test.ts + pnpm typecheck passing"
|
||||||
},
|
},
|
||||||
"audit-followup-retry-timeout-defaults": {
|
"audit-followup-retry-timeout-defaults": {
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
|
|||||||
@@ -59,6 +59,31 @@ const cancellableTool: Tool = {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
function createSideEffectTool(sideEffect: { fired: boolean }): Tool {
|
||||||
|
return {
|
||||||
|
name: 'test.side_effect',
|
||||||
|
description: 'Cancellable side effect',
|
||||||
|
inputSchema: { type: 'object', properties: {} },
|
||||||
|
execute: async (_args, context) => {
|
||||||
|
return await new Promise((resolve) => {
|
||||||
|
const timer = setTimeout(() => {
|
||||||
|
sideEffect.fired = true;
|
||||||
|
resolve({ success: true, output: 'side effect fired' });
|
||||||
|
}, 120);
|
||||||
|
const onAbort = () => {
|
||||||
|
clearTimeout(timer);
|
||||||
|
resolve({ success: false, output: '', error: 'aborted' });
|
||||||
|
};
|
||||||
|
if (context?.signal?.aborted) {
|
||||||
|
onAbort();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
context?.signal?.addEventListener('abort', onAbort, { once: true });
|
||||||
|
});
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
describe('ToolExecutor', () => {
|
describe('ToolExecutor', () => {
|
||||||
it('executes a tool and returns result', async () => {
|
it('executes a tool and returns result', async () => {
|
||||||
const registry = new ToolRegistry();
|
const registry = new ToolRegistry();
|
||||||
@@ -114,6 +139,21 @@ describe('ToolExecutor', () => {
|
|||||||
expect(result.error).toContain('timed out');
|
expect(result.error).toContain('timed out');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('prevents post-timeout side effects for cancellable tools', async () => {
|
||||||
|
const sideEffect = { fired: false };
|
||||||
|
const registry = new ToolRegistry();
|
||||||
|
registry.register(createSideEffectTool(sideEffect));
|
||||||
|
const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
|
||||||
|
const executor = new ToolExecutor(registry, hooks, { defaultTimeoutMs: 30 });
|
||||||
|
|
||||||
|
const result = await executor.execute('test.side_effect', {});
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
expect(result.error).toContain('timed out');
|
||||||
|
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 180));
|
||||||
|
expect(sideEffect.fired).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
it('truncates large output', async () => {
|
it('truncates large output', async () => {
|
||||||
const registry = new ToolRegistry();
|
const registry = new ToolRegistry();
|
||||||
registry.register(bigOutputTool);
|
registry.register(bigOutputTool);
|
||||||
|
|||||||
Reference in New Issue
Block a user