From 2eccd3e8eb8dc2ba5d2226daf5490a96eef8a1c0 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Sun, 15 Feb 2026 22:17:49 -0800 Subject: [PATCH] test(tools): verify timeout abort prevents post-timeout side effects --- .../2026-02-16-codebase-audit-report.md | 3 +- docs/plans/state.json | 6 +-- src/tools/executor.test.ts | 40 +++++++++++++++++++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/docs/plans/analysis/2026-02-16-codebase-audit-report.md b/docs/plans/analysis/2026-02-16-codebase-audit-report.md index 3624dd7..3c48cc3 100644 --- a/docs/plans/analysis/2026-02-16-codebase-audit-report.md +++ b/docs/plans/analysis/2026-02-16-codebase-audit-report.md @@ -16,7 +16,7 @@ Scope: Production-risk-first audit of bugs, code improvements, and feature oppor - ✅ F-008 addressed: WhatsApp Chromium launch is now sandboxed by default; no-sandbox mode is behind explicit `whatsapp.no_sandbox: true` opt-in. - ✅ F-014 addressed: `ModelRouter.setOnTierChange` now preserves existing listeners instead of replacing them, removing destructive listener-setter behavior. - ✅ F-002 addressed: `config.patch` now supports durable persistence via atomic write + backup when daemon has a concrete config path, and response includes `persisted`/`persistError` so UI can distinguish runtime-only vs disk-persisted updates. -- ◑ F-003 partially addressed: tool execution now has an `AbortSignal` contract and executor triggers abort on timeout; host `shell.exec`, sandbox docker exec, `process.start`, browser tools (`browser.navigate/click/type/content/eval/screenshot`), and web tools (`web.fetch`, `web.search`) now respond to cancellation. Remaining closure work is timeout-side-effect regression coverage. +- ✅ F-003 addressed: tool execution now has an `AbortSignal` contract, executor triggers abort on timeout, high-risk tools (`shell.exec`, sandbox docker exec, `process.start`, browser tools, `web.fetch`, `web.search`) respond to cancellation, and executor regression tests verify cancellable tools do not apply side effects after timeout. - ✅ F-015 addressed: retry defaults no longer classify timeout-style failures as non-retryable, improving resilience for transient timeout conditions. ## Executive Summary @@ -116,6 +116,7 @@ Non-goals: Remediation update (2026-02-16): - Abort propagation now covers executor -> context signal -> `process.start`, browser tools, and web fetch/search tools. - Added aborted-signal regression tests for these tool paths. +- Added timeout regression coverage in `src/tools/executor.test.ts` to verify side effects are prevented after timeout-triggered abort for cancellable tools. ### F-004 Medium: Lint quality gate is broken and concentrated in key runtime files diff --git a/docs/plans/state.json b/docs/plans/state.json index 8ae448d..9416e2c 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -2549,10 +2549,10 @@ "test_status": "pnpm test:run src/gateway/handlers/handlers.test.ts src/config/persistence.test.ts + pnpm typecheck passing" }, "audit-followup-tool-timeout-cancellation-contract": { - "status": "in_progress", + "status": "completed", "date": "2026-02-16", "updated": "2026-02-16", - "summary": "Expanded timeout-cancellation hardening: executor AbortSignal propagation now reaches process.start, browser tools (navigate/screenshot/click/type/content/eval), and web tools (fetch/search), with pre-abort regression tests. Remaining work: add explicit post-timeout side-effect regression coverage.", + "summary": "Completed timeout-cancellation hardening: executor AbortSignal propagation reaches process.start, browser tools (navigate/screenshot/click/type/content/eval), and web tools (fetch/search); added pre-abort tests and timeout regression coverage that verifies cancellable tools do not apply side effects after timeout.", "files_modified": [ "src/tools/types.ts", "src/tools/executor.ts", @@ -2572,7 +2572,7 @@ "docs/architecture/CONTRIBUTOR_MAP.md", "docs/plans/analysis/2026-02-16-codebase-audit-report.md" ], - "test_status": "pnpm test:run src/tools/builtin/browser/tools.test.ts src/tools/builtin/web-fetch.test.ts src/tools/builtin/web-search.test.ts src/tools/builtin/process/manager.test.ts + pnpm typecheck passing" + "test_status": "pnpm test:run src/tools/executor.test.ts src/tools/builtin/browser/tools.test.ts src/tools/builtin/web-fetch.test.ts src/tools/builtin/web-search.test.ts src/tools/builtin/process/manager.test.ts + pnpm typecheck passing" }, "audit-followup-retry-timeout-defaults": { "status": "completed", diff --git a/src/tools/executor.test.ts b/src/tools/executor.test.ts index e7e7735..28be135 100644 --- a/src/tools/executor.test.ts +++ b/src/tools/executor.test.ts @@ -59,6 +59,31 @@ const cancellableTool: Tool = { }, }; +function createSideEffectTool(sideEffect: { fired: boolean }): Tool { + return { + name: 'test.side_effect', + description: 'Cancellable side effect', + inputSchema: { type: 'object', properties: {} }, + execute: async (_args, context) => { + return await new Promise((resolve) => { + const timer = setTimeout(() => { + sideEffect.fired = true; + resolve({ success: true, output: 'side effect fired' }); + }, 120); + const onAbort = () => { + clearTimeout(timer); + resolve({ success: false, output: '', error: 'aborted' }); + }; + if (context?.signal?.aborted) { + onAbort(); + return; + } + context?.signal?.addEventListener('abort', onAbort, { once: true }); + }); + }, + }; +} + describe('ToolExecutor', () => { it('executes a tool and returns result', async () => { const registry = new ToolRegistry(); @@ -114,6 +139,21 @@ describe('ToolExecutor', () => { expect(result.error).toContain('timed out'); }); + it('prevents post-timeout side effects for cancellable tools', async () => { + const sideEffect = { fired: false }; + const registry = new ToolRegistry(); + registry.register(createSideEffectTool(sideEffect)); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks, { defaultTimeoutMs: 30 }); + + const result = await executor.execute('test.side_effect', {}); + expect(result.success).toBe(false); + expect(result.error).toContain('timed out'); + + await new Promise(resolve => setTimeout(resolve, 180)); + expect(sideEffect.fired).toBe(false); + }); + it('truncates large output', async () => { const registry = new ToolRegistry(); registry.register(bigOutputTool);