From 2cdfb660719b88bd324d03fed9147b80b2bcf2f1 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Sun, 15 Feb 2026 22:05:43 -0800 Subject: [PATCH] feat(tools): propagate timeout abort signals to tool execution --- docs/api/TOOLS.md | 8 ++++-- docs/architecture/CONTRIBUTOR_MAP.md | 2 +- .../2026-02-16-codebase-audit-report.md | 1 + docs/plans/state.json | 18 ++++++++++++ src/sandbox/docker.ts | 27 ++++++++++++++++-- src/sandbox/tools.ts | 9 +++--- src/tools/builtin/shell.ts | 20 +++++++++++-- src/tools/executor.test.ts | 28 +++++++++++++++++++ src/tools/executor.ts | 12 +++++--- src/tools/types.ts | 6 +++- 10 files changed, 113 insertions(+), 18 deletions(-) diff --git a/docs/api/TOOLS.md b/docs/api/TOOLS.md index 2a108e6..60597c0 100644 --- a/docs/api/TOOLS.md +++ b/docs/api/TOOLS.md @@ -57,8 +57,12 @@ export interface Tool { /** Secret scopes required to execute this tool (optional). */ requiredSecretScopes?: string[]; - /** Async function that executes the tool. */ - execute: (args: unknown) => Promise; + /** Optional execution context (abort signal, runtime metadata). */ + execute: (args: unknown, context?: ToolExecutionContext) => Promise; +} + +export interface ToolExecutionContext { + signal?: AbortSignal; } ``` diff --git a/docs/architecture/CONTRIBUTOR_MAP.md b/docs/architecture/CONTRIBUTOR_MAP.md index 6755ac9..4720ffa 100644 --- a/docs/architecture/CONTRIBUTOR_MAP.md +++ b/docs/architecture/CONTRIBUTOR_MAP.md @@ -59,7 +59,7 @@ export const myTool: Tool = { }, required: ['foo'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, _context?: { signal?: AbortSignal }): Promise => { // ... return { success: true, output: 'ok' }; }, diff --git a/docs/plans/analysis/2026-02-16-codebase-audit-report.md b/docs/plans/analysis/2026-02-16-codebase-audit-report.md index aecab99..9fe0fdf 100644 --- a/docs/plans/analysis/2026-02-16-codebase-audit-report.md +++ b/docs/plans/analysis/2026-02-16-codebase-audit-report.md @@ -16,6 +16,7 @@ Scope: Production-risk-first audit of bugs, code improvements, and feature oppor - ✅ F-008 addressed: WhatsApp Chromium launch is now sandboxed by default; no-sandbox mode is behind explicit `whatsapp.no_sandbox: true` opt-in. - ✅ F-014 addressed: `ModelRouter.setOnTierChange` now preserves existing listeners instead of replacing them, removing destructive listener-setter behavior. - ✅ F-002 addressed: `config.patch` now supports durable persistence via atomic write + backup when daemon has a concrete config path, and response includes `persisted`/`persistError` so UI can distinguish runtime-only vs disk-persisted updates. +- ◑ F-003 partially addressed: tool execution now has an `AbortSignal` contract and executor triggers abort on timeout; host `shell.exec` and sandbox docker exec now respond to cancellation. Additional high-risk tools still need explicit cancellation coverage for full closure. ## Executive Summary diff --git a/docs/plans/state.json b/docs/plans/state.json index 00c90cf..ec6c6c6 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -2547,6 +2547,24 @@ "docs/plans/analysis/2026-02-16-codebase-audit-report.md" ], "test_status": "pnpm test:run src/gateway/handlers/handlers.test.ts src/config/persistence.test.ts + pnpm typecheck passing" + }, + "audit-followup-tool-timeout-cancellation-contract": { + "status": "in_progress", + "date": "2026-02-16", + "updated": "2026-02-16", + "summary": "Started timeout-cancellation hardening: added ToolExecutionContext AbortSignal contract, executor abort-on-timeout propagation, and cancellation-aware shell execution for host and sandbox docker exec paths. Remaining work: migrate additional high-risk tools and add post-timeout side-effect regression coverage.", + "files_modified": [ + "src/tools/types.ts", + "src/tools/executor.ts", + "src/tools/executor.test.ts", + "src/tools/builtin/shell.ts", + "src/sandbox/docker.ts", + "src/sandbox/tools.ts", + "docs/api/TOOLS.md", + "docs/architecture/CONTRIBUTOR_MAP.md", + "docs/plans/analysis/2026-02-16-codebase-audit-report.md" + ], + "test_status": "pnpm test:run src/tools/executor.test.ts src/tools/builtin/shell.test.ts + pnpm typecheck passing" } }, "overall_progress": { diff --git a/src/sandbox/docker.ts b/src/sandbox/docker.ts index 0dc5513..fea35aa 100644 --- a/src/sandbox/docker.ts +++ b/src/sandbox/docker.ts @@ -13,6 +13,7 @@ export interface DockerSandboxConfig { export interface ExecOptions { cwd?: string; timeout?: number; + signal?: AbortSignal; } export interface ExecResult { @@ -76,7 +77,7 @@ export class DockerSandbox { args.push(this._containerId, 'bash', '-c', command); const timeout = opts?.timeout ?? this.config.timeoutSeconds * 1000; - return this.dockerCmd(args, timeout); + return this.dockerCmd(args, timeout, opts?.signal); } /** Force-remove the container. */ @@ -109,15 +110,35 @@ export class DockerSandbox { } /** Run a docker CLI command. */ - private dockerCmd(args: string[], timeout = 30_000): Promise { + private dockerCmd(args: string[], timeout = 30_000, signal?: AbortSignal): Promise { return new Promise((resolve, reject) => { - execFile('docker', args, { timeout, maxBuffer: 1024 * 1024 }, (error, stdout, stderr) => { + let settled = false; + const child = execFile('docker', args, { timeout, maxBuffer: 1024 * 1024 }, (error, stdout, stderr) => { + if (signal) { + signal.removeEventListener('abort', onAbort); + } + if (settled) {return;} + settled = true; if (error) { reject(error); return; } resolve({ stdout, stderr }); }); + + const onAbort = () => { + if (settled) {return;} + settled = true; + child.kill('SIGTERM'); + reject(new Error('Sandbox command aborted')); + }; + if (signal) { + if (signal.aborted) { + onAbort(); + } else { + signal.addEventListener('abort', onAbort, { once: true }); + } + } }); } } diff --git a/src/sandbox/tools.ts b/src/sandbox/tools.ts index 3528491..b1d7fa4 100644 --- a/src/sandbox/tools.ts +++ b/src/sandbox/tools.ts @@ -1,4 +1,4 @@ -import type { Tool, ToolResult } from '../tools/types.js'; +import type { Tool, ToolResult, ToolExecutionContext } from '../tools/types.js'; import type { DockerSandbox } from './docker.js'; interface ShellExecArgs { @@ -29,7 +29,7 @@ export function createSandboxedShellTool(sandbox: DockerSandbox): Tool { }, required: ['command'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as ShellExecArgs; const timeout = args.timeout ?? 30_000; @@ -37,6 +37,7 @@ export function createSandboxedShellTool(sandbox: DockerSandbox): Tool { const result = await sandbox.exec(args.command, { cwd: args.cwd, timeout, + signal: context?.signal, }); const output = result.stdout + (result.stderr ? `\nstderr: ${result.stderr}` : ''); @@ -68,12 +69,12 @@ export function createSandboxedProcessStartTool(sandbox: DockerSandbox): Tool { }, required: ['command'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as ProcessStartArgs; try { const wrappedCmd = `nohup bash -c '${args.command.replace(/'/g, "'\\''")}' > /tmp/proc.log 2>&1 & echo $!`; - const result = await sandbox.exec(wrappedCmd, { cwd: args.cwd }); + const result = await sandbox.exec(wrappedCmd, { cwd: args.cwd, signal: context?.signal }); const pid = result.stdout.trim(); return { diff --git a/src/tools/builtin/shell.ts b/src/tools/builtin/shell.ts index c779b80..5eebb04 100644 --- a/src/tools/builtin/shell.ts +++ b/src/tools/builtin/shell.ts @@ -1,5 +1,5 @@ import { execFile } from 'child_process'; -import type { Tool, ToolResult } from '../types.js'; +import type { Tool, ToolResult, ToolExecutionContext } from '../types.js'; interface ShellExecArgs { command: string; @@ -19,16 +19,19 @@ export const shellExecTool: Tool = { }, required: ['command'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as ShellExecArgs; const timeout = args.timeout ?? 30_000; return new Promise((resolve) => { - execFile('bash', ['-c', args.command], { + const child = execFile('bash', ['-c', args.command], { cwd: args.cwd, timeout, maxBuffer: 1024 * 1024, }, (error, stdout, stderr) => { + if (context?.signal) { + context.signal.removeEventListener('abort', onAbort); + } if (error) { if (error.killed || error.signal === 'SIGTERM') { resolve({ success: false, output: stdout, error: `Command timed out after ${timeout}ms` }); @@ -43,6 +46,17 @@ export const shellExecTool: Tool = { } resolve({ success: true, output: stdout + (stderr ? `\nstderr: ${stderr}` : '') }); }); + + const onAbort = () => { + child.kill('SIGTERM'); + }; + if (context?.signal) { + if (context.signal.aborted) { + onAbort(); + } else { + context.signal.addEventListener('abort', onAbort, { once: true }); + } + } }); }, }; diff --git a/src/tools/executor.test.ts b/src/tools/executor.test.ts index 7689411..e7e7735 100644 --- a/src/tools/executor.test.ts +++ b/src/tools/executor.test.ts @@ -42,6 +42,23 @@ const fileWriteLikeTool: Tool = { execute: async () => ({ success: true, output: 'ok' }), }; +const cancellableTool: Tool = { + name: 'test.cancellable', + description: 'Long-running cancellable tool', + inputSchema: { type: 'object', properties: {} }, + execute: async (_args, context) => { + return await new Promise((resolve) => { + const onAbort = () => resolve({ success: false, output: '', error: 'aborted' }); + if (context?.signal?.aborted) { + onAbort(); + return; + } + context?.signal?.addEventListener('abort', onAbort, { once: true }); + setTimeout(() => resolve({ success: true, output: 'done' }), 5000); + }); + }, +}; + describe('ToolExecutor', () => { it('executes a tool and returns result', async () => { const registry = new ToolRegistry(); @@ -86,6 +103,17 @@ describe('ToolExecutor', () => { expect(result.error).toContain('timed out'); }); + it('aborts cancellable tool work on timeout', async () => { + const registry = new ToolRegistry(); + registry.register(cancellableTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks, { defaultTimeoutMs: 50 }); + + const result = await executor.execute('test.cancellable', {}); + expect(result.success).toBe(false); + expect(result.error).toContain('timed out'); + }); + it('truncates large output', async () => { const registry = new ToolRegistry(); registry.register(bigOutputTool); diff --git a/src/tools/executor.ts b/src/tools/executor.ts index 80a6f15..78fa2ee 100644 --- a/src/tools/executor.ts +++ b/src/tools/executor.ts @@ -225,6 +225,7 @@ export class ToolExecutor { }); let timeoutHandle: NodeJS.Timeout | undefined; + const abortController = new AbortController(); try { const result = await Promise.race([ (async () => { @@ -232,17 +233,20 @@ export class ToolExecutor { const sandboxSessionId = context?.sessionId ?? `${context?.channel ?? 'unknown'}:${context?.sender ?? 'unknown'}`; const sandbox = await this.sandboxManager.getOrCreate(sandboxSessionId); if (toolName === 'shell.exec') { - return createSandboxedShellTool(sandbox).execute(args); + return createSandboxedShellTool(sandbox).execute(args, { signal: abortController.signal }); } if (toolName === 'process.start') { - return createSandboxedProcessStartTool(sandbox).execute(args); + return createSandboxedProcessStartTool(sandbox).execute(args, { signal: abortController.signal }); } } - return tool.execute(args); + return tool.execute(args, { signal: abortController.signal }); })(), new Promise((_, reject) => { timeoutHandle = setTimeout( - () => reject(new Error(`Tool '${toolName}' timed out after ${this.defaultTimeoutMs}ms`)), + () => { + abortController.abort(); + reject(new Error(`Tool '${toolName}' timed out after ${this.defaultTimeoutMs}ms`)); + }, this.defaultTimeoutMs, ); }), diff --git a/src/tools/types.ts b/src/tools/types.ts index 76924d8..a349dd5 100644 --- a/src/tools/types.ts +++ b/src/tools/types.ts @@ -10,7 +10,11 @@ export interface Tool { inputSchema: ToolInputSchema; /** Secret scopes required to execute this tool (optional). */ requiredSecretScopes?: string[]; - execute(args: unknown): Promise; + execute(args: unknown, context?: ToolExecutionContext): Promise; +} + +export interface ToolExecutionContext { + signal?: AbortSignal; } export interface ToolCall {