From b4006e91ffe607259ca606413c8fdb4fb483ff30 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Sun, 15 Feb 2026 22:12:03 -0800 Subject: [PATCH] feat(tools): extend cancellation to browser, web, and process tools --- .../2026-02-16-codebase-audit-report.md | 8 +- docs/plans/state.json | 14 +- src/tools/builtin/browser/tools.test.ts | 12 ++ src/tools/builtin/browser/tools.ts | 125 +++++++++++++++--- src/tools/builtin/process/manager.test.ts | 13 ++ src/tools/builtin/process/start.ts | 11 +- src/tools/builtin/web-fetch.test.ts | 14 ++ src/tools/builtin/web-fetch.ts | 18 ++- src/tools/builtin/web-search.test.ts | 15 +++ src/tools/builtin/web-search.ts | 30 ++++- 10 files changed, 228 insertions(+), 32 deletions(-) diff --git a/docs/plans/analysis/2026-02-16-codebase-audit-report.md b/docs/plans/analysis/2026-02-16-codebase-audit-report.md index 05bee21..3624dd7 100644 --- a/docs/plans/analysis/2026-02-16-codebase-audit-report.md +++ b/docs/plans/analysis/2026-02-16-codebase-audit-report.md @@ -16,7 +16,7 @@ Scope: Production-risk-first audit of bugs, code improvements, and feature oppor - ✅ F-008 addressed: WhatsApp Chromium launch is now sandboxed by default; no-sandbox mode is behind explicit `whatsapp.no_sandbox: true` opt-in. - ✅ F-014 addressed: `ModelRouter.setOnTierChange` now preserves existing listeners instead of replacing them, removing destructive listener-setter behavior. - ✅ F-002 addressed: `config.patch` now supports durable persistence via atomic write + backup when daemon has a concrete config path, and response includes `persisted`/`persistError` so UI can distinguish runtime-only vs disk-persisted updates. -- ◑ F-003 partially addressed: tool execution now has an `AbortSignal` contract and executor triggers abort on timeout; host `shell.exec` and sandbox docker exec now respond to cancellation. Additional high-risk tools still need explicit cancellation coverage for full closure. +- ◑ F-003 partially addressed: tool execution now has an `AbortSignal` contract and executor triggers abort on timeout; host `shell.exec`, sandbox docker exec, `process.start`, browser tools (`browser.navigate/click/type/content/eval/screenshot`), and web tools (`web.fetch`, `web.search`) now respond to cancellation. Remaining closure work is timeout-side-effect regression coverage. - ✅ F-015 addressed: retry defaults no longer classify timeout-style failures as non-retryable, improving resilience for transient timeout conditions. ## Executive Summary @@ -24,7 +24,7 @@ Scope: Production-risk-first audit of bugs, code improvements, and feature oppor Current health snapshot: - `pnpm typecheck`: passing - `pnpm build`: passing -- `pnpm test:run`: passing (`137/137` files, `1735/1735` tests) +- `pnpm test:run`: passing (`140/140` files, `1765/1765` tests) - `pnpm lint`: failing (`148 errors`, `530 warnings`) Top conclusions: @@ -113,6 +113,10 @@ Non-goals: - Timeout + cancellation conformance tests for cancellable tools. - Regression tests verifying no post-timeout side effects. +Remediation update (2026-02-16): +- Abort propagation now covers executor -> context signal -> `process.start`, browser tools, and web fetch/search tools. +- Added aborted-signal regression tests for these tool paths. + ### F-004 Medium: Lint quality gate is broken and concentrated in key runtime files - Severity: Medium diff --git a/docs/plans/state.json b/docs/plans/state.json index 15da73d..8ae448d 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -2552,19 +2552,27 @@ "status": "in_progress", "date": "2026-02-16", "updated": "2026-02-16", - "summary": "Started timeout-cancellation hardening: added ToolExecutionContext AbortSignal contract, executor abort-on-timeout propagation, and cancellation-aware shell execution for host and sandbox docker exec paths. Remaining work: migrate additional high-risk tools and add post-timeout side-effect regression coverage.", + "summary": "Expanded timeout-cancellation hardening: executor AbortSignal propagation now reaches process.start, browser tools (navigate/screenshot/click/type/content/eval), and web tools (fetch/search), with pre-abort regression tests. Remaining work: add explicit post-timeout side-effect regression coverage.", "files_modified": [ "src/tools/types.ts", "src/tools/executor.ts", "src/tools/executor.test.ts", "src/tools/builtin/shell.ts", + "src/tools/builtin/browser/tools.ts", + "src/tools/builtin/browser/tools.test.ts", + "src/tools/builtin/web-fetch.ts", + "src/tools/builtin/web-fetch.test.ts", + "src/tools/builtin/web-search.ts", + "src/tools/builtin/web-search.test.ts", + "src/tools/builtin/process/start.ts", + "src/tools/builtin/process/manager.test.ts", "src/sandbox/docker.ts", "src/sandbox/tools.ts", "docs/api/TOOLS.md", "docs/architecture/CONTRIBUTOR_MAP.md", "docs/plans/analysis/2026-02-16-codebase-audit-report.md" ], - "test_status": "pnpm test:run src/tools/executor.test.ts src/tools/builtin/shell.test.ts + pnpm typecheck passing" + "test_status": "pnpm test:run src/tools/builtin/browser/tools.test.ts src/tools/builtin/web-fetch.test.ts src/tools/builtin/web-search.test.ts src/tools/builtin/process/manager.test.ts + pnpm typecheck passing" }, "audit-followup-retry-timeout-defaults": { "status": "completed", @@ -2580,7 +2588,7 @@ } }, "overall_progress": { - "total_test_count": 1703, + "total_test_count": 1765, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", diff --git a/src/tools/builtin/browser/tools.test.ts b/src/tools/builtin/browser/tools.test.ts index 97086e9..1dcabb7 100644 --- a/src/tools/builtin/browser/tools.test.ts +++ b/src/tools/builtin/browser/tools.test.ts @@ -154,4 +154,16 @@ describe('Browser tools', () => { expect(result.success).toBe(false); expect(result.error).toContain('Element not found'); }); + + it('returns aborted error when signal is already aborted', async () => { + const tool = tools.find(t => t.name === 'browser.navigate')!; + const controller = new AbortController(); + controller.abort(); + + const result = await tool.execute({ url: 'https://example.com' }, { signal: controller.signal }); + expect(result.success).toBe(false); + expect(result.error).toContain('aborted'); + expect(mockManager.getPage).not.toHaveBeenCalled(); + expect(mockGoto).not.toHaveBeenCalled(); + }); }); diff --git a/src/tools/builtin/browser/tools.ts b/src/tools/builtin/browser/tools.ts index c765c64..a35c4d6 100644 --- a/src/tools/builtin/browser/tools.ts +++ b/src/tools/builtin/browser/tools.ts @@ -1,6 +1,60 @@ -import type { Tool, ToolResult } from '../../types.js'; +import type { Tool, ToolExecutionContext, ToolResult } from '../../types.js'; import type { BrowserManager } from './manager.js'; +function abortError(): string { + return 'Operation aborted'; +} + +function isAbortError(error: unknown): boolean { + if (!(error instanceof Error)) { + return false; + } + return error.name === 'AbortError' || error.message.toLowerCase().includes('aborted'); +} + +function throwIfAborted(signal?: AbortSignal): void { + if (signal?.aborted) { + throw new Error(abortError()); + } +} + +async function withAbort( + operation: Promise, + signal?: AbortSignal, + onAbort?: () => void, +): Promise { + if (!signal) { + return operation; + } + if (signal.aborted) { + onAbort?.(); + throw new Error(abortError()); + } + return new Promise((resolve, reject) => { + const cleanup = () => { + signal.removeEventListener('abort', handleAbort); + }; + + const handleAbort = () => { + onAbort?.(); + cleanup(); + reject(new Error(abortError())); + }; + + signal.addEventListener('abort', handleAbort, { once: true }); + + operation + .then((value) => { + cleanup(); + resolve(value); + }) + .catch((error) => { + cleanup(); + reject(error); + }); + }); +} + /** Create all browser tools bound to a BrowserManager instance. */ export function createBrowserTools(manager: BrowserManager): Tool[] { return [ @@ -28,12 +82,23 @@ function createBrowserNavigateTool(manager: BrowserManager): Tool { }, required: ['url'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as { url: string; waitUntil?: string }; try { + throwIfAborted(context?.signal); const page = await manager.getPage(); + throwIfAborted(context?.signal); const waitUntil = (args.waitUntil ?? 'domcontentloaded') as 'load' | 'domcontentloaded' | 'networkidle0' | 'networkidle2'; - await page.goto(args.url, { waitUntil }); + await withAbort( + page.goto(args.url, { waitUntil }), + context?.signal, + () => { + void page.evaluate(() => { + (globalThis as { stop?: () => void }).stop?.(); + }).catch(() => undefined); + }, + ); + throwIfAborted(context?.signal); const title = await page.title(); const currentUrl = page.url(); return { @@ -41,6 +106,9 @@ function createBrowserNavigateTool(manager: BrowserManager): Tool { output: `Navigated to: ${currentUrl}\nTitle: ${title}`, }; } catch (error) { + if (isAbortError(error)) { + return { success: false, output: '', error: abortError() }; + } return { success: false, output: '', @@ -62,10 +130,12 @@ function createBrowserScreenshotTool(manager: BrowserManager): Tool { selector: { type: 'string', description: 'CSS selector to screenshot a specific element' }, }, }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as { fullPage?: boolean; selector?: string }; try { + throwIfAborted(context?.signal); const page = await manager.getPage(); + throwIfAborted(context?.signal); let screenshotData: string; if (args.selector) { @@ -86,6 +156,9 @@ function createBrowserScreenshotTool(manager: BrowserManager): Tool { output: `Screenshot captured (base64 PNG, ${screenshotData.length} chars):\n${screenshotData.slice(0, 200)}...`, }; } catch (error) { + if (isAbortError(error)) { + return { success: false, output: '', error: abortError() }; + } return { success: false, output: '', @@ -107,13 +180,18 @@ function createBrowserClickTool(manager: BrowserManager): Tool { }, required: ['selector'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as { selector: string }; try { + throwIfAborted(context?.signal); const page = await manager.getPage(); - await page.click(args.selector); + throwIfAborted(context?.signal); + await withAbort(page.click(args.selector), context?.signal); return { success: true, output: `Clicked element: ${args.selector}` }; } catch (error) { + if (isAbortError(error)) { + return { success: false, output: '', error: abortError() }; + } return { success: false, output: '', @@ -137,17 +215,22 @@ function createBrowserTypeTool(manager: BrowserManager): Tool { }, required: ['selector', 'text'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as { selector: string; text: string; clear?: boolean }; try { + throwIfAborted(context?.signal); const page = await manager.getPage(); + throwIfAborted(context?.signal); if (args.clear) { - await page.click(args.selector, { count: 3 }); // Select all - await page.keyboard.press('Backspace'); + await withAbort(page.click(args.selector, { count: 3 }), context?.signal); // Select all + await withAbort(page.keyboard.press('Backspace'), context?.signal); } - await page.type(args.selector, args.text); + await withAbort(page.type(args.selector, args.text), context?.signal); return { success: true, output: `Typed "${args.text}" into ${args.selector}` }; } catch (error) { + if (isAbortError(error)) { + return { success: false, output: '', error: abortError() }; + } return { success: false, output: '', @@ -169,19 +252,21 @@ function createBrowserContentTool(manager: BrowserManager): Tool { maxLength: { type: 'number', description: 'Maximum characters to return (default: 10000)' }, }, }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as { selector?: string; maxLength?: number }; try { + throwIfAborted(context?.signal); const page = await manager.getPage(); + throwIfAborted(context?.signal); const selector = args.selector ?? 'body'; const maxLength = args.maxLength ?? 10000; // The $eval callback executes in the browser context where DOM types exist, // but TS checks against Node.js lib (no DOM). Use `any` to bridge the gap. - const text = await page.$eval(selector, (el) => { + const text = await withAbort(page.$eval(selector, (el) => { const htmlEl = el as unknown as { innerText?: string; textContent?: string | null }; return htmlEl.innerText || htmlEl.textContent || ''; - }); + }), context?.signal); const truncated = text.length > maxLength ? text.slice(0, maxLength) + `\n... (truncated, ${text.length} total chars)` @@ -195,6 +280,9 @@ function createBrowserContentTool(manager: BrowserManager): Tool { output: `URL: ${url}\nTitle: ${title}\n\n${truncated}`, }; } catch (error) { + if (isAbortError(error)) { + return { success: false, output: '', error: abortError() }; + } return { success: false, output: '', @@ -216,18 +304,23 @@ function createBrowserEvalTool(manager: BrowserManager): Tool { }, required: ['expression'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as { expression: string }; try { + throwIfAborted(context?.signal); const page = await manager.getPage(); + throwIfAborted(context?.signal); // Use evaluate with a function that evaluates the expression string - const result = await page.evaluate((expr: string) => { + const result = await withAbort(page.evaluate((expr: string) => { // eslint-disable-next-line no-eval return eval(expr); - }, args.expression); + }, args.expression), context?.signal); const output = typeof result === 'string' ? result : JSON.stringify(result, null, 2); return { success: true, output: output ?? 'undefined' }; } catch (error) { + if (isAbortError(error)) { + return { success: false, output: '', error: abortError() }; + } return { success: false, output: '', diff --git a/src/tools/builtin/process/manager.test.ts b/src/tools/builtin/process/manager.test.ts index 4a07bf5..6403d0b 100644 --- a/src/tools/builtin/process/manager.test.ts +++ b/src/tools/builtin/process/manager.test.ts @@ -221,6 +221,19 @@ describe('Process tools', () => { await new Promise(resolve => setTimeout(resolve, 300)); }); + it('process.start tool fails when aborted before start', async () => { + const { createProcessStartTool } = await import('./start.js'); + manager = new ProcessManager(); + const tool = createProcessStartTool(manager); + const controller = new AbortController(); + controller.abort(); + + const result = await tool.execute({ command: 'sleep 60' }, { signal: controller.signal }); + expect(result.success).toBe(false); + expect(result.error).toContain('aborted'); + expect(manager.list().length).toBe(0); + }); + it('process.status tool returns status info', async () => { const { createProcessStatusTool } = await import('./status.js'); manager = new ProcessManager(); diff --git a/src/tools/builtin/process/start.ts b/src/tools/builtin/process/start.ts index 1bedc8e..3e6b35a 100644 --- a/src/tools/builtin/process/start.ts +++ b/src/tools/builtin/process/start.ts @@ -1,4 +1,4 @@ -import type { Tool, ToolResult } from '../../types.js'; +import type { Tool, ToolExecutionContext, ToolResult } from '../../types.js'; import type { ProcessManager } from './manager.js'; interface ProcessStartArgs { @@ -18,8 +18,15 @@ export function createProcessStartTool(manager: ProcessManager): Tool { }, required: ['command'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as ProcessStartArgs; + if (context?.signal?.aborted) { + return { + success: false, + output: '', + error: 'Operation aborted', + }; + } try { const proc = manager.start(args.command, args.cwd); return { diff --git a/src/tools/builtin/web-fetch.test.ts b/src/tools/builtin/web-fetch.test.ts index 1b3dc80..9f09dac 100644 --- a/src/tools/builtin/web-fetch.test.ts +++ b/src/tools/builtin/web-fetch.test.ts @@ -160,6 +160,20 @@ describe('web.fetch', () => { expect(result.error).toContain('network error'); }); + it('returns aborted error when signal is already aborted', async () => { + const controller = new AbortController(); + controller.abort(); + + const result = await webFetchTool.execute( + { url: 'https://example.com' }, + { signal: controller.signal }, + ); + + expect(result.success).toBe(false); + expect(result.error).toContain('aborted'); + expect(mockFetch).not.toHaveBeenCalled(); + }); + // ---- Caching ---- it('caches responses and reuses them on the second call', async () => { diff --git a/src/tools/builtin/web-fetch.ts b/src/tools/builtin/web-fetch.ts index e4ab737..f2425de 100644 --- a/src/tools/builtin/web-fetch.ts +++ b/src/tools/builtin/web-fetch.ts @@ -1,7 +1,7 @@ import { parseHTML } from 'linkedom'; import { Readability } from '@mozilla/readability'; import TurndownService from 'turndown'; -import type { Tool, ToolResult } from '../types.js'; +import type { Tool, ToolExecutionContext, ToolResult } from '../types.js'; // --------------------------------------------------------------------------- // Types @@ -181,11 +181,15 @@ export const webFetchTool: Tool = { required: ['url'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as WebFetchArgs; const format: OutputFormat = args.format ?? 'markdown'; const timeout = args.timeout ?? 15_000; + if (context?.signal?.aborted) { + return { success: false, output: '', error: 'Operation aborted' }; + } + // ----- Check cache (lazy eviction) ----- // evictExpired(); const key = cacheKey(args.url, format); @@ -196,8 +200,13 @@ export const webFetchTool: Tool = { // ----- Fetch ----- // try { + const timeoutSignal = AbortSignal.timeout(timeout); + const signal = context?.signal + ? AbortSignal.any([context.signal, timeoutSignal]) + : timeoutSignal; + const response = await fetch(args.url, { - signal: AbortSignal.timeout(timeout), + signal, headers: { 'User-Agent': 'Flynn/0.1 (personal AI assistant)', Accept: 'text/html, application/json, text/plain, */*', @@ -224,6 +233,9 @@ export const webFetchTool: Tool = { return { success: true, output }; } catch (error) { + if (error instanceof Error && (error.name === 'AbortError' || error.message.toLowerCase().includes('aborted'))) { + return { success: false, output: '', error: 'Operation aborted' }; + } return { success: false, output: '', diff --git a/src/tools/builtin/web-search.test.ts b/src/tools/builtin/web-search.test.ts index e4ff5aa..a70af27 100644 --- a/src/tools/builtin/web-search.test.ts +++ b/src/tools/builtin/web-search.test.ts @@ -255,4 +255,19 @@ describe('web.search', () => { expect(result.output).toContain('2. **Result 2**'); }); }); + + it('returns aborted error when signal is already aborted', async () => { + const tool = createWebSearchTool(braveConfig); + const controller = new AbortController(); + controller.abort(); + + const result = await tool.execute( + { query: 'test query' }, + { signal: controller.signal }, + ); + + expect(result.success).toBe(false); + expect(result.error).toContain('aborted'); + expect(mockFetch).not.toHaveBeenCalled(); + }); }); diff --git a/src/tools/builtin/web-search.ts b/src/tools/builtin/web-search.ts index 286cf68..ef7f19c 100644 --- a/src/tools/builtin/web-search.ts +++ b/src/tools/builtin/web-search.ts @@ -1,4 +1,4 @@ -import type { Tool, ToolResult } from '../types.js'; +import type { Tool, ToolExecutionContext, ToolResult } from '../types.js'; /** Configuration for the web search tool. */ export interface WebSearchConfig { @@ -49,16 +49,22 @@ async function searchBrave( query: string, count: number, apiKey: string, + signal?: AbortSignal, ): Promise { const params = new URLSearchParams({ q: query, count: String(count), }); + const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT_MS); + const fetchSignal = signal + ? AbortSignal.any([signal, timeoutSignal]) + : timeoutSignal; + const response = await fetch( `https://api.search.brave.com/res/v1/web/search?${params.toString()}`, { - signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), + signal: fetchSignal, headers: { 'Accept': 'application/json', 'X-Subscription-Token': apiKey, @@ -90,6 +96,7 @@ async function searchSearxng( query: string, count: number, endpoint: string, + signal?: AbortSignal, ): Promise { // Strip trailing slash from endpoint const baseUrl = endpoint.replace(/\/+$/, ''); @@ -99,8 +106,13 @@ async function searchSearxng( categories: 'general', }); + const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT_MS); + const fetchSignal = signal + ? AbortSignal.any([signal, timeoutSignal]) + : timeoutSignal; + const response = await fetch(`${baseUrl}/search?${params.toString()}`, { - signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), + signal: fetchSignal, headers: { 'Accept': 'application/json', }, @@ -147,8 +159,11 @@ export function createWebSearchTool(config: WebSearchConfig): Tool { }, required: ['query'], }, - execute: async (rawArgs: unknown): Promise => { + execute: async (rawArgs: unknown, context?: ToolExecutionContext): Promise => { const args = rawArgs as WebSearchArgs; + if (context?.signal?.aborted) { + return { success: false, output: '', error: 'Operation aborted' }; + } // Clamp count: use provided value (capped at MAX_RESULTS), or fall back to default const count = Math.min(args.count ?? defaultCount, MAX_RESULTS); @@ -163,7 +178,7 @@ export function createWebSearchTool(config: WebSearchConfig): Tool { error: 'Brave Search API key not configured', }; } - results = await searchBrave(args.query, count, config.apiKey); + results = await searchBrave(args.query, count, config.apiKey, context?.signal); } else { // SearXNG provider if (!config.endpoint) { @@ -173,7 +188,7 @@ export function createWebSearchTool(config: WebSearchConfig): Tool { error: 'SearXNG endpoint not configured', }; } - results = await searchSearxng(args.query, count, config.endpoint); + results = await searchSearxng(args.query, count, config.endpoint, context?.signal); } if (results.length === 0) { @@ -185,6 +200,9 @@ export function createWebSearchTool(config: WebSearchConfig): Tool { return { success: true, output: formatResults(results) }; } catch (error) { + if (error instanceof Error && (error.name === 'AbortError' || error.message.toLowerCase().includes('aborted'))) { + return { success: false, output: '', error: 'Operation aborted' }; + } return { success: false, output: '',