feat: add Chrome DevTools Protocol browser tools

Add BrowserManager (puppeteer-core) with page pool and auto-detection of
Chrome/Chromium. Six tools: browser.navigate, browser.screenshot,
browser.click, browser.type, browser.content, browser.eval. Feature is
opt-in (browser.enabled defaults to false). Add to coding tool profile.
Includes 22 unit tests for manager and all tools.
This commit is contained in:
William Valentin
2026-02-06 16:52:03 -08:00
parent 647d7779c7
commit 8c56a5a1a8
8 changed files with 662 additions and 1 deletions
+3
View File
@@ -0,0 +1,3 @@
export { BrowserManager } from './manager.js';
export type { BrowserManagerConfig } from './manager.js';
export { createBrowserTools } from './tools.js';
+108
View File
@@ -0,0 +1,108 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { BrowserManager } from './manager.js';
// Shared spies that make up the fake puppeteer Page and Browser used by every test below.
// Use vi.hoisted() so these are available inside the hoisted vi.mock() call
// (the vi.mock() factory further down closes over mockNewPage, mockPages and mockBrowserClose).
const {
mockGoto, mockTitle, mockUrl, mockClose, mockIsClosed,
mockSetDefaultTimeout, mockPage, mockNewPage, mockPages,
mockBrowserClose,
} = vi.hoisted(() => {
// Page-level spies with their default (happy-path) results.
const mockGoto = vi.fn().mockResolvedValue(undefined);
const mockTitle = vi.fn().mockResolvedValue('Test Page');
const mockUrl = vi.fn().mockReturnValue('https://example.com');
const mockClose = vi.fn().mockResolvedValue(undefined);
// Reports the page as open by default; individual tests flip this to simulate a closed page.
const mockIsClosed = vi.fn().mockReturnValue(false);
const mockSetDefaultTimeout = vi.fn();
// Fake Page exposing only the members wired to the spies above.
const mockPage = {
goto: mockGoto,
title: mockTitle,
url: mockUrl,
close: mockClose,
isClosed: mockIsClosed,
setDefaultTimeout: mockSetDefaultTimeout,
};
// Browser-level spies: newPage() always resolves to the single shared fake page,
// and pages() reports that same page as the only open one.
const mockNewPage = vi.fn().mockResolvedValue(mockPage);
const mockPages = vi.fn().mockResolvedValue([mockPage]);
const mockBrowserClose = vi.fn().mockResolvedValue(undefined);
// Hand everything back so the test bodies can assert on and reconfigure the spies.
return {
mockGoto, mockTitle, mockUrl, mockClose, mockIsClosed,
mockSetDefaultTimeout, mockPage, mockNewPage, mockPages,
mockBrowserClose,
};
});
// Replace puppeteer-core with a stub so no real browser is started.
// Both launch() and connect() resolve to the same fake Browser shape:
// always `connected`, and backed by the hoisted newPage/pages/close spies.
vi.mock('puppeteer-core', () => ({
default: {
// Used when the manager launches a local browser.
launch: vi.fn().mockResolvedValue({
connected: true,
newPage: mockNewPage,
pages: mockPages,
close: mockBrowserClose,
}),
// Used when the manager is configured with a wsEndpoint.
connect: vi.fn().mockResolvedValue({
connected: true,
newPage: mockNewPage,
pages: mockPages,
close: mockBrowserClose,
}),
},
}));
describe('BrowserManager', () => {
  // Every test attaches to the mocked remote browser through this endpoint.
  const endpoint = 'ws://localhost:9222';

  // Restore the baseline "one open, live page" state that individual tests tweak.
  beforeEach(() => {
    vi.clearAllMocks();
    mockIsClosed.mockReturnValue(false);
    mockPages.mockResolvedValue([mockPage]);
  });

  it('gets the default page via WebSocket endpoint', async () => {
    const defaultPage = await new BrowserManager({ wsEndpoint: endpoint }).getPage();

    expect(defaultPage).toBeDefined();
    expect(defaultPage.goto).toBeDefined();
  });

  it('reuses existing pages', async () => {
    const manager = new BrowserManager({ wsEndpoint: endpoint });

    const first = await manager.getPage();
    const second = await manager.getPage();

    expect(first).toBe(second);
  });

  it('creates a new page when existing one is closed', async () => {
    const manager = new BrowserManager({ wsEndpoint: endpoint });
    await manager.getPage();

    // From here on the cached page reports itself as closed.
    mockIsClosed.mockReturnValue(true);
    const replacement = await manager.getPage();

    expect(replacement).toBeDefined();
  });

  it('enforces max pages limit', async () => {
    const manager = new BrowserManager({ wsEndpoint: endpoint, maxPages: 1 });

    // The default page already fills the single available slot.
    await manager.getPage();

    const attempt = manager.newPage('second');
    await expect(attempt).rejects.toThrow('Maximum concurrent pages');
  });

  it('shuts down cleanly', async () => {
    const manager = new BrowserManager({ wsEndpoint: endpoint });
    await manager.getPage();

    await manager.shutdown();

    expect(mockBrowserClose).toHaveBeenCalled();
  });

  it('closes a specific page', async () => {
    const pageId = 'test-page';
    const manager = new BrowserManager({ wsEndpoint: endpoint });
    await manager.getPage(pageId);

    await manager.closePage(pageId);

    expect(mockClose).toHaveBeenCalled();
  });

  it('reports availability correctly', () => {
    // A path that does not exist on disk must be reported as unavailable.
    const available = BrowserManager.isAvailable('/nonexistent/path');

    expect(available).toBe(false);
  });
});
+145
View File
@@ -0,0 +1,145 @@
import puppeteer from 'puppeteer-core';
import type { Browser, Page } from 'puppeteer-core';
import { existsSync } from 'fs';
/**
 * Options accepted by the BrowserManager constructor. Every field is optional;
 * the defaults noted below are applied by BrowserManager at the point of use.
 */
export interface BrowserManagerConfig {
/**
 * Path to Chrome/Chromium executable. If not set, tries common locations.
 * Only consulted when launching, i.e. when `wsEndpoint` is not set.
 */
executablePath?: string;
/**
 * Connect to a running browser via WebSocket URL instead of launching.
 * When set, it takes precedence over `executablePath` and `headless`.
 */
wsEndpoint?: string;
/** Launch headless (default: true). Not used when connecting via `wsEndpoint`. */
headless?: boolean;
/** Maximum number of concurrent pages (default: 5). Exceeding it makes newPage() throw. */
maxPages?: number;
/**
 * Default navigation timeout in ms (default: 30000).
 * Passed to page.setDefaultTimeout() on each page the manager hands out.
 */
defaultTimeout?: number;
}
/**
 * Locate a Chrome/Chromium binary by probing a fixed list of install paths.
 *
 * The list is checked in order and the first path that exists on disk wins.
 *
 * @returns Absolute path of the first existing candidate.
 * @throws Error when none of the candidate paths exists.
 */
function findChrome(): string {
  const knownLocations: string[] = [
    // Linux
    '/usr/bin/chromium',
    '/usr/bin/chromium-browser',
    '/usr/bin/google-chrome',
    '/usr/bin/google-chrome-stable',
    // macOS
    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    '/Applications/Chromium.app/Contents/MacOS/Chromium',
    // Snap/Flatpak
    '/snap/bin/chromium',
  ];

  const firstHit = knownLocations.find((location) => existsSync(location));
  if (firstHit === undefined) {
    throw new Error('Chrome/Chromium not found. Set browser.executable_path in config or install Chrome.');
  }
  return firstHit;
}
/**
 * BrowserManager — manages a browser instance and page pool via CDP.
 *
 * Features:
 * - Launch a local browser or connect to a remote one via WebSocket
 * - Page pool with configurable concurrency limit
 * - Graceful shutdown of all pages and the browser
 *
 * Pages are tracked under caller-chosen string ids. Each id owns a distinct
 * Page object, so closing one id never closes a page handed out under another.
 */
export class BrowserManager {
  private browser: Browser | null = null;
  /** In-flight launch/connect, shared so concurrent callers never start two browsers. */
  private connecting: Promise<Browser> | null = null;
  private pages: Map<string, Page> = new Map();
  private config: BrowserManagerConfig;

  constructor(config: BrowserManagerConfig = {}) {
    this.config = config;
  }

  /**
   * Get or launch the browser instance.
   *
   * Returns the current browser while it is still connected. Otherwise a single
   * launch/connect is started and shared by every caller that arrives meanwhile.
   *
   * @throws Error when no executable can be found or puppeteer fails to start/connect.
   */
  async getBrowser(): Promise<Browser> {
    if (this.browser && this.browser.connected) {
      return this.browser;
    }
    if (this.connecting) {
      return this.connecting;
    }
    if (this.browser) {
      // The previous browser is gone; pages created on it can no longer be driven.
      this.pages.clear();
      this.browser = null;
    }

    const pending = (async (): Promise<Browser> => {
      try {
        const opened = await this.openBrowser();
        this.browser = opened;
        return opened;
      } finally {
        this.connecting = null;
      }
    })();
    this.connecting = pending;
    return pending;
  }

  /**
   * Get the page registered under `pageId` (default id: 'default'), creating it on demand.
   *
   * A live page already registered for the id is returned as-is. Otherwise an
   * already-open tab that no other id owns is adopted, and only if there is none
   * is a new tab opened.
   *
   * @throws Error when registering another page would exceed `maxPages`.
   */
  async getPage(pageId?: string): Promise<Page> {
    const id = pageId ?? 'default';
    const cached = this.pages.get(id);
    if (cached && !cached.isClosed()) {
      return cached;
    }

    const browser = await this.getBrowser();
    this.dropClosedPages();
    this.ensureCapacity(this.pages.size);

    const openTabs = await browser.pages();
    // Never hand the same Page out under two ids: skip tabs another id already owns.
    const owned = new Set(this.pages.values());
    const adoptable = openTabs.find((tab) => !tab.isClosed() && !owned.has(tab));
    const page = adoptable ?? (await browser.newPage());

    page.setDefaultTimeout(this.config.defaultTimeout ?? 30000);
    this.pages.set(id, page);
    return page;
  }

  /**
   * Create a new page with a given ID.
   *
   * If the id is already registered, the page it referred to is closed first so
   * it is not leaked when the entry is replaced.
   *
   * @throws Error when the pool already holds `maxPages` live pages.
   */
  async newPage(pageId: string): Promise<Page> {
    this.dropClosedPages();
    const replaced = this.pages.get(pageId);
    // A replaced entry frees its own slot, so it does not count against the limit.
    this.ensureCapacity(replaced ? this.pages.size - 1 : this.pages.size);

    if (replaced) {
      this.pages.delete(pageId);
      try {
        await replaced.close();
      } catch { /* best effort: the old page may already be gone */ }
    }

    const browser = await this.getBrowser();
    const page = await browser.newPage();
    page.setDefaultTimeout(this.config.defaultTimeout ?? 30000);
    this.pages.set(pageId, page);
    return page;
  }

  /** Close a specific page and forget its id. Unknown ids are ignored. */
  async closePage(pageId: string): Promise<void> {
    const page = this.pages.get(pageId);
    if (page && !page.isClosed()) {
      await page.close();
    }
    this.pages.delete(pageId);
  }

  /**
   * Close all pages and the browser. Errors while closing are ignored so that
   * shutdown always runs to completion.
   *
   * NOTE(review): browser.close() is also used for browsers attached via
   * `wsEndpoint`, which closes a browser this manager did not launch — confirm
   * whether disconnect() is the intended behaviour in that mode.
   */
  async shutdown(): Promise<void> {
    if (this.connecting) {
      // Wait for an in-flight launch so the browser it produces is closed below.
      try {
        await this.connecting;
      } catch { /* launch failed; nothing to close */ }
    }
    for (const page of this.pages.values()) {
      try {
        if (!page.isClosed()) await page.close();
      } catch { /* ignore */ }
    }
    this.pages.clear();
    if (this.browser) {
      try {
        await this.browser.close();
      } catch { /* ignore */ }
      this.browser = null;
    }
  }

  /**
   * Check if a browser is available (Chrome/Chromium exists).
   *
   * @param executablePath Explicit path to test; when omitted the common locations are probed.
   */
  static isAvailable(executablePath?: string): boolean {
    try {
      const path = executablePath ?? findChrome();
      return existsSync(path);
    } catch {
      return false;
    }
  }

  /** Connect to `wsEndpoint` when configured, otherwise launch a local browser. */
  private async openBrowser(): Promise<Browser> {
    if (this.config.wsEndpoint) {
      return puppeteer.connect({
        browserWSEndpoint: this.config.wsEndpoint,
      });
    }
    const executablePath = this.config.executablePath ?? findChrome();
    return puppeteer.launch({
      executablePath,
      headless: this.config.headless ?? true,
      // NOTE(review): the Chrome sandbox is disabled unconditionally — confirm this
      // is acceptable for every environment the tool runs in.
      args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
    });
  }

  /** Forget ids whose page has been closed so they do not count against `maxPages`. */
  private dropClosedPages(): void {
    for (const [id, page] of this.pages) {
      if (page.isClosed()) this.pages.delete(id);
    }
  }

  /** Throw when `occupied` slots already meet the configured page limit. */
  private ensureCapacity(occupied: number): void {
    const limit = this.config.maxPages ?? 5;
    if (occupied >= limit) {
      throw new Error(`Maximum concurrent pages (${limit}) reached`);
    }
  }
}
+157
View File
@@ -0,0 +1,157 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { createBrowserTools } from './tools.js';
import type { BrowserManager } from './manager.js';
// Builds a spy whose every call resolves to `value`.
const resolving = (value: unknown) => vi.fn().mockResolvedValue(value);

// Navigation / metadata spies.
const mockGoto = resolving(undefined);
const mockTitle = resolving('Test Page');
const mockUrl = vi.fn().mockReturnValue('https://example.com');

// Interaction spies.
const mockClick = resolving(undefined);
const mockType = resolving(undefined);
const mockKeyboard = { press: resolving(undefined) };

// Content / evaluation spies.
const mock$eval = resolving('Page content here');
const mockEvaluate = resolving({ result: 42 });

// Screenshot spies: one for the whole page, one for an element handle returned by $().
const mockScreenshot = resolving('base64data');
const mock$ = resolving({ screenshot: resolving('element-base64') });

// Fake Page assembled from the spies above; it always reports itself as open.
const mockPage = {
  goto: mockGoto,
  title: mockTitle,
  url: mockUrl,
  click: mockClick,
  type: mockType,
  $eval: mock$eval,
  evaluate: mockEvaluate,
  screenshot: mockScreenshot,
  $: mock$,
  keyboard: mockKeyboard,
  isClosed: () => false,
};

// Minimal BrowserManager stand-in: the tools only ever call getPage().
const mockManager = {
  getPage: resolving(mockPage),
} as unknown as BrowserManager;
describe('Browser tools', () => {
  let tools: ReturnType<typeof createBrowserTools>;

  /** Look up one of the freshly created tools by its registered name. */
  const toolNamed = (name: string) => tools.find(t => t.name === name)!;

  beforeEach(() => {
    vi.clearAllMocks();
    tools = createBrowserTools(mockManager);
  });

  it('creates all browser tools', () => {
    const names = tools.map(t => t.name);

    expect(names).toHaveLength(6);
    const expectedNames = [
      'browser.navigate',
      'browser.screenshot',
      'browser.click',
      'browser.type',
      'browser.content',
      'browser.eval',
    ];
    for (const expectedName of expectedNames) {
      expect(names).toContain(expectedName);
    }
  });

  it('browser.navigate navigates to URL', async () => {
    const result = await toolNamed('browser.navigate').execute({ url: 'https://example.com' });

    expect(result.success).toBe(true);
    expect(result.output).toContain('example.com');
    expect(mockGoto).toHaveBeenCalledWith('https://example.com', { waitUntil: 'domcontentloaded' });
  });

  it('browser.navigate respects custom waitUntil', async () => {
    await toolNamed('browser.navigate').execute({ url: 'https://example.com', waitUntil: 'networkidle0' });

    expect(mockGoto).toHaveBeenCalledWith('https://example.com', { waitUntil: 'networkidle0' });
  });

  it('browser.screenshot takes page screenshot', async () => {
    const result = await toolNamed('browser.screenshot').execute({});

    expect(result.success).toBe(true);
    expect(result.output).toContain('Screenshot captured');
    expect(mockScreenshot).toHaveBeenCalledWith({ encoding: 'base64', fullPage: false });
  });

  it('browser.screenshot takes element screenshot', async () => {
    const result = await toolNamed('browser.screenshot').execute({ selector: '#header' });

    expect(result.success).toBe(true);
    expect(mock$).toHaveBeenCalledWith('#header');
  });

  it('browser.screenshot fails for missing element', async () => {
    // Simulate a selector that matches nothing.
    mock$.mockResolvedValueOnce(null);

    const result = await toolNamed('browser.screenshot').execute({ selector: '#nonexistent' });

    expect(result.success).toBe(false);
    expect(result.error).toContain('Element not found');
  });

  it('browser.click clicks element', async () => {
    const result = await toolNamed('browser.click').execute({ selector: '#submit' });

    expect(result.success).toBe(true);
    expect(mockClick).toHaveBeenCalledWith('#submit');
  });

  it('browser.type types into element', async () => {
    const result = await toolNamed('browser.type').execute({ selector: '#search', text: 'hello' });

    expect(result.success).toBe(true);
    expect(mockType).toHaveBeenCalledWith('#search', 'hello');
  });

  it('browser.type clears field before typing when clear=true', async () => {
    await toolNamed('browser.type').execute({ selector: '#search', text: 'hello', clear: true });

    expect(mockClick).toHaveBeenCalledWith('#search', { count: 3 });
    expect(mockKeyboard.press).toHaveBeenCalledWith('Backspace');
    expect(mockType).toHaveBeenCalledWith('#search', 'hello');
  });

  it('browser.content returns page text', async () => {
    const result = await toolNamed('browser.content').execute({});

    expect(result.success).toBe(true);
    for (const fragment of ['Page content here', 'example.com', 'Test Page']) {
      expect(result.output).toContain(fragment);
    }
  });

  it('browser.content uses custom selector', async () => {
    await toolNamed('browser.content').execute({ selector: '#main' });

    expect(mock$eval).toHaveBeenCalledWith('#main', expect.any(Function));
  });

  it('browser.eval evaluates JS', async () => {
    const result = await toolNamed('browser.eval').execute({ expression: '1 + 1' });

    expect(result.success).toBe(true);
    expect(result.output).toContain('42');
  });

  it('browser.eval returns string results directly', async () => {
    mockEvaluate.mockResolvedValueOnce('hello world');

    const result = await toolNamed('browser.eval').execute({ expression: '"hello world"' });

    expect(result.success).toBe(true);
    expect(result.output).toBe('hello world');
  });

  it('handles navigation errors gracefully', async () => {
    mockGoto.mockRejectedValueOnce(new Error('Navigation failed'));

    const result = await toolNamed('browser.navigate').execute({ url: 'https://broken.example.com' });

    expect(result.success).toBe(false);
    expect(result.error).toContain('Navigation failed');
  });

  it('handles click errors gracefully', async () => {
    mockClick.mockRejectedValueOnce(new Error('Element not found'));

    const result = await toolNamed('browser.click').execute({ selector: '#missing' });

    expect(result.success).toBe(false);
    expect(result.error).toContain('Element not found');
  });
});
+239
View File
@@ -0,0 +1,239 @@
import type { Tool, ToolResult } from '../../types.js';
import type { BrowserManager } from './manager.js';
/**
 * Build the complete set of browser.* tools, each bound to the same BrowserManager.
 *
 * @param manager Manager every returned tool uses to obtain its page.
 * @returns The six tools in the order: navigate, screenshot, click, type, content, eval.
 */
export function createBrowserTools(manager: BrowserManager): Tool[] {
  const factories: Array<(boundTo: BrowserManager) => Tool> = [
    createBrowserNavigateTool,
    createBrowserScreenshotTool,
    createBrowserClickTool,
    createBrowserTypeTool,
    createBrowserContentTool,
    createBrowserEvalTool,
  ];
  return factories.map((build) => build(manager));
}
/**
 * Build the `browser.navigate` tool.
 *
 * The tool loads `url` in the manager's default page and reports the resulting
 * URL and title. Arguments are validated before the browser is touched: `url`
 * must be a non-empty string and `waitUntil`, when given, must be one of the
 * lifecycle events listed in the schema. Failures are returned as
 * `{ success: false, error }` rather than thrown.
 */
function createBrowserNavigateTool(manager: BrowserManager): Tool {
  const lifecycleEvents = ['load', 'domcontentloaded', 'networkidle0', 'networkidle2'] as const;
  type LifecycleEvent = (typeof lifecycleEvents)[number];
  const isLifecycleEvent = (value: unknown): value is LifecycleEvent =>
    typeof value === 'string' && (lifecycleEvents as readonly string[]).includes(value);

  return {
    name: 'browser.navigate',
    description: 'Navigate to a URL in the browser. Returns the page title and URL after navigation.',
    inputSchema: {
      type: 'object',
      properties: {
        url: { type: 'string', description: 'The URL to navigate to' },
        waitUntil: {
          type: 'string',
          description: 'When to consider navigation complete: load, domcontentloaded, networkidle0, networkidle2 (default: domcontentloaded)',
        },
      },
      required: ['url'],
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      const args = (rawArgs ?? {}) as { url?: unknown; waitUntil?: unknown };

      // Reject malformed input up front instead of launching a browser for it.
      if (typeof args.url !== 'string' || args.url.trim() === '') {
        return { success: false, output: '', error: 'browser.navigate requires a non-empty "url" string' };
      }
      const waitUntil = args.waitUntil ?? 'domcontentloaded';
      if (!isLifecycleEvent(waitUntil)) {
        return {
          success: false,
          output: '',
          error: `Invalid waitUntil ${JSON.stringify(waitUntil)}; expected one of: ${lifecycleEvents.join(', ')}`,
        };
      }

      try {
        const page = await manager.getPage();
        await page.goto(args.url, { waitUntil });
        const title = await page.title();
        const currentUrl = page.url();
        return {
          success: true,
          output: `Navigated to: ${currentUrl}\nTitle: ${title}`,
        };
      } catch (error) {
        return {
          success: false,
          output: '',
          error: error instanceof Error ? error.message : String(error),
        };
      }
    },
  };
}
/**
 * Build the `browser.screenshot` tool.
 *
 * Captures either the whole page or, when `selector` is given, a single
 * element. The result text contains the length of the base64 data and a
 * preview of at most 200 characters; the ellipsis is only appended when the
 * data was actually cut. Failures are returned as `{ success: false, error }`.
 */
function createBrowserScreenshotTool(manager: BrowserManager): Tool {
  // Number of base64 characters echoed back in the tool output.
  const previewLimit = 200;

  return {
    name: 'browser.screenshot',
    // The description states what is really returned: a size plus a short preview, not the full image.
    description: 'Take a screenshot of the current page or of a single element. Returns the size of the base64-encoded PNG and a preview of its first 200 characters.',
    inputSchema: {
      type: 'object',
      properties: {
        fullPage: { type: 'boolean', description: 'Capture full scrollable page (default: false)' },
        selector: { type: 'string', description: 'CSS selector to screenshot a specific element' },
      },
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      const args = rawArgs as { fullPage?: boolean; selector?: string };
      try {
        const page = await manager.getPage();
        let screenshotData: string;

        if (args.selector) {
          const element = await page.$(args.selector);
          if (!element) {
            return { success: false, output: '', error: `Element not found: ${args.selector}` };
          }
          screenshotData = (await element.screenshot({ encoding: 'base64' })) as string;
        } else {
          screenshotData = (await page.screenshot({
            encoding: 'base64',
            fullPage: args.fullPage ?? false,
          })) as string;
        }

        const preview = screenshotData.length > previewLimit
          ? `${screenshotData.slice(0, previewLimit)}...`
          : screenshotData;
        return {
          success: true,
          output: `Screenshot captured (base64 PNG, ${screenshotData.length} chars):\n${preview}`,
        };
      } catch (error) {
        return {
          success: false,
          output: '',
          error: error instanceof Error ? error.message : String(error),
        };
      }
    },
  };
}
/**
 * Build the `browser.click` tool.
 *
 * The tool clicks the element matching `selector` on the manager's default
 * page. Any error raised along the way is reported as
 * `{ success: false, error }` instead of being thrown.
 */
function createBrowserClickTool(manager: BrowserManager): Tool {
  const execute = async (rawArgs: unknown): Promise<ToolResult> => {
    const input = rawArgs as { selector: string };
    try {
      const activePage = await manager.getPage();
      await activePage.click(input.selector);
      return { success: true, output: `Clicked element: ${input.selector}` };
    } catch (caught) {
      let message: string;
      if (caught instanceof Error) {
        message = caught.message;
      } else {
        message = String(caught);
      }
      return { success: false, output: '', error: message };
    }
  };

  return {
    name: 'browser.click',
    description: 'Click an element on the page identified by CSS selector.',
    inputSchema: {
      type: 'object',
      properties: {
        selector: { type: 'string', description: 'CSS selector of the element to click' },
      },
      required: ['selector'],
    },
    execute,
  };
}
/**
 * Build the `browser.type` tool.
 *
 * The tool types `text` into the element matching `selector`. With
 * `clear: true` it first triple-clicks the element and presses Backspace to
 * remove the current selection. Errors are reported as
 * `{ success: false, error }` instead of being thrown.
 */
function createBrowserTypeTool(manager: BrowserManager): Tool {
  const execute = async (rawArgs: unknown): Promise<ToolResult> => {
    const input = rawArgs as { selector: string; text: string; clear?: boolean };
    try {
      const activePage = await manager.getPage();

      if (input.clear) {
        // Triple-click selects the existing content so Backspace can wipe it.
        await activePage.click(input.selector, { count: 3 });
        await activePage.keyboard.press('Backspace');
      }

      await activePage.type(input.selector, input.text);
      return { success: true, output: `Typed "${input.text}" into ${input.selector}` };
    } catch (caught) {
      let message: string;
      if (caught instanceof Error) {
        message = caught.message;
      } else {
        message = String(caught);
      }
      return { success: false, output: '', error: message };
    }
  };

  return {
    name: 'browser.type',
    description: 'Type text into an input element on the page.',
    inputSchema: {
      type: 'object',
      properties: {
        selector: { type: 'string', description: 'CSS selector of the input element' },
        text: { type: 'string', description: 'Text to type' },
        clear: { type: 'boolean', description: 'Clear the field before typing (default: false)' },
      },
      required: ['selector', 'text'],
    },
    execute,
  };
}
/**
 * Build the `browser.content` tool.
 *
 * The tool returns the visible text (`innerText`, falling back to
 * `textContent`) of the element matching `selector` (default: `body`),
 * prefixed with the page URL and title. Text longer than `maxLength`
 * characters is cut and annotated with the original length. A `maxLength`
 * that is not a positive finite number falls back to the 10000 default
 * instead of producing a nonsensical slice. Errors are reported as
 * `{ success: false, error }`.
 */
function createBrowserContentTool(manager: BrowserManager): Tool {
  const defaultMaxLength = 10000;

  return {
    name: 'browser.content',
    description: 'Get the text content of the page or a specific element. Returns extracted text (not raw HTML).',
    inputSchema: {
      type: 'object',
      properties: {
        selector: { type: 'string', description: 'CSS selector to get content of a specific element (default: body)' },
        maxLength: { type: 'number', description: 'Maximum characters to return (default: 10000)' },
      },
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      const args = rawArgs as { selector?: string; maxLength?: number };
      try {
        const page = await manager.getPage();
        const selector = args.selector ?? 'body';

        // Negative, zero, NaN or non-numeric limits would make slice() drop the
        // wrong part of the text, so they are replaced by the default.
        const requested = args.maxLength;
        const maxLength =
          typeof requested === 'number' && Number.isFinite(requested) && requested > 0
            ? Math.max(1, Math.floor(requested))
            : defaultMaxLength;

        // The $eval callback executes in the browser context where DOM types exist,
        // but TS checks against Node.js lib (no DOM). A structural cast via
        // `unknown` bridges the gap without pulling in DOM typings.
        const text = await page.$eval(selector, (el) => {
          const htmlEl = el as unknown as { innerText?: string; textContent?: string | null };
          return htmlEl.innerText || htmlEl.textContent || '';
        });

        const truncated = text.length > maxLength
          ? text.slice(0, maxLength) + `\n... (truncated, ${text.length} total chars)`
          : text;

        const title = await page.title();
        const url = page.url();
        return {
          success: true,
          output: `URL: ${url}\nTitle: ${title}\n\n${truncated}`,
        };
      } catch (error) {
        return {
          success: false,
          output: '',
          error: error instanceof Error ? error.message : String(error),
        };
      }
    },
  };
}
/**
 * Build the `browser.eval` tool.
 *
 * The tool evaluates `expression` inside the page and returns the result as
 * text: strings verbatim, `undefined` as "undefined", numbers, bigints and
 * booleans via `String()` (so NaN/Infinity are not rendered as "null" and
 * BigInt does not make serialization throw), everything else as pretty-printed
 * JSON. Errors are reported as `{ success: false, error }`.
 *
 * NOTE(review): the expression is executed with `eval` in the page context by
 * design; callers must treat this tool as arbitrary code execution in the page.
 */
function createBrowserEvalTool(manager: BrowserManager): Tool {
  /** Render an evaluation result as the tool's output string. */
  const render = (value: unknown): string => {
    if (typeof value === 'string') return value;
    if (value === undefined) return 'undefined';
    if (typeof value === 'number' || typeof value === 'bigint' || typeof value === 'boolean') {
      return String(value);
    }
    // JSON.stringify yields undefined for values it cannot represent (e.g. functions).
    return JSON.stringify(value, null, 2) ?? 'undefined';
  };

  return {
    name: 'browser.eval',
    description: 'Evaluate JavaScript in the browser page context. Returns the result as a string.',
    inputSchema: {
      type: 'object',
      properties: {
        expression: { type: 'string', description: 'JavaScript expression to evaluate in the page context' },
      },
      required: ['expression'],
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      const args = rawArgs as { expression: string };
      try {
        const page = await manager.getPage();
        // Use evaluate with a function that evaluates the expression string
        const result = await page.evaluate((expr: string) => {
          // eslint-disable-next-line no-eval
          return eval(expr);
        }, args.expression);

        return { success: true, output: render(result) };
      } catch (error) {
        return {
          success: false,
          output: '',
          error: error instanceof Error ? error.message : String(error),
        };
      }
    },
  };
}
+2
View File
@@ -11,6 +11,8 @@ export { createWebSearchTool } from './web-search.js';
export type { WebSearchConfig } from './web-search.js';
export { createProcessTools, ProcessManager } from './process/index.js';
export type { ProcessManagerConfig } from './process/index.js';
export { BrowserManager, createBrowserTools } from './browser/index.js';
export type { BrowserManagerConfig } from './browser/index.js';
import type { Tool } from '../types.js';
import type { MemoryStore } from '../../memory/store.js';
+2 -1
View File
@@ -5,9 +5,10 @@ export { ToolExecutor } from './executor.js';
export type { ToolExecutorConfig } from './executor.js';
export { ToolPolicy } from './policy.js';
export type { ToolPolicyContext } from './policy.js';
export { allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager } from './builtin/index.js';
export { allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools } from './builtin/index.js';
export type { WebSearchConfig } from './builtin/web-search.js';
export type { ProcessManagerConfig } from './builtin/process/index.js';
export type { BrowserManagerConfig } from './builtin/browser/index.js';
export { shellExecTool } from './builtin/shell.js';
export { fileReadTool } from './builtin/file-read.js';
export { fileWriteTool } from './builtin/file-write.js';
+6
View File
@@ -35,6 +35,12 @@ const PROFILE_TOOLS: Record<ToolProfile, Set<string>> = {
'process.output',
'process.kill',
'process.list',
'browser.navigate',
'browser.screenshot',
'browser.click',
'browser.type',
'browser.content',
'browser.eval',
]),
full: new Set(), // Special: matches everything
};