Add browser workflow reliability primitives and guardrails

This commit is contained in:
William Valentin
2026-02-26 14:06:46 -08:00
parent 3cc9e16ef5
commit 7c904ef0fd
7 changed files with 1185 additions and 272 deletions
+21
View File
@@ -243,6 +243,13 @@ describe('configSchema — browser', () => {
expect(result.browser.headless).toBe(true);
expect(result.browser.max_pages).toBe(5);
expect(result.browser.default_timeout).toBe(30000);
expect(result.browser.allowed_domains).toEqual([]);
expect(result.browser.high_risk_domains).toEqual([]);
expect(result.browser.require_confirmation_for_high_risk).toBe(true);
expect(result.browser.max_workflow_steps).toBe(120);
expect(result.browser.default_retry_attempts).toBe(1);
expect(result.browser.max_retry_attempts).toBe(5);
expect(result.browser.retry_delay_ms).toBe(250);
});
it('accepts explicit browser config', () => {
@@ -254,6 +261,13 @@ describe('configSchema — browser', () => {
headless: false,
max_pages: 3,
default_timeout: 45000,
allowed_domains: ['example.com', '*.trusted.test'],
high_risk_domains: ['bank.example.com'],
require_confirmation_for_high_risk: true,
max_workflow_steps: 40,
default_retry_attempts: 2,
max_retry_attempts: 6,
retry_delay_ms: 500,
},
});
@@ -262,6 +276,13 @@ describe('configSchema — browser', () => {
expect(result.browser.headless).toBe(false);
expect(result.browser.max_pages).toBe(3);
expect(result.browser.default_timeout).toBe(45000);
expect(result.browser.allowed_domains).toEqual(['example.com', '*.trusted.test']);
expect(result.browser.high_risk_domains).toEqual(['bank.example.com']);
expect(result.browser.require_confirmation_for_high_risk).toBe(true);
expect(result.browser.max_workflow_steps).toBe(40);
expect(result.browser.default_retry_attempts).toBe(2);
expect(result.browser.max_retry_attempts).toBe(6);
expect(result.browser.retry_delay_ms).toBe(500);
});
});
+7
View File
@@ -781,6 +781,13 @@ const browserSchema = z.object({
headless: z.boolean().default(true),
max_pages: z.number().min(1).max(20).default(5),
default_timeout: z.number().min(1000).max(120000).default(30000),
allowed_domains: z.array(z.string().min(1)).default([]),
high_risk_domains: z.array(z.string().min(1)).default([]),
require_confirmation_for_high_risk: z.boolean().default(true),
max_workflow_steps: z.number().int().min(1).max(1000).default(120),
default_retry_attempts: z.number().int().min(1).max(10).default(1),
max_retry_attempts: z.number().int().min(1).max(20).default(5),
retry_delay_ms: z.number().int().min(0).max(10000).default(250),
}).default({});
const processSchema = z.object({
+23 -2
View File
@@ -67,7 +67,20 @@ export function initTools(deps: ToolsDeps): ToolsResult {
}
// Initialize browser manager and register browser tools (if enabled)
const browserToolNames = ['browser.navigate', 'browser.screenshot', 'browser.click', 'browser.type', 'browser.content', 'browser.eval', 'browser.evaluate'];
const browserToolNames = [
'browser.navigate',
'browser.screenshot',
'browser.click',
'browser.type',
'browser.content',
'browser.wait_for',
'browser.assert',
'browser.extract',
'browser.checkpoint.save',
'browser.checkpoint.resume',
'browser.eval',
'browser.evaluate',
];
let browserManager: BrowserManager | undefined;
if (config.browser?.enabled) {
const manager = new BrowserManager({
@@ -79,7 +92,15 @@ export function initTools(deps: ToolsDeps): ToolsResult {
});
browserManager = manager;
for (const tool of createBrowserTools(manager)) {
for (const tool of createBrowserTools(manager, {
allowedDomains: config.browser.allowed_domains,
highRiskDomains: config.browser.high_risk_domains,
requireHighRiskConfirmation: config.browser.require_confirmation_for_high_risk,
maxWorkflowSteps: config.browser.max_workflow_steps,
defaultRetryAttempts: config.browser.default_retry_attempts,
maxRetryAttempts: config.browser.max_retry_attempts,
retryDelayMs: config.browser.retry_delay_ms,
})) {
toolRegistry.register(tool);
}
console.log(`Browser tools enabled (headless=${config.browser.headless})`);
+157 -3
View File
@@ -8,7 +8,10 @@ const mockUrl = vi.fn().mockReturnValue('https://example.com');
const mockClick = vi.fn().mockResolvedValue(undefined);
const mockType = vi.fn().mockResolvedValue(undefined);
const mock$eval = vi.fn().mockResolvedValue('Page content here');
const mock$$eval = vi.fn().mockResolvedValue(['Row 1', 'Row 2']);
const mockEvaluate = vi.fn().mockResolvedValue({ result: 42 });
const mockWaitForSelector = vi.fn().mockResolvedValue(undefined);
const mockWaitForFunction = vi.fn().mockResolvedValue(undefined);
const mockScreenshot = vi.fn().mockResolvedValue('base64data');
const mock$ = vi.fn().mockResolvedValue({ screenshot: vi.fn().mockResolvedValue('element-base64') });
const mockKeyboard = { press: vi.fn().mockResolvedValue(undefined) };
@@ -20,7 +23,10 @@ const mockPage = {
click: mockClick,
type: mockType,
$eval: mock$eval,
$$eval: mock$$eval,
evaluate: mockEvaluate,
waitForSelector: mockWaitForSelector,
waitForFunction: mockWaitForFunction,
screenshot: mockScreenshot,
$: mock$,
keyboard: mockKeyboard,
@@ -44,6 +50,7 @@ describe('Browser tools', () => {
beforeEach(() => {
vi.clearAllMocks();
mockUrl.mockReturnValue('https://example.com');
tools = createBrowserTools(mockManager);
});
@@ -54,9 +61,14 @@ describe('Browser tools', () => {
expect(names).toContain('browser.click');
expect(names).toContain('browser.type');
expect(names).toContain('browser.content');
expect(names).toContain('browser.wait_for');
expect(names).toContain('browser.assert');
expect(names).toContain('browser.extract');
expect(names).toContain('browser.checkpoint.save');
expect(names).toContain('browser.checkpoint.resume');
expect(names).toContain('browser.eval');
expect(names).toContain('browser.evaluate');
expect(names).toHaveLength(7);
expect(names).toHaveLength(12);
});
it('browser.navigate navigates to URL', async () => {
@@ -73,6 +85,17 @@ describe('Browser tools', () => {
expect(mockGoto).toHaveBeenCalledWith('https://example.com', { waitUntil: 'networkidle0' });
});
// Queues one rejected goto() and allows two attempts with no delay; the tool
// is expected to call goto() a second time and report success.
it('browser.navigate retries on transient errors', async () => {
  const transientFailure = new Error('temporary down');
  mockGoto.mockRejectedValueOnce(transientFailure);

  const navigateTool = getTool('browser.navigate');
  const outcome = await navigateTool.execute({
    url: 'https://example.com',
    retry: { attempts: 2, delay_ms: 0 },
  });

  expect(outcome.success).toBe(true);
  expect(mockGoto).toHaveBeenCalledTimes(2);
});
it('browser.screenshot takes page screenshot', async () => {
const tool = getTool('browser.screenshot');
const result = await tool.execute({});
@@ -103,6 +126,17 @@ describe('Browser tools', () => {
expect(mockClick).toHaveBeenCalledWith('#submit');
});
// Queues one rejected click() and allows two attempts with no delay; the tool
// is expected to click again and report success.
it('browser.click retries on transient failures', async () => {
  const transientFailure = new Error('click miss');
  mockClick.mockRejectedValueOnce(transientFailure);

  const clickTool = getTool('browser.click');
  const outcome = await clickTool.execute({
    selector: '#submit',
    retry: { attempts: 2, delay_ms: 0 },
  });

  expect(outcome.success).toBe(true);
  expect(mockClick).toHaveBeenCalledTimes(2);
});
it('browser.type types into element', async () => {
const tool = getTool('browser.type');
const result = await tool.execute({ selector: '#search', text: 'hello' });
@@ -133,6 +167,75 @@ describe('Browser tools', () => {
expect(mock$eval).toHaveBeenCalledWith('#main', expect.any(Function));
});
// Passing both a selector and a text condition should hit waitForSelector
// (with the caller's timeout and visible=false) and waitForFunction.
it('browser.wait_for waits on selector and text', async () => {
  const waitTool = getTool('browser.wait_for');
  const outcome = await waitTool.execute({
    selector: '#loaded',
    text: 'Ready',
    timeout_ms: 5000,
  });

  expect(outcome.success).toBe(true);
  expect(mockWaitForSelector).toHaveBeenCalledWith('#loaded', {
    timeout: 5000,
    visible: false,
  });
  expect(mockWaitForFunction).toHaveBeenCalled();
});
// Happy path: selector, existence, text and URL-substring conditions are all
// supplied together and the assertion is expected to succeed.
it('browser.assert validates selector/text/url conditions', async () => {
  const assertTool = getTool('browser.assert');
  const conditions = {
    selector: '#main',
    exists: true,
    text: 'Page content here',
    url_includes: 'example.com',
  };

  const outcome = await assertTool.execute(conditions);

  expect(outcome.success).toBe(true);
});
// Arranges the mocks so the URL does not contain 'nope', $eval resolves text
// other than 'not present', and $ resolves null; with a single attempt the
// tool is expected to report a "failed after retries" error.
it('browser.assert fails when conditions are not met', async () => {
  mockUrl.mockReturnValue('https://example.com/path');
  mock$eval.mockResolvedValueOnce('different content');
  mock$.mockResolvedValueOnce(null);

  const assertTool = getTool('browser.assert');
  const outcome = await assertTool.execute({
    selector: '#missing',
    exists: true,
    text: 'not present',
    url_includes: 'nope',
    retry: { attempts: 1 },
  });

  expect(outcome.success).toBe(false);
  expect(outcome.error).toContain('failed after retries');
});
// Without `all`, the value resolved by $eval is expected to appear in the
// output under a "value" key.
it('browser.extract returns a single value by selector', async () => {
  mock$eval.mockResolvedValueOnce('Primary value');

  const extractTool = getTool('browser.extract');
  const outcome = await extractTool.execute({ selector: '#value' });

  expect(outcome.success).toBe(true);
  expect(outcome.output).toContain('"value": "Primary value"');
});
// With all=true, the three items resolved by $$eval are expected to surface as
// a "values" entry together with a count of 3.
it('browser.extract returns array values when all=true', async () => {
  const rows = ['A', 'B', 'C'];
  mock$$eval.mockResolvedValueOnce(rows);

  const extractTool = getTool('browser.extract');
  const outcome = await extractTool.execute({ selector: '.row', all: true });

  expect(outcome.success).toBe(true);
  expect(outcome.output).toContain('"count": 3');
  expect(outcome.output).toContain('"values"');
});
// Round trip: save a checkpoint under an id, then resume it. Resuming is
// expected to call goto() with 'https://example.com' and
// waitUntil 'domcontentloaded'.
it('browser.checkpoint.save and resume navigates to saved url', async () => {
  const checkpointArgs = { checkpoint_id: 'cp-1' };
  const save = getTool('browser.checkpoint.save');
  const resume = getTool('browser.checkpoint.resume');

  const saveOutcome = await save.execute({ ...checkpointArgs });
  expect(saveOutcome.success).toBe(true);
  expect(saveOutcome.output).toContain('cp-1');

  const resumeOutcome = await resume.execute({ ...checkpointArgs });
  expect(resumeOutcome.success).toBe(true);
  expect(mockGoto).toHaveBeenCalledWith('https://example.com', {
    waitUntil: 'domcontentloaded',
  });
});
it('browser.eval evaluates JS', async () => {
const tool = getTool('browser.eval');
const result = await tool.execute({ expression: '1 + 1' });
@@ -155,10 +258,61 @@ describe('Browser tools', () => {
expect(result.output).toContain('42');
});
// With allowedDomains limited to example.com, navigating to another host is
// expected to fail with an error that mentions 'allowed_domains'.
it('enforces allowed domain guardrail for navigation', async () => {
  const guardrails = { allowedDomains: ['example.com'] };
  const restrictedTools = createBrowserTools(mockManager, guardrails);

  const navigateTool = restrictedTools.find(
    (candidate) => candidate.name === 'browser.navigate',
  );
  if (!navigateTool) {
    throw new Error('missing navigate tool');
  }

  const outcome = await navigateTool.execute({ url: 'https://blocked.test' });

  expect(outcome.success).toBe(false);
  expect(outcome.error).toContain('allowed_domains');
});
// A domain listed in highRiskDomains is expected to be refused until the call
// carries confirm_high_risk: true; the same URL then succeeds.
it('requires explicit high-risk confirmation for configured domains', async () => {
  const riskyUrl = 'https://bank.example.com';
  const guardedTools = createBrowserTools(mockManager, {
    highRiskDomains: ['bank.example.com'],
    requireHighRiskConfirmation: true,
  });

  const navigateTool = guardedTools.find(
    (candidate) => candidate.name === 'browser.navigate',
  );
  if (!navigateTool) {
    throw new Error('missing navigate tool');
  }

  // First attempt omits the confirmation flag.
  const unconfirmed = await navigateTool.execute({ url: riskyUrl });
  expect(unconfirmed.success).toBe(false);
  expect(unconfirmed.error).toContain('confirm_high_risk=true');

  // Second attempt opts in explicitly.
  const confirmed = await navigateTool.execute({
    url: riskyUrl,
    confirm_high_risk: true,
  });
  expect(confirmed.success).toBe(true);
});
// With maxWorkflowSteps set to 1, the first tool call is expected to succeed
// and the next call on the same tool set to fail with a 'budget exhausted' error.
it('enforces workflow step budget', async () => {
  const budgetedTools = createBrowserTools(mockManager, { maxWorkflowSteps: 1 });

  const navigateTool = budgetedTools.find(
    (candidate) => candidate.name === 'browser.navigate',
  );
  const clickTool = budgetedTools.find(
    (candidate) => candidate.name === 'browser.click',
  );
  if (!navigateTool || !clickTool) {
    throw new Error('missing browser tools');
  }

  const withinBudget = await navigateTool.execute({ url: 'https://example.com' });
  expect(withinBudget.success).toBe(true);

  const overBudget = await clickTool.execute({ selector: '#submit' });
  expect(overBudget.success).toBe(false);
  expect(overBudget.error).toContain('budget exhausted');
});
it('handles navigation errors gracefully', async () => {
mockGoto.mockRejectedValueOnce(new Error('Navigation failed'));
const tool = getTool('browser.navigate');
const result = await tool.execute({ url: 'https://broken.example.com' });
const result = await tool.execute({ url: 'https://broken.example.com', retry: { attempts: 1 } });
expect(result.success).toBe(false);
expect(result.error).toContain('Navigation failed');
});
@@ -166,7 +320,7 @@ describe('Browser tools', () => {
it('handles click errors gracefully', async () => {
mockClick.mockRejectedValueOnce(new Error('Element not found'));
const tool = getTool('browser.click');
const result = await tool.execute({ selector: '#missing' });
const result = await tool.execute({ selector: '#missing', retry: { attempts: 1 } });
expect(result.success).toBe(false);
expect(result.error).toContain('Element not found');
});
File diff suppressed because it is too large Load Diff
+22 -1
View File
@@ -108,6 +108,11 @@ const PROFILE_TOOLS: Record<ToolProfile, Set<string>> = {
'browser.click',
'browser.type',
'browser.content',
'browser.wait_for',
'browser.assert',
'browser.extract',
'browser.checkpoint.save',
'browser.checkpoint.resume',
'browser.eval',
'browser.evaluate',
'agent.delegate',
@@ -129,7 +134,23 @@ const PROFILE_TOOLS: Record<ToolProfile, Set<string>> = {
export const TOOL_GROUPS: Record<string, string[]> = {
'group:fs': ['file.read', 'file.write', 'file.edit', 'file.patch', 'file.list'],
'group:runtime': ['shell.exec', 'process.start', 'process.output', 'process.status', 'process.kill', 'process.list', 'screen.capture', 'camera.capture'],
'group:web': ['web.fetch', 'web.search', 'web.search.news', 'browser.navigate', 'browser.screenshot', 'browser.click', 'browser.type', 'browser.content', 'browser.eval', 'browser.evaluate'],
'group:web': [
'web.fetch',
'web.search',
'web.search.news',
'browser.navigate',
'browser.screenshot',
'browser.click',
'browser.type',
'browser.content',
'browser.wait_for',
'browser.assert',
'browser.extract',
'browser.checkpoint.save',
'browser.checkpoint.resume',
'browser.eval',
'browser.evaluate',
],
'group:memory': ['memory.read', 'memory.write', 'memory.search'],
'group:gmail': ['gmail.list', 'gmail.search', 'gmail.read', 'gmail.filter.create'],
'group:gcal': ['calendar.today', 'calendar.list', 'calendar.search'],