import { describe, it, expect } from 'vitest';
import { ToolExecutor } from './executor.js';
import { ToolRegistry } from './registry.js';
import { HookEngine } from '../hooks/engine.js';
import type { Tool } from './types.js';

// ---------------------------------------------------------------------------
// Fixture tools
// ---------------------------------------------------------------------------

/**
 * Builds a stub tool that accepts any object input and always resolves with
 * `{ success: true, output: 'ok' }`.
 */
function okTool(name: string, description: string): Tool {
  return {
    name,
    description,
    inputSchema: { type: 'object', properties: {} },
    execute: async () => ({ success: true, output: 'ok' }),
  };
}

/** Resolves with the `text` argument it was given. */
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echoes input',
  inputSchema: {
    type: 'object',
    properties: { text: { type: 'string' } },
    required: ['text'],
  },
  execute: async (args) => ({ success: true, output: (args as { text: string }).text }),
};

/** Resolves only after five seconds; used to trip the executor timeout. */
const slowTool: Tool = {
  name: 'test.slow',
  description: 'Takes forever',
  inputSchema: { type: 'object', properties: {} },
  execute: async () => {
    await new Promise(r => setTimeout(r, 5000));
    return { success: true, output: 'done' };
  },
};

/** Always rejects with an `Error('kaboom')`. */
const failTool: Tool = {
  name: 'test.fail',
  description: 'Throws',
  inputSchema: { type: 'object', properties: {} },
  execute: async () => {
    throw new Error('kaboom');
  },
};

/** Resolves with a 100,000-character output string. */
const bigOutputTool: Tool = {
  name: 'test.big',
  description: 'Returns huge output',
  inputSchema: { type: 'object', properties: {} },
  execute: async () => ({ success: true, output: 'x'.repeat(100_000) }),
};

/** Stub registered under the real `file.write` name. */
const fileWriteLikeTool: Tool = okTool('file.write', 'Test file write tool');

// ---------------------------------------------------------------------------
// Test helpers
// ---------------------------------------------------------------------------

/** Type of the optional third `ToolExecutor` constructor argument. */
type ExecutorOptions = ConstructorParameters<typeof ToolExecutor>[2];

/** Type of the decision object accepted by `HookEngine.resolveConfirmation`. */
type ConfirmationDecision = Parameters<HookEngine['resolveConfirmation']>[1];

/**
 * Creates a fresh registry, hook engine and executor for a single test.
 *
 * @param tools - Tools to register, in order.
 * @param setup - `confirm` lists the hook patterns placed in the engine's
 *   `confirm` list (defaults to none); `options` is forwarded as the
 *   executor's third constructor argument when provided.
 * @returns The three collaborating objects so tests can drive any of them.
 */
function createHarness(
  tools: readonly Tool[],
  setup: { confirm?: string[]; options?: ExecutorOptions } = {},
) {
  const registry = new ToolRegistry();
  for (const tool of tools) {
    registry.register(tool);
  }
  const hooks = new HookEngine({ confirm: setup.confirm ?? [], log: [], silent: [] });
  // Only pass the third argument when the test supplied one, so the
  // two-argument construction path stays exercised exactly as written.
  const executor =
    setup.options === undefined
      ? new ToolExecutor(registry, hooks)
      : new ToolExecutor(registry, hooks, setup.options);
  return { registry, hooks, executor };
}

/**
 * Asserts that exactly one confirmation is pending on `hooks` and resolves it
 * with `decision`.
 *
 * Must be called synchronously after `executor.execute(...)` (before awaiting
 * its promise), which is how every confirm-hook test below uses it.
 */
function resolveOnlyPending(hooks: HookEngine, decision: ConfirmationDecision): void {
  const pending = hooks.getPendingConfirmations();
  // Fail with a readable expectation instead of a TypeError on `pending[0]`.
  expect(pending).toHaveLength(1);
  hooks.resolveConfirmation(pending[0].id, decision);
}

// ---------------------------------------------------------------------------
// Suite
// ---------------------------------------------------------------------------

describe('ToolExecutor', () => {
  it('executes a tool and returns result', async () => {
    const { executor } = createHarness([echoTool]);

    const result = await executor.execute('test.echo', { text: 'hello' });

    expect(result.success).toBe(true);
    expect(result.output).toBe('hello');
  });

  it('returns error for unknown tool', async () => {
    const { executor } = createHarness([]);

    const result = await executor.execute('nonexistent', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('not found');
  });

  it('catches tool execution errors', async () => {
    const { executor } = createHarness([failTool]);

    const result = await executor.execute('test.fail', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('kaboom');
  });

  it('enforces timeout', async () => {
    const { executor } = createHarness([slowTool], { options: { defaultTimeoutMs: 100 } });

    const result = await executor.execute('test.slow', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('timed out');
  });

  it('truncates large output', async () => {
    const { executor } = createHarness([bigOutputTool], { options: { maxOutputBytes: 1000 } });

    const result = await executor.execute('test.big', {});

    expect(result.success).toBe(true);
    // The bound leaves headroom above maxOutputBytes for the truncation marker.
    expect(result.output.length).toBeLessThanOrEqual(1100);
    expect(result.output).toContain('[truncated]');
  });

  it('blocks on confirm hook and resolves when approved', async () => {
    const { executor, hooks } = createHarness([echoTool], { confirm: ['test.*'] });

    const resultPromise = executor.execute('test.echo', { text: 'hi' });
    resolveOnlyPending(hooks, { approved: true });
    const result = await resultPromise;

    expect(result.success).toBe(true);
    expect(result.output).toBe('hi');
  });

  it('blocks on confirm hook and returns denied', async () => {
    const { executor, hooks } = createHarness([echoTool], { confirm: ['test.*'] });

    const resultPromise = executor.execute('test.echo', { text: 'hi' });
    resolveOnlyPending(hooks, { approved: false, reason: 'nope' });
    const result = await resultPromise;

    expect(result.success).toBe(false);
    expect(result.error).toContain('denied');
  });

  it('conservative autonomy requires confirm for dangerous tools', async () => {
    // No confirm patterns are configured; the pending confirmation asserted
    // below comes from the 'conservative' autonomy level alone.
    const { executor, hooks } = createHarness([fileWriteLikeTool]);

    const resultPromise = executor.execute('file.write', {}, { autonomyLevel: 'conservative' });
    resolveOnlyPending(hooks, { approved: true });
    const result = await resultPromise;

    expect(result.success).toBe(true);
  });

  it('autonomous mode defers to explicit confirm hooks', async () => {
    const { executor, hooks } = createHarness([echoTool], { confirm: ['test.*'] });

    const resultPromise = executor.execute(
      'test.echo',
      { text: 'hi' },
      { autonomyLevel: 'autonomous' },
    );
    resolveOnlyPending(hooks, { approved: true });
    const result = await resultPromise;

    expect(result.success).toBe(true);
  });

  it('enforces skill filesystem write allowlist', async () => {
    const { executor } = createHarness([okTool('file.write', 'write')]);

    // Path inside the skill's declared write allowlist.
    const allowed = await executor.execute(
      'file.write',
      { path: '/tmp/flynn-skill-ok.txt', content: 'hello' },
      {
        skillName: 'test-skill',
        skillPermissions: {
          execution_environment: 'host',
          fs: { write: ['/tmp/**'] },
        },
        executionEnvironment: 'host',
        autonomyLevel: 'autonomous',
      },
    );
    expect(allowed.success).toBe(true);

    // Same permissions, path outside the allowlist.
    const denied = await executor.execute(
      'file.write',
      { path: '/etc/passwd', content: 'nope' },
      {
        skillName: 'test-skill',
        skillPermissions: {
          execution_environment: 'host',
          fs: { write: ['/tmp/**'] },
        },
        executionEnvironment: 'host',
        autonomyLevel: 'autonomous',
      },
    );
    expect(denied.success).toBe(false);
    expect(denied.error).toContain('path not allowed');
  });

  it('enforces tool secret scopes for skill contexts', async () => {
    const { executor } = createHarness([
      { ...okTool('gmail.list', 'gmail'), requiredSecretScopes: ['gmail'] },
    ]);

    const result = await executor.execute('gmail.list', {}, {
      skillName: 'no-secrets-skill',
      skillPermissions: { secrets: [] },
      executionEnvironment: 'host',
    });

    expect(result.success).toBe(false);
    expect(result.error).toContain('missing secret scopes');
  });

  it('blocks high-risk tool calls with injection markers when untrusted content is present', async () => {
    const { executor } = createHarness([okTool('shell.exec', 'shell')]);

    const result = await executor.execute('shell.exec', { command: 'rm -rf /' }, {
      untrustedContent: true,
      executionEnvironment: 'host',
    });

    expect(result.success).toBe(false);
    expect(result.error).toContain('blocked');
  });

  it('blocks passing secret-like args to network tools when untrusted content is present', async () => {
    const { executor } = createHarness([okTool('web.fetch', 'fetch')]);

    const result = await executor.execute(
      'web.fetch',
      { url: 'https://example.com', authorization: 'Bearer abcdef' },
      {
        untrustedContent: true,
        executionEnvironment: 'host',
      },
    );

    expect(result.success).toBe(false);
    expect(result.error).toContain('refusing to pass');
  });

  it('denies host high-risk tools for sandboxed skills unless elevation is active', async () => {
    const { executor, hooks } = createHarness([okTool('shell.exec', 'shell')]);

    // Without elevation the sandbox-only skill is refused outright.
    const denied = await executor.execute('shell.exec', { command: 'echo hi' }, {
      skillName: 'test-skill',
      skillPermissions: { execution_environment: 'sandbox' },
      executionEnvironment: 'host',
      autonomyLevel: 'autonomous',
    });
    expect(denied.success).toBe(false);
    expect(denied.error).toContain('execution_environment=host');

    // With an unexpired elevation the call proceeds, but only after a
    // confirmation is approved.
    const allowedPromise = executor.execute('shell.exec', { command: 'echo hi' }, {
      skillName: 'test-skill',
      skillPermissions: { execution_environment: 'sandbox' },
      executionEnvironment: 'host',
      elevatedHostUntilMs: Date.now() + 60_000,
      elevatedHostId: 'e1',
      autonomyLevel: 'autonomous',
    });
    resolveOnlyPending(hooks, { approved: true });
    const allowed = await allowedPromise;

    expect(allowed.success).toBe(true);
  });

  it('executes shell.exec in sandbox when executionEnvironment is sandbox', async () => {
    // The registered host implementation throws, so a successful result can
    // only have come from the sandbox path.
    const { executor } = createHarness([
      {
        name: 'shell.exec',
        description: 'host shell',
        inputSchema: { type: 'object', properties: {} },
        execute: async () => {
          throw new Error('host should not run');
        },
      },
    ]);

    // Minimal fakes: only the members this test needs are provided, hence the
    // `any` casts rather than full implementations of the sandbox types.
    const fakeSandbox = {
      exec: async () => ({ stdout: 'sandbox-out', stderr: '' }),
    } as any;
    const fakeManager = {
      getOrCreate: async () => fakeSandbox,
    } as any;
    executor.setSandboxManager(fakeManager);

    const result = await executor.execute('shell.exec', { command: 'echo hi' }, {
      executionEnvironment: 'sandbox',
      sessionId: 's1',
      autonomyLevel: 'autonomous',
    });

    expect(result.success).toBe(true);
    expect(result.output).toContain('sandbox-out');
  });
});