// File: flynn/src/tools/executor.test.ts
// Last modified: 2026-02-15 17:02:05 -08:00
// Size as reported by the repository viewer: 330 lines, 12 KiB (TypeScript)

import { describe, it, expect } from 'vitest';
import { ToolExecutor } from './executor.js';
import { ToolRegistry } from './registry.js';
import { HookEngine } from '../hooks/engine.js';
import type { Tool } from './types.js';
/**
 * Test fixture registered as `test.echo`.
 *
 * Its schema requires a string `text` property, and `execute` resolves to a
 * successful result whose `output` is that `text` value unchanged.
 */
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echoes input',
  inputSchema: {
    type: 'object',
    properties: { text: { type: 'string' } },
    required: ['text'],
  },
  async execute(args) {
    // The fixture does no validation of its own; it simply reads `text`.
    const payload = args as { text: string };
    return { success: true, output: payload.text };
  },
};
/**
 * Test fixture registered as `test.slow`.
 *
 * `execute` waits on a 5000 ms timer before resolving with `'done'`, which is
 * far longer than the 100 ms `defaultTimeoutMs` the timeout test in this file
 * configures.
 */
const slowTool: Tool = {
  name: 'test.slow',
  description: 'Takes forever',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    await new Promise((resolve) => {
      setTimeout(resolve, 5000);
    });
    return { success: true, output: 'done' };
  },
};
/**
 * Test fixture registered as `test.fail`.
 *
 * `execute` always rejects with `Error('kaboom')`; because it is `async`, the
 * failure surfaces as a rejected promise rather than a synchronous throw.
 */
const failTool: Tool = {
  name: 'test.fail',
  description: 'Throws',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    throw new Error('kaboom');
  },
};
/**
 * Test fixture registered as `test.big`.
 *
 * `execute` resolves successfully with an output of 100,000 `'x'` characters,
 * built afresh on every call.
 */
const bigOutputTool: Tool = {
  name: 'test.big',
  description: 'Returns huge output',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    const hugeOutput = 'x'.repeat(100_000);
    return { success: true, output: hugeOutput };
  },
};
/**
 * Test fixture registered under the name `file.write`.
 *
 * It performs no I/O: `execute` just resolves with `{ success: true,
 * output: 'ok' }`. The conservative-autonomy test in this file registers it
 * and expects a confirmation to be requested for that tool name.
 */
const fileWriteLikeTool: Tool = {
  name: 'file.write',
  description: 'Test file write tool',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    return { success: true, output: 'ok' };
  },
};
/**
 * Suite for `ToolExecutor.execute`.
 *
 * Every test builds its own `ToolRegistry`, `HookEngine` and `ToolExecutor`,
 * so no state is shared between cases. Tests that involve a confirmation
 * start `execute()` without awaiting it, read the pending confirmation from
 * the hook engine, resolve it, and only then await the result.
 */
describe('ToolExecutor', () => {
  // --- Basic execution -----------------------------------------------------

  it('executes a tool and returns result', async () => {
    const registry = new ToolRegistry();
    registry.register(echoTool);
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const result = await executor.execute('test.echo', { text: 'hello' });

    expect(result.success).toBe(true);
    expect(result.output).toBe('hello');
  });

  it('returns error for unknown tool', async () => {
    // Nothing is registered, so the lookup for 'nonexistent' must fail.
    const registry = new ToolRegistry();
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const result = await executor.execute('nonexistent', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('not found');
  });

  it('catches tool execution errors', async () => {
    const registry = new ToolRegistry();
    registry.register(failTool);
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // The tool rejects; the executor is expected to report it as a failed
    // result instead of letting the rejection escape.
    const result = await executor.execute('test.fail', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('kaboom');
  });

  // --- Limits ----------------------------------------------------------------

  it('enforces timeout', async () => {
    const registry = new ToolRegistry();
    registry.register(slowTool);
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    // 100 ms budget against a tool that takes 5000 ms.
    const executor = new ToolExecutor(registry, hooks, { defaultTimeoutMs: 100 });

    const result = await executor.execute('test.slow', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('timed out');
  });

  it('truncates large output', async () => {
    const registry = new ToolRegistry();
    registry.register(bigOutputTool);
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks, { maxOutputBytes: 1000 });

    const result = await executor.execute('test.big', {});

    expect(result.success).toBe(true);
    // The bound is 1100 rather than 1000 — presumably headroom for the
    // '[truncated]' marker that is appended to the cut output.
    expect(result.output.length).toBeLessThanOrEqual(1100);
    expect(result.output).toContain('[truncated]');
  });

  // --- Confirmation hooks ------------------------------------------------------

  it('blocks on confirm hook and resolves when approved', async () => {
    const registry = new ToolRegistry();
    registry.register(echoTool);
    const hooks = new HookEngine({ confirm: ['test.*'], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // Deliberately not awaited yet: the call should be parked on the
    // confirmation until it is resolved below.
    const resultPromise = executor.execute('test.echo', { text: 'hi' });
    const pending = hooks.getPendingConfirmations();
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: true });

    const result = await resultPromise;
    expect(result.success).toBe(true);
    expect(result.output).toBe('hi');
  });

  it('blocks on confirm hook and returns denied', async () => {
    const registry = new ToolRegistry();
    registry.register(echoTool);
    const hooks = new HookEngine({ confirm: ['test.*'], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const resultPromise = executor.execute('test.echo', { text: 'hi' });
    const pending = hooks.getPendingConfirmations();
    // Guard before indexing, as the sibling confirmation tests do, so a
    // missing confirmation fails with a clear assertion message rather than
    // a TypeError on `pending[0].id`.
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: false, reason: 'nope' });

    const result = await resultPromise;
    expect(result.success).toBe(false);
    expect(result.error).toContain('denied');
  });

  // --- Autonomy levels -----------------------------------------------------------

  it('conservative autonomy requires confirm for dangerous tools', async () => {
    const registry = new ToolRegistry();
    registry.register(fileWriteLikeTool);
    // No confirm patterns are configured; the confirmation has to come from
    // the 'conservative' autonomy level alone.
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const resultPromise = executor.execute('file.write', {}, { autonomyLevel: 'conservative' });
    const pending = hooks.getPendingConfirmations();
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: true });

    const result = await resultPromise;
    expect(result.success).toBe(true);
  });

  it('autonomous mode defers to explicit confirm hooks', async () => {
    const registry = new ToolRegistry();
    registry.register(echoTool);
    // An explicit confirm pattern is configured, and the test expects it to
    // still produce a confirmation under 'autonomous'.
    const hooks = new HookEngine({ confirm: ['test.*'], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const resultPromise = executor.execute('test.echo', { text: 'hi' }, { autonomyLevel: 'autonomous' });
    const pending = hooks.getPendingConfirmations();
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: true });

    const result = await resultPromise;
    expect(result.success).toBe(true);
  });

  // --- Skill permissions -----------------------------------------------------------

  it('enforces skill filesystem write allowlist', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'file.write',
      description: 'write',
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'ok' }),
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // Path inside the skill's write allowlist ('/tmp/**').
    const allowed = await executor.execute(
      'file.write',
      { path: '/tmp/flynn-skill-ok.txt', content: 'hello' },
      {
        skillName: 'test-skill',
        skillPermissions: {
          execution_environment: 'host',
          fs: { write: ['/tmp/**'] },
        },
        executionEnvironment: 'host',
        autonomyLevel: 'autonomous',
      },
    );
    expect(allowed.success).toBe(true);

    // Same permissions, but the target path is outside the allowlist.
    const denied = await executor.execute(
      'file.write',
      { path: '/etc/passwd', content: 'nope' },
      {
        skillName: 'test-skill',
        skillPermissions: {
          execution_environment: 'host',
          fs: { write: ['/tmp/**'] },
        },
        executionEnvironment: 'host',
        autonomyLevel: 'autonomous',
      },
    );
    expect(denied.success).toBe(false);
    expect(denied.error).toContain('path not allowed');
  });

  it('enforces tool secret scopes for skill contexts', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'gmail.list',
      description: 'gmail',
      requiredSecretScopes: ['gmail'],
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'ok' }),
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // The tool requires the 'gmail' scope; the skill grants no secrets.
    const result = await executor.execute('gmail.list', {}, {
      skillName: 'no-secrets-skill',
      skillPermissions: { secrets: [] },
      executionEnvironment: 'host',
    });

    expect(result.success).toBe(false);
    expect(result.error).toContain('missing secret scopes');
  });

  // --- Untrusted content ---------------------------------------------------------------

  it('blocks high-risk tool calls with injection markers when untrusted content is present', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'shell.exec',
      description: 'shell',
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'ok' }),
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const result = await executor.execute('shell.exec', { command: 'rm -rf /' }, {
      untrustedContent: true,
      executionEnvironment: 'host',
    });

    expect(result.success).toBe(false);
    expect(result.error).toContain('blocked');
  });

  it('blocks passing secret-like args to network tools when untrusted content is present', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'web.fetch',
      description: 'fetch',
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'ok' }),
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // The `authorization` argument carries a bearer-token-shaped value.
    const result = await executor.execute('web.fetch', { url: 'https://example.com', authorization: 'Bearer abcdef' }, {
      untrustedContent: true,
      executionEnvironment: 'host',
    });

    expect(result.success).toBe(false);
    expect(result.error).toContain('refusing to pass');
  });

  // --- Execution environments --------------------------------------------------------------

  it('denies host high-risk tools for sandboxed skills unless elevation is active', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'shell.exec',
      description: 'shell',
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'ok' }),
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // A skill declared as sandbox-only asks to run on the host without any
    // elevation fields: expected to be refused outright.
    const denied = await executor.execute('shell.exec', { command: 'echo hi' }, {
      skillName: 'test-skill',
      skillPermissions: { execution_environment: 'sandbox' },
      executionEnvironment: 'host',
      autonomyLevel: 'autonomous',
    });
    expect(denied.success).toBe(false);
    expect(denied.error).toContain('execution_environment=host');

    // Same request with an elevation window ending 60 s from now: the call
    // is expected to reach a confirmation and succeed once approved.
    const allowedPromise = executor.execute('shell.exec', { command: 'echo hi' }, {
      skillName: 'test-skill',
      skillPermissions: { execution_environment: 'sandbox' },
      executionEnvironment: 'host',
      elevatedHostUntilMs: Date.now() + 60_000,
      elevatedHostId: 'e1',
      autonomyLevel: 'autonomous',
    });
    const pending = hooks.getPendingConfirmations();
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: true });

    const allowed = await allowedPromise;
    expect(allowed.success).toBe(true);
  });

  it('executes shell.exec in sandbox when executionEnvironment is sandbox', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'shell.exec',
      description: 'host shell',
      inputSchema: { type: 'object', properties: {} },
      // Tripwire: if the registered host implementation runs, the test fails.
      execute: async () => { throw new Error('host should not run'); },
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // Minimal stand-ins that provide only `exec` / `getOrCreate`; they are
    // cast to `any` because they do not implement the full project types.
    const fakeSandbox = {
      exec: async () => ({ stdout: 'sandbox-out', stderr: '' }),
    } as any;
    const fakeManager = {
      getOrCreate: async () => fakeSandbox,
    } as any;
    executor.setSandboxManager(fakeManager);

    const result = await executor.execute('shell.exec', { command: 'echo hi' }, {
      executionEnvironment: 'sandbox',
      sessionId: 's1',
      autonomyLevel: 'autonomous',
    });

    expect(result.success).toBe(true);
    expect(result.output).toContain('sandbox-out');
  });
});