Files
flynn/src/tools/executor.test.ts
T
William Valentin b00706325b feat: add tool framework foundation (types, registry, executor, shell tool, model types, SOUL.md)
- Task 0: SOUL.md + loadSystemPrompt() in daemon
- Task 1: Tool type definitions (Tool, ToolCall, ToolResult, etc.)
- Task 2: ToolRegistry with Anthropic/OpenAI serialization
- Task 3: ToolExecutor with hooks, timeout, truncation
- Task 4: shell.exec builtin tool
- Task 8: Model types updated for tool use (ToolDefinition, ModelToolCall, etc.)
- Task 15: Model index exports for tool types
2026-02-05 17:39:40 -08:00

127 lines
4.5 KiB
TypeScript

import { describe, it, expect } from 'vitest';
import { ToolExecutor } from './executor.js';
import { ToolRegistry } from './registry.js';
import { HookEngine } from '../hooks/engine.js';
import type { Tool } from './types.js';
/**
 * Test fixture: a tool that always succeeds and returns the `text` argument
 * it was given as its output.
 */
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echoes input',
  inputSchema: {
    type: 'object',
    properties: { text: { type: 'string' } },
    required: ['text'],
  },
  async execute(input) {
    // The schema above declares `text` as a required string, so the cast mirrors it.
    const payload = input as { text: string };
    return { success: true, output: payload.text };
  },
};
/**
 * Test fixture: a tool that waits on a five-second timer before reporting
 * success with the output `'done'`.
 */
const slowTool: Tool = {
  name: 'test.slow',
  description: 'Takes forever',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    const delayMs = 5000;
    await new Promise<void>((resolve) => {
      setTimeout(resolve, delayMs);
    });
    return { success: true, output: 'done' };
  },
};
/**
 * Test fixture: a tool whose execution always rejects with an `Error`
 * carrying the message `'kaboom'`.
 */
const failTool: Tool = {
  name: 'test.fail',
  description: 'Throws',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    const failure = new Error('kaboom');
    throw failure;
  },
};
/**
 * Test fixture: a tool that succeeds with a 100,000-character output made
 * entirely of the letter `x`.
 */
const bigOutputTool: Tool = {
  name: 'test.big',
  description: 'Returns huge output',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    const payload = 'x'.repeat(100_000);
    return { success: true, output: payload };
  },
};
/**
 * Tests for ToolExecutor: successful execution, unknown tools, thrown errors,
 * timeout enforcement, output truncation, and the confirm-hook flow for both
 * approval and denial.
 */
describe('ToolExecutor', () => {
  /** Creates a registry and registers each of the given fixture tools on it. */
  const registryWith = (...tools: Tool[]): ToolRegistry => {
    const registry = new ToolRegistry();
    for (const tool of tools) {
      registry.register(tool);
    }
    return registry;
  };

  /** Hook engine whose confirm, log and silent lists are all empty. */
  const permissiveHooks = (): HookEngine =>
    new HookEngine({ confirm: [], log: [], silent: [] });

  /**
   * Hook engine whose confirm list holds `'test.*'` (presumably a pattern that
   * matches the `test.*` fixture tool names; verify against HookEngine).
   */
  const confirmingHooks = (): HookEngine =>
    new HookEngine({ confirm: ['test.*'], log: [], silent: [] });

  /**
   * Asserts that exactly one confirmation is pending and returns it.
   *
   * The `expect` gives a readable failure when nothing is pending; the
   * explicit guard keeps the indexed access sound under
   * `noUncheckedIndexedAccess` instead of failing later with a TypeError.
   */
  const soleConfirmation = (hooks: HookEngine) => {
    const pending = hooks.getPendingConfirmations();
    expect(pending).toHaveLength(1);
    const confirmation = pending[0];
    if (confirmation === undefined) {
      throw new Error('expected exactly one pending confirmation');
    }
    return confirmation;
  };

  it('executes a tool and returns result', async () => {
    const executor = new ToolExecutor(registryWith(echoTool), permissiveHooks());

    const result = await executor.execute('test.echo', { text: 'hello' });

    expect(result.success).toBe(true);
    expect(result.output).toBe('hello');
  });

  it('returns error for unknown tool', async () => {
    // Nothing is registered, so any tool name is unknown.
    const executor = new ToolExecutor(registryWith(), permissiveHooks());

    const result = await executor.execute('nonexistent', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('not found');
  });

  it('catches tool execution errors', async () => {
    const executor = new ToolExecutor(registryWith(failTool), permissiveHooks());

    const result = await executor.execute('test.fail', {});

    // The tool's thrown error must surface as a failed result, not a rejection.
    expect(result.success).toBe(false);
    expect(result.error).toContain('kaboom');
  });

  it('enforces timeout', async () => {
    // slowTool waits 5000 ms; the executor is configured with a 100 ms default timeout.
    const executor = new ToolExecutor(registryWith(slowTool), permissiveHooks(), {
      defaultTimeoutMs: 100,
    });

    const result = await executor.execute('test.slow', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('timed out');
  });

  it('truncates large output', async () => {
    const maxOutputBytes = 1000;
    // Allowance on top of the limit, presumably for the appended '[truncated]' marker.
    const markerAllowance = 100;
    const executor = new ToolExecutor(registryWith(bigOutputTool), permissiveHooks(), {
      maxOutputBytes,
    });

    const result = await executor.execute('test.big', {});

    expect(result.success).toBe(true);
    expect(result.output.length).toBeLessThanOrEqual(maxOutputBytes + markerAllowance);
    expect(result.output).toContain('[truncated]');
  });

  it('blocks on confirm hook and resolves when approved', async () => {
    const hooks = confirmingHooks();
    const executor = new ToolExecutor(registryWith(echoTool), hooks);

    // Deliberately not awaited yet: the confirmation is resolved first.
    const resultPromise = executor.execute('test.echo', { text: 'hi' });
    const confirmation = soleConfirmation(hooks);
    hooks.resolveConfirmation(confirmation.id, { approved: true });

    const result = await resultPromise;
    expect(result.success).toBe(true);
    expect(result.output).toBe('hi');
  });

  it('blocks on confirm hook and returns denied', async () => {
    const hooks = confirmingHooks();
    const executor = new ToolExecutor(registryWith(echoTool), hooks);

    // Deliberately not awaited yet: the confirmation is resolved first.
    const resultPromise = executor.execute('test.echo', { text: 'hi' });
    const confirmation = soleConfirmation(hooks);
    hooks.resolveConfirmation(confirmation.id, { approved: false, reason: 'nope' });

    const result = await resultPromise;
    expect(result.success).toBe(false);
    expect(result.error).toContain('denied');
  });
});