feat: add tool framework foundation (types, registry, executor, shell tool, model types, SOUL.md)

- Task 0: SOUL.md + loadSystemPrompt() in daemon
- Task 1: Tool type definitions (Tool, ToolCall, ToolResult, etc.)
- Task 2: ToolRegistry with Anthropic/OpenAI serialization
- Task 3: ToolExecutor with hooks, timeout, truncation
- Task 4: shell.exec builtin tool
- Task 8: Model types updated for tool use (ToolDefinition, ModelToolCall, etc.)
- Task 15: Model index exports for tool types
This commit is contained in:
William Valentin
2026-02-05 17:39:40 -08:00
parent 32dd3ad728
commit b00706325b
13 changed files with 691 additions and 7 deletions
+42
View File
@@ -0,0 +1,42 @@
import { describe, it, expect } from 'vitest';
import { shellExecTool } from './shell.js';
import { tmpdir } from 'os';
import { mkdtempSync, writeFileSync, rmSync } from 'fs';
import { join } from 'path';
// End-to-end checks for the shell.exec builtin: each async case runs a real command.
describe('shell.exec tool', () => {
  // Thin wrapper so every case invokes the tool the same way.
  const run = (args: { command: string; cwd?: string; timeout?: number }) =>
    shellExecTool.execute(args);

  it('has correct metadata', () => {
    const { name, inputSchema } = shellExecTool;
    expect(name).toBe('shell.exec');
    expect(inputSchema.required).toContain('command');
  });

  it('runs a simple command', async () => {
    const outcome = await run({ command: 'echo hello' });
    expect(outcome.success).toBe(true);
    expect(outcome.output.trim()).toBe('hello');
  });

  it('captures stderr on failure', async () => {
    const outcome = await run({ command: 'ls /nonexistent_dir_xyz' });
    expect(outcome.success).toBe(false);
    expect(outcome.error).toBeTruthy();
  });

  it('respects cwd parameter', async () => {
    // Scratch directory holding a single known file; removed again in `finally`.
    const scratchDir = mkdtempSync(join(tmpdir(), 'flynn-test-'));
    writeFileSync(join(scratchDir, 'test.txt'), 'content');
    try {
      const outcome = await run({ command: 'ls test.txt', cwd: scratchDir });
      expect(outcome.success).toBe(true);
      expect(outcome.output.trim()).toBe('test.txt');
    } finally {
      rmSync(scratchDir, { recursive: true });
    }
  });

  it('respects timeout parameter', async () => {
    // The command would run for 10 s; the 200 ms timeout must cut it short.
    const outcome = await run({ command: 'sleep 10', timeout: 200 });
    expect(outcome.success).toBe(false);
    expect(outcome.error).toContain('timed out');
  });
});
+48
View File
@@ -0,0 +1,48 @@
import { execFile } from 'child_process';
import type { Tool, ToolResult } from '../types.js';
/** Arguments accepted by the `shell.exec` tool; field names match its `inputSchema` properties. */
interface ShellExecArgs {
/** Command line to run (required). */
command: string;
/** Working directory for the command (optional). */
cwd?: string;
/** Timeout in milliseconds (optional). */
timeout?: number;
}
/**
 * Builtin `shell.exec` tool: runs a command line through `bash -c` and reports
 * its stdout/stderr as a `ToolResult`.
 *
 * Contract:
 * - Arguments arrive untyped, so every field is validated at runtime; invalid
 *   input yields a failed result rather than a thrown exception.
 * - `timeout` defaults to 30000 ms. Captured stdout/stderr is limited to 1 MiB
 *   via `maxBuffer`.
 * - On success, non-empty stderr is appended to the output after a `stderr:` marker.
 * - On failure, `output` carries whatever stdout was captured and `error`
 *   carries stderr (or the process error message when stderr is empty).
 * - "timed out" is reported only when Node itself killed the child
 *   (`error.killed`), so a command that receives SIGTERM from elsewhere is not
 *   misreported as a timeout.
 *
 * SECURITY: the command string is executed verbatim by bash. That is the
 * purpose of this tool, so any gating of untrusted callers has to happen
 * before `execute` is invoked.
 */
export const shellExecTool: Tool = {
  name: 'shell.exec',
  description: 'Execute a shell command and return stdout/stderr. Use for running build commands, git operations, system tasks, etc.',
  inputSchema: {
    type: 'object',
    properties: {
      command: { type: 'string', description: 'The shell command to execute' },
      cwd: { type: 'string', description: 'Working directory (optional)' },
      timeout: { type: 'number', description: 'Timeout in milliseconds (default 30000)' },
    },
    required: ['command'],
  },
  execute: async (rawArgs: unknown): Promise<ToolResult> => {
    const fail = (error: string): ToolResult => ({ success: false, output: '', error });

    if (typeof rawArgs !== 'object' || rawArgs === null) {
      return fail('shell.exec: arguments must be an object');
    }
    // Treat every field as unknown until it has been narrowed below.
    const args = rawArgs as Partial<Record<keyof ShellExecArgs, unknown>>;

    const command = args.command;
    if (typeof command !== 'string') {
      return fail("shell.exec: 'command' must be a string");
    }

    const cwdArg = args.cwd;
    let cwd: string | undefined;
    if (typeof cwdArg === 'string') {
      cwd = cwdArg;
    } else if (cwdArg != null) {
      return fail("shell.exec: 'cwd' must be a string");
    }

    const requestedTimeout = args.timeout ?? 30_000;
    if (
      typeof requestedTimeout !== 'number' ||
      !Number.isFinite(requestedTimeout) ||
      requestedTimeout < 0
    ) {
      return fail("shell.exec: 'timeout' must be a non-negative number of milliseconds");
    }
    // execFile rejects non-integer timeouts, and timer delays above 2^31-1 ms
    // are coerced to 1 ms by Node, so normalise into the supported range.
    const timeout = Math.min(Math.floor(requestedTimeout), 2_147_483_647);

    return new Promise<ToolResult>((resolve) => {
      try {
        execFile(
          'bash',
          ['-c', command],
          { cwd, timeout, maxBuffer: 1024 * 1024 },
          (error, stdout, stderr) => {
            if (!error) {
              resolve({ success: true, output: stdout + (stderr ? `\nstderr: ${stderr}` : '') });
              return;
            }
            if (error.killed) {
              // `killed` is set when Node terminated the child for exceeding `timeout`.
              resolve({ success: false, output: stdout, error: `Command timed out after ${timeout}ms` });
              return;
            }
            resolve({ success: false, output: stdout, error: stderr || error.message });
          },
        );
      } catch (spawnError) {
        // execFile validates its options synchronously; surface that as a result too.
        resolve(fail(spawnError instanceof Error ? spawnError.message : String(spawnError)));
      }
    });
  },
};
+126
View File
@@ -0,0 +1,126 @@
import { describe, it, expect } from 'vitest';
import { ToolExecutor } from './executor.js';
import { ToolRegistry } from './registry.js';
import { HookEngine } from '../hooks/engine.js';
import type { Tool } from './types.js';
/** Test fixture: succeeds and returns the `text` argument unchanged as output. */
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echoes input',
  inputSchema: {
    type: 'object',
    properties: { text: { type: 'string' } },
    required: ['text'],
  },
  async execute(args) {
    const { text } = args as { text: string };
    return { success: true, output: text };
  },
};
/** Test fixture: waits five seconds before succeeding, used to trigger executor timeouts. */
const slowTool: Tool = {
  name: 'test.slow',
  description: 'Takes forever',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    await new Promise<void>((resolve) => {
      setTimeout(resolve, 5000);
    });
    return { success: true, output: 'done' };
  },
};
/** Test fixture: always rejects with an Error whose message is 'kaboom'. */
const failTool: Tool = {
  name: 'test.fail',
  description: 'Throws',
  inputSchema: { type: 'object', properties: {} },
  execute: () => Promise.reject(new Error('kaboom')),
};
/** Test fixture: succeeds with 100,000 'x' characters, used to exercise output truncation. */
const bigOutputTool: Tool = {
  name: 'test.big',
  description: 'Returns huge output',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    const payload = 'x'.repeat(100_000);
    return { success: true, output: payload };
  },
};
// Behavioural tests for ToolExecutor: lookup, error capture, timeout,
// output truncation, and the confirm-hook approval/denial flow.
describe('ToolExecutor', () => {
  // Registry pre-populated with the given tools.
  const registryWith = (...tools: Tool[]): ToolRegistry => {
    const registry = new ToolRegistry();
    tools.forEach((tool) => registry.register(tool));
    return registry;
  };

  // HookEngine constructed with empty confirm/log/silent lists.
  const hooksWithoutRules = (): HookEngine =>
    new HookEngine({ confirm: [], log: [], silent: [] });

  // HookEngine whose confirm list contains the 'test.*' pattern.
  const hooksConfirmingTestTools = (): HookEngine =>
    new HookEngine({ confirm: ['test.*'], log: [], silent: [] });

  it('executes a tool and returns result', async () => {
    const executor = new ToolExecutor(registryWith(echoTool), hooksWithoutRules());

    const outcome = await executor.execute('test.echo', { text: 'hello' });

    expect(outcome.success).toBe(true);
    expect(outcome.output).toBe('hello');
  });

  it('returns error for unknown tool', async () => {
    const executor = new ToolExecutor(registryWith(), hooksWithoutRules());

    const outcome = await executor.execute('nonexistent', {});

    expect(outcome.success).toBe(false);
    expect(outcome.error).toContain('not found');
  });

  it('catches tool execution errors', async () => {
    const executor = new ToolExecutor(registryWith(failTool), hooksWithoutRules());

    const outcome = await executor.execute('test.fail', {});

    expect(outcome.success).toBe(false);
    expect(outcome.error).toContain('kaboom');
  });

  it('enforces timeout', async () => {
    const executor = new ToolExecutor(registryWith(slowTool), hooksWithoutRules(), {
      defaultTimeoutMs: 100,
    });

    const outcome = await executor.execute('test.slow', {});

    expect(outcome.success).toBe(false);
    expect(outcome.error).toContain('timed out');
  });

  it('truncates large output', async () => {
    const executor = new ToolExecutor(registryWith(bigOutputTool), hooksWithoutRules(), {
      maxOutputBytes: 1000,
    });

    const outcome = await executor.execute('test.big', {});

    expect(outcome.success).toBe(true);
    // Allow headroom above the 1000 limit for the appended truncation marker.
    expect(outcome.output.length).toBeLessThanOrEqual(1100);
    expect(outcome.output).toContain('[truncated]');
  });

  it('blocks on confirm hook and resolves when approved', async () => {
    const registry = registryWith(echoTool);
    const hooks = hooksConfirmingTestTools();
    const executor = new ToolExecutor(registry, hooks);

    // Deliberately not awaited yet: the call should be parked on the confirmation.
    const inFlight = executor.execute('test.echo', { text: 'hi' });
    const awaitingApproval = hooks.getPendingConfirmations();
    expect(awaitingApproval).toHaveLength(1);
    hooks.resolveConfirmation(awaitingApproval[0].id, { approved: true });

    const outcome = await inFlight;
    expect(outcome.success).toBe(true);
    expect(outcome.output).toBe('hi');
  });

  it('blocks on confirm hook and returns denied', async () => {
    const registry = registryWith(echoTool);
    const hooks = hooksConfirmingTestTools();
    const executor = new ToolExecutor(registry, hooks);

    const inFlight = executor.execute('test.echo', { text: 'hi' });
    const awaitingApproval = hooks.getPendingConfirmations();
    hooks.resolveConfirmation(awaitingApproval[0].id, { approved: false, reason: 'nope' });

    const outcome = await inFlight;
    expect(outcome.success).toBe(false);
    expect(outcome.error).toContain('denied');
  });
});
+68
View File
@@ -0,0 +1,68 @@
import type { ToolResult } from './types.js';
import type { ToolRegistry } from './registry.js';
import type { HookEngine } from '../hooks/engine.js';
/** Optional settings accepted by the `ToolExecutor` constructor. */
export interface ToolExecutorConfig {
/** Time limit in milliseconds applied to each tool call; `ToolExecutor` uses 30000 when omitted. */
defaultTimeoutMs?: number;
/** Maximum size of a tool's `output` before it is truncated; `ToolExecutor` uses 51200 when omitted. */
maxOutputBytes?: number;
}
/**
 * Runs registered tools by name, applying hook policy, a time limit, and an
 * output size cap. `execute` reports tool failures, timeouts, unknown tools and
 * denied confirmations as a `ToolResult` with `success: false`.
 */
export class ToolExecutor {
  private readonly registry: ToolRegistry;
  private readonly hooks: HookEngine;
  private readonly defaultTimeoutMs: number;
  private readonly maxOutputBytes: number;

  /**
   * @param registry Source of tool implementations, looked up by name.
   * @param hooks Hook policy consulted before each call.
   * @param config Optional overrides; defaults are a 30000 ms timeout and a 51200-byte output cap.
   */
  constructor(registry: ToolRegistry, hooks: HookEngine, config?: ToolExecutorConfig) {
    this.registry = registry;
    this.hooks = hooks;
    this.defaultTimeoutMs = config?.defaultTimeoutMs ?? 30_000;
    this.maxOutputBytes = config?.maxOutputBytes ?? 51_200;
  }

  /**
   * Looks up `toolName`, waits for user confirmation when the hook action is
   * `'confirm'`, then runs the tool under the configured timeout.
   *
   * @param toolName Registered name of the tool to run.
   * @param args Raw arguments forwarded to the tool unchanged.
   * @returns The tool's result (with `output` truncated if over the cap), or a
   *   failed result describing why the tool did not complete.
   */
  async execute(toolName: string, args: unknown): Promise<ToolResult> {
    const tool = this.registry.get(toolName);
    if (!tool) {
      return { success: false, output: '', error: `Tool '${toolName}' not found` };
    }

    // Check hooks
    const action = this.hooks.getAction(toolName);
    if (action === 'confirm') {
      const hookResult = await this.hooks.requestConfirmation(
        toolName,
        args as Record<string, unknown>,
      );
      if (!hookResult.approved) {
        return {
          success: false,
          output: '',
          error: `Tool '${toolName}' denied by user: ${hookResult.reason ?? 'no reason'}`,
        };
      }
    }

    // Execute with timeout. Promise.race only stops waiting; a tool that
    // loses the race is not cancelled and keeps running in the background.
    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      const result = await Promise.race([
        tool.execute(args),
        new Promise<never>((_, reject) => {
          timer = setTimeout(
            () => reject(new Error(`Tool '${toolName}' timed out after ${this.defaultTimeoutMs}ms`)),
            this.defaultTimeoutMs,
          );
        }),
      ]);
      return this.capOutput(result);
    } catch (error) {
      return {
        success: false,
        output: '',
        error: error instanceof Error ? error.message : String(error),
      };
    } finally {
      // Without this the pending timer keeps the event loop alive for the
      // full timeout after every call, even ones that finished immediately.
      clearTimeout(timer);
    }
  }

  /**
   * Returns `result` unchanged when its output fits in `maxOutputBytes` UTF-8
   * bytes; otherwise returns a copy whose output is cut at a character
   * boundary and suffixed with a `[truncated]` marker. The tool's own result
   * object is never mutated.
   */
  private capOutput(result: ToolResult): ToolResult {
    const { output } = result;
    const limit = this.maxOutputBytes;

    // One UTF-16 code unit encodes to at most 3 UTF-8 bytes, so strings this
    // short cannot exceed the limit and need not be encoded at all.
    if (output.length * 3 <= limit) {
      return result;
    }

    const bytes = new TextEncoder().encode(output);
    if (bytes.length <= limit) {
      return result;
    }

    // Step back over UTF-8 continuation bytes (10xxxxxx) so the cut never
    // lands in the middle of a multi-byte character.
    let end = Math.max(0, Math.floor(limit));
    while (end > 0 && ((bytes[end] ?? 0) & 0xc0) === 0x80) {
      end--;
    }

    const kept = new TextDecoder().decode(bytes.subarray(0, end));
    return { ...result, output: kept + '\n[truncated]' };
  }
}
+79
View File
@@ -0,0 +1,79 @@
import { describe, it, expect } from 'vitest';
import { ToolRegistry } from './registry.js';
import type { Tool } from './types.js';
/** Test fixture: succeeds with the `text` argument converted to a string. */
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echoes input back',
  inputSchema: {
    type: 'object',
    properties: {
      text: { type: 'string', description: 'Text to echo' },
    },
    required: ['text'],
  },
  async execute(args) {
    const { text } = args as { text: string };
    return { success: true, output: String(text) };
  },
};
/** Test fixture: succeeds with a greeting built from the `name` argument. */
const greetTool: Tool = {
  name: 'test.greet',
  description: 'Greets someone',
  inputSchema: {
    type: 'object',
    properties: {
      name: { type: 'string' },
    },
    required: ['name'],
  },
  async execute(args) {
    const { name: who } = args as { name: string };
    return { success: true, output: `Hello ${who}` };
  },
};
// Tests for ToolRegistry: registration, lookup, listing, duplicate rejection,
// and serialization into the Anthropic and OpenAI tool-definition shapes.
describe('ToolRegistry', () => {
  // Registry pre-populated with the given tools, in order.
  const registryOf = (...tools: Tool[]): ToolRegistry => {
    const registry = new ToolRegistry();
    for (const tool of tools) {
      registry.register(tool);
    }
    return registry;
  };

  it('registers and retrieves tools by name', () => {
    const registry = registryOf(echoTool);

    expect(registry.get('test.echo')).toBe(echoTool);
    expect(registry.get('nonexistent')).toBeUndefined();
  });

  it('lists all registered tools', () => {
    const listed = registryOf(echoTool, greetTool).list();

    expect(listed).toHaveLength(2);
    expect(listed.map((tool) => tool.name)).toContain('test.echo');
    expect(listed.map((tool) => tool.name)).toContain('test.greet');
  });

  it('throws on duplicate registration', () => {
    const registry = registryOf(echoTool);

    const registerAgain = () => registry.register(echoTool);

    expect(registerAgain).toThrow('already registered');
  });

  it('serializes to Anthropic format', () => {
    const expected = [
      {
        name: 'test.echo',
        description: 'Echoes input back',
        input_schema: echoTool.inputSchema,
      },
    ];

    expect(registryOf(echoTool).toAnthropicFormat()).toEqual(expected);
  });

  it('serializes to OpenAI format', () => {
    const expected = [
      {
        type: 'function',
        function: {
          name: 'test.echo',
          description: 'Echoes input back',
          parameters: echoTool.inputSchema,
        },
      },
    ];

    expect(registryOf(echoTool).toOpenAIFormat()).toEqual(expected);
  });
});
+54
View File
@@ -0,0 +1,54 @@
import type { Tool, ToolInputSchema } from './types.js';
/** Tool definition in the shape returned by `ToolRegistry.toAnthropicFormat()`. */
export interface AnthropicToolDef {
/** Tool name, copied from `Tool.name`. */
name: string;
/** Tool description, copied from `Tool.description`. */
description: string;
/** Argument schema, copied from `Tool.inputSchema`. */
input_schema: ToolInputSchema;
}
/** Tool definition in the shape returned by `ToolRegistry.toOpenAIFormat()`. */
export interface OpenAIToolDef {
/** Always the literal `'function'`. */
type: 'function';
/** The function payload describing the tool. */
function: {
/** Tool name, copied from `Tool.name`. */
name: string;
/** Tool description, copied from `Tool.description`. */
description: string;
/** Argument schema, copied from `Tool.inputSchema`. */
parameters: ToolInputSchema;
};
}
/**
 * In-memory collection of tools keyed by their unique `name`, with helpers
 * that serialize the registered tools into provider-specific definitions.
 *
 * NOTE(review): names are passed through to both formats unmodified. Provider
 * APIs may restrict which characters a tool name can contain (the tests use
 * dotted names such as `test.echo`) — confirm against the target APIs.
 */
export class ToolRegistry {
  private tools = new Map<string, Tool>();

  /**
   * Adds a tool to the registry.
   * @throws Error if a tool with the same name has already been registered.
   */
  register(tool: Tool): void {
    const { name } = tool;
    if (!this.tools.has(name)) {
      this.tools.set(name, tool);
      return;
    }
    throw new Error(`Tool '${name}' is already registered`);
  }

  /** Returns the tool registered under `name`, or `undefined` if there is none. */
  get(name: string): Tool | undefined {
    return this.tools.get(name);
  }

  /** Returns a new array of all registered tools, in registration order. */
  list(): Tool[] {
    return [...this.tools.values()];
  }

  /** Serializes every registered tool as `{ name, description, input_schema }`. */
  toAnthropicFormat(): AnthropicToolDef[] {
    const definitions: AnthropicToolDef[] = [];
    for (const { name, description, inputSchema } of this.list()) {
      definitions.push({ name, description, input_schema: inputSchema });
    }
    return definitions;
  }

  /** Serializes every registered tool as `{ type: 'function', function: { ... } }`. */
  toOpenAIFormat(): OpenAIToolDef[] {
    const definitions: OpenAIToolDef[] = [];
    for (const { name, description, inputSchema } of this.list()) {
      definitions.push({
        type: 'function' as const,
        function: { name, description, parameters: inputSchema },
      });
    }
    return definitions;
  }
}
+52
View File
@@ -0,0 +1,52 @@
import { describe, it, expect } from 'vitest';
import type { Tool, ToolCall, ToolResult, ToolUseMessage, ToolResultMessage } from './types.js';
// Shape checks for the tool type declarations: each case builds a typed
// literal (so the compiler validates the shape) and spot-checks a few fields.
describe('Tool types', () => {
  it('Tool interface is structurally correct', () => {
    const sample: Tool = {
      name: 'test.echo',
      description: 'Echoes input',
      inputSchema: {
        type: 'object',
        properties: { text: { type: 'string' } },
        required: ['text'],
      },
      async execute(args) {
        const { text } = args as { text: string };
        return { success: true, output: String(text) };
      },
    };

    expect(sample.name).toBe('test.echo');
    expect(sample.inputSchema.type).toBe('object');
  });

  it('ToolCall has required fields', () => {
    const invocation: ToolCall = { id: 'call_1', name: 'test.echo', args: { text: 'hi' } };

    expect(invocation.id).toBe('call_1');
    expect(invocation.name).toBe('test.echo');
  });

  it('ToolResult has success and output', () => {
    const ok: ToolResult = { success: true, output: 'hello' };
    expect(ok.success).toBe(true);

    const failed: ToolResult = { success: false, output: '', error: 'boom' };
    expect(failed.error).toBe('boom');
  });

  it('ToolUseMessage has correct shape', () => {
    const message: ToolUseMessage = {
      role: 'assistant',
      content: [
        { type: 'tool_use', id: 'call_1', name: 'test.echo', input: { text: 'hi' } },
      ],
    };

    expect(message.role).toBe('assistant');
    expect(message.content[0].type).toBe('tool_use');
  });

  it('ToolResultMessage has correct shape', () => {
    const message: ToolResultMessage = {
      role: 'user',
      content: [
        { type: 'tool_result', tool_use_id: 'call_1', content: 'output here' },
      ],
    };

    expect(message.role).toBe('user');
    expect(message.content[0].type).toBe('tool_result');
  });
});
+52
View File
@@ -0,0 +1,52 @@
/**
 * Description of the arguments a tool accepts. The shape resembles a JSON
 * Schema object definition; property definitions are left untyped (`unknown`).
 */
export interface ToolInputSchema {
/** Always the literal `'object'`. */
type: 'object';
/** Per-argument definitions, keyed by argument name. */
properties: Record<string, unknown>;
/** Names of arguments that must be supplied; optional. */
required?: string[];
}
/** A callable tool: identifying metadata, an argument schema, and the implementation. */
export interface Tool {
/** Name the tool is identified by. */
name: string;
/** Human-readable description of what the tool does. */
description: string;
/** Schema describing the arguments `execute` expects. */
inputSchema: ToolInputSchema;
/** Runs the tool. `args` is untyped, so implementations must narrow it themselves. */
execute(args: unknown): Promise<ToolResult>;
}
/** A request to invoke a tool by name with the given arguments. */
export interface ToolCall {
/** Identifier of this call. NOTE(review): presumably matched against `ToolResultBlock.tool_use_id` — confirm. */
id: string;
/** Name of the tool to invoke. */
name: string;
/** Untyped arguments for the tool. */
args: unknown;
}
/** Outcome of running a tool. */
export interface ToolResult {
/** Whether the tool completed successfully. */
success: boolean;
/** Text produced by the tool; always present, and may be an empty string. */
output: string;
/** Failure description; optional. */
error?: string;
}
// Content block for assistant messages containing tool calls.
// One block describes a single requested call.
export interface ToolUseBlock {
// Discriminator: always 'tool_use'.
type: 'tool_use';
// Identifier of this tool call.
id: string;
// Name of the tool being requested.
name: string;
// Untyped arguments for the tool.
input: unknown;
}
// Content block for user messages returning tool results.
// One block carries the result of a single tool call.
export interface ToolResultBlock {
// Discriminator: always 'tool_result'.
type: 'tool_result';
// Identifier of the tool call this result belongs to.
tool_use_id: string;
// Result text.
content: string;
// NOTE(review): presumably set when the tool call failed — confirm against producers of this block.
is_error?: boolean;
}
// Message from assistant requesting tool use.
export interface ToolUseMessage {
// Always 'assistant'.
role: 'assistant';
// The tool calls being requested, one block per call.
content: ToolUseBlock[];
}
// Message from user returning tool results.
export interface ToolResultMessage {
// Always 'user'.
role: 'user';
// The tool results being returned, one block per result.
content: ToolResultBlock[];
}