feat: add tool framework foundation (types, registry, executor, shell tool, model types, SOUL.md)
- Task 0: SOUL.md + loadSystemPrompt() in daemon
- Task 1: Tool type definitions (Tool, ToolCall, ToolResult, etc.)
- Task 2: ToolRegistry with Anthropic/OpenAI serialization
- Task 3: ToolExecutor with hooks, timeout, truncation
- Task 4: shell.exec builtin tool
- Task 8: Model types updated for tool use (ToolDefinition, ModelToolCall, etc.)
- Task 15: Model index exports for tool types
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { shellExecTool } from './shell.js';
|
||||
import { tmpdir } from 'os';
|
||||
import { mkdtempSync, writeFileSync, rmSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
|
||||
/**
 * Exercises the built-in `shell.exec` tool by running real shell commands:
 * metadata, a successful command, a failing command, the `cwd` option and the `timeout` option.
 */
describe('shell.exec tool', () => {
  it('has correct metadata', () => {
    const { name, inputSchema } = shellExecTool;

    expect(name).toBe('shell.exec');
    expect(inputSchema.required).toContain('command');
  });

  it('runs a simple command', async () => {
    const outcome = await shellExecTool.execute({ command: 'echo hello' });

    expect(outcome.success).toBe(true);
    expect(outcome.output.trim()).toBe('hello');
  });

  it('captures stderr on failure', async () => {
    // Listing a directory that does not exist makes `ls` fail.
    const outcome = await shellExecTool.execute({ command: 'ls /nonexistent_dir_xyz' });

    expect(outcome.success).toBe(false);
    expect(outcome.error).toBeTruthy();
  });

  it('respects cwd parameter', async () => {
    // Scratch directory holding one known file; `ls test.txt` only succeeds when run inside it.
    const scratchDir = mkdtempSync(join(tmpdir(), 'flynn-test-'));
    writeFileSync(join(scratchDir, 'test.txt'), 'content');

    try {
      const outcome = await shellExecTool.execute({ command: 'ls test.txt', cwd: scratchDir });

      expect(outcome.success).toBe(true);
      expect(outcome.output.trim()).toBe('test.txt');
    } finally {
      // Always remove the scratch directory, even when an assertion fails.
      rmSync(scratchDir, { recursive: true });
    }
  });

  it('respects timeout parameter', async () => {
    // `sleep 10` runs far longer than the 200 ms budget given to the tool.
    const outcome = await shellExecTool.execute({ command: 'sleep 10', timeout: 200 });

    expect(outcome.success).toBe(false);
    expect(outcome.error).toContain('timed out');
  });
});
|
||||
@@ -0,0 +1,48 @@
|
||||
import { execFile } from 'child_process';
|
||||
import type { Tool, ToolResult } from '../types.js';
|
||||
|
||||
/** Arguments understood by the `shell.exec` tool. */
interface ShellExecArgs {
  /** The shell command to execute. */
  command: string;

  /** Working directory for the command; optional. */
  cwd?: string;

  /** Timeout in milliseconds; optional. */
  timeout?: number;
}
|
||||
|
||||
/**
 * Built-in `shell.exec` tool: runs a command line through `bash -c` and reports the
 * outcome as a `ToolResult`.
 *
 * Result mapping:
 * - The command finishes without error: `success: true`; `output` is stdout, with
 *   `"\nstderr: …"` appended when stderr is non-empty.
 * - The child was killed (`error.killed`) or ended with `SIGTERM`: `success: false`
 *   and `error` states that the command timed out.
 * - Any other failure: `success: false`; `error` is stderr, or the error message when
 *   stderr is empty. `output` still carries whatever stdout was captured.
 * - Malformed arguments: `success: false` with an explanatory `error`; nothing is spawned.
 *
 * `execute` always resolves with a result; it does not reject.
 */
export const shellExecTool: Tool = {
  name: 'shell.exec',
  description: 'Execute a shell command and return stdout/stderr. Use for running build commands, git operations, system tasks, etc.',
  inputSchema: {
    type: 'object',
    properties: {
      command: { type: 'string', description: 'The shell command to execute' },
      cwd: { type: 'string', description: 'Working directory (optional)' },
      timeout: { type: 'number', description: 'Timeout in milliseconds (default 30000)' },
    },
    required: ['command'],
  },
  execute: async (rawArgs: unknown): Promise<ToolResult> => {
    const invalid = (reason: string): ToolResult => ({
      success: false,
      output: '',
      error: `Invalid shell.exec arguments: ${reason}`,
    });

    // The arguments arrive untyped, so they are checked at runtime instead of
    // being trusted through a type assertion.
    if (typeof rawArgs !== 'object' || rawArgs === null) {
      return invalid('expected an object with a "command" string');
    }
    const { command, cwd, timeout: requestedTimeout } =
      rawArgs as Partial<Record<keyof ShellExecArgs, unknown>>;

    if (typeof command !== 'string') {
      return invalid('"command" must be a string');
    }

    let workingDir: string | undefined;
    if (typeof cwd === 'string') {
      workingDir = cwd;
    } else if (cwd != null) {
      return invalid('"cwd" must be a string');
    }

    const timeout = requestedTimeout ?? 30_000;
    // execFile throws on a negative or fractional timeout, so reject those up front.
    if (typeof timeout !== 'number' || !Number.isInteger(timeout) || timeout < 0) {
      return invalid('"timeout" must be a non-negative integer number of milliseconds');
    }

    return new Promise<ToolResult>((resolve) => {
      try {
        execFile(
          'bash',
          ['-c', command],
          { cwd: workingDir, timeout, maxBuffer: 1024 * 1024 },
          (error, stdout, stderr) => {
            if (!error) {
              resolve({ success: true, output: stdout + (stderr ? `\nstderr: ${stderr}` : '') });
              return;
            }
            if (error.killed || error.signal === 'SIGTERM') {
              resolve({ success: false, output: stdout, error: `Command timed out after ${timeout}ms` });
              return;
            }
            resolve({ success: false, output: stdout, error: stderr || error.message });
          },
        );
      } catch (spawnError) {
        // execFile can still throw synchronously (for example on a NUL byte in the
        // command); report that as a failed result instead of a rejected promise.
        resolve({
          success: false,
          output: '',
          error: spawnError instanceof Error ? spawnError.message : String(spawnError),
        });
      }
    });
  },
};
|
||||
@@ -0,0 +1,126 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { ToolExecutor } from './executor.js';
|
||||
import { ToolRegistry } from './registry.js';
|
||||
import { HookEngine } from '../hooks/engine.js';
|
||||
import type { Tool } from './types.js';
|
||||
|
||||
/** Fixture tool that succeeds and returns its `text` argument unchanged as the output. */
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echoes input',
  inputSchema: {
    type: 'object',
    properties: { text: { type: 'string' } },
    required: ['text'],
  },
  async execute(args) {
    const input = args as { text: string };
    return { success: true, output: input.text };
  },
};
|
||||
|
||||
/** Fixture tool that waits five seconds before succeeding; used to trigger the executor timeout. */
const slowTool: Tool = {
  name: 'test.slow',
  description: 'Takes forever',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    await new Promise((wake) => {
      setTimeout(wake, 5000);
    });
    return { success: true, output: 'done' };
  },
};
|
||||
|
||||
/** Fixture tool whose execution always rejects with an `Error('kaboom')`. */
const failTool: Tool = {
  name: 'test.fail',
  description: 'Throws',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    throw new Error('kaboom');
  },
};
|
||||
|
||||
/** Fixture tool that succeeds with 100,000 `x` characters; used to exercise output truncation. */
const bigOutputTool: Tool = {
  name: 'test.big',
  description: 'Returns huge output',
  inputSchema: { type: 'object', properties: {} },
  async execute() {
    const payload = 'x'.repeat(100_000);
    return { success: true, output: payload };
  },
};
|
||||
|
||||
/**
 * Behavioural tests for ToolExecutor: dispatch, unknown tools, thrown errors,
 * timeouts, output truncation and confirm-hook gating.
 */
describe('ToolExecutor', () => {
  /**
   * Builds a registry holding `tools`, a hook engine whose `confirm` list is
   * `confirmEntries` (with empty `log` and `silent` lists), and an executor wired to both.
   */
  function createHarness(
    tools: Tool[],
    confirmEntries: string[] = [],
    config?: ConstructorParameters<typeof ToolExecutor>[2],
  ) {
    const registry = new ToolRegistry();
    for (const tool of tools) {
      registry.register(tool);
    }
    const hooks = new HookEngine({ confirm: confirmEntries, log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks, config);
    return { hooks, executor };
  }

  it('executes a tool and returns result', async () => {
    const { executor } = createHarness([echoTool]);

    const result = await executor.execute('test.echo', { text: 'hello' });

    expect(result.success).toBe(true);
    expect(result.output).toBe('hello');
  });

  it('returns error for unknown tool', async () => {
    const { executor } = createHarness([]);

    const result = await executor.execute('nonexistent', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('not found');
  });

  it('catches tool execution errors', async () => {
    const { executor } = createHarness([failTool]);

    const result = await executor.execute('test.fail', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('kaboom');
  });

  it('enforces timeout', async () => {
    // The slow tool needs 5 s; the executor only allows 100 ms.
    const { executor } = createHarness([slowTool], [], { defaultTimeoutMs: 100 });

    const result = await executor.execute('test.slow', {});

    expect(result.success).toBe(false);
    expect(result.error).toContain('timed out');
  });

  it('truncates large output', async () => {
    const { executor } = createHarness([bigOutputTool], [], { maxOutputBytes: 1000 });

    const result = await executor.execute('test.big', {});

    expect(result.success).toBe(true);
    // 1000 kept characters plus the truncation marker stays well under 1100.
    expect(result.output.length).toBeLessThanOrEqual(1100);
    expect(result.output).toContain('[truncated]');
  });

  it('blocks on confirm hook and resolves when approved', async () => {
    const { hooks, executor } = createHarness([echoTool], ['test.*']);

    // Start the call without awaiting it: it stays pending until the confirmation is resolved.
    const inFlight = executor.execute('test.echo', { text: 'hi' });

    const pending = hooks.getPendingConfirmations();
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: true });

    const result = await inFlight;
    expect(result.success).toBe(true);
    expect(result.output).toBe('hi');
  });

  it('blocks on confirm hook and returns denied', async () => {
    const { hooks, executor } = createHarness([echoTool], ['test.*']);

    const inFlight = executor.execute('test.echo', { text: 'hi' });

    const pending = hooks.getPendingConfirmations();
    hooks.resolveConfirmation(pending[0].id, { approved: false, reason: 'nope' });

    const result = await inFlight;
    expect(result.success).toBe(false);
    expect(result.error).toContain('denied');
  });
});
|
||||
@@ -0,0 +1,68 @@
|
||||
import type { ToolResult } from './types.js';
|
||||
import type { ToolRegistry } from './registry.js';
|
||||
import type { HookEngine } from '../hooks/engine.js';
|
||||
|
||||
/** Optional tuning knobs for a ToolExecutor. */
export interface ToolExecutorConfig {
  /** Time budget for one tool call, in milliseconds. The executor uses 30,000 when omitted. */
  defaultTimeoutMs?: number;

  /** Output size above which a result is truncated. The executor uses 51,200 when omitted. */
  maxOutputBytes?: number;
}
|
||||
|
||||
/**
 * Runs registered tools by name, applying three safeguards around each call:
 * an optional confirmation hook, a time limit, and output truncation.
 */
export class ToolExecutor {
  private registry: ToolRegistry;
  private hooks: HookEngine;
  private defaultTimeoutMs: number;
  private maxOutputBytes: number;

  /**
   * @param registry Source of tools, looked up by name on every call.
   * @param hooks Hook engine consulted before a tool runs.
   * @param config Optional overrides; the timeout defaults to 30,000 ms and the
   *   output limit to 51,200.
   */
  constructor(registry: ToolRegistry, hooks: HookEngine, config?: ToolExecutorConfig) {
    this.registry = registry;
    this.hooks = hooks;
    this.defaultTimeoutMs = config?.defaultTimeoutMs ?? 30_000;
    this.maxOutputBytes = config?.maxOutputBytes ?? 51_200;
  }

  /**
   * Executes the tool registered under `toolName` with `args`.
   *
   * Failures of the tool itself are reported through the returned result
   * (`success: false`) rather than thrown: unknown tool, denied confirmation,
   * an error thrown by the tool, or a timeout. The confirmation request sits
   * outside the try block, so a rejection from it propagates to the caller.
   *
   * When a call times out, the tool's own promise is not cancelled; only its
   * result is ignored.
   *
   * @param toolName Name the tool was registered under.
   * @param args Arguments passed through to the tool unchanged.
   * @returns The tool's result, with `output` shortened and suffixed with
   *   `"\n[truncated]"` when it exceeds the configured limit.
   */
  async execute(toolName: string, args: unknown): Promise<ToolResult> {
    const tool = this.registry.get(toolName);
    if (!tool) {
      return { success: false, output: '', error: `Tool '${toolName}' not found` };
    }

    // Tools whose hook action is 'confirm' wait here until the request is answered.
    if (this.hooks.getAction(toolName) === 'confirm') {
      const hookResult = await this.hooks.requestConfirmation(
        toolName,
        args as Record<string, unknown>,
      );
      if (!hookResult.approved) {
        return {
          success: false,
          output: '',
          error: `Tool '${toolName}' denied by user: ${hookResult.reason ?? 'no reason'}`,
        };
      }
    }

    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      const running = tool.execute(args);
      const expiry = new Promise<never>((_resolve, reject) => {
        timer = setTimeout(
          () => reject(new Error(`Tool '${toolName}' timed out after ${this.defaultTimeoutMs}ms`)),
          this.defaultTimeoutMs,
        );
      });

      const result = await Promise.race([running, expiry]);

      // The limit is compared against the string length (UTF-16 code units).
      if (result.output.length > this.maxOutputBytes) {
        // Return a copy so the object produced by the tool is never modified.
        return {
          ...result,
          output: result.output.slice(0, this.maxOutputBytes) + '\n[truncated]',
        };
      }

      return result;
    } catch (error) {
      return {
        success: false,
        output: '',
        error: error instanceof Error ? error.message : String(error),
      };
    } finally {
      // Cancel the timer so a finished call does not keep the event loop
      // alive for the rest of the timeout window.
      clearTimeout(timer);
    }
  }
}
|
||||
@@ -0,0 +1,79 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { ToolRegistry } from './registry.js';
|
||||
import type { Tool } from './types.js';
|
||||
|
||||
/** Fixture tool that succeeds and returns its `text` argument, converted with `String()`. */
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echoes input back',
  inputSchema: {
    type: 'object',
    properties: {
      text: { type: 'string', description: 'Text to echo' },
    },
    required: ['text'],
  },
  async execute(args) {
    const input = args as { text: string };
    return { success: true, output: String(input.text) };
  },
};
|
||||
|
||||
/** Fixture tool that succeeds with `Hello <name>` built from its `name` argument. */
const greetTool: Tool = {
  name: 'test.greet',
  description: 'Greets someone',
  inputSchema: {
    type: 'object',
    properties: { name: { type: 'string' } },
    required: ['name'],
  },
  async execute(args) {
    const input = args as { name: string };
    return { success: true, output: `Hello ${input.name}` };
  },
};
|
||||
|
||||
/**
 * Behavioural tests for ToolRegistry: lookup, listing, duplicate detection
 * and serialization to the Anthropic and OpenAI tool formats.
 */
describe('ToolRegistry', () => {
  /** Creates a fresh registry with the given tools registered in order. */
  const registryWith = (...tools: Tool[]): ToolRegistry => {
    const registry = new ToolRegistry();
    for (const tool of tools) {
      registry.register(tool);
    }
    return registry;
  };

  it('registers and retrieves tools by name', () => {
    const registry = registryWith(echoTool);

    expect(registry.get('test.echo')).toBe(echoTool);
    expect(registry.get('nonexistent')).toBeUndefined();
  });

  it('lists all registered tools', () => {
    const tools = registryWith(echoTool, greetTool).list();

    expect(tools).toHaveLength(2);
    expect(tools.map((tool) => tool.name)).toContain('test.echo');
    expect(tools.map((tool) => tool.name)).toContain('test.greet');
  });

  it('throws on duplicate registration', () => {
    const registry = registryWith(echoTool);

    expect(() => registry.register(echoTool)).toThrow('already registered');
  });

  it('serializes to Anthropic format', () => {
    const anthropicTools = registryWith(echoTool).toAnthropicFormat();

    expect(anthropicTools).toEqual([
      {
        name: 'test.echo',
        description: 'Echoes input back',
        input_schema: echoTool.inputSchema,
      },
    ]);
  });

  it('serializes to OpenAI format', () => {
    const openaiTools = registryWith(echoTool).toOpenAIFormat();

    expect(openaiTools).toEqual([
      {
        type: 'function',
        function: {
          name: 'test.echo',
          description: 'Echoes input back',
          parameters: echoTool.inputSchema,
        },
      },
    ]);
  });
});
|
||||
@@ -0,0 +1,54 @@
|
||||
import type { Tool, ToolInputSchema } from './types.js';
|
||||
|
||||
/** Shape produced by `ToolRegistry.toAnthropicFormat()` for one tool. */
export interface AnthropicToolDef {
  /** Tool name, copied from the registered tool. */
  name: string;

  /** Tool description, copied from the registered tool. */
  description: string;

  /** The tool's input schema, under the snake_case key used by this format. */
  input_schema: ToolInputSchema;
}
|
||||
|
||||
/** Shape produced by `ToolRegistry.toOpenAIFormat()` for one tool. */
export interface OpenAIToolDef {
  /** Always the literal `'function'`. */
  type: 'function';

  /** The tool itself, nested under the `function` key. */
  function: {
    /** Tool name, copied from the registered tool. */
    name: string;

    /** Tool description, copied from the registered tool. */
    description: string;

    /** The tool's input schema. */
    parameters: ToolInputSchema;
  };
}
|
||||
|
||||
/**
 * In-memory collection of tools keyed by name, with helpers that serialize the
 * collection into the Anthropic and OpenAI tool-definition shapes.
 *
 * NOTE(review): tool names are passed through unchanged when serializing.
 * Confirm that dotted names such as `shell.exec` are accepted by the target APIs.
 */
export class ToolRegistry {
  private tools: Map<string, Tool> = new Map();

  /**
   * Adds a tool to the registry.
   *
   * @throws Error when a tool with the same name has already been registered.
   */
  register(tool: Tool): void {
    const { name } = tool;
    if (!this.tools.has(name)) {
      this.tools.set(name, tool);
      return;
    }
    throw new Error(`Tool '${name}' is already registered`);
  }

  /** Returns the tool registered under `name`, or `undefined` when there is none. */
  get(name: string): Tool | undefined {
    return this.tools.get(name);
  }

  /** Returns a new array of all registered tools, in registration order. */
  list(): Tool[] {
    return [...this.tools.values()];
  }

  /** Serializes every registered tool as `{ name, description, input_schema }`. */
  toAnthropicFormat(): AnthropicToolDef[] {
    const defs: AnthropicToolDef[] = [];
    for (const { name, description, inputSchema } of this.list()) {
      defs.push({ name, description, input_schema: inputSchema });
    }
    return defs;
  }

  /** Serializes every registered tool as `{ type: 'function', function: { … } }`. */
  toOpenAIFormat(): OpenAIToolDef[] {
    const defs: OpenAIToolDef[] = [];
    for (const { name, description, inputSchema } of this.list()) {
      defs.push({
        type: 'function',
        function: { name, description, parameters: inputSchema },
      });
    }
    return defs;
  }
}
|
||||
@@ -0,0 +1,52 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import type { Tool, ToolCall, ToolResult, ToolUseMessage, ToolResultMessage } from './types.js';
|
||||
|
||||
/**
 * Structural smoke tests: each case builds a literal annotated with one of the
 * tool types (so the compiler checks its shape) and then spot-checks a few fields.
 */
describe('Tool types', () => {
  it('Tool interface is structurally correct', () => {
    const tool: Tool = {
      name: 'test.echo',
      description: 'Echoes input',
      inputSchema: {
        type: 'object',
        properties: { text: { type: 'string' } },
        required: ['text'],
      },
      async execute(args) {
        const input = args as { text: string };
        return { success: true, output: String(input.text) };
      },
    };

    expect(tool.name).toBe('test.echo');
    expect(tool.inputSchema.type).toBe('object');
  });

  it('ToolCall has required fields', () => {
    const call: ToolCall = {
      id: 'call_1',
      name: 'test.echo',
      args: { text: 'hi' },
    };

    expect(call.id).toBe('call_1');
    expect(call.name).toBe('test.echo');
  });

  it('ToolResult has success and output', () => {
    const okResult: ToolResult = { success: true, output: 'hello' };
    expect(okResult.success).toBe(true);

    const errResult: ToolResult = { success: false, output: '', error: 'boom' };
    expect(errResult.error).toBe('boom');
  });

  it('ToolUseMessage has correct shape', () => {
    const message: ToolUseMessage = {
      role: 'assistant',
      content: [
        { type: 'tool_use', id: 'call_1', name: 'test.echo', input: { text: 'hi' } },
      ],
    };

    expect(message.role).toBe('assistant');
    expect(message.content[0].type).toBe('tool_use');
  });

  it('ToolResultMessage has correct shape', () => {
    const message: ToolResultMessage = {
      role: 'user',
      content: [
        { type: 'tool_result', tool_use_id: 'call_1', content: 'output here' },
      ],
    };

    expect(message.role).toBe('user');
    expect(message.content[0].type).toBe('tool_result');
  });
});
|
||||
@@ -0,0 +1,52 @@
|
||||
/** Schema describing the arguments object a tool accepts. */
export interface ToolInputSchema {
  /** Always the literal `'object'`. */
  type: 'object';

  /** Per-argument definitions, keyed by argument name. */
  properties: Record<string, unknown>;

  /** Names of the arguments that must be supplied; optional. */
  required?: string[];
}
|
||||
|
||||
/** A callable tool: metadata, an input schema and an async implementation. */
export interface Tool {
  /** Name under which the tool is registered and looked up. */
  name: string;

  /** Human-readable description of what the tool does. */
  description: string;

  /** Schema of the arguments object passed to `execute`. */
  inputSchema: ToolInputSchema;

  /**
   * Runs the tool.
   *
   * @param args Untyped arguments; implementations narrow them as needed.
   * @returns The outcome of the run.
   */
  execute(args: unknown): Promise<ToolResult>;
}
|
||||
|
||||
/** A request to invoke one tool. */
export interface ToolCall {
  /** Identifier of this call. */
  id: string;

  /** Name of the tool to invoke. */
  name: string;

  /** Untyped arguments for the tool. */
  args: unknown;
}
|
||||
|
||||
/** Outcome of running a tool. */
export interface ToolResult {
  /** Whether the tool run succeeded. */
  success: boolean;

  /** Text produced by the tool. */
  output: string;

  /** Error description; optional. */
  error?: string;
}
|
||||
|
||||
/** Content block for assistant messages containing tool calls. */
export interface ToolUseBlock {
  /** Always the literal `'tool_use'`. */
  type: 'tool_use';

  /** Identifier of this tool call. */
  id: string;

  /** Name of the tool being called. */
  name: string;

  /** Untyped input for the tool. */
  input: unknown;
}
|
||||
|
||||
/** Content block for user messages returning tool results. */
export interface ToolResultBlock {
  /** Always the literal `'tool_result'`. */
  type: 'tool_result';

  /** Identifier of the tool-use block this result belongs to. */
  tool_use_id: string;

  /** The result text. */
  content: string;

  /** Marks the result as an error; optional. */
  is_error?: boolean;
}
|
||||
|
||||
/** Message from the assistant requesting tool use. */
export interface ToolUseMessage {
  /** Always the literal `'assistant'`. */
  role: 'assistant';

  /** The tool calls being requested. */
  content: ToolUseBlock[];
}
|
||||
|
||||
/** Message from the user returning tool results. */
export interface ToolResultMessage {
  /** Always the literal `'user'`. */
  role: 'user';

  /** The tool results being returned. */
  content: ToolResultBlock[];
}
|
||||
Reference in New Issue
Block a user