// src/tools/integration.test.ts
import { describe, it, expect, vi } from 'vitest';
import { NativeAgent } from '../backends/native/agent.js';
import { ToolRegistry } from './registry.js';
import { ToolExecutor } from './executor.js';
import { HookEngine } from '../hooks/engine.js';
import { shellExecTool } from './builtin/shell.js';
import { fileReadTool } from './builtin/file-read.js';
import { fileWriteTool } from './builtin/file-write.js';
import type { ModelClient, ChatResponse } from '../models/types.js';
import { mkdtempSync, rmSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';

/** A tool definition as accepted by `ToolRegistry.register`. */
type RegisteredTool = Parameters<ToolRegistry['register']>[0];

/**
 * Builds a NativeAgent wired to a fresh registry, hook engine and executor.
 *
 * Every test in this suite needs the same wiring; only the model client and
 * the set of registered tools differ.
 *
 * @param modelClient - The (mocked) model client the agent talks to.
 * @param tools - Tools to register before the agent is constructed.
 * @returns An agent configured with the `autonomous` autonomy level.
 */
function createAgent(modelClient: ModelClient, tools: readonly RegisteredTool[]): NativeAgent {
  const registry = new ToolRegistry();
  for (const tool of tools) {
    registry.register(tool);
  }

  // Empty hook lists: no tool is routed through confirm/log/silent hooks.
  const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
  const executor = new ToolExecutor(registry, hooks);

  return new NativeAgent({
    modelClient,
    systemPrompt: 'You have tools.',
    toolRegistry: registry,
    toolExecutor: executor,
    toolPolicyContext: { autonomyLevel: 'autonomous' },
  });
}

describe('Tool integration (end-to-end)', () => {
  it('agent uses shell tool and returns result', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          // First turn: the model asks for a shell command to be run.
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [{ id: 'c1', name: 'shell.exec', args: { command: 'echo integration_test' } }],
          } satisfies ChatResponse;
        }
        // Second turn: the model produces its final answer.
        return {
          content: 'The command output was: integration_test',
          stopReason: 'end_turn',
          usage: { inputTokens: 20, outputTokens: 10 },
        } satisfies ChatResponse;
      }),
    };

    const agent = createAgent(mockClient, [shellExecTool]);

    const result = await agent.process('run echo integration_test');
    expect(result).toContain('integration_test');
  });

  it('agent chains multiple tools across iterations', async () => {
    const dir = mkdtempSync(join(tmpdir(), 'flynn-integ-'));
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          // First turn: write the file.
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [{ id: 'c1', name: 'file.write', args: { path: join(dir, 'test.txt'), content: 'hello' } }],
          } satisfies ChatResponse;
        }
        if (callCount === 2) {
          // Second turn: read the same file back.
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 15, outputTokens: 8 },
            toolCalls: [{ id: 'c2', name: 'file.read', args: { path: join(dir, 'test.txt') } }],
          } satisfies ChatResponse;
        }
        // Third turn: final answer.
        return {
          content: 'I wrote and read the file. It contains: hello',
          stopReason: 'end_turn',
          usage: { inputTokens: 20, outputTokens: 10 },
        } satisfies ChatResponse;
      }),
    };

    const agent = createAgent(mockClient, [fileWriteTool, fileReadTool]);

    try {
      const result = await agent.process('write hello to test.txt then read it');
      expect(result).toContain('hello');
      expect(mockClient.chat).toHaveBeenCalledTimes(3);
    } finally {
      // `force` keeps cleanup from throwing (and masking the real failure)
      // if the directory is already gone.
      rmSync(dir, { recursive: true, force: true });
    }
  });

  it('verifies tool results are passed back to model correctly', async () => {
    // One snapshot of `request.messages` per model call. A copy is taken at
    // call time because the agent may keep appending to the same array after
    // the call returns.
    const seenMessages: unknown[][] = [];
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation((request: { messages: unknown[] }) => {
        seenMessages.push([...request.messages]);
        if (seenMessages.length === 1) {
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [{ id: 'c1', name: 'shell.exec', args: { command: 'echo verify_pass' } }],
          } satisfies ChatResponse;
        }
        return {
          content: 'Verified tool result',
          stopReason: 'end_turn',
          usage: { inputTokens: 20, outputTokens: 10 },
        } satisfies ChatResponse;
      }),
    };

    const agent = createAgent(mockClient, [shellExecTool]);

    const result = await agent.process('verify tool results');
    expect(result).toBe('Verified tool result');
    expect(mockClient.chat).toHaveBeenCalledTimes(2);

    // Assertions run here rather than inside the mock so a failure is reported
    // by the test runner directly instead of surfacing as an error thrown from
    // within the agent's model call.
    // Second call: verify the tool result was included in messages
    const secondCallMessages = seenMessages[1] ?? [];
    const lastMsg = secondCallMessages[secondCallMessages.length - 1] as { content: unknown[] };
    const resultBlock = lastMsg.content[0] as { type: string; tool_use_id: string; content: string };
    expect(resultBlock.type).toBe('tool_result');
    expect(resultBlock.tool_use_id).toBe('c1');
    expect(resultBlock.content).toContain('verify_pass');
  });
});