From b686eb3fa311fcf2d197472875063091d4b23cae Mon Sep 17 00:00:00 2001
From: William Valentin
Date: Thu, 5 Feb 2026 17:50:17 -0800
Subject: [PATCH] test: add end-to-end tool integration tests

Tests the full agent -> tool execution -> model feedback loop:
- Shell tool execution with mock model
- Multi-tool chaining (file.write -> file.read) across iterations
- Verification that tool results are correctly passed back to model
---
 src/tools/integration.test.ts | 159 ++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 src/tools/integration.test.ts

diff --git a/src/tools/integration.test.ts b/src/tools/integration.test.ts
new file mode 100644
index 0000000..0102df7
--- /dev/null
+++ b/src/tools/integration.test.ts
@@ -0,0 +1,159 @@
+// src/tools/integration.test.ts
+import { describe, it, expect, vi } from 'vitest';
+import { NativeAgent } from '../backends/native/agent.js';
+import { ToolRegistry } from './registry.js';
+import { ToolExecutor } from './executor.js';
+import { HookEngine } from '../hooks/engine.js';
+import { shellExecTool } from './builtin/shell.js';
+import { fileReadTool } from './builtin/file-read.js';
+import { fileWriteTool } from './builtin/file-write.js';
+import type { ModelClient, ChatResponse } from '../models/types.js';
+import { mkdtempSync, rmSync } from 'fs';
+import { join } from 'path';
+import { tmpdir } from 'os';
+
+/** The part of a chat request that these tests inspect. */
+type ChatRequestLike = { messages: unknown[] };
+
+/**
+ * Builds a mock model client that replays `responses` in order, one per
+ * `chat` call. Calls beyond the end of the script receive the last response.
+ *
+ * @param responses - Scripted model turns; must contain at least one entry.
+ * @param onRequest - Optional observer called with each request and its
+ *   1-based call number before the scripted response is returned.
+ */
+function scriptedClient(
+  responses: ChatResponse[],
+  onRequest?: (request: ChatRequestLike, callNumber: number) => void,
+): ModelClient {
+  let callCount = 0;
+  return {
+    chat: vi.fn().mockImplementation((request: ChatRequestLike) => {
+      callCount++;
+      onRequest?.(request, callCount);
+      return responses[Math.min(callCount, responses.length) - 1];
+    }),
+  };
+}
+
+/**
+ * Wires a `NativeAgent` to `registry` through a `ToolExecutor` whose hook
+ * engine has empty confirm/log/silent lists.
+ */
+function createAgent(modelClient: ModelClient, systemPrompt: string, registry: ToolRegistry): NativeAgent {
+  const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+  const executor = new ToolExecutor(registry, hooks);
+  return new NativeAgent({
+    modelClient,
+    systemPrompt,
+    toolRegistry: registry,
+    toolExecutor: executor,
+  });
+}
+
+describe('Tool integration (end-to-end)', () => {
+  it('agent uses shell tool and returns result', async () => {
+    const mockClient = scriptedClient([
+      {
+        content: '',
+        stopReason: 'tool_use',
+        usage: { inputTokens: 10, outputTokens: 5 },
+        toolCalls: [{ id: 'c1', name: 'shell.exec', args: { command: 'echo integration_test' } }],
+      },
+      {
+        content: 'The command output was: integration_test',
+        stopReason: 'end_turn',
+        usage: { inputTokens: 20, outputTokens: 10 },
+      },
+    ]);
+    const registry = new ToolRegistry();
+    registry.register(shellExecTool);
+    const agent = createAgent(mockClient, 'You have tools.', registry);
+
+    const result = await agent.process('run echo integration_test');
+
+    expect(result).toContain('integration_test');
+    // One tool round-trip means exactly two model turns.
+    expect(mockClient.chat).toHaveBeenCalledTimes(2);
+  });
+
+  it('agent chains multiple tools across iterations', async () => {
+    const dir = mkdtempSync(join(tmpdir(), 'flynn-integ-'));
+    const filePath = join(dir, 'test.txt');
+
+    try {
+      const mockClient = scriptedClient([
+        {
+          content: '',
+          stopReason: 'tool_use',
+          usage: { inputTokens: 10, outputTokens: 5 },
+          toolCalls: [{ id: 'c1', name: 'file.write', args: { path: filePath, content: 'hello' } }],
+        },
+        {
+          content: '',
+          stopReason: 'tool_use',
+          usage: { inputTokens: 15, outputTokens: 8 },
+          toolCalls: [{ id: 'c2', name: 'file.read', args: { path: filePath } }],
+        },
+        {
+          content: 'I wrote and read the file. It contains: hello',
+          stopReason: 'end_turn',
+          usage: { inputTokens: 20, outputTokens: 10 },
+        },
+      ]);
+      const registry = new ToolRegistry();
+      registry.register(fileWriteTool);
+      registry.register(fileReadTool);
+      const agent = createAgent(mockClient, 'You have file tools.', registry);
+
+      const result = await agent.process('write hello to test.txt then read it');
+
+      expect(result).toContain('hello');
+      expect(mockClient.chat).toHaveBeenCalledTimes(3);
+    } finally {
+      // force: a cleanup error must not replace the real test failure.
+      rmSync(dir, { recursive: true, force: true });
+    }
+  });
+
+  it('verifies tool results are passed back to model correctly', async () => {
+    // Captured inside the mock at call time (the agent may append to the
+    // message list later) but asserted in the test body, so a failure cannot
+    // be caught or rewrapped by error handling around the model call.
+    let lastMessageOnSecondCall: unknown;
+    const mockClient = scriptedClient(
+      [
+        {
+          content: '',
+          stopReason: 'tool_use',
+          usage: { inputTokens: 10, outputTokens: 5 },
+          toolCalls: [{ id: 'c1', name: 'shell.exec', args: { command: 'echo verify_pass' } }],
+        },
+        {
+          content: 'Verified tool result',
+          stopReason: 'end_turn',
+          usage: { inputTokens: 20, outputTokens: 10 },
+        },
+      ],
+      (request, callNumber) => {
+        if (callNumber === 2) {
+          lastMessageOnSecondCall = request.messages[request.messages.length - 1];
+        }
+      },
+    );
+    const registry = new ToolRegistry();
+    registry.register(shellExecTool);
+    const agent = createAgent(mockClient, 'You have tools.', registry);
+
+    const result = await agent.process('verify tool results');
+
+    expect(result).toBe('Verified tool result');
+    expect(mockClient.chat).toHaveBeenCalledTimes(2);
+    const lastMsg = lastMessageOnSecondCall as { content: unknown[] };
+    const resultBlock = lastMsg.content[0] as { type: string; tool_use_id: string; content: string };
+    expect(resultBlock.type).toBe('tool_result');
+    expect(resultBlock.tool_use_id).toBe('c1');
+    expect(resultBlock.content).toContain('verify_pass');
+  });
+});