test: add end-to-end tool integration tests

Tests the full agent -> tool execution -> model feedback loop:
- Shell tool execution with a mock model
- Multi-tool chaining (file.write -> file.read) across iterations
- Verification that tool results are correctly passed back to the model
2026-02-05 17:50:17 -08:00
parent b9601b50ab
commit b686eb3fa3
1 changed files with 149 additions and 0 deletions
@@ -0,0 +1,149 @@
+// src/tools/integration.test.ts
+import { describe, it, expect, vi } from 'vitest';
+import { NativeAgent } from '../backends/native/agent.js';
+import { ToolRegistry } from './registry.js';
+import { ToolExecutor } from './executor.js';
+import { HookEngine } from '../hooks/engine.js';
+import { shellExecTool } from './builtin/shell.js';
+import { fileReadTool } from './builtin/file-read.js';
+import { fileWriteTool } from './builtin/file-write.js';
+import type { ModelClient, ChatResponse } from '../models/types.js';
+import { mkdtempSync, rmSync } from 'fs';
+import { join } from 'path';
+import { tmpdir } from 'os';
+
+describe('Tool integration (end-to-end)', () => {
+  // End-to-end: a scripted model asks for shell.exec once, then answers.
+  it('agent uses shell tool and returns result', async () => {
+    // First model turn: request a single shell command.
+    const toolRequest = {
+      content: '',
+      stopReason: 'tool_use',
+      usage: { inputTokens: 10, outputTokens: 5 },
+      toolCalls: [{ id: 'c1', name: 'shell.exec', args: { command: 'echo integration_test' } }],
+    } satisfies ChatResponse;
+    // Every later turn: finish the conversation with plain text.
+    const finalAnswer = {
+      content: 'The command output was: integration_test',
+      stopReason: 'end_turn',
+      usage: { inputTokens: 20, outputTokens: 10 },
+    } satisfies ChatResponse;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockReturnValueOnce(toolRequest).mockReturnValue(finalAnswer),
+    };
+
+    // Real registry and executor; the hook engine gets empty confirm/log/silent lists.
+    const toolRegistry = new ToolRegistry();
+    toolRegistry.register(shellExecTool);
+    const toolExecutor = new ToolExecutor(
+      toolRegistry,
+      new HookEngine({ confirm: [], log: [], silent: [] }),
+    );
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You have tools.',
+      toolRegistry,
+      toolExecutor,
+    });
+
+    const reply = await agent.process('run echo integration_test');
+    expect(reply).toContain('integration_test');
+  });
+
+  // Scripted model: write a file, read it back, then answer (three model calls).
+  it('agent chains multiple tools across iterations', async () => {
+    const dir = mkdtempSync(join(tmpdir(), 'flynn-integ-'));
+    // Setup lives inside try so a failure there cannot leak the temp directory.
+    try {
+      let callCount = 0;
+      const mockClient: ModelClient = {
+        chat: vi.fn().mockImplementation(() => {
+          callCount++;
+          if (callCount === 1) {
+            return {
+              content: '',
+              stopReason: 'tool_use',
+              usage: { inputTokens: 10, outputTokens: 5 },
+              toolCalls: [{ id: 'c1', name: 'file.write', args: { path: join(dir, 'test.txt'), content: 'hello' } }],
+            } satisfies ChatResponse;
+          }
+          if (callCount === 2) {
+            return {
+              content: '',
+              stopReason: 'tool_use',
+              usage: { inputTokens: 15, outputTokens: 8 },
+              toolCalls: [{ id: 'c2', name: 'file.read', args: { path: join(dir, 'test.txt') } }],
+            } satisfies ChatResponse;
+          }
+          return {
+            content: 'I wrote and read the file. It contains: hello',
+            stopReason: 'end_turn',
+            usage: { inputTokens: 20, outputTokens: 10 },
+          } satisfies ChatResponse;
+        }),
+      };
+      const registry = new ToolRegistry();
+      registry.register(fileWriteTool);
+      registry.register(fileReadTool);
+      const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+      const executor = new ToolExecutor(registry, hooks);
+      const agent = new NativeAgent({
+        modelClient: mockClient,
+        systemPrompt: 'You have file tools.',
+        toolRegistry: registry,
+        toolExecutor: executor,
+      });
+
+      const result = await agent.process('write hello to test.txt then read it');
+      expect(result).toContain('hello');
+      expect(mockClient.chat).toHaveBeenCalledTimes(3);
+    } finally {
+      // force: a missing directory must not mask the real test failure.
+      rmSync(dir, { recursive: true, force: true });
+    }
+  });
+
+  // Inspects the last message the model receives on its second call, after shell.exec ran.
+  it('verifies tool results are passed back to model correctly', async () => {
+    // Captured in the mock, asserted after process(): a failed expect() is then reported
+    // by the test itself, not thrown from inside chat() where the agent could intercept it.
+    let followUp: unknown;
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation((request: { messages: unknown[] }) => {
+        callCount++;
+        if (callCount === 1) {
+          return {
+            content: '',
+            stopReason: 'tool_use',
+            usage: { inputTokens: 10, outputTokens: 5 },
+            toolCalls: [{ id: 'c1', name: 'shell.exec', args: { command: 'echo verify_pass' } }],
+          } satisfies ChatResponse;
+        }
+        if (callCount === 2) followUp = request.messages[request.messages.length - 1];
+        return {
+          content: 'Verified tool result',
+          stopReason: 'end_turn',
+          usage: { inputTokens: 20, outputTokens: 10 },
+        } satisfies ChatResponse;
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(shellExecTool);
+    const executor = new ToolExecutor(registry, new HookEngine({ confirm: [], log: [], silent: [] }));
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You have tools.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    expect(await agent.process('verify tool results')).toBe('Verified tool result');
+    expect(mockClient.chat).toHaveBeenCalledTimes(2);
+    const lastMsg = followUp as { content: unknown[] };
+    const resultBlock = lastMsg.content[0] as { type: string; tool_use_id: string; content: string };
+    expect(resultBlock).toMatchObject({ type: 'tool_result', tool_use_id: 'c1' });
+    expect(resultBlock.content).toContain('verify_pass');
+  });
+});