import { describe, it, expect, vi, beforeEach } from 'vitest';
import { NativeAgent } from './agent.js';
import type { ModelClient, ChatResponse } from '../../models/types.js';
import { ToolRegistry, ToolExecutor } from '../../tools/index.js';
import { HookEngine } from '../../hooks/index.js';
import type { Tool, ToolResult } from '../../tools/index.js';

/**
 * Basic conversation behaviour of NativeAgent without any tools configured:
 * single-turn processing, history bookkeeping, reset, and delegation to an
 * externally supplied session object.
 */
describe('NativeAgent', () => {
  /** Builds a model client whose `chat` always resolves to a plain "Hello!" reply. */
  const createMockClient = (): ModelClient => ({
    chat: vi.fn().mockResolvedValue({
      content: 'Hello!',
      stopReason: 'end_turn',
      usage: { inputTokens: 10, outputTokens: 5 },
    } satisfies ChatResponse),
  });

  it('processes messages and maintains history', async () => {
    const mockClient = createMockClient();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
    });

    const response = await agent.process('Hi');

    expect(response).toBe('Hello!');
    expect(mockClient.chat).toHaveBeenCalledWith({
      messages: [{ role: 'user', content: 'Hi' }],
      system: 'You are helpful.',
    });

    const history = agent.getHistory();
    expect(history).toHaveLength(2);
    expect(history[0]).toEqual({ role: 'user', content: 'Hi' });
    expect(history[1]).toEqual({ role: 'assistant', content: 'Hello!' });
  });

  it('resets conversation history', async () => {
    const mockClient = createMockClient();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
    });

    await agent.process('Hi');
    agent.reset();

    expect(agent.getHistory()).toHaveLength(0);
  });

  it('uses session when provided', async () => {
    const mockClient = createMockClient();
    const mockSession = {
      id: 'test-session',
      getHistory: vi.fn().mockReturnValue([]),
      addMessage: vi.fn(),
      clear: vi.fn(),
      replaceHistory: vi.fn(),
    };
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      session: mockSession,
    });

    await agent.process('Hi');

    expect(mockSession.addMessage).toHaveBeenCalledTimes(2);
    expect(mockSession.addMessage).toHaveBeenNthCalledWith(1, { role: 'user', content: 'Hi' });
    expect(mockSession.addMessage).toHaveBeenNthCalledWith(2, { role: 'assistant', content: 'Hello!' });
  });
});

// Simple test tool: returns its `text` argument unchanged as the tool output.
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echo',
  inputSchema: { type: 'object', properties: { text: { type: 'string' } }, required: ['text'] },
  execute: async (args) => ({ success: true, output: (args as { text: string }).text }),
};

/**
 * Creates a fresh registry with `echoTool` registered, plus an executor wired
 * to a hook engine that has empty confirm/log/silent lists. Each test gets its
 * own instances so registering extra tools in one test cannot leak into another.
 */
const createEchoToolkit = (): { registry: ToolRegistry; executor: ToolExecutor } => {
  const registry = new ToolRegistry();
  registry.register(echoTool);
  const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
  const executor = new ToolExecutor(registry, hooks);
  return { registry, executor };
};

/**
 * Returns the first argument (the request object) of the `index`-th recorded
 * call to `client.chat`. `client.chat` must be a `vi.fn()` mock.
 *
 * Throws a descriptive error when that call was never made, instead of the
 * opaque "cannot read properties of undefined" the raw indexing would produce.
 */
const chatRequestAt = (client: ModelClient, index: number) => {
  // `ReturnType` needs its type argument; `typeof vi.fn` yields vitest's mock type.
  const recorded = (client.chat as ReturnType<typeof vi.fn>).mock.calls[index];
  if (!recorded) {
    throw new Error(`expected modelClient.chat to have been called at least ${index + 1} time(s)`);
  }
  return recorded[0];
};

/**
 * Tool-loop behaviour of NativeAgent: executing tool calls, iteration limits,
 * loop/nudge detection, the onToolUse callback, tool inventory notes in the
 * system prompt, and error handling while looping.
 */
describe('NativeAgent tool loop', () => {
  it('executes tool calls and feeds results back', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          // First call: model requests tool use
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hello' } }],
          };
        }
        // Second call: model gives final text response
        return {
          content: 'The tool returned: hello',
          stopReason: 'end_turn',
          usage: { inputTokens: 15, outputTokens: 10 },
        };
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
    });

    const response = await agent.process('echo hello');

    expect(response).toBe('The tool returned: hello');
    expect(mockClient.chat).toHaveBeenCalledTimes(2);
  });

  it('respects max iterations when tool calls vary', async () => {
    // Model always returns tool_use but with different args each time (no loop detection)
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        return {
          content: '',
          stopReason: 'tool_use',
          usage: { inputTokens: 10, outputTokens: 5 },
          toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `attempt_${callCount}` } }],
        };
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
      maxIterations: 3,
    });

    const response = await agent.process('loop forever');

    expect(response).toContain('max iterations');
    expect(mockClient.chat).toHaveBeenCalledTimes(3);
  });

  it('nudges model after same tool called too many times with different args', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation((req: { messages: Array<{ content?: unknown }> }) => {
        callCount++;
        // After nudge message, model should respond with text.
        // The nudge is looked for in the block-array content of the last message.
        const lastContent = req.messages[req.messages.length - 1]?.content;
        const hasNudge =
          Array.isArray(lastContent) &&
          lastContent.some(
            (block: { content?: unknown }) =>
              typeof block.content === 'string' && block.content.includes('do NOT call it again'),
          );
        if (hasNudge) {
          return {
            content: 'Here is what I found from my searches.',
            stopReason: 'end_turn',
            usage: { inputTokens: 10, outputTokens: 5 },
          };
        }
        return {
          content: '',
          stopReason: 'tool_use',
          usage: { inputTokens: 10, outputTokens: 5 },
          toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `query_${callCount}` } }],
        };
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
      maxIterations: 10,
    });

    const response = await agent.process('search a lot');

    // Model should have responded after receiving the nudge
    expect(response).toBe('Here is what I found from my searches.');
    // 4 tool calls + 1 final response = 5 chat calls
    expect(mockClient.chat).toHaveBeenCalledTimes(5);
  });

  it('detects repeated identical tool calls and breaks the loop', async () => {
    // Model always returns the exact same tool call — simulates local LLM stuck in a loop
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
        content: '',
        stopReason: 'tool_use',
        usage: { inputTokens: 10, outputTokens: 5 },
        toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'same thing' } }],
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
      maxIterations: 10,
    });

    const response = await agent.process('search for news');

    expect(response).toContain('Tool loop detected');
    expect(response).toContain('same thing'); // includes the last tool result
    // Should break after 3 consecutive identical calls, not 10
    expect(mockClient.chat).toHaveBeenCalledTimes(3);
  });

  it('works without tools (backward compatible)', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
        content: 'Hello!',
        stopReason: 'end_turn',
        usage: { inputTokens: 10, outputTokens: 5 },
      }),
    };
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
    });

    const response = await agent.process('Hi');

    expect(response).toBe('Hello!');
  });

  it('calls onToolUse callback on start and end', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
          };
        }
        return {
          content: 'Done',
          stopReason: 'end_turn',
          usage: { inputTokens: 15, outputTokens: 10 },
        };
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const onToolUse = vi.fn();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
      onToolUse,
    });

    await agent.process('echo hi');

    expect(onToolUse).toHaveBeenCalledTimes(2);
    expect(onToolUse).toHaveBeenNthCalledWith(1, expect.objectContaining({
      type: 'start',
      tool: 'test.echo',
      args: { text: 'hi' },
    }));
    expect(onToolUse).toHaveBeenNthCalledWith(2, expect.objectContaining({
      type: 'end',
      tool: 'test.echo',
      result: expect.objectContaining({ success: true, output: 'hi' }),
    }));
  });

  it('injects tool inventory note when history exists and fingerprint changes', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        return {
          content: `Response ${callCount}`,
          stopReason: 'end_turn',
          usage: { inputTokens: 10, outputTokens: 5 },
        };
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
    });

    // First message — no prior history, so no inventory note
    await agent.process('Hi');
    const firstCall = chatRequestAt(mockClient, 0);
    expect(firstCall.system).toBe('You are helpful.');

    // Second message — history exists but fingerprint hasn't changed, no note
    await agent.process('Hello again');
    const secondCall = chatRequestAt(mockClient, 1);
    expect(secondCall.system).toBe('You are helpful.');

    // Now add a new tool to change the fingerprint
    const newTool: Tool = {
      name: 'test.greet',
      description: 'Greet',
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'hi' }),
    };
    registry.register(newTool);

    // Third message — history exists AND fingerprint changed, should inject note
    await agent.process('What can you do?');
    const thirdCall = chatRequestAt(mockClient, 2);
    expect(thirdCall.system).toContain('[Tool inventory updated');
    expect(thirdCall.system).toContain('test_echo');
    expect(thirdCall.system).toContain('test_greet');
  });

  it('does not inject tool inventory note on fresh session', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
        content: 'Hello!',
        stopReason: 'end_turn',
        usage: { inputTokens: 10, outputTokens: 5 },
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
    });

    // First message ever — only one message in history (the user message just added)
    await agent.process('Hi');
    const call = chatRequestAt(mockClient, 0);

    expect(call.system).toBe('You are helpful.');
    expect(call.system).not.toContain('Tool inventory updated');
  });

  it('only injects tool inventory note once per fingerprint change', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        return {
          content: `Response ${callCount}`,
          stopReason: 'end_turn',
          usage: { inputTokens: 10, outputTokens: 5 },
        };
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
    });

    // Build up history
    await agent.process('Hi');
    await agent.process('Hello');

    // Add a new tool
    const newTool: Tool = {
      name: 'test.greet',
      description: 'Greet',
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'hi' }),
    };
    registry.register(newTool);

    // First call after change — note injected
    await agent.process('What tools?');
    const thirdCall = chatRequestAt(mockClient, 2);
    expect(thirdCall.system).toContain('[Tool inventory updated');

    // Second call with same tools — no note (fingerprint matches)
    await agent.process('Anything else?');
    const fourthCall = chatRequestAt(mockClient, 3);
    expect(fourthCall.system).toBe('You are helpful.');
  });

  it('resets tool fingerprint on reset()', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
        content: 'Hello!',
        stopReason: 'end_turn',
        usage: { inputTokens: 10, outputTokens: 5 },
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
    });

    // Build history and establish fingerprint
    await agent.process('Hi');
    await agent.process('Hello');

    // Reset clears fingerprint
    agent.reset();

    // After reset, first message has no history so no note
    await agent.process('Hi again');
    const call = chatRequestAt(mockClient, 2);
    expect(call.system).not.toContain('Tool inventory updated');
  });

  it('catches model errors in tool loop and returns error message', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          // First call: model requests tool use
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
          };
        }
        // Second call: model throws an error
        throw new Error('Connection reset by peer');
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
    });

    const response = await agent.process('echo hi');

    expect(response).toContain('Error in tool loop');
    expect(response).toContain('Connection reset by peer');

    // Error should be persisted to history
    const history = agent.getHistory();
    expect(history[history.length - 1].role).toBe('assistant');
    expect(history[history.length - 1].content).toContain('Error in tool loop');
  });

  it('handles multiple tool calls in single response', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [
              { id: 'call_1', name: 'test.echo', args: { text: 'first' } },
              { id: 'call_2', name: 'test.echo', args: { text: 'second' } },
            ],
          };
        }
        return {
          content: 'Got both results',
          stopReason: 'end_turn',
          usage: { inputTokens: 15, outputTokens: 10 },
        };
      }),
    };
    const { registry, executor } = createEchoToolkit();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry: registry,
      toolExecutor: executor,
    });

    const response = await agent.process('echo both');

    expect(response).toBe('Got both results');
    expect(mockClient.chat).toHaveBeenCalledTimes(2);
  });
});