import { describe, it, expect, vi } from 'vitest';
import { NativeAgent } from './agent.js';
import type { ModelClient, ChatRequest, ChatResponse } from '../../models/types.js';
import { ToolRegistry, ToolExecutor } from '../../tools/index.js';
import { HookEngine } from '../../hooks/index.js';
import type { Tool } from '../../tools/index.js';

/**
 * Single-turn behaviour of NativeAgent (no tool registry / executor supplied):
 * history bookkeeping, session delegation, cancellation, empty-response
 * fallback and model timeouts.
 */
describe('NativeAgent', () => {
  /** Builds a model client whose `chat` always resolves to a plain "Hello!" reply. */
  const createMockClient = (): ModelClient => ({
    chat: vi.fn().mockResolvedValue({
      content: 'Hello!',
      stopReason: 'end_turn',
      usage: { inputTokens: 10, outputTokens: 5 },
    } satisfies ChatResponse),
  });

  it('processes messages and maintains history', async () => {
    const mockClient = createMockClient();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
    });

    const response = await agent.process('Hi');

    expect(response).toBe('Hello!');
    expect(mockClient.chat).toHaveBeenCalledWith(expect.objectContaining({
      messages: [{ role: 'user', content: 'Hi' }],
      system: 'You are helpful.',
      signal: expect.any(AbortSignal),
    }));

    const history = agent.getHistory();
    expect(history).toHaveLength(2);
    expect(history[0]).toEqual({ role: 'user', content: 'Hi' });
    expect(history[1]).toEqual({ role: 'assistant', content: 'Hello!' });
  });

  it('resets conversation history', async () => {
    const mockClient = createMockClient();
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
    });

    await agent.process('Hi');
    agent.reset();

    expect(agent.getHistory()).toHaveLength(0);
  });

  it('uses session when provided', async () => {
    const mockClient = createMockClient();
    const mockSession = {
      id: 'test-session',
      getHistory: vi.fn().mockReturnValue([]),
      addMessage: vi.fn(),
      clear: vi.fn(),
      replaceHistory: vi.fn(),
      getConfig: vi.fn().mockReturnValue(undefined),
      setConfig: vi.fn(),
      deleteConfig: vi.fn(),
    };
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      session: mockSession,
    });

    await agent.process('Hi');

    expect(mockSession.addMessage).toHaveBeenCalledTimes(2);
    expect(mockSession.addMessage).toHaveBeenNthCalledWith(1, { role: 'user', content: 'Hi' });
    expect(mockSession.addMessage).toHaveBeenNthCalledWith(2, { role: 'assistant', content: 'Hello!' });
  });

  it('supports cancellation during single-turn model wait', async () => {
    // The mock's chat() blocks on this promise until the test calls release().
    let release!: () => void;
    const blocked = new Promise<void>((resolve) => {
      release = resolve;
    });
    const mockClient: ModelClient = {
      chat: vi.fn(async () => {
        await blocked;
        return {
          content: 'Late response',
          stopReason: 'end_turn',
          usage: { inputTokens: 10, outputTokens: 5 },
        } satisfies ChatResponse;
      }),
    };
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
    });

    const pending = agent.process('Please wait');
    // Yield one microtask before asserting that the agent is cancellable.
    await new Promise<void>((resolve) => queueMicrotask(resolve));
    expect(agent.isCancellable()).toBe(true);

    agent.cancel();
    release();

    const response = await pending;
    expect(response).toBe('Operation cancelled by user.');
    expect(agent.isCancellable()).toBe(false);

    const history = agent.getHistory();
    expect(history[history.length - 1]).toEqual({
      role: 'assistant',
      content: 'Operation cancelled by user.',
    });
  });

  it('returns fallback text when model response is empty', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
        content: '',
        stopReason: 'end_turn',
        usage: { inputTokens: 10, outputTokens: 5 },
      }),
    };
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
    });

    const response = await agent.process('Hi');

    expect(response).toBe('I could not generate a response for that. Please try again.');
    const history = agent.getHistory();
    expect(history[history.length - 1]).toEqual({
      role: 'assistant',
      content: 'I could not generate a response for that. Please try again.',
    });
  });

  it('times out single-turn model calls', async () => {
    // chat() returns a promise that never settles, so only the timeout can end the call.
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => new Promise(() => {})),
    };
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      modelTimeoutMs: 10,
    });

    await expect(agent.process('Hi')).rejects.toThrow('Model request timed out after 10ms');
  });
});

// Simple test tool
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echo',
  inputSchema: { type: 'object', properties: { text: { type: 'string' } }, required: ['text'] },
  execute: async (args) => ({ success: true, output: (args as { text: string }).text }),
};

// Second tool, registered mid-test to change the registry's tool inventory
const greetTool: Tool = {
  name: 'test.greet',
  description: 'Greet',
  inputSchema: { type: 'object', properties: {} },
  execute: async () => ({ success: true, output: 'hi' }),
};

/** Options object accepted by the NativeAgent constructor. */
type NativeAgentOptions = ConstructorParameters<typeof NativeAgent>[0];

/**
 * Builds a NativeAgent wired to a fresh ToolRegistry (with `echoTool`
 * registered), an empty-policy HookEngine and a ToolExecutor.
 *
 * @param modelClient - Mock model client the agent should talk to.
 * @param overrides - Extra constructor options (e.g. `maxIterations`), spread
 *   last so they take precedence over the defaults.
 * @returns The agent plus its registry, so tests can register more tools later.
 */
const createToolAgent = (
  modelClient: ModelClient,
  overrides: Partial<NativeAgentOptions> = {},
): { agent: NativeAgent; registry: ToolRegistry } => {
  const registry = new ToolRegistry();
  registry.register(echoTool);
  const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
  const executor = new ToolExecutor(registry, hooks);
  const agent = new NativeAgent({
    modelClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
    ...overrides,
  });
  return { agent, registry };
};

/**
 * Returns the request passed to the `index`-th (0-based) `chat` call.
 * `client.chat` must be a `vi.fn()` mock.
 */
const chatRequestAt = (client: ModelClient, index: number): ChatRequest =>
  (client.chat as ReturnType<typeof vi.fn>).mock.calls[index][0] as ChatRequest;

/**
 * Tool-loop behaviour of NativeAgent when a registry and executor are
 * supplied: tool execution, iteration limits, loop detection, textual
 * tool_use recovery, tool inventory notes and error handling.
 */
describe('NativeAgent tool loop', () => {
  it('executes tool calls and feeds results back', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          // First call: model requests tool use
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hello' } }],
          };
        }
        // Second call: model gives final text response
        return {
          content: 'The tool returned: hello',
          stopReason: 'end_turn',
          usage: { inputTokens: 15, outputTokens: 10 },
        };
      }),
    };
    const { agent } = createToolAgent(mockClient);

    const response = await agent.process('echo hello');

    expect(response).toBe('The tool returned: hello');
    expect(mockClient.chat).toHaveBeenCalledTimes(2);
  });

  it('respects max iterations when tool calls vary', async () => {
    // Model always returns tool_use but with different args each time (no loop detection)
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        return {
          content: '',
          stopReason: 'tool_use',
          usage: { inputTokens: 10, outputTokens: 5 },
          toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `attempt_${callCount}` } }],
        };
      }),
    };
    const { agent } = createToolAgent(mockClient, { maxIterations: 3 });

    const response = await agent.process('loop forever');

    expect(response).toContain('max iterations');
    expect(mockClient.chat).toHaveBeenCalledTimes(3);
  });

  it('nudges model after same tool called too many times with different args', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation((req: ChatRequest) => {
        callCount++;
        // After nudge message, model should respond with text
        const lastMsg = req.messages[req.messages.length - 1];
        const hasNudge = typeof lastMsg?.content === 'string'
          && lastMsg.content.includes('do NOT call it again');
        if (hasNudge) {
          return {
            content: 'Here is what I found from my searches.',
            stopReason: 'end_turn',
            usage: { inputTokens: 10, outputTokens: 5 },
          };
        }
        return {
          content: '',
          stopReason: 'tool_use',
          usage: { inputTokens: 10, outputTokens: 5 },
          toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `query_${callCount}` } }],
        };
      }),
    };
    const { agent } = createToolAgent(mockClient, { maxIterations: 10 });

    const response = await agent.process('search a lot');

    // Model should have responded after receiving the nudge
    expect(response).toBe('Here is what I found from my searches.');
    // 4 tool calls + 1 final response = 5 chat calls
    expect(mockClient.chat).toHaveBeenCalledTimes(5);
  });

  it('detects repeated identical tool calls and breaks the loop', async () => {
    // Model always returns the exact same tool call — simulates local LLM stuck in a loop
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
        content: '',
        stopReason: 'tool_use',
        usage: { inputTokens: 10, outputTokens: 5 },
        toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'same thing' } }],
      }),
    };
    const { agent } = createToolAgent(mockClient, { maxIterations: 10 });

    const response = await agent.process('search for news');

    expect(response).toContain('Tool loop detected');
    expect(response).toContain('same thing'); // includes the last tool result
    // Should break after 3 consecutive identical calls, not 10
    expect(mockClient.chat).toHaveBeenCalledTimes(3);
  });

  it('surfaces warning when model emits textual tool_use block without structured tool calls', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
        content: 'Let me read the full email to evaluate legitimacy:{"type":"tool_use","id":"call_123","name":"gmail_read","input":{"id":"abc"}}',
        stopReason: 'end_turn',
        usage: { inputTokens: 10, outputTokens: 5 },
      }),
    };
    const { agent } = createToolAgent(mockClient);

    const response = await agent.process('read latest email');

    expect(response).toContain('Tool call was emitted as plain text and was not executed.');
    expect(response).toContain('Tool: gmail_read (id: call_123)');
    expect(response).toContain('"type":"tool_use"');
    const history = agent.getHistory();
    expect(history[history.length - 1]).toEqual({ role: 'assistant', content: response });
  });

  it('recovers and executes valid textual tool_use JSON for registered tools', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          return {
            content: 'Running tool now: {"type":"tool_use","id":"call_123","name":"test_echo","input":{"text":"hello"}}',
            stopReason: 'end_turn',
            usage: { inputTokens: 10, outputTokens: 5 },
          };
        }
        return {
          content: 'The tool returned: hello',
          stopReason: 'end_turn',
          usage: { inputTokens: 10, outputTokens: 5 },
        };
      }),
    };
    const { agent } = createToolAgent(mockClient);

    const response = await agent.process('echo hello');

    expect(response).toBe('The tool returned: hello');
    expect(mockClient.chat).toHaveBeenCalledTimes(2);
  });

  it('works without tools (backward compatible)', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
        content: 'Hello!',
        stopReason: 'end_turn',
        usage: { inputTokens: 10, outputTokens: 5 },
      }),
    };
    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
    });

    const response = await agent.process('Hi');

    expect(response).toBe('Hello!');
  });

  it('calls onToolUse callback on start and end', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
          };
        }
        return {
          content: 'Done',
          stopReason: 'end_turn',
          usage: { inputTokens: 15, outputTokens: 10 },
        };
      }),
    };
    const onToolUse = vi.fn();
    const { agent } = createToolAgent(mockClient, { onToolUse });

    await agent.process('echo hi');

    expect(onToolUse).toHaveBeenCalledTimes(2);
    expect(onToolUse).toHaveBeenNthCalledWith(1, expect.objectContaining({
      type: 'start',
      tool: 'test.echo',
      args: { text: 'hi' },
    }));
    expect(onToolUse).toHaveBeenNthCalledWith(2, expect.objectContaining({
      type: 'end',
      tool: 'test.echo',
      result: expect.objectContaining({ success: true, output: 'hi' }),
    }));
  });

  it('injects tool inventory note when history exists and fingerprint changes', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        return {
          content: `Response ${callCount}`,
          stopReason: 'end_turn',
          usage: { inputTokens: 10, outputTokens: 5 },
        };
      }),
    };
    const { agent, registry } = createToolAgent(mockClient);

    // First message — no prior history, so no inventory note
    await agent.process('Hi');
    const firstCall = chatRequestAt(mockClient, 0);
    expect(firstCall.system).toBe('You are helpful.');

    // Second message — history exists but fingerprint hasn't changed, no note
    await agent.process('Hello again');
    const secondCall = chatRequestAt(mockClient, 1);
    expect(secondCall.system).toBe('You are helpful.');

    // Now add a new tool to change the fingerprint
    registry.register(greetTool);

    // Third message — history exists AND fingerprint changed, should inject note
    await agent.process('What can you do?');
    const thirdCall = chatRequestAt(mockClient, 2);
    expect(thirdCall.system).toContain('[Tool inventory updated');
    expect(thirdCall.system).toContain('test_echo');
    expect(thirdCall.system).toContain('test_greet');
  });

  it('does not inject tool inventory note on fresh session', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
        content: 'Hello!',
        stopReason: 'end_turn',
        usage: { inputTokens: 10, outputTokens: 5 },
      }),
    };
    const { agent } = createToolAgent(mockClient);

    // First message ever — only one message in history (the user message just added)
    await agent.process('Hi');
    const call = chatRequestAt(mockClient, 0);
    expect(call.system).toBe('You are helpful.');
    expect(call.system).not.toContain('Tool inventory updated');
  });

  it('only injects tool inventory note once per fingerprint change', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        return {
          content: `Response ${callCount}`,
          stopReason: 'end_turn',
          usage: { inputTokens: 10, outputTokens: 5 },
        };
      }),
    };
    const { agent, registry } = createToolAgent(mockClient);

    // Build up history
    await agent.process('Hi');
    await agent.process('Hello');

    // Add a new tool
    registry.register(greetTool);

    // First call after change — note injected
    await agent.process('What tools?');
    const thirdCall = chatRequestAt(mockClient, 2);
    expect(thirdCall.system).toContain('[Tool inventory updated');

    // Second call with same tools — no note (fingerprint matches)
    await agent.process('Anything else?');
    const fourthCall = chatRequestAt(mockClient, 3);
    expect(fourthCall.system).toBe('You are helpful.');
  });

  it('resets tool fingerprint on reset()', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
        content: 'Hello!',
        stopReason: 'end_turn',
        usage: { inputTokens: 10, outputTokens: 5 },
      }),
    };
    const { agent } = createToolAgent(mockClient);

    // Build history and establish fingerprint
    await agent.process('Hi');
    await agent.process('Hello');

    // Reset clears fingerprint
    agent.reset();

    // After reset, first message has no history so no note
    await agent.process('Hi again');
    const call = chatRequestAt(mockClient, 2);
    expect(call.system).not.toContain('Tool inventory updated');
  });

  it('catches model errors in tool loop and returns error message', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          // First call: model requests tool use
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
          };
        }
        // Second call: model throws an error
        throw new Error('Connection reset by peer');
      }),
    };
    const { agent } = createToolAgent(mockClient);

    const response = await agent.process('echo hi');

    expect(response).toContain('Error in tool loop');
    expect(response).toContain('Connection reset by peer');

    // Error should be persisted to history
    const history = agent.getHistory();
    expect(history[history.length - 1].role).toBe('assistant');
    expect(history[history.length - 1].content).toContain('Error in tool loop');
  });

  it('handles multiple tool calls in single response', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => {
        callCount++;
        if (callCount === 1) {
          return {
            content: '',
            stopReason: 'tool_use',
            usage: { inputTokens: 10, outputTokens: 5 },
            toolCalls: [
              { id: 'call_1', name: 'test.echo', args: { text: 'first' } },
              { id: 'call_2', name: 'test.echo', args: { text: 'second' } },
            ],
          };
        }
        return {
          content: 'Got both results',
          stopReason: 'end_turn',
          usage: { inputTokens: 15, outputTokens: 10 },
        };
      }),
    };
    const { agent } = createToolAgent(mockClient);

    const response = await agent.process('echo both');

    expect(response).toBe('Got both results');
    expect(mockClient.chat).toHaveBeenCalledTimes(2);
  });

  it('returns fallback text when tool loop final response is empty', async () => {
    const mockClient: ModelClient = {
      chat: vi
        .fn()
        .mockResolvedValueOnce({
          content: '',
          stopReason: 'tool_use',
          usage: { inputTokens: 10, outputTokens: 5 },
          toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hello' } }],
        })
        .mockResolvedValueOnce({
          content: '',
          stopReason: 'end_turn',
          usage: { inputTokens: 12, outputTokens: 4 },
        }),
    };
    const { agent } = createToolAgent(mockClient);

    const response = await agent.process('echo hello');

    expect(response).toBe('I could not generate a response for that. Please try again.');
    const history = agent.getHistory();
    expect(history[history.length - 1]).toEqual({
      role: 'assistant',
      content: 'I could not generate a response for that. Please try again.',
    });
  });

  it('times out tool-loop model calls and returns an error message', async () => {
    // chat() returns a promise that never settles, so only the timeout can end the call.
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation(() => new Promise(() => {})),
    };
    const { agent } = createToolAgent(mockClient, { modelTimeoutMs: 10 });

    const response = await agent.process('echo hello');

    expect(response).toContain('Error in tool loop');
    expect(response).toContain('Model request timed out after 10ms');
  });
});