1aab006a7f
- agent.ts: track consecutive calls to the same tool (ignoring args) and inject a nudge after 4 repeats telling the model to summarize and respond, preventing local models from endlessly retrying searches with slight query variations.
- agent.ts: wrap the entire tool loop iteration in try-catch so model/network errors don't crash the daemon; a descriptive error message is returned instead.
- Tests for both: the nudge triggers after 4 same-tool calls, and error recovery persists to history.
546 lines
18 KiB
TypeScript
546 lines
18 KiB
TypeScript
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
import { NativeAgent } from './agent.js';
|
|
import type { ModelClient, ChatResponse } from '../../models/types.js';
|
|
import { ToolRegistry, ToolExecutor } from '../../tools/index.js';
|
|
import { HookEngine } from '../../hooks/index.js';
|
|
import type { Tool, ToolResult } from '../../tools/index.js';
|
|
|
|
/**
 * Baseline NativeAgent behaviour with no tools configured: a single
 * request/response turn, history bookkeeping, reset(), and use of an
 * externally supplied session object.
 */
describe('NativeAgent', () => {
  const SYSTEM_PROMPT = 'You are helpful.';
  const USER_TURN = { role: 'user', content: 'Hi' };
  const ASSISTANT_TURN = { role: 'assistant', content: 'Hello!' };

  /** Builds a ModelClient stub whose chat() always resolves to the canned 'Hello!' reply. */
  function createMockClient(): ModelClient {
    const cannedReply = {
      content: 'Hello!',
      stopReason: 'end_turn',
      usage: { inputTokens: 10, outputTokens: 5 },
    } satisfies ChatResponse;
    return { chat: vi.fn().mockResolvedValue(cannedReply) };
  }

  it('processes messages and maintains history', async () => {
    const client = createMockClient();
    const agent = new NativeAgent({ modelClient: client, systemPrompt: SYSTEM_PROMPT });

    const reply = await agent.process('Hi');

    // The model's text comes back verbatim, and the request carried the user turn plus system prompt.
    expect(reply).toBe('Hello!');
    expect(client.chat).toHaveBeenCalledWith({
      messages: [USER_TURN],
      system: SYSTEM_PROMPT,
    });

    // Exactly one user turn followed by one assistant turn is recorded.
    const recorded = agent.getHistory();
    expect(recorded).toHaveLength(2);
    const [firstEntry, secondEntry] = recorded;
    expect(firstEntry).toEqual(USER_TURN);
    expect(secondEntry).toEqual(ASSISTANT_TURN);
  });

  it('resets conversation history', async () => {
    const agent = new NativeAgent({
      modelClient: createMockClient(),
      systemPrompt: SYSTEM_PROMPT,
    });

    await agent.process('Hi');
    agent.reset();

    const afterReset = agent.getHistory();
    expect(afterReset).toHaveLength(0);
  });

  it('uses session when provided', async () => {
    // Session double: every method is a spy; getHistory() reports an empty history.
    const sessionStub = {
      id: 'test-session',
      getHistory: vi.fn().mockReturnValue([]),
      addMessage: vi.fn(),
      clear: vi.fn(),
      replaceHistory: vi.fn(),
    };
    const agent = new NativeAgent({
      modelClient: createMockClient(),
      systemPrompt: SYSTEM_PROMPT,
      session: sessionStub,
    });

    await agent.process('Hi');

    // Both turns must be written through the session, user turn first.
    const { addMessage } = sessionStub;
    expect(addMessage).toHaveBeenCalledTimes(2);
    expect(addMessage).toHaveBeenNthCalledWith(1, USER_TURN);
    expect(addMessage).toHaveBeenNthCalledWith(2, ASSISTANT_TURN);
  });
});
|
|
|
|
/**
 * Minimal tool fixture used by the tool-loop tests: it always succeeds and
 * returns the `text` argument unchanged as its output.
 */
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echo',
  inputSchema: {
    type: 'object',
    properties: { text: { type: 'string' } },
    required: ['text'],
  },
  async execute(args) {
    const { text } = args as { text: string };
    return { success: true, output: text };
  },
};
|
|
|
|
describe('NativeAgent tool loop', () => {
|
|
it('executes tool calls and feeds results back', async () => {
  // Scripted model: the first reply requests the echo tool, every later reply is the final text.
  const toolRequest = {
    content: '',
    stopReason: 'tool_use',
    usage: { inputTokens: 10, outputTokens: 5 },
    toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hello' } }],
  };
  const finalAnswer = {
    content: 'The tool returned: hello',
    stopReason: 'end_turn',
    usage: { inputTokens: 15, outputTokens: 10 },
  };
  const mockClient: ModelClient = {
    chat: vi.fn().mockReturnValueOnce(toolRequest).mockReturnValue(finalAnswer),
  };

  const tools = new ToolRegistry();
  tools.register(echoTool);
  const toolRunner = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: tools,
    toolExecutor: toolRunner,
  });

  const reply = await agent.process('echo hello');

  // One round trip for the tool request, one for the final answer.
  expect(reply).toBe('The tool returned: hello');
  expect(mockClient.chat).toHaveBeenCalledTimes(2);
});
|
|
|
|
it('respects max iterations when tool calls vary', async () => {
  // The model never stops asking for tools, but id and args differ on every
  // turn, so identical-call loop detection does not apply here.
  let attempt = 0;
  const nextToolRequest = () => {
    attempt += 1;
    return {
      content: '',
      stopReason: 'tool_use',
      usage: { inputTokens: 10, outputTokens: 5 },
      toolCalls: [
        { id: `call_${attempt}`, name: 'test.echo', args: { text: `attempt_${attempt}` } },
      ],
    };
  };
  const mockClient: ModelClient = {
    chat: vi.fn().mockImplementation(nextToolRequest),
  };

  const tools = new ToolRegistry();
  tools.register(echoTool);
  const toolRunner = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const iterationCap = 3;
  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: tools,
    toolExecutor: toolRunner,
    maxIterations: iterationCap,
  });

  const reply = await agent.process('loop forever');

  // The agent gives up after exactly `iterationCap` model calls and says so.
  expect(reply).toContain('max iterations');
  expect(mockClient.chat).toHaveBeenCalledTimes(iterationCap);
});
|
|
|
|
it('nudges model after same tool called too many times with different args', async () => {
  // Text the test looks for to recognise the agent's "stop calling this tool" nudge.
  const NUDGE_MARKER = 'do NOT call it again';

  /**
   * Reports whether a message carries the nudge: its `content` must be an
   * array of blocks, and at least one block must have a string `content`
   * containing the marker. Blocks of any other shape are ignored rather
   * than dereferenced blindly.
   */
  const carriesNudge = (message: { content?: unknown } | undefined): boolean => {
    const blocks = message?.content;
    if (!Array.isArray(blocks)) {
      return false;
    }
    return blocks.some((block: unknown) => {
      if (typeof block !== 'object' || block === null) {
        return false;
      }
      const text = (block as { content?: unknown }).content;
      return typeof text === 'string' && text.includes(NUDGE_MARKER);
    });
  };

  let callCount = 0;
  const mockClient: ModelClient = {
    chat: vi.fn().mockImplementation((req: { messages: Array<{ content?: unknown }> }) => {
      callCount++;
      // After nudge message, model should respond with text
      if (carriesNudge(req.messages[req.messages.length - 1])) {
        return {
          content: 'Here is what I found from my searches.',
          stopReason: 'end_turn',
          usage: { inputTokens: 10, outputTokens: 5 },
        };
      }
      // Otherwise keep calling the same tool with a slightly different query.
      return {
        content: '',
        stopReason: 'tool_use',
        usage: { inputTokens: 10, outputTokens: 5 },
        toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `query_${callCount}` } }],
      };
    }),
  };

  const registry = new ToolRegistry();
  registry.register(echoTool);
  const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
  const executor = new ToolExecutor(registry, hooks);

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
    maxIterations: 10,
  });

  const response = await agent.process('search a lot');
  // Model should have responded after receiving the nudge
  expect(response).toBe('Here is what I found from my searches.');
  // 4 tool calls + 1 final response = 5 chat calls
  expect(mockClient.chat).toHaveBeenCalledTimes(5);
});
|
|
|
|
it('detects repeated identical tool calls and breaks the loop', async () => {
  // Simulates a local LLM stuck in a loop: every reply is the very same tool call.
  const stuckReply = {
    content: '',
    stopReason: 'tool_use',
    usage: { inputTokens: 10, outputTokens: 5 },
    toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'same thing' } }],
  };
  const mockClient: ModelClient = {
    chat: vi.fn().mockResolvedValue(stuckReply),
  };

  const tools = new ToolRegistry();
  tools.register(echoTool);
  const toolRunner = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: tools,
    toolExecutor: toolRunner,
    maxIterations: 10,
  });

  const reply = await agent.process('search for news');

  expect(reply).toContain('Tool loop detected');
  // The message also includes the last tool result.
  expect(reply).toContain('same thing');
  // The loop is cut after 3 consecutive identical calls, well short of the 10-iteration cap.
  expect(mockClient.chat).toHaveBeenCalledTimes(3);
});
|
|
|
|
it('works without tools (backward compatible)', async () => {
  // No toolRegistry/toolExecutor is supplied: the agent must still answer a plain chat turn.
  const plainReply = {
    content: 'Hello!',
    stopReason: 'end_turn',
    usage: { inputTokens: 10, outputTokens: 5 },
  };
  const mockClient: ModelClient = { chat: vi.fn().mockResolvedValue(plainReply) };

  const agent = new NativeAgent({ modelClient: mockClient, systemPrompt: 'You are helpful.' });

  const reply = await agent.process('Hi');
  expect(reply).toBe('Hello!');
});
|
|
|
|
it('calls onToolUse callback on start and end', async () => {
  // Scripted model: one echo tool request, then a final 'Done'.
  const toolRequest = {
    content: '',
    stopReason: 'tool_use',
    usage: { inputTokens: 10, outputTokens: 5 },
    toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
  };
  const finalAnswer = {
    content: 'Done',
    stopReason: 'end_turn',
    usage: { inputTokens: 15, outputTokens: 10 },
  };
  const mockClient: ModelClient = {
    chat: vi.fn().mockReturnValueOnce(toolRequest).mockReturnValue(finalAnswer),
  };

  const tools = new ToolRegistry();
  tools.register(echoTool);
  const toolRunner = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));
  const onToolUse = vi.fn();

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: tools,
    toolExecutor: toolRunner,
    onToolUse,
  });

  await agent.process('echo hi');

  // Expected notifications, in order: a 'start' event carrying the args,
  // then an 'end' event carrying the tool's result.
  const startEvent = expect.objectContaining({
    type: 'start',
    tool: 'test.echo',
    args: { text: 'hi' },
  });
  const endEvent = expect.objectContaining({
    type: 'end',
    tool: 'test.echo',
    result: expect.objectContaining({ success: true, output: 'hi' }),
  });

  expect(onToolUse).toHaveBeenCalledTimes(2);
  expect(onToolUse).toHaveBeenNthCalledWith(1, startEvent);
  expect(onToolUse).toHaveBeenNthCalledWith(2, endEvent);
});
|
|
|
|
it('injects tool inventory note when history exists and fingerprint changes', async () => {
  // Each chat() call yields a numbered plain-text reply; no tool use is involved.
  let replyNo = 0;
  const chatSpy = vi.fn().mockImplementation(() => {
    replyNo += 1;
    return {
      content: `Response ${replyNo}`,
      stopReason: 'end_turn',
      usage: { inputTokens: 10, outputTokens: 5 },
    };
  });
  const mockClient: ModelClient = { chat: chatSpy };

  /** System prompt that was passed to the n-th (0-based) chat() invocation. */
  const systemSentOn = (n: number) => chatSpy.mock.calls[n][0].system;

  const tools = new ToolRegistry();
  tools.register(echoTool);
  const toolRunner = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: tools,
    toolExecutor: toolRunner,
  });

  // Turn 1: there is no prior history, so the system prompt is sent untouched.
  await agent.process('Hi');
  expect(systemSentOn(0)).toBe('You are helpful.');

  // Turn 2: history exists but the tool set is unchanged, so still no note.
  await agent.process('Hello again');
  expect(systemSentOn(1)).toBe('You are helpful.');

  // Registering another tool changes the tool fingerprint.
  const greetTool: Tool = {
    name: 'test.greet',
    description: 'Greet',
    inputSchema: { type: 'object', properties: {} },
    execute: async () => ({ success: true, output: 'hi' }),
  };
  tools.register(greetTool);

  // Turn 3: history exists AND the fingerprint changed, so the note is injected
  // and mentions both tools (with '_' in place of '.' in their names).
  await agent.process('What can you do?');
  const systemWithNote = systemSentOn(2);
  expect(systemWithNote).toContain('[Tool inventory updated');
  expect(systemWithNote).toContain('test_echo');
  expect(systemWithNote).toContain('test_greet');
});
|
|
|
|
it('does not inject tool inventory note on fresh session', async () => {
  const chatSpy = vi.fn().mockResolvedValue({
    content: 'Hello!',
    stopReason: 'end_turn',
    usage: { inputTokens: 10, outputTokens: 5 },
  });
  const mockClient: ModelClient = { chat: chatSpy };

  const tools = new ToolRegistry();
  tools.register(echoTool);
  const toolRunner = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: tools,
    toolExecutor: toolRunner,
  });

  // Very first message: the history holds only the user message that was just added.
  await agent.process('Hi');

  const { system: sentSystem } = chatSpy.mock.calls[0][0];
  expect(sentSystem).toBe('You are helpful.');
  expect(sentSystem).not.toContain('Tool inventory updated');
});
|
|
|
|
it('only injects tool inventory note once per fingerprint change', async () => {
  // Each chat() call yields a numbered plain-text reply.
  let replyNo = 0;
  const chatSpy = vi.fn().mockImplementation(() => {
    replyNo += 1;
    return {
      content: `Response ${replyNo}`,
      stopReason: 'end_turn',
      usage: { inputTokens: 10, outputTokens: 5 },
    };
  });
  const mockClient: ModelClient = { chat: chatSpy };

  /** System prompt that was passed to the n-th (0-based) chat() invocation. */
  const systemSentOn = (n: number) => chatSpy.mock.calls[n][0].system;

  const tools = new ToolRegistry();
  tools.register(echoTool);
  const toolRunner = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: tools,
    toolExecutor: toolRunner,
  });

  // Two ordinary turns to build up history.
  await agent.process('Hi');
  await agent.process('Hello');

  // Change the tool set by registering a second tool.
  const greetTool: Tool = {
    name: 'test.greet',
    description: 'Greet',
    inputSchema: { type: 'object', properties: {} },
    execute: async () => ({ success: true, output: 'hi' }),
  };
  tools.register(greetTool);

  // First turn after the change: the note is injected.
  await agent.process('What tools?');
  expect(systemSentOn(2)).toContain('[Tool inventory updated');

  // Next turn with the same tools: the fingerprint matches, so the prompt is plain again.
  await agent.process('Anything else?');
  expect(systemSentOn(3)).toBe('You are helpful.');
});
|
|
|
|
it('resets tool fingerprint on reset()', async () => {
  const chatSpy = vi.fn().mockResolvedValue({
    content: 'Hello!',
    stopReason: 'end_turn',
    usage: { inputTokens: 10, outputTokens: 5 },
  });
  const mockClient: ModelClient = { chat: chatSpy };

  const tools = new ToolRegistry();
  tools.register(echoTool);
  const toolRunner = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: tools,
    toolExecutor: toolRunner,
  });

  // Two turns build up history and establish a fingerprint.
  await agent.process('Hi');
  await agent.process('Hello');

  // reset() is expected to clear the fingerprint.
  agent.reset();

  // The first message after a reset has no history, so no note may be injected.
  await agent.process('Hi again');
  const postResetSystem = chatSpy.mock.calls[2][0].system;
  expect(postResetSystem).not.toContain('Tool inventory updated');
});
|
|
|
|
it('catches model errors in tool loop and returns error message', async () => {
  // The other mocks in this file resolve chat() through mockResolvedValue, i.e.
  // they treat it as promise-returning. The failure is therefore delivered as a
  // rejected promise (how a network error would surface from such a client)
  // instead of a synchronous throw out of chat() itself.
  const mockClient: ModelClient = {
    chat: vi
      .fn()
      // Default for every call after the first: the model call fails.
      .mockRejectedValue(new Error('Connection reset by peer'))
      // First call only: the model requests the echo tool.
      .mockResolvedValueOnce({
        content: '',
        stopReason: 'tool_use',
        usage: { inputTokens: 10, outputTokens: 5 },
        toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
      }),
  };

  const registry = new ToolRegistry();
  registry.register(echoTool);
  const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
  const executor = new ToolExecutor(registry, hooks);

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
  });

  // process() must resolve with a descriptive message rather than reject.
  const response = await agent.process('echo hi');
  expect(response).toContain('Error in tool loop');
  expect(response).toContain('Connection reset by peer');

  // Error should be persisted to history
  const history = agent.getHistory();
  const lastEntry = history[history.length - 1];
  expect(lastEntry.role).toBe('assistant');
  expect(lastEntry.content).toContain('Error in tool loop');
});
|
|
|
|
it('handles multiple tool calls in single response', async () => {
  // The first reply bundles two echo calls; every later reply is the final text.
  const doubleToolRequest = {
    content: '',
    stopReason: 'tool_use',
    usage: { inputTokens: 10, outputTokens: 5 },
    toolCalls: [
      { id: 'call_1', name: 'test.echo', args: { text: 'first' } },
      { id: 'call_2', name: 'test.echo', args: { text: 'second' } },
    ],
  };
  const finalAnswer = {
    content: 'Got both results',
    stopReason: 'end_turn',
    usage: { inputTokens: 15, outputTokens: 10 },
  };
  const mockClient: ModelClient = {
    chat: vi.fn().mockReturnValueOnce(doubleToolRequest).mockReturnValue(finalAnswer),
  };

  const tools = new ToolRegistry();
  tools.register(echoTool);
  const toolRunner = new ToolExecutor(tools, new HookEngine({ confirm: [], log: [], silent: [] }));

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: tools,
    toolExecutor: toolRunner,
  });

  const reply = await agent.process('echo both');

  // Both tool calls are handled within a single extra model round trip.
  expect(reply).toBe('Got both results');
  expect(mockClient.chat).toHaveBeenCalledTimes(2);
});
|
|
});
|