feat: improve agent loop resilience — same-tool nudging and error handling

- agent.ts: track consecutive calls to the same tool (ignoring args) and
  inject a nudge after 4 repeats telling the model to summarize and respond,
  preventing local models from endlessly retrying searches with slight
  query variations
- agent.ts: wrap the entire tool loop iteration in try-catch so model/network
  errors don't crash the daemon — returns a descriptive error message instead
- Tests for both: the nudge triggers after 4 same-tool calls, and the
  tool-loop error message is persisted to history as the final assistant entry
This commit is contained in:
William Valentin
2026-02-11 09:33:30 -08:00
parent c01de7d097
commit 1aab006a7f
2 changed files with 202 additions and 79 deletions
+86
View File
@@ -154,6 +154,52 @@ describe('NativeAgent tool loop', () => {
expect(mockClient.chat).toHaveBeenCalledTimes(3);
});
// Exercises the same-tool nudge: the mocked model keeps asking for `test.echo`
// with a fresh argument on every turn, and only produces a text answer once the
// trailing message of the request contains the nudge wording.
it('nudges model after same tool called too many times with different args', async () => {
  let chatInvocations = 0;

  // Reports whether the final message of the request is a block array in which
  // at least one block's content includes the nudge text.
  const endsWithNudge = (messages: any[]): boolean => {
    const blocks = messages[messages.length - 1]?.content;
    if (typeof blocks === 'string' || !Array.isArray(blocks)) {
      return false;
    }
    return blocks.some((block: any) => block.content?.includes('do NOT call it again'));
  };

  const chat = vi.fn().mockImplementation((request: any) => {
    chatInvocations += 1;

    if (!endsWithNudge(request.messages)) {
      // No nudge seen yet: request the same tool again, varying only the args.
      return {
        content: '',
        stopReason: 'tool_use',
        usage: { inputTokens: 10, outputTokens: 5 },
        toolCalls: [
          { id: `call_${chatInvocations}`, name: 'test.echo', args: { text: `query_${chatInvocations}` } },
        ],
      };
    }

    // Nudge received: stop calling tools and answer in plain text.
    return {
      content: 'Here is what I found from my searches.',
      stopReason: 'end_turn',
      usage: { inputTokens: 10, outputTokens: 5 },
    };
  });
  const mockClient: ModelClient = { chat };

  const registry = new ToolRegistry();
  registry.register(echoTool);
  const executor = new ToolExecutor(
    registry,
    new HookEngine({ confirm: [], log: [], silent: [] }),
  );

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
    maxIterations: 10,
  });

  const reply = await agent.process('search a lot');

  // The text answer is only produced by the mock after it has seen the nudge.
  expect(reply).toBe('Here is what I found from my searches.');
  // Four tool-requesting turns followed by the final text turn.
  expect(mockClient.chat).toHaveBeenCalledTimes(5);
});
it('detects repeated identical tool calls and breaks the loop', async () => {
// Model always returns the exact same tool call — simulates local LLM stuck in a loop
const mockClient: ModelClient = {
@@ -416,6 +462,46 @@ describe('NativeAgent tool loop', () => {
expect(call.system).not.toContain('Tool inventory updated');
});
// Exercises error handling inside the tool loop: the mocked model asks for a
// tool on its first turn and throws on every later turn. The agent is expected
// to return a message describing the failure and to leave that message as the
// final assistant entry in its history.
it('catches model errors in tool loop and returns error message', async () => {
  let chatInvocations = 0;

  const chat = vi.fn().mockImplementation(() => {
    chatInvocations += 1;

    if (chatInvocations !== 1) {
      // Any turn after the first simulates a model/network failure.
      throw new Error('Connection reset by peer');
    }

    // First turn: ask the agent to run the echo tool.
    return {
      content: '',
      stopReason: 'tool_use',
      usage: { inputTokens: 10, outputTokens: 5 },
      toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
    };
  });
  const mockClient: ModelClient = { chat };

  const registry = new ToolRegistry();
  registry.register(echoTool);
  const executor = new ToolExecutor(
    registry,
    new HookEngine({ confirm: [], log: [], silent: [] }),
  );

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
  });

  const reply = await agent.process('echo hi');

  // The returned text carries both the loop-error prefix and the original cause.
  expect(reply).toContain('Error in tool loop');
  expect(reply).toContain('Connection reset by peer');

  // The same error text must be the last thing recorded in the conversation.
  const history = agent.getHistory();
  const lastEntry = history[history.length - 1];
  expect(lastEntry.role).toBe('assistant');
  expect(lastEntry.content).toContain('Error in tool loop');
});
it('handles multiple tool calls in single response', async () => {
let callCount = 0;
const mockClient: ModelClient = {