feat: improve agent loop resilience — same-tool nudging and error handling

- agent.ts: track consecutive calls to the same tool (ignoring args) and inject a nudge after 4 repeats telling the model to summarize and respond. This prevents local models from endlessly retrying searches with slight query variations.
- agent.ts: wrap the entire tool loop iteration in try-catch so model/network errors don't crash the daemon; a descriptive error message is returned instead.
- Tests for both: the nudge triggers after 4 same-tool calls, and error recovery persists the error to history.
2026-02-11 09:33:30 -08:00
parent c01de7d097
commit 1aab006a7f
2 changed files with 202 additions and 79 deletions
@@ -154,6 +154,52 @@ describe('NativeAgent tool loop', () => {
    expect(mockClient.chat).toHaveBeenCalledTimes(3);
  });

+  it('nudges model after same tool called too many times with different args', async () => {
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation((req: any) => {
+        callCount++;
+        // Once the latest message carries the nudge text, reply with plain text instead of another tool call
+        const lastMsg = req.messages[req.messages.length - 1];
+        const hasNudge = typeof lastMsg?.content !== 'string' &&
+          Array.isArray(lastMsg?.content) &&
+          lastMsg.content.some((b: any) => b.content?.includes('do NOT call it again')); // nudge is looked for inside content blocks, not plain-string content
+        if (hasNudge) {
+          return {
+            content: 'Here is what I found from my searches.',
+            stopReason: 'end_turn',
+            usage: { inputTokens: 10, outputTokens: 5 },
+          };
+        }
+        return {
+          content: '',
+          stopReason: 'tool_use',
+          usage: { inputTokens: 10, outputTokens: 5 },
+          toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `query_${callCount}` } }], // same tool name, different args on every call
+        };
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+      maxIterations: 10, // higher than the 5 chat calls asserted below
+    });
+
+    const response = await agent.process('search a lot');
+    // The mock only returns this text after it has seen the nudge in the latest message
+    expect(response).toBe('Here is what I found from my searches.');
+    // 4 tool-call rounds (same tool, different args) + 1 final text response = 5 chat calls
+    expect(mockClient.chat).toHaveBeenCalledTimes(5);
+  });
+
  it('detects repeated identical tool calls and breaks the loop', async () => {
    // Model always returns the exact same tool call — simulates local LLM stuck in a loop
    const mockClient: ModelClient = {
@@ -416,6 +462,46 @@ describe('NativeAgent tool loop', () => {
    expect(call.system).not.toContain('Tool inventory updated');
  });

+  it('catches model errors in tool loop and returns error message', async () => {
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation(() => {
+        callCount++;
+        if (callCount === 1) {
+          // First call: the model asks to run the test.echo tool
+          return {
+            content: '',
+            stopReason: 'tool_use',
+            usage: { inputTokens: 10, outputTokens: 5 },
+            toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
+          };
+        }
+        // Every later call: the client throws, standing in for a model/network failure
+        throw new Error('Connection reset by peer');
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    const response = await agent.process('echo hi');
+    expect(response).toContain('Error in tool loop');
+    expect(response).toContain('Connection reset by peer');
+    // The error text must also be the last entry in history, recorded with the assistant role
+    const history = agent.getHistory();
+    expect(history[history.length - 1].role).toBe('assistant');
+    expect(history[history.length - 1].content).toContain('Error in tool loop');
+  });
+
  it('handles multiple tool calls in single response', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {