feat: improve agent loop resilience — same-tool nudging and error handling

- agent.ts: track consecutive calls to the same tool (ignoring args) and, once the streak reaches 4, inject a one-time nudge telling the model to stop calling it, summarize what it has found, and respond. This prevents local models from endlessly retrying searches with slight query variations.
- agent.ts: wrap each tool-loop iteration in try-catch so model/network errors don't crash the daemon; the loop instead returns a descriptive error message and records it in history.
- Tests for both: the nudge triggers after 4 same-tool calls, and a model error is returned as a message and persisted to history.
2026-02-11 09:33:30 -08:00
parent c01de7d097
commit 1aab006a7f
2 changed files with 202 additions and 79 deletions
@@ -154,6 +154,52 @@ describe('NativeAgent tool loop', () => {
    expect(mockClient.chat).toHaveBeenCalledTimes(3);
  });

+  it('nudges model after same tool called too many times with different args', async () => {
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation((req: any) => {
+        callCount++;
+        // After nudge message, model should respond with text
+        const lastMsg = req.messages[req.messages.length - 1];
+        const hasNudge = typeof lastMsg?.content !== 'string' &&
+          Array.isArray(lastMsg?.content) &&
+          lastMsg.content.some((b: any) => b.content?.includes('do NOT call it again'));
+        if (hasNudge) {
+          return {
+            content: 'Here is what I found from my searches.',
+            stopReason: 'end_turn',
+            usage: { inputTokens: 10, outputTokens: 5 },
+          };
+        }
+        return {
+          content: '',
+          stopReason: 'tool_use',
+          usage: { inputTokens: 10, outputTokens: 5 },
+          toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `query_${callCount}` } }],
+        };
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+      maxIterations: 10,
+    });
+
+    const response = await agent.process('search a lot');
+    // Model should have responded after receiving the nudge
+    expect(response).toBe('Here is what I found from my searches.');
+    // 4 tool calls + 1 final response = 5 chat calls
+    expect(mockClient.chat).toHaveBeenCalledTimes(5);
+  });
+
  it('detects repeated identical tool calls and breaks the loop', async () => {
    // Model always returns the exact same tool call — simulates local LLM stuck in a loop
    const mockClient: ModelClient = {
@@ -416,6 +462,46 @@ describe('NativeAgent tool loop', () => {
    expect(call.system).not.toContain('Tool inventory updated');
  });

+  it('catches model errors in tool loop and returns error message', async () => {
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation(() => {
+        callCount++;
+        if (callCount === 1) {
+          // First call: model requests tool use
+          return {
+            content: '',
+            stopReason: 'tool_use',
+            usage: { inputTokens: 10, outputTokens: 5 },
+            toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
+          };
+        }
+        // Second call: model throws an error
+        throw new Error('Connection reset by peer');
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    const response = await agent.process('echo hi');
+    expect(response).toContain('Error in tool loop');
+    expect(response).toContain('Connection reset by peer');
+    // Error should be persisted to history
+    const history = agent.getHistory();
+    expect(history[history.length - 1].role).toBe('assistant');
+    expect(history[history.length - 1].content).toContain('Error in tool loop');
+  });
+
  it('handles multiple tool calls in single response', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
@@ -150,96 +150,133 @@ export class NativeAgent {
    const maxConsecutiveRepeats = 3;
    let lastToolResults: string[] = [];

+    // Track consecutive calls to the same tool (even with different args).
+    // Local models often call the same tool with slight query variations.
+    let lastToolName: string | undefined;
+    let sameToolStreak = 0;
+    const maxSameToolStreak = 4; // nudge after 4 calls to the same tool
+    let nudged = false;
+
    for (let iteration = 0; iteration < this.maxIterations; iteration++) {
-      // Build request — cast loopMessages to Message[] because the underlying
-      // model client will pass them through to the API which accepts structured content.
-      const request = {
-        messages: loopMessages as unknown as Message[],
-        system: effectiveSystem,
-        tools,
-        ...(this._thinking ? { thinking: true } : {}),
-      };
+      try {
+        // Build request — cast loopMessages to Message[] because the underlying
+        // model client will pass them through to the API which accepts structured content.
+        const request = {
+          messages: loopMessages as unknown as Message[],
+          system: effectiveSystem,
+          tools,
+          ...(this._thinking ? { thinking: true } : {}),
+        };

-      const response = await this.chatWithRouter(request);
+        const response = await this.chatWithRouter(request);

-      this._totalUsage.inputTokens += response.usage.inputTokens;
-      this._totalUsage.outputTokens += response.usage.outputTokens;
-      this._callCount++;
+        this._totalUsage.inputTokens += response.usage.inputTokens;
+        this._totalUsage.outputTokens += response.usage.outputTokens;
+        this._callCount++;

-      // If the model didn't request tool use, we're done
-      if (response.stopReason !== 'tool_use' || !response.toolCalls?.length) {
-        let finalContent = response.content;
-        if (response.thinkingContent) {
-          finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`;
+        // If the model didn't request tool use, we're done
+        if (response.stopReason !== 'tool_use' || !response.toolCalls?.length) {
+          let finalContent = response.content;
+          if (response.thinkingContent) {
+            finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`;
+          }
+          const assistantMsg: Message = { role: 'assistant', content: response.content };
+          this.addToHistory(assistantMsg);
+          return finalContent;
        }
-        const assistantMsg: Message = { role: 'assistant', content: response.content };
-        this.addToHistory(assistantMsg);
-        return finalContent;
-      }

-      // Check for repeated tool calls — build a fingerprint from tool names + args
-      const fingerprint = response.toolCalls
-        .map(tc => `${tc.name}:${JSON.stringify(tc.args)}`)
-        .sort()
-        .join('|');
+        // Check for repeated tool calls — build a fingerprint from tool names + args
+        const fingerprint = response.toolCalls
+          .map(tc => `${tc.name}:${JSON.stringify(tc.args)}`)
+          .sort()
+          .join('|');

-      if (fingerprint === lastFingerprint) {
-        consecutiveRepeats++;
-      } else {
-        consecutiveRepeats = 1;
-        lastFingerprint = fingerprint;
-      }
-
-      // Build the assistant message with tool_use content blocks
-      const assistantContent: unknown[] = [];
-      if (response.content) {
-        assistantContent.push({ type: 'text', text: response.content });
-      }
-      for (const tc of response.toolCalls) {
-        assistantContent.push({
-          type: 'tool_use',
-          id: tc.id,
-          name: tc.name,
-          input: tc.args,
-        });
-      }
-      loopMessages.push({ role: 'assistant', content: assistantContent });
-
-      // Execute each tool call and collect results
-      const toolResultBlocks: unknown[] = [];
-      lastToolResults = [];
-      for (const tc of response.toolCalls) {
-        const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
-        this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
-
-        const result = await this.toolExecutor!.execute(internalName, tc.args, this._toolPolicyContext);
-
-        this.onToolUse?.({ type: 'end', tool: internalName, result });
-
-        const resultContent = result.success ? result.output : (result.error ?? 'Unknown error');
-        toolResultBlocks.push({
-          type: 'tool_result',
-          tool_use_id: tc.id,
-          content: resultContent,
-          is_error: !result.success,
-        });
-        if (result.success && result.output) {
-          lastToolResults.push(result.output);
+        if (fingerprint === lastFingerprint) {
+          consecutiveRepeats++;
+        } else {
+          consecutiveRepeats = 1;
+          lastFingerprint = fingerprint;
        }
-      }

-      // Add tool results as a user message
-      loopMessages.push({ role: 'user', content: toolResultBlocks });
+        // Track consecutive calls to the same tool (by name, ignoring args)
+        const toolNames = response.toolCalls.map(tc => tc.name).sort().join(',');
+        if (toolNames === lastToolName) {
+          sameToolStreak++;
+        } else {
+          sameToolStreak = 1;
+          lastToolName = toolNames;
+          nudged = false;
+        }

-      // Break out if the model is stuck in a repeated tool call loop
-      if (consecutiveRepeats >= maxConsecutiveRepeats) {
-        const toolOutput = lastToolResults.length > 0
-          ? lastToolResults.join('\n\n')
-          : 'No results available.';
-        const breakMsg = `Tool loop detected (same tool called ${consecutiveRepeats} times). Returning last results:\n\n${toolOutput}`;
-        const assistantMsg: Message = { role: 'assistant', content: breakMsg };
+        // Build the assistant message with tool_use content blocks
+        const assistantContent: unknown[] = [];
+        if (response.content) {
+          assistantContent.push({ type: 'text', text: response.content });
+        }
+        for (const tc of response.toolCalls) {
+          assistantContent.push({
+            type: 'tool_use',
+            id: tc.id,
+            name: tc.name,
+            input: tc.args,
+          });
+        }
+        loopMessages.push({ role: 'assistant', content: assistantContent });
+
+        // Execute each tool call and collect results
+        const toolResultBlocks: unknown[] = [];
+        lastToolResults = [];
+        for (const tc of response.toolCalls) {
+          const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
+          this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
+
+          const result = await this.toolExecutor!.execute(internalName, tc.args, this._toolPolicyContext);
+
+          this.onToolUse?.({ type: 'end', tool: internalName, result });
+
+          const resultContent = result.success ? result.output : (result.error ?? 'Unknown error');
+          toolResultBlocks.push({
+            type: 'tool_result',
+            tool_use_id: tc.id,
+            content: resultContent,
+            is_error: !result.success,
+          });
+          if (result.success && result.output) {
+            lastToolResults.push(result.output);
+          }
+        }
+
+        // If the same tool has been called too many times, append a nudge
+        // telling the model to use what it has. This combats local models
+        // that endlessly retry searches with slight query variations.
+        if (sameToolStreak >= maxSameToolStreak && !nudged) {
+          nudged = true;
+          toolResultBlocks.push({
+            type: 'tool_result',
+            tool_use_id: '__system',
+            content: `You have called this tool ${sameToolStreak} times in a row. You have enough information — do NOT call it again. Summarize what you have found and respond to the user now.`,
+            is_error: false,
+          });
+        }
+
+        // Add tool results as a user message
+        loopMessages.push({ role: 'user', content: toolResultBlocks });
+
+        // Break out if the model is stuck in a repeated tool call loop
+        if (consecutiveRepeats >= maxConsecutiveRepeats) {
+          const toolOutput = lastToolResults.length > 0
+            ? lastToolResults.join('\n\n')
+            : 'No results available.';
+          const breakMsg = `Tool loop detected (same tool called ${consecutiveRepeats} times). Returning last results:\n\n${toolOutput}`;
+          const assistantMsg: Message = { role: 'assistant', content: breakMsg };
+          this.addToHistory(assistantMsg);
+          return breakMsg;
+        }
+      } catch (error) {
+        const errorMsg = `Error in tool loop (iteration ${iteration + 1}): ${error instanceof Error ? error.message : String(error)}`;
+        const assistantMsg: Message = { role: 'assistant', content: errorMsg };
        this.addToHistory(assistantMsg);
-        return breakMsg;
+        return errorMsg;
      }
    }