fix(agent): detect repeated tool call loops and make max_iterations configurable

Local LLMs often get stuck calling the same tool repeatedly because they lack the sophistication to synthesize results. The agent loop had no safeguard against this — it re-executed whatever the model requested up to 10 times (the iteration limit). Add fingerprint-based loop detection: if the model requests the same set of tool calls (tool name + args) in 3 consecutive iterations, break the loop and return the successful outputs from the last execution. Also add agents.max_iterations to the config schema so the iteration limit is user-configurable (range: 1–50, default: 10).
2026-02-10 19:35:09 -08:00
parent 4ce8e81c01
commit bf9ca690f3
5 changed files with 84 additions and 10 deletions
@@ -1027,7 +1027,7 @@
  },

  "overall_progress": {
-    "total_test_count": 1268,
+    "total_test_count": 1292,
    "all_tests_passing": true,
    "p0_completion": "3/3 (100%)",
    "p1_completion": "4/4 (100%)",
@@ -1047,4 +1047,4 @@
    "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
    "next_up": "GSD Milestone: Operator DX — Phase 3 Plan 02 (Dashboard UI consuming metrics RPC). All phases P0-P8 and Tiers 1-4 complete. Setup wizard added. TUI fullscreen mode now has full tool access and proper display. Remaining gaps: Tier 4 channels (Signal, Matrix, Teams, Google Chat), Tier 5 deferred/niche items"
  }
-}
+}
@@ -121,14 +121,18 @@ describe('NativeAgent tool loop', () => {
    expect(mockClient.chat).toHaveBeenCalledTimes(2);
  });

-  it('respects max iterations', async () => {
-    // Model always returns tool_use
+  it('respects max iterations when tool calls vary', async () => {
+    // Model always returns tool_use, but with different args each time, so loop detection never triggers
+    let callCount = 0;
    const mockClient: ModelClient = {
-      chat: vi.fn().mockResolvedValue({
-        content: '',
-        stopReason: 'tool_use',
-        usage: { inputTokens: 10, outputTokens: 5 },
-        toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'loop' } }],
+      chat: vi.fn().mockImplementation(() => {
+        callCount++;
+        return {
+          content: '',
+          stopReason: 'tool_use',
+          usage: { inputTokens: 10, outputTokens: 5 },
+          toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `attempt_${callCount}` } }],
+        };
      }),
    };

@@ -150,6 +154,37 @@ describe('NativeAgent tool loop', () => {
    expect(mockClient.chat).toHaveBeenCalledTimes(3);
  });

+  it('detects repeated identical tool calls and breaks the loop', async () => {
+    // Model always returns the exact same tool call — simulates local LLM stuck in a loop
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockResolvedValue({
+        content: '',
+        stopReason: 'tool_use',
+        usage: { inputTokens: 10, outputTokens: 5 },
+        toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'same thing' } }],
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+      maxIterations: 10,
+    });
+
+    const response = await agent.process('search for news');
+    expect(response).toContain('Tool loop detected');
+    expect(response).toContain('same thing'); // includes the last tool result
+    // Should break after 3 consecutive identical calls, not 10
+    expect(mockClient.chat).toHaveBeenCalledTimes(3);
+  });
+
  it('works without tools (backward compatible)', async () => {
    const mockClient: ModelClient = {
      chat: vi.fn().mockResolvedValue({
@@ -143,6 +143,13 @@ export class NativeAgent {
      content: m.content,
    }));

+    // Track consecutive identical tool call fingerprints to detect loops.
+    // Local LLMs are especially prone to repeatedly requesting the same tool call.
+    let lastFingerprint: string | undefined;
+    let consecutiveRepeats = 0;
+    const maxConsecutiveRepeats = 3;
+    let lastToolResults: string[] = [];
+
    for (let iteration = 0; iteration < this.maxIterations; iteration++) {
      // Build request — cast loopMessages to Message[] because the underlying
      // model client will pass them through to the API which accepts structured content.
@@ -170,6 +177,19 @@ export class NativeAgent {
        return finalContent;
      }

+      // Check for repeated tool calls — build a fingerprint from tool names + args
+      const fingerprint = response.toolCalls
+        .map(tc => `${tc.name}:${JSON.stringify(tc.args)}`)
+        .sort()
+        .join('|');
+
+      if (fingerprint === lastFingerprint) {
+        consecutiveRepeats++;
+      } else {
+        consecutiveRepeats = 1;
+        lastFingerprint = fingerprint;
+      }
+
      // Build the assistant message with tool_use content blocks
      const assistantContent: unknown[] = [];
      if (response.content) {
@@ -187,6 +207,7 @@ export class NativeAgent {

      // Execute each tool call and collect results
      const toolResultBlocks: unknown[] = [];
+      lastToolResults = [];
      for (const tc of response.toolCalls) {
        const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
        this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
@@ -195,16 +216,31 @@ export class NativeAgent {

        this.onToolUse?.({ type: 'end', tool: internalName, result });

+        const resultContent = result.success ? result.output : (result.error ?? 'Unknown error');
        toolResultBlocks.push({
          type: 'tool_result',
          tool_use_id: tc.id,
-          content: result.success ? result.output : (result.error ?? 'Unknown error'),
+          content: resultContent,
          is_error: !result.success,
        });
+        if (result.success && result.output) {
+          lastToolResults.push(result.output);
+        }
      }

      // Add tool results as a user message
      loopMessages.push({ role: 'user', content: toolResultBlocks });
+
+      // Break out if the model is stuck in a repeated tool call loop
+      if (consecutiveRepeats >= maxConsecutiveRepeats) {
+        const toolOutput = lastToolResults.length > 0
+          ? lastToolResults.join('\n\n')
+          : 'No results available.';
+        const breakMsg = `Tool loop detected (same tool called ${consecutiveRepeats} times). Returning last results:\n\n${toolOutput}`;
+        const assistantMsg: Message = { role: 'assistant', content: breakMsg };
+        this.addToHistory(assistantMsg);
+        return breakMsg;
+      }
    }

    // Max iterations reached
@@ -220,6 +220,8 @@ const agentsSchema = z.object({
  }),
  auto_escalate: z.boolean().default(false),
  max_delegation_depth: z.number().min(1).max(10).default(3),
+  /** Maximum tool-loop iterations before the agent stops. */
+  max_iterations: z.number().min(1).max(50).default(10),
 }).default({});

 const embeddingProviderSchema = z.enum(['openai', 'gemini', 'ollama', 'llamacpp', 'voyage']);
@@ -134,6 +134,7 @@ export function createMessageRouter(deps: {
        primaryTier: effectiveTier,
        delegation: delegationConfig,
        maxDelegationDepth: deps.config.agents.max_delegation_depth ?? 3,
+        maxIterations: deps.config.agents.max_iterations,
        compaction: deps.config.compaction.enabled ? {
          thresholdPct: deps.config.compaction.threshold_pct,
          keepTurns: deps.config.compaction.keep_turns,