From bf9ca690f3a1c007ee53382f63cbf1763d8f0c92 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Tue, 10 Feb 2026 19:35:09 -0800 Subject: [PATCH] fix(agent): detect repeated tool call loops and make max_iterations configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Local LLMs often get stuck calling the same tool repeatedly because they lack the sophistication to synthesize results. The agent loop had no safeguard — it re-executed whatever the model requested up to 10 times. Add fingerprint-based loop detection: if the same tool+args combination repeats 3 consecutive times, break the loop and return the last results. Also add agents.max_iterations to the config schema so the iteration limit is user-configurable (default: 10). --- docs/plans/state.json | 4 +-- src/backends/native/agent.test.ts | 49 ++++++++++++++++++++++++++----- src/backends/native/agent.ts | 38 +++++++++++++++++++++++- src/config/schema.ts | 2 ++ src/daemon/routing.ts | 1 + 5 files changed, 84 insertions(+), 10 deletions(-) diff --git a/docs/plans/state.json b/docs/plans/state.json index 953c453..c2e6d75 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -1027,7 +1027,7 @@ }, "overall_progress": { - "total_test_count": 1268, + "total_test_count": 1292, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", @@ -1047,4 +1047,4 @@ "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram", "next_up": "GSD Milestone: Operator DX — Phase 3 Plan 02 (Dashboard UI consuming metrics RPC). All phases P0-P8 and Tiers 1-4 complete. Setup wizard added. TUI fullscreen mode now has full tool access and proper display. Remaining gaps: Tier 4 channels (Signal, Matrix, Teams, Google Chat), Tier 5 deferred/niche items" } -} +} \ No newline at end of file diff --git a/src/backends/native/agent.test.ts b/src/backends/native/agent.test.ts index 5f31f09..e915ffc 100644 --- a/src/backends/native/agent.test.ts +++ b/src/backends/native/agent.test.ts @@ -121,14 +121,18 @@ describe('NativeAgent tool loop', () => { expect(mockClient.chat).toHaveBeenCalledTimes(2); }); - it('respects max iterations', async () => { - // Model always returns tool_use + it('respects max iterations when tool calls vary', async () => { + // Model always returns tool_use but with different args each time (no loop detection) + let callCount = 0; const mockClient: ModelClient = { - chat: vi.fn().mockResolvedValue({ - content: '', - stopReason: 'tool_use', - usage: { inputTokens: 10, outputTokens: 5 }, - toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'loop' } }], + chat: vi.fn().mockImplementation(() => { + callCount++; + return { + content: '', + stopReason: 'tool_use', + usage: { inputTokens: 10, outputTokens: 5 }, + toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `attempt_${callCount}` } }], + }; }), }; @@ -150,6 +154,37 @@ describe('NativeAgent tool loop', () => { expect(mockClient.chat).toHaveBeenCalledTimes(3); }); + it('detects repeated identical tool calls and breaks the loop', async () => { + // Model always returns the exact same tool call — simulates local LLM stuck in a loop + const mockClient: ModelClient = { + chat: vi.fn().mockResolvedValue({ + content: '', + stopReason: 'tool_use', + usage: { inputTokens: 10, outputTokens: 5 }, + toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'same thing' } }], + }), + }; + + const registry = new ToolRegistry(); + registry.register(echoTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const agent = new NativeAgent({ + modelClient: mockClient, + systemPrompt: 'You are helpful.', + toolRegistry: registry, + toolExecutor: executor, + maxIterations: 10, + }); + + const response = await agent.process('search for news'); + expect(response).toContain('Tool loop detected'); + expect(response).toContain('same thing'); // includes the last tool result + // Should break after 3 consecutive identical calls, not 10 + expect(mockClient.chat).toHaveBeenCalledTimes(3); + }); + it('works without tools (backward compatible)', async () => { const mockClient: ModelClient = { chat: vi.fn().mockResolvedValue({ diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts index 9abdb97..cf46cc9 100644 --- a/src/backends/native/agent.ts +++ b/src/backends/native/agent.ts @@ -143,6 +143,13 @@ export class NativeAgent { content: m.content, })); + // Track consecutive identical tool call fingerprints to detect loops. + // Local LLMs are especially prone to repeatedly requesting the same tool call. + let lastFingerprint: string | undefined; + let consecutiveRepeats = 0; + const maxConsecutiveRepeats = 3; + let lastToolResults: string[] = []; + for (let iteration = 0; iteration < this.maxIterations; iteration++) { // Build request — cast loopMessages to Message[] because the underlying // model client will pass them through to the API which accepts structured content. @@ -170,6 +177,19 @@ export class NativeAgent { return finalContent; } + // Check for repeated tool calls — build a fingerprint from tool names + args + const fingerprint = response.toolCalls + .map(tc => `${tc.name}:${JSON.stringify(tc.args)}`) + .sort() + .join('|'); + + if (fingerprint === lastFingerprint) { + consecutiveRepeats++; + } else { + consecutiveRepeats = 1; + lastFingerprint = fingerprint; + } + // Build the assistant message with tool_use content blocks const assistantContent: unknown[] = []; if (response.content) { @@ -187,6 +207,7 @@ export class NativeAgent { // Execute each tool call and collect results const toolResultBlocks: unknown[] = []; + lastToolResults = []; for (const tc of response.toolCalls) { const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name; this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args }); @@ -195,16 +216,31 @@ export class NativeAgent { this.onToolUse?.({ type: 'end', tool: internalName, result }); + const resultContent = result.success ? result.output : (result.error ?? 'Unknown error'); toolResultBlocks.push({ type: 'tool_result', tool_use_id: tc.id, - content: result.success ? result.output : (result.error ?? 'Unknown error'), + content: resultContent, is_error: !result.success, }); + if (result.success && result.output) { + lastToolResults.push(result.output); + } } // Add tool results as a user message loopMessages.push({ role: 'user', content: toolResultBlocks }); + + // Break out if the model is stuck in a repeated tool call loop + if (consecutiveRepeats >= maxConsecutiveRepeats) { + const toolOutput = lastToolResults.length > 0 + ? lastToolResults.join('\n\n') + : 'No results available.'; + const breakMsg = `Tool loop detected (same tool called ${consecutiveRepeats} times). Returning last results:\n\n${toolOutput}`; + const assistantMsg: Message = { role: 'assistant', content: breakMsg }; + this.addToHistory(assistantMsg); + return breakMsg; + } } // Max iterations reached diff --git a/src/config/schema.ts b/src/config/schema.ts index dd84bd6..d511002 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -220,6 +220,8 @@ const agentsSchema = z.object({ }), auto_escalate: z.boolean().default(false), max_delegation_depth: z.number().min(1).max(10).default(3), + /** Maximum tool-loop iterations before the agent stops. */ + max_iterations: z.number().min(1).max(50).default(10), }).default({}); const embeddingProviderSchema = z.enum(['openai', 'gemini', 'ollama', 'llamacpp', 'voyage']); diff --git a/src/daemon/routing.ts b/src/daemon/routing.ts index 30bf3b0..319dd70 100644 --- a/src/daemon/routing.ts +++ b/src/daemon/routing.ts @@ -134,6 +134,7 @@ export function createMessageRouter(deps: { primaryTier: effectiveTier, delegation: delegationConfig, maxDelegationDepth: deps.config.agents.max_delegation_depth ?? 3, + maxIterations: deps.config.agents.max_iterations, compaction: deps.config.compaction.enabled ? { thresholdPct: deps.config.compaction.threshold_pct, keepTurns: deps.config.compaction.keep_turns,