fix(agent): detect repeated tool call loops and make max_iterations configurable
Local LLMs often get stuck requesting the same tool call over and over because they fail to synthesize the results they have already received. The agent loop had no safeguard against this — it re-executed whatever the model requested until the iteration limit (10) was reached. Add fingerprint-based loop detection: if the same combination of tool names and arguments is requested 3 consecutive times, break out of the loop and return the last tool results, prefixed with a "Tool loop detected" notice. Also add agents.max_iterations to the config schema so the iteration limit is user-configurable (range 1–50, default: 10).
This commit is contained in:
@@ -1027,7 +1027,7 @@
|
||||
},
|
||||
|
||||
"overall_progress": {
|
||||
"total_test_count": 1268,
|
||||
"total_test_count": 1292,
|
||||
"all_tests_passing": true,
|
||||
"p0_completion": "3/3 (100%)",
|
||||
"p1_completion": "4/4 (100%)",
|
||||
@@ -1047,4 +1047,4 @@
|
||||
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
||||
"next_up": "GSD Milestone: Operator DX — Phase 3 Plan 02 (Dashboard UI consuming metrics RPC). All phases P0-P8 and Tiers 1-4 complete. Setup wizard added. TUI fullscreen mode now has full tool access and proper display. Remaining gaps: Tier 4 channels (Signal, Matrix, Teams, Google Chat), Tier 5 deferred/niche items"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -121,14 +121,18 @@ describe('NativeAgent tool loop', () => {
|
||||
expect(mockClient.chat).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it('respects max iterations', async () => {
|
||||
// Model always returns tool_use
|
||||
it('respects max iterations when tool calls vary', async () => {
|
||||
// Model always returns tool_use, but with different args each time, so loop detection never triggers
|
||||
let callCount = 0;
|
||||
const mockClient: ModelClient = {
|
||||
chat: vi.fn().mockResolvedValue({
|
||||
content: '',
|
||||
stopReason: 'tool_use',
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'loop' } }],
|
||||
chat: vi.fn().mockImplementation(() => {
|
||||
callCount++;
|
||||
return {
|
||||
content: '',
|
||||
stopReason: 'tool_use',
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `attempt_${callCount}` } }],
|
||||
};
|
||||
}),
|
||||
};
|
||||
|
||||
@@ -150,6 +154,37 @@ describe('NativeAgent tool loop', () => {
|
||||
expect(mockClient.chat).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
it('detects repeated identical tool calls and breaks the loop', async () => {
|
||||
// Model always returns the exact same tool call — simulates local LLM stuck in a loop
|
||||
const mockClient: ModelClient = {
|
||||
chat: vi.fn().mockResolvedValue({
|
||||
content: '',
|
||||
stopReason: 'tool_use',
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'same thing' } }],
|
||||
}),
|
||||
};
|
||||
|
||||
const registry = new ToolRegistry();
|
||||
registry.register(echoTool);
|
||||
const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
|
||||
const executor = new ToolExecutor(registry, hooks);
|
||||
|
||||
const agent = new NativeAgent({
|
||||
modelClient: mockClient,
|
||||
systemPrompt: 'You are helpful.',
|
||||
toolRegistry: registry,
|
||||
toolExecutor: executor,
|
||||
maxIterations: 10,
|
||||
});
|
||||
|
||||
const response = await agent.process('search for news');
|
||||
expect(response).toContain('Tool loop detected');
|
||||
expect(response).toContain('same thing'); // includes the last tool result
|
||||
// Should break after 3 consecutive identical calls, not 10
|
||||
expect(mockClient.chat).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
it('works without tools (backward compatible)', async () => {
|
||||
const mockClient: ModelClient = {
|
||||
chat: vi.fn().mockResolvedValue({
|
||||
|
||||
@@ -143,6 +143,13 @@ export class NativeAgent {
|
||||
content: m.content,
|
||||
}));
|
||||
|
||||
// Track consecutive identical tool call fingerprints to detect loops.
|
||||
// Local LLMs are especially prone to repeatedly requesting the same tool call.
|
||||
let lastFingerprint: string | undefined;
|
||||
let consecutiveRepeats = 0;
|
||||
const maxConsecutiveRepeats = 3;
|
||||
let lastToolResults: string[] = [];
|
||||
|
||||
for (let iteration = 0; iteration < this.maxIterations; iteration++) {
|
||||
// Build request — cast loopMessages to Message[] because the underlying
|
||||
// model client will pass them through to the API which accepts structured content.
|
||||
@@ -170,6 +177,19 @@ export class NativeAgent {
|
||||
return finalContent;
|
||||
}
|
||||
|
||||
// Check for repeated tool calls — build a fingerprint from tool names + args
|
||||
const fingerprint = response.toolCalls
|
||||
.map(tc => `${tc.name}:${JSON.stringify(tc.args)}`)
|
||||
.sort()
|
||||
.join('|');
|
||||
|
||||
if (fingerprint === lastFingerprint) {
|
||||
consecutiveRepeats++;
|
||||
} else {
|
||||
consecutiveRepeats = 1;
|
||||
lastFingerprint = fingerprint;
|
||||
}
|
||||
|
||||
// Build the assistant message with tool_use content blocks
|
||||
const assistantContent: unknown[] = [];
|
||||
if (response.content) {
|
||||
@@ -187,6 +207,7 @@ export class NativeAgent {
|
||||
|
||||
// Execute each tool call and collect results
|
||||
const toolResultBlocks: unknown[] = [];
|
||||
lastToolResults = [];
|
||||
for (const tc of response.toolCalls) {
|
||||
const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
|
||||
this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
|
||||
@@ -195,16 +216,31 @@ export class NativeAgent {
|
||||
|
||||
this.onToolUse?.({ type: 'end', tool: internalName, result });
|
||||
|
||||
const resultContent = result.success ? result.output : (result.error ?? 'Unknown error');
|
||||
toolResultBlocks.push({
|
||||
type: 'tool_result',
|
||||
tool_use_id: tc.id,
|
||||
content: result.success ? result.output : (result.error ?? 'Unknown error'),
|
||||
content: resultContent,
|
||||
is_error: !result.success,
|
||||
});
|
||||
if (result.success && result.output) {
|
||||
lastToolResults.push(result.output);
|
||||
}
|
||||
}
|
||||
|
||||
// Add tool results as a user message
|
||||
loopMessages.push({ role: 'user', content: toolResultBlocks });
|
||||
|
||||
// Break out if the model is stuck in a repeated tool call loop
|
||||
if (consecutiveRepeats >= maxConsecutiveRepeats) {
|
||||
const toolOutput = lastToolResults.length > 0
|
||||
? lastToolResults.join('\n\n')
|
||||
: 'No results available.';
|
||||
const breakMsg = `Tool loop detected (same tool called ${consecutiveRepeats} times). Returning last results:\n\n${toolOutput}`;
|
||||
const assistantMsg: Message = { role: 'assistant', content: breakMsg };
|
||||
this.addToHistory(assistantMsg);
|
||||
return breakMsg;
|
||||
}
|
||||
}
|
||||
|
||||
// Max iterations reached
|
||||
|
||||
@@ -220,6 +220,8 @@ const agentsSchema = z.object({
|
||||
}),
|
||||
auto_escalate: z.boolean().default(false),
|
||||
max_delegation_depth: z.number().min(1).max(10).default(3),
|
||||
/** Maximum tool-loop iterations before the agent stops. */
|
||||
max_iterations: z.number().min(1).max(50).default(10),
|
||||
}).default({});
|
||||
|
||||
const embeddingProviderSchema = z.enum(['openai', 'gemini', 'ollama', 'llamacpp', 'voyage']);
|
||||
|
||||
@@ -134,6 +134,7 @@ export function createMessageRouter(deps: {
|
||||
primaryTier: effectiveTier,
|
||||
delegation: delegationConfig,
|
||||
maxDelegationDepth: deps.config.agents.max_delegation_depth ?? 3,
|
||||
maxIterations: deps.config.agents.max_iterations,
|
||||
compaction: deps.config.compaction.enabled ? {
|
||||
thresholdPct: deps.config.compaction.threshold_pct,
|
||||
keepTurns: deps.config.compaction.keep_turns,
|
||||
|
||||
Reference in New Issue
Block a user