feat: improve agent loop resilience — same-tool nudging and error handling

- agent.ts: track consecutive calls to the same tool (ignoring args) and,
  once the streak reaches 4 calls, inject a nudge (at most once per streak)
  telling the model to summarize and respond, preventing local models from
  endlessly retrying searches with slight query variations
- agent.ts: wrap the entire tool loop iteration in try-catch so model/network
  errors don't crash the daemon — returns a descriptive error message instead
- Tests for both: the nudge triggers after 4 consecutive same-tool calls, and
  a model error mid-loop is returned as an error message and persisted to
  history
This commit is contained in:
William Valentin
2026-02-11 09:33:30 -08:00
parent c01de7d097
commit 1aab006a7f
2 changed files with 202 additions and 79 deletions
+86
View File
@@ -154,6 +154,52 @@ describe('NativeAgent tool loop', () => {
expect(mockClient.chat).toHaveBeenCalledTimes(3);
});
// Same-tool nudge: the mock model keeps requesting `test.echo` with a fresh
// argument on every turn, and only answers in plain text once the newest
// message in the request carries the agent's nudge wording.
it('nudges model after same tool called too many times with different args', async () => {
  let chatInvocations = 0;

  // True when the newest message has array content and at least one of its
  // blocks carries the nudge wording in its `content` field.
  const endsWithNudge = (messages: any[]): boolean => {
    const newest = messages[messages.length - 1];
    const blocks = newest?.content;
    if (typeof blocks === 'string' || !Array.isArray(blocks)) {
      return false;
    }
    return blocks.some((block: any) => block.content?.includes('do NOT call it again'));
  };

  const mockClient: ModelClient = {
    chat: vi.fn().mockImplementation((req: any) => {
      chatInvocations += 1;

      if (!endsWithNudge(req.messages)) {
        // No nudge yet: ask for the same tool again with a new query.
        return {
          content: '',
          stopReason: 'tool_use',
          usage: { inputTokens: 10, outputTokens: 5 },
          toolCalls: [
            {
              id: `call_${chatInvocations}`,
              name: 'test.echo',
              args: { text: `query_${chatInvocations}` },
            },
          ],
        };
      }

      // Nudge received: stop calling tools and answer in text.
      return {
        content: 'Here is what I found from my searches.',
        stopReason: 'end_turn',
        usage: { inputTokens: 10, outputTokens: 5 },
      };
    }),
  };

  const registry = new ToolRegistry();
  registry.register(echoTool);
  const executor = new ToolExecutor(
    registry,
    new HookEngine({ confirm: [], log: [], silent: [] }),
  );

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
    maxIterations: 10,
  });

  const reply = await agent.process('search a lot');

  // The text answer produced right after the nudge is what the agent returns.
  expect(reply).toBe('Here is what I found from my searches.');
  // Four tool-use turns followed by one final text turn: five chat calls.
  expect(mockClient.chat).toHaveBeenCalledTimes(5);
});
it('detects repeated identical tool calls and breaks the loop', async () => {
// Model always returns the exact same tool call — simulates local LLM stuck in a loop
const mockClient: ModelClient = {
@@ -416,6 +462,46 @@ describe('NativeAgent tool loop', () => {
expect(call.system).not.toContain('Tool inventory updated');
});
// Error recovery: the first chat call requests a tool, every later call
// throws. The agent is expected to return a descriptive error string and
// record it as the last assistant entry in its history.
it('catches model errors in tool loop and returns error message', async () => {
  let attempts = 0;

  const mockClient: ModelClient = {
    chat: vi.fn().mockImplementation(() => {
      attempts += 1;

      // Any call after the first simulates a dropped connection.
      if (attempts !== 1) {
        throw new Error('Connection reset by peer');
      }

      // First call: the model asks for a tool invocation.
      return {
        content: '',
        stopReason: 'tool_use',
        usage: { inputTokens: 10, outputTokens: 5 },
        toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
      };
    }),
  };

  const registry = new ToolRegistry();
  registry.register(echoTool);
  const executor = new ToolExecutor(
    registry,
    new HookEngine({ confirm: [], log: [], silent: [] }),
  );

  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
  });

  const reply = await agent.process('echo hi');

  // The returned text names the failing stage and the underlying cause.
  expect(reply).toContain('Error in tool loop');
  expect(reply).toContain('Connection reset by peer');

  // The same error text is stored as the final assistant turn in history.
  const history = agent.getHistory();
  const lastIndex = history.length - 1;
  expect(history[lastIndex].role).toBe('assistant');
  expect(history[lastIndex].content).toContain('Error in tool loop');
});
it('handles multiple tool calls in single response', async () => {
let callCount = 0;
const mockClient: ModelClient = {
+116 -79
View File
@@ -150,96 +150,133 @@ export class NativeAgent {
const maxConsecutiveRepeats = 3;
let lastToolResults: string[] = [];
// Track consecutive calls to the same tool (even with different args).
// Local models often call the same tool with slight query variations.
let lastToolName: string | undefined;
let sameToolStreak = 0;
const maxSameToolStreak = 4; // nudge after 4 calls to the same tool
let nudged = false;
for (let iteration = 0; iteration < this.maxIterations; iteration++) {
// Build request — cast loopMessages to Message[] because the underlying
// model client will pass them through to the API which accepts structured content.
const request = {
messages: loopMessages as unknown as Message[],
system: effectiveSystem,
tools,
...(this._thinking ? { thinking: true } : {}),
};
try {
// Build request — cast loopMessages to Message[] because the underlying
// model client will pass them through to the API which accepts structured content.
const request = {
messages: loopMessages as unknown as Message[],
system: effectiveSystem,
tools,
...(this._thinking ? { thinking: true } : {}),
};
const response = await this.chatWithRouter(request);
const response = await this.chatWithRouter(request);
this._totalUsage.inputTokens += response.usage.inputTokens;
this._totalUsage.outputTokens += response.usage.outputTokens;
this._callCount++;
this._totalUsage.inputTokens += response.usage.inputTokens;
this._totalUsage.outputTokens += response.usage.outputTokens;
this._callCount++;
// If the model didn't request tool use, we're done
if (response.stopReason !== 'tool_use' || !response.toolCalls?.length) {
let finalContent = response.content;
if (response.thinkingContent) {
finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`;
// If the model didn't request tool use, we're done
if (response.stopReason !== 'tool_use' || !response.toolCalls?.length) {
let finalContent = response.content;
if (response.thinkingContent) {
finalContent = `<thinking>\n${response.thinkingContent}\n</thinking>\n\n${response.content}`;
}
const assistantMsg: Message = { role: 'assistant', content: response.content };
this.addToHistory(assistantMsg);
return finalContent;
}
const assistantMsg: Message = { role: 'assistant', content: response.content };
this.addToHistory(assistantMsg);
return finalContent;
}
// Check for repeated tool calls — build a fingerprint from tool names + args
const fingerprint = response.toolCalls
.map(tc => `${tc.name}:${JSON.stringify(tc.args)}`)
.sort()
.join('|');
// Check for repeated tool calls — build a fingerprint from tool names + args
const fingerprint = response.toolCalls
.map(tc => `${tc.name}:${JSON.stringify(tc.args)}`)
.sort()
.join('|');
if (fingerprint === lastFingerprint) {
consecutiveRepeats++;
} else {
consecutiveRepeats = 1;
lastFingerprint = fingerprint;
}
// Build the assistant message with tool_use content blocks
const assistantContent: unknown[] = [];
if (response.content) {
assistantContent.push({ type: 'text', text: response.content });
}
for (const tc of response.toolCalls) {
assistantContent.push({
type: 'tool_use',
id: tc.id,
name: tc.name,
input: tc.args,
});
}
loopMessages.push({ role: 'assistant', content: assistantContent });
// Execute each tool call and collect results
const toolResultBlocks: unknown[] = [];
lastToolResults = [];
for (const tc of response.toolCalls) {
const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
const result = await this.toolExecutor!.execute(internalName, tc.args, this._toolPolicyContext);
this.onToolUse?.({ type: 'end', tool: internalName, result });
const resultContent = result.success ? result.output : (result.error ?? 'Unknown error');
toolResultBlocks.push({
type: 'tool_result',
tool_use_id: tc.id,
content: resultContent,
is_error: !result.success,
});
if (result.success && result.output) {
lastToolResults.push(result.output);
if (fingerprint === lastFingerprint) {
consecutiveRepeats++;
} else {
consecutiveRepeats = 1;
lastFingerprint = fingerprint;
}
}
// Add tool results as a user message
loopMessages.push({ role: 'user', content: toolResultBlocks });
// Track consecutive calls to the same tool (by name, ignoring args)
const toolNames = response.toolCalls.map(tc => tc.name).sort().join(',');
if (toolNames === lastToolName) {
sameToolStreak++;
} else {
sameToolStreak = 1;
lastToolName = toolNames;
nudged = false;
}
// Break out if the model is stuck in a repeated tool call loop
if (consecutiveRepeats >= maxConsecutiveRepeats) {
const toolOutput = lastToolResults.length > 0
? lastToolResults.join('\n\n')
: 'No results available.';
const breakMsg = `Tool loop detected (same tool called ${consecutiveRepeats} times). Returning last results:\n\n${toolOutput}`;
const assistantMsg: Message = { role: 'assistant', content: breakMsg };
// Build the assistant message with tool_use content blocks
const assistantContent: unknown[] = [];
if (response.content) {
assistantContent.push({ type: 'text', text: response.content });
}
for (const tc of response.toolCalls) {
assistantContent.push({
type: 'tool_use',
id: tc.id,
name: tc.name,
input: tc.args,
});
}
loopMessages.push({ role: 'assistant', content: assistantContent });
// Execute each tool call and collect results
const toolResultBlocks: unknown[] = [];
lastToolResults = [];
for (const tc of response.toolCalls) {
const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
const result = await this.toolExecutor!.execute(internalName, tc.args, this._toolPolicyContext);
this.onToolUse?.({ type: 'end', tool: internalName, result });
const resultContent = result.success ? result.output : (result.error ?? 'Unknown error');
toolResultBlocks.push({
type: 'tool_result',
tool_use_id: tc.id,
content: resultContent,
is_error: !result.success,
});
if (result.success && result.output) {
lastToolResults.push(result.output);
}
}
// If the same tool has been called too many times, append a nudge
// telling the model to use what it has. This combats local models
// that endlessly retry searches with slight query variations.
//
// The nudge is emitted at most once per streak: `nudged` is set here and is
// only cleared when the set of tool names in a response changes.
//
// NOTE(review): the nudge is sent as a `tool_result` block whose
// `tool_use_id` ('__system') does not match any `tool_use` id pushed in the
// preceding assistant message. Some provider APIs may reject tool results
// without a matching tool_use — confirm every model client tolerates this,
// or consider sending the nudge as a plain text block instead.
if (sameToolStreak >= maxSameToolStreak && !nudged) {
nudged = true;
toolResultBlocks.push({
type: 'tool_result',
tool_use_id: '__system',
content: `You have called this tool ${sameToolStreak} times in a row. You have enough information — do NOT call it again. Summarize what you have found and respond to the user now.`,
is_error: false,
});
}
// Add tool results as a user message
loopMessages.push({ role: 'user', content: toolResultBlocks });
// Break out if the model is stuck in a repeated tool call loop
if (consecutiveRepeats >= maxConsecutiveRepeats) {
const toolOutput = lastToolResults.length > 0
? lastToolResults.join('\n\n')
: 'No results available.';
const breakMsg = `Tool loop detected (same tool called ${consecutiveRepeats} times). Returning last results:\n\n${toolOutput}`;
const assistantMsg: Message = { role: 'assistant', content: breakMsg };
this.addToHistory(assistantMsg);
return breakMsg;
}
} catch (error) {
const errorMsg = `Error in tool loop (iteration ${iteration + 1}): ${error instanceof Error ? error.message : String(error)}`;
const assistantMsg: Message = { role: 'assistant', content: errorMsg };
this.addToHistory(assistantMsg);
return breakMsg;
return errorMsg;
}
}