feat: improve agent loop resilience — same-tool nudging and error handling
- agent.ts: track consecutive calls to the same tool (ignoring args) and inject a nudge after 4 repeats telling the model to summarize and respond, preventing local models from endlessly retrying searches with slight query variations
- agent.ts: wrap the entire tool loop iteration in try-catch so model/network errors don't crash the daemon — the loop returns a descriptive error message instead
- Tests for both: the nudge triggers after 4 same-tool calls, and the error recovery message is persisted to history
This commit is contained in:
@@ -154,6 +154,52 @@ describe('NativeAgent tool loop', () => {
|
|||||||
expect(mockClient.chat).toHaveBeenCalledTimes(3);
|
expect(mockClient.chat).toHaveBeenCalledTimes(3);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('nudges model after same tool called too many times with different args', async () => {
|
||||||
|
let callCount = 0;
|
||||||
|
const mockClient: ModelClient = {
|
||||||
|
chat: vi.fn().mockImplementation((req: any) => {
|
||||||
|
callCount++;
|
||||||
|
// After nudge message, model should respond with text
|
||||||
|
const lastMsg = req.messages[req.messages.length - 1];
|
||||||
|
const hasNudge = typeof lastMsg?.content !== 'string' &&
|
||||||
|
Array.isArray(lastMsg?.content) &&
|
||||||
|
lastMsg.content.some((b: any) => b.content?.includes('do NOT call it again'));
|
||||||
|
if (hasNudge) {
|
||||||
|
return {
|
||||||
|
content: 'Here is what I found from my searches.',
|
||||||
|
stopReason: 'end_turn',
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
content: '',
|
||||||
|
stopReason: 'tool_use',
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `query_${callCount}` } }],
|
||||||
|
};
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
const registry = new ToolRegistry();
|
||||||
|
registry.register(echoTool);
|
||||||
|
const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
|
||||||
|
const executor = new ToolExecutor(registry, hooks);
|
||||||
|
|
||||||
|
const agent = new NativeAgent({
|
||||||
|
modelClient: mockClient,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
toolRegistry: registry,
|
||||||
|
toolExecutor: executor,
|
||||||
|
maxIterations: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await agent.process('search a lot');
|
||||||
|
// Model should have responded after receiving the nudge
|
||||||
|
expect(response).toBe('Here is what I found from my searches.');
|
||||||
|
// 4 tool calls + 1 final response = 5 chat calls
|
||||||
|
expect(mockClient.chat).toHaveBeenCalledTimes(5);
|
||||||
|
});
|
||||||
|
|
||||||
it('detects repeated identical tool calls and breaks the loop', async () => {
|
it('detects repeated identical tool calls and breaks the loop', async () => {
|
||||||
// Model always returns the exact same tool call — simulates local LLM stuck in a loop
|
// Model always returns the exact same tool call — simulates local LLM stuck in a loop
|
||||||
const mockClient: ModelClient = {
|
const mockClient: ModelClient = {
|
||||||
@@ -416,6 +462,46 @@ describe('NativeAgent tool loop', () => {
|
|||||||
expect(call.system).not.toContain('Tool inventory updated');
|
expect(call.system).not.toContain('Tool inventory updated');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('catches model errors in tool loop and returns error message', async () => {
|
||||||
|
let callCount = 0;
|
||||||
|
const mockClient: ModelClient = {
|
||||||
|
chat: vi.fn().mockImplementation(() => {
|
||||||
|
callCount++;
|
||||||
|
if (callCount === 1) {
|
||||||
|
// First call: model requests tool use
|
||||||
|
return {
|
||||||
|
content: '',
|
||||||
|
stopReason: 'tool_use',
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
// Second call: model throws an error
|
||||||
|
throw new Error('Connection reset by peer');
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
const registry = new ToolRegistry();
|
||||||
|
registry.register(echoTool);
|
||||||
|
const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
|
||||||
|
const executor = new ToolExecutor(registry, hooks);
|
||||||
|
|
||||||
|
const agent = new NativeAgent({
|
||||||
|
modelClient: mockClient,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
toolRegistry: registry,
|
||||||
|
toolExecutor: executor,
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await agent.process('echo hi');
|
||||||
|
expect(response).toContain('Error in tool loop');
|
||||||
|
expect(response).toContain('Connection reset by peer');
|
||||||
|
// Error should be persisted to history
|
||||||
|
const history = agent.getHistory();
|
||||||
|
expect(history[history.length - 1].role).toBe('assistant');
|
||||||
|
expect(history[history.length - 1].content).toContain('Error in tool loop');
|
||||||
|
});
|
||||||
|
|
||||||
it('handles multiple tool calls in single response', async () => {
|
it('handles multiple tool calls in single response', async () => {
|
||||||
let callCount = 0;
|
let callCount = 0;
|
||||||
const mockClient: ModelClient = {
|
const mockClient: ModelClient = {
|
||||||
|
|||||||
@@ -150,7 +150,15 @@ export class NativeAgent {
|
|||||||
const maxConsecutiveRepeats = 3;
|
const maxConsecutiveRepeats = 3;
|
||||||
let lastToolResults: string[] = [];
|
let lastToolResults: string[] = [];
|
||||||
|
|
||||||
|
// Track consecutive calls to the same tool (even with different args).
|
||||||
|
// Local models often call the same tool with slight query variations.
|
||||||
|
let lastToolName: string | undefined;
|
||||||
|
let sameToolStreak = 0;
|
||||||
|
const maxSameToolStreak = 4; // nudge after 4 calls to the same tool
|
||||||
|
let nudged = false;
|
||||||
|
|
||||||
for (let iteration = 0; iteration < this.maxIterations; iteration++) {
|
for (let iteration = 0; iteration < this.maxIterations; iteration++) {
|
||||||
|
try {
|
||||||
// Build request — cast loopMessages to Message[] because the underlying
|
// Build request — cast loopMessages to Message[] because the underlying
|
||||||
// model client will pass them through to the API which accepts structured content.
|
// model client will pass them through to the API which accepts structured content.
|
||||||
const request = {
|
const request = {
|
||||||
@@ -190,6 +198,16 @@ export class NativeAgent {
|
|||||||
lastFingerprint = fingerprint;
|
lastFingerprint = fingerprint;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Track consecutive calls to the same tool (by name, ignoring args)
|
||||||
|
const toolNames = response.toolCalls.map(tc => tc.name).sort().join(',');
|
||||||
|
if (toolNames === lastToolName) {
|
||||||
|
sameToolStreak++;
|
||||||
|
} else {
|
||||||
|
sameToolStreak = 1;
|
||||||
|
lastToolName = toolNames;
|
||||||
|
nudged = false;
|
||||||
|
}
|
||||||
|
|
||||||
// Build the assistant message with tool_use content blocks
|
// Build the assistant message with tool_use content blocks
|
||||||
const assistantContent: unknown[] = [];
|
const assistantContent: unknown[] = [];
|
||||||
if (response.content) {
|
if (response.content) {
|
||||||
@@ -228,6 +246,19 @@ export class NativeAgent {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the same tool has been called too many times, append a nudge
|
||||||
|
// telling the model to use what it has. This combats local models
|
||||||
|
// that endlessly retry searches with slight query variations.
|
||||||
|
if (sameToolStreak >= maxSameToolStreak && !nudged) {
|
||||||
|
nudged = true;
|
||||||
|
toolResultBlocks.push({
|
||||||
|
type: 'tool_result',
|
||||||
|
tool_use_id: '__system',
|
||||||
|
content: `You have called this tool ${sameToolStreak} times in a row. You have enough information — do NOT call it again. Summarize what you have found and respond to the user now.`,
|
||||||
|
is_error: false,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Add tool results as a user message
|
// Add tool results as a user message
|
||||||
loopMessages.push({ role: 'user', content: toolResultBlocks });
|
loopMessages.push({ role: 'user', content: toolResultBlocks });
|
||||||
|
|
||||||
@@ -241,6 +272,12 @@ export class NativeAgent {
|
|||||||
this.addToHistory(assistantMsg);
|
this.addToHistory(assistantMsg);
|
||||||
return breakMsg;
|
return breakMsg;
|
||||||
}
|
}
|
||||||
|
} catch (error) {
|
||||||
|
const errorMsg = `Error in tool loop (iteration ${iteration + 1}): ${error instanceof Error ? error.message : String(error)}`;
|
||||||
|
const assistantMsg: Message = { role: 'assistant', content: errorMsg };
|
||||||
|
this.addToHistory(assistantMsg);
|
||||||
|
return errorMsg;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Max iterations reached
|
// Max iterations reached
|
||||||
|
|||||||
Reference in New Issue
Block a user