Patch summary (free text ahead of the first "diff --git" header):
  * agent.test.ts: adds tests for the same-tool nudge and for an error
    thrown by the model client inside the tool loop.
  * agent.ts: counts consecutive iterations that call the same set of tool
    names, nudges the model once when the streak reaches 4, and wraps each
    loop iteration in try/catch so a thrown error is stored in history and
    returned as the assistant reply.
  * The nudge is appended to the last real tool_result instead of being sent
    as an extra tool_result with a made-up tool_use_id.

diff --git a/src/backends/native/agent.test.ts b/src/backends/native/agent.test.ts
index e915ffc..bd5a4b4 100644
--- a/src/backends/native/agent.test.ts
+++ b/src/backends/native/agent.test.ts
@@ -154,6 +154,52 @@ describe('NativeAgent tool loop', () => {
     expect(mockClient.chat).toHaveBeenCalledTimes(3);
   });
 
+  it('nudges model after same tool called too many times with different args', async () => {
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation((req: any) => {
+        callCount++;
+        // After nudge message, model should respond with text
+        const lastMsg = req.messages[req.messages.length - 1];
+        const hasNudge = typeof lastMsg?.content !== 'string' &&
+          Array.isArray(lastMsg?.content) &&
+          lastMsg.content.some((b: any) => b.content?.includes('do NOT call it again'));
+        if (hasNudge) {
+          return {
+            content: 'Here is what I found from my searches.',
+            stopReason: 'end_turn',
+            usage: { inputTokens: 10, outputTokens: 5 },
+          };
+        }
+        return {
+          content: '',
+          stopReason: 'tool_use',
+          usage: { inputTokens: 10, outputTokens: 5 },
+          toolCalls: [{ id: `call_${callCount}`, name: 'test.echo', args: { text: `query_${callCount}` } }],
+        };
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+      maxIterations: 10,
+    });
+
+    const response = await agent.process('search a lot');
+    // Model should have responded after receiving the nudge
+    expect(response).toBe('Here is what I found from my searches.');
+    // 4 tool calls + 1 final response = 5 chat calls
+    expect(mockClient.chat).toHaveBeenCalledTimes(5);
+  });
+
   it('detects repeated identical tool calls and breaks the loop', async () => {
     // Model always returns the exact same tool call — simulates local LLM stuck in a loop
     const mockClient: ModelClient = {
@@ -416,6 +462,46 @@ describe('NativeAgent tool loop', () => {
     expect(call.system).not.toContain('Tool inventory updated');
   });
 
+  it('catches model errors in tool loop and returns error message', async () => {
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation(() => {
+        callCount++;
+        if (callCount === 1) {
+          // First call: model requests tool use
+          return {
+            content: '',
+            stopReason: 'tool_use',
+            usage: { inputTokens: 10, outputTokens: 5 },
+            toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
+          };
+        }
+        // Second call: model throws an error
+        throw new Error('Connection reset by peer');
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    const response = await agent.process('echo hi');
+    expect(response).toContain('Error in tool loop');
+    expect(response).toContain('Connection reset by peer');
+    // Error should be persisted to history
+    const history = agent.getHistory();
+    expect(history[history.length - 1].role).toBe('assistant');
+    expect(history[history.length - 1].content).toContain('Error in tool loop');
+  });
+
   it('handles multiple tool calls in single response', async () => {
     let callCount = 0;
     const mockClient: ModelClient = {
diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts
index cf46cc9..96aca9b 100644
--- a/src/backends/native/agent.ts
+++ b/src/backends/native/agent.ts
@@ -150,96 +150,133 @@ export class NativeAgent {
     const maxConsecutiveRepeats = 3;
     let lastToolResults: string[] = [];
 
+    // Track consecutive calls to the same tool (even with different args).
+    // Local models often call the same tool with slight query variations.
+    let lastToolName: string | undefined;
+    let sameToolStreak = 0;
+    const maxSameToolStreak = 4; // nudge after 4 calls to the same tool
+    let nudged = false;
+
     for (let iteration = 0; iteration < this.maxIterations; iteration++) {
-      // Build request — cast loopMessages to Message[] because the underlying
-      // model client will pass them through to the API which accepts structured content.
-      const request = {
-        messages: loopMessages as unknown as Message[],
-        system: effectiveSystem,
-        tools,
-        ...(this._thinking ? { thinking: true } : {}),
-      };
+      try {
+        // Build request — cast loopMessages to Message[] because the underlying
+        // model client will pass them through to the API which accepts structured content.
+        const request = {
+          messages: loopMessages as unknown as Message[],
+          system: effectiveSystem,
+          tools,
+          ...(this._thinking ? { thinking: true } : {}),
+        };
 
-      const response = await this.chatWithRouter(request);
+        const response = await this.chatWithRouter(request);
 
-      this._totalUsage.inputTokens += response.usage.inputTokens;
-      this._totalUsage.outputTokens += response.usage.outputTokens;
-      this._callCount++;
+        this._totalUsage.inputTokens += response.usage.inputTokens;
+        this._totalUsage.outputTokens += response.usage.outputTokens;
+        this._callCount++;
 
-      // If the model didn't request tool use, we're done
-      if (response.stopReason !== 'tool_use' || !response.toolCalls?.length) {
-        let finalContent = response.content;
-        if (response.thinkingContent) {
-          finalContent = `\n${response.thinkingContent}\n\n\n${response.content}`;
+        // If the model didn't request tool use, we're done
+        if (response.stopReason !== 'tool_use' || !response.toolCalls?.length) {
+          let finalContent = response.content;
+          if (response.thinkingContent) {
+            finalContent = `\n${response.thinkingContent}\n\n\n${response.content}`;
+          }
+          const assistantMsg: Message = { role: 'assistant', content: response.content };
+          this.addToHistory(assistantMsg);
+          return finalContent;
         }
-        const assistantMsg: Message = { role: 'assistant', content: response.content };
-        this.addToHistory(assistantMsg);
-        return finalContent;
-      }
 
-      // Check for repeated tool calls — build a fingerprint from tool names + args
-      const fingerprint = response.toolCalls
-        .map(tc => `${tc.name}:${JSON.stringify(tc.args)}`)
-        .sort()
-        .join('|');
+        // Check for repeated tool calls — build a fingerprint from tool names + args
+        const fingerprint = response.toolCalls
+          .map(tc => `${tc.name}:${JSON.stringify(tc.args)}`)
+          .sort()
+          .join('|');
 
-      if (fingerprint === lastFingerprint) {
-        consecutiveRepeats++;
-      } else {
-        consecutiveRepeats = 1;
-        lastFingerprint = fingerprint;
-      }
-
-      // Build the assistant message with tool_use content blocks
-      const assistantContent: unknown[] = [];
-      if (response.content) {
-        assistantContent.push({ type: 'text', text: response.content });
-      }
-      for (const tc of response.toolCalls) {
-        assistantContent.push({
-          type: 'tool_use',
-          id: tc.id,
-          name: tc.name,
-          input: tc.args,
-        });
-      }
-      loopMessages.push({ role: 'assistant', content: assistantContent });
-
-      // Execute each tool call and collect results
-      const toolResultBlocks: unknown[] = [];
-      lastToolResults = [];
-      for (const tc of response.toolCalls) {
-        const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
-        this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
-
-        const result = await this.toolExecutor!.execute(internalName, tc.args, this._toolPolicyContext);
-
-        this.onToolUse?.({ type: 'end', tool: internalName, result });
-
-        const resultContent = result.success ? result.output : (result.error ?? 'Unknown error');
-        toolResultBlocks.push({
-          type: 'tool_result',
-          tool_use_id: tc.id,
-          content: resultContent,
-          is_error: !result.success,
-        });
-        if (result.success && result.output) {
-          lastToolResults.push(result.output);
+        if (fingerprint === lastFingerprint) {
+          consecutiveRepeats++;
+        } else {
+          consecutiveRepeats = 1;
+          lastFingerprint = fingerprint;
         }
-      }
 
-      // Add tool results as a user message
-      loopMessages.push({ role: 'user', content: toolResultBlocks });
+        // Track consecutive calls to the same tool (by name, ignoring args)
+        const toolNames = response.toolCalls.map(tc => tc.name).sort().join(',');
+        if (toolNames === lastToolName) {
+          sameToolStreak++;
+        } else {
+          sameToolStreak = 1;
+          lastToolName = toolNames;
+          nudged = false;
+        }
 
-      // Break out if the model is stuck in a repeated tool call loop
-      if (consecutiveRepeats >= maxConsecutiveRepeats) {
-        const toolOutput = lastToolResults.length > 0
-          ? lastToolResults.join('\n\n')
-          : 'No results available.';
-        const breakMsg = `Tool loop detected (same tool called ${consecutiveRepeats} times). Returning last results:\n\n${toolOutput}`;
-        const assistantMsg: Message = { role: 'assistant', content: breakMsg };
+        // Build the assistant message with tool_use content blocks
+        const assistantContent: unknown[] = [];
+        if (response.content) {
+          assistantContent.push({ type: 'text', text: response.content });
+        }
+        for (const tc of response.toolCalls) {
+          assistantContent.push({
+            type: 'tool_use',
+            id: tc.id,
+            name: tc.name,
+            input: tc.args,
+          });
+        }
+        loopMessages.push({ role: 'assistant', content: assistantContent });
+
+        // Execute each tool call and collect results
+        const toolResultBlocks: unknown[] = [];
+        lastToolResults = [];
+        for (const tc of response.toolCalls) {
+          const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
+          this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
+
+          const result = await this.toolExecutor!.execute(internalName, tc.args, this._toolPolicyContext);
+
+          this.onToolUse?.({ type: 'end', tool: internalName, result });
+
+          const resultContent = result.success ? result.output : (result.error ?? 'Unknown error');
+          toolResultBlocks.push({
+            type: 'tool_result',
+            tool_use_id: tc.id,
+            content: resultContent,
+            is_error: !result.success,
+          });
+          if (result.success && result.output) {
+            lastToolResults.push(result.output);
+          }
+        }
+
+        // If the same tool has been called too many times, append a nudge
+        // telling the model to use what it has. This combats local models
+        // that endlessly retry searches with slight query variations. The nudge
+        // rides on the last real tool_result: a block with a made-up tool_use_id
+        // pairs with no tool_use and is rejected by APIs that validate the pairing.
+        if (sameToolStreak >= maxSameToolStreak && !nudged) {
+          nudged = true;
+          const lastBlock = toolResultBlocks[toolResultBlocks.length - 1] as { content?: string } | undefined;
+          if (lastBlock) {
+            lastBlock.content = `${lastBlock.content ?? ''}\n\nYou have called this tool ${sameToolStreak} times in a row. You have enough information — do NOT call it again. Summarize what you have found and respond to the user now.`;
+          }
+        }
+
+        // Add tool results as a user message
+        loopMessages.push({ role: 'user', content: toolResultBlocks });
+
+        // Break out if the model is stuck in a repeated tool call loop
+        if (consecutiveRepeats >= maxConsecutiveRepeats) {
+          const toolOutput = lastToolResults.length > 0
+            ? lastToolResults.join('\n\n')
+            : 'No results available.';
+          const breakMsg = `Tool loop detected (same tool called ${consecutiveRepeats} times). Returning last results:\n\n${toolOutput}`;
+          const assistantMsg: Message = { role: 'assistant', content: breakMsg };
+          this.addToHistory(assistantMsg);
+          return breakMsg;
+        }
+      } catch (error) {
+        const errorMsg = `Error in tool loop (iteration ${iteration + 1}): ${error instanceof Error ? error.message : String(error)}`;
+        const assistantMsg: Message = { role: 'assistant', content: errorMsg };
         this.addToHistory(assistantMsg);
-        return breakMsg;
+        return errorMsg;
       }
     }
 