feat: improve agent loop resilience — same-tool nudging and error handling

- agent.ts: track consecutive calls to the same tool (ignoring args) and inject a nudge after 4 repeats telling the model to summarize and respond, preventing local models from endlessly retrying searches with slight query variations
- agent.ts: wrap the entire tool loop iteration in try-catch so model/network errors don't crash the daemon — returns a descriptive error message instead
- Tests for both: nudge triggers after 4 same-tool calls, error recovery persists to history
2026-02-11 09:33:30 -08:00
parent c01de7d097
commit 1aab006a7f
2 changed files with 202 additions and 79 deletions
@@ -154,6 +154,52 @@ describe('NativeAgent tool loop', () => {
    expect(mockClient.chat).toHaveBeenCalledTimes(3);
  });
  it('nudges model after same tool called too many times with different args', async () => {
    // Substring of the nudge text the agent appends once the same-tool streak limit is hit.
    const nudgeMarker = 'do NOT call it again';
    let chatCalls = 0;

    // Fake model: keeps requesting `test.echo` with a fresh argument on every
    // turn, and only answers in plain text once the latest message carries the nudge.
    const mockClient: ModelClient = {
      chat: vi.fn().mockImplementation((req: any) => {
        chatCalls += 1;
        const usage = { inputTokens: 10, outputTokens: 5 };

        // Tool results arrive as an array of content blocks on the last message;
        // a plain-string content can never contain the nudge block.
        const tailContent = req.messages[req.messages.length - 1]?.content;
        const sawNudge =
          Array.isArray(tailContent) &&
          tailContent.some((block: any) => block.content?.includes(nudgeMarker));

        if (!sawNudge) {
          // Same tool name every time, but a different query per call.
          return {
            content: '',
            stopReason: 'tool_use',
            usage,
            toolCalls: [{ id: `call_${chatCalls}`, name: 'test.echo', args: { text: `query_${chatCalls}` } }],
          };
        }

        return {
          content: 'Here is what I found from my searches.',
          stopReason: 'end_turn',
          usage,
        };
      }),
    };

    const toolRegistry = new ToolRegistry();
    toolRegistry.register(echoTool);
    const hookEngine = new HookEngine({ confirm: [], log: [], silent: [] });
    const toolExecutor = new ToolExecutor(toolRegistry, hookEngine);

    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry,
      toolExecutor,
      maxIterations: 10,
    });

    const reply = await agent.process('search a lot');

    // The text answer is only produced after the nudge was observed.
    expect(reply).toBe('Here is what I found from my searches.');
    // Four tool-call turns, then one final text turn.
    expect(mockClient.chat).toHaveBeenCalledTimes(5);
  });
  it('detects repeated identical tool calls and breaks the loop', async () => {
    // Model always returns the exact same tool call — simulates local LLM stuck in a loop
    const mockClient: ModelClient = {
@@ -416,6 +462,46 @@ describe('NativeAgent tool loop', () => {
    expect(call.system).not.toContain('Tool inventory updated');
  });
  it('catches model errors in tool loop and returns error message', async () => {
    let invocations = 0;

    // Fake model: the first turn asks for a tool call, every later turn throws
    // to simulate a failure while talking to the model.
    const chat = vi.fn().mockImplementation(() => {
      invocations += 1;
      if (invocations !== 1) {
        throw new Error('Connection reset by peer');
      }
      return {
        content: '',
        stopReason: 'tool_use',
        usage: { inputTokens: 10, outputTokens: 5 },
        toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
      };
    });
    const mockClient: ModelClient = { chat };

    const toolRegistry = new ToolRegistry();
    toolRegistry.register(echoTool);
    const hookEngine = new HookEngine({ confirm: [], log: [], silent: [] });
    const toolExecutor = new ToolExecutor(toolRegistry, hookEngine);

    const agent = new NativeAgent({
      modelClient: mockClient,
      systemPrompt: 'You are helpful.',
      toolRegistry,
      toolExecutor,
    });

    const reply = await agent.process('echo hi');

    // The failure surfaces as a returned message rather than a rejected promise.
    expect(reply).toContain('Error in tool loop');
    expect(reply).toContain('Connection reset by peer');

    // The same error text must be the final assistant entry in the history.
    const history = agent.getHistory();
    const lastEntry = history[history.length - 1];
    expect(lastEntry.role).toBe('assistant');
    expect(lastEntry.content).toContain('Error in tool loop');
  });
  it('handles multiple tool calls in single response', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
@@ -150,7 +150,15 @@ export class NativeAgent {
    const maxConsecutiveRepeats = 3;
    let lastToolResults: string[] = [];
    // Track consecutive calls to the same tool (even with different args).
    // Local models often call the same tool with slight query variations.
    let lastToolName: string | undefined;
    let sameToolStreak = 0;
    const maxSameToolStreak = 4; // nudge after 4 calls to the same tool
    let nudged = false;
    for (let iteration = 0; iteration < this.maxIterations; iteration++) {
      try {
        // Build request — cast loopMessages to Message[] because the underlying
        // model client will pass them through to the API which accepts structured content.
        const request = {
@@ -190,6 +198,16 @@ export class NativeAgent {
          lastFingerprint = fingerprint;
        }
        // Track consecutive calls to the same tool (by name, ignoring args)
        const toolNames = response.toolCalls.map(tc => tc.name).sort().join(',');
        if (toolNames === lastToolName) {
          sameToolStreak++;
        } else {
          sameToolStreak = 1;
          lastToolName = toolNames;
          nudged = false;
        }
        // Build the assistant message with tool_use content blocks
        const assistantContent: unknown[] = [];
        if (response.content) {
@@ -228,6 +246,19 @@ export class NativeAgent {
          }
        }
        // If the same tool has been called too many times, append a nudge
        // telling the model to use what it has. This combats local models
        // that endlessly retry searches with slight query variations.
        if (sameToolStreak >= maxSameToolStreak && !nudged) {
          nudged = true;
          toolResultBlocks.push({
            type: 'tool_result',
            tool_use_id: '__system',
            content: `You have called this tool ${sameToolStreak} times in a row. You have enough information — do NOT call it again. Summarize what you have found and respond to the user now.`,
            is_error: false,
          });
        }
        // Add tool results as a user message
        loopMessages.push({ role: 'user', content: toolResultBlocks });
@@ -241,6 +272,12 @@ export class NativeAgent {
          this.addToHistory(assistantMsg);
          return breakMsg;
        }
      } catch (error) {
        const errorMsg = `Error in tool loop (iteration ${iteration + 1}): ${error instanceof Error ? error.message : String(error)}`;
        const assistantMsg: Message = { role: 'assistant', content: errorMsg };
        this.addToHistory(assistantMsg);
        return errorMsg;
      }
    }
    // Max iterations reached