fix(agent): inject tool inventory note when tools change mid-session

Stale session history can cause the model to follow old "I can't do that" patterns even when new tools are available. NativeAgent now tracks a tool fingerprint and appends a system prompt note listing current tools when the inventory changes, resetting on session reset. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 11:41:31 -08:00
parent 94264e848c
commit 796e143d61
2 changed files with 179 additions and 1 deletions
@@ -218,6 +218,169 @@ describe('NativeAgent tool loop', () => {
    }));
  });

+  it('injects tool inventory note when history exists and fingerprint changes', async () => {
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation(() => {
+        callCount++;
+        return {
+          content: `Response ${callCount}`,
+          stopReason: 'end_turn',
+          usage: { inputTokens: 10, outputTokens: 5 },
+        };
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    // First message — no prior history, so no inventory note
+    await agent.process('Hi');
+    const firstCall = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[0][0];
+    expect(firstCall.system).toBe('You are helpful.');
+
+    // Second message — history exists but fingerprint hasn't changed, no note
+    await agent.process('Hello again');
+    const secondCall = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[1][0];
+    expect(secondCall.system).toBe('You are helpful.');
+
+    // Now add a new tool to change the fingerprint
+    const newTool: Tool = {
+      name: 'test.greet',
+      description: 'Greet',
+      inputSchema: { type: 'object', properties: {} },
+      execute: async () => ({ success: true, output: 'hi' }),
+    };
+    registry.register(newTool);
+
+    // Third message — history exists AND fingerprint changed, should inject note
+    await agent.process('What can you do?');
+    const thirdCall = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[2][0];
+    expect(thirdCall.system).toContain('[Tool inventory updated');
+    expect(thirdCall.system).toContain('test_echo');
+    expect(thirdCall.system).toContain('test_greet');
+  });
+
+  it('does not inject tool inventory note on fresh session', async () => {
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockResolvedValue({
+        content: 'Hello!',
+        stopReason: 'end_turn',
+        usage: { inputTokens: 10, outputTokens: 5 },
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    // First message ever — only one message in history (the user message just added)
+    await agent.process('Hi');
+    const call = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[0][0];
+    expect(call.system).toBe('You are helpful.');
+    expect(call.system).not.toContain('Tool inventory updated');
+  });
+
+  it('only injects tool inventory note once per fingerprint change', async () => {
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation(() => {
+        callCount++;
+        return {
+          content: `Response ${callCount}`,
+          stopReason: 'end_turn',
+          usage: { inputTokens: 10, outputTokens: 5 },
+        };
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    // Build up history
+    await agent.process('Hi');
+    await agent.process('Hello');
+
+    // Add a new tool
+    const newTool: Tool = {
+      name: 'test.greet',
+      description: 'Greet',
+      inputSchema: { type: 'object', properties: {} },
+      execute: async () => ({ success: true, output: 'hi' }),
+    };
+    registry.register(newTool);
+
+    // First call after change — note injected
+    await agent.process('What tools?');
+    const thirdCall = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[2][0];
+    expect(thirdCall.system).toContain('[Tool inventory updated');
+
+    // Second call with same tools — no note (fingerprint matches)
+    await agent.process('Anything else?');
+    const fourthCall = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[3][0];
+    expect(fourthCall.system).toBe('You are helpful.');
+  });
+
+  it('resets tool fingerprint on reset()', async () => {
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockResolvedValue({
+        content: 'Hello!',
+        stopReason: 'end_turn',
+        usage: { inputTokens: 10, outputTokens: 5 },
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    // Build history and establish fingerprint
+    await agent.process('Hi');
+    await agent.process('Hello');
+
+    // Reset clears fingerprint
+    agent.reset();
+
+    // After reset, first message has no history so no note
+    await agent.process('Hi again');
+    const call = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[2][0];
+    expect(call.system).not.toContain('Tool inventory updated');
+  });
+
  it('handles multiple tool calls in single response', async () => {
    let callCount = 0;
    const mockClient: ModelClient = {
@@ -52,6 +52,7 @@ export class NativeAgent {
  private _toolPolicyContext?: ToolPolicyContext;
  private _attachmentCollector?: OutboundAttachmentCollector;
  private _thinking: boolean = false;
+  private _lastToolFingerprint?: string;

  constructor(config: NativeAgentConfig) {
    this.modelClient = config.modelClient;
@@ -122,6 +123,19 @@ export class NativeAgent {
  private async toolLoop(): Promise<string> {
    const tools = this.toolRegistry!.filteredToAnthropicFormat(this._toolPolicyContext);

+    // Detect tool inventory changes to combat conversational inertia in long sessions.
+    // When tools change (e.g. new tools added between restarts), the model's prior messages
+    // saying "I can't do that" can override tool definitions. Injecting a system note fixes this.
+    const currentFingerprint = tools.map(t => t.name).sort().join(',');
+    const hasHistory = this.history.length > 1; // more than just the current user message
+    let effectiveSystem = this.systemPrompt;
+
+    if (hasHistory && this._lastToolFingerprint !== currentFingerprint) {
+      const toolNames = tools.map(t => t.name).join(', ');
+      effectiveSystem += `\n\n[Tool inventory updated — available tools: ${toolNames}. Use these tools directly; do not attempt workarounds for functionality that a tool already provides.]`;
+    }
+    this._lastToolFingerprint = currentFingerprint;
+
    // Build the loop messages from existing history.
    // These are the messages sent to the model, including any structured tool blocks.
    const loopMessages: LoopMessage[] = this.history.map(m => ({
@@ -134,7 +148,7 @@ export class NativeAgent {
      // model client will pass them through to the API which accepts structured content.
      const request = {
        messages: loopMessages as unknown as Message[],
-        system: this.systemPrompt,
+        system: effectiveSystem,
        tools,
        ...(this._thinking ? { thinking: true } : {}),
      };
@@ -221,6 +235,7 @@ export class NativeAgent {
    } else {
      this.inMemoryHistory = [];
    }
+    this._lastToolFingerprint = undefined;
    this.resetUsage();
  }