From 796e143d61c876852370b1953e4d86641332fc87 Mon Sep 17 00:00:00 2001
From: William Valentin
Date: Tue, 10 Feb 2026 11:41:31 -0800
Subject: [PATCH] fix(agent): inject tool inventory note when tools change mid-session

Stale session history can cause the model to follow old "I can't do that"
patterns even when new tools are available. NativeAgent now tracks a tool
fingerprint and appends a system prompt note listing current tools when
the inventory changes, resetting on session reset.

Co-Authored-By: Claude Opus 4.6
---
NOTE(review): this patch was recovered from a copy whose line breaks,
indentation and angle-bracket segments had been stripped. Line structure
was rebuilt from the hunk headers; the 2-space indentation and the
position of blank context lines are reconstructions, so apply with
`git apply --ignore-whitespace` or confirm against the repository.
`ReturnType<typeof vi.fn>` in the tests is a reconstruction of a stripped
type argument. The `Promise` type argument on the `toolLoop` context line
and the author/co-author e-mail addresses were also stripped and are left
as found -- restore them from the repository before applying.

 src/backends/native/agent.test.ts | 163 ++++++++++++++++++++++++++++++
 src/backends/native/agent.ts      |  17 +++-
 2 files changed, 179 insertions(+), 1 deletion(-)

diff --git a/src/backends/native/agent.test.ts b/src/backends/native/agent.test.ts
index a4b4041..5f31f09 100644
--- a/src/backends/native/agent.test.ts
+++ b/src/backends/native/agent.test.ts
@@ -218,6 +218,169 @@ describe('NativeAgent tool loop', () => {
     }));
   });
 
+  it('injects tool inventory note when history exists and fingerprint changes', async () => {
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation(() => {
+        callCount++;
+        return {
+          content: `Response ${callCount}`,
+          stopReason: 'end_turn',
+          usage: { inputTokens: 10, outputTokens: 5 },
+        };
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    // First message — no prior history, so no inventory note
+    await agent.process('Hi');
+    const firstCall = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[0][0];
+    expect(firstCall.system).toBe('You are helpful.');
+
+    // Second message — history exists but fingerprint hasn't changed, no note
+    await agent.process('Hello again');
+    const secondCall = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[1][0];
+    expect(secondCall.system).toBe('You are helpful.');
+
+    // Now add a new tool to change the fingerprint
+    const newTool: Tool = {
+      name: 'test.greet',
+      description: 'Greet',
+      inputSchema: { type: 'object', properties: {} },
+      execute: async () => ({ success: true, output: 'hi' }),
+    };
+    registry.register(newTool);
+
+    // Third message — history exists AND fingerprint changed, should inject note
+    await agent.process('What can you do?');
+    const thirdCall = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[2][0];
+    expect(thirdCall.system).toContain('[Tool inventory updated');
+    expect(thirdCall.system).toContain('test_echo');
+    expect(thirdCall.system).toContain('test_greet');
+  });
+
+  it('does not inject tool inventory note on fresh session', async () => {
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockResolvedValue({
+        content: 'Hello!',
+        stopReason: 'end_turn',
+        usage: { inputTokens: 10, outputTokens: 5 },
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    // First message ever — only one message in history (the user message just added)
+    await agent.process('Hi');
+    const call = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[0][0];
+    expect(call.system).toBe('You are helpful.');
+    expect(call.system).not.toContain('Tool inventory updated');
+  });
+
+  it('only injects tool inventory note once per fingerprint change', async () => {
+    let callCount = 0;
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockImplementation(() => {
+        callCount++;
+        return {
+          content: `Response ${callCount}`,
+          stopReason: 'end_turn',
+          usage: { inputTokens: 10, outputTokens: 5 },
+        };
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    // Build up history
+    await agent.process('Hi');
+    await agent.process('Hello');
+
+    // Add a new tool
+    const newTool: Tool = {
+      name: 'test.greet',
+      description: 'Greet',
+      inputSchema: { type: 'object', properties: {} },
+      execute: async () => ({ success: true, output: 'hi' }),
+    };
+    registry.register(newTool);
+
+    // First call after change — note injected
+    await agent.process('What tools?');
+    const thirdCall = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[2][0];
+    expect(thirdCall.system).toContain('[Tool inventory updated');
+
+    // Second call with same tools — no note (fingerprint matches)
+    await agent.process('Anything else?');
+    const fourthCall = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[3][0];
+    expect(fourthCall.system).toBe('You are helpful.');
+  });
+
+  it('resets tool fingerprint on reset()', async () => {
+    const mockClient: ModelClient = {
+      chat: vi.fn().mockResolvedValue({
+        content: 'Hello!',
+        stopReason: 'end_turn',
+        usage: { inputTokens: 10, outputTokens: 5 },
+      }),
+    };
+
+    const registry = new ToolRegistry();
+    registry.register(echoTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const agent = new NativeAgent({
+      modelClient: mockClient,
+      systemPrompt: 'You are helpful.',
+      toolRegistry: registry,
+      toolExecutor: executor,
+    });
+
+    // Build history and establish fingerprint
+    await agent.process('Hi');
+    await agent.process('Hello');
+
+    // Reset clears fingerprint
+    agent.reset();
+
+    // After reset, first message has no history so no note
+    await agent.process('Hi again');
+    const call = (mockClient.chat as ReturnType<typeof vi.fn>).mock.calls[2][0];
+    expect(call.system).not.toContain('Tool inventory updated');
+  });
+
   it('handles multiple tool calls in single response', async () => {
     let callCount = 0;
     const mockClient: ModelClient = {
diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts
index 1bf7301..9abdb97 100644
--- a/src/backends/native/agent.ts
+++ b/src/backends/native/agent.ts
@@ -52,6 +52,7 @@ export class NativeAgent {
   private _toolPolicyContext?: ToolPolicyContext;
   private _attachmentCollector?: OutboundAttachmentCollector;
   private _thinking: boolean = false;
+  private _lastToolFingerprint?: string;
 
   constructor(config: NativeAgentConfig) {
     this.modelClient = config.modelClient;
@@ -122,6 +123,19 @@ export class NativeAgent {
   private async toolLoop(): Promise {
     const tools = this.toolRegistry!.filteredToAnthropicFormat(this._toolPolicyContext);
 
+    // Detect tool inventory changes to combat conversational inertia in long sessions.
+    // When tools change (e.g. new tools added between restarts), the model's prior messages
+    // saying "I can't do that" can override tool definitions. Injecting a system note fixes this.
+    const currentFingerprint = tools.map(t => t.name).sort().join(',');
+    const hasHistory = this.history.length > 1; // more than just the current user message
+    let effectiveSystem = this.systemPrompt;
+
+    if (hasHistory && this._lastToolFingerprint !== currentFingerprint) {
+      const toolNames = tools.map(t => t.name).join(', ');
+      effectiveSystem += `\n\n[Tool inventory updated — available tools: ${toolNames}. Use these tools directly; do not attempt workarounds for functionality that a tool already provides.]`;
+    }
+    this._lastToolFingerprint = currentFingerprint;
+
     // Build the loop messages from existing history.
     // These are the messages sent to the model, including any structured tool blocks.
     const loopMessages: LoopMessage[] = this.history.map(m => ({
@@ -134,7 +148,7 @@ export class NativeAgent {
     // model client will pass them through to the API which accepts structured content.
     const request = {
       messages: loopMessages as unknown as Message[],
-      system: this.systemPrompt,
+      system: effectiveSystem,
       tools,
       ...(this._thinking ? { thinking: true } : {}),
     };
@@ -221,6 +235,7 @@ export class NativeAgent {
     } else {
       this.inMemoryHistory = [];
     }
+    this._lastToolFingerprint = undefined;
     this.resetUsage();
   }
 