fix(agent): inject tool inventory note when tools change mid-session

Stale session history can cause the model to keep following old "I can't
do that" patterns even when new tools are available. NativeAgent now
tracks a fingerprint of the tool inventory and, when the inventory changes
while prior history exists, appends a note listing the current tools to
the system prompt. The fingerprint is cleared on session reset.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
William Valentin
2026-02-10 11:41:31 -08:00
parent 94264e848c
commit 796e143d61
2 changed files with 179 additions and 1 deletions
+163
View File
@@ -218,6 +218,169 @@ describe('NativeAgent tool loop', () => {
}));
});
// The inventory note must show up in the system prompt only when BOTH hold:
// the conversation already has history, and the registered tool set differs
// from the one used on earlier turns.
it('injects tool inventory note when history exists and fingerprint changes', async () => {
  // Stub model client: every invocation yields a distinct, numbered reply.
  let replies = 0;
  const chatStub = vi.fn().mockImplementation(() => {
    replies += 1;
    return {
      content: `Response ${replies}`,
      stopReason: 'end_turn',
      usage: { inputTokens: 10, outputTokens: 5 },
    };
  });
  const mockClient: ModelClient = { chat: chatStub };

  const registry = new ToolRegistry();
  registry.register(echoTool);
  const executor = new ToolExecutor(
    registry,
    new HookEngine({ confirm: [], log: [], silent: [] }),
  );
  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
  });

  // Reads the `system` field of the request passed to the Nth chat() call.
  const systemOfCall = (index: number) => chatStub.mock.calls[index][0].system;

  // Turn 1: nothing precedes this message, so the prompt is untouched.
  await agent.process('Hi');
  expect(systemOfCall(0)).toBe('You are helpful.');

  // Turn 2: history exists now, but the tool set is the same — still no note.
  await agent.process('Hello again');
  expect(systemOfCall(1)).toBe('You are helpful.');

  // Register an extra tool so the inventory differs from earlier turns.
  const greetTool: Tool = {
    name: 'test.greet',
    description: 'Greet',
    inputSchema: { type: 'object', properties: {} },
    execute: async () => ({ success: true, output: 'hi' }),
  };
  registry.register(greetTool);

  // Turn 3: history + changed inventory — the note naming both tools is expected.
  await agent.process('What can you do?');
  const systemAfterChange = systemOfCall(2);
  expect(systemAfterChange).toContain('[Tool inventory updated');
  expect(systemAfterChange).toContain('test_echo');
  expect(systemAfterChange).toContain('test_greet');
});
// A brand-new session must send the configured system prompt verbatim,
// without any tool inventory note attached.
it('does not inject tool inventory note on fresh session', async () => {
  // Stub model client that always resolves to the same canned reply.
  const chatStub = vi.fn().mockResolvedValue({
    content: 'Hello!',
    stopReason: 'end_turn',
    usage: { inputTokens: 10, outputTokens: 5 },
  });
  const mockClient: ModelClient = { chat: chatStub };

  const registry = new ToolRegistry();
  registry.register(echoTool);
  const executor = new ToolExecutor(
    registry,
    new HookEngine({ confirm: [], log: [], silent: [] }),
  );
  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
  });

  // Very first message: the only history entry is the user message just added.
  await agent.process('Hi');

  const request = chatStub.mock.calls[0][0];
  expect(request.system).toBe('You are helpful.');
  expect(request.system).not.toContain('Tool inventory updated');
});
// After the tool set changes, the note is expected on exactly one request;
// the next turn with the same tools must carry the bare system prompt again.
it('only injects tool inventory note once per fingerprint change', async () => {
  // Stub model client: every invocation yields a distinct, numbered reply.
  let replies = 0;
  const chatStub = vi.fn().mockImplementation(() => {
    replies += 1;
    return {
      content: `Response ${replies}`,
      stopReason: 'end_turn',
      usage: { inputTokens: 10, outputTokens: 5 },
    };
  });
  const mockClient: ModelClient = { chat: chatStub };

  const registry = new ToolRegistry();
  registry.register(echoTool);
  const executor = new ToolExecutor(
    registry,
    new HookEngine({ confirm: [], log: [], silent: [] }),
  );
  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
  });

  // Reads the `system` field of the request passed to the Nth chat() call.
  const systemOfCall = (index: number) => chatStub.mock.calls[index][0].system;

  // Two ordinary turns so the conversation has history.
  await agent.process('Hi');
  await agent.process('Hello');

  // Change the inventory by registering one more tool.
  const greetTool: Tool = {
    name: 'test.greet',
    description: 'Greet',
    inputSchema: { type: 'object', properties: {} },
    execute: async () => ({ success: true, output: 'hi' }),
  };
  registry.register(greetTool);

  // First turn after the change: the note is present.
  await agent.process('What tools?');
  expect(systemOfCall(2)).toContain('[Tool inventory updated');

  // Next turn, tools unchanged: the prompt is back to the plain original.
  await agent.process('Anything else?');
  expect(systemOfCall(3)).toBe('You are helpful.');
});
// After reset(), the first message of the new session must not carry a tool
// inventory note.
// NOTE(review): the tool set never changes in this test and the session has no
// history right after reset(), so this assertion presumably passes whether or
// not the fingerprint itself is cleared — confirm against NativeAgent and
// consider a stronger check.
it('resets tool fingerprint on reset()', async () => {
  // Stub model client that always resolves to the same canned reply.
  const chatStub = vi.fn().mockResolvedValue({
    content: 'Hello!',
    stopReason: 'end_turn',
    usage: { inputTokens: 10, outputTokens: 5 },
  });
  const mockClient: ModelClient = { chat: chatStub };

  const registry = new ToolRegistry();
  registry.register(echoTool);
  const executor = new ToolExecutor(
    registry,
    new HookEngine({ confirm: [], log: [], silent: [] }),
  );
  const agent = new NativeAgent({
    modelClient: mockClient,
    systemPrompt: 'You are helpful.',
    toolRegistry: registry,
    toolExecutor: executor,
  });

  // Two turns to accumulate history before the reset.
  await agent.process('Hi');
  await agent.process('Hello');

  // Start the session over.
  agent.reset();

  // The stub keeps counting across reset(), so this is its third call (index 2).
  await agent.process('Hi again');
  const requestAfterReset = chatStub.mock.calls[2][0];
  expect(requestAfterReset.system).not.toContain('Tool inventory updated');
});
it('handles multiple tool calls in single response', async () => {
let callCount = 0;
const mockClient: ModelClient = {