feat: add tool calling support to Ollama and llama.cpp clients

- Ollama: pass tools to API, parse tool_calls responses, handle thinking field from reasoning models (deepseek-r1, glm-4.7-flash)
- llama.cpp: pass tools via OpenAI-compatible endpoint, parse tool_calls, accumulate streaming tool call deltas
- Both clients now set stopReason to 'tool_use' when tool calls are present
- Tests: 12 new tests (7 Ollama + 5 llama.cpp, total 983→995)
This commit is contained in:
William Valentin
2026-02-07 17:20:27 -08:00
parent fcbab1e1ee
commit fb20acfbcd
4 changed files with 655 additions and 30 deletions
+244
View File
@@ -6,6 +6,7 @@ describe('LlamaCppClient', () => {
// Shared stand-in for the global `fetch`; each test configures what it resolves to.
const mockFetch = vi.fn();
// Runs before every test: start from a clean mock and install it as the global `fetch`.
beforeEach(() => {
// Reset first so state from a previous test does not carry over
// (the tests below inspect `mockFetch.mock.calls[0]`).
mockFetch.mockReset();
vi.stubGlobal('fetch', mockFetch);
});
@@ -96,4 +97,247 @@ describe('LlamaCppClient', () => {
messages: [{ role: 'user', content: 'Hello' }],
})).rejects.toThrow('llama-server not running at http://localhost:8080');
});
// chat() must translate each supplied tool definition into a
// `{ type: 'function', function: { name, description, parameters } }` entry
// in the JSON body handed to fetch.
it('passes tools in request body', async () => {
  const cannedReply = {
    choices: [{ message: { content: 'I can help with that.' } }],
    usage: { prompt_tokens: 12, completion_tokens: 6 },
  };
  mockFetch.mockResolvedValue({
    ok: true,
    json: async () => cannedReply,
  });

  const llama = new LlamaCppClient({
    endpoint: 'http://localhost:8080',
    model: 'test-model',
  });

  await llama.chat({
    messages: [{ role: 'user', content: 'Run ls' }],
    tools: [
      {
        name: 'shell.exec',
        description: 'Run shell',
        input_schema: {
          type: 'object',
          properties: { command: { type: 'string' } },
          required: ['command'],
        },
      },
    ],
  });

  // The second argument of the first fetch call carries the serialized body.
  const fetchInit = mockFetch.mock.calls[0][1];
  const sentPayload = JSON.parse(fetchInit.body);

  // `input_schema` is expected to reappear under `function.parameters`.
  const expectedTools = [
    {
      type: 'function',
      function: {
        name: 'shell.exec',
        description: 'Run shell',
        parameters: {
          type: 'object',
          properties: { command: { type: 'string' } },
          required: ['command'],
        },
      },
    },
  ];
  expect(sentPayload.tools).toEqual(expectedTools);
});
// A non-streaming reply that carries `tool_calls` must surface as parsed
// tool calls (JSON-decoded arguments), a 'tool_use' stop reason, and the
// token usage mapped from prompt/completion counts.
it('parses tool_calls from response', async () => {
  const toolCallReply = {
    choices: [
      {
        message: {
          content: null,
          tool_calls: [
            {
              id: 'call_123',
              type: 'function',
              function: { name: 'shell.exec', arguments: '{"command":"ls"}' },
            },
          ],
        },
        finish_reason: 'tool_calls',
      },
    ],
    usage: { prompt_tokens: 15, completion_tokens: 8 },
  };
  mockFetch.mockResolvedValue({
    ok: true,
    json: async () => toolCallReply,
  });

  const llama = new LlamaCppClient({
    endpoint: 'http://localhost:8080',
    model: 'test-model',
  });

  const result = await llama.chat({
    messages: [{ role: 'user', content: 'List files' }],
    tools: [
      {
        name: 'shell.exec',
        description: 'Run shell',
        input_schema: {
          type: 'object',
          properties: { command: { type: 'string' } },
          required: ['command'],
        },
      },
    ],
  });

  expect(result.stopReason).toBe('tool_use');

  // Exactly one call, with the argument string decoded into an object.
  expect(result.toolCalls).toHaveLength(1);
  const expectedCall = {
    id: 'call_123',
    name: 'shell.exec',
    args: { command: 'ls' },
  };
  expect(result.toolCalls![0]).toEqual(expectedCall);

  expect(result.usage.inputTokens).toBe(15);
  expect(result.usage.outputTokens).toBe(8);
});
// When the caller supplies no tools, the request body must not contain a
// `tools` key at all.
it('does not send tools when none provided', async () => {
  const plainReply = {
    choices: [{ message: { content: 'Hello!' } }],
    usage: { prompt_tokens: 5, completion_tokens: 2 },
  };
  mockFetch.mockResolvedValue({
    ok: true,
    json: async () => plainReply,
  });

  const llama = new LlamaCppClient({
    endpoint: 'http://localhost:8080',
    model: 'test-model',
  });

  await llama.chat({
    messages: [{ role: 'user', content: 'Hello' }],
  });

  // Inspect the serialized body passed as fetch's second argument.
  const fetchInit = mockFetch.mock.calls[0][1];
  const sentPayload = JSON.parse(fetchInit.body);
  expect(sentPayload.tools).toBeUndefined();
});
// Streaming: a tool call arrives as several deltas (id/name first, then the
// argument JSON split across two fragments). The client is expected to stitch
// them together and emit a single tool_use event plus a done event with usage.
it('streaming: accumulates and yields tool_calls from deltas', async () => {
  const sseFrames = [
    'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_1","type":"function","function":{"name":"shell.exec"}}]}}]}\n\n',
    'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\\"comma"}}]}}]}\n\n',
    'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"nd\\":\\"ls\\"}"}}]}}]}\n\n',
    'data: {"choices":[{}],"usage":{"prompt_tokens":10,"completion_tokens":5}}\n\n',
    'data: [DONE]\n\n',
  ];

  // Hand out one encoded frame per pull, then close the stream.
  const utf8 = new TextEncoder();
  const pendingFrames = sseFrames[Symbol.iterator]();
  const responseBody = new ReadableStream({
    pull(controller) {
      const step = pendingFrames.next();
      if (step.done) {
        controller.close();
        return;
      }
      controller.enqueue(utf8.encode(step.value));
    },
  });

  mockFetch.mockResolvedValue({
    ok: true,
    body: responseBody,
  });

  const llama = new LlamaCppClient({
    endpoint: 'http://localhost:8080',
    model: 'test-model',
  });

  const collected: ChatStreamEvent[] = [];
  const stream = llama.chatStream({
    messages: [{ role: 'user', content: 'Run ls' }],
    tools: [
      {
        name: 'shell.exec',
        description: 'Run shell',
        input_schema: {
          type: 'object',
          properties: { command: { type: 'string' } },
          required: ['command'],
        },
      },
    ],
  });
  for await (const evt of stream) {
    collected.push(evt);
  }

  // Expect exactly one tool_use event and exactly one done event.
  const toolUses = collected.filter((evt) => evt.type === 'tool_use');
  const completions = collected.filter((evt) => evt.type === 'done');

  expect(toolUses).toHaveLength(1);
  // The two argument fragments concatenate to {"command":"ls"}.
  expect(toolUses[0].toolCall).toEqual({
    id: 'call_1',
    name: 'shell.exec',
    args: { command: 'ls' },
  });

  expect(completions).toHaveLength(1);
  expect(completions[0].usage).toEqual({
    inputTokens: 10,
    outputTokens: 5,
  });
});
// Streaming: the request sent by chatStream() must carry the converted tool
// definitions and have `stream` set to true.
it('streaming: passes tools in request body', async () => {
  const sseFrames = [
    'data: {"choices":[{"delta":{"content":"Hi"}}]}\n\n',
    'data: {"choices":[{}],"usage":{"prompt_tokens":3,"completion_tokens":1}}\n\n',
    'data: [DONE]\n\n',
  ];

  // Hand out one encoded frame per pull, then close the stream.
  const utf8 = new TextEncoder();
  const pendingFrames = sseFrames[Symbol.iterator]();
  const responseBody = new ReadableStream({
    pull(controller) {
      const step = pendingFrames.next();
      if (step.done) {
        controller.close();
        return;
      }
      controller.enqueue(utf8.encode(step.value));
    },
  });

  mockFetch.mockResolvedValue({
    ok: true,
    body: responseBody,
  });

  const llama = new LlamaCppClient({
    endpoint: 'http://localhost:8080',
    model: 'test-model',
  });

  // The stream has to be consumed for the fetch call to be issued.
  const collected: ChatStreamEvent[] = [];
  const stream = llama.chatStream({
    messages: [{ role: 'user', content: 'Hi' }],
    tools: [
      {
        name: 'shell.exec',
        description: 'Run shell',
        input_schema: {
          type: 'object',
          properties: { command: { type: 'string' } },
          required: ['command'],
        },
      },
    ],
  });
  for await (const evt of stream) {
    collected.push(evt);
  }

  // Inspect the serialized body passed as fetch's second argument.
  const fetchInit = mockFetch.mock.calls[0][1];
  const sentPayload = JSON.parse(fetchInit.body);

  const expectedTools = [
    {
      type: 'function',
      function: {
        name: 'shell.exec',
        description: 'Run shell',
        parameters: {
          type: 'object',
          properties: { command: { type: 'string' } },
          required: ['command'],
        },
      },
    },
  ];
  expect(sentPayload.tools).toEqual(expectedTools);
  expect(sentPayload.stream).toBe(true);
});
});