feat: add tool calling support to Ollama and llama.cpp clients

- Ollama: pass tools to API, parse tool_calls responses, handle thinking field from reasoning models (deepseek-r1, glm-4.7-flash)
- llama.cpp: pass tools via OpenAI-compatible endpoint, parse tool_calls, accumulate streaming tool call deltas
- Both clients now set stopReason to 'tool_use' when tool calls are present
- Tests: 12 new tests (8 Ollama + 5 llama.cpp, total 983→995)
2026-02-07 17:20:27 -08:00
parent fcbab1e1ee
commit fb20acfbcd
4 changed files with 655 additions and 30 deletions
@@ -6,6 +6,7 @@ describe('LlamaCppClient', () => {
  const mockFetch = vi.fn();

  beforeEach(() => {
+    mockFetch.mockReset(); // start each test with a clean mock; later assertions read mockFetch.mock.calls[0]
    vi.stubGlobal('fetch', mockFetch); // route the global fetch to the mock
  });

@@ -96,4 +97,247 @@ describe('LlamaCppClient', () => {
      messages: [{ role: 'user', content: 'Hello' }],
    })).rejects.toThrow('llama-server not running at http://localhost:8080');
  });
+
+  it('passes tools in request body', async () => { // chat() is expected to send each tool as { type: 'function', function: {...} }
+    mockFetch.mockResolvedValue({ // canned successful reply with plain text and no tool calls
+      ok: true,
+      json: () => Promise.resolve({
+        choices: [{ message: { content: 'I can help with that.' } }],
+        usage: { prompt_tokens: 12, completion_tokens: 6 },
+      }),
+    });
+
+    const client = new LlamaCppClient({
+      endpoint: 'http://localhost:8080',
+      model: 'test-model',
+    });
+
+    await client.chat({
+      messages: [{ role: 'user', content: 'Run ls' }],
+      tools: [{ // tool definition as supplied by the caller (name / description / input_schema)
+        name: 'shell.exec',
+        description: 'Run shell',
+        input_schema: {
+          type: 'object',
+          properties: { command: { type: 'string' } },
+          required: ['command'],
+        },
+      }],
+    });
+
+    const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body); // first fetch call, second argument holds the JSON body
+    expect(requestBody.tools).toEqual([{
+      type: 'function',
+      function: {
+        name: 'shell.exec',
+        description: 'Run shell',
+        parameters: { // the caller's input_schema is expected under "parameters"
+          type: 'object',
+          properties: { command: { type: 'string' } },
+          required: ['command'],
+        },
+      },
+    }]);
+  });
+
+  it('parses tool_calls from response', async () => { // a tool_calls reply should surface as response.toolCalls with stopReason 'tool_use'
+    mockFetch.mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve({
+        choices: [{
+          message: {
+            content: null, // this fixture carries no text alongside the tool call
+            tool_calls: [{
+              id: 'call_123',
+              type: 'function',
+              function: { name: 'shell.exec', arguments: '{"command":"ls"}' }, // arguments arrive as a JSON-encoded string
+            }],
+          },
+          finish_reason: 'tool_calls',
+        }],
+        usage: { prompt_tokens: 15, completion_tokens: 8 },
+      }),
+    });
+
+    const client = new LlamaCppClient({
+      endpoint: 'http://localhost:8080',
+      model: 'test-model',
+    });
+
+    const response = await client.chat({
+      messages: [{ role: 'user', content: 'List files' }],
+      tools: [{
+        name: 'shell.exec',
+        description: 'Run shell',
+        input_schema: {
+          type: 'object',
+          properties: { command: { type: 'string' } },
+          required: ['command'],
+        },
+      }],
+    });
+
+    expect(response.stopReason).toBe('tool_use');
+    expect(response.toolCalls).toHaveLength(1);
+    expect(response.toolCalls![0]).toEqual({ // flattened shape: id / name / args
+      id: 'call_123',
+      name: 'shell.exec',
+      args: { command: 'ls' }, // expected as a parsed object, not the raw JSON string
+    });
+    expect(response.usage.inputTokens).toBe(15); // matches prompt_tokens in the fixture
+    expect(response.usage.outputTokens).toBe(8); // matches completion_tokens in the fixture
+  });
+
+  it('does not send tools when none provided', async () => { // a request without tools must produce a body with no "tools" value
+    mockFetch.mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve({
+        choices: [{ message: { content: 'Hello!' } }],
+        usage: { prompt_tokens: 5, completion_tokens: 2 },
+      }),
+    });
+
+    const client = new LlamaCppClient({
+      endpoint: 'http://localhost:8080',
+      model: 'test-model',
+    });
+
+    await client.chat({
+      messages: [{ role: 'user', content: 'Hello' }], // note: no tools property on this request
+    });
+
+    const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body); // body of the first fetch call
+    expect(requestBody.tools).toBeUndefined();
+  });
+
+  it('streaming: accumulates and yields tool_calls from deltas', async () => { // chatStream() should merge partial tool-call deltas into one complete call
+    const chunks = [ // "data: ..." frames, delivered one per stream chunk
+      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_1","type":"function","function":{"name":"shell.exec"}}]}}]}\n\n', // first delta: id and function name only
+      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\\"comma"}}]}}]}\n\n', // first half of the arguments JSON string
+      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"nd\\":\\"ls\\"}"}}]}}]}\n\n', // second half; concatenated they read {"command":"ls"}
+      'data: {"choices":[{}],"usage":{"prompt_tokens":10,"completion_tokens":5}}\n\n', // frame carrying only token usage
+      'data: [DONE]\n\n', // final frame
+    ];
+
+    const encoder = new TextEncoder();
+    let chunkIndex = 0;
+
+    const mockStream = new ReadableStream({
+      pull(controller) { // hands out one encoded chunk per pull, then closes the stream
+        if (chunkIndex < chunks.length) {
+          controller.enqueue(encoder.encode(chunks[chunkIndex]));
+          chunkIndex++;
+        } else {
+          controller.close();
+        }
+      },
+    });
+
+    mockFetch.mockResolvedValue({
+      ok: true,
+      body: mockStream,
+    });
+
+    const client = new LlamaCppClient({
+      endpoint: 'http://localhost:8080',
+      model: 'test-model',
+    });
+
+    const events: ChatStreamEvent[] = [];
+    for await (const event of client.chatStream({ // drain the stream, recording every emitted event
+      messages: [{ role: 'user', content: 'Run ls' }],
+      tools: [{
+        name: 'shell.exec',
+        description: 'Run shell',
+        input_schema: {
+          type: 'object',
+          properties: { command: { type: 'string' } },
+          required: ['command'],
+        },
+      }],
+    })) {
+      events.push(event);
+    }
+
+    // Should have a tool_use event and a done event
+    const toolUseEvents = events.filter(e => e.type === 'tool_use');
+    const doneEvents = events.filter(e => e.type === 'done');
+
+    expect(toolUseEvents).toHaveLength(1); // the three deltas are expected to collapse into a single event
+    expect(toolUseEvents[0].toolCall).toEqual({
+      id: 'call_1',
+      name: 'shell.exec',
+      args: { command: 'ls' }, // parsed from the reassembled arguments string
+    });
+
+    expect(doneEvents).toHaveLength(1);
+    expect(doneEvents[0].usage).toEqual({ // values match the usage frame in chunks
+      inputTokens: 10,
+      outputTokens: 5,
+    });
+  });
+
+  it('streaming: passes tools in request body', async () => { // chatStream() should send the converted tools array together with stream: true
+    const chunks = [ // minimal stream: one text delta, a usage frame, then the final frame
+      'data: {"choices":[{"delta":{"content":"Hi"}}]}\n\n',
+      'data: {"choices":[{}],"usage":{"prompt_tokens":3,"completion_tokens":1}}\n\n',
+      'data: [DONE]\n\n',
+    ];
+
+    const encoder = new TextEncoder();
+    let chunkIndex = 0;
+
+    const mockStream = new ReadableStream({
+      pull(controller) { // hands out one encoded chunk per pull, then closes the stream
+        if (chunkIndex < chunks.length) {
+          controller.enqueue(encoder.encode(chunks[chunkIndex]));
+          chunkIndex++;
+        } else {
+          controller.close();
+        }
+      },
+    });
+
+    mockFetch.mockResolvedValue({
+      ok: true,
+      body: mockStream,
+    });
+
+    const client = new LlamaCppClient({
+      endpoint: 'http://localhost:8080',
+      model: 'test-model',
+    });
+
+    // Consume the stream to trigger the fetch call
+    const events: ChatStreamEvent[] = []; // collected only while draining; this test does not assert on the events
+    for await (const event of client.chatStream({
+      messages: [{ role: 'user', content: 'Hi' }],
+      tools: [{
+        name: 'shell.exec',
+        description: 'Run shell',
+        input_schema: {
+          type: 'object',
+          properties: { command: { type: 'string' } },
+          required: ['command'],
+        },
+      }],
+    })) {
+      events.push(event);
+    }
+
+    const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body); // first fetch call, second argument holds the JSON body
+    expect(requestBody.tools).toEqual([{
+      type: 'function',
+      function: {
+        name: 'shell.exec',
+        description: 'Run shell',
+        parameters: { // the caller's input_schema is expected under "parameters"
+          type: 'object',
+          properties: { command: { type: 'string' } },
+          required: ['command'],
+        },
+      },
+    }]);
+    expect(requestBody.stream).toBe(true); // the streaming path must set the stream flag in the body
+  });
 });