import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { LlamaCppClient, normalizeMessagesForLlamaCpp } from './llamacpp.js';
import type { ChatStreamEvent, Message } from '../types.js';

/**
 * Tests for `LlamaCppClient`.
 *
 * The global `fetch` is replaced with a Vitest mock before every test and
 * restored afterwards, so no test performs real network I/O.
 */
describe('LlamaCppClient', () => {
  const mockFetch = vi.fn();

  /** OpenAI-style function-tool payload expected on the wire for the `shell.exec` tool fixture. */
  const expectedShellExecTools = [{
    type: 'function',
    function: {
      name: 'shell.exec',
      description: 'Run shell',
      parameters: {
        type: 'object',
        properties: { command: { type: 'string' } },
        required: ['command'],
      },
    },
  }];

  /** Builds a client pointing at the endpoint and model used by every test. */
  const createClient = (): LlamaCppClient =>
    new LlamaCppClient({
      endpoint: 'http://localhost:8080',
      model: 'test-model',
    });

  /** Makes the mocked `fetch` resolve with an OK response whose `json()` yields `payload`. */
  const mockJsonResponse = (payload: unknown): void => {
    mockFetch.mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(payload),
    });
  };

  /**
   * Makes the mocked `fetch` resolve with an OK response whose body is a
   * stream that emits one encoded chunk per `pull` and then closes.
   */
  const mockSseResponse = (chunks: readonly string[]): void => {
    const encoder = new TextEncoder();
    let nextChunk = 0;
    const body = new ReadableStream<Uint8Array>({
      pull(controller) {
        if (nextChunk < chunks.length) {
          controller.enqueue(encoder.encode(chunks[nextChunk]));
          nextChunk++;
        } else {
          controller.close();
        }
      },
    });
    mockFetch.mockResolvedValue({
      ok: true,
      body,
    });
  };

  /** Drains an async event stream into an array, preserving order. */
  const collectEvents = async (
    stream: AsyncIterable<ChatStreamEvent>,
  ): Promise<ChatStreamEvent[]> => {
    const events: ChatStreamEvent[] = [];
    for await (const event of stream) {
      events.push(event);
    }
    return events;
  };

  /** Parses the JSON body passed as the second argument of the first mocked `fetch` call. */
  const firstRequestBody = () => JSON.parse(mockFetch.mock.calls[0][1].body);

  beforeEach(() => {
    mockFetch.mockReset();
    vi.stubGlobal('fetch', mockFetch);
  });

  afterEach(() => {
    vi.unstubAllGlobals();
  });

  it('sends messages and returns response', async () => {
    mockJsonResponse({
      choices: [{ message: { content: 'Hello from llama.cpp!' } }],
      usage: { prompt_tokens: 10, completion_tokens: 5 },
    });

    const client = createClient();
    const response = await client.chat({
      messages: [{ role: 'user', content: 'Hello' }],
    });

    expect(response.content).toBe('Hello from llama.cpp!');
    expect(response.usage.inputTokens).toBe(10);
    expect(response.usage.outputTokens).toBe(5);
  });

  it('streams responses via SSE', async () => {
    mockSseResponse([
      'data: {"choices":[{"delta":{"content":"Hello"}}]}\n\n',
      'data: {"choices":[{"delta":{"content":" world"}}]}\n\n',
      'data: {"choices":[{}],"usage":{"prompt_tokens":5,"completion_tokens":2}}\n\n',
      'data: [DONE]\n\n',
    ]);

    const client = createClient();
    const events = await collectEvents(client.chatStream({
      messages: [{ role: 'user', content: 'Hi' }],
    }));

    expect(events).toHaveLength(3);
    expect(events[0]).toEqual({ type: 'content', content: 'Hello' });
    expect(events[1]).toEqual({ type: 'content', content: ' world' });
    expect(events[2]).toEqual({
      type: 'done',
      usage: { inputTokens: 5, outputTokens: 2 },
    });
  });

  it('throws clear error when server not running', async () => {
    mockFetch.mockRejectedValue(new TypeError('fetch failed'));

    const client = createClient();

    await expect(client.chat({
      messages: [{ role: 'user', content: 'Hello' }],
    })).rejects.toThrow('llama-server not running at http://localhost:8080');
  });

  it('passes tools in request body', async () => {
    mockJsonResponse({
      choices: [{ message: { content: 'I can help with that.' } }],
      usage: { prompt_tokens: 12, completion_tokens: 6 },
    });

    const client = createClient();
    await client.chat({
      messages: [{ role: 'user', content: 'Run ls' }],
      tools: [{
        name: 'shell.exec',
        description: 'Run shell',
        input_schema: {
          type: 'object',
          properties: { command: { type: 'string' } },
          required: ['command'],
        },
      }],
    });

    const requestBody = firstRequestBody();
    expect(requestBody.tools).toEqual(expectedShellExecTools);
  });

  it('sanitizes web_search tool schema for llama.cpp', async () => {
    mockJsonResponse({
      choices: [{ message: { content: 'ok' } }],
      usage: { prompt_tokens: 1, completion_tokens: 1 },
    });

    const client = createClient();
    await client.chat({
      messages: [{ role: 'user', content: 'search' }],
      tools: [{
        name: 'web_search',
        description: 'Search',
        input_schema: {
          type: 'object',
          properties: {
            query: { type: 'string' },
            count: { type: 'number' },
          },
          required: ['query'],
        },
      }],
    });

    // The `count` property supplied above must be absent from the sent schema.
    const requestBody = firstRequestBody();
    expect(requestBody.tools[0].function.parameters).toEqual({
      type: 'object',
      properties: { query: { type: 'string' } },
      required: ['query'],
    });
  });

  it('parses tool_calls from response', async () => {
    mockJsonResponse({
      choices: [{
        message: {
          content: null,
          tool_calls: [{
            id: 'call_123',
            type: 'function',
            function: { name: 'shell.exec', arguments: '{"command":"ls"}' },
          }],
        },
        finish_reason: 'tool_calls',
      }],
      usage: { prompt_tokens: 15, completion_tokens: 8 },
    });

    const client = createClient();
    const response = await client.chat({
      messages: [{ role: 'user', content: 'List files' }],
      tools: [{
        name: 'shell.exec',
        description: 'Run shell',
        input_schema: {
          type: 'object',
          properties: { command: { type: 'string' } },
          required: ['command'],
        },
      }],
    });

    expect(response.stopReason).toBe('tool_use');
    expect(response.toolCalls).toHaveLength(1);
    const firstToolCall = response.toolCalls?.[0];
    expect(firstToolCall).toEqual({
      id: 'call_123',
      name: 'shell.exec',
      args: { command: 'ls' },
    });
    expect(response.usage.inputTokens).toBe(15);
    expect(response.usage.outputTokens).toBe(8);
  });

  it('does not send tools when none provided', async () => {
    mockJsonResponse({
      choices: [{ message: { content: 'Hello!' } }],
      usage: { prompt_tokens: 5, completion_tokens: 2 },
    });

    const client = createClient();
    await client.chat({
      messages: [{ role: 'user', content: 'Hello' }],
    });

    const requestBody = firstRequestBody();
    expect(requestBody.tools).toBeUndefined();
  });

  it('streaming: accumulates and yields tool_calls from deltas', async () => {
    // The JSON arguments string is split across two deltas on purpose.
    mockSseResponse([
      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_1","type":"function","function":{"name":"shell.exec"}}]}}]}\n\n',
      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\\"comma"}}]}}]}\n\n',
      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"nd\\":\\"ls\\"}"}}]}}]}\n\n',
      'data: {"choices":[{}],"usage":{"prompt_tokens":10,"completion_tokens":5}}\n\n',
      'data: [DONE]\n\n',
    ]);

    const client = createClient();
    const events = await collectEvents(client.chatStream({
      messages: [{ role: 'user', content: 'Run ls' }],
      tools: [{
        name: 'shell.exec',
        description: 'Run shell',
        input_schema: {
          type: 'object',
          properties: { command: { type: 'string' } },
          required: ['command'],
        },
      }],
    }));

    // Should have a tool_use event and a done event
    const toolUseEvents = events.filter(e => e.type === 'tool_use');
    const doneEvents = events.filter(e => e.type === 'done');

    expect(toolUseEvents).toHaveLength(1);
    expect(toolUseEvents[0].toolCall).toEqual({
      id: 'call_1',
      name: 'shell.exec',
      args: { command: 'ls' },
    });
    expect(doneEvents).toHaveLength(1);
    expect(doneEvents[0].usage).toEqual({
      inputTokens: 10,
      outputTokens: 5,
    });
  });

  it('streaming: passes tools in request body', async () => {
    mockSseResponse([
      'data: {"choices":[{"delta":{"content":"Hi"}}]}\n\n',
      'data: {"choices":[{}],"usage":{"prompt_tokens":3,"completion_tokens":1}}\n\n',
      'data: [DONE]\n\n',
    ]);

    const client = createClient();

    // Consume the stream to trigger the fetch call
    await collectEvents(client.chatStream({
      messages: [{ role: 'user', content: 'Hi' }],
      tools: [{
        name: 'shell.exec',
        description: 'Run shell',
        input_schema: {
          type: 'object',
          properties: { command: { type: 'string' } },
          required: ['command'],
        },
      }],
    }));

    const requestBody = firstRequestBody();
    expect(requestBody.tools).toEqual(expectedShellExecTools);
    expect(requestBody.stream).toBe(true);
  });
});

/**
 * Tests for `normalizeMessagesForLlamaCpp`, called here as
 * `normalizeMessagesForLlamaCpp(systemPrompt | undefined, messages)`.
 */
describe('normalizeMessagesForLlamaCpp', () => {
  /**
   * Casts an array of structured content blocks to `Message['content']`.
   * The fixtures below are force-cast rather than type-checked, so the
   * cast lives in one place instead of being repeated on every literal.
   */
  const asContent = (blocks: unknown): Message['content'] =>
    blocks as Message['content'];

  it('passes plain text messages through', () => {
    const messages: Message[] = [
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Hi there' },
    ];

    const result = normalizeMessagesForLlamaCpp('System prompt', messages);

    expect(result).toEqual([
      { role: 'system', content: 'System prompt' },
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Hi there' },
    ]);
  });

  it('converts assistant tool_use blocks to OpenAI tool_calls format', () => {
    const messages: Message[] = [
      { role: 'user', content: 'Search for news' },
      {
        role: 'assistant',
        content: asContent([
          { type: 'text', text: 'Searching...' },
          { type: 'tool_use', id: 'call_1', name: 'web.search', input: { query: 'news' } },
        ]),
      },
    ];

    const result = normalizeMessagesForLlamaCpp(undefined, messages);

    expect(result).toHaveLength(2);
    expect(result[1]).toEqual({
      role: 'assistant',
      content: 'Searching...',
      tool_calls: [{
        id: 'call_1',
        type: 'function',
        function: {
          name: 'web.search',
          arguments: '{"query":"news"}',
        },
      }],
    });
  });

  it('converts user tool_result blocks to user messages with text formatting', () => {
    const messages: Message[] = [
      { role: 'user', content: 'Search' },
      {
        role: 'assistant',
        content: asContent([
          { type: 'tool_use', id: 'call_1', name: 'web.search', input: { query: 'news' } },
        ]),
      },
      {
        role: 'user',
        content: asContent([
          { type: 'tool_result', tool_use_id: 'call_1', content: 'Results here', is_error: false },
        ]),
      },
    ];

    const result = normalizeMessagesForLlamaCpp(undefined, messages);

    expect(result).toHaveLength(3);
    expect(result[2]).toEqual({
      role: 'user',
      content: '[Tool "web.search" result]\nResults here',
    });
  });

  it('handles multiple tool results in a single user message', () => {
    const messages: Message[] = [
      { role: 'user', content: 'Do two things' },
      {
        role: 'assistant',
        content: asContent([
          { type: 'tool_use', id: 'call_a', name: 'tool.a', input: {} },
          { type: 'tool_use', id: 'call_b', name: 'tool.b', input: { x: 1 } },
        ]),
      },
      {
        role: 'user',
        content: asContent([
          { type: 'tool_result', tool_use_id: 'call_a', content: 'A result' },
          { type: 'tool_result', tool_use_id: 'call_b', content: 'B result' },
        ]),
      },
    ];

    const result = normalizeMessagesForLlamaCpp(undefined, messages);

    const toolCalls = result[1].tool_calls;
    expect(toolCalls).toHaveLength(2);
    expect(toolCalls?.[0]?.id).toBe('call_a');
    expect(toolCalls?.[1]?.function.arguments).toBe('{"x":1}');

    // Multiple results merged into one user message
    expect(result).toHaveLength(3);
    expect(result[2]).toEqual({
      role: 'user',
      content: '[Tool "tool.a" result]\nA result\n\n[Tool "tool.b" result]\nB result',
    });
  });

  it('marks error results in text formatting', () => {
    const messages: Message[] = [
      { role: 'user', content: 'Do it' },
      {
        role: 'assistant',
        content: asContent([
          { type: 'tool_use', id: 'call_1', name: 'file.read', input: { path: '/tmp/x' } },
        ]),
      },
      {
        role: 'user',
        content: asContent([
          { type: 'tool_result', tool_use_id: 'call_1', content: 'File not found', is_error: true },
        ]),
      },
    ];

    const result = normalizeMessagesForLlamaCpp(undefined, messages);

    expect(result[2]).toEqual({
      role: 'user',
      content: '[Tool "file.read" result (error)]\nFile not found',
    });
  });

  it('handles full tool round-trip conversation', () => {
    const messages: Message[] = [
      { role: 'user', content: 'What is the weather?' },
      {
        role: 'assistant',
        content: asContent([
          { type: 'text', text: 'Checking...' },
          { type: 'tool_use', id: 'tc_0', name: 'weather.get', input: { city: 'NYC' } },
        ]),
      },
      {
        role: 'user',
        content: asContent([
          { type: 'tool_result', tool_use_id: 'tc_0', content: 'Sunny, 72F' },
        ]),
      },
      { role: 'assistant', content: 'The weather in NYC is sunny, 72F.' },
    ];

    const result = normalizeMessagesForLlamaCpp('You are helpful.', messages);

    expect(result).toHaveLength(5);
    expect(result[0]).toEqual({ role: 'system', content: 'You are helpful.' });
    expect(result[1]).toEqual({ role: 'user', content: 'What is the weather?' });
    expect(result[2]).toEqual({
      role: 'assistant',
      content: 'Checking...',
      tool_calls: [{
        id: 'tc_0',
        type: 'function',
        function: { name: 'weather.get', arguments: '{"city":"NYC"}' },
      }],
    });
    expect(result[3]).toEqual({
      role: 'user',
      content: '[Tool "weather.get" result]\nSunny, 72F',
    });
    expect(result[4]).toEqual({ role: 'assistant', content: 'The weather in NYC is sunny, 72F.' });
  });
});