// flynn/src/models/local/llamacpp.test.ts

import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { LlamaCppClient, normalizeMessagesForLlamaCpp } from './llamacpp.js';
import type { ChatStreamEvent, Message } from '../types.js';

/**
 * Tests for `LlamaCppClient`, run against a stubbed global `fetch`.
 *
 * Covers non-streaming chat, SSE streaming, the connection-failure error
 * message, and how tool definitions / tool calls are translated to and from
 * the OpenAI-style wire format the tests expect the client to speak.
 */
describe('LlamaCppClient', () => {
  /** Request object accepted by `LlamaCppClient.chat`, derived from the client itself. */
  type ChatRequest = Parameters<LlamaCppClient['chat']>[0];
  /** A single entry of the optional `tools` array on a chat request. */
  type ToolDefinition = NonNullable<ChatRequest['tools']>[number];

  const mockFetch = vi.fn();

  beforeEach(() => {
    // Start every test with a clean mock so call history never leaks between tests.
    mockFetch.mockReset();
    vi.stubGlobal('fetch', mockFetch);
  });

  afterEach(() => {
    vi.unstubAllGlobals();
  });

  /** Builds a client pointed at the fake endpoint used throughout this suite. */
  const createClient = (): LlamaCppClient =>
    new LlamaCppClient({
      endpoint: 'http://localhost:8080',
      model: 'test-model',
    });

  /** Makes `fetch` resolve with a successful response whose `json()` yields `payload`. */
  const mockJsonResponse = (payload: unknown): void => {
    mockFetch.mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(payload),
    });
  };

  /**
   * Makes `fetch` resolve with a successful response whose body streams the
   * given SSE chunks, one per `pull`, and then closes.
   */
  const mockSseResponse = (chunks: readonly string[]): void => {
    const encoder = new TextEncoder();
    let nextChunk = 0;

    const body = new ReadableStream({
      pull(controller) {
        if (nextChunk < chunks.length) {
          controller.enqueue(encoder.encode(chunks[nextChunk]));
          nextChunk++;
        } else {
          controller.close();
        }
      },
    });

    mockFetch.mockResolvedValue({ ok: true, body });
  };

  /** Drains a chat stream into an array so assertions can inspect every event. */
  const collectEvents = async (
    stream: AsyncIterable<ChatStreamEvent>,
  ): Promise<ChatStreamEvent[]> => {
    const events: ChatStreamEvent[] = [];
    for await (const event of stream) {
      events.push(event);
    }
    return events;
  };

  /** Parses the JSON body of the first `fetch` call made during the test. */
  const sentRequestBody = () => JSON.parse(mockFetch.mock.calls[0][1].body);

  /** Returns a fresh `shell.exec` tool definition so tests never share a mutable object. */
  const shellExecTool = (): ToolDefinition => ({
    name: 'shell.exec',
    description: 'Run shell',
    input_schema: {
      type: 'object',
      properties: { command: { type: 'string' } },
      required: ['command'],
    },
  });

  /** The function-tool wire form the tests expect `shellExecTool()` to be sent as. */
  const expectedShellExecWireTool = {
    type: 'function',
    function: {
      name: 'shell.exec',
      description: 'Run shell',
      parameters: {
        type: 'object',
        properties: { command: { type: 'string' } },
        required: ['command'],
      },
    },
  };

  it('sends messages and returns response', async () => {
    mockJsonResponse({
      choices: [{ message: { content: 'Hello from llama.cpp!' } }],
      usage: { prompt_tokens: 10, completion_tokens: 5 },
    });

    const response = await createClient().chat({
      messages: [{ role: 'user', content: 'Hello' }],
    });

    expect(response.content).toBe('Hello from llama.cpp!');
    expect(response.usage.inputTokens).toBe(10);
    expect(response.usage.outputTokens).toBe(5);
  });

  it('streams responses via SSE', async () => {
    mockSseResponse([
      'data: {"choices":[{"delta":{"content":"Hello"}}]}\n\n',
      'data: {"choices":[{"delta":{"content":" world"}}]}\n\n',
      'data: {"choices":[{}],"usage":{"prompt_tokens":5,"completion_tokens":2}}\n\n',
      'data: [DONE]\n\n',
    ]);

    const events = await collectEvents(
      createClient().chatStream({
        messages: [{ role: 'user', content: 'Hi' }],
      }),
    );

    expect(events).toHaveLength(3);
    expect(events[0]).toEqual({ type: 'content', content: 'Hello' });
    expect(events[1]).toEqual({ type: 'content', content: ' world' });
    expect(events[2]).toEqual({
      type: 'done',
      usage: { inputTokens: 5, outputTokens: 2 },
    });
  });

  it('throws clear error when server not running', async () => {
    // A TypeError is what this test uses to simulate an unreachable server.
    mockFetch.mockRejectedValue(new TypeError('fetch failed'));

    await expect(createClient().chat({
      messages: [{ role: 'user', content: 'Hello' }],
    })).rejects.toThrow('llama-server not running at http://localhost:8080');
  });

  it('passes tools in request body', async () => {
    mockJsonResponse({
      choices: [{ message: { content: 'I can help with that.' } }],
      usage: { prompt_tokens: 12, completion_tokens: 6 },
    });

    await createClient().chat({
      messages: [{ role: 'user', content: 'Run ls' }],
      tools: [shellExecTool()],
    });

    expect(sentRequestBody().tools).toEqual([expectedShellExecWireTool]);
  });

  it('sanitizes web_search tool schema for llama.cpp', async () => {
    mockJsonResponse({
      choices: [{ message: { content: 'ok' } }],
      usage: { prompt_tokens: 1, completion_tokens: 1 },
    });

    await createClient().chat({
      messages: [{ role: 'user', content: 'search' }],
      tools: [{
        name: 'web_search',
        description: 'Search',
        input_schema: {
          type: 'object',
          properties: {
            query: { type: 'string' },
            count: { type: 'number' },
          },
          required: ['query'],
        },
      }],
    });

    // The `count` property must be stripped from the schema that goes on the wire.
    expect(sentRequestBody().tools[0].function.parameters).toEqual({
      type: 'object',
      properties: { query: { type: 'string' } },
      required: ['query'],
    });
  });

  it('parses tool_calls from response', async () => {
    mockJsonResponse({
      choices: [{
        message: {
          content: null,
          tool_calls: [{
            id: 'call_123',
            type: 'function',
            function: { name: 'shell.exec', arguments: '{"command":"ls"}' },
          }],
        },
        finish_reason: 'tool_calls',
      }],
      usage: { prompt_tokens: 15, completion_tokens: 8 },
    });

    const response = await createClient().chat({
      messages: [{ role: 'user', content: 'List files' }],
      tools: [shellExecTool()],
    });

    expect(response.stopReason).toBe('tool_use');
    expect(response.toolCalls).toHaveLength(1);
    const firstToolCall = response.toolCalls?.[0];
    expect(firstToolCall).toEqual({
      id: 'call_123',
      name: 'shell.exec',
      args: { command: 'ls' },
    });
    expect(response.usage.inputTokens).toBe(15);
    expect(response.usage.outputTokens).toBe(8);
  });

  it('does not send tools when none provided', async () => {
    mockJsonResponse({
      choices: [{ message: { content: 'Hello!' } }],
      usage: { prompt_tokens: 5, completion_tokens: 2 },
    });

    await createClient().chat({
      messages: [{ role: 'user', content: 'Hello' }],
    });

    expect(sentRequestBody().tools).toBeUndefined();
  });

  it('streaming: accumulates and yields tool_calls from deltas', async () => {
    // The JSON arguments string is deliberately split across two deltas.
    mockSseResponse([
      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_1","type":"function","function":{"name":"shell.exec"}}]}}]}\n\n',
      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\\"comma"}}]}}]}\n\n',
      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"nd\\":\\"ls\\"}"}}]}}]}\n\n',
      'data: {"choices":[{}],"usage":{"prompt_tokens":10,"completion_tokens":5}}\n\n',
      'data: [DONE]\n\n',
    ]);

    const events = await collectEvents(
      createClient().chatStream({
        messages: [{ role: 'user', content: 'Run ls' }],
        tools: [shellExecTool()],
      }),
    );

    // Should have a tool_use event and a done event
    const toolUseEvents = events.filter(e => e.type === 'tool_use');
    const doneEvents = events.filter(e => e.type === 'done');

    expect(toolUseEvents).toHaveLength(1);
    expect(toolUseEvents[0].toolCall).toEqual({
      id: 'call_1',
      name: 'shell.exec',
      args: { command: 'ls' },
    });

    expect(doneEvents).toHaveLength(1);
    expect(doneEvents[0].usage).toEqual({
      inputTokens: 10,
      outputTokens: 5,
    });
  });

  it('streaming: passes tools in request body', async () => {
    mockSseResponse([
      'data: {"choices":[{"delta":{"content":"Hi"}}]}\n\n',
      'data: {"choices":[{}],"usage":{"prompt_tokens":3,"completion_tokens":1}}\n\n',
      'data: [DONE]\n\n',
    ]);

    // Consume the stream to trigger the fetch call
    await collectEvents(
      createClient().chatStream({
        messages: [{ role: 'user', content: 'Hi' }],
        tools: [shellExecTool()],
      }),
    );

    const requestBody = sentRequestBody();
    expect(requestBody.tools).toEqual([expectedShellExecWireTool]);
    expect(requestBody.stream).toBe(true);
  });
});

/**
 * Tests for `normalizeMessagesForLlamaCpp`: an optional system prompt plus
 * conversation messages in, a flat list of role/content messages out.
 */
describe('normalizeMessagesForLlamaCpp', () => {
  /**
   * Wraps structured content blocks as `Message['content']`. The cast lives
   * here once instead of being repeated on every fixture.
   */
  const blocks = (...items: unknown[]): Message['content'] =>
    items as unknown as Message['content'];

  /** Builds a `text` content block. */
  const text = (value: string) => ({ type: 'text', text: value });

  /** Builds a `tool_use` content block. */
  const toolUse = (id: string, name: string, input: Record<string, unknown>) => ({
    type: 'tool_use',
    id,
    name,
    input,
  });

  /** Builds a `tool_result` block; `extra` carries `is_error` only when a test sets it. */
  const toolResult = (
    toolUseId: string,
    content: string,
    extra: { is_error?: boolean } = {},
  ) => ({
    type: 'tool_result',
    tool_use_id: toolUseId,
    content,
    ...extra,
  });

  it('passes plain text messages through', () => {
    const conversation: Message[] = [
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Hi there' },
    ];

    const normalized = normalizeMessagesForLlamaCpp('System prompt', conversation);

    expect(normalized).toEqual([
      { role: 'system', content: 'System prompt' },
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Hi there' },
    ]);
  });

  it('converts assistant tool_use blocks to OpenAI tool_calls format', () => {
    const conversation: Message[] = [
      { role: 'user', content: 'Search for news' },
      {
        role: 'assistant',
        content: blocks(
          text('Searching...'),
          toolUse('call_1', 'web.search', { query: 'news' }),
        ),
      },
    ];

    const normalized = normalizeMessagesForLlamaCpp(undefined, conversation);

    expect(normalized).toHaveLength(2);
    expect(normalized[1]).toEqual({
      role: 'assistant',
      content: 'Searching...',
      tool_calls: [{
        id: 'call_1',
        type: 'function',
        function: {
          name: 'web.search',
          arguments: '{"query":"news"}',
        },
      }],
    });
  });

  it('converts user tool_result blocks to user messages with text formatting', () => {
    const conversation: Message[] = [
      { role: 'user', content: 'Search' },
      {
        role: 'assistant',
        content: blocks(toolUse('call_1', 'web.search', { query: 'news' })),
      },
      {
        role: 'user',
        content: blocks(toolResult('call_1', 'Results here', { is_error: false })),
      },
    ];

    const normalized = normalizeMessagesForLlamaCpp(undefined, conversation);

    expect(normalized).toHaveLength(3);
    expect(normalized[2]).toEqual({
      role: 'user',
      content: '[Tool "web.search" result]\nResults here',
    });
  });

  it('handles multiple tool results in a single user message', () => {
    const conversation: Message[] = [
      { role: 'user', content: 'Do two things' },
      {
        role: 'assistant',
        content: blocks(
          toolUse('call_a', 'tool.a', {}),
          toolUse('call_b', 'tool.b', { x: 1 }),
        ),
      },
      {
        role: 'user',
        content: blocks(
          toolResult('call_a', 'A result'),
          toolResult('call_b', 'B result'),
        ),
      },
    ];

    const normalized = normalizeMessagesForLlamaCpp(undefined, conversation);

    const assistantToolCalls = normalized[1].tool_calls;
    expect(assistantToolCalls).toHaveLength(2);
    expect(assistantToolCalls?.[0]?.id).toBe('call_a');
    expect(assistantToolCalls?.[1]?.function.arguments).toBe('{"x":1}');
    // Multiple results merged into one user message
    expect(normalized).toHaveLength(3);
    expect(normalized[2]).toEqual({
      role: 'user',
      content: '[Tool "tool.a" result]\nA result\n\n[Tool "tool.b" result]\nB result',
    });
  });

  it('marks error results in text formatting', () => {
    const conversation: Message[] = [
      { role: 'user', content: 'Do it' },
      {
        role: 'assistant',
        content: blocks(toolUse('call_1', 'file.read', { path: '/tmp/x' })),
      },
      {
        role: 'user',
        content: blocks(toolResult('call_1', 'File not found', { is_error: true })),
      },
    ];

    const normalized = normalizeMessagesForLlamaCpp(undefined, conversation);

    expect(normalized[2]).toEqual({
      role: 'user',
      content: '[Tool "file.read" result (error)]\nFile not found',
    });
  });

  it('handles full tool round-trip conversation', () => {
    const conversation: Message[] = [
      { role: 'user', content: 'What is the weather?' },
      {
        role: 'assistant',
        content: blocks(
          text('Checking...'),
          toolUse('tc_0', 'weather.get', { city: 'NYC' }),
        ),
      },
      {
        role: 'user',
        content: blocks(toolResult('tc_0', 'Sunny, 72F')),
      },
      { role: 'assistant', content: 'The weather in NYC is sunny, 72F.' },
    ];

    const normalized = normalizeMessagesForLlamaCpp('You are helpful.', conversation);

    // System prompt + four conversation messages.
    expect(normalized).toHaveLength(5);
    const [system, question, toolRequest, toolOutcome, answer] = normalized;
    expect(system).toEqual({ role: 'system', content: 'You are helpful.' });
    expect(question).toEqual({ role: 'user', content: 'What is the weather?' });
    expect(toolRequest).toEqual({
      role: 'assistant',
      content: 'Checking...',
      tool_calls: [{
        id: 'tc_0',
        type: 'function',
        function: { name: 'weather.get', arguments: '{"city":"NYC"}' },
      }],
    });
    expect(toolOutcome).toEqual({
      role: 'user',
      content: '[Tool "weather.get" result]\nSunny, 72F',
    });
    expect(answer).toEqual({ role: 'assistant', content: 'The weather in NYC is sunny, 72F.' });
  });
});