diff --git a/src/models/local/llamacpp.test.ts b/src/models/local/llamacpp.test.ts
new file mode 100644
index 0000000..fd3518b
--- /dev/null
+++ b/src/models/local/llamacpp.test.ts
@@ -0,0 +1,65 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { LlamaCppClient } from './llamacpp.js';
+
+// Unit tests for LlamaCppClient; the global fetch is stubbed, so no server is contacted.
+describe('LlamaCppClient', () => {
+  const mockFetch = vi.fn();
+
+  beforeEach(() => {
+    // Drop call history and canned responses left over from the previous test.
+    mockFetch.mockReset();
+    vi.stubGlobal('fetch', mockFetch);
+  });
+
+  afterEach(() => {
+    vi.unstubAllGlobals();
+  });
+
+  it('sends messages and returns response', async () => {
+    mockFetch.mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve({
+        choices: [{ message: { content: 'Hello from llama.cpp!' } }],
+        usage: { prompt_tokens: 10, completion_tokens: 5 },
+      }),
+    });
+
+    const client = new LlamaCppClient({
+      endpoint: 'http://localhost:8080',
+    });
+
+    const response = await client.chat({
+      messages: [{ role: 'user', content: 'Hello' }],
+    });
+
+    expect(response.content).toBe('Hello from llama.cpp!');
+    expect(response.usage.inputTokens).toBe(10);
+    expect(response.usage.outputTokens).toBe(5);
+
+    // The request must target the chat-completions route with the expected payload.
+    expect(mockFetch).toHaveBeenCalledTimes(1);
+    const [url, init] = mockFetch.mock.calls[0] as [string, { method: string; body: string }];
+    expect(url).toBe('http://localhost:8080/v1/chat/completions');
+    expect(init.method).toBe('POST');
+    expect(JSON.parse(init.body)).toEqual({
+      messages: [{ role: 'user', content: 'Hello' }],
+      max_tokens: 2048,
+    });
+  });
+
+  it('throws with status and body when the server responds with an error', async () => {
+    mockFetch.mockResolvedValue({
+      ok: false,
+      status: 500,
+      text: () => Promise.resolve('boom'),
+    });
+
+    const client = new LlamaCppClient({
+      endpoint: 'http://localhost:8080',
+    });
+
+    await expect(
+      client.chat({ messages: [{ role: 'user', content: 'Hello' }] }),
+    ).rejects.toThrow('llama-server error (500): boom');
+  });
+});
diff --git a/src/models/local/llamacpp.ts b/src/models/local/llamacpp.ts
new file mode 100644
index 0000000..c9129eb
--- /dev/null
+++ b/src/models/local/llamacpp.ts
@@ -0,0 +1,95 @@
+import type { ChatRequest, ChatResponse, ModelClient } from '../types.js';
+
+/** Connection settings for a llama.cpp server. */
+export interface LlamaCppClientConfig {
+  /** Base URL of the server; the client strips trailing slashes from it. */
+  endpoint: string;
+  /** When set, sent as a bearer token in the `Authorization` header. */
+  authToken?: string;
+}
+
+/** A single chat message in the shape posted to the server. */
+interface LlamaCppMessage {
+  role: 'system' | 'user' | 'assistant';
+  content: string;
+}
+
+/**
+ * The parts of the completion response that the client reads. Every field is
+ * optional because `chat` falls back to defaults when one is missing.
+ */
+interface LlamaCppResponse {
+  choices?: Array<{ message?: { content?: string } }>;
+  usage?: { prompt_tokens?: number; completion_tokens?: number };
+}
+
+/** Chat client that posts to a llama.cpp server's `/v1/chat/completions` route. */
+export class LlamaCppClient implements ModelClient {
+  private readonly endpoint: string;
+  private readonly authToken?: string;
+
+  /** @param config - Server base URL and optional bearer token. */
+  constructor(config: LlamaCppClientConfig) {
+    // Strip every trailing slash so the joined request URL never contains `//`.
+    this.endpoint = config.endpoint.replace(/\/+$/, '');
+    this.authToken = config.authToken;
+  }
+
+  /**
+   * Sends the conversation to the server and returns the first choice.
+   *
+   * @param request - Messages to send. `system`, when non-empty, is prepended as
+   *   a system message; `maxTokens` defaults to 2048.
+   * @returns The first choice's text (empty string when absent) and the token
+   *   usage (zero for counts the server omits).
+   * @throws Error when the server answers with a non-OK HTTP status; the message
+   *   carries the status code and response body. Failures from `fetch` or from
+   *   parsing the JSON body are not caught here.
+   */
+  async chat(request: ChatRequest): Promise<ChatResponse> {
+    const messages: LlamaCppMessage[] = [];
+
+    if (request.system) {
+      messages.push({ role: 'system', content: request.system });
+    }
+
+    for (const msg of request.messages) {
+      messages.push({ role: msg.role, content: msg.content });
+    }
+
+    const headers: Record<string, string> = {
+      'Content-Type': 'application/json',
+    };
+
+    if (this.authToken) {
+      headers['Authorization'] = `Bearer ${this.authToken}`;
+    }
+
+    const response = await fetch(`${this.endpoint}/v1/chat/completions`, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({
+        messages,
+        max_tokens: request.maxTokens ?? 2048,
+      }),
+    });
+
+    if (!response.ok) {
+      const text = await response.text();
+      throw new Error(`llama-server error (${response.status}): ${text}`);
+    }
+
+    const data = (await response.json()) as LlamaCppResponse;
+
+    return {
+      // A body without `choices` yields an empty reply rather than a TypeError.
+      content: data.choices?.[0]?.message?.content ?? '',
+      // NOTE(review): always 'stop'; no field of the response is consulted for this.
+      stopReason: 'stop',
+      usage: {
+        inputTokens: data.usage?.prompt_tokens ?? 0,
+        outputTokens: data.usage?.completion_tokens ?? 0,
+      },
+    };
+  }
+}