diff --git a/docs/plans/2026-02-05-llamacpp-implementation.md b/docs/plans/2026-02-05-llamacpp-implementation.md
new file mode 100644
index 0000000..e595b3d
--- /dev/null
+++ b/docs/plans/2026-02-05-llamacpp-implementation.md
@@ -0,0 +1,556 @@
+# llama.cpp Integration Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** Add a `LlamaCppClient` that connects Flynn to a llama-server instance for local LLM inference (target model: Qwen 2.5 14B, loaded by llama-server itself — the client sends no model name).
+
+**Architecture:** A new `LlamaCppClient` class implements the existing `ModelClient` interface and communicates with llama-server's OpenAI-compatible `/v1/chat/completions` endpoint over HTTP, using Server-Sent Events (SSE) for streaming responses.
+
+**Tech Stack:** TypeScript, native fetch API, Server-Sent Events for streaming, Vitest for tests
+
+---
+
+## Task 1: Create LlamaCppClient with Basic Chat
+
+**Files:**
+- Create: `src/models/local/llamacpp.ts`
+- Create: `src/models/local/llamacpp.test.ts`
+
+### Step 1: Write the failing test
+
+Create `src/models/local/llamacpp.test.ts`:
+
+```typescript
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { LlamaCppClient } from './llamacpp.js';
+
+describe('LlamaCppClient', () => {
+  const mockFetch = vi.fn();
+
+  beforeEach(() => {
+    vi.stubGlobal('fetch', mockFetch);
+  });
+
+  afterEach(() => {
+    vi.unstubAllGlobals();
+  });
+
+  it('sends messages and returns response', async () => {
+    mockFetch.mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve({
+        choices: [{ message: { content: 'Hello from llama.cpp!' } }],
+        usage: { prompt_tokens: 10, completion_tokens: 5 },
+      }),
+    });
+
+    const client = new LlamaCppClient({
+      endpoint: 'http://localhost:8080',
+    });
+
+    const response = await client.chat({
+      messages: [{ role: 'user', content: 'Hello' }],
+    });
+
+    expect(response.content).toBe('Hello from llama.cpp!');
+    expect(response.usage.inputTokens).toBe(10);
+    expect(response.usage.outputTokens).toBe(5);
+  });
+});
+```
+
+### Step 2: Run test to verify it fails
+
+Run: `npm test -- src/models/local/llamacpp.test.ts`
+Expected: FAIL with "Cannot find module './llamacpp.js'" (or a similar module-resolution error)
+
+### Step 3: Write minimal implementation
+
+Create `src/models/local/llamacpp.ts`:
+
+```typescript
+import type { ChatRequest, ChatResponse, ModelClient } from '../types.js';
+
+export interface LlamaCppClientConfig {
+  endpoint: string;
+  authToken?: string;
+}
+
+interface LlamaCppMessage {
+  role: 'system' | 'user' | 'assistant';
+  content: string;
+}
+
+interface LlamaCppResponse {
+  choices: Array<{ message: { content: string } }>;
+  usage: { prompt_tokens: number; completion_tokens: number };
+}
+
+export class LlamaCppClient implements ModelClient {
+  private endpoint: string;
+  private authToken?: string;
+
+  constructor(config: LlamaCppClientConfig) {
+    this.endpoint = config.endpoint.replace(/\/$/, '');
+    this.authToken = config.authToken;
+  }
+
+  async chat(request: ChatRequest): Promise<ChatResponse> {
+    const messages: LlamaCppMessage[] = [];
+
+    if (request.system) {
+      messages.push({ role: 'system', content: request.system });
+    }
+
+    for (const msg of request.messages) {
+      messages.push({ role: msg.role, content: msg.content });
+    }
+
+    const headers: Record<string, string> = {
+      'Content-Type': 'application/json',
+    };
+
+    if (this.authToken) {
+      headers['Authorization'] = `Bearer ${this.authToken}`;
+    }
+
+    const response = await fetch(`${this.endpoint}/v1/chat/completions`, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({
+        messages,
+        max_tokens: request.maxTokens ?? 2048,
+      }),
+    });
+
+    if (!response.ok) {
+      const text = await response.text();
+      throw new Error(`llama-server error (${response.status}): ${text}`);
+    }
+
+    const data = (await response.json()) as LlamaCppResponse;
+
+    return {
+      content: data.choices[0]?.message?.content ?? '',
+      stopReason: 'stop',
+      usage: {
+        inputTokens: data.usage?.prompt_tokens ?? 0,
+        outputTokens: data.usage?.completion_tokens ?? 0,
+      },
+    };
+  }
+}
+```
+
+### Step 4: Run test to verify it passes
+
+Run: `npm test -- src/models/local/llamacpp.test.ts`
+Expected: PASS
+
+### Step 5: Commit
+
+```bash
+git add src/models/local/llamacpp.ts src/models/local/llamacpp.test.ts
+git commit -m "feat: add LlamaCppClient with basic chat support"
+```
+
+---
+
+## Task 2: Add Streaming Support
+
+**Files:**
+- Modify: `src/models/local/llamacpp.ts`
+- Modify: `src/models/local/llamacpp.test.ts`
+
+### Step 1: Write the failing test
+
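+Add to `src/models/local/llamacpp.test.ts` (the `import type` line goes with the other imports at the top of the file; the test goes inside the existing `describe` block):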
+
+```typescript
+import type { ChatStreamEvent } from '../types.js';
+
+// Add this test inside the describe block:
+
+it('streams responses via SSE', async () => {
+  const chunks = [
+    'data: {"choices":[{"delta":{"content":"Hello"}}]}\n\n',
+    'data: {"choices":[{"delta":{"content":" world"}}]}\n\n',
+    'data: {"choices":[{}],"usage":{"prompt_tokens":5,"completion_tokens":2}}\n\n',
+    'data: [DONE]\n\n',
+  ];
+
+  const encoder = new TextEncoder();
+  let chunkIndex = 0;
+
+  const mockStream = new ReadableStream({
+    pull(controller) {
+      if (chunkIndex < chunks.length) {
+        controller.enqueue(encoder.encode(chunks[chunkIndex]));
+        chunkIndex++;
+      } else {
+        controller.close();
+      }
+    },
+  });
+
+  mockFetch.mockResolvedValue({
+    ok: true,
+    body: mockStream,
+  });
+
+  const client = new LlamaCppClient({
+    endpoint: 'http://localhost:8080',
+  });
+
+  const events: ChatStreamEvent[] = [];
+  for await (const event of client.chatStream({
+    messages: [{ role: 'user', content: 'Hi' }],
+  })) {
+    events.push(event);
+  }
+
+  expect(events).toHaveLength(3);
+  expect(events[0]).toEqual({ type: 'content', content: 'Hello' });
+  expect(events[1]).toEqual({ type: 'content', content: ' world' });
+  expect(events[2]).toEqual({
+    type: 'done',
+    usage: { inputTokens: 5, outputTokens: 2 },
+  });
+});
+```
+
+### Step 2: Run test to verify it fails
+
+Run: `npm test -- src/models/local/llamacpp.test.ts`
+Expected: FAIL with "chatStream is not a function" or similar
+
+### Step 3: Write minimal implementation
+
+Add to `src/models/local/llamacpp.ts` (import, interface, and method):
+
+At the top, update imports:
+```typescript
+import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient } from '../types.js';
+```
+
+Add interface for streaming:
+```typescript
+interface LlamaCppStreamChunk {
+  choices: Array<{ delta?: { content?: string } }>;
+  usage?: { prompt_tokens: number; completion_tokens: number };
+}
+```
+
+Add method to `LlamaCppClient` class:
+```typescript
+async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
+  const messages: LlamaCppMessage[] = [];
+
+  if (request.system) {
+    messages.push({ role: 'system', content: request.system });
+  }
+
+  for (const msg of request.messages) {
+    messages.push({ role: msg.role, content: msg.content });
+  }
+
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+  };
+
+  if (this.authToken) {
+    headers['Authorization'] = `Bearer ${this.authToken}`;
+  }
+
+  try {
+    const response = await fetch(`${this.endpoint}/v1/chat/completions`, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({
+        messages,
+        max_tokens: request.maxTokens ?? 2048,
+        stream: true,
+      }),
+    });
+
+    if (!response.ok) {
+      const text = await response.text();
+      throw new Error(`llama-server error (${response.status}): ${text}`);
+    }
+
+    if (!response.body) {
+      throw new Error('No response body for streaming');
+    }
+
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = '';
+    let usage = { inputTokens: 0, outputTokens: 0 };
+
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      buffer += decoder.decode(value, { stream: true });
+      const lines = buffer.split('\n');
+      buffer = lines.pop() ?? '';
+
+      for (const line of lines) {
+        const trimmed = line.trim();
+        if (!trimmed || !trimmed.startsWith('data: ')) continue;
+
+        const data = trimmed.slice(6);
+        if (data === '[DONE]') continue;
+
+        try {
+          const chunk = JSON.parse(data) as LlamaCppStreamChunk;
+
+          if (chunk.choices[0]?.delta?.content) {
+            yield { type: 'content', content: chunk.choices[0].delta.content };
+          }
+
+          if (chunk.usage) {
+            usage = {
+              inputTokens: chunk.usage.prompt_tokens,
+              outputTokens: chunk.usage.completion_tokens,
+            };
+          }
+        } catch {
+          // Skip malformed JSON
+        }
+      }
+    }
+
+    yield { type: 'done', usage };
+  } catch (error) {
+    yield {
+      type: 'error',
+      error: error instanceof Error ? error : new Error(String(error)),
+    };
+  }
+}
+```
+
+### Step 4: Run test to verify it passes
+
+Run: `npm test -- src/models/local/llamacpp.test.ts`
+Expected: PASS (2 tests)
+
+### Step 5: Commit
+
+```bash
+git add src/models/local/llamacpp.ts src/models/local/llamacpp.test.ts
+git commit -m "feat: add streaming support to LlamaCppClient"
+```
+
+---
+
+## Task 3: Add Connection Error Handling
+
+**Files:**
+- Modify: `src/models/local/llamacpp.test.ts`
+- Modify: `src/models/local/llamacpp.ts`
+
+### Step 1: Write the failing test
+
+Add to `src/models/local/llamacpp.test.ts`, inside the existing `describe` block (the test relies on the block's `mockFetch`):
+
+```typescript
+it('throws clear error when server not running', async () => {
+  mockFetch.mockRejectedValue(new TypeError('fetch failed'));
+
+  const client = new LlamaCppClient({
+    endpoint: 'http://localhost:8080',
+  });
+
+  await expect(client.chat({
+    messages: [{ role: 'user', content: 'Hello' }],
+  })).rejects.toThrow('llama-server not running at http://localhost:8080');
+});
+```
+
+### Step 2: Run test to verify it fails
+
+Run: `npm test -- src/models/local/llamacpp.test.ts`
+Expected: FAIL — `chat()` rejects with the raw `TypeError: fetch failed`, so the error message doesn't match
+
+### Step 3: Update implementation
+
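+Replace the `chat()` method with the version below, which wraps the fetch call in error handling (`chatStream()` is left unchanged by this task, so a connection failure there still surfaces as an `error` event carrying the raw fetch error):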
+
+```typescript
+async chat(request: ChatRequest): Promise<ChatResponse> {
+  const messages: LlamaCppMessage[] = [];
+
+  if (request.system) {
+    messages.push({ role: 'system', content: request.system });
+  }
+
+  for (const msg of request.messages) {
+    messages.push({ role: msg.role, content: msg.content });
+  }
+
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+  };
+
+  if (this.authToken) {
+    headers['Authorization'] = `Bearer ${this.authToken}`;
+  }
+
+  let response: Response;
+  try {
+    response = await fetch(`${this.endpoint}/v1/chat/completions`, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({
+        messages,
+        max_tokens: request.maxTokens ?? 2048,
+      }),
+    });
+  } catch (error) {
+    if (error instanceof TypeError && error.message.includes('fetch failed')) {
+      throw new Error(`llama-server not running at ${this.endpoint}`);
+    }
+    throw error;
+  }
+
+  if (!response.ok) {
+    const text = await response.text();
+    throw new Error(`llama-server error (${response.status}): ${text}`);
+  }
+
+  const data = (await response.json()) as LlamaCppResponse;
+
+  return {
+    content: data.choices[0]?.message?.content ?? '',
+    stopReason: 'stop',
+    usage: {
+      inputTokens: data.usage?.prompt_tokens ?? 0,
+      outputTokens: data.usage?.completion_tokens ?? 0,
+    },
+  };
+}
+```
+
+### Step 4: Run test to verify it passes
+
+Run: `npm test -- src/models/local/llamacpp.test.ts`
+Expected: PASS (3 tests)
+
+### Step 5: Commit
+
+```bash
+git add src/models/local/llamacpp.ts src/models/local/llamacpp.test.ts
+git commit -m "feat: add clear error message when llama-server not running"
+```
+
+---
+
+## Task 4: Export and Wire Up Client
+
+**Files:**
+- Modify: `src/models/local/index.ts`
+- Modify: `src/models/index.ts`
+- Modify: `src/daemon/index.ts`
+
+### Step 1: Export from local/index.ts
+
+Update `src/models/local/index.ts`:
+
+```typescript
+export { OllamaClient, type OllamaClientConfig } from './ollama.js';
+export { LlamaCppClient, type LlamaCppClientConfig } from './llamacpp.js';
+```
+
+### Step 2: Export from models/index.ts
+
+Update `src/models/index.ts`:
+
+```typescript
+export { AnthropicClient, type AnthropicClientConfig } from './anthropic.js';
+export { OpenAIClient, type OpenAIClientConfig } from './openai.js';
+export { OllamaClient, type OllamaClientConfig } from './local/index.js';
+export { LlamaCppClient, type LlamaCppClientConfig } from './local/index.js';
+export { ModelRouter, type ModelRouterConfig, type ModelTier } from './router.js';
+export type { Message, ChatRequest, ChatResponse, ModelClient } from './types.js';
+```
+
+### Step 3: Wire up in daemon
+
+Update `src/daemon/index.ts` import:
+
+```typescript
+import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, ModelRouter } from '../models/index.js';
+```
+
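+In the `createModelRouter` function, add a `llamacpp` branch to the `if (models.local)` block (this assumes the local-model config type already allows `provider: 'llamacpp'` and an optional `auth_token` field — confirm, since this plan does not modify the config schema):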
+
+```typescript
+if (models.local) {
+  if (models.local.provider === 'ollama') {
+    localClient = new OllamaClient({
+      model: models.local.model,
+      host: models.local.endpoint,
+    });
+  } else if (models.local.provider === 'llamacpp') {
+    localClient = new LlamaCppClient({
+      endpoint: models.local.endpoint ?? 'http://localhost:8080',
+      authToken: models.local.auth_token,
+    });
+  }
+}
+```
+
+### Step 4: Run all tests
+
+Run: `npm test`
+Expected: All tests pass
+
+### Step 5: Commit
+
+```bash
+git add src/models/local/index.ts src/models/index.ts src/daemon/index.ts
+git commit -m "feat: wire up LlamaCppClient to model router"
+```
+
+---
+
+## Task 5: Run Full Test Suite and Verify
+
+**Files:** None (verification only)
+
+### Step 1: Run full test suite
+
+Run: `npm test`
+Expected: All tests pass (should be 62+ tests now)
+
+### Step 2: Type check
+
+Run: `npx tsc --noEmit`
+Expected: No errors
+
+### Step 3: Build
+
+Run: `npm run build`
+Expected: Build succeeds
+
+### Step 4: Final commit (if any changes needed)
+
+If any fixes were needed, commit them.
+
+---
+
+## Summary
+
+| Task | Description | Tests Added |
+|------|-------------|-------------|
+| 1 | Basic chat support | 1 |
+| 2 | Streaming via SSE | 1 |
+| 3 | Connection error handling | 1 |
+| 4 | Export and wire up | 0 |
+| 5 | Verification | 0 |
+
+**Total new tests:** 3
+**Files created:** 2 (`llamacpp.ts`, `llamacpp.test.ts`)
+**Files modified:** 3 (`local/index.ts`, `models/index.ts`, `daemon/index.ts`)