From 8e7fa24fd698ab8cbad57489f9ad475632de0d3e Mon Sep 17 00:00:00 2001
From: William Valentin <william.valentin.info@gmail.com>
Date: Thu, 5 Feb 2026 13:17:56 -0800
Subject: [PATCH] feat: add clear error message when llama-server not running

---
 src/models/local/llamacpp.test.ts | 12 ++++++++++++
 src/models/local/llamacpp.ts      | 24 ++++++++++++++++--------
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/src/models/local/llamacpp.test.ts b/src/models/local/llamacpp.test.ts
index 1884a28..9812c90 100644
--- a/src/models/local/llamacpp.test.ts
+++ b/src/models/local/llamacpp.test.ts
@@ -81,4 +81,16 @@ describe('LlamaCppClient', () => {
       usage: { inputTokens: 5, outputTokens: 2 },
     });
   });
+
+  it('throws clear error when server not running', async () => {
+    mockFetch.mockRejectedValue(new TypeError('fetch failed')); // the shape chat() checks for: a TypeError whose message contains 'fetch failed'
+
+    const client = new LlamaCppClient({
+      endpoint: 'http://localhost:8080',
+    });
+    // The rejection message must name the endpoint the client was configured with.
+    await expect(client.chat({
+      messages: [{ role: 'user', content: 'Hello' }],
+    })).rejects.toThrow('llama-server not running at http://localhost:8080');
+  });
 });
diff --git a/src/models/local/llamacpp.ts b/src/models/local/llamacpp.ts
index 8aeac17..f428b38 100644
--- a/src/models/local/llamacpp.ts
+++ b/src/models/local/llamacpp.ts
@@ -48,14 +48,22 @@ export class LlamaCppClient implements ModelClient {
       headers['Authorization'] = `Bearer ${this.authToken}`;
     }
 
-    const response = await fetch(`${this.endpoint}/v1/chat/completions`, {
-      method: 'POST',
-      headers,
-      body: JSON.stringify({
-        messages,
-        max_tokens: request.maxTokens ?? 2048,
-      }),
-    });
+    let response: Response;
+    try {
+      response = await fetch(`${this.endpoint}/v1/chat/completions`, {
+        method: 'POST',
+        headers,
+        body: JSON.stringify({
+          messages,
+          max_tokens: request.maxTokens ?? 2048,
+        }),
+      });
+    } catch (error) {
+      if (error instanceof TypeError && error.message.includes('fetch failed')) {
+        throw new Error(`llama-server not running at ${this.endpoint}`);
+      }
+      throw error;
+    }
 
     if (!response.ok) {
       const text = await response.text();