From 8e7fa24fd698ab8cbad57489f9ad475632de0d3e Mon Sep 17 00:00:00 2001 From: William Valentin Date: Thu, 5 Feb 2026 13:17:56 -0800 Subject: [PATCH] feat: add clear error message when llama-server not running --- src/models/local/llamacpp.test.ts | 12 ++++++++++++ src/models/local/llamacpp.ts | 24 ++++++++++++++++-------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/models/local/llamacpp.test.ts b/src/models/local/llamacpp.test.ts index 1884a28..9812c90 100644 --- a/src/models/local/llamacpp.test.ts +++ b/src/models/local/llamacpp.test.ts @@ -81,4 +81,16 @@ describe('LlamaCppClient', () => { usage: { inputTokens: 5, outputTokens: 2 }, }); }); + + it('throws clear error when server not running', async () => { + mockFetch.mockRejectedValue(new TypeError('fetch failed')); + + const client = new LlamaCppClient({ + endpoint: 'http://localhost:8080', + }); + + await expect(client.chat({ + messages: [{ role: 'user', content: 'Hello' }], + })).rejects.toThrow('llama-server not running at http://localhost:8080'); + }); }); diff --git a/src/models/local/llamacpp.ts b/src/models/local/llamacpp.ts index 8aeac17..f428b38 100644 --- a/src/models/local/llamacpp.ts +++ b/src/models/local/llamacpp.ts @@ -48,14 +48,22 @@ export class LlamaCppClient implements ModelClient { headers['Authorization'] = `Bearer ${this.authToken}`; } - const response = await fetch(`${this.endpoint}/v1/chat/completions`, { - method: 'POST', - headers, - body: JSON.stringify({ - messages, - max_tokens: request.maxTokens ?? 2048, - }), - }); + let response: Response; + try { + response = await fetch(`${this.endpoint}/v1/chat/completions`, { + method: 'POST', + headers, + body: JSON.stringify({ + messages, + max_tokens: request.maxTokens ?? 2048, + }), + }); + } catch (error) { + if (error instanceof TypeError && error.message.includes('fetch failed')) { + throw new Error(`llama-server not running at ${this.endpoint}`); + } + throw error; + } if (!response.ok) { const text = await response.text();