diff --git a/src/models/local/llamacpp.test.ts b/src/models/local/llamacpp.test.ts index 9812c90..8e0a91d 100644 --- a/src/models/local/llamacpp.test.ts +++ b/src/models/local/llamacpp.test.ts @@ -24,6 +24,7 @@ describe('LlamaCppClient', () => { const client = new LlamaCppClient({ endpoint: 'http://localhost:8080', + model: 'test-model', }); const response = await client.chat({ @@ -64,6 +65,7 @@ describe('LlamaCppClient', () => { const client = new LlamaCppClient({ endpoint: 'http://localhost:8080', + model: 'test-model', }); const events: ChatStreamEvent[] = []; @@ -87,6 +89,7 @@ describe('LlamaCppClient', () => { const client = new LlamaCppClient({ endpoint: 'http://localhost:8080', + model: 'test-model', }); await expect(client.chat({ diff --git a/src/models/local/llamacpp.ts b/src/models/local/llamacpp.ts index f428b38..6665d1d 100644 --- a/src/models/local/llamacpp.ts +++ b/src/models/local/llamacpp.ts @@ -2,6 +2,7 @@ import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient } from '.. export interface LlamaCppClientConfig { endpoint: string; + model: string; authToken?: string; } @@ -22,10 +23,12 @@ interface LlamaCppStreamChunk { export class LlamaCppClient implements ModelClient { private endpoint: string; + private model: string; private authToken?: string; constructor(config: LlamaCppClientConfig) { this.endpoint = config.endpoint.replace(/\/$/, ''); + this.model = config.model; this.authToken = config.authToken; } @@ -54,6 +57,7 @@ export class LlamaCppClient implements ModelClient { method: 'POST', headers, body: JSON.stringify({ + model: this.model, messages, max_tokens: request.maxTokens ?? 2048, }), @@ -106,6 +110,7 @@ export class LlamaCppClient implements ModelClient { method: 'POST', headers, body: JSON.stringify({ + model: this.model, messages, max_tokens: request.maxTokens ?? 2048, stream: true,