fix: normalize message roles for local model backends (llama.cpp, Ollama)
Local backends that use strict chat templates (e.g. Mistral 3) rejected Flynn's Anthropic-style tool_use/tool_result content blocks, causing 'roles must alternate' errors. Added getMessageTextWithTools() and normalizeMessagesForLocal() to serialize structured blocks to plain text, drop empty messages, and merge consecutive same-role messages. Also fixed compaction to ensure that the kept messages start with a user-role message.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ModelToolCall } from '../types.js';
|
||||
import { getMessageText } from '../media.js';
|
||||
import { normalizeMessagesForLocal } from '../media.js';
|
||||
|
||||
export interface LlamaCppClientConfig {
|
||||
endpoint: string;
|
||||
@@ -7,11 +7,6 @@ export interface LlamaCppClientConfig {
|
||||
authToken?: string;
|
||||
}
|
||||
|
||||
interface LlamaCppMessage {
|
||||
role: 'system' | 'user' | 'assistant';
|
||||
content: string;
|
||||
}
|
||||
|
||||
interface LlamaCppToolCall {
|
||||
id: string;
|
||||
type: 'function';
|
||||
@@ -63,15 +58,7 @@ export class LlamaCppClient implements ModelClient {
|
||||
}
|
||||
|
||||
async chat(request: ChatRequest): Promise<ChatResponse> {
|
||||
const messages: LlamaCppMessage[] = [];
|
||||
|
||||
if (request.system) {
|
||||
messages.push({ role: 'system', content: request.system });
|
||||
}
|
||||
|
||||
for (const msg of request.messages) {
|
||||
messages.push({ role: msg.role, content: getMessageText(msg) });
|
||||
}
|
||||
const messages = normalizeMessagesForLocal(request.system, request.messages);
|
||||
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
@@ -142,15 +129,7 @@ export class LlamaCppClient implements ModelClient {
|
||||
}
|
||||
|
||||
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
|
||||
const messages: LlamaCppMessage[] = [];
|
||||
|
||||
if (request.system) {
|
||||
messages.push({ role: 'system', content: request.system });
|
||||
}
|
||||
|
||||
for (const msg of request.messages) {
|
||||
messages.push({ role: msg.role, content: getMessageText(msg) });
|
||||
}
|
||||
const messages = normalizeMessagesForLocal(request.system, request.messages);
|
||||
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { Ollama, type Tool } from 'ollama';
|
||||
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ToolDefinition, ModelToolCall } from '../types.js';
|
||||
import { getMessageText } from '../media.js';
|
||||
import { normalizeMessagesForLocal } from '../media.js';
|
||||
|
||||
export interface OllamaClientConfig {
|
||||
host?: string;
|
||||
@@ -61,15 +61,7 @@ export class OllamaClient implements ModelClient {
|
||||
}
|
||||
|
||||
async chat(request: ChatRequest): Promise<ChatResponse> {
|
||||
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
|
||||
|
||||
if (request.system) {
|
||||
messages.push({ role: 'system', content: request.system });
|
||||
}
|
||||
|
||||
for (const msg of request.messages) {
|
||||
messages.push({ role: msg.role, content: getMessageText(msg) });
|
||||
}
|
||||
const messages = normalizeMessagesForLocal(request.system, request.messages);
|
||||
|
||||
// Build the chat params, optionally including tools
|
||||
const chatParams: Parameters<typeof this.client.chat>[0] = {
|
||||
@@ -120,15 +112,7 @@ export class OllamaClient implements ModelClient {
|
||||
}
|
||||
|
||||
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
|
||||
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
|
||||
|
||||
if (request.system) {
|
||||
messages.push({ role: 'system', content: request.system });
|
||||
}
|
||||
|
||||
for (const msg of request.messages) {
|
||||
messages.push({ role: msg.role, content: getMessageText(msg) });
|
||||
}
|
||||
const messages = normalizeMessagesForLocal(request.system, request.messages);
|
||||
|
||||
try {
|
||||
// Build tools array if provided and model supports them
|
||||
|
||||
Reference in New Issue
Block a user