fix: normalize message roles for local model backends (llama.cpp, Ollama)

Local backends using strict chat templates (e.g. Mistral 3) rejected
Flynn's Anthropic-style tool_use/tool_result content blocks, causing
'roles must alternate' errors. Added getMessageTextWithTools() and
normalizeMessagesForLocal() to serialize structured blocks to plain
text, drop empty messages, and merge consecutive same-role messages.
Also fixed compaction to ensure the kept messages start with a user-role message.
This commit is contained in:
William Valentin
2026-02-10 22:04:17 -08:00
parent 2f6d045e2a
commit 6761dca1c2
6 changed files with 318 additions and 43 deletions
+3 -24
View File
@@ -1,5 +1,5 @@
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ModelToolCall } from '../types.js';
import { getMessageText } from '../media.js';
import { normalizeMessagesForLocal } from '../media.js';
export interface LlamaCppClientConfig {
endpoint: string;
@@ -7,11 +7,6 @@ export interface LlamaCppClientConfig {
authToken?: string;
}
interface LlamaCppMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
interface LlamaCppToolCall {
id: string;
type: 'function';
@@ -63,15 +58,7 @@ export class LlamaCppClient implements ModelClient {
}
async chat(request: ChatRequest): Promise<ChatResponse> {
const messages: LlamaCppMessage[] = [];
if (request.system) {
messages.push({ role: 'system', content: request.system });
}
for (const msg of request.messages) {
messages.push({ role: msg.role, content: getMessageText(msg) });
}
const messages = normalizeMessagesForLocal(request.system, request.messages);
const headers: Record<string, string> = {
'Content-Type': 'application/json',
@@ -142,15 +129,7 @@ export class LlamaCppClient implements ModelClient {
}
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
const messages: LlamaCppMessage[] = [];
if (request.system) {
messages.push({ role: 'system', content: request.system });
}
for (const msg of request.messages) {
messages.push({ role: msg.role, content: getMessageText(msg) });
}
const messages = normalizeMessagesForLocal(request.system, request.messages);
const headers: Record<string, string> = {
'Content-Type': 'application/json',
+3 -19
View File
@@ -1,6 +1,6 @@
import { Ollama, type Tool } from 'ollama';
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ToolDefinition, ModelToolCall } from '../types.js';
import { getMessageText } from '../media.js';
import { normalizeMessagesForLocal } from '../media.js';
export interface OllamaClientConfig {
host?: string;
@@ -61,15 +61,7 @@ export class OllamaClient implements ModelClient {
}
async chat(request: ChatRequest): Promise<ChatResponse> {
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
if (request.system) {
messages.push({ role: 'system', content: request.system });
}
for (const msg of request.messages) {
messages.push({ role: msg.role, content: getMessageText(msg) });
}
const messages = normalizeMessagesForLocal(request.system, request.messages);
// Build the chat params, optionally including tools
const chatParams: Parameters<typeof this.client.chat>[0] = {
@@ -120,15 +112,7 @@ export class OllamaClient implements ModelClient {
}
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
if (request.system) {
messages.push({ role: 'system', content: request.system });
}
for (const msg of request.messages) {
messages.push({ role: msg.role, content: getMessageText(msg) });
}
const messages = normalizeMessagesForLocal(request.system, request.messages);
try {
// Build tools array if provided and model supports them