fix: normalize message roles for local model backends (llama.cpp, Ollama)

Local backends using strict chat templates (e.g. Mistral 3) rejected
Flynn's Anthropic-style tool_use/tool_result content blocks, causing
'roles must alternate' errors. Added getMessageTextWithTools() and
normalizeMessagesForLocal() to serialize structured blocks to plain
text, drop empty messages, and merge consecutive same-role messages.
Also fixed compaction to ensure kept messages start with user role.
This commit is contained in:
William Valentin
2026-02-10 22:04:17 -08:00
parent 2f6d045e2a
commit 6761dca1c2
6 changed files with 318 additions and 43 deletions
+19
View File
@@ -101,4 +101,23 @@ describe('compactHistory', () => {
expect(DEFAULT_COMPACTION_CONFIG.keepTurns).toBe(4);
expect(DEFAULT_COMPACTION_CONFIG.summaryMaxTokens).toBe(1024);
});
it('shifts leading assistant messages from toKeep into toCompact to ensure user-first', async () => {
// 9 messages: user(0), assistant(1), ..., user(8)
// With keepTurns=2 (keepCount=4), toKeep starts as [assistant(5), user(6), assistant(7), user(8)]
// The fix shifts assistant(5) into toCompact, so toKeep = [user(6), assistant(7), user(8)]
const messages = makeMessages(9);
const orchestrator = makeMockOrchestrator();
const result = await compactHistory({ messages, orchestrator, config });
// summary(assistant) + user(6) + assistant(7) + user(8) = 4 messages
expect(result.messages).toHaveLength(4);
expect(result.compactedCount).toBe(6);
// First is summary (assistant), second must be user
expect(result.messages[0].role).toBe('assistant');
expect(result.messages[0].content).toContain('[Summary of earlier conversation]');
expect(result.messages[1].role).toBe('user');
expect(result.messages[1].content).toBe('Message 6');
});
});
+6
View File
@@ -53,6 +53,12 @@ export async function compactHistory(opts: {
const toCompact = messages.slice(0, -keepCount);
const toKeep = messages.slice(-keepCount);
// Ensure toKeep starts with a user message to avoid assistant→assistant
// after the compaction summary (which has role 'assistant').
while (toKeep.length > 0 && toKeep[0].role === 'assistant') {
toCompact.push(toKeep.shift()!);
}
const formattedConversation = toCompact.map((msg) => `${msg.role}: ${getMessageText(msg)}`).join('\n\n');
const tier = orchestrator.getDelegationTier('compaction');
+3 -24
View File
@@ -1,5 +1,5 @@
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ModelToolCall } from '../types.js';
import { getMessageText } from '../media.js';
import { normalizeMessagesForLocal } from '../media.js';
export interface LlamaCppClientConfig {
endpoint: string;
@@ -7,11 +7,6 @@ export interface LlamaCppClientConfig {
authToken?: string;
}
interface LlamaCppMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
interface LlamaCppToolCall {
id: string;
type: 'function';
@@ -63,15 +58,7 @@ export class LlamaCppClient implements ModelClient {
}
async chat(request: ChatRequest): Promise<ChatResponse> {
const messages: LlamaCppMessage[] = [];
if (request.system) {
messages.push({ role: 'system', content: request.system });
}
for (const msg of request.messages) {
messages.push({ role: msg.role, content: getMessageText(msg) });
}
const messages = normalizeMessagesForLocal(request.system, request.messages);
const headers: Record<string, string> = {
'Content-Type': 'application/json',
@@ -142,15 +129,7 @@ export class LlamaCppClient implements ModelClient {
}
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
const messages: LlamaCppMessage[] = [];
if (request.system) {
messages.push({ role: 'system', content: request.system });
}
for (const msg of request.messages) {
messages.push({ role: msg.role, content: getMessageText(msg) });
}
const messages = normalizeMessagesForLocal(request.system, request.messages);
const headers: Record<string, string> = {
'Content-Type': 'application/json',
+3 -19
View File
@@ -1,6 +1,6 @@
import { Ollama, type Tool } from 'ollama';
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ToolDefinition, ModelToolCall } from '../types.js';
import { getMessageText } from '../media.js';
import { normalizeMessagesForLocal } from '../media.js';
export interface OllamaClientConfig {
host?: string;
@@ -61,15 +61,7 @@ export class OllamaClient implements ModelClient {
}
async chat(request: ChatRequest): Promise<ChatResponse> {
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
if (request.system) {
messages.push({ role: 'system', content: request.system });
}
for (const msg of request.messages) {
messages.push({ role: msg.role, content: getMessageText(msg) });
}
const messages = normalizeMessagesForLocal(request.system, request.messages);
// Build the chat params, optionally including tools
const chatParams: Parameters<typeof this.client.chat>[0] = {
@@ -120,15 +112,7 @@ export class OllamaClient implements ModelClient {
}
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
if (request.system) {
messages.push({ role: 'system', content: request.system });
}
for (const msg of request.messages) {
messages.push({ role: msg.role, content: getMessageText(msg) });
}
const messages = normalizeMessagesForLocal(request.system, request.messages);
try {
// Build tools array if provided and model supports them
+215
View File
@@ -8,6 +8,8 @@ import {
attachmentToImageSource,
buildUserMessage,
getMessageText,
getMessageTextWithTools,
normalizeMessagesForLocal,
hasImages,
transcribeAudio,
buildUserMessageWithAudio,
@@ -605,3 +607,216 @@ describe('buildUserMessageWithAudio', () => {
expect(result.content).toContain('[Voice message]:');
});
});
// ---------------------------------------------------------------------------
// 10. getMessageTextWithTools
// ---------------------------------------------------------------------------
describe('getMessageTextWithTools', () => {
it('returns string directly for string content', () => {
const msg: Message = { role: 'user', content: 'plain text' };
expect(getMessageTextWithTools(msg)).toBe('plain text');
});
it('extracts text from text-only array content', () => {
const msg: Message = {
role: 'assistant',
content: [
{ type: 'text', text: 'Hello ' },
{ type: 'text', text: 'World' },
],
};
expect(getMessageTextWithTools(msg)).toBe('Hello \nWorld');
});
it('serializes tool_use blocks to readable text', () => {
const msg = {
role: 'assistant',
content: [
{ type: 'tool_use', name: 'search', input: { query: 'foo' } },
],
} as unknown as Message;
expect(getMessageTextWithTools(msg)).toBe('[Calling tool: search({"query":"foo"})]');
});
it('serializes tool_result blocks to readable text', () => {
const msg = {
role: 'user',
content: [
{ type: 'tool_result', content: 'Found 3 results' },
],
} as unknown as Message;
expect(getMessageTextWithTools(msg)).toBe('[Tool result: Found 3 results]');
});
it('marks error tool_result blocks', () => {
const msg = {
role: 'user',
content: [
{ type: 'tool_result', content: 'File not found', is_error: true },
],
} as unknown as Message;
expect(getMessageTextWithTools(msg)).toBe('[Tool result (error): File not found]');
});
it('handles mixed content (text + tool_use + tool_result) joined with newline', () => {
const msg = {
role: 'assistant',
content: [
{ type: 'text', text: 'Let me search for that.' },
{ type: 'tool_use', name: 'web_search', input: { q: 'test' } },
{ type: 'tool_result', content: 'No results' },
],
} as unknown as Message;
const result = getMessageTextWithTools(msg);
expect(result).toBe(
'Let me search for that.\n[Calling tool: web_search({"q":"test"})]\n[Tool result: No results]',
);
});
it('returns empty string for empty array content', () => {
const msg: Message = {
role: 'assistant',
content: [],
};
expect(getMessageTextWithTools(msg)).toBe('');
});
});
// ---------------------------------------------------------------------------
// 11. normalizeMessagesForLocal
// ---------------------------------------------------------------------------
describe('normalizeMessagesForLocal', () => {
it('passes through simple text messages', () => {
const messages: Message[] = [
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi there' },
];
const result = normalizeMessagesForLocal(undefined, messages);
expect(result).toEqual([
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi there' },
]);
});
it('prepends system message when provided', () => {
const messages: Message[] = [
{ role: 'user', content: 'Hello' },
];
const result = normalizeMessagesForLocal('You are helpful.', messages);
expect(result).toEqual([
{ role: 'system', content: 'You are helpful.' },
{ role: 'user', content: 'Hello' },
]);
});
it('omits system message when undefined', () => {
const messages: Message[] = [
{ role: 'user', content: 'Hello' },
];
const result = normalizeMessagesForLocal(undefined, messages);
expect(result).toEqual([
{ role: 'user', content: 'Hello' },
]);
});
it('merges consecutive same-role messages', () => {
const messages: Message[] = [
{ role: 'user', content: 'Part 1' },
{ role: 'user', content: 'Part 2' },
{ role: 'assistant', content: 'Response' },
];
const result = normalizeMessagesForLocal(undefined, messages);
expect(result).toEqual([
{ role: 'user', content: 'Part 1\n\nPart 2' },
{ role: 'assistant', content: 'Response' },
]);
});
it('drops empty messages (e.g. image-only content that serializes to "")', () => {
const messages: Message[] = [
{ role: 'user', content: 'Before' },
{
role: 'user',
content: [
{ type: 'image', source: { type: 'url', media_type: 'image/png', url: 'https://example.com/img.png' } },
],
},
{ role: 'assistant', content: 'After' },
];
const result = normalizeMessagesForLocal(undefined, messages);
expect(result).toEqual([
{ role: 'user', content: 'Before' },
{ role: 'assistant', content: 'After' },
]);
});
it('handles realistic agent tool loop sequence', () => {
// Simulates: user asks question → assistant calls tool → user provides result → assistant responds
const messages = [
{ role: 'user', content: 'What is the weather?' },
{
role: 'assistant',
content: [
{ type: 'text', text: 'Let me check.' },
{ type: 'tool_use', name: 'get_weather', input: { city: 'London' } },
],
},
{
role: 'user',
content: [
{ type: 'tool_result', content: 'Sunny, 22°C' },
],
},
{ role: 'assistant', content: 'The weather in London is sunny at 22°C.' },
] as unknown as Message[];
const result = normalizeMessagesForLocal('You are a weather bot.', messages);
expect(result).toEqual([
{ role: 'system', content: 'You are a weather bot.' },
{ role: 'user', content: 'What is the weather?' },
{ role: 'assistant', content: 'Let me check.\n[Calling tool: get_weather({"city":"London"})]' },
{ role: 'user', content: '[Tool result: Sunny, 22°C]' },
{ role: 'assistant', content: 'The weather in London is sunny at 22°C.' },
]);
});
it('returns empty array when all messages are empty', () => {
const messages: Message[] = [
{ role: 'user', content: '' },
{ role: 'assistant', content: '' },
];
const result = normalizeMessagesForLocal(undefined, messages);
expect(result).toEqual([]);
});
it('returns only system message when all messages are empty but system is set', () => {
const messages: Message[] = [
{ role: 'user', content: '' },
];
const result = normalizeMessagesForLocal('System prompt', messages);
expect(result).toEqual([
{ role: 'system', content: 'System prompt' },
]);
});
});
+72
View File
@@ -120,6 +120,78 @@ export function getMessageText(message: Message): string {
.join('');
}
/**
* Serialize a message's content to a plain string, including tool_use and
* tool_result structured blocks that getMessageText() would discard.
* This is needed for local model backends (llama.cpp, Ollama) whose chat
* templates don't understand Anthropic-style structured content blocks.
*/
export function getMessageTextWithTools(message: Message): string {
if (typeof message.content === 'string') {
return message.content;
}
const parts: string[] = [];
for (const block of message.content as Record<string, unknown>[]) {
if (block.type === 'text' && typeof block.text === 'string') {
parts.push(block.text);
} else if (block.type === 'tool_use') {
const name = block.name as string;
let argsStr: string;
try {
argsStr = JSON.stringify(block.input);
} catch {
argsStr = String(block.input);
}
parts.push(`[Calling tool: ${name}(${argsStr})]`);
} else if (block.type === 'tool_result') {
const content = (block.content as string) ?? '';
const isError = block.is_error ? ' (error)' : '';
parts.push(`[Tool result${isError}: ${content}]`);
}
}
return parts.join('\n');
}
interface SimpleMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
/**
* Normalize a message array for local model backends that require strict
* role alternation (system? -> user -> assistant -> user -> ...).
*
* 1. Serializes structured tool_use/tool_result content blocks to text
* 2. Drops empty messages
* 3. Merges consecutive same-role messages with a newline separator
*/
export function normalizeMessagesForLocal(
system: string | undefined,
messages: Message[],
): SimpleMessage[] {
const result: SimpleMessage[] = [];
if (system) {
result.push({ role: 'system', content: system });
}
for (const msg of messages) {
const text = getMessageTextWithTools(msg);
if (!text) continue; // drop empty messages
const last = result.length > 0 ? result[result.length - 1] : undefined;
if (last && last.role === msg.role) {
// Merge consecutive same-role messages
last.content += '\n\n' + text;
} else {
result.push({ role: msg.role, content: text });
}
}
return result;
}
/** Configuration for audio transcription via Whisper-compatible API. */
export interface AudioTranscriptionConfig {
/** Whisper-compatible API endpoint (e.g. "https://api.openai.com/v1/audio/transcriptions") */