From 6761dca1c29b2529ece14a127743f00bf6a543ac Mon Sep 17 00:00:00 2001 From: William Valentin Date: Tue, 10 Feb 2026 22:04:17 -0800 Subject: [PATCH] fix: normalize message roles for local model backends (llama.cpp, Ollama) Local backends using strict chat templates (e.g. Mistral 3) rejected Flynn's Anthropic-style tool_use/tool_result content blocks, causing 'roles must alternate' errors. Added getMessageTextWithTools() and normalizeMessagesForLocal() to serialize structured blocks to plain text, drop empty messages, and merge consecutive same-role messages. Also fixed compaction to ensure kept messages start with user role. --- src/context/compaction.test.ts | 19 +++ src/context/compaction.ts | 6 + src/models/local/llamacpp.ts | 27 +---- src/models/local/ollama.ts | 22 +--- src/models/media.test.ts | 215 +++++++++++++++++++++++++++++++++ src/models/media.ts | 72 +++++++++++ 6 files changed, 318 insertions(+), 43 deletions(-) diff --git a/src/context/compaction.test.ts b/src/context/compaction.test.ts index 1de56ca..da797da 100644 --- a/src/context/compaction.test.ts +++ b/src/context/compaction.test.ts @@ -101,4 +101,23 @@ describe('compactHistory', () => { expect(DEFAULT_COMPACTION_CONFIG.keepTurns).toBe(4); expect(DEFAULT_COMPACTION_CONFIG.summaryMaxTokens).toBe(1024); }); + + it('shifts leading assistant messages from toKeep into toCompact to ensure user-first', async () => { + // 9 messages: user(0), assistant(1), ..., user(8) + // With keepTurns=2 (keepCount=4), toKeep starts as [assistant(5), user(6), assistant(7), user(8)] + // The fix shifts assistant(5) into toCompact, so toKeep = [user(6), assistant(7), user(8)] + const messages = makeMessages(9); + const orchestrator = makeMockOrchestrator(); + + const result = await compactHistory({ messages, orchestrator, config }); + + // summary(assistant) + user(6) + assistant(7) + user(8) = 4 messages + expect(result.messages).toHaveLength(4); + expect(result.compactedCount).toBe(6); + // First is summary (assistant), second must be user + expect(result.messages[0].role).toBe('assistant'); + expect(result.messages[0].content).toContain('[Summary of earlier conversation]'); + expect(result.messages[1].role).toBe('user'); + expect(result.messages[1].content).toBe('Message 6'); + }); }); diff --git a/src/context/compaction.ts b/src/context/compaction.ts index 441cf17..cd867d7 100644 --- a/src/context/compaction.ts +++ b/src/context/compaction.ts @@ -53,6 +53,12 @@ export async function compactHistory(opts: { const toCompact = messages.slice(0, -keepCount); const toKeep = messages.slice(-keepCount); + // Ensure toKeep starts with a user message to avoid assistant→assistant + // after the compaction summary (which has role 'assistant'). + while (toKeep.length > 0 && toKeep[0].role === 'assistant') { + toCompact.push(toKeep.shift()!); + } + const formattedConversation = toCompact.map((msg) => `${msg.role}: ${getMessageText(msg)}`).join('\n\n'); const tier = orchestrator.getDelegationTier('compaction'); diff --git a/src/models/local/llamacpp.ts b/src/models/local/llamacpp.ts index 6c0ea58..82be1b0 100644 --- a/src/models/local/llamacpp.ts +++ b/src/models/local/llamacpp.ts @@ -1,5 +1,5 @@ import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ModelToolCall } from '../types.js'; -import { getMessageText } from '../media.js'; +import { normalizeMessagesForLocal } from '../media.js'; export interface LlamaCppClientConfig { endpoint: string; @@ -7,11 +7,6 @@ export interface LlamaCppClientConfig { authToken?: string; } -interface LlamaCppMessage { - role: 'system' | 'user' | 'assistant'; - content: string; -} - interface LlamaCppToolCall { id: string; type: 'function'; @@ -63,15 +58,7 @@ export class LlamaCppClient implements ModelClient { } async chat(request: ChatRequest): Promise { - const messages: LlamaCppMessage[] = []; - - if (request.system) { - messages.push({ role: 'system', content: request.system }); - } - - for (const msg of request.messages) { - messages.push({ role: msg.role, content: getMessageText(msg) }); - } + const messages = normalizeMessagesForLocal(request.system, request.messages); const headers: Record = { 'Content-Type': 'application/json', @@ -142,15 +129,7 @@ export class LlamaCppClient implements ModelClient { } async *chatStream(request: ChatRequest): AsyncIterable { - const messages: LlamaCppMessage[] = []; - - if (request.system) { - messages.push({ role: 'system', content: request.system }); - } - - for (const msg of request.messages) { - messages.push({ role: msg.role, content: getMessageText(msg) }); - } + const messages = normalizeMessagesForLocal(request.system, request.messages); const headers: Record = { 'Content-Type': 'application/json', diff --git a/src/models/local/ollama.ts b/src/models/local/ollama.ts index 49d5961..181ad3a 100644 --- a/src/models/local/ollama.ts +++ b/src/models/local/ollama.ts @@ -1,6 +1,6 @@ import { Ollama, type Tool } from 'ollama'; import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ToolDefinition, ModelToolCall } from '../types.js'; -import { getMessageText } from '../media.js'; +import { normalizeMessagesForLocal } from '../media.js'; export interface OllamaClientConfig { host?: string; @@ -61,15 +61,7 @@ export class OllamaClient implements ModelClient { } async chat(request: ChatRequest): Promise { - const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = []; - - if (request.system) { - messages.push({ role: 'system', content: request.system }); - } - - for (const msg of request.messages) { - messages.push({ role: msg.role, content: getMessageText(msg) }); - } + const messages = normalizeMessagesForLocal(request.system, request.messages); // Build the chat params, optionally including tools const chatParams: Parameters[0] = { @@ -120,15 +112,7 @@ export class OllamaClient implements ModelClient { } async *chatStream(request: ChatRequest): AsyncIterable { - const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = []; - - if (request.system) { - messages.push({ role: 'system', content: request.system }); - } - - for (const msg of request.messages) { - messages.push({ role: msg.role, content: getMessageText(msg) }); - } + const messages = normalizeMessagesForLocal(request.system, request.messages); try { // Build tools array if provided and model supports them diff --git a/src/models/media.test.ts b/src/models/media.test.ts index 529de43..5dcaff6 100644 --- a/src/models/media.test.ts +++ b/src/models/media.test.ts @@ -8,6 +8,8 @@ import { attachmentToImageSource, buildUserMessage, getMessageText, + getMessageTextWithTools, + normalizeMessagesForLocal, hasImages, transcribeAudio, buildUserMessageWithAudio, @@ -605,3 +607,216 @@ describe('buildUserMessageWithAudio', () => { expect(result.content).toContain('[Voice message]:'); }); }); + +// --------------------------------------------------------------------------- +// 10. getMessageTextWithTools +// --------------------------------------------------------------------------- + +describe('getMessageTextWithTools', () => { + it('returns string directly for string content', () => { + const msg: Message = { role: 'user', content: 'plain text' }; + expect(getMessageTextWithTools(msg)).toBe('plain text'); + }); + + it('extracts text from text-only array content', () => { + const msg: Message = { + role: 'assistant', + content: [ + { type: 'text', text: 'Hello ' }, + { type: 'text', text: 'World' }, + ], + }; + expect(getMessageTextWithTools(msg)).toBe('Hello \nWorld'); + }); + + it('serializes tool_use blocks to readable text', () => { + const msg = { + role: 'assistant', + content: [ + { type: 'tool_use', name: 'search', input: { query: 'foo' } }, + ], + } as unknown as Message; + + expect(getMessageTextWithTools(msg)).toBe('[Calling tool: search({"query":"foo"})]'); + }); + + it('serializes tool_result blocks to readable text', () => { + const msg = { + role: 'user', + content: [ + { type: 'tool_result', content: 'Found 3 results' }, + ], + } as unknown as Message; + + expect(getMessageTextWithTools(msg)).toBe('[Tool result: Found 3 results]'); + }); + + it('marks error tool_result blocks', () => { + const msg = { + role: 'user', + content: [ + { type: 'tool_result', content: 'File not found', is_error: true }, + ], + } as unknown as Message; + + expect(getMessageTextWithTools(msg)).toBe('[Tool result (error): File not found]'); + }); + + it('handles mixed content (text + tool_use + tool_result) joined with newline', () => { + const msg = { + role: 'assistant', + content: [ + { type: 'text', text: 'Let me search for that.' }, + { type: 'tool_use', name: 'web_search', input: { q: 'test' } }, + { type: 'tool_result', content: 'No results' }, + ], + } as unknown as Message; + + const result = getMessageTextWithTools(msg); + expect(result).toBe( + 'Let me search for that.\n[Calling tool: web_search({"q":"test"})]\n[Tool result: No results]', + ); + }); + + it('returns empty string for empty array content', () => { + const msg: Message = { + role: 'assistant', + content: [], + }; + expect(getMessageTextWithTools(msg)).toBe(''); + }); +}); + +// --------------------------------------------------------------------------- +// 11. normalizeMessagesForLocal +// --------------------------------------------------------------------------- + +describe('normalizeMessagesForLocal', () => { + it('passes through simple text messages', () => { + const messages: Message[] = [ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi there' }, + ]; + + const result = normalizeMessagesForLocal(undefined, messages); + + expect(result).toEqual([ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi there' }, + ]); + }); + + it('prepends system message when provided', () => { + const messages: Message[] = [ + { role: 'user', content: 'Hello' }, + ]; + + const result = normalizeMessagesForLocal('You are helpful.', messages); + + expect(result).toEqual([ + { role: 'system', content: 'You are helpful.' }, + { role: 'user', content: 'Hello' }, + ]); + }); + + it('omits system message when undefined', () => { + const messages: Message[] = [ + { role: 'user', content: 'Hello' }, + ]; + + const result = normalizeMessagesForLocal(undefined, messages); + + expect(result).toEqual([ + { role: 'user', content: 'Hello' }, + ]); + }); + + it('merges consecutive same-role messages', () => { + const messages: Message[] = [ + { role: 'user', content: 'Part 1' }, + { role: 'user', content: 'Part 2' }, + { role: 'assistant', content: 'Response' }, + ]; + + const result = normalizeMessagesForLocal(undefined, messages); + + expect(result).toEqual([ + { role: 'user', content: 'Part 1\n\nPart 2' }, + { role: 'assistant', content: 'Response' }, + ]); + }); + + it('drops empty messages (e.g. image-only content that serializes to "")', () => { + const messages: Message[] = [ + { role: 'user', content: 'Before' }, + { + role: 'user', + content: [ + { type: 'image', source: { type: 'url', media_type: 'image/png', url: 'https://example.com/img.png' } }, + ], + }, + { role: 'assistant', content: 'After' }, + ]; + + const result = normalizeMessagesForLocal(undefined, messages); + + expect(result).toEqual([ + { role: 'user', content: 'Before' }, + { role: 'assistant', content: 'After' }, + ]); + }); + + it('handles realistic agent tool loop sequence', () => { + // Simulates: user asks question → assistant calls tool → user provides result → assistant responds + const messages = [ + { role: 'user', content: 'What is the weather?' }, + { + role: 'assistant', + content: [ + { type: 'text', text: 'Let me check.' }, + { type: 'tool_use', name: 'get_weather', input: { city: 'London' } }, + ], + }, + { + role: 'user', + content: [ + { type: 'tool_result', content: 'Sunny, 22°C' }, + ], + }, + { role: 'assistant', content: 'The weather in London is sunny at 22°C.' }, + ] as unknown as Message[]; + + const result = normalizeMessagesForLocal('You are a weather bot.', messages); + + expect(result).toEqual([ + { role: 'system', content: 'You are a weather bot.' }, + { role: 'user', content: 'What is the weather?' }, + { role: 'assistant', content: 'Let me check.\n[Calling tool: get_weather({"city":"London"})]' }, + { role: 'user', content: '[Tool result: Sunny, 22°C]' }, + { role: 'assistant', content: 'The weather in London is sunny at 22°C.' }, + ]); + }); + + it('returns empty array when all messages are empty', () => { + const messages: Message[] = [ + { role: 'user', content: '' }, + { role: 'assistant', content: '' }, + ]; + + const result = normalizeMessagesForLocal(undefined, messages); + + expect(result).toEqual([]); + }); + + it('returns only system message when all messages are empty but system is set', () => { + const messages: Message[] = [ + { role: 'user', content: '' }, + ]; + + const result = normalizeMessagesForLocal('System prompt', messages); + + expect(result).toEqual([ + { role: 'system', content: 'System prompt' }, + ]); + }); +}); diff --git a/src/models/media.ts b/src/models/media.ts index 3fbe630..9aac52e 100644 --- a/src/models/media.ts +++ b/src/models/media.ts @@ -120,6 +120,78 @@ export function getMessageText(message: Message): string { .join(''); } +/** + * Serialize a message's content to a plain string, including tool_use and + * tool_result structured blocks that getMessageText() would discard. + * This is needed for local model backends (llama.cpp, Ollama) whose chat + * templates don't understand Anthropic-style structured content blocks. + */ +export function getMessageTextWithTools(message: Message): string { + if (typeof message.content === 'string') { + return message.content; + } + + const parts: string[] = []; + for (const block of message.content as Record[]) { + if (block.type === 'text' && typeof block.text === 'string') { + parts.push(block.text); + } else if (block.type === 'tool_use') { + const name = block.name as string; + let argsStr: string; + try { + argsStr = JSON.stringify(block.input); + } catch { + argsStr = String(block.input); + } + parts.push(`[Calling tool: ${name}(${argsStr})]`); + } else if (block.type === 'tool_result') { + const content = (block.content as string) ?? ''; + const isError = block.is_error ? ' (error)' : ''; + parts.push(`[Tool result${isError}: ${content}]`); + } + } + return parts.join('\n'); +} + +interface SimpleMessage { + role: 'system' | 'user' | 'assistant'; + content: string; +} + +/** + * Normalize a message array for local model backends that require strict + * role alternation (system? -> user -> assistant -> user -> ...). + * + * 1. Serializes structured tool_use/tool_result content blocks to text + * 2. Drops empty messages + * 3. Merges consecutive same-role messages with a newline separator + */ +export function normalizeMessagesForLocal( + system: string | undefined, + messages: Message[], +): SimpleMessage[] { + const result: SimpleMessage[] = []; + + if (system) { + result.push({ role: 'system', content: system }); + } + + for (const msg of messages) { + const text = getMessageTextWithTools(msg); + if (!text) continue; // drop empty messages + + const last = result.length > 0 ? result[result.length - 1] : undefined; + if (last && last.role === msg.role) { + // Merge consecutive same-role messages + last.content += '\n\n' + text; + } else { + result.push({ role: msg.role, content: text }); + } + } + + return result; +} + /** Configuration for audio transcription via Whisper-compatible API. */ export interface AudioTranscriptionConfig { /** Whisper-compatible API endpoint (e.g. "https://api.openai.com/v1/audio/transcriptions") */