fix: normalize message roles for local model backends (llama.cpp, Ollama)

Local backends using strict chat templates (e.g. Mistral 3) rejected Flynn's Anthropic-style tool_use/tool_result content blocks, causing 'roles must alternate' errors. Added getMessageTextWithTools() and normalizeMessagesForLocal() to serialize structured blocks to plain text, drop empty messages, and merge consecutive same-role messages. Also fixed compaction to ensure kept messages start with user role.
2026-02-10 22:04:17 -08:00
parent 2f6d045e2a
commit 6761dca1c2
6 changed files with 318 additions and 43 deletions
@@ -101,4 +101,23 @@ describe('compactHistory', () => {
    expect(DEFAULT_COMPACTION_CONFIG.keepTurns).toBe(4);
    expect(DEFAULT_COMPACTION_CONFIG.summaryMaxTokens).toBe(1024);
  });
+
+  it('shifts leading assistant messages from toKeep into toCompact to ensure user-first', async () => {
+    // 9 messages: user(0), assistant(1), ..., user(8)
+    // With keepTurns=2 (keepCount=4), toKeep starts as [assistant(5), user(6), assistant(7), user(8)]
+    // The fix shifts assistant(5) into toCompact, so toKeep = [user(6), assistant(7), user(8)]
+    const messages = makeMessages(9);
+    const orchestrator = makeMockOrchestrator();
+
+    const result = await compactHistory({ messages, orchestrator, config });
+
+    // summary(assistant) + user(6) + assistant(7) + user(8) = 4 messages
+    expect(result.messages).toHaveLength(4);
+    expect(result.compactedCount).toBe(6);
+    // First is summary (assistant), second must be user
+    expect(result.messages[0].role).toBe('assistant');
+    expect(result.messages[0].content).toContain('[Summary of earlier conversation]');
+    expect(result.messages[1].role).toBe('user');
+    expect(result.messages[1].content).toBe('Message 6');
+  });
 });
@@ -53,6 +53,12 @@ export async function compactHistory(opts: {
  const toCompact = messages.slice(0, -keepCount);
  const toKeep = messages.slice(-keepCount);

+  // Ensure toKeep starts with a user message to avoid assistant→assistant
+  // after the compaction summary (which has role 'assistant').
+  while (toKeep.length > 0 && toKeep[0].role === 'assistant') {
+    toCompact.push(toKeep.shift()!);
+  }
+
  const formattedConversation = toCompact.map((msg) => `${msg.role}: ${getMessageText(msg)}`).join('\n\n');

  const tier = orchestrator.getDelegationTier('compaction');
@@ -1,5 +1,5 @@
 import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ModelToolCall } from '../types.js';
-import { getMessageText } from '../media.js';
+import { normalizeMessagesForLocal } from '../media.js';

 export interface LlamaCppClientConfig {
  endpoint: string;
@@ -7,11 +7,6 @@ export interface LlamaCppClientConfig {
  authToken?: string;
 }

-interface LlamaCppMessage {
-  role: 'system' | 'user' | 'assistant';
-  content: string;
-}
-
 interface LlamaCppToolCall {
  id: string;
  type: 'function';
@@ -63,15 +58,7 @@ export class LlamaCppClient implements ModelClient {
  }

  async chat(request: ChatRequest): Promise<ChatResponse> {
-    const messages: LlamaCppMessage[] = [];
-
-    if (request.system) {
-      messages.push({ role: 'system', content: request.system });
-    }
-
-    for (const msg of request.messages) {
-      messages.push({ role: msg.role, content: getMessageText(msg) });
-    }
+    const messages = normalizeMessagesForLocal(request.system, request.messages);

    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
@@ -142,15 +129,7 @@ export class LlamaCppClient implements ModelClient {
  }

  async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
-    const messages: LlamaCppMessage[] = [];
-
-    if (request.system) {
-      messages.push({ role: 'system', content: request.system });
-    }
-
-    for (const msg of request.messages) {
-      messages.push({ role: msg.role, content: getMessageText(msg) });
-    }
+    const messages = normalizeMessagesForLocal(request.system, request.messages);

    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
@@ -1,6 +1,6 @@
 import { Ollama, type Tool } from 'ollama';
 import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ToolDefinition, ModelToolCall } from '../types.js';
-import { getMessageText } from '../media.js';
+import { normalizeMessagesForLocal } from '../media.js';

 export interface OllamaClientConfig {
  host?: string;
@@ -61,15 +61,7 @@ export class OllamaClient implements ModelClient {
  }

  async chat(request: ChatRequest): Promise<ChatResponse> {
-    const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
-
-    if (request.system) {
-      messages.push({ role: 'system', content: request.system });
-    }
-
-    for (const msg of request.messages) {
-      messages.push({ role: msg.role, content: getMessageText(msg) });
-    }
+    const messages = normalizeMessagesForLocal(request.system, request.messages);

    // Build the chat params, optionally including tools
    const chatParams: Parameters<typeof this.client.chat>[0] = {
@@ -120,15 +112,7 @@ export class OllamaClient implements ModelClient {
  }

  async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
-    const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
-
-    if (request.system) {
-      messages.push({ role: 'system', content: request.system });
-    }
-
-    for (const msg of request.messages) {
-      messages.push({ role: msg.role, content: getMessageText(msg) });
-    }
+    const messages = normalizeMessagesForLocal(request.system, request.messages);

    try {
      // Build tools array if provided and model supports them
@@ -8,6 +8,8 @@ import {
  attachmentToImageSource,
  buildUserMessage,
  getMessageText,
+  getMessageTextWithTools,
+  normalizeMessagesForLocal,
  hasImages,
  transcribeAudio,
  buildUserMessageWithAudio,
@@ -605,3 +607,216 @@ describe('buildUserMessageWithAudio', () => {
    expect(result.content).toContain('[Voice message]:');
  });
 });
+
+// ---------------------------------------------------------------------------
+// 10. getMessageTextWithTools
+// ---------------------------------------------------------------------------
+
+describe('getMessageTextWithTools', () => {
+  it('returns string directly for string content', () => {
+    const msg: Message = { role: 'user', content: 'plain text' };
+    expect(getMessageTextWithTools(msg)).toBe('plain text');
+  });
+
+  it('extracts text from text-only array content', () => {
+    const msg: Message = {
+      role: 'assistant',
+      content: [
+        { type: 'text', text: 'Hello ' },
+        { type: 'text', text: 'World' },
+      ],
+    };
+    expect(getMessageTextWithTools(msg)).toBe('Hello \nWorld');
+  });
+
+  it('serializes tool_use blocks to readable text', () => {
+    const msg = {
+      role: 'assistant',
+      content: [
+        { type: 'tool_use', name: 'search', input: { query: 'foo' } },
+      ],
+    } as unknown as Message;
+
+    expect(getMessageTextWithTools(msg)).toBe('[Calling tool: search({"query":"foo"})]');
+  });
+
+  it('serializes tool_result blocks to readable text', () => {
+    const msg = {
+      role: 'user',
+      content: [
+        { type: 'tool_result', content: 'Found 3 results' },
+      ],
+    } as unknown as Message;
+
+    expect(getMessageTextWithTools(msg)).toBe('[Tool result: Found 3 results]');
+  });
+
+  it('marks error tool_result blocks', () => {
+    const msg = {
+      role: 'user',
+      content: [
+        { type: 'tool_result', content: 'File not found', is_error: true },
+      ],
+    } as unknown as Message;
+
+    expect(getMessageTextWithTools(msg)).toBe('[Tool result (error): File not found]');
+  });
+
+  it('handles mixed content (text + tool_use + tool_result) joined with newline', () => {
+    const msg = {
+      role: 'assistant',
+      content: [
+        { type: 'text', text: 'Let me search for that.' },
+        { type: 'tool_use', name: 'web_search', input: { q: 'test' } },
+        { type: 'tool_result', content: 'No results' },
+      ],
+    } as unknown as Message;
+
+    const result = getMessageTextWithTools(msg);
+    expect(result).toBe(
+      'Let me search for that.\n[Calling tool: web_search({"q":"test"})]\n[Tool result: No results]',
+    );
+  });
+
+  it('returns empty string for empty array content', () => {
+    const msg: Message = {
+      role: 'assistant',
+      content: [],
+    };
+    expect(getMessageTextWithTools(msg)).toBe('');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 11. normalizeMessagesForLocal
+// ---------------------------------------------------------------------------
+
+describe('normalizeMessagesForLocal', () => {
+  it('passes through simple text messages', () => {
+    const messages: Message[] = [
+      { role: 'user', content: 'Hello' },
+      { role: 'assistant', content: 'Hi there' },
+    ];
+
+    const result = normalizeMessagesForLocal(undefined, messages);
+
+    expect(result).toEqual([
+      { role: 'user', content: 'Hello' },
+      { role: 'assistant', content: 'Hi there' },
+    ]);
+  });
+
+  it('prepends system message when provided', () => {
+    const messages: Message[] = [
+      { role: 'user', content: 'Hello' },
+    ];
+
+    const result = normalizeMessagesForLocal('You are helpful.', messages);
+
+    expect(result).toEqual([
+      { role: 'system', content: 'You are helpful.' },
+      { role: 'user', content: 'Hello' },
+    ]);
+  });
+
+  it('omits system message when undefined', () => {
+    const messages: Message[] = [
+      { role: 'user', content: 'Hello' },
+    ];
+
+    const result = normalizeMessagesForLocal(undefined, messages);
+
+    expect(result).toEqual([
+      { role: 'user', content: 'Hello' },
+    ]);
+  });
+
+  it('merges consecutive same-role messages', () => {
+    const messages: Message[] = [
+      { role: 'user', content: 'Part 1' },
+      { role: 'user', content: 'Part 2' },
+      { role: 'assistant', content: 'Response' },
+    ];
+
+    const result = normalizeMessagesForLocal(undefined, messages);
+
+    expect(result).toEqual([
+      { role: 'user', content: 'Part 1\n\nPart 2' },
+      { role: 'assistant', content: 'Response' },
+    ]);
+  });
+
+  it('drops empty messages (e.g. image-only content that serializes to "")', () => {
+    const messages: Message[] = [
+      { role: 'user', content: 'Before' },
+      {
+        role: 'user',
+        content: [
+          { type: 'image', source: { type: 'url', media_type: 'image/png', url: 'https://example.com/img.png' } },
+        ],
+      },
+      { role: 'assistant', content: 'After' },
+    ];
+
+    const result = normalizeMessagesForLocal(undefined, messages);
+
+    expect(result).toEqual([
+      { role: 'user', content: 'Before' },
+      { role: 'assistant', content: 'After' },
+    ]);
+  });
+
+  it('handles realistic agent tool loop sequence', () => {
+    // Simulates: user asks question → assistant calls tool → user provides result → assistant responds
+    const messages = [
+      { role: 'user', content: 'What is the weather?' },
+      {
+        role: 'assistant',
+        content: [
+          { type: 'text', text: 'Let me check.' },
+          { type: 'tool_use', name: 'get_weather', input: { city: 'London' } },
+        ],
+      },
+      {
+        role: 'user',
+        content: [
+          { type: 'tool_result', content: 'Sunny, 22°C' },
+        ],
+      },
+      { role: 'assistant', content: 'The weather in London is sunny at 22°C.' },
+    ] as unknown as Message[];
+
+    const result = normalizeMessagesForLocal('You are a weather bot.', messages);
+
+    expect(result).toEqual([
+      { role: 'system', content: 'You are a weather bot.' },
+      { role: 'user', content: 'What is the weather?' },
+      { role: 'assistant', content: 'Let me check.\n[Calling tool: get_weather({"city":"London"})]' },
+      { role: 'user', content: '[Tool result: Sunny, 22°C]' },
+      { role: 'assistant', content: 'The weather in London is sunny at 22°C.' },
+    ]);
+  });
+
+  it('returns empty array when all messages are empty', () => {
+    const messages: Message[] = [
+      { role: 'user', content: '' },
+      { role: 'assistant', content: '' },
+    ];
+
+    const result = normalizeMessagesForLocal(undefined, messages);
+
+    expect(result).toEqual([]);
+  });
+
+  it('returns only system message when all messages are empty but system is set', () => {
+    const messages: Message[] = [
+      { role: 'user', content: '' },
+    ];
+
+    const result = normalizeMessagesForLocal('System prompt', messages);
+
+    expect(result).toEqual([
+      { role: 'system', content: 'System prompt' },
+    ]);
+  });
+});
@@ -120,6 +120,78 @@ export function getMessageText(message: Message): string {
    .join('');
 }

+/**
+ * Serialize a message's content to a plain string, including tool_use and
+ * tool_result structured blocks that getMessageText() would discard.
+ * This is needed for local model backends (llama.cpp, Ollama) whose chat
+ * templates don't understand Anthropic-style structured content blocks.
+ */
+export function getMessageTextWithTools(message: Message): string {
+  if (typeof message.content === 'string') {
+    return message.content;
+  }
+
+  const parts: string[] = [];
+  for (const block of message.content as Record<string, unknown>[]) {
+    if (block.type === 'text' && typeof block.text === 'string') {
+      parts.push(block.text);
+    } else if (block.type === 'tool_use') {
+      const name = block.name as string;
+      let argsStr: string;
+      try {
+        argsStr = JSON.stringify(block.input);
+      } catch {
+        argsStr = String(block.input);
+      }
+      parts.push(`[Calling tool: ${name}(${argsStr})]`);
+    } else if (block.type === 'tool_result') {
+      const content = (block.content as string) ?? '';
+      const isError = block.is_error ? ' (error)' : '';
+      parts.push(`[Tool result${isError}: ${content}]`);
+    }
+  }
+  return parts.join('\n');
+}
+
+interface SimpleMessage {
+  role: 'system' | 'user' | 'assistant';
+  content: string;
+}
+
+/**
+ * Normalize a message array for local model backends that require strict
+ * role alternation (system? -> user -> assistant -> user -> ...).
+ *
+ * 1. Serializes structured tool_use/tool_result content blocks to text
+ * 2. Drops empty messages
+ * 3. Merges consecutive same-role messages with a newline separator
+ */
+export function normalizeMessagesForLocal(
+  system: string | undefined,
+  messages: Message[],
+): SimpleMessage[] {
+  const result: SimpleMessage[] = [];
+
+  if (system) {
+    result.push({ role: 'system', content: system });
+  }
+
+  for (const msg of messages) {
+    const text = getMessageTextWithTools(msg);
+    if (!text) continue; // drop empty messages
+
+    const last = result.length > 0 ? result[result.length - 1] : undefined;
+    if (last && last.role === msg.role) {
+      // Merge consecutive same-role messages
+      last.content += '\n\n' + text;
+    } else {
+      result.push({ role: msg.role, content: text });
+    }
+  }
+
+  return result;
+}
+
 /** Configuration for audio transcription via Whisper-compatible API. */
 export interface AudioTranscriptionConfig {
  /** Whisper-compatible API endpoint (e.g. "https://api.openai.com/v1/audio/transcriptions") */