fix: normalize message roles for local model backends (llama.cpp, Ollama)
Local backends using strict chat templates (e.g. Mistral 3) rejected Flynn's Anthropic-style tool_use/tool_result content blocks, causing 'roles must alternate' errors. Added getMessageTextWithTools() and normalizeMessagesForLocal() to serialize structured blocks to plain text, drop empty messages, and merge consecutive same-role messages. Also fixed compaction to ensure kept messages start with user role.
This commit is contained in:
@@ -101,4 +101,23 @@ describe('compactHistory', () => {
|
|||||||
expect(DEFAULT_COMPACTION_CONFIG.keepTurns).toBe(4);
|
expect(DEFAULT_COMPACTION_CONFIG.keepTurns).toBe(4);
|
||||||
expect(DEFAULT_COMPACTION_CONFIG.summaryMaxTokens).toBe(1024);
|
expect(DEFAULT_COMPACTION_CONFIG.summaryMaxTokens).toBe(1024);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('shifts leading assistant messages from toKeep into toCompact to ensure user-first', async () => {
|
||||||
|
// 9 messages: user(0), assistant(1), ..., user(8)
|
||||||
|
// With keepTurns=2 (keepCount=4), toKeep starts as [assistant(5), user(6), assistant(7), user(8)]
|
||||||
|
// The fix shifts assistant(5) into toCompact, so toKeep = [user(6), assistant(7), user(8)]
|
||||||
|
const messages = makeMessages(9);
|
||||||
|
const orchestrator = makeMockOrchestrator();
|
||||||
|
|
||||||
|
const result = await compactHistory({ messages, orchestrator, config });
|
||||||
|
|
||||||
|
// summary(assistant) + user(6) + assistant(7) + user(8) = 4 messages
|
||||||
|
expect(result.messages).toHaveLength(4);
|
||||||
|
expect(result.compactedCount).toBe(6);
|
||||||
|
// First is summary (assistant), second must be user
|
||||||
|
expect(result.messages[0].role).toBe('assistant');
|
||||||
|
expect(result.messages[0].content).toContain('[Summary of earlier conversation]');
|
||||||
|
expect(result.messages[1].role).toBe('user');
|
||||||
|
expect(result.messages[1].content).toBe('Message 6');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -53,6 +53,12 @@ export async function compactHistory(opts: {
|
|||||||
const toCompact = messages.slice(0, -keepCount);
|
const toCompact = messages.slice(0, -keepCount);
|
||||||
const toKeep = messages.slice(-keepCount);
|
const toKeep = messages.slice(-keepCount);
|
||||||
|
|
||||||
|
// Ensure toKeep starts with a user message to avoid assistant→assistant
|
||||||
|
// after the compaction summary (which has role 'assistant').
|
||||||
|
while (toKeep.length > 0 && toKeep[0].role === 'assistant') {
|
||||||
|
toCompact.push(toKeep.shift()!);
|
||||||
|
}
|
||||||
|
|
||||||
const formattedConversation = toCompact.map((msg) => `${msg.role}: ${getMessageText(msg)}`).join('\n\n');
|
const formattedConversation = toCompact.map((msg) => `${msg.role}: ${getMessageText(msg)}`).join('\n\n');
|
||||||
|
|
||||||
const tier = orchestrator.getDelegationTier('compaction');
|
const tier = orchestrator.getDelegationTier('compaction');
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ModelToolCall } from '../types.js';
|
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ModelToolCall } from '../types.js';
|
||||||
import { getMessageText } from '../media.js';
|
import { normalizeMessagesForLocal } from '../media.js';
|
||||||
|
|
||||||
export interface LlamaCppClientConfig {
|
export interface LlamaCppClientConfig {
|
||||||
endpoint: string;
|
endpoint: string;
|
||||||
@@ -7,11 +7,6 @@ export interface LlamaCppClientConfig {
|
|||||||
authToken?: string;
|
authToken?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface LlamaCppMessage {
|
|
||||||
role: 'system' | 'user' | 'assistant';
|
|
||||||
content: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface LlamaCppToolCall {
|
interface LlamaCppToolCall {
|
||||||
id: string;
|
id: string;
|
||||||
type: 'function';
|
type: 'function';
|
||||||
@@ -63,15 +58,7 @@ export class LlamaCppClient implements ModelClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async chat(request: ChatRequest): Promise<ChatResponse> {
|
async chat(request: ChatRequest): Promise<ChatResponse> {
|
||||||
const messages: LlamaCppMessage[] = [];
|
const messages = normalizeMessagesForLocal(request.system, request.messages);
|
||||||
|
|
||||||
if (request.system) {
|
|
||||||
messages.push({ role: 'system', content: request.system });
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const msg of request.messages) {
|
|
||||||
messages.push({ role: msg.role, content: getMessageText(msg) });
|
|
||||||
}
|
|
||||||
|
|
||||||
const headers: Record<string, string> = {
|
const headers: Record<string, string> = {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
@@ -142,15 +129,7 @@ export class LlamaCppClient implements ModelClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
|
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
|
||||||
const messages: LlamaCppMessage[] = [];
|
const messages = normalizeMessagesForLocal(request.system, request.messages);
|
||||||
|
|
||||||
if (request.system) {
|
|
||||||
messages.push({ role: 'system', content: request.system });
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const msg of request.messages) {
|
|
||||||
messages.push({ role: msg.role, content: getMessageText(msg) });
|
|
||||||
}
|
|
||||||
|
|
||||||
const headers: Record<string, string> = {
|
const headers: Record<string, string> = {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { Ollama, type Tool } from 'ollama';
|
import { Ollama, type Tool } from 'ollama';
|
||||||
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ToolDefinition, ModelToolCall } from '../types.js';
|
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, ToolDefinition, ModelToolCall } from '../types.js';
|
||||||
import { getMessageText } from '../media.js';
|
import { normalizeMessagesForLocal } from '../media.js';
|
||||||
|
|
||||||
export interface OllamaClientConfig {
|
export interface OllamaClientConfig {
|
||||||
host?: string;
|
host?: string;
|
||||||
@@ -61,15 +61,7 @@ export class OllamaClient implements ModelClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async chat(request: ChatRequest): Promise<ChatResponse> {
|
async chat(request: ChatRequest): Promise<ChatResponse> {
|
||||||
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
|
const messages = normalizeMessagesForLocal(request.system, request.messages);
|
||||||
|
|
||||||
if (request.system) {
|
|
||||||
messages.push({ role: 'system', content: request.system });
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const msg of request.messages) {
|
|
||||||
messages.push({ role: msg.role, content: getMessageText(msg) });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build the chat params, optionally including tools
|
// Build the chat params, optionally including tools
|
||||||
const chatParams: Parameters<typeof this.client.chat>[0] = {
|
const chatParams: Parameters<typeof this.client.chat>[0] = {
|
||||||
@@ -120,15 +112,7 @@ export class OllamaClient implements ModelClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
|
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
|
||||||
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
|
const messages = normalizeMessagesForLocal(request.system, request.messages);
|
||||||
|
|
||||||
if (request.system) {
|
|
||||||
messages.push({ role: 'system', content: request.system });
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const msg of request.messages) {
|
|
||||||
messages.push({ role: msg.role, content: getMessageText(msg) });
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Build tools array if provided and model supports them
|
// Build tools array if provided and model supports them
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ import {
|
|||||||
attachmentToImageSource,
|
attachmentToImageSource,
|
||||||
buildUserMessage,
|
buildUserMessage,
|
||||||
getMessageText,
|
getMessageText,
|
||||||
|
getMessageTextWithTools,
|
||||||
|
normalizeMessagesForLocal,
|
||||||
hasImages,
|
hasImages,
|
||||||
transcribeAudio,
|
transcribeAudio,
|
||||||
buildUserMessageWithAudio,
|
buildUserMessageWithAudio,
|
||||||
@@ -605,3 +607,216 @@ describe('buildUserMessageWithAudio', () => {
|
|||||||
expect(result.content).toContain('[Voice message]:');
|
expect(result.content).toContain('[Voice message]:');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// 10. getMessageTextWithTools
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('getMessageTextWithTools', () => {
|
||||||
|
it('returns string directly for string content', () => {
|
||||||
|
const msg: Message = { role: 'user', content: 'plain text' };
|
||||||
|
expect(getMessageTextWithTools(msg)).toBe('plain text');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('extracts text from text-only array content', () => {
|
||||||
|
const msg: Message = {
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'text', text: 'Hello ' },
|
||||||
|
{ type: 'text', text: 'World' },
|
||||||
|
],
|
||||||
|
};
|
||||||
|
expect(getMessageTextWithTools(msg)).toBe('Hello \nWorld');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('serializes tool_use blocks to readable text', () => {
|
||||||
|
const msg = {
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_use', name: 'search', input: { query: 'foo' } },
|
||||||
|
],
|
||||||
|
} as unknown as Message;
|
||||||
|
|
||||||
|
expect(getMessageTextWithTools(msg)).toBe('[Calling tool: search({"query":"foo"})]');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('serializes tool_result blocks to readable text', () => {
|
||||||
|
const msg = {
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', content: 'Found 3 results' },
|
||||||
|
],
|
||||||
|
} as unknown as Message;
|
||||||
|
|
||||||
|
expect(getMessageTextWithTools(msg)).toBe('[Tool result: Found 3 results]');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('marks error tool_result blocks', () => {
|
||||||
|
const msg = {
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', content: 'File not found', is_error: true },
|
||||||
|
],
|
||||||
|
} as unknown as Message;
|
||||||
|
|
||||||
|
expect(getMessageTextWithTools(msg)).toBe('[Tool result (error): File not found]');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('handles mixed content (text + tool_use + tool_result) joined with newline', () => {
|
||||||
|
const msg = {
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'text', text: 'Let me search for that.' },
|
||||||
|
{ type: 'tool_use', name: 'web_search', input: { q: 'test' } },
|
||||||
|
{ type: 'tool_result', content: 'No results' },
|
||||||
|
],
|
||||||
|
} as unknown as Message;
|
||||||
|
|
||||||
|
const result = getMessageTextWithTools(msg);
|
||||||
|
expect(result).toBe(
|
||||||
|
'Let me search for that.\n[Calling tool: web_search({"q":"test"})]\n[Tool result: No results]',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns empty string for empty array content', () => {
|
||||||
|
const msg: Message = {
|
||||||
|
role: 'assistant',
|
||||||
|
content: [],
|
||||||
|
};
|
||||||
|
expect(getMessageTextWithTools(msg)).toBe('');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// 11. normalizeMessagesForLocal
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('normalizeMessagesForLocal', () => {
|
||||||
|
it('passes through simple text messages', () => {
|
||||||
|
const messages: Message[] = [
|
||||||
|
{ role: 'user', content: 'Hello' },
|
||||||
|
{ role: 'assistant', content: 'Hi there' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const result = normalizeMessagesForLocal(undefined, messages);
|
||||||
|
|
||||||
|
expect(result).toEqual([
|
||||||
|
{ role: 'user', content: 'Hello' },
|
||||||
|
{ role: 'assistant', content: 'Hi there' },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('prepends system message when provided', () => {
|
||||||
|
const messages: Message[] = [
|
||||||
|
{ role: 'user', content: 'Hello' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const result = normalizeMessagesForLocal('You are helpful.', messages);
|
||||||
|
|
||||||
|
expect(result).toEqual([
|
||||||
|
{ role: 'system', content: 'You are helpful.' },
|
||||||
|
{ role: 'user', content: 'Hello' },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('omits system message when undefined', () => {
|
||||||
|
const messages: Message[] = [
|
||||||
|
{ role: 'user', content: 'Hello' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const result = normalizeMessagesForLocal(undefined, messages);
|
||||||
|
|
||||||
|
expect(result).toEqual([
|
||||||
|
{ role: 'user', content: 'Hello' },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('merges consecutive same-role messages', () => {
|
||||||
|
const messages: Message[] = [
|
||||||
|
{ role: 'user', content: 'Part 1' },
|
||||||
|
{ role: 'user', content: 'Part 2' },
|
||||||
|
{ role: 'assistant', content: 'Response' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const result = normalizeMessagesForLocal(undefined, messages);
|
||||||
|
|
||||||
|
expect(result).toEqual([
|
||||||
|
{ role: 'user', content: 'Part 1\n\nPart 2' },
|
||||||
|
{ role: 'assistant', content: 'Response' },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('drops empty messages (e.g. image-only content that serializes to "")', () => {
|
||||||
|
const messages: Message[] = [
|
||||||
|
{ role: 'user', content: 'Before' },
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'image', source: { type: 'url', media_type: 'image/png', url: 'https://example.com/img.png' } },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{ role: 'assistant', content: 'After' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const result = normalizeMessagesForLocal(undefined, messages);
|
||||||
|
|
||||||
|
expect(result).toEqual([
|
||||||
|
{ role: 'user', content: 'Before' },
|
||||||
|
{ role: 'assistant', content: 'After' },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('handles realistic agent tool loop sequence', () => {
|
||||||
|
// Simulates: user asks question → assistant calls tool → user provides result → assistant responds
|
||||||
|
const messages = [
|
||||||
|
{ role: 'user', content: 'What is the weather?' },
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{ type: 'text', text: 'Let me check.' },
|
||||||
|
{ type: 'tool_use', name: 'get_weather', input: { city: 'London' } },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'tool_result', content: 'Sunny, 22°C' },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{ role: 'assistant', content: 'The weather in London is sunny at 22°C.' },
|
||||||
|
] as unknown as Message[];
|
||||||
|
|
||||||
|
const result = normalizeMessagesForLocal('You are a weather bot.', messages);
|
||||||
|
|
||||||
|
expect(result).toEqual([
|
||||||
|
{ role: 'system', content: 'You are a weather bot.' },
|
||||||
|
{ role: 'user', content: 'What is the weather?' },
|
||||||
|
{ role: 'assistant', content: 'Let me check.\n[Calling tool: get_weather({"city":"London"})]' },
|
||||||
|
{ role: 'user', content: '[Tool result: Sunny, 22°C]' },
|
||||||
|
{ role: 'assistant', content: 'The weather in London is sunny at 22°C.' },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns empty array when all messages are empty', () => {
|
||||||
|
const messages: Message[] = [
|
||||||
|
{ role: 'user', content: '' },
|
||||||
|
{ role: 'assistant', content: '' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const result = normalizeMessagesForLocal(undefined, messages);
|
||||||
|
|
||||||
|
expect(result).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns only system message when all messages are empty but system is set', () => {
|
||||||
|
const messages: Message[] = [
|
||||||
|
{ role: 'user', content: '' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const result = normalizeMessagesForLocal('System prompt', messages);
|
||||||
|
|
||||||
|
expect(result).toEqual([
|
||||||
|
{ role: 'system', content: 'System prompt' },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -120,6 +120,78 @@ export function getMessageText(message: Message): string {
|
|||||||
.join('');
|
.join('');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serialize a message's content to a plain string, including tool_use and
|
||||||
|
* tool_result structured blocks that getMessageText() would discard.
|
||||||
|
* This is needed for local model backends (llama.cpp, Ollama) whose chat
|
||||||
|
* templates don't understand Anthropic-style structured content blocks.
|
||||||
|
*/
|
||||||
|
export function getMessageTextWithTools(message: Message): string {
|
||||||
|
if (typeof message.content === 'string') {
|
||||||
|
return message.content;
|
||||||
|
}
|
||||||
|
|
||||||
|
const parts: string[] = [];
|
||||||
|
for (const block of message.content as Record<string, unknown>[]) {
|
||||||
|
if (block.type === 'text' && typeof block.text === 'string') {
|
||||||
|
parts.push(block.text);
|
||||||
|
} else if (block.type === 'tool_use') {
|
||||||
|
const name = block.name as string;
|
||||||
|
let argsStr: string;
|
||||||
|
try {
|
||||||
|
argsStr = JSON.stringify(block.input);
|
||||||
|
} catch {
|
||||||
|
argsStr = String(block.input);
|
||||||
|
}
|
||||||
|
parts.push(`[Calling tool: ${name}(${argsStr})]`);
|
||||||
|
} else if (block.type === 'tool_result') {
|
||||||
|
const content = (block.content as string) ?? '';
|
||||||
|
const isError = block.is_error ? ' (error)' : '';
|
||||||
|
parts.push(`[Tool result${isError}: ${content}]`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return parts.join('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
interface SimpleMessage {
|
||||||
|
role: 'system' | 'user' | 'assistant';
|
||||||
|
content: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize a message array for local model backends that require strict
|
||||||
|
* role alternation (system? -> user -> assistant -> user -> ...).
|
||||||
|
*
|
||||||
|
* 1. Serializes structured tool_use/tool_result content blocks to text
|
||||||
|
* 2. Drops empty messages
|
||||||
|
* 3. Merges consecutive same-role messages with a newline separator
|
||||||
|
*/
|
||||||
|
export function normalizeMessagesForLocal(
|
||||||
|
system: string | undefined,
|
||||||
|
messages: Message[],
|
||||||
|
): SimpleMessage[] {
|
||||||
|
const result: SimpleMessage[] = [];
|
||||||
|
|
||||||
|
if (system) {
|
||||||
|
result.push({ role: 'system', content: system });
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const msg of messages) {
|
||||||
|
const text = getMessageTextWithTools(msg);
|
||||||
|
if (!text) continue; // drop empty messages
|
||||||
|
|
||||||
|
const last = result.length > 0 ? result[result.length - 1] : undefined;
|
||||||
|
if (last && last.role === msg.role) {
|
||||||
|
// Merge consecutive same-role messages
|
||||||
|
last.content += '\n\n' + text;
|
||||||
|
} else {
|
||||||
|
result.push({ role: msg.role, content: text });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/** Configuration for audio transcription via Whisper-compatible API. */
|
/** Configuration for audio transcription via Whisper-compatible API. */
|
||||||
export interface AudioTranscriptionConfig {
|
export interface AudioTranscriptionConfig {
|
||||||
/** Whisper-compatible API endpoint (e.g. "https://api.openai.com/v1/audio/transcriptions") */
|
/** Whisper-compatible API endpoint (e.g. "https://api.openai.com/v1/audio/transcriptions") */
|
||||||
|
|||||||
Reference in New Issue
Block a user