import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import type { Attachment } from '../channels/types.js';
import type { Message } from './types.js';
import {
  isSupportedImage,
  isSupportedAudio,
  attachmentToImageSource,
  buildUserMessage,
  getMessageText,
  getMessageTextWithTools,
  normalizeMessagesForLocal,
  hasImages,
  transcribeAudio,
  buildUserMessageWithAudio,
  type AudioTranscriptionConfig,
  mimeToExtension,
} from './media.js';

// ---------------------------------------------------------------------------
// Helpers – reusable attachment fixtures
// ---------------------------------------------------------------------------

/**
 * Builds an Attachment fixture from the given fields.
 *
 * `mimeType` is mandatory; every other Attachment field is optional and is
 * copied through unchanged. The result is a shallow copy of `overrides`.
 *
 * @param overrides - Fields to place on the attachment.
 * @returns A new Attachment object containing exactly the supplied fields.
 */
function makeAttachment(overrides: Partial<Attachment> & { mimeType: string }): Attachment {
  return { ...overrides };
}

/** JPEG image carried inline as base64 data. */
const jpegBase64Attachment: Attachment = makeAttachment({
  mimeType: 'image/jpeg',
  data: 'aGVsbG8=', // "hello" in base64
  filename: 'photo.jpg',
});

/** PNG image referenced by URL only (no inline data). */
const pngUrlAttachment: Attachment = makeAttachment({
  mimeType: 'image/png',
  url: 'https://example.com/image.png',
});

/** Non-image, non-audio attachment used for the "unsupported" paths. */
const pdfAttachment: Attachment = makeAttachment({
  mimeType: 'application/pdf',
  data: 'cGRm', // "pdf" in base64
  filename: 'doc.pdf',
});

/** OGG voice note; the payload is just zero bytes. */
const oggAudioAttachment: Attachment = makeAttachment({
  mimeType: 'audio/ogg',
  data: 'AAAAAAAAAAAAAAAAAAAA',
  filename: 'voice.ogg',
});

const mp3AudioAttachment: Attachment = makeAttachment({
  mimeType: 'audio/mpeg',
  // NOTE(review): placeholder payload; it has not been verified to be a decodable MP3.
  data: 'AAAAAQAAAAAAAEAAABkAAABTQA=',
  filename: 'audio.mp3',
});

const wavAudioAttachment: Attachment = makeAttachment({
  mimeType: 'audio/wav',
  // Base64 of a minimal RIFF/WAVE header with an empty data chunk.
  data: 'UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAAAAA=',
  filename: 'audio.wav',
});

const m4aAudioAttachment: Attachment = makeAttachment({
  mimeType: 'audio/x-m4a',
  // NOTE(review): placeholder payload; it has not been verified to be a decodable M4A.
  data: 'AAAAUGV0Zi4xLjAgc291cmNlIGZvciBzdGFydHBvaW50',
  filename: 'audio.m4a',
});

// ---------------------------------------------------------------------------
// 1.
// isSupportedImage
// ---------------------------------------------------------------------------

describe('isSupportedImage', () => {
  const supportedMimeTypes = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];
  const unsupportedMimeTypes = ['image/bmp', 'application/pdf', 'audio/mp3', 'text/plain'];

  // Positive: each of the four supported image MIME types is accepted.
  it.each(supportedMimeTypes)('returns true for supported type %s', (mimeType) => {
    const attachment = makeAttachment({ mimeType });
    expect(isSupportedImage(attachment)).toBe(true);
  });

  // Negative: anything outside that list is rejected.
  it.each(unsupportedMimeTypes)('returns false for unsupported type %s', (mimeType) => {
    const attachment = makeAttachment({ mimeType });
    expect(isSupportedImage(attachment)).toBe(false);
  });
});

// ---------------------------------------------------------------------------
// 2. attachmentToImageSource
// ---------------------------------------------------------------------------

describe('attachmentToImageSource', () => {
  // Positive: inline data yields a base64 ImageSource.
  it('returns base64 ImageSource when attachment has data', () => {
    const expectedSource = {
      type: 'base64',
      media_type: 'image/jpeg',
      data: 'aGVsbG8=',
    };

    expect(attachmentToImageSource(jpegBase64Attachment)).toEqual(expectedSource);
  });

  // Positive: a URL without inline data yields a url ImageSource.
  it('returns url ImageSource when attachment has url but no data', () => {
    const expectedSource = {
      type: 'url',
      media_type: 'image/png',
      url: 'https://example.com/image.png',
    };

    expect(attachmentToImageSource(pngUrlAttachment)).toEqual(expectedSource);
  });

  // Positive: inline data wins when the attachment carries both data and url.
  it('prefers base64 data over url when both are present', () => {
    const dualSourceAttachment = makeAttachment({
      mimeType: 'image/webp',
      data: 'YWJj',
      url: 'https://example.com/img.webp',
    });

    const source = attachmentToImageSource(dualSourceAttachment);

    expect(source).toEqual({
      type: 'base64',
      media_type: 'image/webp',
      data: 'YWJj',
    });
  });

  // Negative: a non-image MIME type produces null.
  it('returns null for unsupported MIME type', () => {
    const source = attachmentToImageSource(pdfAttachment);
    expect(source).toBeNull();
  });

  // Negative: a supported MIME type with no payload at all produces null.
  it('returns null when attachment has neither data nor url', () => {
    const emptyImageAttachment = makeAttachment({ mimeType: 'image/gif' });
    const source = attachmentToImageSource(emptyImageAttachment);
    expect(source).toBeNull();
  });
});

// ---------------------------------------------------------------------------
// 3. buildUserMessage
// ---------------------------------------------------------------------------

describe('buildUserMessage', () => {
  // Positive: omitting the attachments argument gives a plain string message.
  it('returns plain string content when no attachments', () => {
    const message = buildUserMessage('Hello');
    expect(message).toEqual({ role: 'user', content: 'Hello' });
  });

  // Positive: an empty attachments array behaves like no attachments.
  it('returns plain string content when attachments is empty array', () => {
    const message = buildUserMessage('Hello', []);
    expect(message).toEqual({ role: 'user', content: 'Hello' });
  });

  // Positive: attachments that are not supported images are ignored.
  it('returns plain string content when no image attachments (PDF only)', () => {
    const message = buildUserMessage('See attached', [pdfAttachment]);
    expect(message).toEqual({ role: 'user', content: 'See attached' });
  });

  // Positive: an image attachment turns the content into text + image parts.
  it('returns multimodal message with text + image parts', () => {
    const message = buildUserMessage('Look at this', [jpegBase64Attachment]);

    expect(message.role).toBe('user');
    expect(Array.isArray(message.content)).toBe(true);

    const contentParts = message.content as Array<{ type: string }>;
    expect(contentParts).toHaveLength(2);

    const [textPart, imagePart] = contentParts;
    expect(textPart).toEqual({ type: 'text', text: 'Look at this' });
    expect(imagePart).toEqual({
      type: 'image',
      source: { type: 'base64', media_type: 'image/jpeg', data: 'aGVsbG8=' },
    });
  });

  // Positive: with empty text, only the image part is emitted.
  it('returns multimodal message with just image part when text is empty', () => {
    const message = buildUserMessage('', [pngUrlAttachment]);

    expect(message.role).toBe('user');

    const contentParts = message.content as Array<{ type: string }>;
    // The empty text part is dropped, leaving the single image part.
    expect(contentParts).toHaveLength(1);
    expect(contentParts[0]).toEqual({
      type: 'image',
      source: { type: 'url', media_type: 'image/png', url: 'https://example.com/image.png' },
    });
  });

  // Positive: every supported image attachment becomes its own part.
  it('handles multiple image attachments', () => {
    const attachments = [jpegBase64Attachment, pngUrlAttachment];
    const message = buildUserMessage('Two images', attachments);

    const contentParts = message.content as Array<{ type: string }>;
    expect(contentParts).toHaveLength(3); // one text part followed by two image parts

    const [textPart, firstImage, secondImage] = contentParts;
    expect(textPart).toEqual({ type: 'text', text: 'Two images' });
    expect(firstImage).toMatchObject({ type: 'image' });
    expect(secondImage).toMatchObject({ type: 'image' });
  });
});

// ---------------------------------------------------------------------------
// 4. getMessageText
// ---------------------------------------------------------------------------

describe('getMessageText', () => {
  // Positive: string content is returned as-is.
  it('returns string directly for string content messages', () => {
    const message: Message = { role: 'user', content: 'plain text' };
    const text = getMessageText(message);
    expect(text).toBe('plain text');
  });

  // Positive: text parts are concatenated and image parts are skipped.
  it('extracts and joins text parts from multimodal messages', () => {
    const message: Message = {
      role: 'user',
      content: [
        { type: 'text', text: 'Hello ' },
        { type: 'image', source: { type: 'base64', media_type: 'image/png', data: 'x' } },
        { type: 'text', text: 'World' },
      ],
    };

    const text = getMessageText(message);

    expect(text).toBe('Hello World');
  });

  // Negative: a message made only of image parts has no text.
  it('returns empty string for multimodal messages with only image parts', () => {
    const message: Message = {
      role: 'user',
      content: [
        {
          type: 'image',
          source: { type: 'url', media_type: 'image/gif', url: 'https://example.com/a.gif' },
        },
      ],
    };

    const text = getMessageText(message);

    expect(text).toBe('');
  });
});

// ---------------------------------------------------------------------------
// 5. hasImages
// ---------------------------------------------------------------------------

describe('hasImages', () => {
  // Negative: plain string content cannot contain images.
  it('returns false for string content messages', () => {
    const message: Message = { role: 'user', content: 'no images here' };
    expect(hasImages(message)).toBe(false);
  });

  // Negative: an array of text-only parts contains no images.
  it('returns false for multimodal messages with only text parts', () => {
    const message: Message = {
      role: 'user',
      content: [{ type: 'text', text: 'just text' }],
    };

    expect(hasImages(message)).toBe(false);
  });

  // Positive: at least one image part is enough.
  it('returns true for multimodal messages with image parts', () => {
    const message: Message = {
      role: 'user',
      content: [
        { type: 'text', text: 'caption' },
        { type: 'image', source: { type: 'base64', media_type: 'image/jpeg', data: 'abc' } },
      ],
    };

    expect(hasImages(message)).toBe(true);
  });
});

// ---------------------------------------------------------------------------
// 6.
// isSupportedAudio
// ---------------------------------------------------------------------------

describe('isSupportedAudio', () => {
  const supportedMimeTypes = [
    'audio/ogg',
    'audio/mpeg',
    'audio/mp3',
    'audio/wav',
    'audio/webm',
    'audio/mp4',
    'audio/x-m4a',
  ];
  const unsupportedMimeTypes = [
    'audio/flac',
    'audio/aac',
    'audio/wma',
    'application/pdf',
    'image/jpeg',
    'text/plain',
  ];

  // Positive: every supported audio MIME type is accepted.
  it.each(supportedMimeTypes)('returns true for supported type %s', (mimeType) => {
    const attachment = makeAttachment({ mimeType });
    expect(isSupportedAudio(attachment)).toBe(true);
  });

  // Negative: other audio formats and non-audio types are rejected.
  it.each(unsupportedMimeTypes)('returns false for unsupported type %s', (mimeType) => {
    const attachment = makeAttachment({ mimeType });
    expect(isSupportedAudio(attachment)).toBe(false);
  });
});

// ---------------------------------------------------------------------------
// 7. mimeToExtension
// ---------------------------------------------------------------------------

describe('mimeToExtension', () => {
  // Each row is [MIME type, expected file extension].
  const knownExtensions = [
    ['audio/ogg', 'ogg'],
    ['audio/mpeg', 'mp3'],
    ['audio/wav', 'wav'],
    ['audio/webm', 'webm'],
    ['audio/mp4', 'm4a'],
    ['audio/x-m4a', 'm4a'],
  ];

  it.each(knownExtensions)('returns correct extension for %s', (mimeType, extension) => {
    expect(mimeToExtension(mimeType)).toBe(extension);
  });

  // Unmapped MIME types fall back to the generic "bin" extension.
  it('returns bin for unknown MIME type', () => {
    const extension = mimeToExtension('audio/flac');
    expect(extension).toBe('bin');
  });
});

// ---------------------------------------------------------------------------
// 8.
// transcribeAudio
// ---------------------------------------------------------------------------

/** Transcription endpoint used by most tests; never contacted because fetch is mocked. */
const TEST_TRANSCRIPTION_ENDPOINT = 'https://api.example.com/v1/audio/transcriptions';

/**
 * Makes the mocked global `fetch` resolve with a successful transcription
 * response whose JSON body is `{ text }`.
 *
 * Must be called after `global.fetch` has been replaced with `vi.fn()`.
 *
 * @param text - Transcript the fake service should return.
 */
function mockTranscriptionSuccess(text: string): void {
  // Only the fields the tests rely on are provided, hence the cast to Response.
  vi.mocked(global.fetch).mockResolvedValue({
    ok: true,
    json: async () => ({ text }),
  } as Response);
}

describe('transcribeAudio', () => {
  const mockTranscript = 'Hello, this is a test transcription';
  const originalFetch = global.fetch;

  // Replace fetch with a mock for each test so no real HTTP request is made.
  beforeEach(() => {
    global.fetch = vi.fn();
  });

  afterEach(() => {
    global.fetch = originalFetch;
  });

  // Positive: transcribes audio with valid config.
  it('transcribes audio successfully with valid config', async () => {
    mockTranscriptionSuccess(mockTranscript);

    const config: AudioTranscriptionConfig = {
      endpoint: TEST_TRANSCRIPTION_ENDPOINT,
      apiKey: 'test-key',
      model: 'test-model',
    };

    const result = await transcribeAudio(oggAudioAttachment, config);

    expect(result).toBe(mockTranscript);
    expect(global.fetch).toHaveBeenCalledWith(
      'https://api.example.com/v1/audio/transcriptions',
      expect.objectContaining({
        method: 'POST',
        body: expect.any(FormData),
      }),
    );
  });

  // Negative: returns placeholder when endpoint is missing.
  it('returns placeholder message when endpoint is not configured', async () => {
    const result = await transcribeAudio(oggAudioAttachment);

    expect(result).toBe('[Audio message received but no transcription service is configured]');
  });

  // Negative: returns placeholder when API fails.
  it('returns placeholder message when API returns error', async () => {
    vi.mocked(global.fetch).mockResolvedValue({
      ok: false,
      status: 500,
      statusText: 'Internal Server Error',
      text: async () => 'Internal Server Error',
    } as Response);

    const config: AudioTranscriptionConfig = {
      endpoint: TEST_TRANSCRIPTION_ENDPOINT,
    };

    const result = await transcribeAudio(oggAudioAttachment, config);

    expect(result).toBe('[Audio message transcription failed]');
  });

  // Negative: handles network errors gracefully.
  it('returns placeholder message on network error', async () => {
    vi.mocked(global.fetch).mockRejectedValue(new Error('Network error'));

    const config: AudioTranscriptionConfig = {
      endpoint: TEST_TRANSCRIPTION_ENDPOINT,
    };

    const result = await transcribeAudio(oggAudioAttachment, config);

    expect(result).toBe('[Audio message transcription failed]');
  });

  // Positive: uses Whisper-1 model by default.
  it('uses whisper-1 model by default', async () => {
    const config: AudioTranscriptionConfig = {
      endpoint: 'https://api.openai.com/v1/audio/transcriptions',
    };
    mockTranscriptionSuccess('test');

    await transcribeAudio(oggAudioAttachment, config);

    expect(global.fetch).toHaveBeenCalledWith(
      'https://api.openai.com/v1/audio/transcriptions',
      expect.objectContaining({
        body: expect.any(FormData),
      }),
    );

    // Inspect the submitted form so the default model is actually verified.
    // NOTE(review): assumes the model is sent as the "model" form field, as in
    // the OpenAI transcription API — confirm against the implementation.
    const requestBody = vi.mocked(global.fetch).mock.calls[0]?.[1]?.body;
    expect(requestBody).toBeInstanceOf(FormData);
    expect((requestBody as FormData).get('model')).toBe('whisper-1');
  });
});

// ---------------------------------------------------------------------------
// 9. buildUserMessageWithAudio
// ---------------------------------------------------------------------------

describe('buildUserMessageWithAudio', () => {
  const textMessage = 'What is 2 + 2?';
  const transcriptionConfig: AudioTranscriptionConfig = {
    endpoint: TEST_TRANSCRIPTION_ENDPOINT,
  };
  const originalFetch = global.fetch;

  // Replace fetch with a mock for each test so no real HTTP request is made.
  beforeEach(() => {
    global.fetch = vi.fn();
  });

  afterEach(() => {
    global.fetch = originalFetch;
  });

  // Positive: plain text message when no attachments.
  it('returns plain text message when no attachments', async () => {
    const result = await buildUserMessageWithAudio(textMessage);

    expect(result).toEqual({ role: 'user', content: textMessage });
  });

  // Positive: includes transcription when audio attachment present.
  it('includes transcription when audio attachment is present', async () => {
    mockTranscriptionSuccess('The answer is 4');

    const result = await buildUserMessageWithAudio(
      textMessage,
      [oggAudioAttachment],
      transcriptionConfig,
    );

    expect(result.role).toBe('user');
    expect(result.content).toContain('[Voice message]:');
    expect(result.content).toContain('The answer is 4');
    expect(result.content).toContain(textMessage);
  });

  // Positive: transcribes multiple audio attachments.
  it('transcribes multiple audio attachments', async () => {
    mockTranscriptionSuccess('The answer is 4');

    const result = await buildUserMessageWithAudio(
      textMessage,
      [oggAudioAttachment, mp3AudioAttachment],
      transcriptionConfig,
    );

    // One transcription request per audio attachment.
    expect(global.fetch).toHaveBeenCalledTimes(2);

    // Both transcripts must be present, not just one of them.
    const voiceLine = '[Voice message]: The answer is 4';
    const occurrences = getMessageText(result).split(voiceLine).length - 1;
    expect(occurrences).toBe(2);
  });

  // Positive: audio transcripts appear before original text.
  it('places audio transcripts before original message text', async () => {
    mockTranscriptionSuccess('The answer is 4');

    const result = await buildUserMessageWithAudio(
      textMessage,
      [oggAudioAttachment],
      transcriptionConfig,
    );

    // Normalise string content into the parts form so both shapes are handled.
    const content = Array.isArray(result.content)
      ? result.content
      : [{ type: 'text' as const, text: result.content }];
    const textPart = content.find((p) => p.type === 'text') as
      | { type: 'text'; text: string }
      | undefined;
    expect(textPart).toBeDefined();

    const textContent = textPart?.text ?? '';
    const firstVoiceIndex = textContent.indexOf('[Voice message]:');
    const textIndex = textContent.indexOf(textMessage);

    // Guard against a vacuous pass: indexOf returns -1 when the marker is missing.
    expect(firstVoiceIndex).toBeGreaterThanOrEqual(0);
    expect(firstVoiceIndex).toBeLessThan(textIndex);
  });

  // Positive: handles mixed image and audio attachments.
  it('handles mixed image and audio attachments', async () => {
    mockTranscriptionSuccess('The answer is 4');

    const result = await buildUserMessageWithAudio(
      textMessage,
      [jpegBase64Attachment, oggAudioAttachment, pngUrlAttachment],
      transcriptionConfig,
    );

    expect(result.role).toBe('user');
    expect(Array.isArray(result.content)).toBe(true);

    const parts = result.content as Array<{ type: string; text?: string }>;
    expect(parts).toHaveLength(3); // one text part (transcript + message) and two image parts

    const textPart = parts.find((p) => p.type === 'text');
    expect(textPart?.text).toContain('[Voice message]:');
    expect(textPart?.text).toContain(textMessage);

    const imageParts = parts.filter((p) => p.type === 'image');
    expect(imageParts).toHaveLength(2);
  });

  // Positive: no transcription when audio config is missing.
  it('returns original message when audio config is missing', async () => {
    const result = await buildUserMessageWithAudio(textMessage, [oggAudioAttachment]);

    expect(result).toEqual({ role: 'user', content: textMessage });
  });

  // Positive: empty text with audio attachments.
  it('handles empty text with audio attachments', async () => {
    mockTranscriptionSuccess('Test');

    const result = await buildUserMessageWithAudio('', [oggAudioAttachment], transcriptionConfig);

    expect(result.role).toBe('user');
    expect(result.content).toContain('[Voice message]:');
  });
});

// ---------------------------------------------------------------------------
// 10. getMessageTextWithTools
// ---------------------------------------------------------------------------

// NOTE(review): messages containing tool_use / tool_result blocks are cast via
// `as unknown as Message` below, presumably because the Message content type
// does not declare those block shapes — confirm against ./types.js.
describe('getMessageTextWithTools', () => {
  it('returns string directly for string content', () => {
    const msg: Message = { role: 'user', content: 'plain text' };

    expect(getMessageTextWithTools(msg)).toBe('plain text');
  });

  // Unlike getMessageText, parts are joined with a newline.
  it('extracts text from text-only array content', () => {
    const msg: Message = {
      role: 'assistant',
      content: [
        { type: 'text', text: 'Hello ' },
        { type: 'text', text: 'World' },
      ],
    };

    expect(getMessageTextWithTools(msg)).toBe('Hello \nWorld');
  });

  it('serializes tool_use blocks to readable text', () => {
    const msg = {
      role: 'assistant',
      content: [{ type: 'tool_use', name: 'search', input: { query: 'foo' } }],
    } as unknown as Message;

    expect(getMessageTextWithTools(msg)).toBe('[Calling tool: search({"query":"foo"})]');
  });

  it('serializes tool_result blocks to readable text', () => {
    const msg = {
      role: 'user',
      content: [{ type: 'tool_result', content: 'Found 3 results' }],
    } as unknown as Message;

    expect(getMessageTextWithTools(msg)).toBe('[Tool result: Found 3 results]');
  });

  it('marks error tool_result blocks', () => {
    const msg = {
      role: 'user',
      content: [{ type: 'tool_result', content: 'File not found', is_error: true }],
    } as unknown as Message;

    expect(getMessageTextWithTools(msg)).toBe('[Tool result (error): File not found]');
  });

  it('handles mixed content (text + tool_use + tool_result) joined with newline', () => {
    const msg = {
      role: 'assistant',
      content: [
        { type: 'text', text: 'Let me search for that.' },
        { type: 'tool_use', name: 'web_search', input: { q: 'test' } },
        { type: 'tool_result', content: 'No results' },
      ],
    } as unknown as Message;

    const result = getMessageTextWithTools(msg);

    expect(result).toBe(
      'Let me search for that.\n[Calling tool: web_search({"q":"test"})]\n[Tool result: No results]',
    );
  });

  it('returns empty string for empty array content', () => {
    const msg: Message = {
      role: 'assistant',
      content: [],
    };

    expect(getMessageTextWithTools(msg)).toBe('');
  });
});

// ---------------------------------------------------------------------------
// 11. normalizeMessagesForLocal
// ---------------------------------------------------------------------------

describe('normalizeMessagesForLocal', () => {
  it('passes through simple text messages', () => {
    const messages: Message[] = [
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Hi there' },
    ];

    const result = normalizeMessagesForLocal(undefined, messages);

    expect(result).toEqual([
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Hi there' },
    ]);
  });

  it('prepends system message when provided', () => {
    const messages: Message[] = [{ role: 'user', content: 'Hello' }];

    const result = normalizeMessagesForLocal('You are helpful.', messages);

    expect(result).toEqual([
      { role: 'system', content: 'You are helpful.' },
      { role: 'user', content: 'Hello' },
    ]);
  });

  it('omits system message when undefined', () => {
    const messages: Message[] = [{ role: 'user', content: 'Hello' }];

    const result = normalizeMessagesForLocal(undefined, messages);

    expect(result).toEqual([{ role: 'user', content: 'Hello' }]);
  });

  // Adjacent messages from the same role are combined with a blank line between them.
  it('merges consecutive same-role messages', () => {
    const messages: Message[] = [
      { role: 'user', content: 'Part 1' },
      { role: 'user', content: 'Part 2' },
      { role: 'assistant', content: 'Response' },
    ];

    const result = normalizeMessagesForLocal(undefined, messages);

    expect(result).toEqual([
      { role: 'user', content: 'Part 1\n\nPart 2' },
      { role: 'assistant', content: 'Response' },
    ]);
  });

  it('drops empty messages (e.g. image-only content that serializes to "")', () => {
    const messages: Message[] = [
      { role: 'user', content: 'Before' },
      {
        role: 'user',
        content: [
          {
            type: 'image',
            source: { type: 'url', media_type: 'image/png', url: 'https://example.com/img.png' },
          },
        ],
      },
      { role: 'assistant', content: 'After' },
    ];

    const result = normalizeMessagesForLocal(undefined, messages);

    expect(result).toEqual([
      { role: 'user', content: 'Before' },
      { role: 'assistant', content: 'After' },
    ]);
  });

  it('handles realistic agent tool loop sequence', () => {
    // Simulates: user asks question → assistant calls tool → user provides result → assistant responds
    const messages = [
      { role: 'user', content: 'What is the weather?' },
      {
        role: 'assistant',
        content: [
          { type: 'text', text: 'Let me check.' },
          { type: 'tool_use', name: 'get_weather', input: { city: 'London' } },
        ],
      },
      {
        role: 'user',
        content: [{ type: 'tool_result', content: 'Sunny, 22°C' }],
      },
      { role: 'assistant', content: 'The weather in London is sunny at 22°C.' },
    ] as unknown as Message[];

    const result = normalizeMessagesForLocal('You are a weather bot.', messages);

    expect(result).toEqual([
      { role: 'system', content: 'You are a weather bot.' },
      { role: 'user', content: 'What is the weather?' },
      {
        role: 'assistant',
        content: 'Let me check.\n[Calling tool: get_weather({"city":"London"})]',
      },
      { role: 'user', content: '[Tool result: Sunny, 22°C]' },
      { role: 'assistant', content: 'The weather in London is sunny at 22°C.' },
    ]);
  });

  it('returns empty array when all messages are empty', () => {
    const messages: Message[] = [
      { role: 'user', content: '' },
      { role: 'assistant', content: '' },
    ];

    const result = normalizeMessagesForLocal(undefined, messages);

    expect(result).toEqual([]);
  });

  it('returns only system message when all messages are empty but system is set', () => {
    const messages: Message[] = [{ role: 'user', content: '' }];

    const result = normalizeMessagesForLocal('System prompt', messages);

    expect(result).toEqual([{ role: 'system', content: 'System prompt' }]);
  });
});