feat(audio): add native audio support to type system and model clients

- Add AudioSource interface and 'audio' variant to MessageContentPart union
- Update buildUserMessage() to create audio content parts from attachments
- Add attachmentToAudioSource(), hasAudio(), stripAudioParts() helpers
- Gemini: native audio via inlineData (same format as images)
- OpenAI/GitHub: native audio via input_audio content parts
- Anthropic/Bedrock: graceful fallback to transcript text
- Update getMessageTextWithTools() to handle audio blocks for local models
This commit is contained in:
William Valentin
2026-02-11 18:17:33 -08:00
parent a875bcc4ae
commit 32e1a2724a
8 changed files with 169 additions and 22 deletions
+17
View File
@@ -36,6 +36,23 @@ function toOpenAIContent(content: string | MessageContentPart[]): string | OpenA
: part.source.url!;
return { type: 'image_url', image_url: { url } };
}
// Audio branch: turn an 'audio' content part into an `input_audio` content part.
if (part.type === 'audio') {
// GitHub Models uses OpenAI-compatible API — native audio via input_audio
// Lookup table from the part's MIME type (`source.media_type`) to the short
// `format` string placed in the request payload.
const formatMap: Record<string, string> = {
'audio/wav': 'wav',
'audio/mpeg': 'mp3',
'audio/mp3': 'mp3',
'audio/ogg': 'ogg',
'audio/webm': 'webm',
'audio/mp4': 'mp4',
'audio/x-m4a': 'mp4',
};
// Any media type missing from the table is labelled 'wav' rather than rejected.
// NOTE(review): OpenAI's Chat Completions reference lists only 'wav' and 'mp3' as
// accepted `input_audio.format` values — confirm the target endpoint really accepts
// 'ogg' / 'webm' / 'mp4', and that labelling unknown audio as 'wav' is intended.
const format = formatMap[part.source.media_type] ?? 'wav';
return {
type: 'input_audio',
// `source.data` is forwarded unchanged; presumably base64-encoded audio —
// TODO confirm against the AudioSource definition.
input_audio: { data: part.source.data, format },
// The double cast bypasses type checking of this literal. Presumably the SDK's
// ChatCompletionContentPart union in use does not accept this shape — verify, and
// prefer the SDK's own input-audio part type if the installed version provides one.
} as unknown as OpenAI.ChatCompletionContentPart;
}
// Fallback — shouldn't happen
return { type: 'text', text: JSON.stringify(part) };
});