feat(audio): add native audio support to type system and model clients

- Add AudioSource interface and 'audio' variant to MessageContentPart union
- Update buildUserMessage() to create audio content parts from attachments
- Add attachmentToAudioSource(), hasAudio(), stripAudioParts() helpers
- Gemini: native audio via inlineData (same format as images)
- OpenAI/GitHub: native audio via input_audio content parts
- Anthropic/Bedrock: graceful fallback to transcript text
- Update getMessageTextWithTools() to handle audio blocks for local models
2026-02-11 18:17:33 -08:00
parent a875bcc4ae
commit 32e1a2724a
8 changed files with 169 additions and 22 deletions
@@ -36,6 +36,23 @@ function toOpenAIContent(content: string | MessageContentPart[]): string | OpenA
        : part.source.url!;
      return { type: 'image_url', image_url: { url } };
    }
+    if (part.type === 'audio') {
+      // GitHub Models uses OpenAI-compatible API — native audio via input_audio
+      const formatMap: Record<string, string> = {
+        'audio/wav': 'wav',
+        'audio/mpeg': 'mp3',
+        'audio/mp3': 'mp3',
+        'audio/ogg': 'ogg',
+        'audio/webm': 'webm',
+        'audio/mp4': 'mp4',
+        'audio/x-m4a': 'mp4',
+      };
+      const format = formatMap[part.source.media_type] ?? 'wav';
+      return {
+        type: 'input_audio',
+        input_audio: { data: part.source.data, format },
+      } as unknown as OpenAI.ChatCompletionContentPart;
+    }
    // Fallback — shouldn't happen
    return { type: 'text', text: JSON.stringify(part) };
  });