/**
 * Model capability detection for native audio input support.
 *
 * Models that support native audio will receive raw audio data directly.
 * Models that don't will receive a Whisper transcript as text instead.
 */

export type ModelProvider =
  | 'anthropic'
  | 'openai'
  | 'gemini'
  | 'bedrock'
  | 'github'
  | 'ollama'
  | 'llamacpp'
  | 'openrouter'
  | 'zhipuai'
  | 'xai';

/** Providers that support native audio input in their API. */
const AUDIO_CAPABLE_PROVIDERS: ReadonlySet<string> = new Set([
  'gemini',
  'openai',
  'github', // GitHub Models uses OpenAI-compatible API
]);

/**
 * Base names of models known NOT to support audio despite their provider
 * supporting it (for example, older OpenAI models or specialized models).
 *
 * Entries are lowercase base names. Variants that extend a base name with a
 * `-` suffix (e.g. `gpt-4-0613`) are treated as the same family by
 * `isAudioIncapableModel`.
 */
const AUDIO_INCAPABLE_MODELS: ReadonlySet<string> = new Set([
  // Older OpenAI models that predate audio input support
  'gpt-3.5-turbo',
  'gpt-4',
  'gpt-4-turbo',
]);

/**
 * Report whether a model identifier belongs to one of the excluded families.
 *
 * The identifier is normalized before matching: surrounding whitespace is
 * trimmed, it is lowercased, and anything up to the last `/` is dropped so a
 * publisher-prefixed ID such as `openai/gpt-4` matches like `gpt-4`.
 * A model matches when it equals an excluded base name or extends one with a
 * `-` suffix. The `-` boundary is deliberate: it keeps distinct families such
 * as `gpt-4o` or `gpt-4.1` from being caught by the `gpt-4` entry.
 *
 * @param model - Model identifier as supplied by the caller.
 * @returns `true` if the model is in (or a variant of) an excluded family.
 */
function isAudioIncapableModel(model: string): boolean {
  const normalized = model.trim().toLowerCase();
  const id = normalized.slice(normalized.lastIndexOf('/') + 1);

  if (AUDIO_INCAPABLE_MODELS.has(id)) {
    return true;
  }

  for (const base of AUDIO_INCAPABLE_MODELS) {
    if (id.startsWith(`${base}-`)) {
      return true;
    }
  }

  return false;
}

/**
 * Check whether a provider+model combination supports native audio input.
 *
 * @param provider - Provider identifier; compared exactly against the set of
 *   audio-capable providers.
 * @param model - Model identifier; checked against the excluded model families
 *   (including `-`-suffixed variants and publisher-prefixed IDs).
 * @param override - When defined, returned as-is and all detection is skipped.
 * @returns `true` if the model can receive raw audio data directly via its
 *   API, `false` if audio must be transcribed to text before sending.
 */
export function supportsAudioInput(provider: string, model: string, override?: boolean): boolean {
  // An explicit override always wins over detection.
  if (override !== undefined) {
    return override;
  }

  // Provider must be in the capable set.
  if (!AUDIO_CAPABLE_PROVIDERS.has(provider)) {
    return false;
  }

  // Model-specific exclusions within an otherwise capable provider.
  return !isAudioIncapableModel(model);
}