feat(audio): add smart routing for native vs transcribed audio
- Create capabilities.ts with supportsAudioInput() detection
- Gemini, OpenAI, and GitHub Models get native audio passthrough
- Anthropic, Bedrock, Ollama, llama.cpp fall back to Whisper transcription
- routing.ts now checks model capability before deciding to transcribe
- Audio attachments are stripped for non-native models (only transcript text passed)
- Remove deprecated audioConfig from createMessageRouter deps (read from config.audio)
This commit is contained in:
@@ -0,0 +1,46 @@
|
||||
/**
|
||||
* Model capability detection for native audio input support.
|
||||
*
|
||||
* Models that support native audio will receive raw audio data directly.
|
||||
* Models that don't will receive a Whisper transcript as text instead.
|
||||
*/
|
||||
|
||||
/** String-literal union of the model provider identifiers declared by this module. */
export type ModelProvider =
  | 'anthropic'
  | 'openai'
  | 'gemini'
  | 'bedrock'
  | 'github'
  | 'ollama'
  | 'llamacpp'
  | 'openrouter'
  | 'zhipuai'
  | 'xai';
|
||||
|
||||
/**
 * Providers that support native audio input in their API.
 *
 * Element type is `string` (not a narrower union) so membership can be tested
 * against arbitrary provider strings; the set is read-only because it is a
 * fixed lookup table.
 */
const AUDIO_CAPABLE_PROVIDERS: ReadonlySet<string> = new Set<string>([
  'gemini',
  'openai',
  'github', // GitHub Models uses OpenAI-compatible API
]);
|
||||
|
||||
/**
 * Models known NOT to support audio despite their provider supporting it.
 * For example, older OpenAI models or specialized models.
 *
 * The set is read-only because it is a fixed lookup table.
 */
const AUDIO_INCAPABLE_MODELS: ReadonlySet<string> = new Set<string>([
  // Older OpenAI models that predate audio input support
  'gpt-3.5-turbo',
  'gpt-4',
  'gpt-4-turbo',
]);
|
||||
|
||||
/**
 * Check whether a provider+model combination supports native audio input.
 *
 * Returns true if the model can receive raw audio data directly via its API,
 * false if audio must be transcribed to text before sending.
 *
 * @param provider - Provider identifier; matched exactly against the
 *   audio-capable provider set.
 * @param model - Model identifier; rejected when it equals an excluded model
 *   name or is a hyphen-suffixed variant of one (e.g. a dated snapshot).
 * @returns `true` when the provider is audio-capable and the model is not
 *   excluded, otherwise `false`.
 */
export function supportsAudioInput(provider: string, model: string): boolean {
  // Provider must be in the capable set
  if (!AUDIO_CAPABLE_PROVIDERS.has(provider)) {
    return false;
  }

  // Check model-specific exclusions. An excluded base name also covers its
  // suffixed variants ("gpt-4" rules out "gpt-4-0613"), while the required
  // "-" separator keeps distinct families such as "gpt-4o" unaffected.
  for (const excluded of AUDIO_INCAPABLE_MODELS) {
    if (model === excluded || model.startsWith(`${excluded}-`)) {
      return false;
    }
  }

  return true;
}
|
||||
@@ -8,6 +8,7 @@ export { LlamaCppClient, type LlamaCppClientConfig } from './local/index.js';
|
||||
export { ModelRouter, type ModelRouterConfig, type ModelTier } from './router.js';
|
||||
export { withRetry, isRetryable, DEFAULT_RETRY_CONFIG, type RetryConfig } from './retry.js';
|
||||
export { estimateCost, MODEL_COSTS_PER_MILLION } from './costs.js';
|
||||
export { supportsAudioInput } from './capabilities.js';
|
||||
export {
|
||||
isSupportedImage,
|
||||
isSupportedAudio,
|
||||
|
||||
Reference in New Issue
Block a user