Files
flynn/src/models/capabilities.ts
T
William Valentin 148219153e feat(audio): add tests, token estimation, and config override for native audio
- Add capabilities.test.ts (18 tests) for supportsAudioInput()
- Add 15 audio tests to media.test.ts (hasAudio, stripAudioParts, attachmentToAudioSource)
- Add estimateAudioTokens() to tokens.ts (base64→bytes→duration→tokens)
- Update estimateMessageTokens() to include audio content parts
- Add 5 audio token tests to tokens.test.ts
- Add supports_audio config override to model schema
- Wire supports_audio from tier config through routing to capability check

Total tests: 1369 (was 1331, +38 audio-related)
2026-02-11 18:27:19 -08:00

49 lines
1.4 KiB
TypeScript

/**
* Model capability detection for native audio input support.
*
* Models that support native audio will receive raw audio data directly.
* Models that don't will receive a Whisper transcript as text instead.
*/
/** String-literal union of the provider identifiers declared by this module. */
export type ModelProvider =
  | 'anthropic'
  | 'openai'
  | 'gemini'
  | 'bedrock'
  | 'github'
  | 'ollama'
  | 'llamacpp'
  | 'openrouter'
  | 'zhipuai'
  | 'xai';
/**
 * Provider identifiers whose APIs accept native audio input.
 *
 * `github` is listed because GitHub Models uses an OpenAI-compatible API.
 */
const AUDIO_CAPABLE_PROVIDERS = new Set<string>(['gemini', 'openai', 'github']);
/**
 * Model identifiers excluded from native audio even though their provider
 * is audio-capable.
 *
 * The current entries are older OpenAI models that predate audio input
 * support. Lookups against this set are exact string matches.
 */
const AUDIO_INCAPABLE_MODELS = new Set<string>(['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo']);
/**
 * Decide whether raw audio can be sent directly to a provider/model pair.
 *
 * @param provider - Provider identifier, looked up (exact match) in the set of
 *   audio-capable providers.
 * @param model - Model identifier, looked up (exact match) in the set of models
 *   excluded from native audio.
 * @param override - When defined, this value is returned as-is and both
 *   lookups are skipped.
 * @returns `true` if the model can receive raw audio via its API; `false` if
 *   the audio must be transcribed to text before sending.
 */
export function supportsAudioInput(provider: string, model: string, override?: boolean): boolean {
  // An explicit override takes precedence over the built-in tables.
  if (override !== undefined) {
    return override;
  }

  const providerHandlesAudio = AUDIO_CAPABLE_PROVIDERS.has(provider);
  const modelIsExcluded = AUDIO_INCAPABLE_MODELS.has(model);
  return providerHandlesAudio && !modelIsExcluded;
}