fix(models): tighten audio capability + correct openai oauth content

This commit is contained in:
William Valentin
2026-02-14 00:43:48 -08:00
parent 4bb8c88fbe
commit 6212603821
3 changed files with 37 additions and 22 deletions
+10
View File
@@ -89,6 +89,16 @@
], ],
"test_status": "pnpm test:run src/auth/anthropic.test.ts + pnpm typecheck passing" "test_status": "pnpm test:run src/auth/anthropic.test.ts + pnpm typecheck passing"
}, },
"openai-oauth-and-audio-capability-tweaks": {
"status": "completed",
"date": "2026-02-14",
"summary": "Fixed OpenAI OAuth (Codex) message shaping by using output_text for assistant turns, and made native-audio detection conservative: Gemini stays enabled at the provider level, while every other provider must match a per-model allowlist, so Flynn does not attempt to send raw audio to models that cannot consume it.",
"files_modified": [
"src/models/openai.ts",
"src/models/capabilities.ts"
],
"test_status": "pnpm typecheck passing (no new tests added in this change)"
},
"p0-p1-implementation-plan": { "p0-p1-implementation-plan": {
"file": "2026-02-06-p0-p1-implementation-plan.md", "file": "2026-02-06-p0-p1-implementation-plan.md",
"status": "completed", "status": "completed",
+25 -21
View File
@@ -7,22 +7,26 @@
export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai'; export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai';
/** Providers that support native audio input in their API. */ /**
const AUDIO_CAPABLE_PROVIDERS = new Set<string>([ * Models known to support native audio input via their API.
'gemini', * We use an allowlist (rather than a blanket for every provider) so that raw
'openai', * audio is never sent to models that don't actually handle audio content parts.
'github', // GitHub Models uses OpenAI-compatible API */
const AUDIO_CAPABLE_MODELS = new Set<string>([
// Gemini — all current models support audio
'gemini-2.0-flash',
'gemini-2.0-pro',
'gemini-1.5-flash',
'gemini-1.5-pro',
// OpenAI — only multimodal audio models
'gpt-4o',
'gpt-4o-mini',
'gpt-4o-audio-preview',
]); ]);
/** /** Providers where all models support audio (e.g. Gemini). */
* Models known NOT to support audio despite their provider supporting it. const AUDIO_CAPABLE_PROVIDERS = new Set<string>([
* For example, older OpenAI models or specialized models. 'gemini',
*/
const AUDIO_INCAPABLE_MODELS = new Set<string>([
// Older OpenAI models that predate audio input support
'gpt-3.5-turbo',
'gpt-4',
'gpt-4-turbo',
]); ]);
/** /**
@@ -34,15 +38,15 @@ const AUDIO_INCAPABLE_MODELS = new Set<string>([
export function supportsAudioInput(provider: string, model: string, override?: boolean): boolean { export function supportsAudioInput(provider: string, model: string, override?: boolean): boolean {
if (override !== undefined) {return override;} if (override !== undefined) {return override;}
// Provider must be in the capable set // Provider-level blanket (all models support audio)
if (!AUDIO_CAPABLE_PROVIDERS.has(provider)) { if (AUDIO_CAPABLE_PROVIDERS.has(provider)) {
return false; return true;
} }
// Check model-specific exclusions // Model-level allowlist
if (AUDIO_INCAPABLE_MODELS.has(model)) { if (AUDIO_CAPABLE_MODELS.has(model)) {
return false; return true;
} }
return true; return false;
} }
+2 -1
View File
@@ -96,9 +96,10 @@ export class OpenAIClient implements ModelClient {
.map((m) => { .map((m) => {
const text = getMessageTextWithTools(m); const text = getMessageTextWithTools(m);
if (!text) {return null;} if (!text) {return null;}
const contentType = m.role === 'assistant' ? 'output_text' : 'input_text';
return { return {
role: m.role, role: m.role,
content: [{ type: 'input_text', text }], content: [{ type: contentType, text }],
}; };
}) })
.filter((x): x is NonNullable<typeof x> => Boolean(x)); .filter((x): x is NonNullable<typeof x> => Boolean(x));