fix(models): tighten audio capability + correct openai oauth content
This commit is contained in:
@@ -89,6 +89,16 @@
|
|||||||
],
|
],
|
||||||
"test_status": "pnpm test:run src/auth/anthropic.test.ts + pnpm typecheck passing"
|
"test_status": "pnpm test:run src/auth/anthropic.test.ts + pnpm typecheck passing"
|
||||||
},
|
},
|
||||||
|
"openai-oauth-and-audio-capability-tweaks": {
|
||||||
|
"status": "completed",
|
||||||
|
"date": "2026-02-14",
|
||||||
|
"summary": "Fixed OpenAI OAuth (Codex) message shaping by using output_text for assistant turns, and made native-audio detection conservative via a model allowlist so Flynn does not attempt to send raw audio to models that cannot consume it.",
|
||||||
|
"files_modified": [
|
||||||
|
"src/models/openai.ts",
|
||||||
|
"src/models/capabilities.ts"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm typecheck passing (no new tests added in this change)"
|
||||||
|
},
|
||||||
"p0-p1-implementation-plan": {
|
"p0-p1-implementation-plan": {
|
||||||
"file": "2026-02-06-p0-p1-implementation-plan.md",
|
"file": "2026-02-06-p0-p1-implementation-plan.md",
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
|
|||||||
+25
-21
@@ -7,22 +7,26 @@
|
|||||||
|
|
||||||
export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai';
|
export type ModelProvider = 'anthropic' | 'openai' | 'gemini' | 'bedrock' | 'github' | 'ollama' | 'llamacpp' | 'openrouter' | 'zhipuai' | 'xai';
|
||||||
|
|
||||||
/** Providers that support native audio input in their API. */
|
/**
|
||||||
const AUDIO_CAPABLE_PROVIDERS = new Set<string>([
|
* Models known to support native audio input via their API.
|
||||||
'gemini',
|
* We use an allowlist (not a provider-level blanket) to avoid silently
|
||||||
'openai',
|
* dropping audio for models that don't actually handle audio content parts.
|
||||||
'github', // GitHub Models uses OpenAI-compatible API
|
*/
|
||||||
|
const AUDIO_CAPABLE_MODELS = new Set<string>([
|
||||||
|
// Gemini — all current models support audio
|
||||||
|
'gemini-2.0-flash',
|
||||||
|
'gemini-2.0-pro',
|
||||||
|
'gemini-1.5-flash',
|
||||||
|
'gemini-1.5-pro',
|
||||||
|
// OpenAI — only multimodal audio models
|
||||||
|
'gpt-4o',
|
||||||
|
'gpt-4o-mini',
|
||||||
|
'gpt-4o-audio-preview',
|
||||||
]);
|
]);
|
||||||
|
|
||||||
/**
|
/** Providers where all models support audio (e.g. Gemini). */
|
||||||
* Models known NOT to support audio despite their provider supporting it.
|
const AUDIO_CAPABLE_PROVIDERS = new Set<string>([
|
||||||
* For example, older OpenAI models or specialized models.
|
'gemini',
|
||||||
*/
|
|
||||||
const AUDIO_INCAPABLE_MODELS = new Set<string>([
|
|
||||||
// Older OpenAI models that predate audio input support
|
|
||||||
'gpt-3.5-turbo',
|
|
||||||
'gpt-4',
|
|
||||||
'gpt-4-turbo',
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -34,15 +38,15 @@ const AUDIO_INCAPABLE_MODELS = new Set<string>([
|
|||||||
export function supportsAudioInput(provider: string, model: string, override?: boolean): boolean {
|
export function supportsAudioInput(provider: string, model: string, override?: boolean): boolean {
|
||||||
if (override !== undefined) {return override;}
|
if (override !== undefined) {return override;}
|
||||||
|
|
||||||
// Provider must be in the capable set
|
// Provider-level blanket (all models support audio)
|
||||||
if (!AUDIO_CAPABLE_PROVIDERS.has(provider)) {
|
if (AUDIO_CAPABLE_PROVIDERS.has(provider)) {
|
||||||
return false;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check model-specific exclusions
|
// Model-level allowlist
|
||||||
if (AUDIO_INCAPABLE_MODELS.has(model)) {
|
if (AUDIO_CAPABLE_MODELS.has(model)) {
|
||||||
return false;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -96,9 +96,10 @@ export class OpenAIClient implements ModelClient {
|
|||||||
.map((m) => {
|
.map((m) => {
|
||||||
const text = getMessageTextWithTools(m);
|
const text = getMessageTextWithTools(m);
|
||||||
if (!text) {return null;}
|
if (!text) {return null;}
|
||||||
|
const contentType = m.role === 'assistant' ? 'output_text' : 'input_text';
|
||||||
return {
|
return {
|
||||||
role: m.role,
|
role: m.role,
|
||||||
content: [{ type: 'input_text', text }],
|
content: [{ type: contentType, text }],
|
||||||
};
|
};
|
||||||
})
|
})
|
||||||
.filter((x): x is NonNullable<typeof x> => Boolean(x));
|
.filter((x): x is NonNullable<typeof x> => Boolean(x));
|
||||||
|
|||||||
Reference in New Issue
Block a user