feat(audio): add audio.transcribe tool with Whisper-compatible API support

- Add createAudioTranscribeTool with OpenAI/Groq/Ollama/llama.cpp provider support
- Refactor audio config schema to nested audio.enabled + audio.provider structure
- Move audio tool registration to initTools() for conditional enablement
- Fix duplication bug in audio-transcribe.ts URL download handler
- Support base64 data and URL-based audio input with format detection
This commit is contained in:
William Valentin
2026-02-11 18:13:19 -08:00
parent 5491d5a82a
commit a875bcc4ae
6 changed files with 240 additions and 13 deletions
+9 -3
View File
@@ -314,10 +314,16 @@ const webSearchSchema = z.object({
max_results: z.number().min(1).max(20).default(5),
}).default({});
// Settings for one audio provider entry.
// `type` is required and must be one of the identifiers listed below;
// `endpoint`, `api_key` and `model` are optional strings.
const audioProviderSchema = z.object({
  type: z.enum([
    'openai',
    'groq',
    'ollama',
    'llamacpp',
    'custom',
  ]),
  endpoint: z.string().optional(),
  api_key: z.string().optional(),
  model: z.string().optional(),
});
// Audio configuration section. `.default({})` supplies an empty object when the
// section is undefined in the parsed input.
// NOTE(review): this listing appears to be a diff view with its +/- markers
// missing. The commit message describes a move to a nested `enabled` +
// `provider` layout, so the three `transcription_*` fields are presumably the
// removed pre-refactor lines — confirm against the actual file.
const audioSchema = z.object({
transcription_endpoint: z.string().optional(),
transcription_api_key: z.string().optional(),
// Uses 'whisper-1' when no model is given.
transcription_model: z.string().default('whisper-1'),
// Off unless explicitly set to true.
enabled: z.boolean().default(false),
// Optional provider settings, validated by audioProviderSchema.
provider: audioProviderSchema.optional(),
}).default({});
// ── Tool policy schemas ──────────────────────────────────────────────