feat(audio): add audio.transcribe tool with Whisper-compatible API support
- Add createAudioTranscribeTool with OpenAI/Groq/Ollama/llama.cpp provider support
- Refactor audio config schema to nested audio.enabled + audio.provider structure
- Move audio tool registration to initTools() for conditional enablement
- Fix duplication bug in audio-transcribe.ts URL download handler
- Support base64 data and URL-based audio input with format detection
This commit is contained in:
@@ -314,10 +314,16 @@ const webSearchSchema = z.object({
|
||||
max_results: z.number().min(1).max(20).default(5),
|
||||
}).default({});
|
||||
|
||||
// Schema for a single audio transcription provider entry.
// `type` is required and must be one of the five listed provider kinds;
// `endpoint`, `api_key` and `model` are each optional strings.
const audioProviderSchema = z.object({
|
||||
type: z.enum(['openai', 'groq', 'ollama', 'llamacpp', 'custom']),
|
||||
endpoint: z.string().optional(),
|
||||
api_key: z.string().optional(),
|
||||
model: z.string().optional(),
|
||||
});
|
||||
|
||||
// Schema for the audio configuration section; declares `{}` as its default via `.default({})`.
// NOTE(review): this text is a diff rendering whose +/- markers were lost. Going by the
// commit message ("nested audio.enabled + audio.provider structure"), the three
// `transcription_*` fields are presumably the removed lines and `enabled` / `provider`
// the added ones — confirm against the actual file before relying on this field list.
const audioSchema = z.object({
|
||||
transcription_endpoint: z.string().optional(),
|
||||
transcription_api_key: z.string().optional(),
|
||||
transcription_model: z.string().default('whisper-1'),
|
||||
enabled: z.boolean().default(false),
|
||||
provider: audioProviderSchema.optional(),
|
||||
}).default({});
|
||||
|
||||
// ── Tool policy schemas ──────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user