diff --git a/src/tools/builtin/audio-transcribe.test.ts b/src/tools/builtin/audio-transcribe.test.ts new file mode 100644 index 0000000..3a970ba --- /dev/null +++ b/src/tools/builtin/audio-transcribe.test.ts @@ -0,0 +1,291 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { createAudioTranscribeTool } from './audio-transcribe.js'; + +const mockFetch = vi.hoisted(() => vi.fn()); +vi.stubGlobal('fetch', mockFetch); + +describe('createAudioTranscribeTool', () => { + const audioConfig = { + endpoint: 'https://api.openai.com/v1/audio/transcriptions', + apiKey: 'sk-test', + model: 'whisper-1', + }; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('creates a tool with correct name and schema', () => { + const tool = createAudioTranscribeTool(audioConfig); + expect(tool.name).toBe('audio.transcribe'); + expect(tool.inputSchema.properties).toHaveProperty('data'); + expect(tool.inputSchema.properties).toHaveProperty('url'); + expect(tool.inputSchema.properties).toHaveProperty('mime_type'); + expect(tool.inputSchema.properties).toHaveProperty('language'); + expect(tool.inputSchema.properties).toHaveProperty('prompt'); + }); + + describe('validation errors', () => { + const tool = createAudioTranscribeTool(audioConfig); + + it('rejects when neither data nor url is provided', async () => { + const result = await tool.execute({}); + expect(result.success).toBe(false); + expect(result.error).toMatch(/Either data or url must be provided/); + }); + + it('rejects when both data and url are provided', async () => { + const result = await tool.execute({ data: 'AAAA', url: 'https://example.com/audio.wav', mime_type: 'audio/wav' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/Only one of data or url/); + }); + + it('rejects data without mime_type', async () => { + const result = await tool.execute({ data: 'AAAA' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/mime_type is required/); + }); + + 
it('rejects unsupported mime_type', async () => { + const result = await tool.execute({ data: 'AAAA', mime_type: 'audio/flac' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/Unsupported MIME type/); + }); + }); + + describe('URL validation (SSRF protection)', () => { + const tool = createAudioTranscribeTool(audioConfig); + + it('rejects localhost URLs', async () => { + const result = await tool.execute({ url: 'http://localhost:8080/audio.wav' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/localhost/); + }); + + it('rejects 127.0.0.1 URLs', async () => { + const result = await tool.execute({ url: 'http://127.0.0.1/audio.wav' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/localhost/); + }); + + it('rejects private IP ranges (10.x)', async () => { + const result = await tool.execute({ url: 'http://10.0.0.1/audio.wav' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/private/i); + }); + + it('rejects private IP ranges (192.168.x)', async () => { + const result = await tool.execute({ url: 'http://192.168.1.1/audio.wav' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/private/i); + }); + + it('rejects private IP ranges (172.16-31.x)', async () => { + const result = await tool.execute({ url: 'http://172.16.0.1/audio.wav' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/private/i); + }); + + it('rejects file:// protocol', async () => { + const result = await tool.execute({ url: 'file:///etc/passwd' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/http\/https/); + }); + + it('rejects invalid URLs', async () => { + const result = await tool.execute({ url: 'not-a-url' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/Invalid URL/); + }); + + it('allows public HTTPS URLs', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + headers: new Headers({ 'content-type': 
'audio/wav' }), + arrayBuffer: async () => new ArrayBuffer(8), + }); + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ text: 'hello' }), + }); + + const result = await tool.execute({ url: 'https://example.com/audio.wav' }); + expect(result.success).toBe(true); + }); + }); + + describe('config errors', () => { + it('returns error when no audio config is provided', async () => { + const tool = createAudioTranscribeTool(undefined); + const result = await tool.execute({ data: 'AAAA', mime_type: 'audio/wav' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/endpoint not configured/); + }); + + it('returns error when endpoint is missing from config', async () => { + const tool = createAudioTranscribeTool({ apiKey: 'sk-test' }); + const result = await tool.execute({ data: 'AAAA', mime_type: 'audio/wav' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/endpoint not configured/); + }); + }); + + describe('successful transcription (OpenAI/Groq path)', () => { + const tool = createAudioTranscribeTool(audioConfig); + + it('transcribes base64 audio data', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ text: 'Hello, world!' 
}), + }); + + const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' }); + expect(result.success).toBe(true); + expect(result.output).toBe('Hello, world!'); + + expect(mockFetch).toHaveBeenCalledWith( + 'https://api.openai.com/v1/audio/transcriptions', + expect.objectContaining({ method: 'POST' }), + ); + }); + + it('sends Authorization header when apiKey is set', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ text: 'test' }), + }); + + await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/ogg' }); + + const call = mockFetch.mock.calls[0]; + expect(call[1].headers).toEqual({ Authorization: 'Bearer sk-test' }); + }); + + it('passes language and prompt parameters', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ text: 'Hola mundo' }), + }); + + const result = await tool.execute({ + data: 'AAAAAAA=', + mime_type: 'audio/mp3', + language: 'es', + prompt: 'Spanish conversation', + }); + expect(result.success).toBe(true); + expect(result.output).toBe('Hola mundo'); + }); + }); + + describe('URL-based transcription', () => { + const tool = createAudioTranscribeTool(audioConfig); + + it('downloads and transcribes audio from URL', async () => { + // First fetch: download audio + mockFetch.mockResolvedValueOnce({ + ok: true, + headers: new Headers({ 'content-type': 'audio/mpeg' }), + arrayBuffer: async () => new ArrayBuffer(16), + }); + // Second fetch: transcription API + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ text: 'URL transcription result' }), + }); + + const result = await tool.execute({ url: 'https://cdn.example.com/audio.mp3' }); + expect(result.success).toBe(true); + expect(result.output).toBe('URL transcription result'); + }); + + it('uses content-type from response headers for blob', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + headers: new Headers({ 'content-type': 'audio/ogg' }), + arrayBuffer: async () => new 
ArrayBuffer(8), + }); + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ text: 'ogg result' }), + }); + + const result = await tool.execute({ url: 'https://cdn.example.com/voice' }); + expect(result.success).toBe(true); + }); + + it('returns error when URL download fails', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 404, + statusText: 'Not Found', + }); + + const result = await tool.execute({ url: 'https://cdn.example.com/missing.wav' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/Failed to download/); + }); + }); + + describe('Ollama path', () => { + const ollamaConfig = { + endpoint: 'http://localhost:11434/api/generate', + model: 'whisper', + }; + const tool = createAudioTranscribeTool(ollamaConfig); + + it('sends JSON request to Ollama endpoint', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ response: 'Ollama transcript' }), + }); + + const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' }); + expect(result.success).toBe(true); + expect(result.output).toBe('Ollama transcript'); + + const [url, opts] = mockFetch.mock.calls[0]; + expect(url).toBe('http://localhost:11434/api/generate'); + expect(JSON.parse(opts.body as string)).toEqual({ + model: 'whisper', + audio: 'AAAAAAA=', + stream: false, + }); + }); + + it('returns error on Ollama failure', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 500, + }); + + const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' }); + expect(result.success).toBe(false); + expect(result.error).toMatch(/Ollama transcription failed/); + }); + }); + + describe('API errors', () => { + const tool = createAudioTranscribeTool(audioConfig); + + it('returns error on transcription API failure', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 500, + text: async () => 'Internal Server Error', + }); + + const result = await 
/** Coarse classification of a URL host for SSRF filtering. */
type UrlHostClass = 'loopback' | 'private' | 'public';

/**
 * Classifies an IPv4 address by its first two octets.
 *
 * Only the first two octets are needed: every blocked range below is defined
 * by a prefix of 16 bits or fewer.
 */
function classifyIpv4Octets(first: number, second: number): UrlHostClass {
  // 127.0.0.0/8 is loopback in its entirety; 0.0.0.0/8 is "this host".
  if (first === 127 || first === 0) return 'loopback';
  if (first === 10) return 'private'; // 10.0.0.0/8
  if (first === 172 && second >= 16 && second <= 31) return 'private'; // 172.16.0.0/12
  if (first === 192 && second === 168) return 'private'; // 192.168.0.0/16
  if (first === 169 && second === 254) return 'private'; // link-local / cloud metadata
  if (first === 100 && second >= 64 && second <= 127) return 'private'; // 100.64.0.0/10 (CGNAT)
  return 'public';
}

/**
 * Classifies the `hostname` of an already-parsed WHATWG `URL`.
 *
 * Relies on the URL parser's normalisation: IPv4 hosts (including decimal,
 * hex and short forms) are serialised as dotted-quad, IPv6 hosts are wrapped
 * in square brackets, lower-cased and compressed.
 */
function classifyUrlHost(hostname: string): UrlHostClass {
  // A single trailing dot denotes the DNS root and names the same host.
  const host = hostname.endsWith('.') ? hostname.slice(0, -1) : hostname;

  if (host === 'localhost' || host.endsWith('.localhost')) {
    return 'loopback';
  }

  // IPv6 literal: URL.hostname keeps the surrounding brackets.
  if (host.startsWith('[') && host.endsWith(']')) {
    const address = host.slice(1, -1).toLowerCase();

    // "::1" is loopback, "::" is the unspecified address.
    if (address === '::1' || address === '::') return 'loopback';

    // IPv4-mapped addresses are serialised as ::ffff:HHHH:LLLL; the high
    // 16-bit group carries the first two IPv4 octets.
    const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(address);
    if (mapped) {
      const high = parseInt(mapped[1] ?? '0', 16);
      return classifyIpv4Octets(high >> 8, high & 0xff);
    }

    // fc00::/7 (unique local) and fe80::/10 (link-local).
    if (/^f[cd][0-9a-f]{2}:/.test(address) || /^fe[89ab][0-9a-f]:/.test(address)) {
      return 'private';
    }
    return 'public';
  }

  // Strict dotted-quad match so DNS names such as "10.example.com" are not
  // mistaken for private addresses.
  const ipv4 = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
  if (ipv4) {
    return classifyIpv4Octets(Number(ipv4[1]), Number(ipv4[2]));
  }

  return 'public';
}

/**
 * Validates a user-supplied audio URL before it is fetched (SSRF guard).
 *
 * Rejects unparseable URLs, non-http(s) schemes, loopback hosts
 * (`localhost`, `*.localhost`, 127.0.0.0/8, 0.0.0.0/8, `::1`, `::`) and
 * private/internal ranges (RFC 1918, 169.254.0.0/16, 100.64.0.0/10,
 * IPv6 unique-local and link-local, and IPv4-mapped IPv6 forms of the above).
 *
 * NOTE: this is a syntactic check on the URL only. A public DNS name that
 * resolves (or is later re-bound) to an internal address is not detected here.
 *
 * @param url - The raw URL string supplied by the caller.
 * @returns `{ valid: true }` when the URL may be fetched, otherwise
 *          `{ valid: false, error }` with a human-readable reason.
 */
function validateUrl(url: string): { valid: boolean; error?: string } {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return { valid: false, error: `Invalid URL: ${url}` };
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return { valid: false, error: `Only http/https URLs are allowed, got ${parsed.protocol}` };
  }

  switch (classifyUrlHost(parsed.hostname)) {
    case 'loopback':
      return { valid: false, error: 'URLs pointing to localhost are not allowed' };
    case 'private':
      return { valid: false, error: 'URLs pointing to private/internal networks are not allowed' };
    default:
      return { valid: true };
  }
}
`Unsupported MIME type: ${args.mime_type}. Supported: ${Array.from(SUPPORTED_MIME_TYPES).join(', ')}` }; } + if (hasUrl) { + const urlValidation = validateUrl(args.url!); + if (!urlValidation.valid) { + return urlValidation; + } + } + return { valid: true }; } @@ -136,7 +168,8 @@ export function createAudioTranscribeTool(audioConfig?: AudioTranscriptionConfig const urlExt = args.url.split('.').pop()?.split('?')[0] || 'bin'; filename = `audio.${urlExt}`; - audioBlob = new Blob([arrayBuffer], { type: 'audio/wav' }); + const contentType = response.headers.get('content-type') ?? 'audio/wav'; + audioBlob = new Blob([arrayBuffer], { type: contentType }); } const endpoint = audioConfig.endpoint;