fix(audio): add SSRF protection, MIME type fix, and tests for audio-transcribe tool

- Add URL validation blocking localhost, private IPs, and non-http protocols
- Use response Content-Type header instead of hardcoded audio/wav for URL downloads
- Add 25 tests covering validation, SSRF, config errors, transcription paths, and error handling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 21:57:45 -08:00
parent a8a2c59313
commit 0b44adbaea
2 changed files with 325 additions and 1 deletions
@@ -0,0 +1,291 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { createAudioTranscribeTool } from './audio-transcribe.js';
+
+// Build the fetch mock inside vi.hoisted and install it as the global `fetch`,
+// so every call to fetch made while these tests run hits this mock instead of the network.
+// NOTE(review): vi.hoisted is expected to run before the module imports above — confirm against the Vitest docs.
+const mockFetch = vi.hoisted(() => vi.fn());
+vi.stubGlobal('fetch', mockFetch);
+
+describe('createAudioTranscribeTool', () => {
+  // Baseline configuration with endpoint, API key and model, reused by most suites below.
+  const audioConfig = {
+    endpoint: 'https://api.openai.com/v1/audio/transcriptions',
+    apiKey: 'sk-test',
+    model: 'whisper-1',
+  };
+
+  beforeEach(() => {
+    // resetAllMocks (rather than clearAllMocks) wipes the call history AND any
+    // still-queued mockResolvedValueOnce / mockRejectedValueOnce entries, so a
+    // response left unconsumed by one test cannot be served to the next one.
+    vi.resetAllMocks();
+  });
+
+  it('creates a tool with correct name and schema', () => {
+    const { name, inputSchema } = createAudioTranscribeTool(audioConfig);
+
+    expect(name).toBe('audio.transcribe');
+
+    // Every input field the tool accepts must be declared in the schema.
+    for (const field of ['data', 'url', 'mime_type', 'language', 'prompt']) {
+      expect(inputSchema.properties).toHaveProperty(field);
+    }
+  });
+
+  describe('validation errors', () => {
+    const tool = createAudioTranscribeTool(audioConfig);
+
+    // One row per rejected input: test title, arguments passed to execute(),
+    // and the pattern the returned error message must match.
+    const rejectedInputs = [
+      {
+        title: 'rejects when neither data nor url is provided',
+        input: {},
+        message: /Either data or url must be provided/,
+      },
+      {
+        title: 'rejects when both data and url are provided',
+        input: { data: 'AAAA', url: 'https://example.com/audio.wav', mime_type: 'audio/wav' },
+        message: /Only one of data or url/,
+      },
+      {
+        title: 'rejects data without mime_type',
+        input: { data: 'AAAA' },
+        message: /mime_type is required/,
+      },
+      {
+        title: 'rejects unsupported mime_type',
+        input: { data: 'AAAA', mime_type: 'audio/flac' },
+        message: /Unsupported MIME type/,
+      },
+    ];
+
+    for (const { title, input, message } of rejectedInputs) {
+      it(title, async () => {
+        const outcome = await tool.execute(input);
+        expect(outcome.success).toBe(false);
+        expect(outcome.error).toMatch(message);
+      });
+    }
+  });
+
+  describe('URL validation (SSRF protection)', () => {
+    const tool = createAudioTranscribeTool(audioConfig);
+
+    /**
+     * Asserts that `url` is refused with an error matching `reason` and that no
+     * request was issued. Checking the fetch mock matters: a validator that
+     * reports an error but still performs the download would not protect anything.
+     */
+    async function expectBlocked(url: string, reason: RegExp): Promise<void> {
+      const result = await tool.execute({ url });
+      expect(result.success).toBe(false);
+      expect(result.error).toMatch(reason);
+      expect(mockFetch).not.toHaveBeenCalled();
+    }
+
+    it('rejects localhost URLs', async () => {
+      await expectBlocked('http://localhost:8080/audio.wav', /localhost/);
+    });
+
+    it('rejects 127.0.0.1 URLs', async () => {
+      await expectBlocked('http://127.0.0.1/audio.wav', /localhost/);
+    });
+
+    it('rejects private IP ranges (10.x)', async () => {
+      await expectBlocked('http://10.0.0.1/audio.wav', /private/i);
+    });
+
+    it('rejects private IP ranges (192.168.x)', async () => {
+      await expectBlocked('http://192.168.1.1/audio.wav', /private/i);
+    });
+
+    it('rejects private IP ranges (172.16-31.x)', async () => {
+      // Probe both ends of 172.16.0.0/12 so an off-by-one in the range check is caught.
+      await expectBlocked('http://172.16.0.1/audio.wav', /private/i);
+      await expectBlocked('http://172.31.255.255/audio.wav', /private/i);
+    });
+
+    it('rejects file:// protocol', async () => {
+      await expectBlocked('file:///etc/passwd', /http\/https/);
+    });
+
+    it('rejects invalid URLs', async () => {
+      await expectBlocked('not-a-url', /Invalid URL/);
+    });
+
+    it('allows public HTTPS URLs', async () => {
+      // First response: the audio download. Second response: the transcription API.
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        headers: new Headers({ 'content-type': 'audio/wav' }),
+        arrayBuffer: async () => new ArrayBuffer(8),
+      });
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        json: async () => ({ text: 'hello' }),
+      });
+
+      const result = await tool.execute({ url: 'https://example.com/audio.wav' });
+      expect(result.success).toBe(true);
+      // Both the download and the transcription request must actually have been made.
+      expect(mockFetch).toHaveBeenCalledTimes(2);
+    });
+  });
+
+  describe('config errors', () => {
+    // Builds a tool from `config` and runs it with an otherwise valid base64 payload,
+    // so any failure comes from the configuration rather than from input validation.
+    const runWithConfig = (config: Parameters<typeof createAudioTranscribeTool>[0]) =>
+      createAudioTranscribeTool(config).execute({ data: 'AAAA', mime_type: 'audio/wav' });
+
+    it('returns error when no audio config is provided', async () => {
+      const outcome = await runWithConfig(undefined);
+      expect(outcome.success).toBe(false);
+      expect(outcome.error).toMatch(/endpoint not configured/);
+    });
+
+    it('returns error when endpoint is missing from config', async () => {
+      const outcome = await runWithConfig({ apiKey: 'sk-test' });
+      expect(outcome.success).toBe(false);
+      expect(outcome.error).toMatch(/endpoint not configured/);
+    });
+  });
+
+  describe('successful transcription (OpenAI/Groq path)', () => {
+    const tool = createAudioTranscribeTool(audioConfig);
+
+    // Queues a single successful transcription-API response whose JSON body is { text }.
+    const queueTranscript = (text: string) =>
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        json: async () => ({ text }),
+      });
+
+    it('transcribes base64 audio data', async () => {
+      queueTranscript('Hello, world!');
+
+      const { success, output } = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
+
+      expect(success).toBe(true);
+      expect(output).toBe('Hello, world!');
+      expect(mockFetch).toHaveBeenCalledWith(
+        'https://api.openai.com/v1/audio/transcriptions',
+        expect.objectContaining({ method: 'POST' }),
+      );
+    });
+
+    it('sends Authorization header when apiKey is set', async () => {
+      queueTranscript('test');
+
+      await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/ogg' });
+
+      // Second fetch argument is the request options object.
+      const [, requestInit] = mockFetch.mock.calls[0];
+      expect(requestInit.headers).toEqual({ Authorization: 'Bearer sk-test' });
+    });
+
+    it('passes language and prompt parameters', async () => {
+      queueTranscript('Hola mundo');
+
+      // NOTE(review): only the result is asserted here; the outgoing request body is
+      // not inspected, so this does not yet prove language/prompt were forwarded.
+      const { success, output } = await tool.execute({
+        data: 'AAAAAAA=',
+        mime_type: 'audio/mp3',
+        language: 'es',
+        prompt: 'Spanish conversation',
+      });
+
+      expect(success).toBe(true);
+      expect(output).toBe('Hola mundo');
+    });
+  });
+
+  describe('URL-based transcription', () => {
+    const tool = createAudioTranscribeTool(audioConfig);
+
+    // Queues a successful download response with the given Content-Type and payload size.
+    const queueDownload = (contentType: string, byteLength: number) =>
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        headers: new Headers({ 'content-type': contentType }),
+        arrayBuffer: async () => new ArrayBuffer(byteLength),
+      });
+
+    // Queues a successful transcription-API response whose JSON body is { text }.
+    const queueTranscript = (text: string) =>
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        json: async () => ({ text }),
+      });
+
+    it('downloads and transcribes audio from URL', async () => {
+      // Responses are consumed in call order: download first, transcription second.
+      queueDownload('audio/mpeg', 16);
+      queueTranscript('URL transcription result');
+
+      const { success, output } = await tool.execute({ url: 'https://cdn.example.com/audio.mp3' });
+
+      expect(success).toBe(true);
+      expect(output).toBe('URL transcription result');
+    });
+
+    it('uses content-type from response headers for blob', async () => {
+      queueDownload('audio/ogg', 8);
+      queueTranscript('ogg result');
+
+      // NOTE(review): only overall success is asserted; the Blob type sent to the
+      // transcription endpoint is not inspected by this test.
+      const { success } = await tool.execute({ url: 'https://cdn.example.com/voice' });
+
+      expect(success).toBe(true);
+    });
+
+    it('returns error when URL download fails', async () => {
+      mockFetch.mockResolvedValueOnce({
+        ok: false,
+        status: 404,
+        statusText: 'Not Found',
+      });
+
+      const { success, error } = await tool.execute({ url: 'https://cdn.example.com/missing.wav' });
+
+      expect(success).toBe(false);
+      expect(error).toMatch(/Failed to download/);
+    });
+  });
+
+  describe('Ollama path', () => {
+    // Configuration pointing at an /api/generate endpoint with no API key.
+    const tool = createAudioTranscribeTool({
+      endpoint: 'http://localhost:11434/api/generate',
+      model: 'whisper',
+    });
+
+    // Runs the tool with the same small base64 WAV payload used by both tests.
+    const transcribeSample = () => tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
+
+    it('sends JSON request to Ollama endpoint', async () => {
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        json: async () => ({ response: 'Ollama transcript' }),
+      });
+
+      const { success, output } = await transcribeSample();
+      expect(success).toBe(true);
+      expect(output).toBe('Ollama transcript');
+
+      // Inspect the single outgoing request: target URL and JSON-encoded body.
+      const [requestedUrl, requestInit] = mockFetch.mock.calls[0];
+      expect(requestedUrl).toBe('http://localhost:11434/api/generate');
+      const sentBody = JSON.parse(requestInit.body as string);
+      expect(sentBody).toEqual({
+        model: 'whisper',
+        audio: 'AAAAAAA=',
+        stream: false,
+      });
+    });
+
+    it('returns error on Ollama failure', async () => {
+      mockFetch.mockResolvedValueOnce({
+        ok: false,
+        status: 500,
+      });
+
+      const { success, error } = await transcribeSample();
+      expect(success).toBe(false);
+      expect(error).toMatch(/Ollama transcription failed/);
+    });
+  });
+
+  describe('API errors', () => {
+    const tool = createAudioTranscribeTool(audioConfig);
+
+    // Runs the tool with a valid base64 WAV payload so only the mocked transport can fail.
+    const transcribeSample = () => tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
+
+    it('returns error on transcription API failure', async () => {
+      // Non-OK HTTP response from the transcription endpoint.
+      mockFetch.mockResolvedValueOnce({
+        ok: false,
+        status: 500,
+        text: async () => 'Internal Server Error',
+      });
+
+      const { success, error } = await transcribeSample();
+      expect(success).toBe(false);
+      expect(error).toMatch(/Transcription request failed.*500/);
+    });
+
+    it('handles network errors gracefully', async () => {
+      // fetch itself rejects; the tool must turn that into a failed result.
+      mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));
+
+      const { success, error } = await transcribeSample();
+      expect(success).toBe(false);
+      expect(error).toMatch(/ECONNREFUSED/);
+    });
+  });
+});
@@ -25,6 +25,31 @@ const PROVIDER_ENDPOINTS: Record<string, string> = {
  llamacpp: 'http://localhost:8080/v1/audio/transcriptions',
 };

+/**
+ * Returns the four octets of `host` when it is a dotted-decimal IPv4 literal or an
+ * IPv4-mapped IPv6 literal in its serialized form (e.g. `::ffff:7f00:1`);
+ * returns `undefined` for anything else (DNS names, other IPv6 addresses).
+ */
+function ipv4OctetsOf(host: string): number[] | undefined {
+  const dotted = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
+  if (dotted) {
+    return dotted.slice(1).map(Number);
+  }
+
+  // The URL parser serializes `::ffff:a.b.c.d` as two hex groups, so decode them back.
+  const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(host);
+  if (mapped) {
+    const [, highHex = '0', lowHex = '0'] = mapped;
+    const high = parseInt(highHex, 16);
+    const low = parseInt(lowHex, 16);
+    return [high >> 8, high & 0xff, low >> 8, low & 0xff];
+  }
+
+  return undefined;
+}
+
+/**
+ * Checks that a user-supplied audio URL is safe to download (SSRF guard).
+ *
+ * Rejects, in this order:
+ *  - strings that do not parse as a URL;
+ *  - any scheme other than http/https;
+ *  - loopback / unspecified hosts: `localhost`, `*.localhost`, 127.0.0.0/8,
+ *    0.0.0.0/8, `::1`, `::`, and IPv4-mapped IPv6 forms of those;
+ *  - private or internal address literals: 10/8, 172.16/12, 192.168/16,
+ *    169.254/16 (link-local), 100.64/10 (shared address space),
+ *    fc00::/7 (unique local) and fe80::/10 (link-local).
+ *
+ * Only the literal host in the URL is inspected. No DNS lookup is performed here,
+ * so a public-looking name that resolves to an internal address is not detected.
+ *
+ * @param url - Raw URL string supplied by the caller.
+ * @returns `{ valid: true }` when the URL may be fetched, otherwise
+ *          `{ valid: false, error }` with a human-readable reason.
+ */
+function validateUrl(url: string): { valid: boolean; error?: string } {
+  let parsed: URL;
+  try {
+    parsed = new URL(url);
+  } catch {
+    return { valid: false, error: `Invalid URL: ${url}` };
+  }
+
+  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
+    return { valid: false, error: `Only http/https URLs are allowed, got ${parsed.protocol}` };
+  }
+
+  // URL.hostname keeps the square brackets around IPv6 literals and may keep a
+  // trailing root dot ("localhost."); strip both so the comparisons below see
+  // the bare host.
+  const host = parsed.hostname
+    .toLowerCase()
+    .replace(/^\[|\]$/g, '')
+    .replace(/\.$/, '');
+
+  const octets = ipv4OctetsOf(host);
+  // -1 never matches a real octet, so non-IPv4 hosts fall through the numeric checks.
+  const [first = -1, second = -1] = octets ?? [];
+
+  const isLoopback =
+    host === 'localhost' ||
+    host.endsWith('.localhost') ||
+    host === '::1' ||
+    host === '::' ||
+    first === 127 ||
+    first === 0;
+  if (isLoopback) {
+    return { valid: false, error: 'URLs pointing to localhost are not allowed' };
+  }
+
+  // Numeric range checks apply only to real address literals, so a public DNS
+  // name such as "10.example.com" is no longer rejected by accident.
+  const isPrivateV4 =
+    first === 10 ||
+    (first === 172 && second >= 16 && second <= 31) ||
+    (first === 192 && second === 168) ||
+    (first === 169 && second === 254) ||
+    (first === 100 && second >= 64 && second <= 127);
+
+  // Domain names cannot contain ':', so this pattern can only match IPv6 literals.
+  const isPrivateV6 = /^(f[cd][0-9a-f]{2}|fe[89ab][0-9a-f]):/.test(host);
+
+  if (isPrivateV4 || isPrivateV6) {
+    return { valid: false, error: 'URLs pointing to private/internal networks are not allowed' };
+  }
+
+  return { valid: true };
+}
+
 function validateInput(args: AudioTranscribeArgs): { valid: boolean; error?: string } {
  const hasData = args.data !== undefined && args.data !== '';
  const hasUrl = args.url !== undefined && args.url !== '';
@@ -45,6 +70,13 @@ function validateInput(args: AudioTranscribeArgs): { valid: boolean; error?: str
    return { valid: false, error: `Unsupported MIME type: ${args.mime_type}. Supported: ${Array.from(SUPPORTED_MIME_TYPES).join(', ')}` };
  }

+  if (hasUrl) {
+    const urlValidation = validateUrl(args.url!);
+    if (!urlValidation.valid) {
+      return urlValidation;
+    }
+  }
+
  return { valid: true };
 }

@@ -136,7 +168,8 @@ export function createAudioTranscribeTool(audioConfig?: AudioTranscriptionConfig
          const urlExt = args.url.split('.').pop()?.split('?')[0] || 'bin';
          filename = `audio.${urlExt}`;

-          audioBlob = new Blob([arrayBuffer], { type: 'audio/wav' });
+          const contentType = response.headers.get('content-type') ?? 'audio/wav';
+          audioBlob = new Blob([arrayBuffer], { type: contentType });
        }

        const endpoint = audioConfig.endpoint;