fix(audio): add SSRF protection, MIME type fix, and tests for audio-transcribe tool

- Add URL validation blocking localhost, private IPs, and non-http protocols
- Use response Content-Type header instead of hardcoded audio/wav for URL downloads
- Add 25 tests covering validation, SSRF, config errors, transcription paths, and error handling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
William Valentin
2026-02-11 21:57:45 -08:00
parent a8a2c59313
commit 0b44adbaea
2 changed files with 325 additions and 1 deletions
+291
View File
@@ -0,0 +1,291 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { createAudioTranscribeTool } from './audio-transcribe.js';
// Shared stand-in for the global `fetch`. It is created through vi.hoisted()
// and then installed with vi.stubGlobal(), so every call to the global `fetch`
// made while this file runs lands on this mock.
const mockFetch = vi.hoisted(() => vi.fn());
// Replace the global `fetch` with the mock for the whole test file.
vi.stubGlobal('fetch', mockFetch);
describe('createAudioTranscribeTool', () => {
// Baseline configuration shared by the suites below: an OpenAI-style
// transcription endpoint together with an API key and a model name.
const audioConfig = {
  endpoint: 'https://api.openai.com/v1/audio/transcriptions',
  apiKey: 'sk-test',
  model: 'whisper-1',
};

beforeEach(() => {
  // resetAllMocks (rather than clearAllMocks) also discards any
  // mockResolvedValueOnce / mockRejectedValueOnce entries that a previous test
  // queued but never consumed, so one failing test cannot hand stale responses
  // to the tests that run after it.
  vi.resetAllMocks();
});
it('creates a tool with correct name and schema', () => {
  // The tool must expose its public name and every documented input field.
  const created = createAudioTranscribeTool(audioConfig);
  expect(created.name).toBe('audio.transcribe');

  const schemaProperties = created.inputSchema.properties;
  for (const field of ['data', 'url', 'mime_type', 'language', 'prompt']) {
    expect(schemaProperties).toHaveProperty(field);
  }
});
describe('validation errors', () => {
  const tool = createAudioTranscribeTool(audioConfig);

  // Each entry: test title, arguments handed to execute(), expected error text.
  const rejectedInputs: Array<{ title: string; args: Record<string, string>; expected: RegExp }> = [
    {
      title: 'rejects when neither data nor url is provided',
      args: {},
      expected: /Either data or url must be provided/,
    },
    {
      title: 'rejects when both data and url are provided',
      args: { data: 'AAAA', url: 'https://example.com/audio.wav', mime_type: 'audio/wav' },
      expected: /Only one of data or url/,
    },
    {
      title: 'rejects data without mime_type',
      args: { data: 'AAAA' },
      expected: /mime_type is required/,
    },
    {
      title: 'rejects unsupported mime_type',
      args: { data: 'AAAA', mime_type: 'audio/flac' },
      expected: /Unsupported MIME type/,
    },
  ];

  for (const { title, args, expected } of rejectedInputs) {
    it(title, async () => {
      const outcome = await tool.execute(args);
      expect(outcome.success).toBe(false);
      expect(outcome.error).toMatch(expected);
    });
  }
});
describe('URL validation (SSRF protection)', () => {
  const tool = createAudioTranscribeTool(audioConfig);

  // [test title, URL that must be refused, pattern the error must match]
  const refusedUrls: Array<[string, string, RegExp]> = [
    ['rejects localhost URLs', 'http://localhost:8080/audio.wav', /localhost/],
    ['rejects 127.0.0.1 URLs', 'http://127.0.0.1/audio.wav', /localhost/],
    ['rejects private IP ranges (10.x)', 'http://10.0.0.1/audio.wav', /private/i],
    ['rejects private IP ranges (192.168.x)', 'http://192.168.1.1/audio.wav', /private/i],
    ['rejects private IP ranges (172.16-31.x)', 'http://172.16.0.1/audio.wav', /private/i],
    ['rejects file:// protocol', 'file:///etc/passwd', /http\/https/],
    ['rejects invalid URLs', 'not-a-url', /Invalid URL/],
  ];

  for (const [title, url, expectedError] of refusedUrls) {
    it(title, async () => {
      const outcome = await tool.execute({ url });
      expect(outcome.success).toBe(false);
      expect(outcome.error).toMatch(expectedError);
    });
  }

  it('allows public HTTPS URLs', async () => {
    // Responses are consumed in order: the audio download first, then the
    // transcription request.
    const downloadResponse = {
      ok: true,
      headers: new Headers({ 'content-type': 'audio/wav' }),
      arrayBuffer: async () => new ArrayBuffer(8),
    };
    const transcriptionResponse = {
      ok: true,
      json: async () => ({ text: 'hello' }),
    };
    mockFetch.mockResolvedValueOnce(downloadResponse);
    mockFetch.mockResolvedValueOnce(transcriptionResponse);

    const outcome = await tool.execute({ url: 'https://example.com/audio.wav' });
    expect(outcome.success).toBe(true);
  });
});
describe('config errors', () => {
  // Both cases submit the same well-formed inline audio; only the tool
  // configuration differs.
  it('returns error when no audio config is provided', async () => {
    const unconfigured = createAudioTranscribeTool(undefined);
    const { success, error } = await unconfigured.execute({ data: 'AAAA', mime_type: 'audio/wav' });
    expect(success).toBe(false);
    expect(error).toMatch(/endpoint not configured/);
  });

  it('returns error when endpoint is missing from config', async () => {
    const withoutEndpoint = createAudioTranscribeTool({ apiKey: 'sk-test' });
    const { success, error } = await withoutEndpoint.execute({ data: 'AAAA', mime_type: 'audio/wav' });
    expect(success).toBe(false);
    expect(error).toMatch(/endpoint not configured/);
  });
});
describe('successful transcription (OpenAI/Groq path)', () => {
  const tool = createAudioTranscribeTool(audioConfig);

  // Queues one successful transcription API response carrying `text`.
  const queueTranscript = (text: string) =>
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ text }),
    });

  it('transcribes base64 audio data', async () => {
    queueTranscript('Hello, world!');

    const outcome = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });

    expect(outcome.success).toBe(true);
    expect(outcome.output).toBe('Hello, world!');
    // The request must be a POST to the configured endpoint.
    expect(mockFetch).toHaveBeenCalledWith(
      'https://api.openai.com/v1/audio/transcriptions',
      expect.objectContaining({ method: 'POST' }),
    );
  });

  it('sends Authorization header when apiKey is set', async () => {
    queueTranscript('test');

    await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/ogg' });

    // Second argument of the first fetch call is the request init object.
    const [, requestInit] = mockFetch.mock.calls[0];
    expect(requestInit.headers).toEqual({ Authorization: 'Bearer sk-test' });
  });

  it('passes language and prompt parameters', async () => {
    queueTranscript('Hola mundo');

    const request = {
      data: 'AAAAAAA=',
      mime_type: 'audio/mp3',
      language: 'es',
      prompt: 'Spanish conversation',
    };
    const outcome = await tool.execute(request);

    expect(outcome.success).toBe(true);
    expect(outcome.output).toBe('Hola mundo');
  });
});
describe('URL-based transcription', () => {
  const tool = createAudioTranscribeTool(audioConfig);

  // Queues a successful audio download with the given Content-Type and size.
  const queueDownload = (contentType: string, byteLength: number) =>
    mockFetch.mockResolvedValueOnce({
      ok: true,
      headers: new Headers({ 'content-type': contentType }),
      arrayBuffer: async () => new ArrayBuffer(byteLength),
    });

  // Queues a successful transcription API response carrying `text`.
  const queueTranscript = (text: string) =>
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ text }),
    });

  it('downloads and transcribes audio from URL', async () => {
    // Order matters: the download is fetched first, the transcription second.
    queueDownload('audio/mpeg', 16);
    queueTranscript('URL transcription result');

    const outcome = await tool.execute({ url: 'https://cdn.example.com/audio.mp3' });

    expect(outcome.success).toBe(true);
    expect(outcome.output).toBe('URL transcription result');
  });

  it('uses content-type from response headers for blob', async () => {
    queueDownload('audio/ogg', 8);
    queueTranscript('ogg result');

    const outcome = await tool.execute({ url: 'https://cdn.example.com/voice' });

    expect(outcome.success).toBe(true);
  });

  it('returns error when URL download fails', async () => {
    const notFound = { ok: false, status: 404, statusText: 'Not Found' };
    mockFetch.mockResolvedValueOnce(notFound);

    const outcome = await tool.execute({ url: 'https://cdn.example.com/missing.wav' });

    expect(outcome.success).toBe(false);
    expect(outcome.error).toMatch(/Failed to download/);
  });
});
describe('Ollama path', () => {
  // Configuration whose endpoint is an Ollama generate URL and has no API key.
  const ollamaEndpoint = 'http://localhost:11434/api/generate';
  const ollamaConfig = {
    endpoint: ollamaEndpoint,
    model: 'whisper',
  };
  const tool = createAudioTranscribeTool(ollamaConfig);
  const wavInput = { data: 'AAAAAAA=', mime_type: 'audio/wav' };

  it('sends JSON request to Ollama endpoint', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ response: 'Ollama transcript' }),
    });

    const outcome = await tool.execute({ ...wavInput });

    expect(outcome.success).toBe(true);
    expect(outcome.output).toBe('Ollama transcript');

    // Inspect the single request that was issued.
    const firstCall = mockFetch.mock.calls[0];
    expect(firstCall[0]).toBe('http://localhost:11434/api/generate');
    const sentPayload = JSON.parse(firstCall[1].body as string);
    expect(sentPayload).toEqual({
      model: 'whisper',
      audio: 'AAAAAAA=',
      stream: false,
    });
  });

  it('returns error on Ollama failure', async () => {
    mockFetch.mockResolvedValueOnce({ ok: false, status: 500 });

    const outcome = await tool.execute({ ...wavInput });

    expect(outcome.success).toBe(false);
    expect(outcome.error).toMatch(/Ollama transcription failed/);
  });
});
describe('API errors', () => {
  const tool = createAudioTranscribeTool(audioConfig);

  // Runs the tool with a small, valid inline WAV payload.
  const transcribeInlineWav = () => tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });

  it('returns error on transcription API failure', async () => {
    const serverError = {
      ok: false,
      status: 500,
      text: async () => 'Internal Server Error',
    };
    mockFetch.mockResolvedValueOnce(serverError);

    const outcome = await transcribeInlineWav();

    expect(outcome.success).toBe(false);
    expect(outcome.error).toMatch(/Transcription request failed.*500/);
  });

  it('handles network errors gracefully', async () => {
    // fetch itself rejects; the tool is expected to report it, not throw.
    mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));

    const outcome = await transcribeInlineWav();

    expect(outcome.success).toBe(false);
    expect(outcome.error).toMatch(/ECONNREFUSED/);
  });
});
});
+34 -1
View File
@@ -25,6 +25,31 @@ const PROVIDER_ENDPOINTS: Record<string, string> = {
llamacpp: 'http://localhost:8080/v1/audio/transcriptions',
};
/**
 * Validates a caller-supplied audio URL before it is downloaded (SSRF guard).
 *
 * Checks are applied in this order:
 *  1. the string must parse with `new URL()`;
 *  2. the scheme must be `http:` or `https:`;
 *  3. the host must not be loopback/unspecified: `localhost`, `*.localhost`,
 *     any 127.0.0.0/8 address, `0.0.0.0`, `::1`, `::`;
 *  4. the host must not be a private or otherwise internal address:
 *     0.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 172.16.0.0/12,
 *     192.168.0.0/16, IPv6 unique-local fc00::/7 and link-local fe80::/10.
 *
 * IPv4-mapped IPv6 literals (`::ffff:a.b.c.d`) are unwrapped and checked as
 * IPv4. Only the literal host in the URL is inspected: a DNS name that resolves
 * to an internal address, or a redirect to one, is NOT detected here.
 *
 * @param url - Raw URL string received from the tool caller.
 * @returns `{ valid: true }` when the URL may be fetched, otherwise
 *          `{ valid: false, error }` with a human-readable reason.
 */
function validateUrl(url: string): { valid: boolean; error?: string } {
  const localhostError = 'URLs pointing to localhost are not allowed';
  const privateError = 'URLs pointing to private/internal networks are not allowed';

  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return { valid: false, error: `Invalid URL: ${url}` };
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return { valid: false, error: `Only http/https URLs are allowed, got ${parsed.protocol}` };
  }

  // Normalise the host: URL.hostname keeps the square brackets around IPv6
  // literals ("[::1]") and keeps a trailing root dot ("localhost."), both of
  // which would defeat plain string comparisons.
  let hostname = parsed.hostname.toLowerCase();
  if (hostname.startsWith('[') && hostname.endsWith(']')) {
    hostname = hostname.slice(1, -1);
  }
  if (hostname.endsWith('.')) {
    hostname = hostname.slice(0, -1);
  }

  // IPv4-mapped IPv6 is serialised by URL as two hex groups ("::ffff:7f00:1");
  // convert it back to dotted-quad so the IPv4 rules below apply to it.
  const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(hostname);
  if (mapped) {
    const [, highHex = '0', lowHex = '0'] = mapped;
    const high = parseInt(highHex, 16);
    const low = parseInt(lowHex, 16);
    hostname = `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`;
  }

  if (
    hostname === 'localhost' ||
    hostname.endsWith('.localhost') ||
    hostname === '::1' ||
    hostname === '::'
  ) {
    return { valid: false, error: localhostError };
  }

  // IPv4 literal. For http/https the URL parser has already canonicalised
  // decimal/hex/octal/short forms to dotted-quad, so a full-match is enough.
  // Matching the whole host (not just a prefix) also avoids rejecting DNS
  // names such as "10.example.com".
  const quad = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(hostname);
  if (quad) {
    const [, firstText = '', secondText = ''] = quad;
    const first = Number(firstText);
    const second = Number(secondText);

    if (first === 127 || hostname === '0.0.0.0') {
      return { valid: false, error: localhostError };
    }

    const isInternal =
      first === 0 ||
      first === 10 ||
      (first === 100 && second >= 64 && second <= 127) ||
      (first === 169 && second === 254) ||
      (first === 172 && second >= 16 && second <= 31) ||
      (first === 192 && second === 168);
    if (isInternal) {
      return { valid: false, error: privateError };
    }

    return { valid: true };
  }

  // IPv6 unique-local (fc00::/7) and link-local (fe80::/10) literals.
  if (/^f[cd][0-9a-f]{2}:/.test(hostname) || /^fe[89ab][0-9a-f]:/.test(hostname)) {
    return { valid: false, error: privateError };
  }

  return { valid: true };
}
function validateInput(args: AudioTranscribeArgs): { valid: boolean; error?: string } {
const hasData = args.data !== undefined && args.data !== '';
const hasUrl = args.url !== undefined && args.url !== '';
@@ -45,6 +70,13 @@ function validateInput(args: AudioTranscribeArgs): { valid: boolean; error?: str
return { valid: false, error: `Unsupported MIME type: ${args.mime_type}. Supported: ${Array.from(SUPPORTED_MIME_TYPES).join(', ')}` };
}
if (hasUrl) {
const urlValidation = validateUrl(args.url!);
if (!urlValidation.valid) {
return urlValidation;
}
}
return { valid: true };
}
@@ -136,7 +168,8 @@ export function createAudioTranscribeTool(audioConfig?: AudioTranscriptionConfig
const urlExt = args.url.split('.').pop()?.split('?')[0] || 'bin';
filename = `audio.${urlExt}`;
audioBlob = new Blob([arrayBuffer], { type: 'audio/wav' });
const contentType = response.headers.get('content-type') ?? 'audio/wav';
audioBlob = new Blob([arrayBuffer], { type: contentType });
}
const endpoint = audioConfig.endpoint;