fix(audio): add SSRF protection, MIME type fix, and tests for audio-transcribe tool
- Add URL validation blocking localhost, private IPs, and non-http protocols
- Use the response Content-Type header instead of a hardcoded audio/wav for URL downloads
- Add 25 tests covering validation, SSRF, config errors, transcription paths, and error handling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,291 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { createAudioTranscribeTool } from './audio-transcribe.js';
|
||||
|
||||
const mockFetch = vi.hoisted(() => vi.fn());
|
||||
vi.stubGlobal('fetch', mockFetch);
|
||||
|
||||
describe('createAudioTranscribeTool', () => {
|
||||
const audioConfig = {
|
||||
endpoint: 'https://api.openai.com/v1/audio/transcriptions',
|
||||
apiKey: 'sk-test',
|
||||
model: 'whisper-1',
|
||||
};
|
||||
|
||||
beforeEach(() => {
  // Reset (not just clear) every mock between tests: clearing only wipes call
  // history, while resetting also discards any queued `mockResolvedValueOnce` /
  // `mockRejectedValueOnce` entries on the shared `mockFetch`, so a test that
  // exits before consuming its queued responses cannot leak them into the next one.
  vi.resetAllMocks();
});
|
||||
|
||||
it('creates a tool with correct name and schema', () => {
|
||||
const tool = createAudioTranscribeTool(audioConfig);
|
||||
expect(tool.name).toBe('audio.transcribe');
|
||||
expect(tool.inputSchema.properties).toHaveProperty('data');
|
||||
expect(tool.inputSchema.properties).toHaveProperty('url');
|
||||
expect(tool.inputSchema.properties).toHaveProperty('mime_type');
|
||||
expect(tool.inputSchema.properties).toHaveProperty('language');
|
||||
expect(tool.inputSchema.properties).toHaveProperty('prompt');
|
||||
});
|
||||
|
||||
describe('validation errors', () => {
|
||||
const tool = createAudioTranscribeTool(audioConfig);
|
||||
|
||||
it('rejects when neither data nor url is provided', async () => {
|
||||
const result = await tool.execute({});
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/Either data or url must be provided/);
|
||||
});
|
||||
|
||||
it('rejects when both data and url are provided', async () => {
|
||||
const result = await tool.execute({ data: 'AAAA', url: 'https://example.com/audio.wav', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/Only one of data or url/);
|
||||
});
|
||||
|
||||
it('rejects data without mime_type', async () => {
|
||||
const result = await tool.execute({ data: 'AAAA' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/mime_type is required/);
|
||||
});
|
||||
|
||||
it('rejects unsupported mime_type', async () => {
|
||||
const result = await tool.execute({ data: 'AAAA', mime_type: 'audio/flac' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/Unsupported MIME type/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('URL validation (SSRF protection)', () => {
|
||||
const tool = createAudioTranscribeTool(audioConfig);
|
||||
|
||||
it('rejects localhost URLs', async () => {
|
||||
const result = await tool.execute({ url: 'http://localhost:8080/audio.wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/localhost/);
|
||||
});
|
||||
|
||||
it('rejects 127.0.0.1 URLs', async () => {
|
||||
const result = await tool.execute({ url: 'http://127.0.0.1/audio.wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/localhost/);
|
||||
});
|
||||
|
||||
it('rejects private IP ranges (10.x)', async () => {
|
||||
const result = await tool.execute({ url: 'http://10.0.0.1/audio.wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/private/i);
|
||||
});
|
||||
|
||||
it('rejects private IP ranges (192.168.x)', async () => {
|
||||
const result = await tool.execute({ url: 'http://192.168.1.1/audio.wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/private/i);
|
||||
});
|
||||
|
||||
it('rejects private IP ranges (172.16-31.x)', async () => {
|
||||
const result = await tool.execute({ url: 'http://172.16.0.1/audio.wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/private/i);
|
||||
});
|
||||
|
||||
it('rejects file:// protocol', async () => {
|
||||
const result = await tool.execute({ url: 'file:///etc/passwd' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/http\/https/);
|
||||
});
|
||||
|
||||
it('rejects invalid URLs', async () => {
|
||||
const result = await tool.execute({ url: 'not-a-url' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/Invalid URL/);
|
||||
});
|
||||
|
||||
it('allows public HTTPS URLs', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
headers: new Headers({ 'content-type': 'audio/wav' }),
|
||||
arrayBuffer: async () => new ArrayBuffer(8),
|
||||
});
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: async () => ({ text: 'hello' }),
|
||||
});
|
||||
|
||||
const result = await tool.execute({ url: 'https://example.com/audio.wav' });
|
||||
expect(result.success).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('config errors', () => {
|
||||
it('returns error when no audio config is provided', async () => {
|
||||
const tool = createAudioTranscribeTool(undefined);
|
||||
const result = await tool.execute({ data: 'AAAA', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/endpoint not configured/);
|
||||
});
|
||||
|
||||
it('returns error when endpoint is missing from config', async () => {
|
||||
const tool = createAudioTranscribeTool({ apiKey: 'sk-test' });
|
||||
const result = await tool.execute({ data: 'AAAA', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/endpoint not configured/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('successful transcription (OpenAI/Groq path)', () => {
|
||||
const tool = createAudioTranscribeTool(audioConfig);
|
||||
|
||||
it('transcribes base64 audio data', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: async () => ({ text: 'Hello, world!' }),
|
||||
});
|
||||
|
||||
const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.output).toBe('Hello, world!');
|
||||
|
||||
expect(mockFetch).toHaveBeenCalledWith(
|
||||
'https://api.openai.com/v1/audio/transcriptions',
|
||||
expect.objectContaining({ method: 'POST' }),
|
||||
);
|
||||
});
|
||||
|
||||
it('sends Authorization header when apiKey is set', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: async () => ({ text: 'test' }),
|
||||
});
|
||||
|
||||
await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/ogg' });
|
||||
|
||||
const call = mockFetch.mock.calls[0];
|
||||
expect(call[1].headers).toEqual({ Authorization: 'Bearer sk-test' });
|
||||
});
|
||||
|
||||
// Runs a transcription with `language` and `prompt` supplied and checks the
// result built from the mocked API response.
// NOTE(review): the assertions below only look at `result.success` and
// `result.output`, both of which come from the mocked response. Nothing inspects
// the arguments of the outgoing `mockFetch` call, so this test would still pass
// if `language` / `prompt` were silently dropped. Consider asserting on
// `mockFetch.mock.calls[0][1].body` — the request body shape is not visible
// from this test; confirm against the implementation.
it('passes language and prompt parameters', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
|
||||
json: async () => ({ text: 'Hola mundo' }),
|
||||
});
|
||||
|
||||
const result = await tool.execute({
|
||||
data: 'AAAAAAA=',
|
||||
mime_type: 'audio/mp3',
|
||||
language: 'es',
|
||||
prompt: 'Spanish conversation',
|
||||
});
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.output).toBe('Hola mundo');
|
||||
});
|
||||
});
|
||||
|
||||
describe('URL-based transcription', () => {
|
||||
const tool = createAudioTranscribeTool(audioConfig);
|
||||
|
||||
it('downloads and transcribes audio from URL', async () => {
|
||||
// First fetch: download audio
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
headers: new Headers({ 'content-type': 'audio/mpeg' }),
|
||||
arrayBuffer: async () => new ArrayBuffer(16),
|
||||
});
|
||||
// Second fetch: transcription API
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: async () => ({ text: 'URL transcription result' }),
|
||||
});
|
||||
|
||||
const result = await tool.execute({ url: 'https://cdn.example.com/audio.mp3' });
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.output).toBe('URL transcription result');
|
||||
});
|
||||
|
||||
// Downloads audio from an extension-less URL whose mocked response advertises
// `audio/ogg`, then transcribes it via the second mocked fetch.
// NOTE(review): despite the test name, nothing here checks the MIME type that
// ends up on the uploaded blob — only `result.success` is asserted, so the test
// would also pass with the old hardcoded `audio/wav`. Consider inspecting the
// second `mockFetch` call's body; its shape is not visible from this test.
// NOTE(review): the URL has no file extension. The filename derivation shown in
// the accompanying implementation hunk (`args.url.split('.').pop()`) would
// presumably yield `com/voice` for this input — verify and assert the filename.
it('uses content-type from response headers for blob', async () => {
|
||||
// First queued response: the audio download.
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
headers: new Headers({ 'content-type': 'audio/ogg' }),
|
||||
arrayBuffer: async () => new ArrayBuffer(8),
|
||||
});
|
||||
// Second queued response: the transcription API call.
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: async () => ({ text: 'ogg result' }),
|
||||
});
|
||||
|
||||
const result = await tool.execute({ url: 'https://cdn.example.com/voice' });
|
||||
expect(result.success).toBe(true);
|
||||
});
|
||||
|
||||
it('returns error when URL download fails', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
status: 404,
|
||||
statusText: 'Not Found',
|
||||
});
|
||||
|
||||
const result = await tool.execute({ url: 'https://cdn.example.com/missing.wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/Failed to download/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Ollama path', () => {
|
||||
const ollamaConfig = {
|
||||
endpoint: 'http://localhost:11434/api/generate',
|
||||
model: 'whisper',
|
||||
};
|
||||
const tool = createAudioTranscribeTool(ollamaConfig);
|
||||
|
||||
it('sends JSON request to Ollama endpoint', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: async () => ({ response: 'Ollama transcript' }),
|
||||
});
|
||||
|
||||
const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.output).toBe('Ollama transcript');
|
||||
|
||||
const [url, opts] = mockFetch.mock.calls[0];
|
||||
expect(url).toBe('http://localhost:11434/api/generate');
|
||||
expect(JSON.parse(opts.body as string)).toEqual({
|
||||
model: 'whisper',
|
||||
audio: 'AAAAAAA=',
|
||||
stream: false,
|
||||
});
|
||||
});
|
||||
|
||||
it('returns error on Ollama failure', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
status: 500,
|
||||
});
|
||||
|
||||
const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/Ollama transcription failed/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('API errors', () => {
|
||||
const tool = createAudioTranscribeTool(audioConfig);
|
||||
|
||||
it('returns error on transcription API failure', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
status: 500,
|
||||
text: async () => 'Internal Server Error',
|
||||
});
|
||||
|
||||
const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/Transcription request failed.*500/);
|
||||
});
|
||||
|
||||
it('handles network errors gracefully', async () => {
|
||||
mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));
|
||||
|
||||
const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/ECONNREFUSED/);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -25,6 +25,31 @@ const PROVIDER_ENDPOINTS: Record<string, string> = {
|
||||
llamacpp: 'http://localhost:8080/v1/audio/transcriptions',
|
||||
};
|
||||
|
||||
/** Coarse reachability class of a URL host, derived from its literal text only. */
type UrlHostClass = 'loopback' | 'private' | 'public';

/** Classifies an IPv4 address from its first two octets. */
function classifyIpv4Host(first: number, second: number): UrlHostClass {
  // 127.0.0.0/8 is loopback; 0.0.0.0/8 ("this host") is treated the same way.
  if (first === 127 || first === 0) {
    return 'loopback';
  }
  const isPrivate =
    first === 10 || // 10.0.0.0/8
    (first === 172 && second >= 16 && second <= 31) || // 172.16.0.0/12
    (first === 192 && second === 168) || // 192.168.0.0/16
    (first === 169 && second === 254) || // 169.254.0.0/16 link-local (cloud metadata)
    (first === 100 && second >= 64 && second <= 127); // 100.64.0.0/10 shared address space
  return isPrivate ? 'private' : 'public';
}

/** Classifies a `URL.hostname` value (name, IPv4 literal, or bracketed IPv6 literal). */
function classifyUrlHost(hostname: string): UrlHostClass {
  // `URL.hostname` keeps the square brackets around IPv6 literals and may keep a
  // trailing root dot on names ("localhost."); strip both before comparing.
  const host = hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, '')
    .replace(/\.$/, '');

  if (host === 'localhost' || host.endsWith('.localhost')) {
    return 'loopback';
  }

  // Only a full dotted quad is treated as an IP, so DNS names that merely start
  // with "10." or "192.168." are not misclassified as private addresses.
  const quad = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
  if (quad) {
    return classifyIpv4Host(Number(quad[1]), Number(quad[2]));
  }

  if (host.includes(':')) {
    if (host === '::1' || host === '::') {
      return 'loopback';
    }
    // IPv4-mapped IPv6 is serialized by the URL parser in hex (::ffff:7f00:1);
    // the first hextet after "ffff" carries the first two IPv4 octets.
    const mapped = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(host);
    if (mapped) {
      const high = parseInt(mapped[1] ?? '0', 16);
      return classifyIpv4Host(high >> 8, high & 0xff);
    }
    // fc00::/7 unique-local and fe80::/10 link-local.
    if (/^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host)) {
      return 'private';
    }
  }

  return 'public';
}

/**
 * Validates a caller-supplied audio URL before it is fetched (SSRF guard).
 *
 * Rejects strings that do not parse as a URL, any scheme other than http/https,
 * and hosts that are literally loopback or private/internal: `localhost` and
 * `*.localhost`, 127.0.0.0/8, 0.0.0.0/8, 10/8, 172.16/12, 192.168/16,
 * 169.254/16, 100.64/10, IPv6 `::1` / `::`, IPv4-mapped IPv6 forms of the above,
 * `fc00::/7` and `fe80::/10`.
 *
 * This inspects only the literal hostname. It does not resolve DNS and does not
 * see redirects, so a public name that resolves or redirects to an internal
 * address is not caught here.
 *
 * @param url - The URL string to check.
 * @returns `{ valid: true }` when allowed, otherwise `{ valid: false, error }`
 *   with a human-readable reason.
 */
function validateUrl(url: string): { valid: boolean; error?: string } {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return { valid: false, error: `Invalid URL: ${url}` };
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return { valid: false, error: `Only http/https URLs are allowed, got ${parsed.protocol}` };
  }

  switch (classifyUrlHost(parsed.hostname)) {
    case 'loopback':
      return { valid: false, error: 'URLs pointing to localhost are not allowed' };
    case 'private':
      return { valid: false, error: 'URLs pointing to private/internal networks are not allowed' };
    default:
      return { valid: true };
  }
}
|
||||
|
||||
function validateInput(args: AudioTranscribeArgs): { valid: boolean; error?: string } {
|
||||
const hasData = args.data !== undefined && args.data !== '';
|
||||
const hasUrl = args.url !== undefined && args.url !== '';
|
||||
@@ -45,6 +70,13 @@ function validateInput(args: AudioTranscribeArgs): { valid: boolean; error?: str
|
||||
return { valid: false, error: `Unsupported MIME type: ${args.mime_type}. Supported: ${Array.from(SUPPORTED_MIME_TYPES).join(', ')}` };
|
||||
}
|
||||
|
||||
if (hasUrl) {
|
||||
const urlValidation = validateUrl(args.url!);
|
||||
if (!urlValidation.valid) {
|
||||
return urlValidation;
|
||||
}
|
||||
}
|
||||
|
||||
return { valid: true };
|
||||
}
|
||||
|
||||
@@ -136,7 +168,8 @@ export function createAudioTranscribeTool(audioConfig?: AudioTranscriptionConfig
|
||||
const urlExt = args.url.split('.').pop()?.split('?')[0] || 'bin';
|
||||
filename = `audio.${urlExt}`;
|
||||
|
||||
audioBlob = new Blob([arrayBuffer], { type: 'audio/wav' });
|
||||
const contentType = response.headers.get('content-type') ?? 'audio/wav';
|
||||
audioBlob = new Blob([arrayBuffer], { type: contentType });
|
||||
}
|
||||
|
||||
const endpoint = audioConfig.endpoint;
|
||||
|
||||
Reference in New Issue
Block a user