feat: implement tier-a4 tts voice output replies

This commit is contained in:
William Valentin
2026-02-18 10:22:28 -08:00
parent 3eb07875f1
commit a71aa5992d
11 changed files with 482 additions and 4 deletions
+1
View File
@@ -10,6 +10,7 @@ export { ModelRouter, type ModelRouterConfig, type ModelTier } from './router.js
export { withRetry, isRetryable, DEFAULT_RETRY_CONFIG, type RetryConfig } from './retry.js';
export { estimateCost, MODEL_COSTS_PER_MILLION } from './costs.js';
export { supportsAudioInput } from './capabilities.js';
export { synthesizeSpeechAttachment, type TtsSynthesisConfig, type TtsOutputFormat } from './tts.js';
export {
isSupportedImage,
isSupportedAudio,
+67
View File
@@ -0,0 +1,67 @@
import { afterEach, describe, expect, it, vi } from 'vitest';
import { synthesizeSpeechAttachment } from './tts.js';
// Unit tests for synthesizeSpeechAttachment. Network access is replaced by a
// spy on the global `fetch`, so no request ever leaves the process.
describe('synthesizeSpeechAttachment', () => {
  const endpoint = 'https://example.com/v1/audio/speech';

  // Drop any fetch spy after each case so mocks never leak between tests.
  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('returns null when text is empty', async () => {
    // Whitespace-only text counts as empty.
    await expect(synthesizeSpeechAttachment(' ', { endpoint })).resolves.toBeNull();
  });

  it('returns null when endpoint is missing', async () => {
    await expect(synthesizeSpeechAttachment('hello', {})).resolves.toBeNull();
  });

  it('returns an outbound audio attachment on success', async () => {
    // Four raw bytes whose base64 encoding is 'AQIDBA=='.
    const audioBytes = Uint8Array.from([1, 2, 3, 4]);
    const okResponse = {
      ok: true,
      status: 200,
      statusText: 'OK',
      arrayBuffer: async () => audioBytes.buffer,
    } as Response;
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValue(okResponse);

    const result = await synthesizeSpeechAttachment('Hello from Flynn', {
      endpoint,
      apiKey: 'sk-test',
      model: 'gpt-4o-mini-tts',
      voice: 'alloy',
      format: 'mp3',
    });

    // The request must be a JSON POST carrying the bearer token.
    const expectedHeaders = expect.objectContaining({
      'Content-Type': 'application/json',
      Authorization: 'Bearer sk-test',
    });
    expect(fetchSpy).toHaveBeenCalledWith(
      endpoint,
      expect.objectContaining({ method: 'POST', headers: expectedHeaders }),
    );

    expect(result).toMatchObject({ mimeType: 'audio/mpeg', data: 'AQIDBA==' });
    expect(result?.filename).toMatch(/^flynn-reply-\d+\.mp3$/);
  });

  it('throws when the tts endpoint returns an error', async () => {
    const rateLimitedResponse = {
      ok: false,
      status: 429,
      statusText: 'Too Many Requests',
      text: async () => 'rate limit',
    } as Response;
    vi.spyOn(globalThis, 'fetch').mockResolvedValue(rateLimitedResponse);

    const attempt = synthesizeSpeechAttachment('Hello', { endpoint });

    await expect(attempt).rejects.toThrow(/TTS request failed: 429 Too Many Requests/i);
  });
});
+88
View File
@@ -0,0 +1,88 @@
import type { OutboundAttachment } from '../channels/types.js';
/** Audio formats that can be requested from the TTS endpoint (sent as `response_format`). */
export type TtsOutputFormat = 'mp3' | 'wav' | 'opus';
/**
 * Options accepted by `synthesizeSpeechAttachment`.
 * Every field is optional; `model`, `voice` and `format` have defaults.
 */
export interface TtsSynthesisConfig {
/** URL the synthesis request is POSTed to. When unset, synthesis is skipped and `null` is returned. */
endpoint?: string;
/** When set, sent as an `Authorization: Bearer <apiKey>` request header. */
apiKey?: string;
/** Model name sent in the request body; defaults to `gpt-4o-mini-tts`. */
model?: string;
/** Voice name sent in the request body; defaults to `alloy`. */
voice?: string;
/** Requested audio format; defaults to `mp3`. */
format?: TtsOutputFormat;
}
/**
 * Map a TTS output format to the MIME type reported on the attachment.
 * Any value other than `wav` or `opus` is reported as `audio/mpeg`.
 */
function outputFormatToMimeType(format: TtsOutputFormat): string {
  if (format === 'wav') {
    return 'audio/wav';
  }
  if (format === 'opus') {
    return 'audio/ogg';
  }
  // 'mp3' and any unrecognised runtime value share the MP3 MIME type.
  return 'audio/mpeg';
}
/**
 * Map a TTS output format to the file extension used in the attachment name.
 * `opus` audio is given the `ogg` extension; any value other than `wav` or
 * `opus` yields `mp3`.
 */
function outputFormatToExtension(format: TtsOutputFormat): string {
  return format === 'wav' ? 'wav' : format === 'opus' ? 'ogg' : 'mp3';
}
/**
 * Synthesize speech via an OpenAI-compatible /v1/audio/speech endpoint.
 *
 * POSTs `{ model, voice, input, response_format }` as JSON and wraps the
 * returned audio bytes as a base64-encoded outbound attachment.
 *
 * @param text - Text to speak. Surrounding whitespace is trimmed before sending.
 * @param config - Endpoint, credentials and voice options. Unset options fall
 *   back to `gpt-4o-mini-tts` / `alloy` / `mp3`.
 * @returns The audio attachment, or `null` when the text is blank or no
 *   endpoint is configured (an empty or whitespace-only endpoint counts as
 *   not configured).
 * @throws Error when the endpoint answers with a non-OK status (the message
 *   includes the status and up to 200 characters of the response body) or with
 *   an empty audio body. Failures raised by `fetch` itself propagate unchanged.
 */
export async function synthesizeSpeechAttachment(
  text: string,
  config: TtsSynthesisConfig,
): Promise<OutboundAttachment | null> {
  const input = text.trim();
  if (!input) {
    return null;
  }

  // A whitespace-only endpoint is as unusable as a missing one; treat both as
  // "TTS not configured" instead of letting fetch fail on an invalid URL.
  const endpoint = config.endpoint?.trim();
  if (!endpoint) {
    return null;
  }

  const format = config.format ?? 'mp3';
  const model = config.model ?? 'gpt-4o-mini-tts';
  const voice = config.voice ?? 'alloy';

  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  };
  // Only send credentials when a key is configured (e.g. local servers need none).
  if (config.apiKey) {
    headers.Authorization = `Bearer ${config.apiKey}`;
  }

  const response = await fetch(endpoint, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      model,
      voice,
      input,
      response_format: format,
    }),
  });

  if (!response.ok) {
    // Reading the error body is best-effort; the status line alone is still useful.
    const detail = await response.text().catch(() => '');
    throw new Error(
      `TTS request failed: ${response.status} ${response.statusText}${detail ? ` - ${detail.slice(0, 200)}` : ''}`,
    );
  }

  const audioBytes = await response.arrayBuffer();
  // A successful status with no payload would otherwise become a zero-byte,
  // unplayable attachment; surface it as a failure like any other TTS error.
  if (audioBytes.byteLength === 0) {
    throw new Error('TTS request failed: endpoint returned an empty audio response');
  }

  return {
    mimeType: outputFormatToMimeType(format),
    data: Buffer.from(audioBytes).toString('base64'),
    filename: `flynn-reply-${Date.now()}.${outputFormatToExtension(format)}`,
  };
}