feat: harden voice reliability with tts fallback and talk controls

This commit is contained in:
William Valentin
2026-02-26 17:29:23 -08:00
parent 2a9bed8c91
commit 163b1a0139
13 changed files with 781 additions and 17 deletions
+171
View File
@@ -8,6 +8,65 @@ export interface TtsSynthesisConfig {
model?: string;
voice?: string;
format?: TtsOutputFormat;
/** Optional provider identity used by fallback/health tracking. */
id?: string;
/** Optional provider type hint for endpoint defaults. */
type?: 'openai' | 'custom';
}
export interface TtsFallbackConfig {
/** Maximum number of providers to try per reply. */
maxAttempts?: number;
/** Cooldown window before retrying a provider that failed. */
failureCooldownMs?: number;
}
export interface TtsProviderHealth {
consecutiveFailures: number;
cooldownUntil: number;
lastFailureAt?: number;
lastError?: string;
}
export interface TtsFallbackResult {
attachment: OutboundAttachment | null;
providerId?: string;
attemptedProviders: string[];
skippedProviders: string[];
lastError?: string;
}
export class TtsHealthTracker {
private readonly states = new Map<string, TtsProviderHealth>();
isHealthy(providerId: string, now = Date.now()): boolean {
const state = this.states.get(providerId);
if (!state) {
return true;
}
return state.cooldownUntil <= now;
}
markSuccess(providerId: string): void {
this.states.delete(providerId);
}
markFailure(providerId: string, error: unknown, now = Date.now(), failureCooldownMs = 60_000): void {
const previous = this.states.get(providerId);
const consecutiveFailures = (previous?.consecutiveFailures ?? 0) + 1;
const message = error instanceof Error ? error.message : String(error);
this.states.set(providerId, {
consecutiveFailures,
cooldownUntil: now + Math.max(1_000, failureCooldownMs),
lastFailureAt: now,
lastError: message,
});
}
getState(providerId: string): TtsProviderHealth | undefined {
return this.states.get(providerId);
}
}
function outputFormatToMimeType(format: TtsOutputFormat): string {
@@ -34,6 +93,26 @@ function outputFormatToExtension(format: TtsOutputFormat): string {
}
}
function resolveProviderEndpoint(config: TtsSynthesisConfig): string | undefined {
if (config.endpoint && config.endpoint.trim().length > 0) {
return config.endpoint;
}
if (config.type === 'openai') {
return 'https://api.openai.com/v1/audio/speech';
}
return undefined;
}
function normalizeMaxAttempts(maxAttempts: number | undefined, providerCount: number): number {
if (providerCount <= 0) {
return 0;
}
if (!Number.isFinite(maxAttempts)) {
return providerCount;
}
return Math.min(providerCount, Math.max(1, Math.floor(maxAttempts as number)));
}
/** Synthesize speech via an OpenAI-compatible /v1/audio/speech endpoint. */
export async function synthesizeSpeechAttachment(
text: string,
@@ -86,3 +165,95 @@ export async function synthesizeSpeechAttachment(
filename: `flynn-reply-${Date.now()}.${extension}`,
};
}
/**
* Attempt TTS synthesis against an ordered provider chain with health-aware fallback.
* When every provider fails, returns text fallback metadata instead of throwing.
*/
export async function synthesizeSpeechWithFallback(
text: string,
input: {
providers: TtsSynthesisConfig[];
fallback?: TtsFallbackConfig;
healthTracker?: TtsHealthTracker;
},
): Promise<TtsFallbackResult> {
const trimmed = text.trim();
if (!trimmed) {
return {
attachment: null,
attemptedProviders: [],
skippedProviders: [],
};
}
const providers = input.providers
.map((provider, index) => {
const endpoint = resolveProviderEndpoint(provider);
return {
...provider,
endpoint,
id: provider.id ?? `tts-provider-${index + 1}`,
};
})
.filter((provider) => typeof provider.endpoint === 'string' && provider.endpoint.length > 0);
if (providers.length === 0) {
return {
attachment: null,
attemptedProviders: [],
skippedProviders: [],
};
}
const healthTracker = input.healthTracker;
const now = Date.now();
const healthyProviders: Array<TtsSynthesisConfig & { id: string; endpoint: string }> = [];
const unhealthyProviders: Array<TtsSynthesisConfig & { id: string; endpoint: string }> = [];
for (const provider of providers) {
const typedProvider = provider as TtsSynthesisConfig & { id: string; endpoint: string };
if (!healthTracker || healthTracker.isHealthy(typedProvider.id, now)) {
healthyProviders.push(typedProvider);
} else {
unhealthyProviders.push(typedProvider);
}
}
const orderedProviders = healthyProviders.length > 0
? [...healthyProviders, ...unhealthyProviders]
: unhealthyProviders;
const maxAttempts = normalizeMaxAttempts(input.fallback?.maxAttempts, orderedProviders.length);
const attempts = orderedProviders.slice(0, maxAttempts);
const skippedProviders = orderedProviders.slice(maxAttempts).map((provider) => provider.id);
const attemptedProviders: string[] = [];
let lastError: string | undefined;
const failureCooldownMs = input.fallback?.failureCooldownMs ?? 60_000;
for (const provider of attempts) {
attemptedProviders.push(provider.id);
try {
const attachment = await synthesizeSpeechAttachment(trimmed, provider);
if (attachment) {
healthTracker?.markSuccess(provider.id);
return {
attachment,
providerId: provider.id,
attemptedProviders,
skippedProviders,
};
}
} catch (error) {
healthTracker?.markFailure(provider.id, error, Date.now(), failureCooldownMs);
lastError = error instanceof Error ? error.message : String(error);
}
}
return {
attachment: null,
attemptedProviders,
skippedProviders,
lastError,
};
}