feat: harden voice reliability with tts fallback and talk controls

This commit is contained in:
William Valentin
2026-02-26 17:29:23 -08:00
parent 2a9bed8c91
commit 163b1a0139
13 changed files with 781 additions and 17 deletions
+37
View File
@@ -870,6 +870,11 @@ describe('configSchema — tts', () => {
expect(result.tts.enabled).toBe(false);
expect(result.tts.enabled_channels).toEqual([]);
expect(result.tts.provider).toBeUndefined();
expect(result.tts.providers).toEqual([]);
expect(result.tts.fallback).toEqual({
max_attempts: 3,
failure_cooldown_ms: 60000,
});
});
it('accepts custom tts provider settings', () => {
@@ -886,6 +891,29 @@ describe('configSchema — tts', () => {
voice: 'nova',
format: 'wav',
},
providers: [
{
name: 'primary',
type: 'custom',
endpoint: 'https://tts-primary.example.com/v1/audio/speech',
api_key: 'sk-1',
model: 'gpt-4o-mini-tts',
voice: 'alloy',
format: 'mp3',
},
{
name: 'backup',
type: 'openai',
api_key: 'sk-2',
model: 'gpt-4o-mini-tts',
voice: 'nova',
format: 'opus',
},
],
fallback: {
max_attempts: 2,
failure_cooldown_ms: 90000,
},
},
});
@@ -899,6 +927,15 @@ describe('configSchema — tts', () => {
voice: 'nova',
format: 'wav',
});
expect(result.tts.providers).toHaveLength(2);
expect(result.tts.providers[0]).toMatchObject({
name: 'primary',
endpoint: 'https://tts-primary.example.com/v1/audio/speech',
});
expect(result.tts.fallback).toEqual({
max_attempts: 2,
failure_cooldown_ms: 90000,
});
});
});
+13
View File
@@ -842,6 +842,7 @@ const audioSchema = z.object({
const ttsOutputFormatSchema = z.enum(['mp3', 'wav', 'opus']);
const ttsProviderSchema = z.object({
name: z.string().min(1).optional(),
type: z.enum(['openai', 'custom']).default('openai'),
endpoint: z.string().optional(),
api_key: z.string().optional(),
@@ -850,11 +851,23 @@ const ttsProviderSchema = z.object({
format: ttsOutputFormatSchema.default('mp3'),
});
/**
 * Policy for walking the ordered TTS provider chain.
 * The object itself defaults to `{}`, so an omitted section resolves to the
 * per-field defaults declared below.
 */
const ttsFallbackSchema = z
  .object({
    /** How many providers may be tried, in order, before falling back to a text-only reply (1-10, default 3). */
    max_attempts: z.number().int().gte(1).lte(10).default(3),
    /** Cooldown in milliseconds applied to a provider after a synthesis failure (1000-3600000, default 60000). */
    failure_cooldown_ms: z.number().int().gte(1000).lte(3_600_000).default(60_000),
  })
  .default({});
/** Text-to-speech reply settings; the whole section defaults to `{}` when omitted. */
const ttsSchema = z
  .object({
    enabled: z.boolean().default(false),
    /** Channels allowed to receive voice replies; an empty list means every channel. */
    enabled_channels: z.string().min(1).array().default([]),
    /** Single-provider form kept for legacy configs. */
    provider: ttsProviderSchema.optional(),
    /** Providers in the order they are tried during synthesis fallback. */
    providers: ttsProviderSchema.array().default([]),
    /** Attempt limit and health/cooldown policy for the provider chain. */
    fallback: ttsFallbackSchema,
  })
  .default({});
// ── Tool policy schemas ──────────────────────────────────────────────
+174
View File
@@ -2393,6 +2393,100 @@ describe('daemon tts routing integration', () => {
expect(outbound?.attachments).toBeUndefined();
});
// Verifies the provider chain: with two configured TTS providers, a failed
// request to the first must be followed by a request to the second, and the
// second provider's audio must end up as the reply attachment.
it('falls back to secondary TTS provider when primary fails', async () => {
// The agent itself is stubbed out; only the TTS path is under test.
vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('fallback-chain response');
// Responses are queued in call order: the first fetch gets a 503, the second
// gets a 200 whose body is the three bytes [5, 6, 7].
const fetchSpy = vi.spyOn(globalThis, 'fetch')
.mockResolvedValueOnce({
ok: false,
status: 503,
statusText: 'Service Unavailable',
text: async () => 'primary down',
} as Response)
.mockResolvedValueOnce({
ok: true,
status: 200,
statusText: 'OK',
arrayBuffer: async () => Uint8Array.from([5, 6, 7]).buffer,
} as Response);
// Minimal session stub; every method is a no-op spy.
const session = {
id: 'telegram:tts-user-4',
addMessage: vi.fn(),
getHistory: vi.fn(() => []),
clear: vi.fn(),
replaceHistory: vi.fn(),
getConfig: vi.fn(() => undefined),
setConfig: vi.fn(),
deleteConfig: vi.fn(),
};
const router = createMessageRouter({
sessionManager: { getSession: vi.fn(() => session) } as unknown as MessageRouterDeps['sessionManager'],
modelRouter: {
getAvailableTiers: () => ['default'],
getAllLabels: () => ({ default: 'default' }),
getLabel: (tier: string) => tier,
} as unknown as MessageRouterDeps['modelRouter'],
systemPrompt: 'test prompt',
toolRegistry: { clone() { return this; }, register: vi.fn() } as unknown as MessageRouterDeps['toolRegistry'],
toolExecutor: {} as unknown as MessageRouterDeps['toolExecutor'],
config: {
agents: {
primary_tier: 'default',
delegation: {
compaction: 'default',
memory_extraction: 'default',
classification: 'default',
tool_summarisation: 'default',
complex_reasoning: 'default',
},
max_delegation_depth: 1,
max_iterations: 3,
},
compaction: { enabled: false },
models: { default: { provider: 'anthropic', model: 'claude' } },
// Two custom providers in priority order; max_attempts: 2 lets both be tried.
tts: {
enabled: true,
enabled_channels: ['telegram'],
providers: [
{
name: 'primary',
type: 'custom',
endpoint: 'https://tts-primary.example.com/v1/audio/speech',
},
{
name: 'backup',
type: 'custom',
endpoint: 'https://tts-backup.example.com/v1/audio/speech',
},
],
fallback: {
max_attempts: 2,
failure_cooldown_ms: 60000,
},
},
} as unknown as MessageRouterDeps['config'],
});
const reply = vi.fn(async (_message: OutboundMessage) => {});
await router.handler({
id: 'tts-4',
channel: 'telegram',
senderId: 'tts-user-4',
text: 'respond with provider fallback',
timestamp: Date.now(),
} as MessageRouterInput, reply);
// Exactly one request per provider, primary first and backup second.
expect(fetchSpy).toHaveBeenCalledTimes(2);
expect(fetchSpy.mock.calls[0]?.[0]).toBe('https://tts-primary.example.com/v1/audio/speech');
expect(fetchSpy.mock.calls[1]?.[0]).toBe('https://tts-backup.example.com/v1/audio/speech');
const outbound = reply.mock.calls[0]?.[0] as OutboundMessage | undefined;
// 'BQYH' is the base64 encoding of the bytes [5, 6, 7] served by the backup mock.
expect(outbound?.attachments?.[0]).toMatchObject({
mimeType: 'audio/mpeg',
data: 'BQYH',
});
});
it('falls back to text-only replies when tts synthesis fails', async () => {
vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('fallback response');
vi.spyOn(globalThis, 'fetch').mockRejectedValue(new Error('tts down'));
@@ -2909,4 +3003,84 @@ describe('daemon talk mode (voice wake) integration', () => {
expect(processSpy).toHaveBeenCalledOnce();
expect(processSpy).toHaveBeenCalledWith('what time is it?', undefined, undefined);
});
// Verifies that, inside an active talk-mode window, the bare word "cancel"
// cancels the in-flight run instead of being sent to the agent as a new prompt.
it('treats spoken cancel as /stop while talk mode is active', async () => {
const cancelSpy = vi.spyOn(AgentOrchestrator.prototype, 'cancel');
vi.spyOn(AgentOrchestrator.prototype, 'isCancellable').mockReturnValue(true);
const processSpy = vi.spyOn(AgentOrchestrator.prototype, 'process');
// Deferred handshake: `started` resolves once process() has been entered, and
// the promise process() returns stays pending until resolveFirst is called.
// This keeps the first run "active" while the cancel message is handled.
let resolveFirst: ((value: string) => void) | undefined;
let markStarted: (() => void) | undefined;
const started = new Promise<void>((resolve) => { markStarted = resolve; });
processSpy.mockImplementationOnce(() => {
markStarted?.();
return new Promise<string>((resolve) => { resolveFirst = resolve; });
});
// Minimal session stub; every method is a no-op spy.
const session = {
id: 'telegram:user-talk-2',
addMessage: vi.fn(),
getHistory: vi.fn(() => []),
clear: vi.fn(),
replaceHistory: vi.fn(),
getConfig: vi.fn(() => undefined),
setConfig: vi.fn(),
deleteConfig: vi.fn(),
};
// Built-in commands are registered on the registry handed to the router below.
const commandRegistry = new CommandRegistry();
registerBuiltinCommands(commandRegistry);
const router = createMessageRouter({
sessionManager: { getSession: vi.fn(() => session) } as unknown as MessageRouterDeps['sessionManager'],
modelRouter: {
getAvailableTiers: () => ['fast', 'default', 'complex', 'local'],
getAllLabels: () => ({ fast: 'fast', default: 'default', complex: 'complex', local: 'local' }),
getLabel: (tier: string) => tier,
} as unknown as MessageRouterDeps['modelRouter'],
systemPrompt: 'test prompt',
toolRegistry: { clone() { return this; }, register: vi.fn() } as unknown as MessageRouterDeps['toolRegistry'],
toolExecutor: {} as unknown as MessageRouterDeps['toolExecutor'],
config: {
agents: {
primary_tier: 'default',
delegation: { compaction: 'fast', memory_extraction: 'fast', classification: 'fast', tool_summarisation: 'fast', complex_reasoning: 'complex' },
max_delegation_depth: 3,
max_iterations: 10,
},
compaction: { enabled: false },
models: { default: { provider: 'anthropic', model: 'claude' } },
audio: { talk_mode: { enabled: true, wake_phrase: 'hey flynn', timeout_ms: 120000, allow_manual_toggle: true } },
} as unknown as MessageRouterDeps['config'],
commandRegistry,
});
const reply = vi.fn(async (_message: OutboundMessage) => {});
// First message carries the wake phrase, opening the talk window and starting
// a run. It is deliberately not awaited yet because process() is still pending.
const firstRun = router.handler({
id: 'm-talk-3',
channel: 'telegram',
senderId: 'user-talk-2',
text: 'hey flynn start a long task',
timestamp: Date.now(),
} as MessageRouterInput, reply);
// Wait until the first run has actually reached process() before cancelling.
await started;
// Second message from the same sender is just "cancel", with no wake phrase
// and no command metadata.
await router.handler({
id: 'm-talk-4',
channel: 'telegram',
senderId: 'user-talk-2',
text: 'cancel',
timestamp: Date.now(),
} as MessageRouterInput, reply);
// cancel() was invoked once and "cancel" never reached process() as a prompt.
expect(cancelSpy).toHaveBeenCalledTimes(1);
expect(processSpy).toHaveBeenCalledTimes(1);
expect(reply).toHaveBeenCalledWith(expect.objectContaining({
text: 'Cancellation requested. The active operation will stop at the next safe point.',
replyTo: 'm-talk-4',
}));
// Release the pending first run so its handler promise can settle.
resolveFirst?.('operation cancelled by user.');
await firstRun;
});
});
+43 -14
View File
@@ -1,7 +1,7 @@
import type { AudioTranscriptionConfig } from '../models/media.js';
import type { Attachment } from '../channels/types.js';
import { isSupportedAudio, transcribeAudio } from '../models/media.js';
import { synthesizeSpeechAttachment } from '../models/tts.js';
import { synthesizeSpeechWithFallback, TtsHealthTracker } from '../models/tts.js';
import { supportsAudioInput } from '../models/capabilities.js';
import { AgentOrchestrator, SubagentManager, type DelegationConfig } from '../backends/index.js';
import { OutboundAttachmentCollector } from '../backends/native/attachments.js';
@@ -397,6 +397,7 @@ export function createMessageRouter(deps: {
const talkModeUntil = new Map<string, number>();
const activeRuns = new Map<string, AgentOrchestrator>();
const reactionCooldowns = new Map<string, number>();
const ttsHealthTracker = new TtsHealthTracker();
function getBackendMode(): BackendRuntimeMode {
return deps.getBackendMode?.() ?? 'config_default';
@@ -518,24 +519,42 @@ export function createMessageRouter(deps: {
return undefined;
}
const provider = deps.config.tts?.provider;
const endpoint = provider?.endpoint ?? (provider?.type === 'openai' ? 'https://api.openai.com/v1/audio/speech' : undefined);
if (!endpoint) {
const configuredProviders = deps.config.tts?.providers ?? [];
const providers = configuredProviders.length > 0
? configuredProviders
: (deps.config.tts?.provider ? [deps.config.tts.provider] : []);
if (providers.length === 0) {
return undefined;
}
try {
return await synthesizeSpeechAttachment(responseText, {
endpoint,
apiKey: provider?.api_key,
model: provider?.model,
voice: provider?.voice,
format: provider?.format,
const outcome = await synthesizeSpeechWithFallback(responseText, {
providers: providers.map((provider, index) => ({
id: provider.name?.trim() || `tts-provider-${index + 1}`,
type: provider.type,
endpoint: provider.endpoint,
apiKey: provider.api_key,
model: provider.model,
voice: provider.voice,
format: provider.format,
})),
fallback: {
maxAttempts: deps.config.tts?.fallback?.max_attempts,
failureCooldownMs: deps.config.tts?.fallback?.failure_cooldown_ms,
},
healthTracker: ttsHealthTracker,
});
} catch (error) {
console.warn(`TTS synthesis failed for channel ${channel}:`, error instanceof Error ? error.message : 'Unknown error');
return undefined;
// Warn only when at least one provider was actually attempted and none
// produced an attachment.
if (!outcome.attachment && outcome.attemptedProviders.length > 0) {
  // Build the detail fields as a list and join once. The previous form chained
  // template literals with `+` and called `.trim()` on the result, but member
  // access binds tighter than `+`, so only the final (possibly empty) literal
  // was trimmed and the message kept a trailing space whenever lastError was
  // absent.
  const details = [
    `attempted=${outcome.attemptedProviders.join(',') || 'none'}`,
    `skipped=${outcome.skippedProviders.join(',') || 'none'}`,
  ];
  if (outcome.lastError) {
    details.push(`last_error=${outcome.lastError}`);
  }
  console.warn(`TTS synthesis fallback exhausted for channel ${channel}. ${details.join(' ')}`);
}
return outcome.attachment ?? undefined;
}
function getOrCreateAgent(
@@ -822,6 +841,7 @@ export function createMessageRouter(deps: {
let incomingText = msg.text;
let matchedReactionName: string | undefined;
const talkMode = deps.config.audio?.talk_mode;
let inTalkModeContext = false;
if (talkMode?.enabled && incomingText.trim().length > 0) {
const key = `${msg.channel}:${msg.senderId}`;
const now = Date.now();
@@ -858,6 +878,7 @@ export function createMessageRouter(deps: {
if (wakeMatched && wakeRegex) {
talkModeUntil.set(key, now + timeoutMs);
inTalkModeContext = true;
incomingText = incomingText.replace(wakeRegex, '').trim();
if (!incomingText) {
await reply({ text: `Listening. Talk mode active for ${Math.ceil(timeoutMs / 1000)}s.`, replyTo: msg.id });
@@ -865,11 +886,19 @@ export function createMessageRouter(deps: {
}
} else if (currentUntil > now) {
talkModeUntil.set(key, now + timeoutMs);
inTalkModeContext = true;
} else {
return;
}
}
// While a talk-mode window is active, a bare "stop" or "cancel" utterance is
// rewritten to the /stop command. Messages already flagged as commands are
// left untouched.
if (inTalkModeContext && !msg.metadata?.isCommand) {
  // NOTE(review): assumes transcribed or typed utterances may carry terminal
  // punctuation ("Stop.", "cancel!"). It is stripped before comparing so those
  // are not forwarded to the agent as a new prompt; confirm against the
  // transcription output actually seen in production.
  const spokenCommand = incomingText
    .trim()
    .toLowerCase()
    .replace(/[.!?,;:\s]+$/u, '');
  if (spokenCommand === 'stop' || spokenCommand === 'cancel') {
    incomingText = '/stop';
  }
}
const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
const queueMode = session.getConfig('queue.mode') ?? deps.config.server?.queue?.mode ?? 'collect';
const rawCommand = msg.metadata?.isCommand
+42
View File
@@ -661,6 +661,34 @@ const PATCHABLE_KEYS: Record<string, (config: Config, value: unknown) => boolean
config.audio.enabled = value;
return true;
},
// Switches talk mode on or off; anything other than a boolean is rejected.
'audio.talk_mode.enabled': (config, value) => {
  if (typeof value === 'boolean') {
    // Create the parent sections on demand so the leaf assignment is safe.
    const audio = (config.audio ??= {} as Config['audio']);
    const talkMode = (audio.talk_mode ??= {} as Config['audio']['talk_mode']);
    talkMode.enabled = value;
    return true;
  }
  return false;
},
// Sets the wake phrase; the value must be a string that is non-empty once
// trimmed, and it is stored in trimmed form.
'audio.talk_mode.wake_phrase': (config, value) => {
  const phrase = typeof value === 'string' ? value.trim() : '';
  if (phrase === '') {
    return false;
  }
  // Create the parent sections on demand so the leaf assignment is safe.
  const audio = (config.audio ??= {} as Config['audio']);
  const talkMode = (audio.talk_mode ??= {} as Config['audio']['talk_mode']);
  talkMode.wake_phrase = phrase;
  return true;
},
// Sets the talk-mode timeout; accepts finite numbers from 1000 to 3_600_000
// inclusive and stores the value rounded down to an integer.
'audio.talk_mode.timeout_ms': (config, value) => {
  if (typeof value === 'number' && Number.isFinite(value) && value >= 1000 && value <= 3_600_000) {
    // Create the parent sections on demand so the leaf assignment is safe.
    const audio = (config.audio ??= {} as Config['audio']);
    const talkMode = (audio.talk_mode ??= {} as Config['audio']['talk_mode']);
    talkMode.timeout_ms = Math.floor(value);
    return true;
  }
  return false;
},
// Sets the allow_manual_toggle flag; anything other than a boolean is rejected.
'audio.talk_mode.allow_manual_toggle': (config, value) => {
  if (typeof value === 'boolean') {
    // Create the parent sections on demand so the leaf assignment is safe.
    const audio = (config.audio ??= {} as Config['audio']);
    const talkMode = (audio.talk_mode ??= {} as Config['audio']['talk_mode']);
    talkMode.allow_manual_toggle = value;
    return true;
  }
  return false;
},
'sandbox.enabled': (config, value) => {
if (typeof value !== 'boolean') {return false;}
config.sandbox ??= {} as Config['sandbox'];
@@ -700,6 +728,20 @@ const PATCHABLE_KEYS: Record<string, (config: Config, value: unknown) => boolean
config.tts.enabled_channels = value as string[];
return true;
},
// Sets the TTS attempt limit; accepts finite numbers from 1 to 10 inclusive
// and stores the value rounded down to an integer.
'tts.fallback.max_attempts': (config, value) => {
  if (typeof value === 'number' && Number.isFinite(value) && value >= 1 && value <= 10) {
    // Create the parent sections on demand so the leaf assignment is safe.
    const tts = (config.tts ??= {} as Config['tts']);
    const fallback = (tts.fallback ??= {} as Config['tts']['fallback']);
    fallback.max_attempts = Math.floor(value);
    return true;
  }
  return false;
},
// Sets the provider failure cooldown; accepts finite numbers from 1000 to
// 3_600_000 inclusive and stores the value rounded down to an integer.
'tts.fallback.failure_cooldown_ms': (config, value) => {
  if (typeof value === 'number' && Number.isFinite(value) && value >= 1000 && value <= 3_600_000) {
    // Create the parent sections on demand so the leaf assignment is safe.
    const tts = (config.tts ??= {} as Config['tts']);
    const fallback = (tts.fallback ??= {} as Config['tts']['fallback']);
    fallback.failure_cooldown_ms = Math.floor(value);
    return true;
  }
  return false;
},
};
export function createConfigHandlers(deps: ConfigHandlerDeps) {
+26
View File
@@ -1550,8 +1550,14 @@ describe('config handlers', () => {
'memory.daily_log.enabled': true,
'memory.proactive_extract.enabled': true,
'memory.proactive_extract.min_tool_calls': 2,
'audio.talk_mode.enabled': true,
'audio.talk_mode.wake_phrase': 'ok flynn',
'audio.talk_mode.timeout_ms': 180000,
'audio.talk_mode.allow_manual_toggle': false,
'tts.enabled': true,
'tts.enabled_channels': ['telegram', 'discord'],
'tts.fallback.max_attempts': 2,
'tts.fallback.failure_cooldown_ms': 90000,
},
},
};
@@ -1573,8 +1579,14 @@ describe('config handlers', () => {
'memory.daily_log.enabled',
'memory.proactive_extract.enabled',
'memory.proactive_extract.min_tool_calls',
'audio.talk_mode.enabled',
'audio.talk_mode.wake_phrase',
'audio.talk_mode.timeout_ms',
'audio.talk_mode.allow_manual_toggle',
'tts.enabled',
'tts.enabled_channels',
'tts.fallback.max_attempts',
'tts.fallback.failure_cooldown_ms',
]);
expect(r.rejected).toEqual([]);
expect(r.persisted).toBe(false);
@@ -1593,8 +1605,14 @@ describe('config handlers', () => {
expect(getPath(config, 'memory', 'daily_log', 'enabled')).toBe(true);
expect(getPath(config, 'memory', 'proactive_extract', 'enabled')).toBe(true);
expect(getPath(config, 'memory', 'proactive_extract', 'min_tool_calls')).toBe(2);
expect(getPath(config, 'audio', 'talk_mode', 'enabled')).toBe(true);
expect(getPath(config, 'audio', 'talk_mode', 'wake_phrase')).toBe('ok flynn');
expect(getPath(config, 'audio', 'talk_mode', 'timeout_ms')).toBe(180000);
expect(getPath(config, 'audio', 'talk_mode', 'allow_manual_toggle')).toBe(false);
expect(getPath(config, 'tts', 'enabled')).toBe(true);
expect(getPath(config, 'tts', 'enabled_channels')).toEqual(['telegram', 'discord']);
expect(getPath(config, 'tts', 'fallback', 'max_attempts')).toBe(2);
expect(getPath(config, 'tts', 'fallback', 'failure_cooldown_ms')).toBe(90000);
});
it('config.patch applies councils model and routing patches', async () => {
@@ -1675,7 +1693,11 @@ describe('config handlers', () => {
'hooks.confirm': 'not-an-array',
'server.queue.cap': 0,
'memory.proactive_extract.min_tool_calls': 99,
'audio.talk_mode.wake_phrase': '',
'audio.talk_mode.timeout_ms': 99999999,
'tts.enabled_channels': [1, 2, 3],
'tts.fallback.max_attempts': 0,
'tts.fallback.failure_cooldown_ms': 0,
'automation.daily_briefing.model_tier': 'ultra',
},
},
@@ -1688,7 +1710,11 @@ describe('config handlers', () => {
'hooks.confirm',
'server.queue.cap',
'memory.proactive_extract.min_tool_calls',
'audio.talk_mode.wake_phrase',
'audio.talk_mode.timeout_ms',
'tts.enabled_channels',
'tts.fallback.max_attempts',
'tts.fallback.failure_cooldown_ms',
'automation.daily_briefing.model_tier',
]);
expect(r.persisted).toBe(false);
+9
View File
@@ -254,6 +254,15 @@ describe('ChatPage wiring', () => {
throw new Error('Run status line not found');
}
expect(statusLine.classList.contains('hidden')).toBe(false);
expect(String(statusLine.textContent ?? '')).toContain('working');
stream.emit('run_state', { state: 'cancel_requested' });
await Promise.resolve();
expect(String(statusLine.textContent ?? '')).toContain('cancellation requested');
stream.emit('run_state', { state: 'cancelled' });
await Promise.resolve();
expect(String(statusLine.textContent ?? '')).toContain('cancelled');
resolveResult?.({ content: 'ok' });
await Promise.resolve();
+30 -1
View File
@@ -139,6 +139,8 @@ function escapeHtml(str) {
function getAssistantStateSnapshot(configData) {
const automation = configData?.automation ?? {};
const memory = configData?.memory ?? {};
const audio = configData?.audio ?? {};
const talkMode = audio.talk_mode ?? {};
const tts = configData?.tts ?? {};
const queue = configData?.server?.queue ?? {};
return {
@@ -147,6 +149,10 @@ function getAssistantStateSnapshot(configData) {
memoryDaily: Boolean(memory.daily_log?.enabled),
memoryProactive: Boolean(memory.proactive_extract?.enabled),
memoryMinToolCalls: Number(memory.proactive_extract?.min_tool_calls ?? 1),
talkModeEnabled: Boolean(talkMode.enabled),
talkWakePhrase: typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn',
talkTimeoutMs: Number(talkMode.timeout_ms ?? 120000),
talkManualToggle: talkMode.allow_manual_toggle !== false,
ttsEnabled: Boolean(tts.enabled),
ttsChannels: Array.isArray(tts.enabled_channels) ? tts.enabled_channels : [],
queueMode: queue.mode ?? 'collect',
@@ -161,6 +167,7 @@ function buildPlaybookPatches(playbook) {
'memory.daily_log.enabled': true,
'memory.proactive_extract.enabled': true,
'memory.proactive_extract.min_tool_calls': 1,
'audio.talk_mode.enabled': true,
'tts.enabled': true,
'tts.enabled_channels': [],
'server.queue.mode': 'interrupt',
@@ -177,6 +184,7 @@ function buildPlaybookPatches(playbook) {
'memory.daily_log.enabled': true,
'memory.proactive_extract.enabled': true,
'memory.proactive_extract.min_tool_calls': 2,
'audio.talk_mode.enabled': false,
'tts.enabled': false,
'server.queue.mode': 'steer_backlog',
};
@@ -191,6 +199,7 @@ function buildPlaybookPatches(playbook) {
'memory.daily_log.enabled': false,
'memory.proactive_extract.enabled': false,
'memory.proactive_extract.min_tool_calls': 3,
'audio.talk_mode.enabled': false,
'tts.enabled': false,
'server.queue.mode': 'collect',
};
@@ -207,6 +216,10 @@ function buildRollbackPatchesFromSnapshot(snapshot) {
'memory.daily_log.enabled': snapshot.memoryDaily,
'memory.proactive_extract.enabled': snapshot.memoryProactive,
'memory.proactive_extract.min_tool_calls': Number.isFinite(snapshot.memoryMinToolCalls) ? snapshot.memoryMinToolCalls : 1,
'audio.talk_mode.enabled': snapshot.talkModeEnabled,
'audio.talk_mode.wake_phrase': snapshot.talkWakePhrase,
'audio.talk_mode.timeout_ms': Number.isFinite(snapshot.talkTimeoutMs) ? snapshot.talkTimeoutMs : 120000,
'audio.talk_mode.allow_manual_toggle': snapshot.talkManualToggle,
'tts.enabled': snapshot.ttsEnabled,
'tts.enabled_channels': snapshot.ttsChannels,
'server.queue.mode': snapshot.queueMode,
@@ -936,6 +949,8 @@ function updateAssistantHealth(configData) {
const automation = configData?.automation ?? {};
const memory = configData?.memory ?? {};
const audio = configData?.audio ?? {};
const talkMode = audio.talk_mode ?? {};
const tts = configData?.tts ?? {};
const deliveryMode = automation.delivery_mode ?? 'shared_session';
@@ -944,6 +959,9 @@ function updateAssistantHealth(configData) {
const memoryDaily = Boolean(memory.daily_log?.enabled);
const memoryProactive = Boolean(memory.proactive_extract?.enabled);
const proactiveThreshold = Number(memory.proactive_extract?.min_tool_calls ?? 1);
const talkModeEnabled = Boolean(talkMode.enabled);
const talkWakePhrase = typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn';
const talkTimeoutMs = Number(talkMode.timeout_ms ?? 120000);
const ttsEnabled = Boolean(tts.enabled);
const briefing = automation.daily_briefing ?? {};
const briefingName = briefing.name ?? 'daily-briefing';
@@ -1011,17 +1029,22 @@ function updateAssistantHealth(configData) {
: (_lastCouncilError ? `Last run failed: ${_lastCouncilError}` : 'No council run yet in this dashboard session.');
el.innerHTML = `
<div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-2 mb-4">
<div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-7 gap-2 mb-4">
${chip('Announce Mode', announce)}
${chip('Daily Briefing', dailyBriefing)}
${chip('Memory Daily Log', memoryDaily)}
${chip('Proactive Extract', memoryProactive)}
${chip('Talk Mode', talkModeEnabled)}
${chip('TTS Replies', ttsEnabled)}
<div class="flex justify-between items-center px-3 py-2.5 bg-zinc-900 border border-zinc-800 rounded-lg text-sm">
<span class="text-zinc-400">Extract Threshold</span>
<span class="font-bold">${Number.isFinite(proactiveThreshold) ? proactiveThreshold : 1}</span>
</div>
</div>
<div class="mb-3 text-sm text-zinc-500">
Talk controls: wake phrase <code class="text-zinc-300">${escapeHtml(talkWakePhrase)}</code>,
timeout ${Number.isFinite(talkTimeoutMs) ? Math.round(talkTimeoutMs / 1000) : 120}s.
</div>
<div class="flex flex-wrap gap-2 mb-4">
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-announce">
${announce ? 'Disable Announce Mode' : 'Enable Announce Mode'}
@@ -1035,6 +1058,9 @@ function updateAssistantHealth(configData) {
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-memory-proactive">
${memoryProactive ? 'Disable Proactive Extract' : 'Enable Proactive Extract'}
</button>
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-talk-mode">
${talkModeEnabled ? 'Disable Talk Mode' : 'Enable Talk Mode'}
</button>
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-tts">
${ttsEnabled ? 'Disable TTS' : 'Enable TTS'}
</button>
@@ -1341,6 +1367,9 @@ function updateAssistantHealth(configData) {
} else if (action === 'toggle-memory-proactive') {
patches = { 'memory.proactive_extract.enabled': !memoryProactive };
_assistantManualOverrides.add('memory.proactive_extract.enabled');
} else if (action === 'toggle-talk-mode') {
patches = { 'audio.talk_mode.enabled': !talkModeEnabled };
_assistantManualOverrides.add('audio.talk_mode.enabled');
} else if (action === 'toggle-tts') {
patches = { 'tts.enabled': !ttsEnabled };
_assistantManualOverrides.add('tts.enabled');
+10
View File
@@ -47,6 +47,14 @@ function createInitialConfig() {
daily_log: { enabled: true },
proactive_extract: { enabled: true, min_tool_calls: 2 },
},
audio: {
talk_mode: {
enabled: false,
wake_phrase: 'hey flynn',
timeout_ms: 120000,
allow_manual_toggle: true,
},
},
tts: {
enabled: false,
enabled_channels: [],
@@ -520,6 +528,7 @@ describe('DashboardPage assistant controls', () => {
'toggle-daily-briefing',
'toggle-memory-daily',
'toggle-memory-proactive',
'toggle-talk-mode',
'toggle-tts',
'playbook-executive',
'playbook-operator',
@@ -552,6 +561,7 @@ describe('DashboardPage assistant controls', () => {
await clickAction('toggle-daily-briefing');
await clickAction('toggle-memory-daily');
await clickAction('toggle-memory-proactive');
await clickAction('toggle-talk-mode');
await clickAction('toggle-tts');
await clickAction('playbook-executive');
await clickAction('playbook-operator');
+48
View File
@@ -117,6 +117,7 @@ async function loadSettings() {
const silentPatterns = hooks.silent ?? [];
const automation = config?.automation ?? {};
const memory = config?.memory ?? {};
const audio = config?.audio ?? {};
const tts = config?.tts ?? {};
_settingsCache = config ?? {};
@@ -125,8 +126,16 @@ async function loadSettings() {
const dailyMemoryEnabled = Boolean(memory.daily_log?.enabled);
const proactiveExtractEnabled = Boolean(memory.proactive_extract?.enabled);
const proactiveMinToolCalls = Number(memory.proactive_extract?.min_tool_calls ?? 1);
const talkMode = audio.talk_mode ?? {};
const talkModeEnabled = Boolean(talkMode.enabled);
const talkModeWakePhrase = typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn';
const talkModeTimeoutMs = Number(talkMode.timeout_ms ?? 120000);
const talkModeManualToggle = talkMode.allow_manual_toggle !== false;
const ttsEnabled = Boolean(tts.enabled);
const ttsChannelText = Array.isArray(tts.enabled_channels) ? tts.enabled_channels.join(', ') : '';
const ttsFallback = tts.fallback ?? {};
const ttsFallbackMaxAttempts = Number(ttsFallback.max_attempts ?? 3);
const ttsFallbackCooldownMs = Number(ttsFallback.failure_cooldown_ms ?? 60000);
const briefingOutputChannel = automation.daily_briefing?.output?.channel ?? '';
const briefingOutputPeer = automation.daily_briefing?.output?.peer ?? '';
@@ -173,6 +182,30 @@ async function loadSettings() {
<span>TTS channels (comma-separated, blank = all)</span>
<input id="assist-tts-channels" type="text" value="${escapeHtml(ttsChannelText)}" placeholder="telegram,discord,whatsapp" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Example: telegram,discord,whatsapp" />
</label>
<label class="flex items-center gap-2.5 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400 cursor-pointer min-h-[44px]" title="When enabled, wake phrase + talk window controls are active.">
<input id="assist-talk-mode-enabled" type="checkbox" ${talkModeEnabled ? 'checked' : ''} title="Enable or disable talk mode." />
<span>Talk mode enabled</span>
</label>
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Wake phrase that activates the talk window.">
<span>Talk mode wake phrase</span>
<input id="assist-talk-wake-phrase" type="text" value="${escapeHtml(talkModeWakePhrase)}" placeholder="hey flynn" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Say this phrase to activate talk mode for the configured timeout." />
</label>
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="How long talk mode stays active after wake phrase or input.">
<span>Talk mode timeout (ms)</span>
<input id="assist-talk-timeout-ms" type="number" min="1000" max="3600000" value="${Number.isFinite(talkModeTimeoutMs) ? talkModeTimeoutMs : 120000}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1000 to 3600000 milliseconds." />
</label>
<label class="flex items-center gap-2.5 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400 cursor-pointer min-h-[44px]" title="Allow /talk on|off|status manual commands while talk mode is enabled.">
<input id="assist-talk-manual-toggle" type="checkbox" ${talkModeManualToggle ? 'checked' : ''} title="Allow manual talk mode toggles." />
<span>Allow manual talk toggles</span>
</label>
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="How many TTS providers to try before text-only fallback.">
<span>TTS max fallback attempts</span>
<input id="assist-tts-max-attempts" type="number" min="1" max="10" value="${Number.isFinite(ttsFallbackMaxAttempts) ? ttsFallbackMaxAttempts : 3}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1 to 10 provider attempts." />
</label>
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Cooldown period applied after a provider fails TTS synthesis.">
<span>TTS failure cooldown (ms)</span>
<input id="assist-tts-fallback-cooldown-ms" type="number" min="1000" max="3600000" value="${Number.isFinite(ttsFallbackCooldownMs) ? ttsFallbackCooldownMs : 60000}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1000 to 3600000 milliseconds." />
</label>
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Channel used for daily briefing delivery, such as telegram or slack.">
<span>Briefing output channel</span>
<input id="assist-briefing-channel" type="text" value="${escapeHtml(briefingOutputChannel)}" placeholder="telegram" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set the channel adapter name for briefings." />
@@ -302,9 +335,18 @@ async function saveAssistantMode() {
const memoryDaily = Boolean(_el.querySelector('#assist-memory-daily')?.checked);
const memoryProactive = Boolean(_el.querySelector('#assist-memory-proactive')?.checked);
const ttsEnabled = Boolean(_el.querySelector('#assist-tts-enabled')?.checked);
const talkModeEnabled = Boolean(_el.querySelector('#assist-talk-mode-enabled')?.checked);
const talkModeManualToggle = Boolean(_el.querySelector('#assist-talk-manual-toggle')?.checked);
const minToolsRaw = Number.parseInt(_el.querySelector('#assist-memory-min-tools')?.value ?? '1', 10);
const minTools = Number.isFinite(minToolsRaw) ? Math.min(50, Math.max(0, minToolsRaw)) : 1;
const ttsChannelsRaw = _el.querySelector('#assist-tts-channels')?.value ?? '';
const talkWakePhrase = (_el.querySelector('#assist-talk-wake-phrase')?.value ?? '').trim() || 'hey flynn';
const talkTimeoutRaw = Number.parseInt(_el.querySelector('#assist-talk-timeout-ms')?.value ?? '120000', 10);
const talkTimeoutMs = Number.isFinite(talkTimeoutRaw) ? Math.min(3_600_000, Math.max(1000, talkTimeoutRaw)) : 120000;
const ttsMaxAttemptsRaw = Number.parseInt(_el.querySelector('#assist-tts-max-attempts')?.value ?? '3', 10);
const ttsMaxAttempts = Number.isFinite(ttsMaxAttemptsRaw) ? Math.min(10, Math.max(1, ttsMaxAttemptsRaw)) : 3;
const ttsCooldownRaw = Number.parseInt(_el.querySelector('#assist-tts-fallback-cooldown-ms')?.value ?? '60000', 10);
const ttsFailureCooldownMs = Number.isFinite(ttsCooldownRaw) ? Math.min(3_600_000, Math.max(1000, ttsCooldownRaw)) : 60000;
const briefingChannel = (_el.querySelector('#assist-briefing-channel')?.value ?? '').trim();
const briefingPeer = (_el.querySelector('#assist-briefing-peer')?.value ?? '').trim();
const ttsChannels = ttsChannelsRaw
@@ -320,6 +362,12 @@ async function saveAssistantMode() {
'memory.proactive_extract.min_tool_calls': minTools,
'tts.enabled': ttsEnabled,
'tts.enabled_channels': ttsChannels,
'audio.talk_mode.enabled': talkModeEnabled,
'audio.talk_mode.wake_phrase': talkWakePhrase,
'audio.talk_mode.timeout_ms': talkTimeoutMs,
'audio.talk_mode.allow_manual_toggle': talkModeManualToggle,
'tts.fallback.max_attempts': ttsMaxAttempts,
'tts.fallback.failure_cooldown_ms': ttsFailureCooldownMs,
};
if (briefingChannel) {
patches['automation.daily_briefing.output.channel'] = briefingChannel;
+26
View File
@@ -45,9 +45,21 @@ function createClient() {
daily_log: { enabled: true },
proactive_extract: { enabled: true, min_tool_calls: 2 },
},
audio: {
talk_mode: {
enabled: false,
wake_phrase: 'hey flynn',
timeout_ms: 120000,
allow_manual_toggle: true,
},
},
tts: {
enabled: false,
enabled_channels: ['telegram'],
fallback: {
max_attempts: 3,
failure_cooldown_ms: 60000,
},
},
hooks: {
confirm: ['tool:group:fs/**/*'],
@@ -119,6 +131,12 @@ describe('SettingsPage wiring', () => {
root.querySelector('#assist-memory-min-tools').value = '6';
root.querySelector('#assist-tts-enabled').checked = true;
root.querySelector('#assist-tts-channels').value = 'telegram, discord';
root.querySelector('#assist-talk-mode-enabled').checked = true;
root.querySelector('#assist-talk-wake-phrase').value = 'ok flynn';
root.querySelector('#assist-talk-timeout-ms').value = '180000';
root.querySelector('#assist-talk-manual-toggle').checked = false;
root.querySelector('#assist-tts-max-attempts').value = '2';
root.querySelector('#assist-tts-fallback-cooldown-ms').value = '45000';
root.querySelector('#assist-briefing-channel').value = 'discord';
root.querySelector('#assist-briefing-peer').value = '98765';
@@ -127,6 +145,14 @@ describe('SettingsPage wiring', () => {
const assistantPatch = calls.find((entry) => entry.method === 'config.patch' && Object.prototype.hasOwnProperty.call(entry.params?.patches ?? {}, 'automation.delivery_mode'));
expect(assistantPatch).toBeTruthy();
expect(assistantPatch?.params?.patches).toMatchObject({
'audio.talk_mode.enabled': true,
'audio.talk_mode.wake_phrase': 'ok flynn',
'audio.talk_mode.timeout_ms': 180000,
'audio.talk_mode.allow_manual_toggle': false,
'tts.fallback.max_attempts': 2,
'tts.fallback.failure_cooldown_ms': 45000,
});
root.querySelector('#hooks-confirm').value = 'tool:group:fs/**/*\ntool:group:web/**/*';
root.querySelector('#hooks-log').value = 'tool:web.search';
+151 -1
View File
@@ -1,6 +1,6 @@
import { afterEach, describe, expect, it, vi } from 'vitest';
import { synthesizeSpeechAttachment } from './tts.js';
import { synthesizeSpeechAttachment, synthesizeSpeechWithFallback, TtsHealthTracker } from './tts.js';
describe('synthesizeSpeechAttachment', () => {
afterEach(() => {
@@ -65,3 +65,153 @@ describe('synthesizeSpeechAttachment', () => {
})).rejects.toThrow(/TTS request failed: 429 Too Many Requests/i);
});
});
/**
 * Fallback-chain behaviour of synthesizeSpeechWithFallback, exercised against a
 * mocked global `fetch` and a two-provider chain (primary first, backup second).
 */
describe('synthesizeSpeechWithFallback', () => {
  const PRIMARY_URL = 'https://tts-primary.example.com/v1/audio/speech';
  const BACKUP_URL = 'https://tts-backup.example.com/v1/audio/speech';

  /** Build a fresh provider chain for every call so no test shares input objects. */
  const providerChain = () => [
    { id: 'primary', endpoint: PRIMARY_URL },
    { id: 'backup', endpoint: BACKUP_URL },
  ];

  /** Stub of a non-OK HTTP response carrying a plain-text error body. */
  const failedResponse = (status: number, statusText: string, body: string): Response => ({
    ok: false,
    status,
    statusText,
    text: async () => body,
  } as Response);

  /** Stub of a 200 response whose body is the given raw audio bytes. */
  const audioResponse = (bytes: number[]): Response => ({
    ok: true,
    status: 200,
    statusText: 'OK',
    arrayBuffer: async () => Uint8Array.from(bytes).buffer,
  } as Response);

  afterEach(() => {
    vi.useRealTimers();
    vi.restoreAllMocks();
  });

  it('falls back to the next provider when the first provider fails', async () => {
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(failedResponse(503, 'Service Unavailable', 'primary down'));
    fetchMock.mockResolvedValueOnce(audioResponse([9, 9]));

    const outcome = await synthesizeSpeechWithFallback('hello', {
      providers: providerChain(),
    });

    expect(fetchMock).toHaveBeenCalledTimes(2);
    expect(fetchMock.mock.calls[0]?.[0]).toBe(PRIMARY_URL);
    expect(fetchMock.mock.calls[1]?.[0]).toBe(BACKUP_URL);
    expect(outcome.providerId).toBe('backup');
    expect(outcome.attachment).toMatchObject({ mimeType: 'audio/mpeg', data: 'CQk=' });
    expect(outcome.attemptedProviders).toEqual(['primary', 'backup']);
  });

  it('uses health cooldown to avoid repeatedly trying a failing provider', async () => {
    const tracker = new TtsHealthTracker();
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(failedResponse(500, 'Server Error', 'primary down'));
    fetchMock.mockResolvedValueOnce(audioResponse([1]));
    fetchMock.mockResolvedValueOnce(audioResponse([2]));

    const firstReply = await synthesizeSpeechWithFallback('hello', {
      providers: providerChain(),
      fallback: {
        failureCooldownMs: 120_000,
      },
      healthTracker: tracker,
    });
    expect(firstReply.providerId).toBe('backup');

    // Primary is still cooling down, so the second reply should go straight to backup.
    const secondReply = await synthesizeSpeechWithFallback('hello again', {
      providers: providerChain(),
      healthTracker: tracker,
    });

    expect(fetchMock).toHaveBeenCalledTimes(3);
    expect(fetchMock.mock.calls[2]?.[0]).toBe(BACKUP_URL);
    expect(secondReply.attemptedProviders).toEqual(['backup']);
  });

  it('tries unhealthy providers again after cooldown expires', async () => {
    vi.useFakeTimers();
    vi.setSystemTime(new Date('2026-01-01T00:00:00Z'));
    const tracker = new TtsHealthTracker();
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(failedResponse(503, 'Service Unavailable', 'primary down'));
    fetchMock.mockResolvedValueOnce(audioResponse([3]));
    fetchMock.mockResolvedValueOnce(audioResponse([4]));

    await synthesizeSpeechWithFallback('first', {
      providers: providerChain(),
      fallback: { failureCooldownMs: 10_000 },
      healthTracker: tracker,
    });

    // Jump 11 s ahead: past the 10 s cooldown recorded for the primary provider.
    vi.setSystemTime(new Date('2026-01-01T00:00:11Z'));
    const secondReply = await synthesizeSpeechWithFallback('second', {
      providers: providerChain(),
      healthTracker: tracker,
    });

    expect(fetchMock).toHaveBeenCalledTimes(3);
    expect(fetchMock.mock.calls[2]?.[0]).toBe(PRIMARY_URL);
    expect(secondReply.providerId).toBe('primary');
  });

  it('returns text fallback metadata when all providers fail', async () => {
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockRejectedValueOnce(new Error('primary down'));
    fetchMock.mockRejectedValueOnce(new Error('backup down'));

    const outcome = await synthesizeSpeechWithFallback('hello', {
      providers: providerChain(),
    });

    expect(outcome.attachment).toBeNull();
    expect(outcome.attemptedProviders).toEqual(['primary', 'backup']);
    expect(outcome.lastError).toContain('backup down');
  });
});
+171
View File
@@ -8,6 +8,65 @@ export interface TtsSynthesisConfig {
model?: string;
voice?: string;
format?: TtsOutputFormat;
/** Optional provider identity used by fallback/health tracking. */
id?: string;
/** Optional provider type hint for endpoint defaults. */
type?: 'openai' | 'custom';
}
/** Tuning knobs for the ordered provider chain used by synthesizeSpeechWithFallback. */
export interface TtsFallbackConfig {
/**
 * Maximum number of providers to try per reply.
 * The value is floored and clamped to between 1 and the number of usable providers;
 * when omitted or non-finite, every usable provider may be tried.
 */
maxAttempts?: number;
/**
 * Cooldown window, in milliseconds, recorded against a provider that failed.
 * While the window is open the provider is ordered behind healthy providers rather
 * than removed from the chain. synthesizeSpeechWithFallback defaults this to 60_000.
 */
failureCooldownMs?: number;
}
/** Failure bookkeeping that TtsHealthTracker stores per provider id. */
export interface TtsProviderHealth {
/** Failures recorded since the provider's state was last cleared by a success. */
consecutiveFailures: number;
/** Timestamp (same clock as Date.now()) at or after which the provider counts as healthy again. */
cooldownUntil: number;
/** Timestamp of the most recent recorded failure. */
lastFailureAt?: number;
/** Message of the most recent recorded failure. */
lastError?: string;
}
/** Outcome of one synthesizeSpeechWithFallback call. */
export interface TtsFallbackResult {
/** Synthesized audio, or null when no provider produced one (caller falls back to text). */
attachment: OutboundAttachment | null;
/** Id of the provider that produced the attachment; only set on success. */
providerId?: string;
/** Provider ids in the order they were tried, including the successful one. */
attemptedProviders: string[];
/** Provider ids excluded from this call by the max-attempts cutoff. */
skippedProviders: string[];
/** Message of the last provider error; not set on a successful result. */
lastError?: string;
}
/**
 * In-memory health bookkeeping for TTS providers, keyed by provider id.
 *
 * A provider with no recorded state is healthy. A failure opens a cooldown window;
 * a success clears the provider's state entirely.
 */
export class TtsHealthTracker {
  private readonly states = new Map<string, TtsProviderHealth>();

  /**
   * Report whether a provider is outside its cooldown window.
   *
   * @param providerId Provider identity used as the state key.
   * @param now Reference timestamp; defaults to the current time.
   * @returns true when no failure is recorded or the cooldown has elapsed.
   */
  isHealthy(providerId: string, now = Date.now()): boolean {
    const state = this.states.get(providerId);
    return state === undefined || state.cooldownUntil <= now;
  }

  /** Forget every recorded failure for the provider. */
  markSuccess(providerId: string): void {
    this.states.delete(providerId);
  }

  /**
   * Record a failure and (re)start the provider's cooldown window.
   *
   * @param providerId Provider identity used as the state key.
   * @param error Thrown value; its message (or string form) is stored as `lastError`.
   * @param now Timestamp of the failure; defaults to the current time.
   * @param failureCooldownMs Cooldown length in milliseconds; values below 1000 are
   *   raised to 1000 and NaN falls back to the 60 000 ms default.
   */
  markFailure(providerId: string, error: unknown, now = Date.now(), failureCooldownMs = 60_000): void {
    const previous = this.states.get(providerId);
    const consecutiveFailures = (previous?.consecutiveFailures ?? 0) + 1;
    const message = error instanceof Error ? error.message : String(error);
    // Math.max propagates NaN; a NaN deadline would fail every `cooldownUntil <= now`
    // check and pin the provider in cooldown until its next success.
    const cooldownMs = Number.isNaN(failureCooldownMs) ? 60_000 : Math.max(1_000, failureCooldownMs);
    this.states.set(providerId, {
      consecutiveFailures,
      cooldownUntil: now + cooldownMs,
      lastFailureAt: now,
      lastError: message,
    });
  }

  /** Return the recorded failure state for a provider, or undefined when none exists. */
  getState(providerId: string): TtsProviderHealth | undefined {
    return this.states.get(providerId);
  }
}
function outputFormatToMimeType(format: TtsOutputFormat): string {
@@ -34,6 +93,26 @@ function outputFormatToExtension(format: TtsOutputFormat): string {
}
}
/**
 * Resolve the HTTP endpoint a provider should be called on.
 *
 * An explicit, non-blank `endpoint` wins and is returned with surrounding
 * whitespace removed. Otherwise providers of type 'openai' get the public
 * OpenAI speech endpoint.
 *
 * @returns The endpoint URL, or undefined when none can be determined.
 */
function resolveProviderEndpoint(config: TtsSynthesisConfig): string | undefined {
  // Return the same trimmed value that was validated, so padding from config
  // files never reaches the caller.
  const explicitEndpoint = config.endpoint?.trim();
  if (explicitEndpoint) {
    return explicitEndpoint;
  }
  if (config.type === 'openai') {
    return 'https://api.openai.com/v1/audio/speech';
  }
  return undefined;
}
/**
 * Turn a configured attempt limit into a usable provider count.
 *
 * @param maxAttempts Requested limit; may be missing or non-finite.
 * @param providerCount Number of providers available for this call.
 * @returns 0 when there are no providers; `providerCount` when no finite limit is
 *   given; otherwise the floored limit clamped to between 1 and `providerCount`.
 */
function normalizeMaxAttempts(maxAttempts: number | undefined, providerCount: number): number {
  if (providerCount <= 0) {
    return 0;
  }
  const hasFiniteLimit = typeof maxAttempts === 'number' && Number.isFinite(maxAttempts);
  if (!hasFiniteLimit) {
    return providerCount;
  }
  const wholeAttempts = Math.floor(maxAttempts);
  const atLeastOne = Math.max(1, wholeAttempts);
  return Math.min(providerCount, atLeastOne);
}
/** Synthesize speech via an OpenAI-compatible /v1/audio/speech endpoint. */
export async function synthesizeSpeechAttachment(
text: string,
@@ -86,3 +165,95 @@ export async function synthesizeSpeechAttachment(
filename: `flynn-reply-${Date.now()}.${extension}`,
};
}
/**
 * Synthesize speech by walking an ordered provider chain until one succeeds.
 *
 * Providers without a resolvable endpoint are dropped. When a health tracker is
 * supplied, providers still in cooldown are moved behind healthy ones (they are
 * demoted, not removed), and each attempt's outcome is recorded on the tracker.
 * At most `fallback.maxAttempts` providers are tried; the rest are reported as
 * skipped. Provider errors are caught: when nothing succeeds the result carries
 * `attachment: null` plus the last error message instead of throwing.
 *
 * @param text Reply text; blank text yields an empty result without any attempt.
 * @param input Provider chain, optional fallback tuning, optional health tracker.
 * @returns The attachment (or null) together with attempt/skip metadata.
 */
export async function synthesizeSpeechWithFallback(
  text: string,
  input: {
    providers: TtsSynthesisConfig[];
    fallback?: TtsFallbackConfig;
    healthTracker?: TtsHealthTracker;
  },
): Promise<TtsFallbackResult> {
  type ResolvedProvider = TtsSynthesisConfig & { id: string; endpoint: string };

  const spokenText = text.trim();
  if (!spokenText) {
    return { attachment: null, attemptedProviders: [], skippedProviders: [] };
  }

  // Keep only providers with a usable endpoint; ids default to their 1-based
  // position in the original (unfiltered) list.
  const candidates: ResolvedProvider[] = [];
  input.providers.forEach((provider, index) => {
    const endpoint = resolveProviderEndpoint(provider);
    if (typeof endpoint === 'string' && endpoint.length > 0) {
      candidates.push({
        ...provider,
        endpoint,
        id: provider.id ?? `tts-provider-${index + 1}`,
      });
    }
  });
  if (candidates.length === 0) {
    return { attachment: null, attemptedProviders: [], skippedProviders: [] };
  }

  // Partition by health using a single timestamp so ordering is consistent.
  const tracker = input.healthTracker;
  const checkedAt = Date.now();
  const healthy: ResolvedProvider[] = [];
  const coolingDown: ResolvedProvider[] = [];
  for (const candidate of candidates) {
    const usable = !tracker || tracker.isHealthy(candidate.id, checkedAt);
    (usable ? healthy : coolingDown).push(candidate);
  }
  const ordered = healthy.concat(coolingDown);

  const attemptLimit = normalizeMaxAttempts(input.fallback?.maxAttempts, ordered.length);
  const toTry = ordered.slice(0, attemptLimit);
  const skippedProviders = ordered.slice(attemptLimit).map((candidate) => candidate.id);
  const cooldownMs = input.fallback?.failureCooldownMs ?? 60_000;

  const attemptedProviders: string[] = [];
  let lastError: string | undefined;
  for (const candidate of toTry) {
    attemptedProviders.push(candidate.id);
    try {
      const attachment = await synthesizeSpeechAttachment(spokenText, candidate);
      if (!attachment) {
        // No audio and no error: move on without touching health state.
        continue;
      }
      tracker?.markSuccess(candidate.id);
      return {
        attachment,
        providerId: candidate.id,
        attemptedProviders,
        skippedProviders,
      };
    } catch (error) {
      tracker?.markFailure(candidate.id, error, Date.now(), cooldownMs);
      lastError = error instanceof Error ? error.message : String(error);
    }
  }

  return {
    attachment: null,
    attemptedProviders,
    skippedProviders,
    lastError,
  };
}