feat: harden voice reliability with tts fallback and talk controls

2026-02-26 17:29:23 -08:00
parent 2a9bed8c91
commit 163b1a0139
13 changed files with 781 additions and 17 deletions
@@ -870,6 +870,11 @@ describe('configSchema — tts', () => {
    expect(result.tts.enabled).toBe(false);
    expect(result.tts.enabled_channels).toEqual([]);
    expect(result.tts.provider).toBeUndefined();
+    expect(result.tts.providers).toEqual([]);
+    expect(result.tts.fallback).toEqual({
+      max_attempts: 3,
+      failure_cooldown_ms: 60000,
+    });
  });

  it('accepts custom tts provider settings', () => {
@@ -886,6 +891,29 @@ describe('configSchema — tts', () => {
          voice: 'nova',
          format: 'wav',
        },
+        providers: [
+          {
+            name: 'primary',
+            type: 'custom',
+            endpoint: 'https://tts-primary.example.com/v1/audio/speech',
+            api_key: 'sk-1',
+            model: 'gpt-4o-mini-tts',
+            voice: 'alloy',
+            format: 'mp3',
+          },
+          {
+            name: 'backup',
+            type: 'openai',
+            api_key: 'sk-2',
+            model: 'gpt-4o-mini-tts',
+            voice: 'nova',
+            format: 'opus',
+          },
+        ],
+        fallback: {
+          max_attempts: 2,
+          failure_cooldown_ms: 90000,
+        },
      },
    });

@@ -899,6 +927,15 @@ describe('configSchema — tts', () => {
      voice: 'nova',
      format: 'wav',
    });
+    expect(result.tts.providers).toHaveLength(2);
+    expect(result.tts.providers[0]).toMatchObject({
+      name: 'primary',
+      endpoint: 'https://tts-primary.example.com/v1/audio/speech',
+    });
+    expect(result.tts.fallback).toEqual({
+      max_attempts: 2,
+      failure_cooldown_ms: 90000,
+    });
  });
 });

@@ -842,6 +842,7 @@ const audioSchema = z.object({
 const ttsOutputFormatSchema = z.enum(['mp3', 'wav', 'opus']);

 const ttsProviderSchema = z.object({
+  name: z.string().min(1).optional(),
  type: z.enum(['openai', 'custom']).default('openai'),
  endpoint: z.string().optional(),
  api_key: z.string().optional(),
@@ -850,11 +851,23 @@ const ttsProviderSchema = z.object({
  format: ttsOutputFormatSchema.default('mp3'),
 });

+const ttsFallbackSchema = z.object({
+  /** Maximum number of providers tried, in order, before giving up and replying text-only (1–10, default 3). */
+  max_attempts: z.number().int().min(1).max(10).default(3),
+  /** Cooldown in milliseconds applied to a provider after a synthesis failure (1000–3600000, default 60000). */
+  failure_cooldown_ms: z.number().int().min(1000).max(3_600_000).default(60_000),
+}).default({});
+
 const ttsSchema = z.object({
  enabled: z.boolean().default(false),
  /** Restrict voice replies to selected channels. Empty means all channels. */
  enabled_channels: z.array(z.string().min(1)).default([]),
+  /** Legacy single-provider config; only consulted when `providers` is empty. */
  provider: ttsProviderSchema.optional(),
+  /** Ordered provider chain for synthesis fallback; takes precedence over `provider` when non-empty. */
+  providers: z.array(ttsProviderSchema).default([]),
+  /** Fallback policy for the provider chain: attempt limit and post-failure cooldown. */
+  fallback: ttsFallbackSchema,
 }).default({});

 // ── Tool policy schemas ──────────────────────────────────────────────
@@ -2393,6 +2393,100 @@ describe('daemon tts routing integration', () => {
    expect(outbound?.attachments).toBeUndefined();
  });

+  it('falls back to secondary TTS provider when primary fails', async () => {
+    vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('fallback-chain response');
+    const fetchSpy = vi.spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce({ // 1st fetch: the primary provider answers 503
+        ok: false,
+        status: 503,
+        statusText: 'Service Unavailable',
+        text: async () => 'primary down',
+      } as Response)
+      .mockResolvedValueOnce({ // 2nd fetch: the backup provider returns three audio bytes
+        ok: true,
+        status: 200,
+        statusText: 'OK',
+        arrayBuffer: async () => Uint8Array.from([5, 6, 7]).buffer,
+      } as Response);
+
+    const session = {
+      id: 'telegram:tts-user-4',
+      addMessage: vi.fn(),
+      getHistory: vi.fn(() => []),
+      clear: vi.fn(),
+      replaceHistory: vi.fn(),
+      getConfig: vi.fn(() => undefined),
+      setConfig: vi.fn(),
+      deleteConfig: vi.fn(),
+    };
+
+    const router = createMessageRouter({
+      sessionManager: { getSession: vi.fn(() => session) } as unknown as MessageRouterDeps['sessionManager'],
+      modelRouter: {
+        getAvailableTiers: () => ['default'],
+        getAllLabels: () => ({ default: 'default' }),
+        getLabel: (tier: string) => tier,
+      } as unknown as MessageRouterDeps['modelRouter'],
+      systemPrompt: 'test prompt',
+      toolRegistry: { clone() { return this; }, register: vi.fn() } as unknown as MessageRouterDeps['toolRegistry'],
+      toolExecutor: {} as unknown as MessageRouterDeps['toolExecutor'],
+      config: {
+        agents: {
+          primary_tier: 'default',
+          delegation: {
+            compaction: 'default',
+            memory_extraction: 'default',
+            classification: 'default',
+            tool_summarisation: 'default',
+            complex_reasoning: 'default',
+          },
+          max_delegation_depth: 1,
+          max_iterations: 3,
+        },
+        compaction: { enabled: false },
+        models: { default: { provider: 'anthropic', model: 'claude' } },
+        tts: {
+          enabled: true,
+          enabled_channels: ['telegram'],
+          providers: [
+            {
+              name: 'primary',
+              type: 'custom',
+              endpoint: 'https://tts-primary.example.com/v1/audio/speech',
+            },
+            {
+              name: 'backup',
+              type: 'custom',
+              endpoint: 'https://tts-backup.example.com/v1/audio/speech',
+            },
+          ],
+          fallback: {
+            max_attempts: 2, // enough attempts to reach both configured providers
+            failure_cooldown_ms: 60000,
+          },
+        },
+      } as unknown as MessageRouterDeps['config'],
+    });
+
+    const reply = vi.fn(async (_message: OutboundMessage) => {});
+    await router.handler({
+      id: 'tts-4',
+      channel: 'telegram',
+      senderId: 'tts-user-4',
+      text: 'respond with provider fallback',
+      timestamp: Date.now(),
+    } as MessageRouterInput, reply);
+
+    expect(fetchSpy).toHaveBeenCalledTimes(2); // one request per provider, in configured order
+    expect(fetchSpy.mock.calls[0]?.[0]).toBe('https://tts-primary.example.com/v1/audio/speech');
+    expect(fetchSpy.mock.calls[1]?.[0]).toBe('https://tts-backup.example.com/v1/audio/speech');
+    const outbound = reply.mock.calls[0]?.[0] as OutboundMessage | undefined;
+    expect(outbound?.attachments?.[0]).toMatchObject({
+      mimeType: 'audio/mpeg',
+      data: 'BQYH', // base64 of the bytes [5, 6, 7] served by the backup mock
+    });
+  });
+
  it('falls back to text-only replies when tts synthesis fails', async () => {
    vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('fallback response');
    vi.spyOn(globalThis, 'fetch').mockRejectedValue(new Error('tts down'));
@@ -2909,4 +3003,84 @@ describe('daemon talk mode (voice wake) integration', () => {
    expect(processSpy).toHaveBeenCalledOnce();
    expect(processSpy).toHaveBeenCalledWith('what time is it?', undefined, undefined);
  });
+
+  it('treats spoken cancel as /stop while talk mode is active', async () => {
+    const cancelSpy = vi.spyOn(AgentOrchestrator.prototype, 'cancel');
+    vi.spyOn(AgentOrchestrator.prototype, 'isCancellable').mockReturnValue(true);
+    const processSpy = vi.spyOn(AgentOrchestrator.prototype, 'process');
+    let resolveFirst: ((value: string) => void) | undefined;
+    let markStarted: (() => void) | undefined;
+    const started = new Promise<void>((resolve) => { markStarted = resolve; }); // resolves once process() has been entered
+    processSpy.mockImplementationOnce(() => { // first run stays pending until resolveFirst() is called
+      markStarted?.();
+      return new Promise<string>((resolve) => { resolveFirst = resolve; });
+    });
+
+    const session = {
+      id: 'telegram:user-talk-2',
+      addMessage: vi.fn(),
+      getHistory: vi.fn(() => []),
+      clear: vi.fn(),
+      replaceHistory: vi.fn(),
+      getConfig: vi.fn(() => undefined),
+      setConfig: vi.fn(),
+      deleteConfig: vi.fn(),
+    };
+
+    const commandRegistry = new CommandRegistry();
+    registerBuiltinCommands(commandRegistry);
+
+    const router = createMessageRouter({
+      sessionManager: { getSession: vi.fn(() => session) } as unknown as MessageRouterDeps['sessionManager'],
+      modelRouter: {
+        getAvailableTiers: () => ['fast', 'default', 'complex', 'local'],
+        getAllLabels: () => ({ fast: 'fast', default: 'default', complex: 'complex', local: 'local' }),
+        getLabel: (tier: string) => tier,
+      } as unknown as MessageRouterDeps['modelRouter'],
+      systemPrompt: 'test prompt',
+      toolRegistry: { clone() { return this; }, register: vi.fn() } as unknown as MessageRouterDeps['toolRegistry'],
+      toolExecutor: {} as unknown as MessageRouterDeps['toolExecutor'],
+      config: {
+        agents: {
+          primary_tier: 'default',
+          delegation: { compaction: 'fast', memory_extraction: 'fast', classification: 'fast', tool_summarisation: 'fast', complex_reasoning: 'complex' },
+          max_delegation_depth: 3,
+          max_iterations: 10,
+        },
+        compaction: { enabled: false },
+        models: { default: { provider: 'anthropic', model: 'claude' } },
+        audio: { talk_mode: { enabled: true, wake_phrase: 'hey flynn', timeout_ms: 120000, allow_manual_toggle: true } },
+      } as unknown as MessageRouterDeps['config'],
+      commandRegistry,
+    });
+
+    const reply = vi.fn(async (_message: OutboundMessage) => {});
+    const firstRun = router.handler({
+      id: 'm-talk-3',
+      channel: 'telegram',
+      senderId: 'user-talk-2',
+      text: 'hey flynn start a long task',
+      timestamp: Date.now(),
+    } as MessageRouterInput, reply);
+
+    await started; // make sure the first run is in flight before the spoken cancel is sent
+
+    await router.handler({
+      id: 'm-talk-4',
+      channel: 'telegram',
+      senderId: 'user-talk-2',
+      text: 'cancel', // bare spoken word: no wake phrase and no slash prefix
+      timestamp: Date.now(),
+    } as MessageRouterInput, reply);
+
+    expect(cancelSpy).toHaveBeenCalledTimes(1);
+    expect(processSpy).toHaveBeenCalledTimes(1); // 'cancel' must not be forwarded to the agent as a new prompt
+    expect(reply).toHaveBeenCalledWith(expect.objectContaining({
+      text: 'Cancellation requested. The active operation will stop at the next safe point.',
+      replyTo: 'm-talk-4',
+    }));
+
+    resolveFirst?.('operation cancelled by user.'); // release the pending first run so the test can finish
+    await firstRun;
+  });
 });
@@ -1,7 +1,7 @@
 import type { AudioTranscriptionConfig } from '../models/media.js';
 import type { Attachment } from '../channels/types.js';
 import { isSupportedAudio, transcribeAudio } from '../models/media.js';
-import { synthesizeSpeechAttachment } from '../models/tts.js';
+import { synthesizeSpeechWithFallback, TtsHealthTracker } from '../models/tts.js';
 import { supportsAudioInput } from '../models/capabilities.js';
 import { AgentOrchestrator, SubagentManager, type DelegationConfig } from '../backends/index.js';
 import { OutboundAttachmentCollector } from '../backends/native/attachments.js';
@@ -397,6 +397,7 @@ export function createMessageRouter(deps: {
  const talkModeUntil = new Map<string, number>();
  const activeRuns = new Map<string, AgentOrchestrator>();
  const reactionCooldowns = new Map<string, number>();
+  const ttsHealthTracker = new TtsHealthTracker();

  function getBackendMode(): BackendRuntimeMode {
    return deps.getBackendMode?.() ?? 'config_default';
@@ -518,24 +519,42 @@ export function createMessageRouter(deps: {
      return undefined;
    }

-    const provider = deps.config.tts?.provider;
-    const endpoint = provider?.endpoint ?? (provider?.type === 'openai' ? 'https://api.openai.com/v1/audio/speech' : undefined);
-    if (!endpoint) {
+    const configuredProviders = deps.config.tts?.providers ?? [];
+    const providers = configuredProviders.length > 0
+      ? configuredProviders
+      : (deps.config.tts?.provider ? [deps.config.tts.provider] : []);
+
+    if (providers.length === 0) {
      return undefined;
    }

-    try {
-      return await synthesizeSpeechAttachment(responseText, {
-        endpoint,
-        apiKey: provider?.api_key,
-        model: provider?.model,
-        voice: provider?.voice,
-        format: provider?.format,
+    const outcome = await synthesizeSpeechWithFallback(responseText, {
+      providers: providers.map((provider, index) => ({
+        id: provider.name?.trim() || `tts-provider-${index + 1}`,
+        type: provider.type,
+        endpoint: provider.endpoint,
+        apiKey: provider.api_key,
+        model: provider.model,
+        voice: provider.voice,
+        format: provider.format,
+      })),
+      fallback: {
+        maxAttempts: deps.config.tts?.fallback?.max_attempts,
+        failureCooldownMs: deps.config.tts?.fallback?.failure_cooldown_ms,
+      },
+      healthTracker: ttsHealthTracker,
    });
-    } catch (error) {
-      console.warn(`TTS synthesis failed for channel ${channel}:`, error instanceof Error ? error.message : 'Unknown error');
-      return undefined;
+
+    if (!outcome.attachment && outcome.attemptedProviders.length > 0) { // at least one provider was tried but no audio was produced
+      console.warn(
+        (`TTS synthesis fallback exhausted for channel ${channel}. `
+        + `attempted=${outcome.attemptedProviders.join(',') || 'none'} `
+        + `skipped=${outcome.skippedProviders.join(',') || 'none'} `
+        + `${outcome.lastError ? `last_error=${outcome.lastError}` : ''}`).trim(), // parenthesised so trim() covers the whole message, not only the last segment
+      );
+    }
+
+    return outcome.attachment ?? undefined;
  }

  function getOrCreateAgent(
@@ -822,6 +841,7 @@ export function createMessageRouter(deps: {
    let incomingText = msg.text;
    let matchedReactionName: string | undefined;
    const talkMode = deps.config.audio?.talk_mode;
+    let inTalkModeContext = false;
    if (talkMode?.enabled && incomingText.trim().length > 0) {
      const key = `${msg.channel}:${msg.senderId}`;
      const now = Date.now();
@@ -858,6 +878,7 @@ export function createMessageRouter(deps: {

      if (wakeMatched && wakeRegex) {
        talkModeUntil.set(key, now + timeoutMs);
+        inTalkModeContext = true;
        incomingText = incomingText.replace(wakeRegex, '').trim();
        if (!incomingText) {
          await reply({ text: `Listening. Talk mode active for ${Math.ceil(timeoutMs / 1000)}s.`, replyTo: msg.id });
@@ -865,11 +886,19 @@ export function createMessageRouter(deps: {
        }
      } else if (currentUntil > now) {
        talkModeUntil.set(key, now + timeoutMs);
+        inTalkModeContext = true;
      } else {
        return;
      }
    }

+    if (inTalkModeContext && !msg.metadata?.isCommand) { // talk mode only: a bare "stop"/"cancel" utterance is rewritten to the '/stop' command text
+      const spokenCommand = incomingText.trim().toLowerCase();
+      if (spokenCommand === 'stop' || spokenCommand === 'cancel') { // NOTE(review): exact match — a transcript such as "Stop." would not match; confirm whether punctuation is stripped upstream
+        incomingText = '/stop';
+      }
+    }
+
    const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
    const queueMode = session.getConfig('queue.mode') ?? deps.config.server?.queue?.mode ?? 'collect';
    const rawCommand = msg.metadata?.isCommand
@@ -661,6 +661,34 @@ const PATCHABLE_KEYS: Record<string, (config: Config, value: unknown) => boolean
    config.audio.enabled = value;
    return true;
  },
+  'audio.talk_mode.enabled': (config, value) => { // accepts booleans only; creates the audio / talk_mode containers when absent
+    if (typeof value !== 'boolean') {return false;}
+    config.audio ??= {} as Config['audio'];
+    config.audio.talk_mode ??= {} as Config['audio']['talk_mode'];
+    config.audio.talk_mode.enabled = value;
+    return true;
+  },
+  'audio.talk_mode.wake_phrase': (config, value) => { // rejects non-strings and blank strings; stores the trimmed phrase
+    if (typeof value !== 'string' || value.trim().length === 0) {return false;}
+    config.audio ??= {} as Config['audio'];
+    config.audio.talk_mode ??= {} as Config['audio']['talk_mode'];
+    config.audio.talk_mode.wake_phrase = value.trim();
+    return true;
+  },
+  'audio.talk_mode.timeout_ms': (config, value) => { // accepts finite numbers in [1000, 3600000] ms; fractional values are floored before storing
+    if (typeof value !== 'number' || !Number.isFinite(value) || value < 1000 || value > 3_600_000) {return false;}
+    config.audio ??= {} as Config['audio'];
+    config.audio.talk_mode ??= {} as Config['audio']['talk_mode'];
+    config.audio.talk_mode.timeout_ms = Math.floor(value);
+    return true;
+  },
+  'audio.talk_mode.allow_manual_toggle': (config, value) => { // accepts booleans only; creates the audio / talk_mode containers when absent
+    if (typeof value !== 'boolean') {return false;}
+    config.audio ??= {} as Config['audio'];
+    config.audio.talk_mode ??= {} as Config['audio']['talk_mode'];
+    config.audio.talk_mode.allow_manual_toggle = value;
+    return true;
+  },
  'sandbox.enabled': (config, value) => {
    if (typeof value !== 'boolean') {return false;}
    config.sandbox ??= {} as Config['sandbox'];
@@ -700,6 +728,20 @@ const PATCHABLE_KEYS: Record<string, (config: Config, value: unknown) => boolean
    config.tts.enabled_channels = value as string[];
    return true;
  },
+  'tts.fallback.max_attempts': (config, value) => { // accepts finite numbers in [1, 10] (same bounds as ttsFallbackSchema); fractional values are floored, not rejected
+    if (typeof value !== 'number' || !Number.isFinite(value) || value < 1 || value > 10) {return false;}
+    config.tts ??= {} as Config['tts'];
+    config.tts.fallback ??= {} as Config['tts']['fallback'];
+    config.tts.fallback.max_attempts = Math.floor(value);
+    return true;
+  },
+  'tts.fallback.failure_cooldown_ms': (config, value) => { // accepts finite numbers in [1000, 3600000] ms (same bounds as ttsFallbackSchema); fractional values are floored, not rejected
+    if (typeof value !== 'number' || !Number.isFinite(value) || value < 1000 || value > 3_600_000) {return false;}
+    config.tts ??= {} as Config['tts'];
+    config.tts.fallback ??= {} as Config['tts']['fallback'];
+    config.tts.fallback.failure_cooldown_ms = Math.floor(value);
+    return true;
+  },
 };

 export function createConfigHandlers(deps: ConfigHandlerDeps) {
@@ -1550,8 +1550,14 @@ describe('config handlers', () => {
          'memory.daily_log.enabled': true,
          'memory.proactive_extract.enabled': true,
          'memory.proactive_extract.min_tool_calls': 2,
+          'audio.talk_mode.enabled': true,
+          'audio.talk_mode.wake_phrase': 'ok flynn',
+          'audio.talk_mode.timeout_ms': 180000,
+          'audio.talk_mode.allow_manual_toggle': false,
          'tts.enabled': true,
          'tts.enabled_channels': ['telegram', 'discord'],
+          'tts.fallback.max_attempts': 2,
+          'tts.fallback.failure_cooldown_ms': 90000,
        },
      },
    };
@@ -1573,8 +1579,14 @@ describe('config handlers', () => {
      'memory.daily_log.enabled',
      'memory.proactive_extract.enabled',
      'memory.proactive_extract.min_tool_calls',
+      'audio.talk_mode.enabled',
+      'audio.talk_mode.wake_phrase',
+      'audio.talk_mode.timeout_ms',
+      'audio.talk_mode.allow_manual_toggle',
      'tts.enabled',
      'tts.enabled_channels',
+      'tts.fallback.max_attempts',
+      'tts.fallback.failure_cooldown_ms',
    ]);
    expect(r.rejected).toEqual([]);
    expect(r.persisted).toBe(false);
@@ -1593,8 +1605,14 @@ describe('config handlers', () => {
    expect(getPath(config, 'memory', 'daily_log', 'enabled')).toBe(true);
    expect(getPath(config, 'memory', 'proactive_extract', 'enabled')).toBe(true);
    expect(getPath(config, 'memory', 'proactive_extract', 'min_tool_calls')).toBe(2);
+    expect(getPath(config, 'audio', 'talk_mode', 'enabled')).toBe(true);
+    expect(getPath(config, 'audio', 'talk_mode', 'wake_phrase')).toBe('ok flynn');
+    expect(getPath(config, 'audio', 'talk_mode', 'timeout_ms')).toBe(180000);
+    expect(getPath(config, 'audio', 'talk_mode', 'allow_manual_toggle')).toBe(false);
    expect(getPath(config, 'tts', 'enabled')).toBe(true);
    expect(getPath(config, 'tts', 'enabled_channels')).toEqual(['telegram', 'discord']);
+    expect(getPath(config, 'tts', 'fallback', 'max_attempts')).toBe(2);
+    expect(getPath(config, 'tts', 'fallback', 'failure_cooldown_ms')).toBe(90000);
  });

  it('config.patch applies councils model and routing patches', async () => {
@@ -1675,7 +1693,11 @@ describe('config handlers', () => {
          'hooks.confirm': 'not-an-array',
          'server.queue.cap': 0,
          'memory.proactive_extract.min_tool_calls': 99,
+          'audio.talk_mode.wake_phrase': '',
+          'audio.talk_mode.timeout_ms': 99999999,
          'tts.enabled_channels': [1, 2, 3],
+          'tts.fallback.max_attempts': 0,
+          'tts.fallback.failure_cooldown_ms': 0,
          'automation.daily_briefing.model_tier': 'ultra',
        },
      },
@@ -1688,7 +1710,11 @@ describe('config handlers', () => {
      'hooks.confirm',
      'server.queue.cap',
      'memory.proactive_extract.min_tool_calls',
+      'audio.talk_mode.wake_phrase',
+      'audio.talk_mode.timeout_ms',
      'tts.enabled_channels',
+      'tts.fallback.max_attempts',
+      'tts.fallback.failure_cooldown_ms',
      'automation.daily_briefing.model_tier',
    ]);
    expect(r.persisted).toBe(false);
@@ -254,6 +254,15 @@ describe('ChatPage wiring', () => {
      throw new Error('Run status line not found');
    }
    expect(statusLine.classList.contains('hidden')).toBe(false);
+    expect(String(statusLine.textContent ?? '')).toContain('working');
+
+    stream.emit('run_state', { state: 'cancel_requested' });
+    await Promise.resolve();
+    expect(String(statusLine.textContent ?? '')).toContain('cancellation requested');
+
+    stream.emit('run_state', { state: 'cancelled' });
+    await Promise.resolve();
+    expect(String(statusLine.textContent ?? '')).toContain('cancelled');

    resolveResult?.({ content: 'ok' });
    await Promise.resolve();
@@ -139,6 +139,8 @@ function escapeHtml(str) {
 function getAssistantStateSnapshot(configData) {
  const automation = configData?.automation ?? {};
  const memory = configData?.memory ?? {};
+  const audio = configData?.audio ?? {};
+  const talkMode = audio.talk_mode ?? {};
  const tts = configData?.tts ?? {};
  const queue = configData?.server?.queue ?? {};
  return {
@@ -147,6 +149,10 @@ function getAssistantStateSnapshot(configData) {
    memoryDaily: Boolean(memory.daily_log?.enabled),
    memoryProactive: Boolean(memory.proactive_extract?.enabled),
    memoryMinToolCalls: Number(memory.proactive_extract?.min_tool_calls ?? 1),
+    talkModeEnabled: Boolean(talkMode.enabled),
+    talkWakePhrase: typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn',
+    talkTimeoutMs: Number(talkMode.timeout_ms ?? 120000),
+    talkManualToggle: talkMode.allow_manual_toggle !== false,
    ttsEnabled: Boolean(tts.enabled),
    ttsChannels: Array.isArray(tts.enabled_channels) ? tts.enabled_channels : [],
    queueMode: queue.mode ?? 'collect',
@@ -161,6 +167,7 @@ function buildPlaybookPatches(playbook) {
      'memory.daily_log.enabled': true,
      'memory.proactive_extract.enabled': true,
      'memory.proactive_extract.min_tool_calls': 1,
+      'audio.talk_mode.enabled': true,
      'tts.enabled': true,
      'tts.enabled_channels': [],
      'server.queue.mode': 'interrupt',
@@ -177,6 +184,7 @@ function buildPlaybookPatches(playbook) {
      'memory.daily_log.enabled': true,
      'memory.proactive_extract.enabled': true,
      'memory.proactive_extract.min_tool_calls': 2,
+      'audio.talk_mode.enabled': false,
      'tts.enabled': false,
      'server.queue.mode': 'steer_backlog',
    };
@@ -191,6 +199,7 @@ function buildPlaybookPatches(playbook) {
    'memory.daily_log.enabled': false,
    'memory.proactive_extract.enabled': false,
    'memory.proactive_extract.min_tool_calls': 3,
+    'audio.talk_mode.enabled': false,
    'tts.enabled': false,
    'server.queue.mode': 'collect',
  };
@@ -207,6 +216,10 @@ function buildRollbackPatchesFromSnapshot(snapshot) {
    'memory.daily_log.enabled': snapshot.memoryDaily,
    'memory.proactive_extract.enabled': snapshot.memoryProactive,
    'memory.proactive_extract.min_tool_calls': Number.isFinite(snapshot.memoryMinToolCalls) ? snapshot.memoryMinToolCalls : 1,
+    'audio.talk_mode.enabled': snapshot.talkModeEnabled,
+    'audio.talk_mode.wake_phrase': snapshot.talkWakePhrase,
+    'audio.talk_mode.timeout_ms': Number.isFinite(snapshot.talkTimeoutMs) ? snapshot.talkTimeoutMs : 120000,
+    'audio.talk_mode.allow_manual_toggle': snapshot.talkManualToggle,
    'tts.enabled': snapshot.ttsEnabled,
    'tts.enabled_channels': snapshot.ttsChannels,
    'server.queue.mode': snapshot.queueMode,
@@ -936,6 +949,8 @@ function updateAssistantHealth(configData) {

  const automation = configData?.automation ?? {};
  const memory = configData?.memory ?? {};
+  const audio = configData?.audio ?? {};
+  const talkMode = audio.talk_mode ?? {};
  const tts = configData?.tts ?? {};

  const deliveryMode = automation.delivery_mode ?? 'shared_session';
@@ -944,6 +959,9 @@ function updateAssistantHealth(configData) {
  const memoryDaily = Boolean(memory.daily_log?.enabled);
  const memoryProactive = Boolean(memory.proactive_extract?.enabled);
  const proactiveThreshold = Number(memory.proactive_extract?.min_tool_calls ?? 1);
+  const talkModeEnabled = Boolean(talkMode.enabled);
+  const talkWakePhrase = typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn';
+  const talkTimeoutMs = Number(talkMode.timeout_ms ?? 120000);
  const ttsEnabled = Boolean(tts.enabled);
  const briefing = automation.daily_briefing ?? {};
  const briefingName = briefing.name ?? 'daily-briefing';
@@ -1011,17 +1029,22 @@ function updateAssistantHealth(configData) {
    : (_lastCouncilError ? `Last run failed: ${_lastCouncilError}` : 'No council run yet in this dashboard session.');

  el.innerHTML = `
-    <div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-2 mb-4">
+    <div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-7 gap-2 mb-4">
      ${chip('Announce Mode', announce)}
      ${chip('Daily Briefing', dailyBriefing)}
      ${chip('Memory Daily Log', memoryDaily)}
      ${chip('Proactive Extract', memoryProactive)}
+      ${chip('Talk Mode', talkModeEnabled)}
      ${chip('TTS Replies', ttsEnabled)}
      <div class="flex justify-between items-center px-3 py-2.5 bg-zinc-900 border border-zinc-800 rounded-lg text-sm">
        <span class="text-zinc-400">Extract Threshold</span>
        <span class="font-bold">${Number.isFinite(proactiveThreshold) ? proactiveThreshold : 1}</span>
      </div>
    </div>
+    <div class="mb-3 text-sm text-zinc-500">
+      Talk controls: wake phrase <code class="text-zinc-300">${escapeHtml(talkWakePhrase)}</code>,
+      timeout ${Number.isFinite(talkTimeoutMs) ? Math.round(talkTimeoutMs / 1000) : 120}s.
+    </div>
    <div class="flex flex-wrap gap-2 mb-4">
      <button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-announce">
        ${announce ? 'Disable Announce Mode' : 'Enable Announce Mode'}
@@ -1035,6 +1058,9 @@ function updateAssistantHealth(configData) {
      <button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-memory-proactive">
        ${memoryProactive ? 'Disable Proactive Extract' : 'Enable Proactive Extract'}
      </button>
+      <button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-talk-mode">
+        ${talkModeEnabled ? 'Disable Talk Mode' : 'Enable Talk Mode'}
+      </button>
      <button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-tts">
        ${ttsEnabled ? 'Disable TTS' : 'Enable TTS'}
      </button>
@@ -1341,6 +1367,9 @@ function updateAssistantHealth(configData) {
      } else if (action === 'toggle-memory-proactive') {
        patches = { 'memory.proactive_extract.enabled': !memoryProactive };
        _assistantManualOverrides.add('memory.proactive_extract.enabled');
+      } else if (action === 'toggle-talk-mode') {
+        patches = { 'audio.talk_mode.enabled': !talkModeEnabled };
+        _assistantManualOverrides.add('audio.talk_mode.enabled');
      } else if (action === 'toggle-tts') {
        patches = { 'tts.enabled': !ttsEnabled };
        _assistantManualOverrides.add('tts.enabled');
@@ -47,6 +47,14 @@ function createInitialConfig() {
      daily_log: { enabled: true },
      proactive_extract: { enabled: true, min_tool_calls: 2 },
    },
+    audio: {
+      talk_mode: {
+        enabled: false,
+        wake_phrase: 'hey flynn',
+        timeout_ms: 120000,
+        allow_manual_toggle: true,
+      },
+    },
    tts: {
      enabled: false,
      enabled_channels: [],
@@ -520,6 +528,7 @@ describe('DashboardPage assistant controls', () => {
      'toggle-daily-briefing',
      'toggle-memory-daily',
      'toggle-memory-proactive',
+      'toggle-talk-mode',
      'toggle-tts',
      'playbook-executive',
      'playbook-operator',
@@ -552,6 +561,7 @@ describe('DashboardPage assistant controls', () => {
    await clickAction('toggle-daily-briefing');
    await clickAction('toggle-memory-daily');
    await clickAction('toggle-memory-proactive');
+    await clickAction('toggle-talk-mode');
    await clickAction('toggle-tts');
    await clickAction('playbook-executive');
    await clickAction('playbook-operator');
@@ -117,6 +117,7 @@ async function loadSettings() {
  const silentPatterns = hooks.silent ?? [];
  const automation = config?.automation ?? {};
  const memory = config?.memory ?? {};
+  const audio = config?.audio ?? {};
  const tts = config?.tts ?? {};
  _settingsCache = config ?? {};

@@ -125,8 +126,16 @@ async function loadSettings() {
  const dailyMemoryEnabled = Boolean(memory.daily_log?.enabled);
  const proactiveExtractEnabled = Boolean(memory.proactive_extract?.enabled);
  const proactiveMinToolCalls = Number(memory.proactive_extract?.min_tool_calls ?? 1);
+  const talkMode = audio.talk_mode ?? {};
+  const talkModeEnabled = Boolean(talkMode.enabled);
+  const talkModeWakePhrase = typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn';
+  const talkModeTimeoutMs = Number(talkMode.timeout_ms ?? 120000);
+  const talkModeManualToggle = talkMode.allow_manual_toggle !== false;
  const ttsEnabled = Boolean(tts.enabled);
  const ttsChannelText = Array.isArray(tts.enabled_channels) ? tts.enabled_channels.join(', ') : '';
+  const ttsFallback = tts.fallback ?? {};
+  const ttsFallbackMaxAttempts = Number(ttsFallback.max_attempts ?? 3);
+  const ttsFallbackCooldownMs = Number(ttsFallback.failure_cooldown_ms ?? 60000);
  const briefingOutputChannel = automation.daily_briefing?.output?.channel ?? '';
  const briefingOutputPeer = automation.daily_briefing?.output?.peer ?? '';

@@ -173,6 +182,30 @@ async function loadSettings() {
          <span>TTS channels (comma-separated, blank = all)</span>
          <input id="assist-tts-channels" type="text" value="${escapeHtml(ttsChannelText)}" placeholder="telegram,discord,whatsapp" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Example: telegram,discord,whatsapp" />
        </label>
+        <label class="flex items-center gap-2.5 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400 cursor-pointer min-h-[44px]" title="When enabled, wake phrase + talk window controls are active.">
+          <input id="assist-talk-mode-enabled" type="checkbox" ${talkModeEnabled ? 'checked' : ''} title="Enable or disable talk mode." />
+          <span>Talk mode enabled</span>
+        </label>
+        <label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Wake phrase that activates the talk window.">
+          <span>Talk mode wake phrase</span>
+          <input id="assist-talk-wake-phrase" type="text" value="${escapeHtml(talkModeWakePhrase)}" placeholder="hey flynn" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Say this phrase to activate talk mode for the configured timeout." />
+        </label>
+        <label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="How long talk mode stays active after wake phrase or input.">
+          <span>Talk mode timeout (ms)</span>
+          <input id="assist-talk-timeout-ms" type="number" min="1000" max="3600000" value="${Number.isFinite(talkModeTimeoutMs) ? talkModeTimeoutMs : 120000}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1000 to 3600000 milliseconds." />
+        </label>
+        <label class="flex items-center gap-2.5 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400 cursor-pointer min-h-[44px]" title="Allow /talk on|off|status manual commands while talk mode is enabled.">
+          <input id="assist-talk-manual-toggle" type="checkbox" ${talkModeManualToggle ? 'checked' : ''} title="Allow manual talk mode toggles." />
+          <span>Allow manual talk toggles</span>
+        </label>
+        <label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="How many TTS providers to try before text-only fallback.">
+          <span>TTS max fallback attempts</span>
+          <input id="assist-tts-max-attempts" type="number" min="1" max="10" value="${Number.isFinite(ttsFallbackMaxAttempts) ? ttsFallbackMaxAttempts : 3}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1 to 10 provider attempts." />
+        </label>
+        <label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Cooldown period applied after a provider fails TTS synthesis.">
+          <span>TTS failure cooldown (ms)</span>
+          <input id="assist-tts-fallback-cooldown-ms" type="number" min="1000" max="3600000" value="${Number.isFinite(ttsFallbackCooldownMs) ? ttsFallbackCooldownMs : 60000}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1000 to 3600000 milliseconds." />
+        </label>
        <label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Channel used for daily briefing delivery, such as telegram or slack.">
          <span>Briefing output channel</span>
          <input id="assist-briefing-channel" type="text" value="${escapeHtml(briefingOutputChannel)}" placeholder="telegram" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set the channel adapter name for briefings." />
@@ -302,9 +335,18 @@ async function saveAssistantMode() {
  const memoryDaily = Boolean(_el.querySelector('#assist-memory-daily')?.checked);
  const memoryProactive = Boolean(_el.querySelector('#assist-memory-proactive')?.checked);
  const ttsEnabled = Boolean(_el.querySelector('#assist-tts-enabled')?.checked);
+  const talkModeEnabled = Boolean(_el.querySelector('#assist-talk-mode-enabled')?.checked);
+  const talkModeManualToggle = Boolean(_el.querySelector('#assist-talk-manual-toggle')?.checked);
  const minToolsRaw = Number.parseInt(_el.querySelector('#assist-memory-min-tools')?.value ?? '1', 10);
  const minTools = Number.isFinite(minToolsRaw) ? Math.min(50, Math.max(0, minToolsRaw)) : 1;
  const ttsChannelsRaw = _el.querySelector('#assist-tts-channels')?.value ?? '';
+  const talkWakePhrase = (_el.querySelector('#assist-talk-wake-phrase')?.value ?? '').trim() || 'hey flynn';
+  const talkTimeoutRaw = Number.parseInt(_el.querySelector('#assist-talk-timeout-ms')?.value ?? '120000', 10);
+  const talkTimeoutMs = Number.isFinite(talkTimeoutRaw) ? Math.min(3_600_000, Math.max(1000, talkTimeoutRaw)) : 120000;
+  const ttsMaxAttemptsRaw = Number.parseInt(_el.querySelector('#assist-tts-max-attempts')?.value ?? '3', 10);
+  const ttsMaxAttempts = Number.isFinite(ttsMaxAttemptsRaw) ? Math.min(10, Math.max(1, ttsMaxAttemptsRaw)) : 3;
+  const ttsCooldownRaw = Number.parseInt(_el.querySelector('#assist-tts-fallback-cooldown-ms')?.value ?? '60000', 10);
+  const ttsFailureCooldownMs = Number.isFinite(ttsCooldownRaw) ? Math.min(3_600_000, Math.max(1000, ttsCooldownRaw)) : 60000;
  const briefingChannel = (_el.querySelector('#assist-briefing-channel')?.value ?? '').trim();
  const briefingPeer = (_el.querySelector('#assist-briefing-peer')?.value ?? '').trim();
  const ttsChannels = ttsChannelsRaw
@@ -320,6 +362,12 @@ async function saveAssistantMode() {
    'memory.proactive_extract.min_tool_calls': minTools,
    'tts.enabled': ttsEnabled,
    'tts.enabled_channels': ttsChannels,
+    'audio.talk_mode.enabled': talkModeEnabled,
+    'audio.talk_mode.wake_phrase': talkWakePhrase,
+    'audio.talk_mode.timeout_ms': talkTimeoutMs,
+    'audio.talk_mode.allow_manual_toggle': talkModeManualToggle,
+    'tts.fallback.max_attempts': ttsMaxAttempts,
+    'tts.fallback.failure_cooldown_ms': ttsFailureCooldownMs,
  };
  if (briefingChannel) {
    patches['automation.daily_briefing.output.channel'] = briefingChannel;
@@ -45,9 +45,21 @@ function createClient() {
            daily_log: { enabled: true },
            proactive_extract: { enabled: true, min_tool_calls: 2 },
          },
+          audio: {
+            talk_mode: {
+              enabled: false,
+              wake_phrase: 'hey flynn',
+              timeout_ms: 120000,
+              allow_manual_toggle: true,
+            },
+          },
          tts: {
            enabled: false,
            enabled_channels: ['telegram'],
+            fallback: {
+              max_attempts: 3,
+              failure_cooldown_ms: 60000,
+            },
          },
          hooks: {
            confirm: ['tool:group:fs/**/*'],
@@ -119,6 +131,12 @@ describe('SettingsPage wiring', () => {
    root.querySelector('#assist-memory-min-tools').value = '6';
    root.querySelector('#assist-tts-enabled').checked = true;
    root.querySelector('#assist-tts-channels').value = 'telegram, discord';
+    root.querySelector('#assist-talk-mode-enabled').checked = true;
+    root.querySelector('#assist-talk-wake-phrase').value = 'ok flynn';
+    root.querySelector('#assist-talk-timeout-ms').value = '180000';
+    root.querySelector('#assist-talk-manual-toggle').checked = false;
+    root.querySelector('#assist-tts-max-attempts').value = '2';
+    root.querySelector('#assist-tts-fallback-cooldown-ms').value = '45000';
    root.querySelector('#assist-briefing-channel').value = 'discord';
    root.querySelector('#assist-briefing-peer').value = '98765';

@@ -127,6 +145,14 @@ describe('SettingsPage wiring', () => {

    const assistantPatch = calls.find((entry) => entry.method === 'config.patch' && Object.prototype.hasOwnProperty.call(entry.params?.patches ?? {}, 'automation.delivery_mode'));
    expect(assistantPatch).toBeTruthy();
+    expect(assistantPatch?.params?.patches).toMatchObject({
+      'audio.talk_mode.enabled': true,
+      'audio.talk_mode.wake_phrase': 'ok flynn',
+      'audio.talk_mode.timeout_ms': 180000,
+      'audio.talk_mode.allow_manual_toggle': false,
+      'tts.fallback.max_attempts': 2,
+      'tts.fallback.failure_cooldown_ms': 45000,
+    });

    root.querySelector('#hooks-confirm').value = 'tool:group:fs/**/*\ntool:group:web/**/*';
    root.querySelector('#hooks-log').value = 'tool:web.search';
@@ -1,6 +1,6 @@
 import { afterEach, describe, expect, it, vi } from 'vitest';

-import { synthesizeSpeechAttachment } from './tts.js';
+import { synthesizeSpeechAttachment, synthesizeSpeechWithFallback, TtsHealthTracker } from './tts.js';

 describe('synthesizeSpeechAttachment', () => {
  afterEach(() => {
@@ -65,3 +65,153 @@ describe('synthesizeSpeechAttachment', () => {
    })).rejects.toThrow(/TTS request failed: 429 Too Many Requests/i);
  });
 });
+
+// Tests for the provider fallback chain. `fetch` is replaced with a spy whose
+// queued responses are consumed in call order, so the order of the mocked
+// responses below is the order in which providers are expected to be contacted.
+describe('synthesizeSpeechWithFallback', () => {
+  // Undo fake timers and fetch spies so each test starts from a clean slate.
+  afterEach(() => {
+    vi.useRealTimers();
+    vi.restoreAllMocks();
+  });
+
+  it('falls back to the next provider when the first provider fails', async () => {
+    // First response (503) is served to `primary`, second (200) to `backup`.
+    const fetchSpy = vi.spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce({
+        ok: false,
+        status: 503,
+        statusText: 'Service Unavailable',
+        text: async () => 'primary down',
+      } as Response)
+      .mockResolvedValueOnce({
+        ok: true,
+        status: 200,
+        statusText: 'OK',
+        arrayBuffer: async () => Uint8Array.from([9, 9]).buffer,
+      } as Response);
+
+    const result = await synthesizeSpeechWithFallback('hello', {
+      providers: [
+        { id: 'primary', endpoint: 'https://tts-primary.example.com/v1/audio/speech' },
+        { id: 'backup', endpoint: 'https://tts-backup.example.com/v1/audio/speech' },
+      ],
+    });
+
+    expect(fetchSpy).toHaveBeenCalledTimes(2);
+    expect(fetchSpy.mock.calls[0]?.[0]).toBe('https://tts-primary.example.com/v1/audio/speech');
+    expect(fetchSpy.mock.calls[1]?.[0]).toBe('https://tts-backup.example.com/v1/audio/speech');
+    expect(result.providerId).toBe('backup');
+    // 'CQk=' is the base64 encoding of the two bytes [9, 9] returned by the backup mock.
+    expect(result.attachment).toMatchObject({ mimeType: 'audio/mpeg', data: 'CQk=' });
+    expect(result.attemptedProviders).toEqual(['primary', 'backup']);
+  });
+
+  it('uses health cooldown to avoid repeatedly trying a failing provider', async () => {
+    // One tracker is shared by both calls so the failure from call #1 is visible to call #2.
+    const health = new TtsHealthTracker();
+    // Queue: primary fails, backup succeeds (call #1), then one more success for call #2.
+    const fetchSpy = vi.spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce({
+        ok: false,
+        status: 500,
+        statusText: 'Server Error',
+        text: async () => 'primary down',
+      } as Response)
+      .mockResolvedValueOnce({
+        ok: true,
+        status: 200,
+        statusText: 'OK',
+        arrayBuffer: async () => Uint8Array.from([1]).buffer,
+      } as Response)
+      .mockResolvedValueOnce({
+        ok: true,
+        status: 200,
+        statusText: 'OK',
+        arrayBuffer: async () => Uint8Array.from([2]).buffer,
+      } as Response);
+
+    const first = await synthesizeSpeechWithFallback('hello', {
+      providers: [
+        { id: 'primary', endpoint: 'https://tts-primary.example.com/v1/audio/speech' },
+        { id: 'backup', endpoint: 'https://tts-backup.example.com/v1/audio/speech' },
+      ],
+      fallback: {
+        failureCooldownMs: 120_000,
+      },
+      healthTracker: health,
+    });
+    expect(first.providerId).toBe('backup');
+
+    // Same provider order as before, but `primary` is now in cooldown.
+    const second = await synthesizeSpeechWithFallback('hello again', {
+      providers: [
+        { id: 'primary', endpoint: 'https://tts-primary.example.com/v1/audio/speech' },
+        { id: 'backup', endpoint: 'https://tts-backup.example.com/v1/audio/speech' },
+      ],
+      healthTracker: health,
+    });
+
+    // Only one extra fetch happened and it went to backup: primary was pushed
+    // behind the healthy provider and never reached because backup succeeded.
+    expect(fetchSpy).toHaveBeenCalledTimes(3);
+    expect(fetchSpy.mock.calls[2]?.[0]).toBe('https://tts-backup.example.com/v1/audio/speech');
+    expect(second.attemptedProviders).toEqual(['backup']);
+  });
+
+  it('tries unhealthy providers again after cooldown expires', async () => {
+    // Fake timers pin the system clock so the cooldown window can be crossed deterministically.
+    vi.useFakeTimers();
+    vi.setSystemTime(new Date('2026-01-01T00:00:00Z'));
+
+    const health = new TtsHealthTracker();
+    // Queue: primary fails, backup succeeds (call #1), then a success for call #2.
+    const fetchSpy = vi.spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce({
+        ok: false,
+        status: 503,
+        statusText: 'Service Unavailable',
+        text: async () => 'primary down',
+      } as Response)
+      .mockResolvedValueOnce({
+        ok: true,
+        status: 200,
+        statusText: 'OK',
+        arrayBuffer: async () => Uint8Array.from([3]).buffer,
+      } as Response)
+      .mockResolvedValueOnce({
+        ok: true,
+        status: 200,
+        statusText: 'OK',
+        arrayBuffer: async () => Uint8Array.from([4]).buffer,
+      } as Response);
+
+    await synthesizeSpeechWithFallback('first', {
+      providers: [
+        { id: 'primary', endpoint: 'https://tts-primary.example.com/v1/audio/speech' },
+        { id: 'backup', endpoint: 'https://tts-backup.example.com/v1/audio/speech' },
+      ],
+      fallback: { failureCooldownMs: 10_000 },
+      healthTracker: health,
+    });
+
+    // Jump 11 seconds ahead, past the 10 second cooldown recorded for primary.
+    vi.setSystemTime(new Date('2026-01-01T00:00:11Z'));
+
+    const second = await synthesizeSpeechWithFallback('second', {
+      providers: [
+        { id: 'primary', endpoint: 'https://tts-primary.example.com/v1/audio/speech' },
+        { id: 'backup', endpoint: 'https://tts-backup.example.com/v1/audio/speech' },
+      ],
+      healthTracker: health,
+    });
+
+    // Primary is considered healthy again, so it is tried first and serves the reply.
+    expect(fetchSpy).toHaveBeenCalledTimes(3);
+    expect(fetchSpy.mock.calls[2]?.[0]).toBe('https://tts-primary.example.com/v1/audio/speech');
+    expect(second.providerId).toBe('primary');
+  });
+
+  it('returns text fallback metadata when all providers fail', async () => {
+    // Both fetch calls reject outright (network-level failure rather than an HTTP error).
+    vi.spyOn(globalThis, 'fetch')
+      .mockRejectedValueOnce(new Error('primary down'))
+      .mockRejectedValueOnce(new Error('backup down'));
+
+    const result = await synthesizeSpeechWithFallback('hello', {
+      providers: [
+        { id: 'primary', endpoint: 'https://tts-primary.example.com/v1/audio/speech' },
+        { id: 'backup', endpoint: 'https://tts-backup.example.com/v1/audio/speech' },
+      ],
+    });
+
+    // The call resolves instead of throwing; lastError reflects the last provider tried.
+    expect(result.attachment).toBeNull();
+    expect(result.attemptedProviders).toEqual(['primary', 'backup']);
+    expect(result.lastError).toContain('backup down');
+  });
+});
@@ -8,6 +8,65 @@ export interface TtsSynthesisConfig {
  model?: string;
  voice?: string;
  format?: TtsOutputFormat;
+  /** Optional provider identity used by fallback/health tracking. */
+  id?: string;
+  /** Optional provider type hint for endpoint defaults. */
+  type?: 'openai' | 'custom';
+}
+
+/** Tuning knobs for the provider fallback chain used by `synthesizeSpeechWithFallback`. */
+export interface TtsFallbackConfig {
+  /**
+   * Maximum number of providers to try per reply.
+   * Clamped to the range 1..(number of usable providers); when omitted or
+   * non-finite, every usable provider may be tried.
+   */
+  maxAttempts?: number;
+  /**
+   * Cooldown window (milliseconds) before retrying a provider that failed.
+   * Defaults to 60_000 when omitted; the health tracker enforces a 1_000 ms minimum.
+   */
+  failureCooldownMs?: number;
+}
+
+/** Failure bookkeeping kept by `TtsHealthTracker` for one provider id. */
+export interface TtsProviderHealth {
+  /** Number of failures recorded since the last success (a success clears the whole entry). */
+  consecutiveFailures: number;
+  /** Timestamp in the same units as `Date.now()`; the provider counts as healthy once this is <= now. */
+  cooldownUntil: number;
+  /** Timestamp passed to the most recent `markFailure` call. */
+  lastFailureAt?: number;
+  /** Message of the most recent failure (`Error.message`, or the stringified value). */
+  lastError?: string;
+}
+
+/** Outcome of one `synthesizeSpeechWithFallback` call. */
+export interface TtsFallbackResult {
+  /** Synthesized audio, or `null` when no provider produced any (caller falls back to text). */
+  attachment: OutboundAttachment | null;
+  /** Id of the provider that produced `attachment`; only set on success. */
+  providerId?: string;
+  /** Provider ids that were actually called, in the order they were tried. */
+  attemptedProviders: string[];
+  /** Provider ids left untried because they fell outside the attempt limit. */
+  skippedProviders: string[];
+  /** Message of the most recent synthesis error; set only when every attempt failed and at least one threw. */
+  lastError?: string;
+}
+
+/**
+ * In-memory record of which TTS providers failed recently.
+ *
+ * A provider with no entry is healthy. A failure stores an entry whose
+ * `cooldownUntil` marks when the provider may be considered healthy again;
+ * a success removes the entry entirely.
+ */
+export class TtsHealthTracker {
+  private readonly states = new Map<string, TtsProviderHealth>();
+
+  /**
+   * @param providerId Provider identity to look up.
+   * @param now Reference timestamp; defaults to `Date.now()`.
+   * @returns `true` when the provider has no recorded failure or its cooldown has elapsed.
+   */
+  isHealthy(providerId: string, now = Date.now()): boolean {
+    const state = this.states.get(providerId);
+    if (!state) {
+      return true;
+    }
+    return state.cooldownUntil <= now;
+  }
+
+  /** Forget any recorded failures for the provider. */
+  markSuccess(providerId: string): void {
+    this.states.delete(providerId);
+  }
+
+  /**
+   * Record a failure and start (or restart) the provider's cooldown.
+   *
+   * @param providerId Provider identity that failed.
+   * @param error Thrown value; its message (or string form) is kept as `lastError`.
+   * @param now Failure timestamp; defaults to `Date.now()`.
+   * @param failureCooldownMs Cooldown length in ms; values below 1_000 are raised to
+   *   1_000 and `NaN` falls back to the 60_000 default.
+   */
+  markFailure(providerId: string, error: unknown, now = Date.now(), failureCooldownMs = 60_000): void {
+    const previous = this.states.get(providerId);
+    const consecutiveFailures = (previous?.consecutiveFailures ?? 0) + 1;
+    const message = error instanceof Error ? error.message : String(error);
+    // Math.max(1_000, NaN) is NaN, and `NaN <= now` is never true, so a NaN
+    // cooldown would leave the provider unhealthy until its next success.
+    const cooldownMs = Number.isNaN(failureCooldownMs) ? 60_000 : Math.max(1_000, failureCooldownMs);
+
+    this.states.set(providerId, {
+      consecutiveFailures,
+      cooldownUntil: now + cooldownMs,
+      lastFailureAt: now,
+      lastError: message,
+    });
+  }
+
+  /** @returns The stored failure state, or `undefined` when the provider has none. */
+  getState(providerId: string): TtsProviderHealth | undefined {
+    return this.states.get(providerId);
+  }
 }

 function outputFormatToMimeType(format: TtsOutputFormat): string {
@@ -34,6 +93,26 @@ function outputFormatToExtension(format: TtsOutputFormat): string {
  }
 }

+/**
+ * Decide which URL a provider should be called at.
+ *
+ * An explicitly configured endpoint wins as long as it is not blank (it is
+ * returned exactly as configured). Otherwise providers of type `openai` get
+ * the public OpenAI speech endpoint, and anything else has no endpoint.
+ */
+function resolveProviderEndpoint(config: TtsSynthesisConfig): string | undefined {
+  const { endpoint, type } = config;
+  if (endpoint && endpoint.trim().length > 0) {
+    return endpoint;
+  }
+  return type === 'openai' ? 'https://api.openai.com/v1/audio/speech' : undefined;
+}
+
+/**
+ * Turn the configured attempt limit into a usable count.
+ *
+ * @param maxAttempts Requested limit; may be missing or non-finite.
+ * @param providerCount Number of providers available to try.
+ * @returns 0 when there are no providers; `providerCount` when no finite limit
+ *   was given; otherwise the floored limit clamped to 1..providerCount.
+ */
+function normalizeMaxAttempts(maxAttempts: number | undefined, providerCount: number): number {
+  if (providerCount <= 0) {
+    return 0;
+  }
+  if (typeof maxAttempts !== 'number' || !Number.isFinite(maxAttempts)) {
+    return providerCount;
+  }
+  const requested = Math.floor(maxAttempts);
+  const atLeastOne = Math.max(1, requested);
+  return Math.min(providerCount, atLeastOne);
+}
+
 /** Synthesize speech via an OpenAI-compatible /v1/audio/speech endpoint. */
 export async function synthesizeSpeechAttachment(
  text: string,
@@ -86,3 +165,95 @@ export async function synthesizeSpeechAttachment(
    filename: `flynn-reply-${Date.now()}.${extension}`,
  };
 }
+
+/**
+ * Walk an ordered list of TTS providers until one of them produces audio.
+ *
+ * Providers without a resolvable endpoint are dropped. When a health tracker is
+ * supplied, providers still in cooldown are moved behind the healthy ones rather
+ * than removed, so they are still tried if the attempt limit allows. Synthesis
+ * errors are caught and recorded on the tracker; when no provider succeeds the
+ * promise resolves with `attachment: null` (text fallback) instead of rejecting.
+ *
+ * @param text Reply text to synthesize; blank text yields an empty result.
+ * @param input Provider chain, optional fallback limits, optional shared health tracker.
+ * @returns The attachment (or `null`) plus which providers were tried or skipped.
+ */
+export async function synthesizeSpeechWithFallback(
+  text: string,
+  input: {
+    providers: TtsSynthesisConfig[];
+    fallback?: TtsFallbackConfig;
+    healthTracker?: TtsHealthTracker;
+  },
+): Promise<TtsFallbackResult> {
+  type ReadyProvider = TtsSynthesisConfig & { id: string; endpoint: string };
+  const nothingToSynthesize = (): TtsFallbackResult => ({
+    attachment: null,
+    attemptedProviders: [],
+    skippedProviders: [],
+  });
+
+  const utterance = text.trim();
+  if (utterance.length === 0) {
+    return nothingToSynthesize();
+  }
+
+  // Resolve endpoints and assign positional ids, keeping only callable providers.
+  const candidates: ReadyProvider[] = [];
+  input.providers.forEach((entry, position) => {
+    const endpoint = resolveProviderEndpoint(entry);
+    if (typeof endpoint === 'string' && endpoint.length > 0) {
+      candidates.push({ ...entry, endpoint, id: entry.id ?? `tts-provider-${position + 1}` });
+    }
+  });
+  if (candidates.length === 0) {
+    return nothingToSynthesize();
+  }
+
+  // Split by health using a single timestamp so all providers are judged consistently.
+  const tracker = input.healthTracker;
+  const checkedAt = Date.now();
+  const ready: ReadyProvider[] = [];
+  const coolingDown: ReadyProvider[] = [];
+  for (const candidate of candidates) {
+    const usable = !tracker || tracker.isHealthy(candidate.id, checkedAt);
+    (usable ? ready : coolingDown).push(candidate);
+  }
+
+  // Healthy providers go first; cooling-down ones trail as a last resort.
+  const queue = [...ready, ...coolingDown];
+  const budget = normalizeMaxAttempts(input.fallback?.maxAttempts, queue.length);
+  const skippedProviders = queue.slice(budget).map((candidate) => candidate.id);
+  const cooldownMs = input.fallback?.failureCooldownMs ?? 60_000;
+  const attemptedProviders: string[] = [];
+  let lastError: string | undefined;
+
+  for (const candidate of queue.slice(0, budget)) {
+    attemptedProviders.push(candidate.id);
+    try {
+      const attachment = await synthesizeSpeechAttachment(utterance, candidate);
+      if (!attachment) {
+        // No audio and no error: move on without touching the provider's health.
+        continue;
+      }
+      tracker?.markSuccess(candidate.id);
+      return { attachment, providerId: candidate.id, attemptedProviders, skippedProviders };
+    } catch (failure) {
+      tracker?.markFailure(candidate.id, failure, Date.now(), cooldownMs);
+      lastError = failure instanceof Error ? failure.message : String(failure);
+    }
+  }
+
+  return {
+    attachment: null,
+    attemptedProviders,
+    skippedProviders,
+    lastError,
+  };
+}