From 163b1a013992db67e1f64fdcd3c12865de5413ce Mon Sep 17 00:00:00 2001 From: William Valentin Date: Thu, 26 Feb 2026 17:29:23 -0800 Subject: [PATCH] feat: harden voice reliability with tts fallback and talk controls --- src/config/schema.test.ts | 37 ++++++ src/config/schema.ts | 13 ++ src/daemon/routing.test.ts | 174 +++++++++++++++++++++++++ src/daemon/routing.ts | 59 ++++++--- src/gateway/handlers/config.ts | 42 ++++++ src/gateway/handlers/handlers.test.ts | 26 ++++ src/gateway/ui/pages/chat.test.ts | 9 ++ src/gateway/ui/pages/dashboard.js | 31 ++++- src/gateway/ui/pages/dashboard.test.ts | 10 ++ src/gateway/ui/pages/settings.js | 48 +++++++ src/gateway/ui/pages/settings.test.ts | 26 ++++ src/models/tts.test.ts | 152 ++++++++++++++++++++- src/models/tts.ts | 171 ++++++++++++++++++++++++ 13 files changed, 781 insertions(+), 17 deletions(-) diff --git a/src/config/schema.test.ts b/src/config/schema.test.ts index c59f552..be67d1f 100644 --- a/src/config/schema.test.ts +++ b/src/config/schema.test.ts @@ -870,6 +870,11 @@ describe('configSchema — tts', () => { expect(result.tts.enabled).toBe(false); expect(result.tts.enabled_channels).toEqual([]); expect(result.tts.provider).toBeUndefined(); + expect(result.tts.providers).toEqual([]); + expect(result.tts.fallback).toEqual({ + max_attempts: 3, + failure_cooldown_ms: 60000, + }); }); it('accepts custom tts provider settings', () => { @@ -886,6 +891,29 @@ describe('configSchema — tts', () => { voice: 'nova', format: 'wav', }, + providers: [ + { + name: 'primary', + type: 'custom', + endpoint: 'https://tts-primary.example.com/v1/audio/speech', + api_key: 'sk-1', + model: 'gpt-4o-mini-tts', + voice: 'alloy', + format: 'mp3', + }, + { + name: 'backup', + type: 'openai', + api_key: 'sk-2', + model: 'gpt-4o-mini-tts', + voice: 'nova', + format: 'opus', + }, + ], + fallback: { + max_attempts: 2, + failure_cooldown_ms: 90000, + }, }, }); @@ -899,6 +927,15 @@ describe('configSchema — tts', () => { voice: 'nova', format: 'wav', }); + expect(result.tts.providers).toHaveLength(2); + expect(result.tts.providers[0]).toMatchObject({ + name: 'primary', + endpoint: 'https://tts-primary.example.com/v1/audio/speech', + }); + expect(result.tts.fallback).toEqual({ + max_attempts: 2, + failure_cooldown_ms: 90000, + }); }); }); diff --git a/src/config/schema.ts b/src/config/schema.ts index 5b29f37..ff81a5a 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -842,6 +842,7 @@ const audioSchema = z.object({ const ttsOutputFormatSchema = z.enum(['mp3', 'wav', 'opus']); const ttsProviderSchema = z.object({ + name: z.string().min(1).optional(), type: z.enum(['openai', 'custom']).default('openai'), endpoint: z.string().optional(), api_key: z.string().optional(), @@ -850,11 +851,23 @@ const ttsProviderSchema = z.object({ format: ttsOutputFormatSchema.default('mp3'), }); +const ttsFallbackSchema = z.object({ + /** Number of providers attempted in-order before text-only fallback. */ + max_attempts: z.number().int().min(1).max(10).default(3), + /** Cooldown window applied to providers after synthesis failures. */ + failure_cooldown_ms: z.number().int().min(1000).max(3_600_000).default(60_000), +}).default({}); + const ttsSchema = z.object({ enabled: z.boolean().default(false), /** Restrict voice replies to selected channels. Empty means all channels. */ enabled_channels: z.array(z.string().min(1)).default([]), + /** Legacy single-provider config. */ provider: ttsProviderSchema.optional(), + /** Ordered provider chain for synthesis fallback. */ + providers: z.array(ttsProviderSchema).default([]), + /** Fallback + health policy for provider chain handling. */ + fallback: ttsFallbackSchema, }).default({}); // ── Tool policy schemas ────────────────────────────────────────────── diff --git a/src/daemon/routing.test.ts b/src/daemon/routing.test.ts index 120f450..7625731 100644 --- a/src/daemon/routing.test.ts +++ b/src/daemon/routing.test.ts @@ -2393,6 +2393,100 @@ describe('daemon tts routing integration', () => { expect(outbound?.attachments).toBeUndefined(); }); + it('falls back to secondary TTS provider when primary fails', async () => { + vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('fallback-chain response'); + const fetchSpy = vi.spyOn(globalThis, 'fetch') + .mockResolvedValueOnce({ + ok: false, + status: 503, + statusText: 'Service Unavailable', + text: async () => 'primary down', + } as Response) + .mockResolvedValueOnce({ + ok: true, + status: 200, + statusText: 'OK', + arrayBuffer: async () => Uint8Array.from([5, 6, 7]).buffer, + } as Response); + + const session = { + id: 'telegram:tts-user-4', + addMessage: vi.fn(), + getHistory: vi.fn(() => []), + clear: vi.fn(), + replaceHistory: vi.fn(), + getConfig: vi.fn(() => undefined), + setConfig: vi.fn(), + deleteConfig: vi.fn(), + }; + + const router = createMessageRouter({ + sessionManager: { getSession: vi.fn(() => session) } as unknown as MessageRouterDeps['sessionManager'], + modelRouter: { + getAvailableTiers: () => ['default'], + getAllLabels: () => ({ default: 'default' }), + getLabel: (tier: string) => tier, + } as unknown as MessageRouterDeps['modelRouter'], + systemPrompt: 'test prompt', + toolRegistry: { clone() { return this; }, register: vi.fn() } as unknown as MessageRouterDeps['toolRegistry'], + toolExecutor: {} as unknown as MessageRouterDeps['toolExecutor'], + config: { + agents: { + primary_tier: 'default', + delegation: { + compaction: 'default', + memory_extraction: 'default', + classification: 'default', + tool_summarisation: 'default', + complex_reasoning: 'default', + }, + max_delegation_depth: 1, + max_iterations: 3, + }, + compaction: { enabled: false }, + models: { default: { provider: 'anthropic', model: 'claude' } }, + tts: { + enabled: true, + enabled_channels: ['telegram'], + providers: [ + { + name: 'primary', + type: 'custom', + endpoint: 'https://tts-primary.example.com/v1/audio/speech', + }, + { + name: 'backup', + type: 'custom', + endpoint: 'https://tts-backup.example.com/v1/audio/speech', + }, + ], + fallback: { + max_attempts: 2, + failure_cooldown_ms: 60000, + }, + }, + } as unknown as MessageRouterDeps['config'], + }); + + const reply = vi.fn(async (_message: OutboundMessage) => {}); + await router.handler({ + id: 'tts-4', + channel: 'telegram', + senderId: 'tts-user-4', + text: 'respond with provider fallback', + timestamp: Date.now(), + } as MessageRouterInput, reply); + + expect(fetchSpy).toHaveBeenCalledTimes(2); + expect(fetchSpy.mock.calls[0]?.[0]).toBe('https://tts-primary.example.com/v1/audio/speech'); + expect(fetchSpy.mock.calls[1]?.[0]).toBe('https://tts-backup.example.com/v1/audio/speech'); + const outbound = reply.mock.calls[0]?.[0] as OutboundMessage | undefined; + expect(outbound?.attachments?.[0]).toMatchObject({ + mimeType: 'audio/mpeg', + data: 'BQYH', + }); + }); + it('falls back to text-only replies when tts synthesis fails', async () => { vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('fallback response'); vi.spyOn(globalThis, 'fetch').mockRejectedValue(new Error('tts down')); @@ -2909,4 +3003,84 @@ describe('daemon talk mode (voice wake) integration', () => { expect(processSpy).toHaveBeenCalledOnce(); expect(processSpy).toHaveBeenCalledWith('what time is it?', undefined, undefined); }); + + it('treats spoken cancel as /stop while talk mode is active', async () => { + const cancelSpy = vi.spyOn(AgentOrchestrator.prototype, 'cancel'); + vi.spyOn(AgentOrchestrator.prototype, 'isCancellable').mockReturnValue(true); + const processSpy = vi.spyOn(AgentOrchestrator.prototype, 'process'); + let resolveFirst: ((value: string) => void) | undefined; + let markStarted: (() => void) | undefined; + const started = new Promise((resolve) => { markStarted = resolve; }); + processSpy.mockImplementationOnce(() => { + markStarted?.(); + return new Promise((resolve) => { resolveFirst = resolve; }); + }); + + const session = { + id: 'telegram:user-talk-2', + addMessage: vi.fn(), + getHistory: vi.fn(() => []), + clear: vi.fn(), + replaceHistory: vi.fn(), + getConfig: vi.fn(() => undefined), + setConfig: vi.fn(), + deleteConfig: vi.fn(), + }; + + const commandRegistry = new CommandRegistry(); + registerBuiltinCommands(commandRegistry); + + const router = createMessageRouter({ + sessionManager: { getSession: vi.fn(() => session) } as unknown as MessageRouterDeps['sessionManager'], + modelRouter: { + getAvailableTiers: () => ['fast', 'default', 'complex', 'local'], + getAllLabels: () => ({ fast: 'fast', default: 'default', complex: 'complex', local: 'local' }), + getLabel: (tier: string) => tier, + } as unknown as MessageRouterDeps['modelRouter'], + systemPrompt: 'test prompt', + toolRegistry: { clone() { return this; }, register: vi.fn() } as unknown as MessageRouterDeps['toolRegistry'], + toolExecutor: {} as unknown as MessageRouterDeps['toolExecutor'], + config: { + agents: { + primary_tier: 'default', + delegation: { compaction: 'fast', memory_extraction: 'fast', classification: 'fast', tool_summarisation: 'fast', complex_reasoning: 'complex' }, + max_delegation_depth: 3, + max_iterations: 10, + }, + compaction: { enabled: false }, + models: { default: { provider: 'anthropic', model: 'claude' } }, + audio: { talk_mode: { enabled: true, wake_phrase: 'hey flynn', timeout_ms: 120000, allow_manual_toggle: true } }, + } as unknown as MessageRouterDeps['config'], + commandRegistry, + }); + + const reply = vi.fn(async (_message: OutboundMessage) => {}); + const firstRun = router.handler({ + id: 'm-talk-3', + channel: 'telegram', + senderId: 'user-talk-2', + text: 'hey flynn start a long task', + timestamp: Date.now(), + } as MessageRouterInput, reply); + + await started; + + await router.handler({ + id: 'm-talk-4', + channel: 'telegram', + senderId: 'user-talk-2', + text: 'cancel', + timestamp: Date.now(), + } as MessageRouterInput, reply); + + expect(cancelSpy).toHaveBeenCalledTimes(1); + expect(processSpy).toHaveBeenCalledTimes(1); + expect(reply).toHaveBeenCalledWith(expect.objectContaining({ + text: 'Cancellation requested. The active operation will stop at the next safe point.', + replyTo: 'm-talk-4', + })); + + resolveFirst?.('operation cancelled by user.'); + await firstRun; + }); }); diff --git a/src/daemon/routing.ts b/src/daemon/routing.ts index 84a4b78..4184760 100644 --- a/src/daemon/routing.ts +++ b/src/daemon/routing.ts @@ -1,7 +1,7 @@ import type { AudioTranscriptionConfig } from '../models/media.js'; import type { Attachment } from '../channels/types.js'; import { isSupportedAudio, transcribeAudio } from '../models/media.js'; -import { synthesizeSpeechAttachment } from '../models/tts.js'; +import { synthesizeSpeechWithFallback, TtsHealthTracker } from '../models/tts.js'; import { supportsAudioInput } from '../models/capabilities.js'; import { AgentOrchestrator, SubagentManager, type DelegationConfig } from '../backends/index.js'; import { OutboundAttachmentCollector } from '../backends/native/attachments.js'; @@ -397,6 +397,7 @@ export function createMessageRouter(deps: { const talkModeUntil = new Map(); const activeRuns = new Map(); const reactionCooldowns = new Map(); + const ttsHealthTracker = new TtsHealthTracker(); function getBackendMode(): BackendRuntimeMode { return deps.getBackendMode?.() ?? 'config_default'; @@ -518,24 +519,42 @@ export function createMessageRouter(deps: { return undefined; } - const provider = deps.config.tts?.provider; - const endpoint = provider?.endpoint ?? (provider?.type === 'openai' ? 'https://api.openai.com/v1/audio/speech' : undefined); - if (!endpoint) { + const configuredProviders = deps.config.tts?.providers ?? []; + const providers = configuredProviders.length > 0 + ? configuredProviders + : (deps.config.tts?.provider ? [deps.config.tts.provider] : []); + + if (providers.length === 0) { return undefined; } - try { - return await synthesizeSpeechAttachment(responseText, { - endpoint, - apiKey: provider?.api_key, - model: provider?.model, - voice: provider?.voice, - format: provider?.format, - }); - } catch (error) { - console.warn(`TTS synthesis failed for channel ${channel}:`, error instanceof Error ? error.message : 'Unknown error'); - return undefined; + const outcome = await synthesizeSpeechWithFallback(responseText, { + providers: providers.map((provider, index) => ({ + id: provider.name?.trim() || `tts-provider-${index + 1}`, + type: provider.type, + endpoint: provider.endpoint, + apiKey: provider.api_key, + model: provider.model, + voice: provider.voice, + format: provider.format, + })), + fallback: { + maxAttempts: deps.config.tts?.fallback?.max_attempts, + failureCooldownMs: deps.config.tts?.fallback?.failure_cooldown_ms, + }, + healthTracker: ttsHealthTracker, + }); + + if (!outcome.attachment && outcome.attemptedProviders.length > 0) { + console.warn( + `TTS synthesis fallback exhausted for channel ${channel}. ` + + `attempted=${outcome.attemptedProviders.join(',') || 'none'} ` + + `skipped=${outcome.skippedProviders.join(',') || 'none'} ` + + `${outcome.lastError ? `last_error=${outcome.lastError}` : ''}`.trim(), + ); } + + return outcome.attachment ?? undefined; } function getOrCreateAgent( @@ -822,6 +841,7 @@ export function createMessageRouter(deps: { let incomingText = msg.text; let matchedReactionName: string | undefined; const talkMode = deps.config.audio?.talk_mode; + let inTalkModeContext = false; if (talkMode?.enabled && incomingText.trim().length > 0) { const key = `${msg.channel}:${msg.senderId}`; const now = Date.now(); @@ -858,6 +878,7 @@ export function createMessageRouter(deps: { if (wakeMatched && wakeRegex) { talkModeUntil.set(key, now + timeoutMs); + inTalkModeContext = true; incomingText = incomingText.replace(wakeRegex, '').trim(); if (!incomingText) { await reply({ text: `Listening. Talk mode active for ${Math.ceil(timeoutMs / 1000)}s.`, replyTo: msg.id }); @@ -865,11 +886,19 @@ export function createMessageRouter(deps: { } } else if (currentUntil > now) { talkModeUntil.set(key, now + timeoutMs); + inTalkModeContext = true; } else { return; } } + if (inTalkModeContext && !msg.metadata?.isCommand) { + const spokenCommand = incomingText.trim().toLowerCase(); + if (spokenCommand === 'stop' || spokenCommand === 'cancel') { + incomingText = '/stop'; + } + } + const session = deps.sessionManager.getSession(msg.channel, msg.senderId); const queueMode = session.getConfig('queue.mode') ?? deps.config.server?.queue?.mode ?? 'collect'; const rawCommand = msg.metadata?.isCommand diff --git a/src/gateway/handlers/config.ts b/src/gateway/handlers/config.ts index 2428961..0fca7f5 100644 --- a/src/gateway/handlers/config.ts +++ b/src/gateway/handlers/config.ts @@ -661,6 +661,34 @@ const PATCHABLE_KEYS: Record boolean config.audio.enabled = value; return true; }, + 'audio.talk_mode.enabled': (config, value) => { + if (typeof value !== 'boolean') {return false;} + config.audio ??= {} as Config['audio']; + config.audio.talk_mode ??= {} as Config['audio']['talk_mode']; + config.audio.talk_mode.enabled = value; + return true; + }, + 'audio.talk_mode.wake_phrase': (config, value) => { + if (typeof value !== 'string' || value.trim().length === 0) {return false;} + config.audio ??= {} as Config['audio']; + config.audio.talk_mode ??= {} as Config['audio']['talk_mode']; + config.audio.talk_mode.wake_phrase = value.trim(); + return true; + }, + 'audio.talk_mode.timeout_ms': (config, value) => { + if (typeof value !== 'number' || !Number.isFinite(value) || value < 1000 || value > 3_600_000) {return false;} + config.audio ??= {} as Config['audio']; + config.audio.talk_mode ??= {} as Config['audio']['talk_mode']; + config.audio.talk_mode.timeout_ms = Math.floor(value); + return true; + }, + 'audio.talk_mode.allow_manual_toggle': (config, value) => { + if (typeof value !== 'boolean') {return false;} + config.audio ??= {} as Config['audio']; + config.audio.talk_mode ??= {} as Config['audio']['talk_mode']; + config.audio.talk_mode.allow_manual_toggle = value; + return true; + }, 'sandbox.enabled': (config, value) => { if (typeof value !== 'boolean') {return false;} config.sandbox ??= {} as Config['sandbox']; @@ -700,6 +728,20 @@ const PATCHABLE_KEYS: Record boolean config.tts.enabled_channels = value as string[]; return true; }, + 'tts.fallback.max_attempts': (config, value) => { + if (typeof value !== 'number' || !Number.isFinite(value) || value < 1 || value > 10) {return false;} + config.tts ??= {} as Config['tts']; + config.tts.fallback ??= {} as Config['tts']['fallback']; + config.tts.fallback.max_attempts = Math.floor(value); + return true; + }, + 'tts.fallback.failure_cooldown_ms': (config, value) => { + if (typeof value !== 'number' || !Number.isFinite(value) || value < 1000 || value > 3_600_000) {return false;} + config.tts ??= {} as Config['tts']; + config.tts.fallback ??= {} as Config['tts']['fallback']; + config.tts.fallback.failure_cooldown_ms = Math.floor(value); + return true; + }, }; export function createConfigHandlers(deps: ConfigHandlerDeps) { diff --git a/src/gateway/handlers/handlers.test.ts b/src/gateway/handlers/handlers.test.ts index cd81474..c967fe8 100644 --- a/src/gateway/handlers/handlers.test.ts +++ b/src/gateway/handlers/handlers.test.ts @@ -1550,8 +1550,14 @@ describe('config handlers', () => { 'memory.daily_log.enabled': true, 'memory.proactive_extract.enabled': true, 'memory.proactive_extract.min_tool_calls': 2, + 'audio.talk_mode.enabled': true, + 'audio.talk_mode.wake_phrase': 'ok flynn', + 'audio.talk_mode.timeout_ms': 180000, + 'audio.talk_mode.allow_manual_toggle': false, 'tts.enabled': true, 'tts.enabled_channels': ['telegram', 'discord'], + 'tts.fallback.max_attempts': 2, + 'tts.fallback.failure_cooldown_ms': 90000, }, }, }; @@ -1573,8 +1579,14 @@ describe('config handlers', () => { 'memory.daily_log.enabled', 'memory.proactive_extract.enabled', 'memory.proactive_extract.min_tool_calls', + 'audio.talk_mode.enabled', + 'audio.talk_mode.wake_phrase', + 'audio.talk_mode.timeout_ms', + 'audio.talk_mode.allow_manual_toggle', 'tts.enabled', 'tts.enabled_channels', + 'tts.fallback.max_attempts', + 'tts.fallback.failure_cooldown_ms', ]); expect(r.rejected).toEqual([]); expect(r.persisted).toBe(false); @@ -1593,8 +1605,14 @@ describe('config handlers', () => { expect(getPath(config, 'memory', 'daily_log', 'enabled')).toBe(true); expect(getPath(config, 'memory', 'proactive_extract', 'enabled')).toBe(true); expect(getPath(config, 'memory', 'proactive_extract', 'min_tool_calls')).toBe(2); + expect(getPath(config, 'audio', 'talk_mode', 'enabled')).toBe(true); + expect(getPath(config, 'audio', 'talk_mode', 'wake_phrase')).toBe('ok flynn'); + expect(getPath(config, 'audio', 'talk_mode', 'timeout_ms')).toBe(180000); + expect(getPath(config, 'audio', 'talk_mode', 'allow_manual_toggle')).toBe(false); expect(getPath(config, 'tts', 'enabled')).toBe(true); expect(getPath(config, 'tts', 'enabled_channels')).toEqual(['telegram', 'discord']); + expect(getPath(config, 'tts', 'fallback', 'max_attempts')).toBe(2); + expect(getPath(config, 'tts', 'fallback', 'failure_cooldown_ms')).toBe(90000); }); it('config.patch applies councils model and routing patches', async () => { @@ -1675,7 +1693,11 @@ describe('config handlers', () => { 'hooks.confirm': 'not-an-array', 'server.queue.cap': 0, 'memory.proactive_extract.min_tool_calls': 99, + 'audio.talk_mode.wake_phrase': '', + 'audio.talk_mode.timeout_ms': 99999999, 'tts.enabled_channels': [1, 2, 3], + 'tts.fallback.max_attempts': 0, + 'tts.fallback.failure_cooldown_ms': 0, 'automation.daily_briefing.model_tier': 'ultra', }, }, @@ -1688,7 +1710,11 @@ describe('config handlers', () => { 'hooks.confirm', 'server.queue.cap', 'memory.proactive_extract.min_tool_calls', + 'audio.talk_mode.wake_phrase', + 'audio.talk_mode.timeout_ms', 'tts.enabled_channels', + 'tts.fallback.max_attempts', + 'tts.fallback.failure_cooldown_ms', 'automation.daily_briefing.model_tier', ]); expect(r.persisted).toBe(false); diff --git a/src/gateway/ui/pages/chat.test.ts b/src/gateway/ui/pages/chat.test.ts index 0c903de..9dadfe8 100644 --- a/src/gateway/ui/pages/chat.test.ts +++ b/src/gateway/ui/pages/chat.test.ts @@ -254,6 +254,15 @@ describe('ChatPage wiring', () => { throw new Error('Run status line not found'); } expect(statusLine.classList.contains('hidden')).toBe(false); + expect(String(statusLine.textContent ?? '')).toContain('working'); + + stream.emit('run_state', { state: 'cancel_requested' }); + await Promise.resolve(); + expect(String(statusLine.textContent ?? '')).toContain('cancellation requested'); + + stream.emit('run_state', { state: 'cancelled' }); + await Promise.resolve(); + expect(String(statusLine.textContent ?? '')).toContain('cancelled'); resolveResult?.({ content: 'ok' }); await Promise.resolve(); diff --git a/src/gateway/ui/pages/dashboard.js b/src/gateway/ui/pages/dashboard.js index ddc9ed3..0234253 100644 --- a/src/gateway/ui/pages/dashboard.js +++ b/src/gateway/ui/pages/dashboard.js @@ -139,6 +139,8 @@ function escapeHtml(str) { function getAssistantStateSnapshot(configData) { const automation = configData?.automation ?? {}; const memory = configData?.memory ?? {}; + const audio = configData?.audio ?? {}; + const talkMode = audio.talk_mode ?? {}; const tts = configData?.tts ?? {}; const queue = configData?.server?.queue ?? {}; return { @@ -147,6 +149,10 @@ function getAssistantStateSnapshot(configData) { memoryDaily: Boolean(memory.daily_log?.enabled), memoryProactive: Boolean(memory.proactive_extract?.enabled), memoryMinToolCalls: Number(memory.proactive_extract?.min_tool_calls ?? 1), + talkModeEnabled: Boolean(talkMode.enabled), + talkWakePhrase: typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn', + talkTimeoutMs: Number(talkMode.timeout_ms ?? 120000), + talkManualToggle: talkMode.allow_manual_toggle !== false, ttsEnabled: Boolean(tts.enabled), ttsChannels: Array.isArray(tts.enabled_channels) ? tts.enabled_channels : [], queueMode: queue.mode ?? 'collect', @@ -161,6 +167,7 @@ function buildPlaybookPatches(playbook) { 'memory.daily_log.enabled': true, 'memory.proactive_extract.enabled': true, 'memory.proactive_extract.min_tool_calls': 1, + 'audio.talk_mode.enabled': true, 'tts.enabled': true, 'tts.enabled_channels': [], 'server.queue.mode': 'interrupt', @@ -177,6 +184,7 @@ function buildPlaybookPatches(playbook) { 'memory.daily_log.enabled': true, 'memory.proactive_extract.enabled': true, 'memory.proactive_extract.min_tool_calls': 2, + 'audio.talk_mode.enabled': false, 'tts.enabled': false, 'server.queue.mode': 'steer_backlog', }; @@ -191,6 +199,7 @@ function buildPlaybookPatches(playbook) { 'memory.daily_log.enabled': false, 'memory.proactive_extract.enabled': false, 'memory.proactive_extract.min_tool_calls': 3, + 'audio.talk_mode.enabled': false, 'tts.enabled': false, 'server.queue.mode': 'collect', }; @@ -207,6 +216,10 @@ function buildRollbackPatchesFromSnapshot(snapshot) { 'memory.daily_log.enabled': snapshot.memoryDaily, 'memory.proactive_extract.enabled': snapshot.memoryProactive, 'memory.proactive_extract.min_tool_calls': Number.isFinite(snapshot.memoryMinToolCalls) ? snapshot.memoryMinToolCalls : 1, + 'audio.talk_mode.enabled': snapshot.talkModeEnabled, + 'audio.talk_mode.wake_phrase': snapshot.talkWakePhrase, + 'audio.talk_mode.timeout_ms': Number.isFinite(snapshot.talkTimeoutMs) ? snapshot.talkTimeoutMs : 120000, + 'audio.talk_mode.allow_manual_toggle': snapshot.talkManualToggle, 'tts.enabled': snapshot.ttsEnabled, 'tts.enabled_channels': snapshot.ttsChannels, 'server.queue.mode': snapshot.queueMode, @@ -936,6 +949,8 @@ function updateAssistantHealth(configData) { const automation = configData?.automation ?? {}; const memory = configData?.memory ?? {}; + const audio = configData?.audio ?? {}; + const talkMode = audio.talk_mode ?? {}; const tts = configData?.tts ?? {}; const deliveryMode = automation.delivery_mode ?? 'shared_session'; @@ -944,6 +959,9 @@ function updateAssistantHealth(configData) { const memoryDaily = Boolean(memory.daily_log?.enabled); const memoryProactive = Boolean(memory.proactive_extract?.enabled); const proactiveThreshold = Number(memory.proactive_extract?.min_tool_calls ?? 1); + const talkModeEnabled = Boolean(talkMode.enabled); + const talkWakePhrase = typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn'; + const talkTimeoutMs = Number(talkMode.timeout_ms ?? 120000); const ttsEnabled = Boolean(tts.enabled); const briefing = automation.daily_briefing ?? {}; const briefingName = briefing.name ?? 'daily-briefing'; @@ -1011,17 +1029,22 @@ function updateAssistantHealth(configData) { : (_lastCouncilError ? `Last run failed: ${_lastCouncilError}` : 'No council run yet in this dashboard session.'); el.innerHTML = ` -
+
${chip('Announce Mode', announce)} ${chip('Daily Briefing', dailyBriefing)} ${chip('Memory Daily Log', memoryDaily)} ${chip('Proactive Extract', memoryProactive)} + ${chip('Talk Mode', talkModeEnabled)} ${chip('TTS Replies', ttsEnabled)}
Extract Threshold ${Number.isFinite(proactiveThreshold) ? proactiveThreshold : 1}
+
+ Talk controls: wake phrase ${escapeHtml(talkWakePhrase)}, + timeout ${Number.isFinite(talkTimeoutMs) ? Math.round(talkTimeoutMs / 1000) : 120}s. +
+ @@ -1341,6 +1367,9 @@ function updateAssistantHealth(configData) { } else if (action === 'toggle-memory-proactive') { patches = { 'memory.proactive_extract.enabled': !memoryProactive }; _assistantManualOverrides.add('memory.proactive_extract.enabled'); + } else if (action === 'toggle-talk-mode') { + patches = { 'audio.talk_mode.enabled': !talkModeEnabled }; + _assistantManualOverrides.add('audio.talk_mode.enabled'); } else if (action === 'toggle-tts') { patches = { 'tts.enabled': !ttsEnabled }; _assistantManualOverrides.add('tts.enabled'); diff --git a/src/gateway/ui/pages/dashboard.test.ts b/src/gateway/ui/pages/dashboard.test.ts index 1d9d945..6c22dfb 100644 --- a/src/gateway/ui/pages/dashboard.test.ts +++ b/src/gateway/ui/pages/dashboard.test.ts @@ -47,6 +47,14 @@ function createInitialConfig() { daily_log: { enabled: true }, proactive_extract: { enabled: true, min_tool_calls: 2 }, }, + audio: { + talk_mode: { + enabled: false, + wake_phrase: 'hey flynn', + timeout_ms: 120000, + allow_manual_toggle: true, + }, + }, tts: { enabled: false, enabled_channels: [], @@ -520,6 +528,7 @@ describe('DashboardPage assistant controls', () => { 'toggle-daily-briefing', 'toggle-memory-daily', 'toggle-memory-proactive', + 'toggle-talk-mode', 'toggle-tts', 'playbook-executive', 'playbook-operator', @@ -552,6 +561,7 @@ describe('DashboardPage assistant controls', () => { await clickAction('toggle-daily-briefing'); await clickAction('toggle-memory-daily'); await clickAction('toggle-memory-proactive'); + await clickAction('toggle-talk-mode'); await clickAction('toggle-tts'); await clickAction('playbook-executive'); await clickAction('playbook-operator'); diff --git a/src/gateway/ui/pages/settings.js b/src/gateway/ui/pages/settings.js index 2f47c4d..e2cbd61 100644 --- a/src/gateway/ui/pages/settings.js +++ b/src/gateway/ui/pages/settings.js @@ -117,6 +117,7 @@ async function loadSettings() { const silentPatterns = hooks.silent ?? []; const automation = config?.automation ?? {}; const memory = config?.memory ?? {}; + const audio = config?.audio ?? {}; const tts = config?.tts ?? {}; _settingsCache = config ?? {}; @@ -125,8 +126,16 @@ async function loadSettings() { const dailyMemoryEnabled = Boolean(memory.daily_log?.enabled); const proactiveExtractEnabled = Boolean(memory.proactive_extract?.enabled); const proactiveMinToolCalls = Number(memory.proactive_extract?.min_tool_calls ?? 1); + const talkMode = audio.talk_mode ?? {}; + const talkModeEnabled = Boolean(talkMode.enabled); + const talkModeWakePhrase = typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn'; + const talkModeTimeoutMs = Number(talkMode.timeout_ms ?? 120000); + const talkModeManualToggle = talkMode.allow_manual_toggle !== false; const ttsEnabled = Boolean(tts.enabled); const ttsChannelText = Array.isArray(tts.enabled_channels) ? tts.enabled_channels.join(', ') : ''; + const ttsFallback = tts.fallback ?? {}; + const ttsFallbackMaxAttempts = Number(ttsFallback.max_attempts ?? 3); + const ttsFallbackCooldownMs = Number(ttsFallback.failure_cooldown_ms ?? 60000); const briefingOutputChannel = automation.daily_briefing?.output?.channel ?? ''; const briefingOutputPeer = automation.daily_briefing?.output?.peer ?? ''; @@ -173,6 +182,30 @@ async function loadSettings() { TTS channels (comma-separated, blank = all) + + + + + +