feat: harden voice reliability with tts fallback and talk controls

This commit is contained in:
William Valentin
2026-02-26 17:29:23 -08:00
parent 2a9bed8c91
commit 163b1a0139
13 changed files with 781 additions and 17 deletions
+9
View File
@@ -254,6 +254,15 @@ describe('ChatPage wiring', () => {
throw new Error('Run status line not found');
}
expect(statusLine.classList.contains('hidden')).toBe(false);
expect(String(statusLine.textContent ?? '')).toContain('working');
stream.emit('run_state', { state: 'cancel_requested' });
await Promise.resolve();
expect(String(statusLine.textContent ?? '')).toContain('cancellation requested');
stream.emit('run_state', { state: 'cancelled' });
await Promise.resolve();
expect(String(statusLine.textContent ?? '')).toContain('cancelled');
resolveResult?.({ content: 'ok' });
await Promise.resolve();
+30 -1
View File
@@ -139,6 +139,8 @@ function escapeHtml(str) {
function getAssistantStateSnapshot(configData) {
const automation = configData?.automation ?? {};
const memory = configData?.memory ?? {};
const audio = configData?.audio ?? {};
const talkMode = audio.talk_mode ?? {};
const tts = configData?.tts ?? {};
const queue = configData?.server?.queue ?? {};
return {
@@ -147,6 +149,10 @@ function getAssistantStateSnapshot(configData) {
memoryDaily: Boolean(memory.daily_log?.enabled),
memoryProactive: Boolean(memory.proactive_extract?.enabled),
memoryMinToolCalls: Number(memory.proactive_extract?.min_tool_calls ?? 1),
talkModeEnabled: Boolean(talkMode.enabled),
talkWakePhrase: typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn',
talkTimeoutMs: Number(talkMode.timeout_ms ?? 120000),
talkManualToggle: talkMode.allow_manual_toggle !== false,
ttsEnabled: Boolean(tts.enabled),
ttsChannels: Array.isArray(tts.enabled_channels) ? tts.enabled_channels : [],
queueMode: queue.mode ?? 'collect',
@@ -161,6 +167,7 @@ function buildPlaybookPatches(playbook) {
'memory.daily_log.enabled': true,
'memory.proactive_extract.enabled': true,
'memory.proactive_extract.min_tool_calls': 1,
'audio.talk_mode.enabled': true,
'tts.enabled': true,
'tts.enabled_channels': [],
'server.queue.mode': 'interrupt',
@@ -177,6 +184,7 @@ function buildPlaybookPatches(playbook) {
'memory.daily_log.enabled': true,
'memory.proactive_extract.enabled': true,
'memory.proactive_extract.min_tool_calls': 2,
'audio.talk_mode.enabled': false,
'tts.enabled': false,
'server.queue.mode': 'steer_backlog',
};
@@ -191,6 +199,7 @@ function buildPlaybookPatches(playbook) {
'memory.daily_log.enabled': false,
'memory.proactive_extract.enabled': false,
'memory.proactive_extract.min_tool_calls': 3,
'audio.talk_mode.enabled': false,
'tts.enabled': false,
'server.queue.mode': 'collect',
};
@@ -207,6 +216,10 @@ function buildRollbackPatchesFromSnapshot(snapshot) {
'memory.daily_log.enabled': snapshot.memoryDaily,
'memory.proactive_extract.enabled': snapshot.memoryProactive,
'memory.proactive_extract.min_tool_calls': Number.isFinite(snapshot.memoryMinToolCalls) ? snapshot.memoryMinToolCalls : 1,
'audio.talk_mode.enabled': snapshot.talkModeEnabled,
'audio.talk_mode.wake_phrase': snapshot.talkWakePhrase,
'audio.talk_mode.timeout_ms': Number.isFinite(snapshot.talkTimeoutMs) ? snapshot.talkTimeoutMs : 120000,
'audio.talk_mode.allow_manual_toggle': snapshot.talkManualToggle,
'tts.enabled': snapshot.ttsEnabled,
'tts.enabled_channels': snapshot.ttsChannels,
'server.queue.mode': snapshot.queueMode,
@@ -936,6 +949,8 @@ function updateAssistantHealth(configData) {
const automation = configData?.automation ?? {};
const memory = configData?.memory ?? {};
const audio = configData?.audio ?? {};
const talkMode = audio.talk_mode ?? {};
const tts = configData?.tts ?? {};
const deliveryMode = automation.delivery_mode ?? 'shared_session';
@@ -944,6 +959,9 @@ function updateAssistantHealth(configData) {
const memoryDaily = Boolean(memory.daily_log?.enabled);
const memoryProactive = Boolean(memory.proactive_extract?.enabled);
const proactiveThreshold = Number(memory.proactive_extract?.min_tool_calls ?? 1);
const talkModeEnabled = Boolean(talkMode.enabled);
const talkWakePhrase = typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn';
const talkTimeoutMs = Number(talkMode.timeout_ms ?? 120000);
const ttsEnabled = Boolean(tts.enabled);
const briefing = automation.daily_briefing ?? {};
const briefingName = briefing.name ?? 'daily-briefing';
@@ -1011,17 +1029,22 @@ function updateAssistantHealth(configData) {
: (_lastCouncilError ? `Last run failed: ${_lastCouncilError}` : 'No council run yet in this dashboard session.');
el.innerHTML = `
<div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-2 mb-4">
<div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-7 gap-2 mb-4">
${chip('Announce Mode', announce)}
${chip('Daily Briefing', dailyBriefing)}
${chip('Memory Daily Log', memoryDaily)}
${chip('Proactive Extract', memoryProactive)}
${chip('Talk Mode', talkModeEnabled)}
${chip('TTS Replies', ttsEnabled)}
<div class="flex justify-between items-center px-3 py-2.5 bg-zinc-900 border border-zinc-800 rounded-lg text-sm">
<span class="text-zinc-400">Extract Threshold</span>
<span class="font-bold">${Number.isFinite(proactiveThreshold) ? proactiveThreshold : 1}</span>
</div>
</div>
<div class="mb-3 text-sm text-zinc-500">
Talk controls: wake phrase <code class="text-zinc-300">${escapeHtml(talkWakePhrase)}</code>,
timeout ${Number.isFinite(talkTimeoutMs) ? Math.round(talkTimeoutMs / 1000) : 120}s.
</div>
<div class="flex flex-wrap gap-2 mb-4">
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-announce">
${announce ? 'Disable Announce Mode' : 'Enable Announce Mode'}
@@ -1035,6 +1058,9 @@ function updateAssistantHealth(configData) {
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-memory-proactive">
${memoryProactive ? 'Disable Proactive Extract' : 'Enable Proactive Extract'}
</button>
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-talk-mode">
${talkModeEnabled ? 'Disable Talk Mode' : 'Enable Talk Mode'}
</button>
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-tts">
${ttsEnabled ? 'Disable TTS' : 'Enable TTS'}
</button>
@@ -1341,6 +1367,9 @@ function updateAssistantHealth(configData) {
} else if (action === 'toggle-memory-proactive') {
patches = { 'memory.proactive_extract.enabled': !memoryProactive };
_assistantManualOverrides.add('memory.proactive_extract.enabled');
} else if (action === 'toggle-talk-mode') {
patches = { 'audio.talk_mode.enabled': !talkModeEnabled };
_assistantManualOverrides.add('audio.talk_mode.enabled');
} else if (action === 'toggle-tts') {
patches = { 'tts.enabled': !ttsEnabled };
_assistantManualOverrides.add('tts.enabled');
+10
View File
@@ -47,6 +47,14 @@ function createInitialConfig() {
daily_log: { enabled: true },
proactive_extract: { enabled: true, min_tool_calls: 2 },
},
audio: {
talk_mode: {
enabled: false,
wake_phrase: 'hey flynn',
timeout_ms: 120000,
allow_manual_toggle: true,
},
},
tts: {
enabled: false,
enabled_channels: [],
@@ -520,6 +528,7 @@ describe('DashboardPage assistant controls', () => {
'toggle-daily-briefing',
'toggle-memory-daily',
'toggle-memory-proactive',
'toggle-talk-mode',
'toggle-tts',
'playbook-executive',
'playbook-operator',
@@ -552,6 +561,7 @@ describe('DashboardPage assistant controls', () => {
await clickAction('toggle-daily-briefing');
await clickAction('toggle-memory-daily');
await clickAction('toggle-memory-proactive');
await clickAction('toggle-talk-mode');
await clickAction('toggle-tts');
await clickAction('playbook-executive');
await clickAction('playbook-operator');
+48
View File
@@ -117,6 +117,7 @@ async function loadSettings() {
const silentPatterns = hooks.silent ?? [];
const automation = config?.automation ?? {};
const memory = config?.memory ?? {};
const audio = config?.audio ?? {};
const tts = config?.tts ?? {};
_settingsCache = config ?? {};
@@ -125,8 +126,16 @@ async function loadSettings() {
const dailyMemoryEnabled = Boolean(memory.daily_log?.enabled);
const proactiveExtractEnabled = Boolean(memory.proactive_extract?.enabled);
const proactiveMinToolCalls = Number(memory.proactive_extract?.min_tool_calls ?? 1);
const talkMode = audio.talk_mode ?? {};
const talkModeEnabled = Boolean(talkMode.enabled);
const talkModeWakePhrase = typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn';
const talkModeTimeoutMs = Number(talkMode.timeout_ms ?? 120000);
const talkModeManualToggle = talkMode.allow_manual_toggle !== false;
const ttsEnabled = Boolean(tts.enabled);
const ttsChannelText = Array.isArray(tts.enabled_channels) ? tts.enabled_channels.join(', ') : '';
const ttsFallback = tts.fallback ?? {};
const ttsFallbackMaxAttempts = Number(ttsFallback.max_attempts ?? 3);
const ttsFallbackCooldownMs = Number(ttsFallback.failure_cooldown_ms ?? 60000);
const briefingOutputChannel = automation.daily_briefing?.output?.channel ?? '';
const briefingOutputPeer = automation.daily_briefing?.output?.peer ?? '';
@@ -173,6 +182,30 @@ async function loadSettings() {
<span>TTS channels (comma-separated, blank = all)</span>
<input id="assist-tts-channels" type="text" value="${escapeHtml(ttsChannelText)}" placeholder="telegram,discord,whatsapp" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Example: telegram,discord,whatsapp" />
</label>
<label class="flex items-center gap-2.5 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400 cursor-pointer min-h-[44px]" title="When enabled, wake phrase + talk window controls are active.">
<input id="assist-talk-mode-enabled" type="checkbox" ${talkModeEnabled ? 'checked' : ''} title="Enable or disable talk mode." />
<span>Talk mode enabled</span>
</label>
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Wake phrase that activates the talk window.">
<span>Talk mode wake phrase</span>
<input id="assist-talk-wake-phrase" type="text" value="${escapeHtml(talkModeWakePhrase)}" placeholder="hey flynn" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Say this phrase to activate talk mode for the configured timeout." />
</label>
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="How long talk mode stays active after wake phrase or input.">
<span>Talk mode timeout (ms)</span>
<input id="assist-talk-timeout-ms" type="number" min="1000" max="3600000" value="${Number.isFinite(talkModeTimeoutMs) ? talkModeTimeoutMs : 120000}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1000 to 3600000 milliseconds." />
</label>
<label class="flex items-center gap-2.5 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400 cursor-pointer min-h-[44px]" title="Allow /talk on|off|status manual commands while talk mode is enabled.">
<input id="assist-talk-manual-toggle" type="checkbox" ${talkModeManualToggle ? 'checked' : ''} title="Allow manual talk mode toggles." />
<span>Allow manual talk toggles</span>
</label>
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="How many TTS providers to try before text-only fallback.">
<span>TTS max fallback attempts</span>
<input id="assist-tts-max-attempts" type="number" min="1" max="10" value="${Number.isFinite(ttsFallbackMaxAttempts) ? ttsFallbackMaxAttempts : 3}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1 to 10 provider attempts." />
</label>
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Cooldown period applied after a provider fails TTS synthesis.">
<span>TTS failure cooldown (ms)</span>
<input id="assist-tts-fallback-cooldown-ms" type="number" min="1000" max="3600000" value="${Number.isFinite(ttsFallbackCooldownMs) ? ttsFallbackCooldownMs : 60000}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1000 to 3600000 milliseconds." />
</label>
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Channel used for daily briefing delivery, such as telegram or slack.">
<span>Briefing output channel</span>
<input id="assist-briefing-channel" type="text" value="${escapeHtml(briefingOutputChannel)}" placeholder="telegram" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set the channel adapter name for briefings." />
@@ -302,9 +335,18 @@ async function saveAssistantMode() {
const memoryDaily = Boolean(_el.querySelector('#assist-memory-daily')?.checked);
const memoryProactive = Boolean(_el.querySelector('#assist-memory-proactive')?.checked);
const ttsEnabled = Boolean(_el.querySelector('#assist-tts-enabled')?.checked);
const talkModeEnabled = Boolean(_el.querySelector('#assist-talk-mode-enabled')?.checked);
const talkModeManualToggle = Boolean(_el.querySelector('#assist-talk-manual-toggle')?.checked);
const minToolsRaw = Number.parseInt(_el.querySelector('#assist-memory-min-tools')?.value ?? '1', 10);
const minTools = Number.isFinite(minToolsRaw) ? Math.min(50, Math.max(0, minToolsRaw)) : 1;
const ttsChannelsRaw = _el.querySelector('#assist-tts-channels')?.value ?? '';
const talkWakePhrase = (_el.querySelector('#assist-talk-wake-phrase')?.value ?? '').trim() || 'hey flynn';
const talkTimeoutRaw = Number.parseInt(_el.querySelector('#assist-talk-timeout-ms')?.value ?? '120000', 10);
const talkTimeoutMs = Number.isFinite(talkTimeoutRaw) ? Math.min(3_600_000, Math.max(1000, talkTimeoutRaw)) : 120000;
const ttsMaxAttemptsRaw = Number.parseInt(_el.querySelector('#assist-tts-max-attempts')?.value ?? '3', 10);
const ttsMaxAttempts = Number.isFinite(ttsMaxAttemptsRaw) ? Math.min(10, Math.max(1, ttsMaxAttemptsRaw)) : 3;
const ttsCooldownRaw = Number.parseInt(_el.querySelector('#assist-tts-fallback-cooldown-ms')?.value ?? '60000', 10);
const ttsFailureCooldownMs = Number.isFinite(ttsCooldownRaw) ? Math.min(3_600_000, Math.max(1000, ttsCooldownRaw)) : 60000;
const briefingChannel = (_el.querySelector('#assist-briefing-channel')?.value ?? '').trim();
const briefingPeer = (_el.querySelector('#assist-briefing-peer')?.value ?? '').trim();
const ttsChannels = ttsChannelsRaw
@@ -320,6 +362,12 @@ async function saveAssistantMode() {
'memory.proactive_extract.min_tool_calls': minTools,
'tts.enabled': ttsEnabled,
'tts.enabled_channels': ttsChannels,
'audio.talk_mode.enabled': talkModeEnabled,
'audio.talk_mode.wake_phrase': talkWakePhrase,
'audio.talk_mode.timeout_ms': talkTimeoutMs,
'audio.talk_mode.allow_manual_toggle': talkModeManualToggle,
'tts.fallback.max_attempts': ttsMaxAttempts,
'tts.fallback.failure_cooldown_ms': ttsFailureCooldownMs,
};
if (briefingChannel) {
patches['automation.daily_briefing.output.channel'] = briefingChannel;
+26
View File
@@ -45,9 +45,21 @@ function createClient() {
daily_log: { enabled: true },
proactive_extract: { enabled: true, min_tool_calls: 2 },
},
audio: {
talk_mode: {
enabled: false,
wake_phrase: 'hey flynn',
timeout_ms: 120000,
allow_manual_toggle: true,
},
},
tts: {
enabled: false,
enabled_channels: ['telegram'],
fallback: {
max_attempts: 3,
failure_cooldown_ms: 60000,
},
},
hooks: {
confirm: ['tool:group:fs/**/*'],
@@ -119,6 +131,12 @@ describe('SettingsPage wiring', () => {
root.querySelector('#assist-memory-min-tools').value = '6';
root.querySelector('#assist-tts-enabled').checked = true;
root.querySelector('#assist-tts-channels').value = 'telegram, discord';
root.querySelector('#assist-talk-mode-enabled').checked = true;
root.querySelector('#assist-talk-wake-phrase').value = 'ok flynn';
root.querySelector('#assist-talk-timeout-ms').value = '180000';
root.querySelector('#assist-talk-manual-toggle').checked = false;
root.querySelector('#assist-tts-max-attempts').value = '2';
root.querySelector('#assist-tts-fallback-cooldown-ms').value = '45000';
root.querySelector('#assist-briefing-channel').value = 'discord';
root.querySelector('#assist-briefing-peer').value = '98765';
@@ -127,6 +145,14 @@ describe('SettingsPage wiring', () => {
const assistantPatch = calls.find((entry) => entry.method === 'config.patch' && Object.prototype.hasOwnProperty.call(entry.params?.patches ?? {}, 'automation.delivery_mode'));
expect(assistantPatch).toBeTruthy();
expect(assistantPatch?.params?.patches).toMatchObject({
'audio.talk_mode.enabled': true,
'audio.talk_mode.wake_phrase': 'ok flynn',
'audio.talk_mode.timeout_ms': 180000,
'audio.talk_mode.allow_manual_toggle': false,
'tts.fallback.max_attempts': 2,
'tts.fallback.failure_cooldown_ms': 45000,
});
root.querySelector('#hooks-confirm').value = 'tool:group:fs/**/*\ntool:group:web/**/*';
root.querySelector('#hooks-log').value = 'tool:web.search';