feat: harden voice reliability with tts fallback and talk controls
This commit is contained in:
@@ -870,6 +870,11 @@ describe('configSchema — tts', () => {
|
||||
expect(result.tts.enabled).toBe(false);
|
||||
expect(result.tts.enabled_channels).toEqual([]);
|
||||
expect(result.tts.provider).toBeUndefined();
|
||||
expect(result.tts.providers).toEqual([]);
|
||||
expect(result.tts.fallback).toEqual({
|
||||
max_attempts: 3,
|
||||
failure_cooldown_ms: 60000,
|
||||
});
|
||||
});
|
||||
|
||||
it('accepts custom tts provider settings', () => {
|
||||
@@ -886,6 +891,29 @@ describe('configSchema — tts', () => {
|
||||
voice: 'nova',
|
||||
format: 'wav',
|
||||
},
|
||||
providers: [
|
||||
{
|
||||
name: 'primary',
|
||||
type: 'custom',
|
||||
endpoint: 'https://tts-primary.example.com/v1/audio/speech',
|
||||
api_key: 'sk-1',
|
||||
model: 'gpt-4o-mini-tts',
|
||||
voice: 'alloy',
|
||||
format: 'mp3',
|
||||
},
|
||||
{
|
||||
name: 'backup',
|
||||
type: 'openai',
|
||||
api_key: 'sk-2',
|
||||
model: 'gpt-4o-mini-tts',
|
||||
voice: 'nova',
|
||||
format: 'opus',
|
||||
},
|
||||
],
|
||||
fallback: {
|
||||
max_attempts: 2,
|
||||
failure_cooldown_ms: 90000,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
@@ -899,6 +927,15 @@ describe('configSchema — tts', () => {
|
||||
voice: 'nova',
|
||||
format: 'wav',
|
||||
});
|
||||
expect(result.tts.providers).toHaveLength(2);
|
||||
expect(result.tts.providers[0]).toMatchObject({
|
||||
name: 'primary',
|
||||
endpoint: 'https://tts-primary.example.com/v1/audio/speech',
|
||||
});
|
||||
expect(result.tts.fallback).toEqual({
|
||||
max_attempts: 2,
|
||||
failure_cooldown_ms: 90000,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -842,6 +842,7 @@ const audioSchema = z.object({
|
||||
const ttsOutputFormatSchema = z.enum(['mp3', 'wav', 'opus']);
|
||||
|
||||
const ttsProviderSchema = z.object({
|
||||
name: z.string().min(1).optional(),
|
||||
type: z.enum(['openai', 'custom']).default('openai'),
|
||||
endpoint: z.string().optional(),
|
||||
api_key: z.string().optional(),
|
||||
@@ -850,11 +851,23 @@ const ttsProviderSchema = z.object({
|
||||
format: ttsOutputFormatSchema.default('mp3'),
|
||||
});
|
||||
|
||||
/**
 * Fallback policy for the TTS provider chain.
 *
 * Every field has a default and the object itself defaults to `{}`, so the
 * section can be left out of the config entirely.
 */
const ttsFallbackSchema = z
  .object({
    /** Providers tried, in order, before the reply falls back to text only (integer, 1-10, default 3). */
    max_attempts: z.number().int().min(1).max(10).default(3),
    /** Cooldown in milliseconds applied to a provider after a synthesis failure (integer, 1000-3600000, default 60000). */
    failure_cooldown_ms: z.number().int().min(1000).max(3_600_000).default(60_000),
  })
  .default({});
|
||||
|
||||
/**
 * Text-to-speech configuration section.
 *
 * The object defaults to `{}`, so an absent `tts` section resolves to the
 * per-field defaults declared below.
 */
const ttsSchema = z
  .object({
    /** Turns voice replies on; off unless explicitly enabled. */
    enabled: z.boolean().default(false),
    /** Channels that may receive voice replies. An empty list means all channels. */
    enabled_channels: z.array(z.string().min(1)).default([]),
    /** Legacy single-provider config. */
    provider: ttsProviderSchema.optional(),
    /** Provider chain, listed in the order used for synthesis fallback. */
    providers: z.array(ttsProviderSchema).default([]),
    /** Fallback and health policy applied while walking the provider chain. */
    fallback: ttsFallbackSchema,
  })
  .default({});
|
||||
|
||||
// ── Tool policy schemas ──────────────────────────────────────────────
|
||||
|
||||
@@ -2393,6 +2393,100 @@ describe('daemon tts routing integration', () => {
|
||||
expect(outbound?.attachments).toBeUndefined();
|
||||
});
|
||||
|
||||
it('falls back to secondary TTS provider when primary fails', async () => {
  const primaryEndpoint = 'https://tts-primary.example.com/v1/audio/speech';
  const backupEndpoint = 'https://tts-backup.example.com/v1/audio/speech';

  vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('fallback-chain response');

  // The first fetch call answers 503; the second one returns three audio bytes.
  const primaryFailure = {
    ok: false,
    status: 503,
    statusText: 'Service Unavailable',
    text: async () => 'primary down',
  } as Response;
  const backupSuccess = {
    ok: true,
    status: 200,
    statusText: 'OK',
    arrayBuffer: async () => Uint8Array.from([5, 6, 7]).buffer,
  } as Response;
  const fetchSpy = vi.spyOn(globalThis, 'fetch')
    .mockResolvedValueOnce(primaryFailure)
    .mockResolvedValueOnce(backupSuccess);

  // Minimal session stub handed back by the fake session manager.
  const session = {
    id: 'telegram:tts-user-4',
    addMessage: vi.fn(),
    getHistory: vi.fn(() => []),
    clear: vi.fn(),
    replaceHistory: vi.fn(),
    getConfig: vi.fn(() => undefined),
    setConfig: vi.fn(),
    deleteConfig: vi.fn(),
  };

  const agentsConfig = {
    primary_tier: 'default',
    delegation: {
      compaction: 'default',
      memory_extraction: 'default',
      classification: 'default',
      tool_summarisation: 'default',
      complex_reasoning: 'default',
    },
    max_delegation_depth: 1,
    max_iterations: 3,
  };

  // Two custom providers in chain order, with room for exactly two attempts.
  const ttsConfig = {
    enabled: true,
    enabled_channels: ['telegram'],
    providers: [
      { name: 'primary', type: 'custom', endpoint: primaryEndpoint },
      { name: 'backup', type: 'custom', endpoint: backupEndpoint },
    ],
    fallback: {
      max_attempts: 2,
      failure_cooldown_ms: 60000,
    },
  };

  const router = createMessageRouter({
    sessionManager: { getSession: vi.fn(() => session) } as unknown as MessageRouterDeps['sessionManager'],
    modelRouter: {
      getAvailableTiers: () => ['default'],
      getAllLabels: () => ({ default: 'default' }),
      getLabel: (tier: string) => tier,
    } as unknown as MessageRouterDeps['modelRouter'],
    systemPrompt: 'test prompt',
    toolRegistry: { clone() { return this; }, register: vi.fn() } as unknown as MessageRouterDeps['toolRegistry'],
    toolExecutor: {} as unknown as MessageRouterDeps['toolExecutor'],
    config: {
      agents: agentsConfig,
      compaction: { enabled: false },
      models: { default: { provider: 'anthropic', model: 'claude' } },
      tts: ttsConfig,
    } as unknown as MessageRouterDeps['config'],
  });

  const reply = vi.fn(async (_message: OutboundMessage) => {});
  const inbound = {
    id: 'tts-4',
    channel: 'telegram',
    senderId: 'tts-user-4',
    text: 'respond with provider fallback',
    timestamp: Date.now(),
  } as MessageRouterInput;
  await router.handler(inbound, reply);

  // Both providers were contacted, primary first.
  expect(fetchSpy).toHaveBeenCalledTimes(2);
  expect(fetchSpy.mock.calls[0]?.[0]).toBe(primaryEndpoint);
  expect(fetchSpy.mock.calls[1]?.[0]).toBe(backupEndpoint);

  // 'BQYH' is the base64 encoding of the bytes 5, 6, 7 served by the backup mock.
  const outbound = reply.mock.calls[0]?.[0] as OutboundMessage | undefined;
  expect(outbound?.attachments?.[0]).toMatchObject({
    mimeType: 'audio/mpeg',
    data: 'BQYH',
  });
});
|
||||
|
||||
it('falls back to text-only replies when tts synthesis fails', async () => {
|
||||
vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('fallback response');
|
||||
vi.spyOn(globalThis, 'fetch').mockRejectedValue(new Error('tts down'));
|
||||
@@ -2909,4 +3003,84 @@ describe('daemon talk mode (voice wake) integration', () => {
|
||||
expect(processSpy).toHaveBeenCalledOnce();
|
||||
expect(processSpy).toHaveBeenCalledWith('what time is it?', undefined, undefined);
|
||||
});
|
||||
|
||||
it('treats spoken cancel as /stop while talk mode is active', async () => {
  const cancelSpy = vi.spyOn(AgentOrchestrator.prototype, 'cancel');
  vi.spyOn(AgentOrchestrator.prototype, 'isCancellable').mockReturnValue(true);
  const processSpy = vi.spyOn(AgentOrchestrator.prototype, 'process');

  // The first process() call signals that it has started and then stays
  // pending until the test resolves it by hand.
  let finishLongTask: ((value: string) => void) | undefined;
  let signalRunning: (() => void) | undefined;
  const running = new Promise<void>((resolve) => {
    signalRunning = resolve;
  });
  processSpy.mockImplementationOnce(() => {
    signalRunning?.();
    return new Promise<string>((resolve) => {
      finishLongTask = resolve;
    });
  });

  // Minimal session stub handed back by the fake session manager.
  const session = {
    id: 'telegram:user-talk-2',
    addMessage: vi.fn(),
    getHistory: vi.fn(() => []),
    clear: vi.fn(),
    replaceHistory: vi.fn(),
    getConfig: vi.fn(() => undefined),
    setConfig: vi.fn(),
    deleteConfig: vi.fn(),
  };

  const commandRegistry = new CommandRegistry();
  registerBuiltinCommands(commandRegistry);

  const agentsConfig = {
    primary_tier: 'default',
    delegation: {
      compaction: 'fast',
      memory_extraction: 'fast',
      classification: 'fast',
      tool_summarisation: 'fast',
      complex_reasoning: 'complex',
    },
    max_delegation_depth: 3,
    max_iterations: 10,
  };
  const talkModeConfig = {
    enabled: true,
    wake_phrase: 'hey flynn',
    timeout_ms: 120000,
    allow_manual_toggle: true,
  };

  const router = createMessageRouter({
    sessionManager: { getSession: vi.fn(() => session) } as unknown as MessageRouterDeps['sessionManager'],
    modelRouter: {
      getAvailableTiers: () => ['fast', 'default', 'complex', 'local'],
      getAllLabels: () => ({ fast: 'fast', default: 'default', complex: 'complex', local: 'local' }),
      getLabel: (tier: string) => tier,
    } as unknown as MessageRouterDeps['modelRouter'],
    systemPrompt: 'test prompt',
    toolRegistry: { clone() { return this; }, register: vi.fn() } as unknown as MessageRouterDeps['toolRegistry'],
    toolExecutor: {} as unknown as MessageRouterDeps['toolExecutor'],
    config: {
      agents: agentsConfig,
      compaction: { enabled: false },
      models: { default: { provider: 'anthropic', model: 'claude' } },
      audio: { talk_mode: talkModeConfig },
    } as unknown as MessageRouterDeps['config'],
    commandRegistry,
  });

  const reply = vi.fn(async (_message: OutboundMessage) => {});

  // Wake phrase plus a request: kicks off the run that stays pending.
  const wakeMessage = {
    id: 'm-talk-3',
    channel: 'telegram',
    senderId: 'user-talk-2',
    text: 'hey flynn start a long task',
    timestamp: Date.now(),
  } as MessageRouterInput;
  const firstRun = router.handler(wakeMessage, reply);

  await running;

  // Bare "cancel" sent while the first run is still in flight.
  const cancelMessage = {
    id: 'm-talk-4',
    channel: 'telegram',
    senderId: 'user-talk-2',
    text: 'cancel',
    timestamp: Date.now(),
  } as MessageRouterInput;
  await router.handler(cancelMessage, reply);

  // The cancel was routed to the orchestrator rather than processed as a new prompt.
  expect(cancelSpy).toHaveBeenCalledTimes(1);
  expect(processSpy).toHaveBeenCalledTimes(1);
  expect(reply).toHaveBeenCalledWith(expect.objectContaining({
    text: 'Cancellation requested. The active operation will stop at the next safe point.',
    replyTo: 'm-talk-4',
  }));

  // Release the pending run so the first handler call can settle.
  finishLongTask?.('operation cancelled by user.');
  await firstRun;
});
|
||||
});
|
||||
|
||||
+44
-15
@@ -1,7 +1,7 @@
|
||||
import type { AudioTranscriptionConfig } from '../models/media.js';
|
||||
import type { Attachment } from '../channels/types.js';
|
||||
import { isSupportedAudio, transcribeAudio } from '../models/media.js';
|
||||
import { synthesizeSpeechAttachment } from '../models/tts.js';
|
||||
import { synthesizeSpeechWithFallback, TtsHealthTracker } from '../models/tts.js';
|
||||
import { supportsAudioInput } from '../models/capabilities.js';
|
||||
import { AgentOrchestrator, SubagentManager, type DelegationConfig } from '../backends/index.js';
|
||||
import { OutboundAttachmentCollector } from '../backends/native/attachments.js';
|
||||
@@ -397,6 +397,7 @@ export function createMessageRouter(deps: {
|
||||
const talkModeUntil = new Map<string, number>();
|
||||
const activeRuns = new Map<string, AgentOrchestrator>();
|
||||
const reactionCooldowns = new Map<string, number>();
|
||||
const ttsHealthTracker = new TtsHealthTracker();
|
||||
|
||||
function getBackendMode(): BackendRuntimeMode {
|
||||
return deps.getBackendMode?.() ?? 'config_default';
|
||||
@@ -518,24 +519,42 @@ export function createMessageRouter(deps: {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const provider = deps.config.tts?.provider;
|
||||
const endpoint = provider?.endpoint ?? (provider?.type === 'openai' ? 'https://api.openai.com/v1/audio/speech' : undefined);
|
||||
if (!endpoint) {
|
||||
const configuredProviders = deps.config.tts?.providers ?? [];
|
||||
const providers = configuredProviders.length > 0
|
||||
? configuredProviders
|
||||
: (deps.config.tts?.provider ? [deps.config.tts.provider] : []);
|
||||
|
||||
if (providers.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
try {
|
||||
return await synthesizeSpeechAttachment(responseText, {
|
||||
endpoint,
|
||||
apiKey: provider?.api_key,
|
||||
model: provider?.model,
|
||||
voice: provider?.voice,
|
||||
format: provider?.format,
|
||||
});
|
||||
} catch (error) {
|
||||
console.warn(`TTS synthesis failed for channel ${channel}:`, error instanceof Error ? error.message : 'Unknown error');
|
||||
return undefined;
|
||||
const outcome = await synthesizeSpeechWithFallback(responseText, {
|
||||
providers: providers.map((provider, index) => ({
|
||||
id: provider.name?.trim() || `tts-provider-${index + 1}`,
|
||||
type: provider.type,
|
||||
endpoint: provider.endpoint,
|
||||
apiKey: provider.api_key,
|
||||
model: provider.model,
|
||||
voice: provider.voice,
|
||||
format: provider.format,
|
||||
})),
|
||||
fallback: {
|
||||
maxAttempts: deps.config.tts?.fallback?.max_attempts,
|
||||
failureCooldownMs: deps.config.tts?.fallback?.failure_cooldown_ms,
|
||||
},
|
||||
healthTracker: ttsHealthTracker,
|
||||
});
|
||||
|
||||
if (!outcome.attachment && outcome.attemptedProviders.length > 0) {
|
||||
console.warn(
|
||||
`TTS synthesis fallback exhausted for channel ${channel}. `
|
||||
+ `attempted=${outcome.attemptedProviders.join(',') || 'none'} `
|
||||
+ `skipped=${outcome.skippedProviders.join(',') || 'none'} `
|
||||
+ `${outcome.lastError ? `last_error=${outcome.lastError}` : ''}`.trim(),
|
||||
);
|
||||
}
|
||||
|
||||
return outcome.attachment ?? undefined;
|
||||
}
|
||||
|
||||
function getOrCreateAgent(
|
||||
@@ -822,6 +841,7 @@ export function createMessageRouter(deps: {
|
||||
let incomingText = msg.text;
|
||||
let matchedReactionName: string | undefined;
|
||||
const talkMode = deps.config.audio?.talk_mode;
|
||||
let inTalkModeContext = false;
|
||||
if (talkMode?.enabled && incomingText.trim().length > 0) {
|
||||
const key = `${msg.channel}:${msg.senderId}`;
|
||||
const now = Date.now();
|
||||
@@ -858,6 +878,7 @@ export function createMessageRouter(deps: {
|
||||
|
||||
if (wakeMatched && wakeRegex) {
|
||||
talkModeUntil.set(key, now + timeoutMs);
|
||||
inTalkModeContext = true;
|
||||
incomingText = incomingText.replace(wakeRegex, '').trim();
|
||||
if (!incomingText) {
|
||||
await reply({ text: `Listening. Talk mode active for ${Math.ceil(timeoutMs / 1000)}s.`, replyTo: msg.id });
|
||||
@@ -865,11 +886,19 @@ export function createMessageRouter(deps: {
|
||||
}
|
||||
} else if (currentUntil > now) {
|
||||
talkModeUntil.set(key, now + timeoutMs);
|
||||
inTalkModeContext = true;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (inTalkModeContext && !msg.metadata?.isCommand) {
|
||||
const spokenCommand = incomingText.trim().toLowerCase();
|
||||
if (spokenCommand === 'stop' || spokenCommand === 'cancel') {
|
||||
incomingText = '/stop';
|
||||
}
|
||||
}
|
||||
|
||||
const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
|
||||
const queueMode = session.getConfig('queue.mode') ?? deps.config.server?.queue?.mode ?? 'collect';
|
||||
const rawCommand = msg.metadata?.isCommand
|
||||
|
||||
@@ -661,6 +661,34 @@ const PATCHABLE_KEYS: Record<string, (config: Config, value: unknown) => boolean
|
||||
config.audio.enabled = value;
|
||||
return true;
|
||||
},
|
||||
// Patchers for the talk-mode settings. Each one validates the incoming value,
// creates `audio` / `audio.talk_mode` on the config when they are missing,
// stores the value and returns true. An invalid value returns false and
// leaves the config untouched.
'audio.talk_mode.enabled': (config, value) => {
  if (typeof value === 'boolean') {
    const audio = (config.audio ??= {} as Config['audio']);
    const talkMode = (audio.talk_mode ??= {} as Config['audio']['talk_mode']);
    talkMode.enabled = value;
    return true;
  }
  return false;
},
// Accepts a string that is non-empty after trimming; the trimmed form is stored.
'audio.talk_mode.wake_phrase': (config, value) => {
  const phrase = typeof value === 'string' ? value.trim() : '';
  if (phrase.length === 0) {
    return false;
  }
  const audio = (config.audio ??= {} as Config['audio']);
  const talkMode = (audio.talk_mode ??= {} as Config['audio']['talk_mode']);
  talkMode.wake_phrase = phrase;
  return true;
},
// Accepts a finite number from 1000 to 3_600_000 and stores it rounded down.
'audio.talk_mode.timeout_ms': (config, value) => {
  if (typeof value === 'number' && Number.isFinite(value) && value >= 1000 && value <= 3_600_000) {
    const audio = (config.audio ??= {} as Config['audio']);
    const talkMode = (audio.talk_mode ??= {} as Config['audio']['talk_mode']);
    talkMode.timeout_ms = Math.floor(value);
    return true;
  }
  return false;
},
'audio.talk_mode.allow_manual_toggle': (config, value) => {
  if (typeof value === 'boolean') {
    const audio = (config.audio ??= {} as Config['audio']);
    const talkMode = (audio.talk_mode ??= {} as Config['audio']['talk_mode']);
    talkMode.allow_manual_toggle = value;
    return true;
  }
  return false;
},
|
||||
'sandbox.enabled': (config, value) => {
|
||||
if (typeof value !== 'boolean') {return false;}
|
||||
config.sandbox ??= {} as Config['sandbox'];
|
||||
@@ -700,6 +728,20 @@ const PATCHABLE_KEYS: Record<string, (config: Config, value: unknown) => boolean
|
||||
config.tts.enabled_channels = value as string[];
|
||||
return true;
|
||||
},
|
||||
// Patchers for the TTS fallback policy. Each one validates the incoming
// number, creates `tts` / `tts.fallback` on the config when they are missing,
// stores the value rounded down and returns true. An invalid value returns
// false and leaves the config untouched.
// Accepts a finite number from 1 to 10.
'tts.fallback.max_attempts': (config, value) => {
  if (typeof value === 'number' && Number.isFinite(value) && value >= 1 && value <= 10) {
    const tts = (config.tts ??= {} as Config['tts']);
    const fallback = (tts.fallback ??= {} as Config['tts']['fallback']);
    fallback.max_attempts = Math.floor(value);
    return true;
  }
  return false;
},
// Accepts a finite number from 1000 to 3_600_000 (milliseconds).
'tts.fallback.failure_cooldown_ms': (config, value) => {
  if (typeof value === 'number' && Number.isFinite(value) && value >= 1000 && value <= 3_600_000) {
    const tts = (config.tts ??= {} as Config['tts']);
    const fallback = (tts.fallback ??= {} as Config['tts']['fallback']);
    fallback.failure_cooldown_ms = Math.floor(value);
    return true;
  }
  return false;
},
|
||||
};
|
||||
|
||||
export function createConfigHandlers(deps: ConfigHandlerDeps) {
|
||||
|
||||
@@ -1550,8 +1550,14 @@ describe('config handlers', () => {
|
||||
'memory.daily_log.enabled': true,
|
||||
'memory.proactive_extract.enabled': true,
|
||||
'memory.proactive_extract.min_tool_calls': 2,
|
||||
'audio.talk_mode.enabled': true,
|
||||
'audio.talk_mode.wake_phrase': 'ok flynn',
|
||||
'audio.talk_mode.timeout_ms': 180000,
|
||||
'audio.talk_mode.allow_manual_toggle': false,
|
||||
'tts.enabled': true,
|
||||
'tts.enabled_channels': ['telegram', 'discord'],
|
||||
'tts.fallback.max_attempts': 2,
|
||||
'tts.fallback.failure_cooldown_ms': 90000,
|
||||
},
|
||||
},
|
||||
};
|
||||
@@ -1573,8 +1579,14 @@ describe('config handlers', () => {
|
||||
'memory.daily_log.enabled',
|
||||
'memory.proactive_extract.enabled',
|
||||
'memory.proactive_extract.min_tool_calls',
|
||||
'audio.talk_mode.enabled',
|
||||
'audio.talk_mode.wake_phrase',
|
||||
'audio.talk_mode.timeout_ms',
|
||||
'audio.talk_mode.allow_manual_toggle',
|
||||
'tts.enabled',
|
||||
'tts.enabled_channels',
|
||||
'tts.fallback.max_attempts',
|
||||
'tts.fallback.failure_cooldown_ms',
|
||||
]);
|
||||
expect(r.rejected).toEqual([]);
|
||||
expect(r.persisted).toBe(false);
|
||||
@@ -1593,8 +1605,14 @@ describe('config handlers', () => {
|
||||
expect(getPath(config, 'memory', 'daily_log', 'enabled')).toBe(true);
|
||||
expect(getPath(config, 'memory', 'proactive_extract', 'enabled')).toBe(true);
|
||||
expect(getPath(config, 'memory', 'proactive_extract', 'min_tool_calls')).toBe(2);
|
||||
expect(getPath(config, 'audio', 'talk_mode', 'enabled')).toBe(true);
|
||||
expect(getPath(config, 'audio', 'talk_mode', 'wake_phrase')).toBe('ok flynn');
|
||||
expect(getPath(config, 'audio', 'talk_mode', 'timeout_ms')).toBe(180000);
|
||||
expect(getPath(config, 'audio', 'talk_mode', 'allow_manual_toggle')).toBe(false);
|
||||
expect(getPath(config, 'tts', 'enabled')).toBe(true);
|
||||
expect(getPath(config, 'tts', 'enabled_channels')).toEqual(['telegram', 'discord']);
|
||||
expect(getPath(config, 'tts', 'fallback', 'max_attempts')).toBe(2);
|
||||
expect(getPath(config, 'tts', 'fallback', 'failure_cooldown_ms')).toBe(90000);
|
||||
});
|
||||
|
||||
it('config.patch applies councils model and routing patches', async () => {
|
||||
@@ -1675,7 +1693,11 @@ describe('config handlers', () => {
|
||||
'hooks.confirm': 'not-an-array',
|
||||
'server.queue.cap': 0,
|
||||
'memory.proactive_extract.min_tool_calls': 99,
|
||||
'audio.talk_mode.wake_phrase': '',
|
||||
'audio.talk_mode.timeout_ms': 99999999,
|
||||
'tts.enabled_channels': [1, 2, 3],
|
||||
'tts.fallback.max_attempts': 0,
|
||||
'tts.fallback.failure_cooldown_ms': 0,
|
||||
'automation.daily_briefing.model_tier': 'ultra',
|
||||
},
|
||||
},
|
||||
@@ -1688,7 +1710,11 @@ describe('config handlers', () => {
|
||||
'hooks.confirm',
|
||||
'server.queue.cap',
|
||||
'memory.proactive_extract.min_tool_calls',
|
||||
'audio.talk_mode.wake_phrase',
|
||||
'audio.talk_mode.timeout_ms',
|
||||
'tts.enabled_channels',
|
||||
'tts.fallback.max_attempts',
|
||||
'tts.fallback.failure_cooldown_ms',
|
||||
'automation.daily_briefing.model_tier',
|
||||
]);
|
||||
expect(r.persisted).toBe(false);
|
||||
|
||||
@@ -254,6 +254,15 @@ describe('ChatPage wiring', () => {
|
||||
throw new Error('Run status line not found');
|
||||
}
|
||||
expect(statusLine.classList.contains('hidden')).toBe(false);
|
||||
expect(String(statusLine.textContent ?? '')).toContain('working');
|
||||
|
||||
stream.emit('run_state', { state: 'cancel_requested' });
|
||||
await Promise.resolve();
|
||||
expect(String(statusLine.textContent ?? '')).toContain('cancellation requested');
|
||||
|
||||
stream.emit('run_state', { state: 'cancelled' });
|
||||
await Promise.resolve();
|
||||
expect(String(statusLine.textContent ?? '')).toContain('cancelled');
|
||||
|
||||
resolveResult?.({ content: 'ok' });
|
||||
await Promise.resolve();
|
||||
|
||||
@@ -139,6 +139,8 @@ function escapeHtml(str) {
|
||||
function getAssistantStateSnapshot(configData) {
|
||||
const automation = configData?.automation ?? {};
|
||||
const memory = configData?.memory ?? {};
|
||||
const audio = configData?.audio ?? {};
|
||||
const talkMode = audio.talk_mode ?? {};
|
||||
const tts = configData?.tts ?? {};
|
||||
const queue = configData?.server?.queue ?? {};
|
||||
return {
|
||||
@@ -147,6 +149,10 @@ function getAssistantStateSnapshot(configData) {
|
||||
memoryDaily: Boolean(memory.daily_log?.enabled),
|
||||
memoryProactive: Boolean(memory.proactive_extract?.enabled),
|
||||
memoryMinToolCalls: Number(memory.proactive_extract?.min_tool_calls ?? 1),
|
||||
talkModeEnabled: Boolean(talkMode.enabled),
|
||||
talkWakePhrase: typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn',
|
||||
talkTimeoutMs: Number(talkMode.timeout_ms ?? 120000),
|
||||
talkManualToggle: talkMode.allow_manual_toggle !== false,
|
||||
ttsEnabled: Boolean(tts.enabled),
|
||||
ttsChannels: Array.isArray(tts.enabled_channels) ? tts.enabled_channels : [],
|
||||
queueMode: queue.mode ?? 'collect',
|
||||
@@ -161,6 +167,7 @@ function buildPlaybookPatches(playbook) {
|
||||
'memory.daily_log.enabled': true,
|
||||
'memory.proactive_extract.enabled': true,
|
||||
'memory.proactive_extract.min_tool_calls': 1,
|
||||
'audio.talk_mode.enabled': true,
|
||||
'tts.enabled': true,
|
||||
'tts.enabled_channels': [],
|
||||
'server.queue.mode': 'interrupt',
|
||||
@@ -177,6 +184,7 @@ function buildPlaybookPatches(playbook) {
|
||||
'memory.daily_log.enabled': true,
|
||||
'memory.proactive_extract.enabled': true,
|
||||
'memory.proactive_extract.min_tool_calls': 2,
|
||||
'audio.talk_mode.enabled': false,
|
||||
'tts.enabled': false,
|
||||
'server.queue.mode': 'steer_backlog',
|
||||
};
|
||||
@@ -191,6 +199,7 @@ function buildPlaybookPatches(playbook) {
|
||||
'memory.daily_log.enabled': false,
|
||||
'memory.proactive_extract.enabled': false,
|
||||
'memory.proactive_extract.min_tool_calls': 3,
|
||||
'audio.talk_mode.enabled': false,
|
||||
'tts.enabled': false,
|
||||
'server.queue.mode': 'collect',
|
||||
};
|
||||
@@ -207,6 +216,10 @@ function buildRollbackPatchesFromSnapshot(snapshot) {
|
||||
'memory.daily_log.enabled': snapshot.memoryDaily,
|
||||
'memory.proactive_extract.enabled': snapshot.memoryProactive,
|
||||
'memory.proactive_extract.min_tool_calls': Number.isFinite(snapshot.memoryMinToolCalls) ? snapshot.memoryMinToolCalls : 1,
|
||||
'audio.talk_mode.enabled': snapshot.talkModeEnabled,
|
||||
'audio.talk_mode.wake_phrase': snapshot.talkWakePhrase,
|
||||
'audio.talk_mode.timeout_ms': Number.isFinite(snapshot.talkTimeoutMs) ? snapshot.talkTimeoutMs : 120000,
|
||||
'audio.talk_mode.allow_manual_toggle': snapshot.talkManualToggle,
|
||||
'tts.enabled': snapshot.ttsEnabled,
|
||||
'tts.enabled_channels': snapshot.ttsChannels,
|
||||
'server.queue.mode': snapshot.queueMode,
|
||||
@@ -936,6 +949,8 @@ function updateAssistantHealth(configData) {
|
||||
|
||||
const automation = configData?.automation ?? {};
|
||||
const memory = configData?.memory ?? {};
|
||||
const audio = configData?.audio ?? {};
|
||||
const talkMode = audio.talk_mode ?? {};
|
||||
const tts = configData?.tts ?? {};
|
||||
|
||||
const deliveryMode = automation.delivery_mode ?? 'shared_session';
|
||||
@@ -944,6 +959,9 @@ function updateAssistantHealth(configData) {
|
||||
const memoryDaily = Boolean(memory.daily_log?.enabled);
|
||||
const memoryProactive = Boolean(memory.proactive_extract?.enabled);
|
||||
const proactiveThreshold = Number(memory.proactive_extract?.min_tool_calls ?? 1);
|
||||
const talkModeEnabled = Boolean(talkMode.enabled);
|
||||
const talkWakePhrase = typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn';
|
||||
const talkTimeoutMs = Number(talkMode.timeout_ms ?? 120000);
|
||||
const ttsEnabled = Boolean(tts.enabled);
|
||||
const briefing = automation.daily_briefing ?? {};
|
||||
const briefingName = briefing.name ?? 'daily-briefing';
|
||||
@@ -1011,17 +1029,22 @@ function updateAssistantHealth(configData) {
|
||||
: (_lastCouncilError ? `Last run failed: ${_lastCouncilError}` : 'No council run yet in this dashboard session.');
|
||||
|
||||
el.innerHTML = `
|
||||
<div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-2 mb-4">
|
||||
<div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-7 gap-2 mb-4">
|
||||
${chip('Announce Mode', announce)}
|
||||
${chip('Daily Briefing', dailyBriefing)}
|
||||
${chip('Memory Daily Log', memoryDaily)}
|
||||
${chip('Proactive Extract', memoryProactive)}
|
||||
${chip('Talk Mode', talkModeEnabled)}
|
||||
${chip('TTS Replies', ttsEnabled)}
|
||||
<div class="flex justify-between items-center px-3 py-2.5 bg-zinc-900 border border-zinc-800 rounded-lg text-sm">
|
||||
<span class="text-zinc-400">Extract Threshold</span>
|
||||
<span class="font-bold">${Number.isFinite(proactiveThreshold) ? proactiveThreshold : 1}</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="mb-3 text-sm text-zinc-500">
|
||||
Talk controls: wake phrase <code class="text-zinc-300">${escapeHtml(talkWakePhrase)}</code>,
|
||||
timeout ${Number.isFinite(talkTimeoutMs) ? Math.round(talkTimeoutMs / 1000) : 120}s.
|
||||
</div>
|
||||
<div class="flex flex-wrap gap-2 mb-4">
|
||||
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-announce">
|
||||
${announce ? 'Disable Announce Mode' : 'Enable Announce Mode'}
|
||||
@@ -1035,6 +1058,9 @@ function updateAssistantHealth(configData) {
|
||||
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-memory-proactive">
|
||||
${memoryProactive ? 'Disable Proactive Extract' : 'Enable Proactive Extract'}
|
||||
</button>
|
||||
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-talk-mode">
|
||||
${talkModeEnabled ? 'Disable Talk Mode' : 'Enable Talk Mode'}
|
||||
</button>
|
||||
<button class="px-3 py-1.5 text-sm font-medium rounded-md border border-zinc-700 bg-zinc-800 text-zinc-200 hover:bg-zinc-700 transition-colors assistant-action-btn" data-action="toggle-tts">
|
||||
${ttsEnabled ? 'Disable TTS' : 'Enable TTS'}
|
||||
</button>
|
||||
@@ -1341,6 +1367,9 @@ function updateAssistantHealth(configData) {
|
||||
} else if (action === 'toggle-memory-proactive') {
|
||||
patches = { 'memory.proactive_extract.enabled': !memoryProactive };
|
||||
_assistantManualOverrides.add('memory.proactive_extract.enabled');
|
||||
} else if (action === 'toggle-talk-mode') {
|
||||
patches = { 'audio.talk_mode.enabled': !talkModeEnabled };
|
||||
_assistantManualOverrides.add('audio.talk_mode.enabled');
|
||||
} else if (action === 'toggle-tts') {
|
||||
patches = { 'tts.enabled': !ttsEnabled };
|
||||
_assistantManualOverrides.add('tts.enabled');
|
||||
|
||||
@@ -47,6 +47,14 @@ function createInitialConfig() {
|
||||
daily_log: { enabled: true },
|
||||
proactive_extract: { enabled: true, min_tool_calls: 2 },
|
||||
},
|
||||
audio: {
|
||||
talk_mode: {
|
||||
enabled: false,
|
||||
wake_phrase: 'hey flynn',
|
||||
timeout_ms: 120000,
|
||||
allow_manual_toggle: true,
|
||||
},
|
||||
},
|
||||
tts: {
|
||||
enabled: false,
|
||||
enabled_channels: [],
|
||||
@@ -520,6 +528,7 @@ describe('DashboardPage assistant controls', () => {
|
||||
'toggle-daily-briefing',
|
||||
'toggle-memory-daily',
|
||||
'toggle-memory-proactive',
|
||||
'toggle-talk-mode',
|
||||
'toggle-tts',
|
||||
'playbook-executive',
|
||||
'playbook-operator',
|
||||
@@ -552,6 +561,7 @@ describe('DashboardPage assistant controls', () => {
|
||||
await clickAction('toggle-daily-briefing');
|
||||
await clickAction('toggle-memory-daily');
|
||||
await clickAction('toggle-memory-proactive');
|
||||
await clickAction('toggle-talk-mode');
|
||||
await clickAction('toggle-tts');
|
||||
await clickAction('playbook-executive');
|
||||
await clickAction('playbook-operator');
|
||||
|
||||
@@ -117,6 +117,7 @@ async function loadSettings() {
|
||||
const silentPatterns = hooks.silent ?? [];
|
||||
const automation = config?.automation ?? {};
|
||||
const memory = config?.memory ?? {};
|
||||
const audio = config?.audio ?? {};
|
||||
const tts = config?.tts ?? {};
|
||||
_settingsCache = config ?? {};
|
||||
|
||||
@@ -125,8 +126,16 @@ async function loadSettings() {
|
||||
const dailyMemoryEnabled = Boolean(memory.daily_log?.enabled);
|
||||
const proactiveExtractEnabled = Boolean(memory.proactive_extract?.enabled);
|
||||
const proactiveMinToolCalls = Number(memory.proactive_extract?.min_tool_calls ?? 1);
|
||||
const talkMode = audio.talk_mode ?? {};
|
||||
const talkModeEnabled = Boolean(talkMode.enabled);
|
||||
const talkModeWakePhrase = typeof talkMode.wake_phrase === 'string' ? talkMode.wake_phrase : 'hey flynn';
|
||||
const talkModeTimeoutMs = Number(talkMode.timeout_ms ?? 120000);
|
||||
const talkModeManualToggle = talkMode.allow_manual_toggle !== false;
|
||||
const ttsEnabled = Boolean(tts.enabled);
|
||||
const ttsChannelText = Array.isArray(tts.enabled_channels) ? tts.enabled_channels.join(', ') : '';
|
||||
const ttsFallback = tts.fallback ?? {};
|
||||
const ttsFallbackMaxAttempts = Number(ttsFallback.max_attempts ?? 3);
|
||||
const ttsFallbackCooldownMs = Number(ttsFallback.failure_cooldown_ms ?? 60000);
|
||||
const briefingOutputChannel = automation.daily_briefing?.output?.channel ?? '';
|
||||
const briefingOutputPeer = automation.daily_briefing?.output?.peer ?? '';
|
||||
|
||||
@@ -173,6 +182,30 @@ async function loadSettings() {
|
||||
<span>TTS channels (comma-separated, blank = all)</span>
|
||||
<input id="assist-tts-channels" type="text" value="${escapeHtml(ttsChannelText)}" placeholder="telegram,discord,whatsapp" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Example: telegram,discord,whatsapp" />
|
||||
</label>
|
||||
<label class="flex items-center gap-2.5 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400 cursor-pointer min-h-[44px]" title="When enabled, wake phrase + talk window controls are active.">
|
||||
<input id="assist-talk-mode-enabled" type="checkbox" ${talkModeEnabled ? 'checked' : ''} title="Enable or disable talk mode." />
|
||||
<span>Talk mode enabled</span>
|
||||
</label>
|
||||
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Wake phrase that activates the talk window.">
|
||||
<span>Talk mode wake phrase</span>
|
||||
<input id="assist-talk-wake-phrase" type="text" value="${escapeHtml(talkModeWakePhrase)}" placeholder="hey flynn" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Say this phrase to activate talk mode for the configured timeout." />
|
||||
</label>
|
||||
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="How long talk mode stays active after wake phrase or input.">
|
||||
<span>Talk mode timeout (ms)</span>
|
||||
<input id="assist-talk-timeout-ms" type="number" min="1000" max="3600000" value="${Number.isFinite(talkModeTimeoutMs) ? talkModeTimeoutMs : 120000}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1000 to 3600000 milliseconds." />
|
||||
</label>
|
||||
<label class="flex items-center gap-2.5 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400 cursor-pointer min-h-[44px]" title="Allow /talk on|off|status manual commands while talk mode is enabled.">
|
||||
<input id="assist-talk-manual-toggle" type="checkbox" ${talkModeManualToggle ? 'checked' : ''} title="Allow manual talk mode toggles." />
|
||||
<span>Allow manual talk toggles</span>
|
||||
</label>
|
||||
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="How many TTS providers to try before text-only fallback.">
|
||||
<span>TTS max fallback attempts</span>
|
||||
<input id="assist-tts-max-attempts" type="number" min="1" max="10" value="${Number.isFinite(ttsFallbackMaxAttempts) ? ttsFallbackMaxAttempts : 3}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1 to 10 provider attempts." />
|
||||
</label>
|
||||
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Cooldown period applied after a provider fails TTS synthesis.">
|
||||
<span>TTS failure cooldown (ms)</span>
|
||||
<input id="assist-tts-fallback-cooldown-ms" type="number" min="1000" max="3600000" value="${Number.isFinite(ttsFallbackCooldownMs) ? ttsFallbackCooldownMs : 60000}" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set a value from 1000 to 3600000 milliseconds." />
|
||||
</label>
|
||||
<label class="flex flex-col gap-2 bg-zinc-900 border border-zinc-800 rounded-lg px-3 py-2.5 text-sm text-zinc-400" title="Channel used for daily briefing delivery, such as telegram or slack.">
|
||||
<span>Briefing output channel</span>
|
||||
<input id="assist-briefing-channel" type="text" value="${escapeHtml(briefingOutputChannel)}" placeholder="telegram" class="w-full bg-zinc-950 text-zinc-50 border border-zinc-800 rounded-md px-3 py-2 text-sm focus:border-blue-500 outline-none" title="Set the channel adapter name for briefings." />
|
||||
@@ -302,9 +335,18 @@ async function saveAssistantMode() {
|
||||
const memoryDaily = Boolean(_el.querySelector('#assist-memory-daily')?.checked);
|
||||
const memoryProactive = Boolean(_el.querySelector('#assist-memory-proactive')?.checked);
|
||||
const ttsEnabled = Boolean(_el.querySelector('#assist-tts-enabled')?.checked);
|
||||
const talkModeEnabled = Boolean(_el.querySelector('#assist-talk-mode-enabled')?.checked);
|
||||
const talkModeManualToggle = Boolean(_el.querySelector('#assist-talk-manual-toggle')?.checked);
|
||||
const minToolsRaw = Number.parseInt(_el.querySelector('#assist-memory-min-tools')?.value ?? '1', 10);
|
||||
const minTools = Number.isFinite(minToolsRaw) ? Math.min(50, Math.max(0, minToolsRaw)) : 1;
|
||||
const ttsChannelsRaw = _el.querySelector('#assist-tts-channels')?.value ?? '';
|
||||
const talkWakePhrase = (_el.querySelector('#assist-talk-wake-phrase')?.value ?? '').trim() || 'hey flynn';
|
||||
const talkTimeoutRaw = Number.parseInt(_el.querySelector('#assist-talk-timeout-ms')?.value ?? '120000', 10);
|
||||
const talkTimeoutMs = Number.isFinite(talkTimeoutRaw) ? Math.min(3_600_000, Math.max(1000, talkTimeoutRaw)) : 120000;
|
||||
const ttsMaxAttemptsRaw = Number.parseInt(_el.querySelector('#assist-tts-max-attempts')?.value ?? '3', 10);
|
||||
const ttsMaxAttempts = Number.isFinite(ttsMaxAttemptsRaw) ? Math.min(10, Math.max(1, ttsMaxAttemptsRaw)) : 3;
|
||||
const ttsCooldownRaw = Number.parseInt(_el.querySelector('#assist-tts-fallback-cooldown-ms')?.value ?? '60000', 10);
|
||||
const ttsFailureCooldownMs = Number.isFinite(ttsCooldownRaw) ? Math.min(3_600_000, Math.max(1000, ttsCooldownRaw)) : 60000;
|
||||
const briefingChannel = (_el.querySelector('#assist-briefing-channel')?.value ?? '').trim();
|
||||
const briefingPeer = (_el.querySelector('#assist-briefing-peer')?.value ?? '').trim();
|
||||
const ttsChannels = ttsChannelsRaw
|
||||
@@ -320,6 +362,12 @@ async function saveAssistantMode() {
|
||||
'memory.proactive_extract.min_tool_calls': minTools,
|
||||
'tts.enabled': ttsEnabled,
|
||||
'tts.enabled_channels': ttsChannels,
|
||||
'audio.talk_mode.enabled': talkModeEnabled,
|
||||
'audio.talk_mode.wake_phrase': talkWakePhrase,
|
||||
'audio.talk_mode.timeout_ms': talkTimeoutMs,
|
||||
'audio.talk_mode.allow_manual_toggle': talkModeManualToggle,
|
||||
'tts.fallback.max_attempts': ttsMaxAttempts,
|
||||
'tts.fallback.failure_cooldown_ms': ttsFailureCooldownMs,
|
||||
};
|
||||
if (briefingChannel) {
|
||||
patches['automation.daily_briefing.output.channel'] = briefingChannel;
|
||||
|
||||
@@ -45,9 +45,21 @@ function createClient() {
|
||||
daily_log: { enabled: true },
|
||||
proactive_extract: { enabled: true, min_tool_calls: 2 },
|
||||
},
|
||||
audio: {
|
||||
talk_mode: {
|
||||
enabled: false,
|
||||
wake_phrase: 'hey flynn',
|
||||
timeout_ms: 120000,
|
||||
allow_manual_toggle: true,
|
||||
},
|
||||
},
|
||||
tts: {
|
||||
enabled: false,
|
||||
enabled_channels: ['telegram'],
|
||||
fallback: {
|
||||
max_attempts: 3,
|
||||
failure_cooldown_ms: 60000,
|
||||
},
|
||||
},
|
||||
hooks: {
|
||||
confirm: ['tool:group:fs/**/*'],
|
||||
@@ -119,6 +131,12 @@ describe('SettingsPage wiring', () => {
|
||||
root.querySelector('#assist-memory-min-tools').value = '6';
|
||||
root.querySelector('#assist-tts-enabled').checked = true;
|
||||
root.querySelector('#assist-tts-channels').value = 'telegram, discord';
|
||||
root.querySelector('#assist-talk-mode-enabled').checked = true;
|
||||
root.querySelector('#assist-talk-wake-phrase').value = 'ok flynn';
|
||||
root.querySelector('#assist-talk-timeout-ms').value = '180000';
|
||||
root.querySelector('#assist-talk-manual-toggle').checked = false;
|
||||
root.querySelector('#assist-tts-max-attempts').value = '2';
|
||||
root.querySelector('#assist-tts-fallback-cooldown-ms').value = '45000';
|
||||
root.querySelector('#assist-briefing-channel').value = 'discord';
|
||||
root.querySelector('#assist-briefing-peer').value = '98765';
|
||||
|
||||
@@ -127,6 +145,14 @@ describe('SettingsPage wiring', () => {
|
||||
|
||||
const assistantPatch = calls.find((entry) => entry.method === 'config.patch' && Object.prototype.hasOwnProperty.call(entry.params?.patches ?? {}, 'automation.delivery_mode'));
|
||||
expect(assistantPatch).toBeTruthy();
|
||||
expect(assistantPatch?.params?.patches).toMatchObject({
|
||||
'audio.talk_mode.enabled': true,
|
||||
'audio.talk_mode.wake_phrase': 'ok flynn',
|
||||
'audio.talk_mode.timeout_ms': 180000,
|
||||
'audio.talk_mode.allow_manual_toggle': false,
|
||||
'tts.fallback.max_attempts': 2,
|
||||
'tts.fallback.failure_cooldown_ms': 45000,
|
||||
});
|
||||
|
||||
root.querySelector('#hooks-confirm').value = 'tool:group:fs/**/*\ntool:group:web/**/*';
|
||||
root.querySelector('#hooks-log').value = 'tool:web.search';
|
||||
|
||||
+151
-1
@@ -1,6 +1,6 @@
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import { synthesizeSpeechAttachment } from './tts.js';
|
||||
import { synthesizeSpeechAttachment, synthesizeSpeechWithFallback, TtsHealthTracker } from './tts.js';
|
||||
|
||||
describe('synthesizeSpeechAttachment', () => {
|
||||
afterEach(() => {
|
||||
@@ -65,3 +65,153 @@ describe('synthesizeSpeechAttachment', () => {
|
||||
})).rejects.toThrow(/TTS request failed: 429 Too Many Requests/i);
|
||||
});
|
||||
});
|
||||
|
||||
describe('synthesizeSpeechWithFallback', () => {
  const PRIMARY_URL = 'https://tts-primary.example.com/v1/audio/speech';
  const BACKUP_URL = 'https://tts-backup.example.com/v1/audio/speech';

  /** Builds a fresh primary -> backup provider chain for each call. */
  const providerChain = () => [
    { id: 'primary', endpoint: PRIMARY_URL },
    { id: 'backup', endpoint: BACKUP_URL },
  ];

  /** Minimal stand-in for a non-2xx HTTP response from a provider. */
  const failedResponse = (status: number, statusText: string) => ({
    ok: false,
    status,
    statusText,
    text: async () => 'primary down',
  } as Response);

  /** Minimal stand-in for a successful response carrying the given audio bytes. */
  const audioResponse = (...bytes: number[]) => ({
    ok: true,
    status: 200,
    statusText: 'OK',
    arrayBuffer: async () => Uint8Array.from(bytes).buffer,
  } as Response);

  afterEach(() => {
    vi.useRealTimers();
    vi.restoreAllMocks();
  });

  it('falls back to the next provider when the first provider fails', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(failedResponse(503, 'Service Unavailable'))
      .mockResolvedValueOnce(audioResponse(9, 9));

    const result = await synthesizeSpeechWithFallback('hello', {
      providers: providerChain(),
    });

    expect(fetchSpy).toHaveBeenCalledTimes(2);
    expect(fetchSpy.mock.calls[0]?.[0]).toBe(PRIMARY_URL);
    expect(fetchSpy.mock.calls[1]?.[0]).toBe(BACKUP_URL);
    expect(result.providerId).toBe('backup');
    expect(result.attachment).toMatchObject({ mimeType: 'audio/mpeg', data: 'CQk=' });
    expect(result.attemptedProviders).toEqual(['primary', 'backup']);
  });

  it('uses health cooldown to avoid repeatedly trying a failing provider', async () => {
    const health = new TtsHealthTracker();
    const fetchSpy = vi.spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(failedResponse(500, 'Server Error'))
      .mockResolvedValueOnce(audioResponse(1))
      .mockResolvedValueOnce(audioResponse(2));

    // First call: primary fails and enters cooldown, backup answers.
    const first = await synthesizeSpeechWithFallback('hello', {
      providers: providerChain(),
      fallback: {
        failureCooldownMs: 120_000,
      },
      healthTracker: health,
    });
    expect(first.providerId).toBe('backup');

    // Second call shares the tracker, so backup is tried first and primary is never hit.
    const second = await synthesizeSpeechWithFallback('hello again', {
      providers: providerChain(),
      healthTracker: health,
    });

    expect(fetchSpy).toHaveBeenCalledTimes(3);
    expect(fetchSpy.mock.calls[2]?.[0]).toBe(BACKUP_URL);
    expect(second.attemptedProviders).toEqual(['backup']);
  });

  it('tries unhealthy providers again after cooldown expires', async () => {
    vi.useFakeTimers();
    vi.setSystemTime(new Date('2026-01-01T00:00:00Z'));

    const health = new TtsHealthTracker();
    const fetchSpy = vi.spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(failedResponse(503, 'Service Unavailable'))
      .mockResolvedValueOnce(audioResponse(3))
      .mockResolvedValueOnce(audioResponse(4));

    await synthesizeSpeechWithFallback('first', {
      providers: providerChain(),
      fallback: { failureCooldownMs: 10_000 },
      healthTracker: health,
    });

    // Jump past the 10 s cooldown so primary is considered healthy again.
    vi.setSystemTime(new Date('2026-01-01T00:00:11Z'));

    const second = await synthesizeSpeechWithFallback('second', {
      providers: providerChain(),
      healthTracker: health,
    });

    expect(fetchSpy).toHaveBeenCalledTimes(3);
    expect(fetchSpy.mock.calls[2]?.[0]).toBe(PRIMARY_URL);
    expect(second.providerId).toBe('primary');
  });

  it('returns text fallback metadata when all providers fail', async () => {
    vi.spyOn(globalThis, 'fetch')
      .mockRejectedValueOnce(new Error('primary down'))
      .mockRejectedValueOnce(new Error('backup down'));

    const result = await synthesizeSpeechWithFallback('hello', {
      providers: providerChain(),
    });

    expect(result.attachment).toBeNull();
    expect(result.attemptedProviders).toEqual(['primary', 'backup']);
    expect(result.lastError).toContain('backup down');
  });
});
|
||||
|
||||
@@ -8,6 +8,65 @@ export interface TtsSynthesisConfig {
|
||||
model?: string;
|
||||
voice?: string;
|
||||
format?: TtsOutputFormat;
|
||||
/** Optional provider identity used by fallback/health tracking. */
|
||||
id?: string;
|
||||
/** Optional provider type hint for endpoint defaults. */
|
||||
type?: 'openai' | 'custom';
|
||||
}
|
||||
|
||||
/** Tuning options for walking the TTS provider chain. */
export interface TtsFallbackConfig {
  /** Upper bound on how many providers are tried for a single reply. */
  maxAttempts?: number;

  /** How long, in milliseconds, a provider that failed is held in cooldown before it is retried. */
  failureCooldownMs?: number;
}
|
||||
|
||||
/** Failure bookkeeping stored for a single TTS provider. */
export interface TtsProviderHealth {
  /** Number of failures recorded in a row. */
  consecutiveFailures: number;

  /** Timestamp (same clock as `Date.now()`) at which the cooldown ends. */
  cooldownUntil: number;

  /** Timestamp of the most recent failure, when one has been recorded. */
  lastFailureAt?: number;

  /** Message of the most recent failure, when one has been recorded. */
  lastError?: string;
}
|
||||
|
||||
/** Outcome of one pass over the TTS provider chain. */
export interface TtsFallbackResult {
  /** Synthesized audio, or `null` when no provider produced any. */
  attachment: OutboundAttachment | null;

  /** Id of the provider that produced `attachment`; absent when there is none. */
  providerId?: string;

  /** Ids of the providers that were tried, in the order they were tried. */
  attemptedProviders: string[];

  /** Ids of the providers that were not tried. */
  skippedProviders: string[];

  /** Message of the most recent provider failure, if any provider failed. */
  lastError?: string;
}
|
||||
|
||||
/**
 * In-memory record of which TTS providers failed recently.
 *
 * A provider is "healthy" when it has no recorded failure, or when the cooldown that
 * started at its last failure has elapsed. A success clears the record entirely.
 */
export class TtsHealthTracker {
  private readonly states = new Map<string, TtsProviderHealth>();

  /**
   * Report whether a provider may be tried without waiting.
   *
   * @param providerId - Provider identity used as the tracking key.
   * @param now - Current timestamp; defaults to `Date.now()`.
   * @returns `true` when the provider has no failure on record or its cooldown has ended.
   */
  isHealthy(providerId: string, now = Date.now()): boolean {
    const state = this.states.get(providerId);
    return state === undefined || state.cooldownUntil <= now;
  }

  /** Forget every recorded failure for the provider. */
  markSuccess(providerId: string): void {
    this.states.delete(providerId);
  }

  /**
   * Record a failure and start (or restart) the provider's cooldown.
   *
   * @param providerId - Provider identity used as the tracking key.
   * @param error - The failure; its message (or string form) is stored as `lastError`.
   * @param now - Timestamp of the failure; defaults to `Date.now()`.
   * @param failureCooldownMs - Cooldown length in milliseconds. Values below 1000 are
   *   raised to 1000; non-finite values (NaN, ±Infinity) fall back to 60000.
   */
  markFailure(providerId: string, error: unknown, now = Date.now(), failureCooldownMs = 60_000): void {
    const previous = this.states.get(providerId);
    const message = error instanceof Error ? error.message : String(error);

    // A NaN/Infinity cooldown would produce a `cooldownUntil` that never compares as
    // elapsed, leaving the provider unhealthy until its next success.
    const cooldownMs = Number.isFinite(failureCooldownMs)
      ? Math.max(1_000, failureCooldownMs)
      : 60_000;

    this.states.set(providerId, {
      consecutiveFailures: (previous?.consecutiveFailures ?? 0) + 1,
      cooldownUntil: now + cooldownMs,
      lastFailureAt: now,
      lastError: message,
    });
  }

  /** Return the recorded failure state for the provider, or `undefined` when there is none. */
  getState(providerId: string): TtsProviderHealth | undefined {
    return this.states.get(providerId);
  }
}
|
||||
|
||||
function outputFormatToMimeType(format: TtsOutputFormat): string {
|
||||
@@ -34,6 +93,26 @@ function outputFormatToExtension(format: TtsOutputFormat): string {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Work out which URL a provider should be called on.
 *
 * An explicitly configured endpoint wins and is returned with surrounding whitespace
 * removed, so the value that was validated is also the value that is used. Without one,
 * providers of type `'openai'` fall back to the public OpenAI speech endpoint.
 *
 * @param config - Provider settings; only `endpoint` and `type` are read.
 * @returns The endpoint URL, or `undefined` when none is configured and no default applies.
 */
function resolveProviderEndpoint(config: TtsSynthesisConfig): string | undefined {
  const explicitEndpoint = config.endpoint?.trim();
  if (explicitEndpoint) {
    return explicitEndpoint;
  }

  return config.type === 'openai' ? 'https://api.openai.com/v1/audio/speech' : undefined;
}
|
||||
|
||||
/**
 * Turn the configured attempt limit into a usable count for the current provider chain.
 *
 * @param maxAttempts - Configured limit; may be missing or not a finite number.
 * @param providerCount - Number of providers available to try.
 * @returns `0` when there are no providers; `providerCount` when the limit is missing or
 *   not finite; otherwise the limit rounded down and clamped to `1..providerCount`.
 */
function normalizeMaxAttempts(maxAttempts: number | undefined, providerCount: number): number {
  const hasProviders = providerCount > 0;
  if (!hasProviders) {
    return 0;
  }

  // `typeof` narrows the optional parameter, so no type assertion is needed below.
  if (typeof maxAttempts !== 'number' || !Number.isFinite(maxAttempts)) {
    return providerCount;
  }

  const wholeAttempts = Math.floor(maxAttempts);
  const atLeastOne = Math.max(1, wholeAttempts);
  return Math.min(providerCount, atLeastOne);
}
|
||||
|
||||
/** Synthesize speech via an OpenAI-compatible /v1/audio/speech endpoint. */
|
||||
export async function synthesizeSpeechAttachment(
|
||||
text: string,
|
||||
@@ -86,3 +165,95 @@ export async function synthesizeSpeechAttachment(
|
||||
filename: `flynn-reply-${Date.now()}.${extension}`,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Try an ordered chain of TTS providers until one of them produces audio.
 *
 * Providers for which no endpoint can be resolved are dropped up front. Providers the
 * health tracker reports as unhealthy are moved behind the healthy ones rather than
 * removed, so they are still available as a last resort. Only the first
 * `fallback.maxAttempts` providers of that ordering are tried; the ids of the remainder
 * are reported in `skippedProviders`.
 *
 * A failing provider does not make this function reject: the failure is recorded on the
 * tracker (when one is supplied) and the next provider is tried. When nothing produces
 * audio the result carries `attachment: null` plus the last failure message, so the
 * caller can fall back to text.
 *
 * @param text - Text to speak; blank text returns immediately with no attempts.
 * @param input - Provider chain, optional fallback tuning, and optional shared health tracker.
 * @returns The audio and the provider that produced it, or text-fallback metadata.
 */
export async function synthesizeSpeechWithFallback(
  text: string,
  input: {
    providers: TtsSynthesisConfig[];
    fallback?: TtsFallbackConfig;
    healthTracker?: TtsHealthTracker;
  },
): Promise<TtsFallbackResult> {
  type ResolvedProvider = TtsSynthesisConfig & { id: string; endpoint: string };

  const nothingAttempted = (): TtsFallbackResult => ({
    attachment: null,
    attemptedProviders: [],
    skippedProviders: [],
  });

  const spokenText = text.trim();
  if (spokenText.length === 0) {
    return nothingAttempted();
  }

  // Resolve endpoints and assign ids; fallback ids are numbered by position in the
  // caller's list (1-based), including providers that end up being dropped.
  const candidates: ResolvedProvider[] = [];
  input.providers.forEach((provider, index) => {
    const endpoint = resolveProviderEndpoint(provider);
    if (typeof endpoint === 'string' && endpoint.length > 0) {
      candidates.push({
        ...provider,
        endpoint,
        id: provider.id ?? `tts-provider-${index + 1}`,
      });
    }
  });

  if (candidates.length === 0) {
    return nothingAttempted();
  }

  const tracker = input.healthTracker;
  const startedAt = Date.now();

  // Healthy providers keep their configured order and go first; unhealthy ones follow.
  const ready: ResolvedProvider[] = [];
  const coolingDown: ResolvedProvider[] = [];
  for (const candidate of candidates) {
    const usable = !tracker || tracker.isHealthy(candidate.id, startedAt);
    (usable ? ready : coolingDown).push(candidate);
  }
  const chain = ready.concat(coolingDown);

  const attemptBudget = normalizeMaxAttempts(input.fallback?.maxAttempts, chain.length);
  const cooldownMs = input.fallback?.failureCooldownMs ?? 60_000;
  const skippedProviders = chain.slice(attemptBudget).map(({ id }) => id);
  const attemptedProviders: string[] = [];
  let lastError: string | undefined;

  for (const provider of chain.slice(0, attemptBudget)) {
    attemptedProviders.push(provider.id);
    try {
      const attachment = await synthesizeSpeechAttachment(spokenText, provider);
      if (!attachment) {
        // No audio and no error: move on without touching the provider's health.
        continue;
      }
      tracker?.markSuccess(provider.id);
      return {
        attachment,
        providerId: provider.id,
        attemptedProviders,
        skippedProviders,
      };
    } catch (failure) {
      tracker?.markFailure(provider.id, failure, Date.now(), cooldownMs);
      lastError = failure instanceof Error ? failure.message : String(failure);
    }
  }

  return {
    attachment: null,
    attemptedProviders,
    skippedProviders,
    lastError,
  };
}
|
||||
|
||||
Reference in New Issue
Block a user