feat: implement tier-a4 tts voice output replies

This commit is contained in:
William Valentin
2026-02-18 10:22:28 -08:00
parent 3eb07875f1
commit a71aa5992d
11 changed files with 482 additions and 4 deletions
+1 -1
View File
@@ -1,3 +1,3 @@
export { loadConfig, deepMerge } from './loader.js';
export { persistConfig } from './persistence.js';
export { configSchema, MODEL_PROVIDERS, type ModelProvider, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig, type AgentsConfig, type CompactionConfig, type ToolProfile, type ToolOverrideConfig, type ToolsConfig, type SandboxConfig, type AgentConfigEntry, type RoutingConfig, type ServerConfig, type BackupConfig, type K8sConfig } from './schema.js';
export { configSchema, MODEL_PROVIDERS, type ModelProvider, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig, type AgentsConfig, type CompactionConfig, type ToolProfile, type ToolOverrideConfig, type ToolsConfig, type SandboxConfig, type AgentConfigEntry, type RoutingConfig, type ServerConfig, type BackupConfig, type K8sConfig, type TtsConfig } from './schema.js';
+43
View File
@@ -660,6 +660,49 @@ describe('configSchema — audio talk mode', () => {
});
});
/**
 * Validation tests for the top-level `tts` section of `configSchema`.
 *
 * Covers: section-level defaults when `tts` is omitted, provider-level
 * defaults when `provider` is present but empty, a fully specified custom
 * provider, and rejection of values outside the schema's enums/constraints.
 */
describe('configSchema — tts', () => {
  // Base config with no `tts` key; individual tests spread it and add `tts`.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults tts fields', () => {
    const result = configSchema.parse(minimalConfig);
    expect(result.tts.enabled).toBe(false);
    expect(result.tts.enabled_channels).toEqual([]);
    // `provider` is optional with no default, so it stays absent.
    expect(result.tts.provider).toBeUndefined();
  });

  it('applies provider defaults when provider is given without fields', () => {
    const result = configSchema.parse({
      ...minimalConfig,
      tts: { provider: {} },
    });
    // Section-level defaults still apply alongside an explicit provider.
    expect(result.tts.enabled).toBe(false);
    expect(result.tts.enabled_channels).toEqual([]);
    expect(result.tts.provider).toMatchObject({
      type: 'openai',
      model: 'gpt-4o-mini-tts',
      voice: 'alloy',
      format: 'mp3',
    });
    // `endpoint` and `api_key` are optional without defaults.
    expect(result.tts.provider?.endpoint).toBeUndefined();
    expect(result.tts.provider?.api_key).toBeUndefined();
  });

  it('accepts custom tts provider settings', () => {
    const result = configSchema.parse({
      ...minimalConfig,
      tts: {
        enabled: true,
        enabled_channels: ['telegram', 'discord'],
        provider: {
          type: 'custom',
          endpoint: 'https://example.com/v1/audio/speech',
          api_key: 'sk-test',
          model: 'gpt-4o-mini-tts',
          voice: 'nova',
          format: 'wav',
        },
      },
    });
    expect(result.tts.enabled).toBe(true);
    expect(result.tts.enabled_channels).toEqual(['telegram', 'discord']);
    expect(result.tts.provider).toMatchObject({
      type: 'custom',
      endpoint: 'https://example.com/v1/audio/speech',
      api_key: 'sk-test',
      model: 'gpt-4o-mini-tts',
      voice: 'nova',
      format: 'wav',
    });
  });

  it('rejects an unsupported output format', () => {
    expect(() =>
      configSchema.parse({
        ...minimalConfig,
        tts: { provider: { format: 'flac' } },
      }),
    ).toThrow();
  });

  it('rejects an unknown provider type', () => {
    expect(() =>
      configSchema.parse({
        ...minimalConfig,
        tts: { provider: { type: 'elevenlabs' } },
      }),
    ).toThrow();
  });

  it('rejects empty channel names', () => {
    expect(() =>
      configSchema.parse({
        ...minimalConfig,
        tts: { enabled_channels: [''] },
      }),
    ).toThrow();
  });
});
describe('configSchema — mattermost', () => {
const minimalConfig = {
telegram: { bot_token: 'test', allowed_chat_ids: [1] },
+20
View File
@@ -730,6 +730,24 @@ const audioSchema = z.object({
talk_mode: talkModeSchema,
}).default({});
/** Allowed values for a TTS provider's `format` field; any other string fails validation. */
const ttsOutputFormatSchema = z.enum(['mp3', 'wav', 'opus']);
/**
 * Settings for the text-to-speech provider.
 *
 * Every field is either optional or defaulted, so an empty object `{}` is a
 * valid provider and parses to the openai / gpt-4o-mini-tts / alloy / mp3
 * defaults below.
 *
 * NOTE(review): the schema does not require `endpoint` when `type` is
 * 'custom', and does not check that `endpoint` is a well-formed URL —
 * presumably the TTS client handles both; confirm against the consumer.
 */
const ttsProviderSchema = z.object({
// Provider kind; falls back to 'openai' when omitted.
type: z.enum(['openai', 'custom']).default('openai'),
// Free-form string, no default. Presumably the speech API URL — TODO confirm.
endpoint: z.string().optional(),
// Free-form string, no default; absent from the parsed result when omitted.
api_key: z.string().optional(),
// Model identifier passed as an unconstrained string.
model: z.string().default('gpt-4o-mini-tts'),
// Voice name passed as an unconstrained string.
voice: z.string().default('alloy'),
// Output format, restricted to the ttsOutputFormatSchema values.
format: ttsOutputFormatSchema.default('mp3'),
});
/**
 * Top-level `tts` config section.
 *
 * The trailing `.default({})` lets the whole section be omitted from the
 * config; it then parses to `{ enabled: false, enabled_channels: [] }` with
 * `provider` left undefined.
 */
const ttsSchema = z.object({
// Off unless explicitly enabled.
enabled: z.boolean().default(false),
/** Restrict voice replies to selected channels. Empty means all channels. */
// Channel names must be non-empty strings; they are not checked against a known list here.
enabled_channels: z.array(z.string().min(1)).default([]),
// Optional with no default: provider-level defaults only apply when a provider object is supplied.
provider: ttsProviderSchema.optional(),
}).default({});
// ── Tool policy schemas ──────────────────────────────────────────────
/** Names of the selectable tool profiles; any other string fails validation. */
const toolProfileEnum = z.enum(['minimal', 'messaging', 'coding', 'full']);
@@ -912,6 +930,7 @@ export const configSchema = z.object({
retry: retrySchema,
web_search: webSearchSchema,
audio: audioSchema,
tts: ttsSchema,
prompt: promptSchema,
tools: toolsSchema,
sandbox: sandboxSchema,
@@ -936,6 +955,7 @@ export type CompactionConfig = z.infer<typeof compactionSchema>;
// Static types derived from the zod schemas via `z.infer`, so each type
// tracks its schema definition instead of being declared by hand.
export type MemoryConfig = z.infer<typeof memorySchema>;
export type WebSearchConfig = z.infer<typeof webSearchSchema>;
export type AudioConfig = z.infer<typeof audioSchema>;
/** Parsed shape of the top-level `tts` config section. */
export type TtsConfig = z.infer<typeof ttsSchema>;
export type ProcessConfig = z.infer<typeof processSchema>;
export type BrowserConfig = z.infer<typeof browserSchema>;
export type K8sConfig = z.infer<typeof k8sSchema>;