feat: implement tier-a4 tts voice output replies

2026-02-18 10:22:28 -08:00
parent 3eb07875f1
commit a71aa5992d
11 changed files with 482 additions and 4 deletions
@@ -1,3 +1,3 @@
 export { loadConfig, deepMerge } from './loader.js';
 export { persistConfig } from './persistence.js';
-export { configSchema, MODEL_PROVIDERS, type ModelProvider, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig, type AgentsConfig, type CompactionConfig, type ToolProfile, type ToolOverrideConfig, type ToolsConfig, type SandboxConfig, type AgentConfigEntry, type RoutingConfig, type ServerConfig, type BackupConfig, type K8sConfig } from './schema.js';
+export { configSchema, MODEL_PROVIDERS, type ModelProvider, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig, type AgentsConfig, type CompactionConfig, type ToolProfile, type ToolOverrideConfig, type ToolsConfig, type SandboxConfig, type AgentConfigEntry, type RoutingConfig, type ServerConfig, type BackupConfig, type K8sConfig, type TtsConfig } from './schema.js';
@@ -660,6 +660,49 @@ describe('configSchema — audio talk mode', () => {
  });
 });

+describe('configSchema — tts', () => { // Exercises the top-level `tts` section of configSchema.
+  const minimalConfig = { // Base input shared by both tests; note that it has no `tts` key.
+    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
+    models: { default: { provider: 'anthropic', model: 'claude-3' } },
+  };
+
+  it('defaults tts fields', () => { // With `tts` omitted, parse() is still expected to return a populated `tts` object.
+    const result = configSchema.parse(minimalConfig);
+    expect(result.tts.enabled).toBe(false);
+    expect(result.tts.enabled_channels).toEqual([]);
+    expect(result.tts.provider).toBeUndefined(); // No provider object is synthesized when none is configured.
+  });
+
+  it('accepts custom tts provider settings', () => { // All six provider fields are given explicitly, so provider-level defaults are not exercised here.
+    const result = configSchema.parse({
+      ...minimalConfig,
+      tts: {
+        enabled: true,
+        enabled_channels: ['telegram', 'discord'],
+        provider: {
+          type: 'custom',
+          endpoint: 'https://example.com/v1/audio/speech',
+          api_key: 'sk-test',
+          model: 'gpt-4o-mini-tts',
+          voice: 'nova',
+          format: 'wav',
+        },
+      },
+    });
+
+    expect(result.tts.enabled).toBe(true);
+    expect(result.tts.enabled_channels).toEqual(['telegram', 'discord']);
+    expect(result.tts.provider).toMatchObject({ // Each supplied value should come back unchanged after parsing.
+      type: 'custom',
+      endpoint: 'https://example.com/v1/audio/speech',
+      api_key: 'sk-test',
+      model: 'gpt-4o-mini-tts',
+      voice: 'nova',
+      format: 'wav',
+    });
+  });
+});
+
 describe('configSchema — mattermost', () => {
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
@@ -730,6 +730,24 @@ const audioSchema = z.object({
  talk_mode: talkModeSchema,
 }).default({});

+const ttsOutputFormatSchema = z.enum(['mp3', 'wav', 'opus']); // Allowed values for the TTS provider `format` field.
+
+const ttsProviderSchema = z.object({ // TTS provider settings: backend type, optional endpoint/API key, and model/voice/format with defaults.
+  type: z.enum(['openai', 'custom']).default('openai'), // Falls back to 'openai' when omitted.
+  endpoint: z.string().optional(), // Any string is accepted (no URL validation here); not required even when type is 'custom'.
+  api_key: z.string().optional(), // Optional at the schema level.
+  model: z.string().default('gpt-4o-mini-tts'), // Model id used when none is given.
+  voice: z.string().default('alloy'), // Voice name used when none is given.
+  format: ttsOutputFormatSchema.default('mp3'), // One of 'mp3' | 'wav' | 'opus'; 'mp3' when omitted.
+});
+
+const ttsSchema = z.object({ // Shape of the top-level `tts` config section.
+  enabled: z.boolean().default(false), // Off unless explicitly enabled.
+  /** Restrict voice replies to selected channels. Empty means all channels. */
+  enabled_channels: z.array(z.string().min(1)).default([]), // Entries must be non-empty strings; names are not checked against a fixed channel list.
+  provider: ttsProviderSchema.optional(), // Stays undefined when omitted, so provider field defaults apply only if a provider object is supplied.
+}).default({}); // Lets the whole `tts` key be omitted from the config.
+
 // ── Tool policy schemas ──────────────────────────────────────────────

 const toolProfileEnum = z.enum(['minimal', 'messaging', 'coding', 'full']);
@@ -912,6 +930,7 @@ export const configSchema = z.object({
  retry: retrySchema,
  web_search: webSearchSchema,
  audio: audioSchema,
+  tts: ttsSchema,
  prompt: promptSchema,
  tools: toolsSchema,
  sandbox: sandboxSchema,
@@ -936,6 +955,7 @@ export type CompactionConfig = z.infer<typeof compactionSchema>;
 export type MemoryConfig = z.infer<typeof memorySchema>;
 export type WebSearchConfig = z.infer<typeof webSearchSchema>;
 export type AudioConfig = z.infer<typeof audioSchema>;
+export type TtsConfig = z.infer<typeof ttsSchema>; // Derived from ttsSchema, so the type tracks the schema definition.
 export type ProcessConfig = z.infer<typeof processSchema>;
 export type BrowserConfig = z.infer<typeof browserSchema>;
 export type K8sConfig = z.infer<typeof k8sSchema>;