From eec54ace9dc55cdd12c09a6839adc48d4f6ae877 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Wed, 25 Feb 2026 11:21:49 -0800 Subject: [PATCH] test(voice): cover tts fallback --- docs/api/PROTOCOL.md | 2 + docs/architecture/AGENT_DIAGRAM.md | 1 + .../GATEWAY_SESSIONS_AND_QUEUE.md | 1 + docs/plans/state.json | 16 ++++- src/daemon/routing.test.ts | 65 +++++++++++++++++++ 5 files changed, 84 insertions(+), 1 deletion(-) diff --git a/docs/api/PROTOCOL.md b/docs/api/PROTOCOL.md index 0b7721b..1c96cfa 100644 --- a/docs/api/PROTOCOL.md +++ b/docs/api/PROTOCOL.md @@ -1539,6 +1539,8 @@ Outbound attachment (image, audio, file). } ``` +Audio attachments (TTS responses) are best-effort: if synthesis fails, the gateway still returns the text reply without any audio attachment. + #### `context_warning` Proactive context pressure signal emitted by `agent.send` before `done`. diff --git a/docs/architecture/AGENT_DIAGRAM.md b/docs/architecture/AGENT_DIAGRAM.md index 976964e..d1f56bd 100644 --- a/docs/architecture/AGENT_DIAGRAM.md +++ b/docs/architecture/AGENT_DIAGRAM.md @@ -145,6 +145,7 @@ Gateway streaming UX signals: - Routing applies reaction rules with deterministic priority/cooldown (and recursion guard) before intent routing. - Companion nodes re-register `node.*` capabilities after reconnect; runtime clients can auto-reconnect and surface connection events. - Canvas artifacts are persisted by the gateway so session UI surfaces can recover after daemon restarts. +- TTS synthesis failures degrade to text-only replies without dropping the response. Key files: diff --git a/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md b/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md index a1ce179..904241a 100644 --- a/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md +++ b/docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md @@ -18,6 +18,7 @@ If you only want the protocol surface, see `docs/api/PROTOCOL.md`. - Reaction matching is deterministic (priority + cooldown + recursion guard) before intent/agent routing. - Companion `node.*` registration is per WebSocket connection; reconnects must re-register capabilities before invoking node RPC methods. - Canvas artifacts are persisted per session under the gateway data directory for UI recovery across restarts. +- TTS output is best-effort; synthesis failures fall back to text-only responses. ## Component Map diff --git a/docs/plans/state.json b/docs/plans/state.json index 4ce6be0..3bf5bbb 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -6737,10 +6737,24 @@ "docs/plans/state.json" ], "test_status": "pnpm test:run src/gateway/canvas-store.test.ts src/gateway/ui/pages/chat.test.ts passing" + }, + "deeper-surfaces-phase3-voice-continuity": { + "status": "completed", + "date": "2026-02-25", + "updated": "2026-02-25", + "summary": "Verified voice continuity by adding explicit test coverage for TTS synthesis failure fallbacks and documenting the text-only degradation behavior in protocol/architecture notes.", + "files_modified": [ + "src/daemon/routing.test.ts", + "docs/api/PROTOCOL.md", + "docs/architecture/AGENT_DIAGRAM.md", + "docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/daemon/routing.test.ts passing" } }, "overall_progress": { - "total_test_count": 2021, + "total_test_count": 2022, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", diff --git a/src/daemon/routing.test.ts b/src/daemon/routing.test.ts index f375923..4393d03 100644 --- a/src/daemon/routing.test.ts +++ b/src/daemon/routing.test.ts @@ -2321,6 +2321,71 @@ describe('daemon tts routing integration', () => { const outbound = reply.mock.calls[0]?.[0] as OutboundMessage | undefined; expect(outbound?.attachments).toBeUndefined(); }); + + it('falls back to text-only replies when tts synthesis fails', async () => { + vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('fallback response'); + vi.spyOn(globalThis, 'fetch').mockRejectedValue(new Error('tts down')); + + const session = { + id: 'telegram:tts-user-3', + addMessage: vi.fn(), + getHistory: vi.fn(() => []), + clear: vi.fn(), + replaceHistory: vi.fn(), + getConfig: vi.fn(() => undefined), + setConfig: vi.fn(), + deleteConfig: vi.fn(), + }; + + const router = createMessageRouter({ + sessionManager: { getSession: vi.fn(() => session) } as unknown as MessageRouterDeps['sessionManager'], + modelRouter: { + getAvailableTiers: () => ['default'], + getAllLabels: () => ({ default: 'default' }), + getLabel: (tier: string) => tier, + } as unknown as MessageRouterDeps['modelRouter'], + systemPrompt: 'test prompt', + toolRegistry: { clone() { return this; }, register: vi.fn() } as unknown as MessageRouterDeps['toolRegistry'], + toolExecutor: {} as unknown as MessageRouterDeps['toolExecutor'], + config: { + agents: { + primary_tier: 'default', + delegation: { + compaction: 'default', + memory_extraction: 'default', + classification: 'default', + tool_summarisation: 'default', + complex_reasoning: 'default', + }, + max_delegation_depth: 1, + max_iterations: 3, + }, + compaction: { enabled: false }, + models: { default: { provider: 'anthropic', model: 'claude' } }, + tts: { + enabled: true, + enabled_channels: ['telegram'], + provider: { + type: 'custom', + endpoint: 'https://example.com/v1/audio/speech', + }, + }, + } as unknown as MessageRouterDeps['config'], + }); + + const reply = vi.fn(async (_message: OutboundMessage) => {}); + await router.handler({ + id: 'tts-3', + channel: 'telegram', + senderId: 'tts-user-3', + text: 'respond with fallback', + timestamp: Date.now(), + } as MessageRouterInput, reply); + + const outbound = reply.mock.calls[0]?.[0] as OutboundMessage | undefined; + expect(outbound?.text).toBe('fallback response'); + expect(outbound?.attachments).toBeUndefined(); + }); }); describe('daemon reactions routing integration', () => {