docs: sync voice reliability updates and phase state
This commit is contained in:
@@ -572,7 +572,24 @@ Flynn can attach synthesized voice replies (OpenAI-compatible `/v1/audio/speech`
|
||||
tts:
|
||||
enabled: true
|
||||
enabled_channels: [telegram, whatsapp, discord] # Empty = all channels
|
||||
provider:
|
||||
providers:
|
||||
- name: primary
|
||||
type: custom # openai | custom
|
||||
endpoint: "https://tts-primary.example.com/v1/audio/speech"
|
||||
api_key: "${PRIMARY_TTS_API_KEY}"
|
||||
model: "gpt-4o-mini-tts"
|
||||
voice: "alloy"
|
||||
format: "mp3" # mp3 | wav | opus
|
||||
- name: backup
|
||||
type: openai
|
||||
api_key: "${OPENAI_API_KEY}"
|
||||
model: "gpt-4o-mini-tts"
|
||||
voice: "nova"
|
||||
format: "opus"
|
||||
fallback:
|
||||
max_attempts: 2
|
||||
failure_cooldown_ms: 60000
|
||||
provider: # Legacy single-provider shape (still supported)
|
||||
type: openai # openai | custom
|
||||
endpoint: "https://api.openai.com/v1/audio/speech"
|
||||
api_key: "${OPENAI_API_KEY}" # Optional Bearer token
|
||||
@@ -585,12 +602,18 @@ tts:
|
||||
|-------|----------|-------------|
|
||||
| `tts.enabled` | no | Enable voice reply synthesis (default: `false`) |
|
||||
| `tts.enabled_channels` | no | Channels allowed to receive voice replies (`[]` means all channels) |
|
||||
| `tts.providers[]` | no | Ordered provider chain for synthesis fallback |
|
||||
| `tts.providers[].name` | no | Provider label used for health tracking/debug logs |
|
||||
| `tts.provider.type` | no | `openai` or `custom` (default: `openai`) |
|
||||
| `tts.provider.endpoint` | no | OpenAI-compatible `/v1/audio/speech` endpoint (`openai` defaults to OpenAI API URL) |
|
||||
| `tts.provider.api_key` | no | Bearer token for authentication |
|
||||
| `tts.provider.model` | no | TTS model (default: `gpt-4o-mini-tts`) |
|
||||
| `tts.provider.voice` | no | Voice identifier (default: `alloy`) |
|
||||
| `tts.provider.format` | no | Output format: `mp3`, `wav`, `opus` (default: `mp3`) |
|
||||
| `tts.fallback.max_attempts` | no | Max providers attempted per reply before text fallback (default: `3`) |
|
||||
| `tts.fallback.failure_cooldown_ms` | no | Cooldown for providers after synthesis failures (default: `60000`) |
|
||||
|
||||
If all configured providers fail, Flynn deterministically falls back to a text-only reply (the reply is never dropped) and retries unhealthy providers once their cooldown window has elapsed.
|
||||
|
||||
### Capture Tools
|
||||
|
||||
|
||||
@@ -592,6 +592,7 @@ automation:
|
||||
# wake_phrase: "hey flynn"
|
||||
# timeout_ms: 120000
|
||||
# allow_manual_toggle: true
|
||||
# # While active, spoken/text "stop" or "cancel" maps to /stop run control.
|
||||
|
||||
# ── Text-to-Speech (TTS) Output ──────────────────────────────────────
|
||||
# Optional voice output for assistant replies. Uses an OpenAI-compatible
|
||||
@@ -600,6 +601,23 @@ automation:
|
||||
# tts:
|
||||
# enabled: false
|
||||
# enabled_channels: [telegram, whatsapp, discord] # Empty = all channels
|
||||
# providers:
|
||||
# - name: primary
|
||||
# type: custom # openai | custom
|
||||
# endpoint: "https://tts-primary.example.com/v1/audio/speech"
|
||||
# api_key: "${PRIMARY_TTS_API_KEY}"
|
||||
# model: "gpt-4o-mini-tts"
|
||||
# voice: "alloy"
|
||||
# format: "mp3" # mp3 | wav | opus
|
||||
# - name: backup
|
||||
# type: openai
|
||||
# api_key: "${OPENAI_API_KEY}"
|
||||
# model: "gpt-4o-mini-tts"
|
||||
# voice: "nova"
|
||||
# format: "opus"
|
||||
# fallback:
|
||||
# max_attempts: 2
|
||||
# failure_cooldown_ms: 60000
|
||||
# provider:
|
||||
# type: openai # openai | custom
|
||||
# endpoint: "https://api.openai.com/v1/audio/speech"
|
||||
@@ -607,6 +625,7 @@ automation:
|
||||
# model: "gpt-4o-mini-tts"
|
||||
# voice: "alloy"
|
||||
# format: "mp3" # mp3 | wav | opus
|
||||
# # Legacy single-provider format (`provider`) is still supported.
|
||||
|
||||
# ── Sub-Agent Configs ────────────────────────────────────────────────
|
||||
# Named agent configurations for delegation via agent.delegate tool.
|
||||
|
||||
@@ -1550,7 +1550,7 @@ Outbound attachment (image, audio, file).
|
||||
}
|
||||
```
|
||||
|
||||
Audio attachments (TTS responses) are best-effort: if synthesis fails, the gateway still returns the text reply without any audio attachment.
|
||||
Audio attachments (TTS responses) are best-effort with provider fallback: the daemon tries providers from an ordered TTS chain and tracks per-provider health/cooldown between attempts. If synthesis still fails, the gateway returns the text reply without any audio attachment.
|
||||
|
||||
#### `context_warning`
|
||||
|
||||
@@ -1582,7 +1582,7 @@ Proactive context pressure signal emitted by `agent.send` before `done`.
|
||||
|
||||
#### `run_state`
|
||||
|
||||
Run lifecycle transition emitted during `agent.send` processing.
|
||||
Run lifecycle transition emitted during `agent.send` processing (text and voice/talk turns use the same lifecycle states and cancel semantics).
|
||||
|
||||
```json
|
||||
{
|
||||
|
||||
@@ -156,7 +156,8 @@ Gateway streaming UX signals:
|
||||
- Routing applies reaction rules with deterministic priority/cooldown (and recursion guard) before intent routing.
|
||||
- Companion nodes re-register `node.*` capabilities after reconnect; runtime clients can auto-reconnect, optionally replay cached node state (`register/status/location/push`), and surface connection events.
|
||||
- Canvas artifacts are persisted by the gateway so session UI surfaces can recover after daemon restarts.
|
||||
- TTS synthesis failures degrade to text-only replies without dropping the response.
|
||||
- TTS synthesis uses an ordered provider chain with health cooldown tracking; if all providers fail, replies degrade to text-only without dropping the response.
|
||||
- Talk mode accepts spoken/text `stop`/`cancel` while active and maps it onto the same `/stop` run-control cancellation path used for text sessions.
|
||||
|
||||
Key files:
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ If you only want the protocol surface, see `docs/api/PROTOCOL.md`.
|
||||
- Browser workflow reliability primitives (`browser.wait_for/assert/extract/checkpoint.*`) execute in the same queued session lane and apply browser-config guardrails (domain allowlist/high-risk confirmation, bounded retries, workflow step budget).
|
||||
- Companion `node.*` registration is per WebSocket connection; reconnects must re-register capabilities before invoking node RPC methods (or use runtime-client reconnect state replay to re-register/status/location/push automatically).
|
||||
- Canvas artifacts are persisted per session under the gateway data directory for UI recovery across restarts.
|
||||
- TTS output is best-effort; synthesis failures fall back to text-only responses.
|
||||
- TTS output is best-effort with ordered provider fallback and per-provider cooldown tracking; if every attempted provider fails, the response still falls back to text-only.
|
||||
- Talk mode voice sessions share the same cancel/replace semantics as text lanes (`/stop`, interrupt mode preemption), including spoken `stop`/`cancel` mapping while talk mode is active.
|
||||
|
||||
## Component Map
|
||||
|
||||
|
||||
+34
-6
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"updated_at": "2026-02-26",
|
||||
"updated_at": "2026-02-27",
|
||||
"description": "Tracks the status of all Flynn plans and implementation phases",
|
||||
"plans": {
|
||||
"phase0-ticket-0.1-audit-schema-extension": {
|
||||
@@ -6788,8 +6788,8 @@
|
||||
"personal-assistant-productization-plan-2026-02-26": {
|
||||
"status": "in_progress",
|
||||
"date": "2026-02-26",
|
||||
"updated": "2026-02-26",
|
||||
"summary": "Rebaselined Flynn's OpenClaw-style personal-assistant gaps and defined an execution-ready 8-10 week roadmap. Phase 3 browser reliability is shipped, and Phase 1 companion reliability/runtime handoff hardening is in progress (node-state reconnect replay + wrapper/CLI handoff paths + reconnect/token-refresh integration coverage).",
|
||||
"updated": "2026-02-27",
|
||||
"summary": "Rebaselined Flynn's OpenClaw-style personal-assistant gaps and defined an execution-ready 8-10 week roadmap. Phase 3 browser reliability, Phase 1 companion reconnect/handoff reliability, and Phase 2 voice daily-driver reliability (talk controls + TTS provider fallback/health + interruption-safe voice cancel semantics) are now shipped.",
|
||||
"files_modified": [
|
||||
"docs/plans/2026-02-26-personal-assistant-productization-plan.md",
|
||||
"docs/plans/state.json"
|
||||
@@ -6840,6 +6840,34 @@
|
||||
],
|
||||
"test_status": "pnpm test:run src/companion/runtimeClient.test.ts src/companion/platformClients.test.ts src/companion/platformClients.integration.test.ts src/cli/companion.test.ts + pnpm typecheck passing"
|
||||
},
|
||||
"personal-assistant-productization-phase2-voice-reliability": {
|
||||
"status": "completed",
|
||||
"date": "2026-02-27",
|
||||
"updated": "2026-02-27",
|
||||
"summary": "Implemented Phase 2 voice daily-driver reliability: added ordered multi-provider TTS fallback with per-provider health cooldown tracking, exposed talk mode + wake/timer controls and TTS fallback policy in gateway config/UI surfaces, and mapped spoken talk-mode `stop/cancel` to `/stop` run-control semantics so voice interruption behavior matches text flows.",
|
||||
"files_modified": [
|
||||
"src/models/tts.ts",
|
||||
"src/models/tts.test.ts",
|
||||
"src/daemon/routing.ts",
|
||||
"src/daemon/routing.test.ts",
|
||||
"src/config/schema.ts",
|
||||
"src/config/schema.test.ts",
|
||||
"src/gateway/handlers/config.ts",
|
||||
"src/gateway/handlers/handlers.test.ts",
|
||||
"src/gateway/ui/pages/settings.js",
|
||||
"src/gateway/ui/pages/settings.test.ts",
|
||||
"src/gateway/ui/pages/dashboard.js",
|
||||
"src/gateway/ui/pages/dashboard.test.ts",
|
||||
"src/gateway/ui/pages/chat.test.ts",
|
||||
"config/default.yaml",
|
||||
"README.md",
|
||||
"docs/api/PROTOCOL.md",
|
||||
"docs/architecture/AGENT_DIAGRAM.md",
|
||||
"docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md",
|
||||
"docs/plans/state.json"
|
||||
],
|
||||
"test_status": "pnpm test:run src/models/tts.test.ts src/daemon/routing.test.ts src/gateway/handlers/handlers.test.ts src/gateway/ui/pages/settings.test.ts src/gateway/ui/pages/dashboard.test.ts src/gateway/ui/pages/chat.test.ts src/config/schema.test.ts + pnpm typecheck passing"
|
||||
},
|
||||
"subagents-support-phase1": {
|
||||
"status": "completed",
|
||||
"date": "2026-02-26",
|
||||
@@ -6874,7 +6902,7 @@
|
||||
}
|
||||
},
|
||||
"overall_progress": {
|
||||
"total_test_count": 2553,
|
||||
"total_test_count": 2559,
|
||||
"all_tests_passing": true,
|
||||
"p0_completion": "3/3 (100%)",
|
||||
"p1_completion": "4/4 (100%)",
|
||||
@@ -6889,7 +6917,7 @@
|
||||
"tier2_completion": "4/4 (100%) \u2014 inbound webhooks, vector memory search, Dockerfile, heartbeat monitor",
|
||||
"tier3_completion": "5/5 (100%) \u2014 lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings",
|
||||
"tier4_completion": "4/4 (100%) \u2014 gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes",
|
||||
"feature_gap_scorecard": "rebaselined 2026-02-26 and updated 2026-02-26 (phase 3 + phase 1 reliability slice) — channel breadth, setup wizard, baseline browser automation, subagent controls, browser workflow reliability primitives (wait/assert/extract/retries/checkpoints/guardrails/budgets), and companion reconnect/runtime-handoff foundations are implemented; remaining high-impact personal-assistant gaps center on shipped desktop/mobile companion apps, voice UX polish, and first-success onboarding funnel optimization.",
|
||||
"feature_gap_scorecard": "rebaselined 2026-02-26 and updated 2026-02-27 (phase 3 + phase 1 + phase 2 reliability slices) — channel breadth, setup wizard, baseline browser automation, subagent controls, browser workflow reliability primitives (wait/assert/extract/retries/checkpoints/guardrails/budgets), companion reconnect/runtime-handoff foundations, and voice reliability hardening (talk controls + TTS fallback/health + interruption-safe cancel semantics) are implemented; remaining high-impact personal-assistant gaps center on shipped desktop/mobile companion apps and first-success onboarding funnel optimization.",
|
||||
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete \u2014 milestone done",
|
||||
"dashboard_observability": "completed \u2014 service health graphs + core service log viewer added to web UI via observability RPCs and bounded backend sampling",
|
||||
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
||||
@@ -6922,7 +6950,7 @@
|
||||
"deeper_surfaces_phase3_companion_canvas_voice": "completed \u2014 companion reconnect resilience (auto-reconnect with backoff, pending-wait cancellation on disconnect), canvas artifact persistence (SQLite-backed store, daemon-restart durability), voice TTS fallback coverage (text-only reply on TTS failure, no dropped responses)",
|
||||
"deeper_surfaces_phase4_rollout": "completed \u2014 phase 4 rollout and operator readiness plan documented: canary rollout plan by feature flag/surface, explicit rollback playbook, operator docs and architecture/protocol docs synchronized",
|
||||
"post_phase_test_fixes": "completed \u2014 fixed 4 test failures introduced by phases 1-3: iOS/Android push listNodes (missing publishHeartbeat before platform-filtered query), server.test agent.send (run_state events now precede done; added sendAndWaitForDone helper), httpBody 413 (req.destroy() closed socket before response could be sent; replaced with Connection: close header on 413 responses)",
|
||||
"personal_assistant_productization_plan": "in_progress \u2014 8-10 week phased roadmap active; Phase 3 browser workflow reliability shipped, and Phase 1 companion runtime reliability now includes reconnect state replay plus typed handoff support with integration coverage. Remaining phases: companion app packaging/surfaces, voice reliability hardening, and onboarding 2.0 first-success funnel.",
|
||||
"personal_assistant_productization_plan": "in_progress \u2014 8-10 week phased roadmap active; Phase 3 browser workflow reliability shipped, Phase 1 companion runtime reliability includes reconnect state replay + typed handoff support, and Phase 2 voice reliability now ships talk controls + TTS provider fallback/health + interruption-safe voice cancel mapping. Remaining phases: companion app packaging/surfaces and onboarding 2.0 first-success funnel.",
|
||||
"subagents_support": "completed \u2014 subagent phases 1-3 shipped with `subagent.spawn/send/list/cancel/delete/summary`, per-child queue mode (`followup|interrupt`), budgets (`max_turns`, `max_total_tokens`, `turn_timeout_ms`), tool-profile overrides, trace-linked audit events, `/subagents` inspection commands, and focused regression tests."
|
||||
},
|
||||
"soul_md_and_cron_create": {
|
||||
|
||||
Reference in New Issue
Block a user