docs: update audio config docs and add voice message failure fix to changelog

- README.md: Update audio config format to match schema (enabled + provider.* fields instead of old transcription_endpoint fields), add whisper.cpp server Docker example
- CHANGELOG.md: Add '### Fixed' section with voice message failure handling details
- config/default.yaml: Update audio section with new schema format and Docker setup example
2026-02-11 19:47:52 -08:00
parent 2e235213d9
commit 28c78d469d
3 changed files with 49 additions and 16 deletions
@@ -123,9 +123,18 @@ hooks:
 # Configure a Whisper-compatible endpoint for audio transcription.
 # Models that support native audio input (Gemini, OpenAI, GitHub) will
 # receive raw audio directly; others fall back to this endpoint.
-
+#
+# For local transcription, start a whisper.cpp server with Docker:
+#   docker run -d --name whisper-server -p 18801:8080 \
+#     ghcr.io/ggml-org/whisper.cpp:main \
+#     --model /app/models/ggml-base.en.bin \
+#     --host 0.0.0.0 --port 8080 --convert --language en \
+#     --inference-path /v1/audio/transcriptions
+#
 # audio:
-#   transcription_endpoint: "http://localhost:8080/v1/audio/transcriptions"
-#   transcription_api_key: "${WHISPER_API_KEY}"
-#   transcription_model: "whisper-1"
-#   transcription_provider: "openai"
+#   enabled: true
+#   provider:
+#     type: custom                            # Provider type: openai, groq, ollama, llamacpp, or custom
+#     endpoint: "http://localhost:18801/v1/audio/transcriptions"
+#     api_key: "${WHISPER_API_KEY}"            # Optional Bearer token
+#     model: "whisper-1"                       # Model name (default: whisper-1)