From cbc880c12a0f806308530eb89f40a3f95639689a Mon Sep 17 00:00:00 2001 From: William Valentin Date: Sun, 22 Feb 2026 20:40:42 -0800 Subject: [PATCH] Fix whisper compose entrypoint so STT port 18801 is reachable --- docker-compose.yml | 3 ++- docs/plans/state.json | 14 +++++++++++++- docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md | 3 +++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 73d574d..bc4ea09 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -41,8 +41,9 @@ services: - "18801:8080" volumes: - whisper-models:/app/models + # Override image entrypoint so args are passed directly to whisper-server. + entrypoint: ["whisper-server"] command: - - whisper-server - --model - /app/models/ggml-base.en.bin - --host diff --git a/docs/plans/state.json b/docs/plans/state.json index ec9e9a9..4cd54f3 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -6189,6 +6189,18 @@ "docs/plans/state.json" ], "test_status": "pnpm test:run src/tools/builtin/audio-transcribe.test.ts src/models/media.test.ts" + }, + "whisper-compose-entrypoint-args-fix": { + "status": "completed", + "date": "2026-02-23", + "updated": "2026-02-23", + "summary": "Fixed docker-compose whisper-server startup to override image entrypoint and pass host/port/inference args correctly. 
This resolves host-side `localhost:18801` connection resets caused by whisper-server binding to loopback inside the container.", + "files_modified": [ + "docker-compose.yml", + "docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md", + "docs/plans/state.json" + ], + "test_status": "runtime validation: docker compose recreate + curl 127.0.0.1:18801 + POST /v1/audio/transcriptions returned HTTP response" } }, "overall_progress": { @@ -6210,7 +6222,7 @@ "feature_gap_scorecard": "128/128 match (100%), 0 partial (0%), 0 missing (0%)", "operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete — milestone done", "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram", - "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback, plus 2026-02-23 arg hydration hardening, tool.args_rewritten audit metric, transient fetch retry/timeout hardening, and localhost->127.0.0.1 fallback for transcription endpoint connectivity", + "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback, plus 2026-02-23 arg hydration hardening, tool.args_rewritten audit metric, transient fetch retry/timeout hardening, localhost->127.0.0.1 fallback for transcription endpoint connectivity, and whisper docker-compose entrypoint arg fix for port 18801", "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 3/3 (100%) — component registry, confidence routing, history index. 
Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening", "next_up": "Track OpenClaw evolution regularly for inspiration and feature ideas" }, diff --git a/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md b/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md index e579666..bed5cd1 100644 --- a/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md +++ b/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md @@ -45,6 +45,9 @@ curl -sS -i -X POST http://localhost:18801/v1/audio/transcriptions \ - If this is intermittent, Flynn now retries transient failures before returning an error. - If you use `localhost` in config and this persists, set endpoint host to `127.0.0.1` to avoid local name-resolution edge cases: - `http://127.0.0.1:18801/v1/audio/transcriptions` + - If `docker logs whisper-server` shows `whisper server listening at http://127.0.0.1:8080`, the server is bound to loopback inside the container, so connections through the host-published port may be reset. Verify container launch args: + - `docker inspect whisper-server --format '{{.Path}} {{json .Args}}'` + - Expected: path `whisper-server` with an explicit `--host 0.0.0.0` in the args. - `[No speech detected]` - Request succeeded and endpoint returned empty transcript text.