diff --git a/docs/plans/state.json b/docs/plans/state.json index 6b48c65..ec9e9a9 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -6174,6 +6174,21 @@ "docs/plans/state.json" ], "test_status": "pnpm test:run src/tools/builtin/audio-transcribe.test.ts src/models/media.test.ts" + }, + "audio-transcription-localhost-ipv4-fallback": { + "status": "completed", + "date": "2026-02-23", + "updated": "2026-02-23", + "summary": "Added automatic fallback from `localhost` to `127.0.0.1` for transient transcription fetch failures and clarified connectivity-oriented error text so assistants do not misdiagnose endpoint outages as missing audio payloads.", + "files_modified": [ + "src/models/media.ts", + "src/models/media.test.ts", + "src/tools/builtin/audio-transcribe.ts", + "src/tools/builtin/audio-transcribe.test.ts", + "docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/tools/builtin/audio-transcribe.test.ts src/models/media.test.ts" } }, "overall_progress": { @@ -6195,7 +6210,7 @@ "feature_gap_scorecard": "128/128 match (100%), 0 partial (0%), 0 missing (0%)", "operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete — milestone done", "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram", - "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback, plus 2026-02-23 arg hydration hardening, tool.args_rewritten audit metric, and transient fetch retry/timeout hardening", + "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback, plus 2026-02-23 arg hydration hardening, tool.args_rewritten audit metric, transient fetch retry/timeout hardening, and localhost->127.0.0.1 fallback for transcription endpoint connectivity", "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 3/3 (100%) — component registry, confidence routing, history index. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening", "next_up": "Track OpenClaw evolution regularly for inspiration and feature ideas" }, diff --git a/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md b/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md index 6490fbe..e579666 100644 --- a/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md +++ b/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md @@ -43,6 +43,8 @@ curl -sS -i -X POST http://localhost:18801/v1/audio/transcriptions \ - Flynn could not connect to the transcription endpoint for that attempt (transport/connectivity timeout/reset). - Confirm endpoint is reachable from Flynn host and check `whisper-server` logs around the same timestamp. - If this is intermittent, Flynn now retries transient failures before returning an error. + - If you use `localhost` in config and this persists, set endpoint host to `127.0.0.1` to avoid local name-resolution edge cases: + - `http://127.0.0.1:18801/v1/audio/transcriptions` - `[No speech detected]` - Request succeeded and endpoint returned empty transcript text. diff --git a/src/models/media.test.ts b/src/models/media.test.ts index f9a598d..d0b6dca 100644 --- a/src/models/media.test.ts +++ b/src/models/media.test.ts @@ -460,6 +460,28 @@ describe('transcribeAudio', () => { expect(global.fetch).toHaveBeenCalledTimes(2); }); + it('falls back from localhost to 127.0.0.1 on transient fetch failures', async () => { + vi.mocked(global.fetch) + .mockRejectedValueOnce(new TypeError('fetch failed')) + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ text: mockTranscript }), + } as Response); + + const config: AudioTranscriptionConfig = { + endpoint: 'http://localhost:18801/v1/audio/transcriptions', + apiKey: 'test-key', + model: 'test-model', + }; + + const result = await transcribeAudio(oggAudioAttachment, config); + + expect(result).toBe(mockTranscript); + expect(global.fetch).toHaveBeenCalledTimes(2); + expect(vi.mocked(global.fetch).mock.calls[0]?.[0]).toBe('http://localhost:18801/v1/audio/transcriptions'); + expect(vi.mocked(global.fetch).mock.calls[1]?.[0]).toBe('http://127.0.0.1:18801/v1/audio/transcriptions'); + }); + // Positive: uses Whisper-1 model by default. it('uses whisper-1 model by default', async () => { const config: AudioTranscriptionConfig = { diff --git a/src/models/media.ts b/src/models/media.ts index 8d376f5..9cf856b 100644 --- a/src/models/media.ts +++ b/src/models/media.ts @@ -44,22 +44,42 @@ function isTransientNetworkError(error: unknown): boolean { || message.includes('ehostunreach'); } +function buildEndpointCandidates(endpoint: string): string[] { + try { + const parsed = new URL(endpoint); + if (parsed.hostname !== 'localhost') { + return [endpoint]; + } + const ipv4Endpoint = new URL(endpoint); + ipv4Endpoint.hostname = '127.0.0.1'; + return [endpoint, ipv4Endpoint.toString()]; + } catch { + return [endpoint]; + } +} + async function fetchTranscriptionWithRetry(endpoint: string, init: RequestInit): Promise { + const endpointCandidates = buildEndpointCandidates(endpoint); + let lastErrorMessage = 'Unknown network error'; + let lastEndpoint = endpoint; for (let attempt = 1; attempt <= TRANSCRIPTION_FETCH_MAX_ATTEMPTS; attempt += 1) { + const endpointForAttempt = endpointCandidates[(attempt - 1) % endpointCandidates.length]; const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), TRANSCRIPTION_FETCH_TIMEOUT_MS); try { - return await fetch(endpoint, { ...init, signal: controller.signal }); + return await fetch(endpointForAttempt, { ...init, signal: controller.signal }); } catch (error) { const timedOut = error instanceof Error && error.name === 'AbortError'; const retriable = timedOut || isTransientNetworkError(error); const normalizedMessage = timedOut ? `request timed out after ${TRANSCRIPTION_FETCH_TIMEOUT_MS}ms` : (error instanceof Error ? error.message : String(error)); + lastErrorMessage = normalizedMessage; + lastEndpoint = endpointForAttempt; const exhausted = attempt >= TRANSCRIPTION_FETCH_MAX_ATTEMPTS; if (!retriable || exhausted) { throw new Error( - `Transcription request to ${endpoint} failed after ${attempt} attempt(s): ${normalizedMessage}`, + `Transcription service connectivity failure at ${lastEndpoint} after ${attempt} attempt(s): ${normalizedMessage}. This indicates endpoint/network availability, not missing audio bytes.`, ); } await sleep(TRANSCRIPTION_FETCH_BASE_DELAY_MS * (2 ** (attempt - 1))); @@ -68,7 +88,9 @@ async function fetchTranscriptionWithRetry(endpoint: string, init: RequestInit): } } - throw new Error(`Transcription request to ${endpoint} failed after retries`); + throw new Error( + `Transcription service connectivity failure at ${lastEndpoint} after retries: ${lastErrorMessage}. This indicates endpoint/network availability, not missing audio bytes.`, + ); } /** Check whether an attachment is a supported image type. */ diff --git a/src/tools/builtin/audio-transcribe.test.ts b/src/tools/builtin/audio-transcribe.test.ts index cdc4857..436e4cf 100644 --- a/src/tools/builtin/audio-transcribe.test.ts +++ b/src/tools/builtin/audio-transcribe.test.ts @@ -335,10 +335,30 @@ describe('createAudioTranscribeTool', () => { const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' }); expect(result.success).toBe(false); - expect(result.error).toMatch(/failed after 3 attempt/); + expect(result.error).toMatch(/connectivity failure/i); expect(result.error).toMatch(/audio\/transcriptions/); }); + it('falls back from localhost to 127.0.0.1 for transient fetch failures', async () => { + const localhostTool = createAudioTranscribeTool({ + endpoint: 'http://localhost:18801/v1/audio/transcriptions', + model: 'whisper-1', + }); + mockFetch + .mockRejectedValueOnce(new TypeError('fetch failed')) + .mockResolvedValueOnce({ + ok: true, + text: async () => JSON.stringify({ text: 'Local fallback transcript' }), + }); + + const result = await localhostTool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' }); + expect(result.success).toBe(true); + expect(result.output).toBe('Local fallback transcript'); + expect(mockFetch).toHaveBeenCalledTimes(2); + expect(mockFetch.mock.calls[0][0]).toBe('http://localhost:18801/v1/audio/transcriptions'); + expect(mockFetch.mock.calls[1][0]).toBe('http://127.0.0.1:18801/v1/audio/transcriptions'); + }); + it('returns clear error when transcription payload has no text field', async () => { mockFetch.mockResolvedValueOnce({ ok: true, diff --git a/src/tools/builtin/audio-transcribe.ts b/src/tools/builtin/audio-transcribe.ts index 8f81bde..1eb2b07 100644 --- a/src/tools/builtin/audio-transcribe.ts +++ b/src/tools/builtin/audio-transcribe.ts @@ -45,22 +45,42 @@ function isTransientNetworkError(error: unknown): boolean { || message.includes('ehostunreach'); } +function buildEndpointCandidates(endpoint: string): string[] { + try { + const parsed = new URL(endpoint); + if (parsed.hostname !== 'localhost') { + return [endpoint]; + } + const ipv4Endpoint = new URL(endpoint); + ipv4Endpoint.hostname = '127.0.0.1'; + return [endpoint, ipv4Endpoint.toString()]; + } catch { + return [endpoint]; + } +} + async function fetchWithRetry(endpoint: string, init: RequestInit): Promise { + const endpointCandidates = buildEndpointCandidates(endpoint); + let lastErrorMessage = 'Unknown network error'; + let lastEndpoint = endpoint; for (let attempt = 1; attempt <= TRANSCRIPTION_FETCH_MAX_ATTEMPTS; attempt += 1) { + const endpointForAttempt = endpointCandidates[(attempt - 1) % endpointCandidates.length]; const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), TRANSCRIPTION_FETCH_TIMEOUT_MS); try { - return await fetch(endpoint, { ...init, signal: controller.signal }); + return await fetch(endpointForAttempt, { ...init, signal: controller.signal }); } catch (error) { const timedOut = error instanceof Error && error.name === 'AbortError'; const normalizedMessage = timedOut ? `request timed out after ${TRANSCRIPTION_FETCH_TIMEOUT_MS}ms` : (error instanceof Error ? error.message : String(error)); + lastErrorMessage = normalizedMessage; + lastEndpoint = endpointForAttempt; const retriable = timedOut || isTransientNetworkError(error); const exhausted = attempt >= TRANSCRIPTION_FETCH_MAX_ATTEMPTS; if (!retriable || exhausted) { throw new Error( - `Transcription request to ${endpoint} failed after ${attempt} attempt(s): ${normalizedMessage}`, + `Transcription service connectivity failure at ${lastEndpoint} after ${attempt} attempt(s): ${normalizedMessage}. This indicates endpoint/network availability, not missing audio bytes.`, ); } await sleep(TRANSCRIPTION_FETCH_BASE_DELAY_MS * (2 ** (attempt - 1))); @@ -69,7 +89,9 @@ async function fetchWithRetry(endpoint: string, init: RequestInit): Promise