From 487f26e36da88d25d37acfbdb9ac13eaf91736a6 Mon Sep 17 00:00:00 2001
From: William Valentin <william.valentin.info@gmail.com>
Date: Sun, 22 Feb 2026 19:54:58 -0800
Subject: [PATCH] Harden audio transcription fetch path with retries and
 timeout

---
 docs/plans/state.json                      | 17 +++++++-
 docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md | 19 +++++++++
 src/models/media.test.ts                   | 20 +++++++++
 src/models/media.ts                        | 49 +++++++++++++++++++++-
 src/tools/builtin/audio-transcribe.test.ts | 25 ++++++++++-
 src/tools/builtin/audio-transcribe.ts      | 49 +++++++++++++++++++++-
 6 files changed, 175 insertions(+), 4 deletions(-)

diff --git a/docs/plans/state.json b/docs/plans/state.json
index 63b4212..33f5217 100644
--- a/docs/plans/state.json
+++ b/docs/plans/state.json
@@ -6114,6 +6114,21 @@
         "README.md"
       ],
       "test_status": "docs-only update; no runtime code changes"
+    },
+    "audio-transcription-fetch-retry-hardening": {
+      "status": "completed",
+      "date": "2026-02-23",
+      "updated": "2026-02-23",
+      "summary": "Added retry+timeout hardening for transient `fetch failed` errors in both pre-transcription (`models/media`) and `audio.transcribe`, with focused regression tests and runbook updates for timestamp correlation and endpoint diagnostics.",
+      "files_modified": [
+        "src/models/media.ts",
+        "src/models/media.test.ts",
+        "src/tools/builtin/audio-transcribe.ts",
+        "src/tools/builtin/audio-transcribe.test.ts",
+        "docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md",
+        "docs/plans/state.json"
+      ],
+      "test_status": "pnpm test:run src/tools/builtin/audio-transcribe.test.ts src/models/media.test.ts"
     }
   },
   "overall_progress": {
@@ -6135,7 +6150,7 @@
     "feature_gap_scorecard": "128/128 match (100%), 0 partial (0%), 0 missing (0%)",
     "operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete — milestone done",
     "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
-    "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback, plus 2026-02-23 arg hydration hardening and tool.args_rewritten audit metric",
+    "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback, plus 2026-02-23 arg hydration hardening, tool.args_rewritten audit metric, and transient fetch retry/timeout hardening",
     "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 3/3 (100%) — component registry, confidence routing, history index. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
     "next_up": "Track OpenClaw evolution regularly for inspiration and feature ideas"
   },
diff --git a/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md b/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md
index 9c74725..6490fbe 100644
--- a/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md
+++ b/docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md
@@ -39,9 +39,28 @@ curl -sS -i -X POST http://localhost:18801/v1/audio/transcriptions \
 - `Transcription endpoint error: FFmpeg conversion failed.`
   - Endpoint could not decode payload as audio. Often caused by model-provided fake or mismatched `data`/`mime_type`.
 
+- `fetch failed`
+  - Flynn could not reach the transcription endpoint on that attempt (a transport-level failure such as a refused or reset connection, or a timeout).
+  - Confirm endpoint is reachable from Flynn host and check `whisper-server` logs around the same timestamp.
+  - If this is intermittent, note that Flynn retries transient failures (up to 3 attempts, each with a 45-second timeout) before returning an error.
+
 - `[No speech detected]`
   - Request succeeded and endpoint returned empty transcript text.
 
+## Correlate Events By Timestamp
+
+To inspect one Telegram session with human-readable UTC timestamps (substitute your own session id):
+
+```bash
+tail -n 2000 ~/.local/share/flynn/audit.log | jq -c '
+  select(
+    .event.session_id=="telegram:8367012007" and
+    (.event_type=="user.action" or .event_type=="backend.route" or .event_type=="tool.start" or .event_type=="tool.success" or .event_type=="tool.error")
+  )
+  | . + {ts_iso: ((.timestamp/1000)|strftime("%Y-%m-%d %H:%M:%S %Z"))}
+'
+```
+
 ## Rewrite Metric
 
 Flynn emits `tool.args_rewritten` whenever it replaces model-provided `audio.transcribe` args with trusted session audio bytes.
diff --git a/src/models/media.test.ts b/src/models/media.test.ts
index 3acf419..f9a598d 100644
--- a/src/models/media.test.ts
+++ b/src/models/media.test.ts
@@ -440,6 +440,26 @@ describe('transcribeAudio', () => {
     expect(result).toBe('[Audio message transcription failed]');
   });
 
+  it('retries transient fetch failure and succeeds on a later attempt', async () => {
+    vi.mocked(global.fetch)
+      .mockRejectedValueOnce(new TypeError('fetch failed'))
+      .mockResolvedValueOnce({
+        ok: true,
+        json: async () => ({ text: mockTranscript }),
+      } as Response);
+
+    const config: AudioTranscriptionConfig = {
+      endpoint: 'https://api.example.com/v1/audio/transcriptions',
+      apiKey: 'test-key',
+      model: 'test-model',
+    };
+
+    const result = await transcribeAudio(oggAudioAttachment, config);
+
+    expect(result).toBe(mockTranscript);
+    expect(global.fetch).toHaveBeenCalledTimes(2);
+  });
+
   // Positive: uses Whisper-1 model by default.
   it('uses whisper-1 model by default', async () => {
     const config: AudioTranscriptionConfig = {
diff --git a/src/models/media.ts b/src/models/media.ts
index 96c9ec7..8d376f5 100644
--- a/src/models/media.ts
+++ b/src/models/media.ts
@@ -24,6 +24,53 @@ const SUPPORTED_AUDIO_TYPES = new Set([
   'audio/x-m4a',
 ]);
 
+const TRANSCRIPTION_FETCH_MAX_ATTEMPTS = 3; // total tries, including the first request
+const TRANSCRIPTION_FETCH_TIMEOUT_MS = 45_000; // per-attempt deadline before the request is aborted
+const TRANSCRIPTION_FETCH_BASE_DELAY_MS = 250; // wait before the first retry; doubles for each later retry
+
+function sleep(ms: number): Promise<void> { // Resolves (with no value) once `ms` milliseconds have elapsed.
+  return new Promise<void>((done) => { setTimeout(done, ms); });
+}
+
+/** Report whether `error` reads like a transient transport failure that is worth retrying. */
+function isTransientNetworkError(error: unknown): boolean {
+  // Non-Error values are stringified; matching is a case-insensitive substring search.
+  const text = (error instanceof Error ? error.message : String(error)).toLowerCase();
+  const markers = [
+    'fetch failed', 'network', 'timeout', 'timed out',
+    'econnrefused', 'econnreset',
+    'enotfound', 'ehostunreach',
+  ];
+  return markers.some((marker) => text.includes(marker));
+}
+
+/**
+ * Fetch `endpoint` with a per-attempt timeout, retrying transient failures with exponential backoff.
+ * Throws an Error naming endpoint, attempt count and failure; a nested `error.cause` message is appended.
+ */
+async function fetchTranscriptionWithRetry(endpoint: string, init: RequestInit): Promise<Response> {
+  for (let attempt = 1; ; attempt += 1) {
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), TRANSCRIPTION_FETCH_TIMEOUT_MS);
+    try {
+      return await fetch(endpoint, { ...init, signal: controller.signal });
+    } catch (error) {
+      const timedOut = error instanceof Error && error.name === 'AbortError';
+      const cause = error instanceof Error ? (error as { cause?: unknown }).cause : undefined;
+      const detail = cause instanceof Error && cause.message ? ` (${cause.message})` : '';
+      const reason = timedOut
+        ? `request timed out after ${TRANSCRIPTION_FETCH_TIMEOUT_MS}ms`
+        : `${error instanceof Error ? error.message : String(error)}${detail}`;
+      if (attempt >= TRANSCRIPTION_FETCH_MAX_ATTEMPTS || !(timedOut || isTransientNetworkError(error))) {
+        throw new Error(`Transcription request to ${endpoint} failed after ${attempt} attempt(s): ${reason}`);
+      }
+      await sleep(TRANSCRIPTION_FETCH_BASE_DELAY_MS * (2 ** (attempt - 1)));
+    } finally {
+      clearTimeout(timeout);
+    }
+  }
+}
+
 /** Check whether an attachment is a supported image type. */
 export function isSupportedImage(attachment: Attachment): boolean {
   return SUPPORTED_IMAGE_TYPES.has(attachment.mimeType);
@@ -257,7 +304,7 @@ export async function transcribeAudio(
       headers['Authorization'] = `Bearer ${config.apiKey}`;
     }
 
-    const res = await fetch(config.endpoint, { method: 'POST', body: formData, headers });
+    const res = await fetchTranscriptionWithRetry(config.endpoint, { method: 'POST', body: formData, headers });
     if (!res.ok) {
       throw new Error(`Transcription failed: ${res.status} ${res.statusText}`);
     }
diff --git a/src/tools/builtin/audio-transcribe.test.ts b/src/tools/builtin/audio-transcribe.test.ts
index 3e614ee..cdc4857 100644
--- a/src/tools/builtin/audio-transcribe.test.ts
+++ b/src/tools/builtin/audio-transcribe.test.ts
@@ -309,13 +309,36 @@ describe('createAudioTranscribeTool', () => {
     });
 
     it('handles network errors gracefully', async () => {
-      mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));
+      mockFetch.mockRejectedValue(new Error('ECONNREFUSED'));
 
       const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
       expect(result.success).toBe(false);
       expect(result.error).toMatch(/ECONNREFUSED/);
     });
 
+    it('retries transient fetch failures before succeeding', async () => {
+      mockFetch
+        .mockRejectedValueOnce(new TypeError('fetch failed'))
+        .mockResolvedValueOnce({
+          ok: true,
+          text: async () => JSON.stringify({ text: 'Recovered transcript' }),
+        });
+
+      const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
+      expect(result.success).toBe(true);
+      expect(result.output).toBe('Recovered transcript');
+      expect(mockFetch).toHaveBeenCalledTimes(2);
+    });
+
+    it('returns endpoint context when transient failures are exhausted', async () => {
+      mockFetch.mockRejectedValue(new TypeError('fetch failed'));
+
+      const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
+      expect(result.success).toBe(false);
+      expect(result.error).toMatch(/failed after 3 attempt/);
+      expect(result.error).toMatch(/audio\/transcriptions/);
+    });
+
     it('returns clear error when transcription payload has no text field', async () => {
       mockFetch.mockResolvedValueOnce({
         ok: true,
diff --git a/src/tools/builtin/audio-transcribe.ts b/src/tools/builtin/audio-transcribe.ts
index fed70a0..8f81bde 100644
--- a/src/tools/builtin/audio-transcribe.ts
+++ b/src/tools/builtin/audio-transcribe.ts
@@ -25,6 +25,53 @@ const PROVIDER_ENDPOINTS: Record<string, string> = {
   llamacpp: 'http://localhost:8080/v1/audio/transcriptions',
 };
 
+const TRANSCRIPTION_FETCH_MAX_ATTEMPTS = 3; // total tries, including the first request
+const TRANSCRIPTION_FETCH_TIMEOUT_MS = 45_000; // per-attempt deadline before the request is aborted
+const TRANSCRIPTION_FETCH_BASE_DELAY_MS = 250; // wait before the first retry; doubles for each later retry
+
+function sleep(ms: number): Promise<void> { // Resolves (with no value) once `ms` milliseconds have elapsed.
+  return new Promise<void>((done) => { setTimeout(done, ms); });
+}
+
+/** Report whether `error` reads like a transient transport failure that is worth retrying. */
+function isTransientNetworkError(error: unknown): boolean {
+  // Non-Error values are stringified; matching is a case-insensitive substring search.
+  const text = (error instanceof Error ? error.message : String(error)).toLowerCase();
+  const markers = [
+    'fetch failed', 'network', 'timeout', 'timed out',
+    'econnrefused', 'econnreset',
+    'enotfound', 'ehostunreach',
+  ];
+  return markers.some((marker) => text.includes(marker));
+}
+
+/**
+ * Fetch `endpoint` with a per-attempt timeout, retrying transient failures with exponential backoff.
+ * Throws an Error naming endpoint, attempt count and failure; a nested `error.cause` message is appended.
+ */
+async function fetchWithRetry(endpoint: string, init: RequestInit): Promise<Response> {
+  for (let attempt = 1; ; attempt += 1) {
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), TRANSCRIPTION_FETCH_TIMEOUT_MS);
+    try {
+      return await fetch(endpoint, { ...init, signal: controller.signal });
+    } catch (error) {
+      const timedOut = error instanceof Error && error.name === 'AbortError';
+      const cause = error instanceof Error ? (error as { cause?: unknown }).cause : undefined;
+      const detail = cause instanceof Error && cause.message ? ` (${cause.message})` : '';
+      const reason = timedOut
+        ? `request timed out after ${TRANSCRIPTION_FETCH_TIMEOUT_MS}ms`
+        : `${error instanceof Error ? error.message : String(error)}${detail}`;
+      if (attempt >= TRANSCRIPTION_FETCH_MAX_ATTEMPTS || !(timedOut || isTransientNetworkError(error))) {
+        throw new Error(`Transcription request to ${endpoint} failed after ${attempt} attempt(s): ${reason}`);
+      }
+      await sleep(TRANSCRIPTION_FETCH_BASE_DELAY_MS * (2 ** (attempt - 1)));
+    } finally {
+      clearTimeout(timeout);
+    }
+  }
+}
+
 function validateUrl(url: string): { valid: boolean; error?: string } {
   let parsed: URL;
   try {
@@ -387,7 +434,7 @@ export function createAudioTranscribeTool(audioConfig?: AudioTranscriptionConfig
           fetchOptions.headers = headers;
         }
 
-        const response = await fetch(endpoint, fetchOptions);
+        const response = await fetchWithRetry(endpoint, fetchOptions);
 
         if (!response.ok) {
           const errorText = await response.text();