Harden audio transcription fetch path with retries and timeout
This commit is contained in:
+16
-1
@@ -6114,6 +6114,21 @@
|
||||
"README.md"
|
||||
],
|
||||
"test_status": "docs-only update; no runtime code changes"
|
||||
},
|
||||
"audio-transcription-fetch-retry-hardening": {
|
||||
"status": "completed",
|
||||
"date": "2026-02-23",
|
||||
"updated": "2026-02-23",
|
||||
"summary": "Added retry+timeout hardening for transient `fetch failed` errors in both pre-transcription (`models/media`) and `audio.transcribe`, with focused regression tests and runbook updates for timestamp correlation and endpoint diagnostics.",
|
||||
"files_modified": [
|
||||
"src/models/media.ts",
|
||||
"src/models/media.test.ts",
|
||||
"src/tools/builtin/audio-transcribe.ts",
|
||||
"src/tools/builtin/audio-transcribe.test.ts",
|
||||
"docs/runbooks/VOICE_TRANSCRIPTION_DEBUG.md",
|
||||
"docs/plans/state.json"
|
||||
],
|
||||
"test_status": "pnpm test:run src/tools/builtin/audio-transcribe.test.ts src/models/media.test.ts"
|
||||
}
|
||||
},
|
||||
"overall_progress": {
|
||||
@@ -6135,7 +6150,7 @@
|
||||
"feature_gap_scorecard": "128/128 match (100%), 0 partial (0%), 0 missing (0%)",
|
||||
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete — milestone done",
|
||||
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
|
||||
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback, plus 2026-02-23 arg hydration hardening and tool.args_rewritten audit metric",
|
||||
"native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback, plus 2026-02-23 arg hydration hardening, tool.args_rewritten audit metric, and transient fetch retry/timeout hardening",
|
||||
"remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 3/3 (100%) — component registry, confidence routing, history index. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening",
|
||||
"next_up": "Track OpenClaw evolution regularly for inspiration and feature ideas"
|
||||
},
|
||||
|
||||
@@ -39,9 +39,28 @@ curl -sS -i -X POST http://localhost:18801/v1/audio/transcriptions \
|
||||
- `Transcription endpoint error: FFmpeg conversion failed.`
|
||||
- Endpoint could not decode payload as audio. Often caused by model-provided fake or mismatched `data`/`mime_type`.
|
||||
|
||||
- `fetch failed`
|
||||
- Flynn could not reach the transcription endpoint on that attempt (a transport-level failure such as a connection timeout, refusal, or reset).
|
||||
- Confirm the endpoint is reachable from the Flynn host and check the `whisper-server` logs around the same timestamp.
|
||||
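- If this is intermittent, Flynn now retries transient failures (up to 3 attempts per request, with a 45-second timeout per attempt and exponential backoff starting at 250 ms) before returning an error.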
|
||||
|
||||
- `[No speech detected]`
|
||||
- Request succeeded and endpoint returned empty transcript text.
|
||||
|
||||
## Correlate Events By Timestamp
|
||||
|
||||
To inspect one Telegram session with wall-clock timestamps:
|
||||
|
||||
```bash
|
||||
tail -n 2000 ~/.local/share/flynn/audit.log | jq -c '
|
||||
select(
|
||||
.event.session_id=="telegram:8367012007" and
|
||||
(.event_type=="user.action" or .event_type=="backend.route" or .event_type=="tool.start" or .event_type=="tool.success" or .event_type=="tool.error")
|
||||
)
|
||||
| . + {ts_iso: ((.timestamp/1000)|strftime("%Y-%m-%d %H:%M:%S %Z"))}
|
||||
'
|
||||
```
|
||||
|
||||
## Rewrite Metric
|
||||
|
||||
Flynn emits `tool.args_rewritten` whenever it replaces model-provided `audio.transcribe` args with trusted session audio bytes.
|
||||
|
||||
@@ -440,6 +440,26 @@ describe('transcribeAudio', () => {
|
||||
expect(result).toBe('[Audio message transcription failed]');
|
||||
});
|
||||
|
||||
it('retries transient fetch failure and succeeds on a later attempt', async () => {
|
||||
vi.mocked(global.fetch)
|
||||
.mockRejectedValueOnce(new TypeError('fetch failed'))
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: async () => ({ text: mockTranscript }),
|
||||
} as Response);
|
||||
|
||||
const config: AudioTranscriptionConfig = {
|
||||
endpoint: 'https://api.example.com/v1/audio/transcriptions',
|
||||
apiKey: 'test-key',
|
||||
model: 'test-model',
|
||||
};
|
||||
|
||||
const result = await transcribeAudio(oggAudioAttachment, config);
|
||||
|
||||
expect(result).toBe(mockTranscript);
|
||||
expect(global.fetch).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
// Positive: uses Whisper-1 model by default.
|
||||
it('uses whisper-1 model by default', async () => {
|
||||
const config: AudioTranscriptionConfig = {
|
||||
|
||||
+48
-1
@@ -24,6 +24,53 @@ const SUPPORTED_AUDIO_TYPES = new Set([
|
||||
'audio/x-m4a',
|
||||
]);
|
||||
|
||||
// Upper bound on fetch attempts per transcription request (first try included).
const TRANSCRIPTION_FETCH_MAX_ATTEMPTS = 3;
|
||||
// Per-attempt deadline in milliseconds; the attempt's AbortController is aborted when it elapses.
const TRANSCRIPTION_FETCH_TIMEOUT_MS = 45_000;
|
||||
// Backoff before the first retry, in milliseconds; doubled for each subsequent retry.
const TRANSCRIPTION_FETCH_BASE_DELAY_MS = 250;
|
||||
|
||||
/**
 * Pause helper for async code.
 *
 * @param ms Delay handed to `setTimeout`, in milliseconds.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Decide whether a failed fetch attempt looks like a transient transport
 * problem that is worth retrying.
 *
 * The check is a case-insensitive substring match against a fixed marker list
 * (`fetch failed`, `network`, `timeout`, `timed out`, `econnrefused`,
 * `econnreset`, `enotfound`, `ehostunreach`). Markers are searched in:
 *   - the error message (or `String(error)` for a non-Error top-level value),
 *   - a string `code` property, and
 *   - the same fields of nested `cause` values.
 *
 * `code` and `cause` are inspected because runtimes such as Node.js may report
 * the low-level socket failure there while the outer message stays generic or
 * empty.
 *
 * @param error Whatever was thrown by the failed attempt.
 * @returns `true` when any inspected field contains a transient marker.
 */
function isTransientNetworkError(error: unknown): boolean {
  const transientMarkers = [
    'fetch failed',
    'network',
    'timeout',
    'timed out',
    'econnrefused',
    'econnreset',
    'enotfound',
    'ehostunreach',
  ];
  // Bounded walk so a self-referencing `cause` chain cannot loop forever.
  const maxCauseDepth = 8;

  let current: unknown = error;
  for (let depth = 0; depth < maxCauseDepth; depth += 1) {
    let text: string;
    if (current instanceof Error) {
      text = current.message;
    } else if (depth === 0 || typeof current === 'string') {
      // The top-level value is always stringified; nested non-Error causes
      // contribute text only when they already are strings.
      text = String(current);
    } else {
      text = '';
    }

    const details = typeof current === 'object' && current !== null
      ? (current as { code?: unknown; cause?: unknown })
      : undefined;
    const code = typeof details?.code === 'string' ? details.code : '';

    // Newline separator keeps a marker from matching across message and code.
    const haystack = `${text}\n${code}`.toLowerCase();
    if (transientMarkers.some((marker) => haystack.includes(marker))) {
      return true;
    }

    if (details?.cause === undefined || details.cause === null) {
      return false;
    }
    current = details.cause;
  }

  return false;
}
|
||||
|
||||
/**
 * Send the transcription request, retrying transient transport failures.
 *
 * Up to TRANSCRIPTION_FETCH_MAX_ATTEMPTS attempts are made. Each attempt runs
 * under its own AbortController, which is aborted once
 * TRANSCRIPTION_FETCH_TIMEOUT_MS elapses; the deadline is cleared as soon as
 * `fetch` settles. Between attempts the function waits
 * TRANSCRIPTION_FETCH_BASE_DELAY_MS * 2^(attempt - 1) milliseconds.
 *
 * A signal supplied via `init.signal` is honoured: aborting it cancels the
 * in-flight attempt and is never retried or reported as a timeout.
 *
 * Only rejected fetches are retried; a resolved response is returned as-is,
 * whatever its status, so callers still have to check `ok`.
 *
 * @param endpoint URL of the transcription endpoint.
 * @param init Request options forwarded to `fetch` on every attempt.
 * @returns The first response that `fetch` resolves with.
 * @throws Error with message
 *   `Transcription request to <endpoint> failed after <n> attempt(s): <reason>`
 *   when the failure is not retriable or attempts are exhausted. The original
 *   error is attached as `cause`.
 */
async function fetchTranscriptionWithRetry(endpoint: string, init: RequestInit): Promise<Response> {
  const callerSignal = init.signal ?? undefined;

  for (let attempt = 1; attempt <= TRANSCRIPTION_FETCH_MAX_ATTEMPTS; attempt += 1) {
    const controller = new AbortController();

    // Track the deadline explicitly: an AbortError alone cannot tell a timeout
    // apart from a caller-initiated cancellation.
    let deadlineHit = false;
    const timeout = setTimeout(() => {
      deadlineHit = true;
      controller.abort();
    }, TRANSCRIPTION_FETCH_TIMEOUT_MS);

    // Relay the caller's cancellation to this attempt's controller, since the
    // per-attempt signal replaces `init.signal` in the actual fetch call.
    const forwardAbort = (): void => controller.abort();
    if (callerSignal?.aborted) {
      controller.abort();
    } else {
      callerSignal?.addEventListener('abort', forwardAbort, { once: true });
    }

    let failure: unknown;
    try {
      return await fetch(endpoint, { ...init, signal: controller.signal });
    } catch (error) {
      failure = error;
    } finally {
      // Release per-attempt resources before any backoff wait.
      clearTimeout(timeout);
      callerSignal?.removeEventListener('abort', forwardAbort);
    }

    const cancelledByCaller = !deadlineHit && callerSignal?.aborted === true;
    const timedOut = !cancelledByCaller
      && (deadlineHit || (failure instanceof Error && failure.name === 'AbortError'));
    const retriable = !cancelledByCaller && (timedOut || isTransientNetworkError(failure));
    const normalizedMessage = timedOut
      ? `request timed out after ${TRANSCRIPTION_FETCH_TIMEOUT_MS}ms`
      : (failure instanceof Error ? failure.message : String(failure));
    const exhausted = attempt >= TRANSCRIPTION_FETCH_MAX_ATTEMPTS;

    if (!retriable || exhausted) {
      throw Object.assign(
        new Error(
          `Transcription request to ${endpoint} failed after ${attempt} attempt(s): ${normalizedMessage}`,
        ),
        { cause: failure },
      );
    }

    await sleep(TRANSCRIPTION_FETCH_BASE_DELAY_MS * (2 ** (attempt - 1)));
  }

  // Unreachable: the final attempt always returns or throws above. Kept so
  // every code path visibly ends in a return or throw.
  throw new Error(`Transcription request to ${endpoint} failed after retries`);
}
|
||||
|
||||
/** Check whether an attachment is a supported image type. */
|
||||
export function isSupportedImage(attachment: Attachment): boolean {
|
||||
return SUPPORTED_IMAGE_TYPES.has(attachment.mimeType);
|
||||
@@ -257,7 +304,7 @@ export async function transcribeAudio(
|
||||
headers['Authorization'] = `Bearer ${config.apiKey}`;
|
||||
}
|
||||
|
||||
const res = await fetch(config.endpoint, { method: 'POST', body: formData, headers });
|
||||
const res = await fetchTranscriptionWithRetry(config.endpoint, { method: 'POST', body: formData, headers });
|
||||
if (!res.ok) {
|
||||
throw new Error(`Transcription failed: ${res.status} ${res.statusText}`);
|
||||
}
|
||||
|
||||
@@ -309,13 +309,36 @@ describe('createAudioTranscribeTool', () => {
|
||||
});
|
||||
|
||||
it('handles network errors gracefully', async () => {
|
||||
mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));
|
||||
mockFetch.mockRejectedValue(new Error('ECONNREFUSED'));
|
||||
|
||||
const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/ECONNREFUSED/);
|
||||
});
|
||||
|
||||
it('retries transient fetch failures before succeeding', async () => {
|
||||
mockFetch
|
||||
.mockRejectedValueOnce(new TypeError('fetch failed'))
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
text: async () => JSON.stringify({ text: 'Recovered transcript' }),
|
||||
});
|
||||
|
||||
const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.output).toBe('Recovered transcript');
|
||||
expect(mockFetch).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it('returns endpoint context when transient failures are exhausted', async () => {
|
||||
mockFetch.mockRejectedValue(new TypeError('fetch failed'));
|
||||
|
||||
const result = await tool.execute({ data: 'AAAAAAA=', mime_type: 'audio/wav' });
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.error).toMatch(/failed after 3 attempt/);
|
||||
expect(result.error).toMatch(/audio\/transcriptions/);
|
||||
});
|
||||
|
||||
it('returns clear error when transcription payload has no text field', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
|
||||
@@ -25,6 +25,53 @@ const PROVIDER_ENDPOINTS: Record<string, string> = {
|
||||
llamacpp: 'http://localhost:8080/v1/audio/transcriptions',
|
||||
};
|
||||
|
||||
// Upper bound on fetch attempts per transcription request (first try included).
const TRANSCRIPTION_FETCH_MAX_ATTEMPTS = 3;
|
||||
// Per-attempt deadline in milliseconds; the attempt's AbortController is aborted when it elapses.
const TRANSCRIPTION_FETCH_TIMEOUT_MS = 45_000;
|
||||
// Backoff before the first retry, in milliseconds; doubled for each subsequent retry.
const TRANSCRIPTION_FETCH_BASE_DELAY_MS = 250;
|
||||
|
||||
/**
 * Pause helper for async code.
 *
 * @param ms Delay handed to `setTimeout`, in milliseconds.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Decide whether a failed fetch attempt looks like a transient transport
 * problem that is worth retrying.
 *
 * The check is a case-insensitive substring match against a fixed marker list
 * (`fetch failed`, `network`, `timeout`, `timed out`, `econnrefused`,
 * `econnreset`, `enotfound`, `ehostunreach`). Markers are searched in:
 *   - the error message (or `String(error)` for a non-Error top-level value),
 *   - a string `code` property, and
 *   - the same fields of nested `cause` values.
 *
 * `code` and `cause` are inspected because runtimes such as Node.js may report
 * the low-level socket failure there while the outer message stays generic or
 * empty.
 *
 * @param error Whatever was thrown by the failed attempt.
 * @returns `true` when any inspected field contains a transient marker.
 */
function isTransientNetworkError(error: unknown): boolean {
  const transientMarkers = [
    'fetch failed',
    'network',
    'timeout',
    'timed out',
    'econnrefused',
    'econnreset',
    'enotfound',
    'ehostunreach',
  ];
  // Bounded walk so a self-referencing `cause` chain cannot loop forever.
  const maxCauseDepth = 8;

  let current: unknown = error;
  for (let depth = 0; depth < maxCauseDepth; depth += 1) {
    let text: string;
    if (current instanceof Error) {
      text = current.message;
    } else if (depth === 0 || typeof current === 'string') {
      // The top-level value is always stringified; nested non-Error causes
      // contribute text only when they already are strings.
      text = String(current);
    } else {
      text = '';
    }

    const details = typeof current === 'object' && current !== null
      ? (current as { code?: unknown; cause?: unknown })
      : undefined;
    const code = typeof details?.code === 'string' ? details.code : '';

    // Newline separator keeps a marker from matching across message and code.
    const haystack = `${text}\n${code}`.toLowerCase();
    if (transientMarkers.some((marker) => haystack.includes(marker))) {
      return true;
    }

    if (details?.cause === undefined || details.cause === null) {
      return false;
    }
    current = details.cause;
  }

  return false;
}
|
||||
|
||||
/**
 * Send the transcription request, retrying transient transport failures.
 *
 * Up to TRANSCRIPTION_FETCH_MAX_ATTEMPTS attempts are made. Each attempt runs
 * under its own AbortController, which is aborted once
 * TRANSCRIPTION_FETCH_TIMEOUT_MS elapses; the deadline is cleared as soon as
 * `fetch` settles. Between attempts the function waits
 * TRANSCRIPTION_FETCH_BASE_DELAY_MS * 2^(attempt - 1) milliseconds.
 *
 * A signal supplied via `init.signal` is honoured: aborting it cancels the
 * in-flight attempt and is never retried or reported as a timeout.
 *
 * Only rejected fetches are retried; a resolved response is returned as-is,
 * whatever its status, so callers still have to check `ok`.
 *
 * @param endpoint URL of the transcription endpoint.
 * @param init Request options forwarded to `fetch` on every attempt.
 * @returns The first response that `fetch` resolves with.
 * @throws Error with message
 *   `Transcription request to <endpoint> failed after <n> attempt(s): <reason>`
 *   when the failure is not retriable or attempts are exhausted. The original
 *   error is attached as `cause`.
 */
async function fetchWithRetry(endpoint: string, init: RequestInit): Promise<Response> {
  const callerSignal = init.signal ?? undefined;

  for (let attempt = 1; attempt <= TRANSCRIPTION_FETCH_MAX_ATTEMPTS; attempt += 1) {
    const controller = new AbortController();

    // Track the deadline explicitly: an AbortError alone cannot tell a timeout
    // apart from a caller-initiated cancellation.
    let deadlineHit = false;
    const timeout = setTimeout(() => {
      deadlineHit = true;
      controller.abort();
    }, TRANSCRIPTION_FETCH_TIMEOUT_MS);

    // Relay the caller's cancellation to this attempt's controller, since the
    // per-attempt signal replaces `init.signal` in the actual fetch call.
    const forwardAbort = (): void => controller.abort();
    if (callerSignal?.aborted) {
      controller.abort();
    } else {
      callerSignal?.addEventListener('abort', forwardAbort, { once: true });
    }

    let failure: unknown;
    try {
      return await fetch(endpoint, { ...init, signal: controller.signal });
    } catch (error) {
      failure = error;
    } finally {
      // Release per-attempt resources before any backoff wait.
      clearTimeout(timeout);
      callerSignal?.removeEventListener('abort', forwardAbort);
    }

    const cancelledByCaller = !deadlineHit && callerSignal?.aborted === true;
    const timedOut = !cancelledByCaller
      && (deadlineHit || (failure instanceof Error && failure.name === 'AbortError'));
    const normalizedMessage = timedOut
      ? `request timed out after ${TRANSCRIPTION_FETCH_TIMEOUT_MS}ms`
      : (failure instanceof Error ? failure.message : String(failure));
    const retriable = !cancelledByCaller && (timedOut || isTransientNetworkError(failure));
    const exhausted = attempt >= TRANSCRIPTION_FETCH_MAX_ATTEMPTS;

    if (!retriable || exhausted) {
      throw Object.assign(
        new Error(
          `Transcription request to ${endpoint} failed after ${attempt} attempt(s): ${normalizedMessage}`,
        ),
        { cause: failure },
      );
    }

    await sleep(TRANSCRIPTION_FETCH_BASE_DELAY_MS * (2 ** (attempt - 1)));
  }

  // Unreachable: the final attempt always returns or throws above. Kept so
  // every code path visibly ends in a return or throw.
  throw new Error(`Transcription request to ${endpoint} failed after retries`);
}
|
||||
|
||||
function validateUrl(url: string): { valid: boolean; error?: string } {
|
||||
let parsed: URL;
|
||||
try {
|
||||
@@ -387,7 +434,7 @@ export function createAudioTranscribeTool(audioConfig?: AudioTranscriptionConfig
|
||||
fetchOptions.headers = headers;
|
||||
}
|
||||
|
||||
const response = await fetch(endpoint, fetchOptions);
|
||||
const response = await fetchWithRetry(endpoint, fetchOptions);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
|
||||
Reference in New Issue
Block a user