#!/usr/bin/env bash
#
# Upload an audio file to a Whisper HTTP server and print the transcript.
#
# Usage:
#   $0 /path/to/audio-file
#
# Environment:
#   WHISPER_BASE_URL  server base URL (default: http://192.168.153.117:18801)
#   WHISPER_MODEL     value of the "model" form field (default: base)
#   WHISPER_LANGUAGE  value of the "language" form field sent to /inference
#                     (default: auto)
#   WHISPER_OUTPUT    if non-empty, the transcript is also written to this path
#
# Exit status:
#   0  transcript printed (or the raw response body, if no "text" field
#      could be extracted from it)
#   1  audio file argument missing or not a regular file
#   2  both endpoints failed
set -euo pipefail

#######################################
# Print the transcript stored in a JSON response file.
# Uses jq when it is installed (top-level "text" string, JSON escapes
# decoded). Otherwise, or when jq yields nothing, falls back to a regex scan
# that prints every "text" string value with escapes left as-is.
# Arguments: $1 - path to the response body
# Outputs:   transcript on stdout; nothing when no "text" field is found
# Returns:   always 0, so the caller's raw-body fallback stays reachable
#            under `set -e` / `pipefail`
#######################################
extract_text() {
  local json_file=$1
  local text=""

  if command -v jq >/dev/null 2>&1; then
    # `.text?` tolerates non-object documents; `strings` drops non-string
    # values. Parse errors are expected for non-JSON bodies, hence 2>/dev/null.
    text="$(jq -r '(.text? // empty) | strings' "$json_file" 2>/dev/null)" \
      || text=""
    if [[ -n "$text" ]]; then
      printf '%s\n' "$text"
      return 0
    fi
  fi

  # `([^"\\]|\\.)*` accepts backslash-escaped characters, so an escaped quote
  # inside the transcript does not end the match early.
  # `|| true`: grep exits 1 on "no match", which pipefail would otherwise
  # turn into a fatal error in the caller.
  grep -oE '"text"[[:space:]]*:[[:space:]]*"([^"\\]|\\.)*"' "$json_file" \
    | sed -E 's/^"text"[[:space:]]*:[[:space:]]*"//; s/"$//' \
    || true
}

AUDIO_FILE="${1:-}"
if [[ -z "$AUDIO_FILE" || ! -f "$AUDIO_FILE" ]]; then
  echo "Usage: $0 /path/to/audio-file" >&2
  exit 1
fi

WHISPER_BASE_URL="${WHISPER_BASE_URL:-http://192.168.153.117:18801}"
WHISPER_MODEL="${WHISPER_MODEL:-base}"
WHISPER_LANGUAGE="${WHISPER_LANGUAGE:-auto}"
WHISPER_OUTPUT="${WHISPER_OUTPUT:-}"

# One private directory holds both scratch files; a single trap cleans it up.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf -- "$TMP_DIR"' EXIT
TMP_JSON="${TMP_DIR}/response.json"
TMP_ERR="${TMP_DIR}/curl.err"

# curl's -F parser treats ',' and ';' in a bare path as separators. Wrapping
# the path in double quotes (with '\' and '"' backslash-escaped) makes curl
# take it literally.
escaped_path="${AUDIO_FILE//\\/\\\\}"
escaped_path="${escaped_path//\"/\\\"}"
FILE_FIELD="file=@\"${escaped_path}\""

# curl diagnostics are collected in TMP_ERR instead of being discarded: the
# first attempt is allowed to fail quietly, but if both fail the reasons are
# shown to the user.

# 1) whisper.cpp style
if curl -fsS -m 120 \
  -F "$FILE_FIELD" \
  -F "model=${WHISPER_MODEL}" \
  -F "language=${WHISPER_LANGUAGE}" \
  "${WHISPER_BASE_URL}/inference" >"$TMP_JSON" 2>"$TMP_ERR"; then
  :
# 2) OpenAI style fallback
elif curl -fsS -m 120 \
  -H "Accept: application/json" \
  -F "$FILE_FIELD" \
  -F "model=${WHISPER_MODEL}" \
  "${WHISPER_BASE_URL}/v1/audio/transcriptions" >"$TMP_JSON" 2>>"$TMP_ERR"; then
  :
else
  echo "Transcription request failed at ${WHISPER_BASE_URL}" >&2
  cat -- "$TMP_ERR" >&2 || true
  exit 2
fi

TEXT="$(extract_text "$TMP_JSON")"

if [[ -z "$TEXT" ]]; then
  # fallback: print the raw response body if no text field could be extracted
  cat -- "$TMP_JSON"
  exit 0
fi

# printf rather than echo: a transcript such as "-n" must be printed verbatim.
printf '%s\n' "$TEXT"

if [[ -n "$WHISPER_OUTPUT" ]]; then
  printf '%s\n' "$TEXT" >"$WHISPER_OUTPUT"
fi