48 lines
1.2 KiB
Bash
Executable File
48 lines
1.2 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Transcribe an audio file by POSTing it to a Whisper HTTP server.
#
# Usage: $0 /path/to/audio-file
#
# Two endpoints are tried in order:
#   1) ${WHISPER_BASE_URL}/inference                (whisper.cpp style)
#   2) ${WHISPER_BASE_URL}/v1/audio/transcriptions  (OpenAI style)
#
# Environment:
#   WHISPER_BASE_URL  Server base URL (default: http://192.168.153.117:18801).
#   WHISPER_MODEL     Value of the "model" form field (default: base).
#   WHISPER_LANGUAGE  Value of the "language" form field, sent to /inference
#                     only (default: auto).
#   WHISPER_OUTPUT    Optional file path; when non-empty, the extracted
#                     transcript is also written there.
#
# Output: the extracted "text" value(s) on stdout, or the raw response body
#         when no non-empty "text" field could be extracted.
# Exit status: 0 on success, 1 on bad usage, 2 when both requests fail.

set -euo pipefail

AUDIO_FILE="${1:-}"
if [[ -z "$AUDIO_FILE" || ! -f "$AUDIO_FILE" ]]; then
  printf 'Usage: %s /path/to/audio-file\n' "$0" >&2
  exit 1
fi

WHISPER_BASE_URL="${WHISPER_BASE_URL:-http://192.168.153.117:18801}"
WHISPER_MODEL="${WHISPER_MODEL:-base}"
WHISPER_LANGUAGE="${WHISPER_LANGUAGE:-auto}"
WHISPER_OUTPUT="${WHISPER_OUTPUT:-}"

# TMP_JSON stays global on purpose: the EXIT trap expands it at exit time.
TMP_JSON="$(mktemp)"
trap 'rm -f -- "$TMP_JSON"' EXIT

# curl's -F parser treats ',' and ';' in a bare path as field syntax, so the
# path is passed double-quoted with embedded '\' and '"' backslash-escaped.
upload_path=${AUDIO_FILE//\\/\\\\}
upload_path=${upload_path//\"/\\\"}
FILE_FIELD="file=@\"${upload_path}\""

#######################################
# Print the value of every "text" string field found in a JSON file.
# Escaped characters inside the value (e.g. \" or \n) are matched as part of
# the string but are NOT decoded; they are printed as they appear in the file.
# Arguments: $1 - path to the JSON response
# Outputs:   one extracted value per line on stdout
# Returns:   non-zero when no "text" field is found
#######################################
extract_text() {
  local field_re='"text"[[:space:]]*:[[:space:]]*"([^"\\]|\\.)*"'
  grep -oE -- "$field_re" "$1" \
    | sed -E 's/^"text"[[:space:]]*:[[:space:]]*"//; s/"$//'
}

# 1) whisper.cpp style. stderr is silenced deliberately: a failure here is
#    expected on OpenAI-style servers and is handled by the fallback below.
if ! curl -fsS -m 120 \
    -F "$FILE_FIELD" \
    -F "model=${WHISPER_MODEL}" \
    -F "language=${WHISPER_LANGUAGE}" \
    "${WHISPER_BASE_URL}/inference" >"$TMP_JSON" 2>/dev/null; then
  # 2) OpenAI style fallback. curl's own error message is left visible so the
  #    reason for a total failure is not lost.
  if ! curl -fsS -m 120 \
      -H "Accept: application/json" \
      -F "$FILE_FIELD" \
      -F "model=${WHISPER_MODEL}" \
      "${WHISPER_BASE_URL}/v1/audio/transcriptions" >"$TMP_JSON"; then
    printf 'Transcription request failed at %s\n' "$WHISPER_BASE_URL" >&2
    exit 2
  fi
fi

# grep exits 1 when nothing matches; with errexit + pipefail that would abort
# the script before the raw-JSON fallback, so the failure is absorbed here.
TEXT="$(extract_text "$TMP_JSON")" || TEXT=""

if [[ -z "$TEXT" ]]; then
  # fallback: print raw json if text field parsing failed
  cat -- "$TMP_JSON"
  exit 0
fi

printf '%s\n' "$TEXT"
if [[ -n "$WHISPER_OUTPUT" ]]; then
  printf '%s\n' "$TEXT" >"$WHISPER_OUTPUT"
fi