Files
swarm-zap/skills/whisper-local-safe/scripts/transcribe-local.sh

48 lines
1.2 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Send one audio file to a Whisper HTTP server and print the transcript.
#
# Usage: transcribe-local.sh /path/to/audio-file
#
# Optional environment variables:
#   WHISPER_BASE_URL  server root URL (default: http://192.168.153.117:18801)
#   WHISPER_MODEL     value sent as the "model" form field (default: base)
#   WHISPER_LANGUAGE  value sent as the "language" form field (default: auto)
#   WHISPER_OUTPUT    when non-empty, the extracted transcript text is also
#                     written to this path (default: empty)
set -euo pipefail

AUDIO_FILE="${1-}"

# The single argument must be present and must name an existing regular file.
[[ -n "$AUDIO_FILE" && -f "$AUDIO_FILE" ]] || {
  printf '%s\n' "Usage: $0 /path/to/audio-file" >&2
  exit 1
}

# Fill in defaults for anything the caller left unset or empty.
: "${WHISPER_BASE_URL:=http://192.168.153.117:18801}"
: "${WHISPER_MODEL:=base}"
: "${WHISPER_LANGUAGE:=auto}"
: "${WHISPER_OUTPUT:=}"

# Scratch file for the server response; removed on every exit path.
TMP_JSON="$(mktemp)"
cleanup() {
  rm -f "$TMP_JSON"
}
trap cleanup EXIT
# Ask the server for a transcript, trying two API flavours in order:
#   1) whisper.cpp style -> POST ${WHISPER_BASE_URL}/inference
#   2) OpenAI style      -> POST ${WHISPER_BASE_URL}/v1/audio/transcriptions
# The body of the first successful response is left in $TMP_JSON. When both
# attempts fail, curl's diagnostics are reported and the script exits 2.

# curl's -F parser treats ',' and ';' inside a bare @path as separators, so
# the path is passed in double quotes. Inside that quoted form a backslash or
# a double quote that belongs to the path must itself be backslash-escaped.
CURL_AUDIO_PATH=${AUDIO_FILE//\\/\\\\}
CURL_AUDIO_PATH=${CURL_AUDIO_PATH//\"/\\\"}

# Collected curl diagnostics, one line per failed endpoint.
REQUEST_ERRORS=""

#######################################
# POST the audio file to one endpoint, saving the response body to $TMP_JSON.
# Globals:   WHISPER_BASE_URL, WHISPER_MODEL, CURL_AUDIO_PATH, TMP_JSON (read)
#            REQUEST_ERRORS (appended to on failure)
# Arguments: $1   - endpoint path appended to WHISPER_BASE_URL
#            $2.. - extra curl arguments specific to this endpoint
# Returns:   0 if curl succeeded, 1 otherwise
#######################################
request_transcript() {
  local endpoint=$1
  local curl_stderr
  shift
  # `2>&1 >file`: curl's stderr goes to the capture, its stdout (the response
  # body) goes to the temp file.
  if curl_stderr="$(
    curl -fsS -m 120 \
      -F "file=@\"${CURL_AUDIO_PATH}\"" \
      -F "model=${WHISPER_MODEL}" \
      "$@" \
      "${WHISPER_BASE_URL}${endpoint}" 2>&1 >"$TMP_JSON"
  )"; then
    return 0
  fi
  REQUEST_ERRORS+="  ${endpoint}: ${curl_stderr}"$'\n'
  return 1
}

# The second endpoint is only tried when the first one fails.
if ! request_transcript /inference -F "language=${WHISPER_LANGUAGE}" &&
  ! request_transcript /v1/audio/transcriptions -H "Accept: application/json"; then
  echo "Transcription request failed at ${WHISPER_BASE_URL}" >&2
  # Say why each attempt failed instead of discarding curl's diagnostics.
  printf '%s' "$REQUEST_ERRORS" >&2
  exit 2
fi
#######################################
# Print the raw value of every "text" string field found in a JSON file.
# Backslash escapes inside a value (for example \") are treated as part of
# the value and are printed undecoded.
# Arguments: $1 - path to the JSON file to scan
# Outputs:   one line per matched field on stdout; nothing when none match
# Returns:   0 whether or not a field matched
#######################################
extract_text_field() {
  local json_file=$1
  # grep exits 1 when nothing matches. Under `set -e` with `pipefail` that
  # status would abort the script before the raw-response fallback below can
  # run, so status 1 is tolerated here; a real grep error (status > 1) still
  # fails the pipeline when pipefail is on.
  {
    grep -oE '"text"[[:space:]]*:[[:space:]]*"(\\.|[^"\\])*"' -- "$json_file" ||
      [[ $? -eq 1 ]]
  } | sed 's/^"text"[[:space:]]*:[[:space:]]*"//; s/"$//'
}

TEXT="$(extract_text_field "$TMP_JSON")"

if [[ -z "$TEXT" ]]; then
  # No usable "text" field: hand back the raw server response instead.
  cat "$TMP_JSON"
else
  # printf rather than echo so text such as "-n" is printed literally.
  printf '%s\n' "$TEXT"
  if [[ -n "$WHISPER_OUTPUT" ]]; then
    printf '%s\n' "$TEXT" >"$WHISPER_OUTPUT"
  fi
fi