#!/usr/bin/env bash
#
# Transcribe audio files using Whisper.
#
# Usage: transcribe [options] <audio-file>
#   transcribe recording.mp3
#   transcribe -m medium meeting.wav
#
# Environment:
#   WHISPER_MODEL  Model used when -m/--model is not given (default: base)
#
# Exit status: 0 on success, 1 on usage/validation errors, 127 if the
# whisper executable is not on PATH, otherwise whisper's own exit status.

set -euo pipefail

# Settings; overridden by command-line options in parse_args.
MODEL="${WHISPER_MODEL:-base}"
LANGUAGE=""
OUTPUT_FORMAT="txt"
AUDIO_FILE=""

#######################################
# Print the usage/help text.
# Outputs: help text on stdout (callers redirect it when reporting errors).
#######################################
show_help() {
  cat <<'EOF'
Usage: transcribe [options] <audio-file>

Options:
 -m, --model MODEL Whisper model (tiny, base, small, medium, large)
 Default: base (fast), use medium/large for accuracy
 -l, --language LANG Force language (e.g., en, es, fr)
 -f, --format FORMAT Output format (txt, json, srt, vtt)
 -h, --help Show this help

Examples:
 transcribe meeting.mp3 # Quick transcription
 transcribe -m medium interview.wav # Better accuracy
 transcribe -l en -f srt podcast.mp3 # English subtitles

Models (speed vs accuracy):
 tiny - Fastest, lowest accuracy (~1GB VRAM)
 base - Fast, good accuracy (~1GB VRAM) [default]
 small - Balanced (~2GB VRAM)
 medium - Better accuracy (~5GB VRAM)
 large - Best accuracy (~10GB VRAM)

Environment:
 WHISPER_MODEL Default model (default: base)
EOF
}

#######################################
# Print a message to stderr and exit with status 1.
# Arguments: $* - the message to print
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Parse command-line arguments into the global settings.
# Globals:   MODEL, LANGUAGE, OUTPUT_FORMAT, AUDIO_FILE (written)
# Arguments: the script's command-line arguments
# Notes:     If several positional arguments are given, the last one wins.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -m | --model)
        # Without this check a trailing option made `shift 2` fail and the
        # script exit silently under `set -e`.
        [[ $# -ge 2 ]] || die "Error: Option $1 requires an argument"
        MODEL="$2"
        shift 2
        ;;
      -l | --language)
        [[ $# -ge 2 ]] || die "Error: Option $1 requires an argument"
        LANGUAGE="$2"
        shift 2
        ;;
      -f | --format)
        [[ $# -ge 2 ]] || die "Error: Option $1 requires an argument"
        OUTPUT_FORMAT="$2"
        shift 2
        ;;
      -h | --help)
        show_help
        exit 0
        ;;
      -*)
        die "Unknown option: $1"
        ;;
      *)
        AUDIO_FILE="$1"
        shift
        ;;
    esac
  done
}

#######################################
# Validate the input, run whisper, and report the expected output file name.
# Globals:   MODEL, LANGUAGE, OUTPUT_FORMAT, AUDIO_FILE (read)
# Arguments: the script's command-line arguments
#######################################
main() {
  parse_args "$@"

  if [[ -z "$AUDIO_FILE" ]]; then
    echo "Error: No audio file provided" >&2
    show_help >&2
    exit 1
  fi

  if [[ ! -f "$AUDIO_FILE" ]]; then
    die "Error: File not found: $AUDIO_FILE"
  fi

  if ! command -v whisper >/dev/null 2>&1; then
    echo "Error: whisper not found in PATH" >&2
    exit 127
  fi

  # Build the command as an argv array and execute it directly. The previous
  # string + eval approach re-parsed the file name and option values as shell
  # code, so quotes, spaces, or $(...) in them could break or hijack the call.
  local -a cmd=(whisper "$AUDIO_FILE" --model "$MODEL" --output_format "$OUTPUT_FORMAT")
  if [[ -n "$LANGUAGE" ]]; then
    cmd+=(--language "$LANGUAGE")
  fi

  echo "Transcribing: $AUDIO_FILE (model: $MODEL)" >&2
  "${cmd[@]}"

  # Report the output name: the input's file name with its directory and last
  # extension removed, plus the requested format as the extension.
  local base="${AUDIO_FILE##*/}"
  base="${base%.*}"
  echo "" >&2
  echo "Output: ${base}.${OUTPUT_FORMAT}" >&2
}

main "$@"