- llm: Local LLM wrapper for llama-swap - homelab-status: Quick K8s/cluster health check - calc: Python/JS REPL for quick calculations - transcribe: Whisper audio transcription wrapper Added to fish PATH.
95 lines
2.3 KiB
Bash
Executable File
95 lines
2.3 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Transcribe audio files using Whisper.
#
# Usage: transcribe [options] <audio_file>
#   transcribe recording.mp3
#   transcribe -m medium meeting.wav
#
# Environment:
#   WHISPER_MODEL  Model used when -m/--model is not given (default: base)

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Defaults; each one can be overridden by a command-line option.
MODEL="${WHISPER_MODEL:-base}" # -m, --model
LANGUAGE=""                    # -l, --language (empty: no --language flag is passed)
OUTPUT_FORMAT="txt"            # -f, --format
AUDIO_FILE=""                  # positional argument: the file to transcribe

#######################################
# Print the usage text: synopsis, options, examples, model guidance and
# environment variables.
# Arguments: none
# Outputs:   writes the help text to stdout
#######################################
show_help() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'
Usage: transcribe [options] <audio_file>

Options:
  -m, --model MODEL     Whisper model (tiny, base, small, medium, large)
                        Default: base (fast), use medium/large for accuracy
  -l, --language LANG   Force language (e.g., en, es, fr)
  -f, --format FORMAT   Output format (txt, json, srt, vtt)
  -h, --help            Show this help

Examples:
  transcribe meeting.mp3                   # Quick transcription
  transcribe -m medium interview.wav       # Better accuracy
  transcribe -l en -f srt podcast.mp3      # English subtitles

Models (speed vs accuracy):
  tiny    - Fastest, lowest accuracy (~1GB VRAM)
  base    - Fast, good accuracy (~1GB VRAM) [default]
  small   - Balanced (~2GB VRAM)
  medium  - Better accuracy (~5GB VRAM)
  large   - Best accuracy (~10GB VRAM)

Environment:
  WHISPER_MODEL         Default model (default: base)
EOF
}

#######################################
# Parse command-line arguments.
# Globals:   MODEL, LANGUAGE, OUTPUT_FORMAT, AUDIO_FILE (written)
# Arguments: the script's command-line arguments
# Outputs:   error messages on stderr; help text via show_help for -h/--help
# Exits:     0 after printing help; 1 on an unknown option or when an
#            option that takes a value is given without one
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -m | --model | -l | --language | -f | --format)
        # Without this check a trailing "-m" makes "shift 2" fail, and under
        # "set -e" the script would exit without any message.
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          -m | --model) MODEL="$2" ;;
          -l | --language) LANGUAGE="$2" ;;
          -f | --format) OUTPUT_FORMAT="$2" ;;
        esac
        shift 2
        ;;
      -h | --help)
        show_help
        exit 0
        ;;
      -*)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
      *)
        # Positional argument; if several are given, the last one wins.
        AUDIO_FILE="$1"
        shift
        ;;
    esac
  done
}

parse_args "$@"

# An audio file argument is mandatory. The usage text accompanies the error
# and, being a diagnostic here, goes to stderr rather than stdout.
if [[ -z "$AUDIO_FILE" ]]; then
  echo "Error: No audio file provided" >&2
  show_help >&2
  exit 1
fi

# The argument must name an existing regular file.
if [[ ! -f "$AUDIO_FILE" ]]; then
  echo "Error: File not found: $AUDIO_FILE" >&2
  exit 1
fi

#######################################
# Build the whisper command line and run it.
# The command is assembled as an argv array and executed directly, so the
# file name and option values are passed verbatim: quotes, spaces, "$(...)"
# or backticks in them are never re-parsed by the shell (the previous
# string + eval form would have executed them).
# Globals:   AUDIO_FILE, MODEL, OUTPUT_FORMAT, LANGUAGE (read)
# Outputs:   progress line on stderr, plus whatever whisper prints
# Returns:   whisper's exit status
#######################################
run_transcription() {
  local -a cmd=(whisper "$AUDIO_FILE" --model "$MODEL" --output_format "$OUTPUT_FORMAT")

  # Only force a language when one was requested.
  if [[ -n "$LANGUAGE" ]]; then
    cmd+=(--language "$LANGUAGE")
  fi

  echo "Transcribing: $AUDIO_FILE (model: $MODEL)" >&2
  "${cmd[@]}"
}

# Run transcription
run_transcription

# Show output location: the input's file name with its directory and last
# extension removed, plus the chosen format as the new extension.
# NOTE(review): presumably whisper writes this file to the current directory
# (no output directory is passed) — confirm.
BASE="${AUDIO_FILE##*/}"
BASE="${BASE%.*}"
echo "" >&2
echo "Output: ${BASE}.${OUTPUT_FORMAT}" >&2