Files
clawdbot/scripts/transcribe
William Valentin 2c3d6afcdd Add workspace utility scripts
- llm: Local LLM wrapper for llama-swap
- homelab-status: Quick K8s/cluster health check
- calc: Python/JS REPL for quick calculations
- transcribe: Whisper audio transcription wrapper

Added to fish PATH.
2026-01-26 22:53:52 -08:00

95 lines
2.3 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# transcribe - transcribe an audio file using the Whisper CLI.
#
# Usage: transcribe [options] <audio_file>
#   transcribe recording.mp3
#   transcribe -m medium meeting.wav
#
# Environment:
#   WHISPER_MODEL  Model used when -m/--model is not given (default: base)

# Strict mode: abort on command failure (-e), on use of an unset variable (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Defaults; the option parsing below may override each of these.
MODEL="${WHISPER_MODEL:-base}"
LANGUAGE=""          # empty means no --language flag is passed to whisper
OUTPUT_FORMAT="txt"
AUDIO_FILE=""        # required positional argument; validated after parsing
#######################################
# Print the usage/help text for the transcribe command.
# Arguments: none
# Outputs:   help text on stdout, one entry per line
#######################################
show_help() {
  # A single printf emits every argument on its own line; '' yields a blank line.
  printf '%s\n' \
    'Usage: transcribe [options] <audio_file>' \
    '' \
    'Options:' \
    ' -m, --model MODEL Whisper model (tiny, base, small, medium, large)' \
    ' Default: base (fast), use medium/large for accuracy' \
    ' -l, --language LANG Force language (e.g., en, es, fr)' \
    ' -f, --format FORMAT Output format (txt, json, srt, vtt)' \
    ' -h, --help Show this help' \
    '' \
    'Examples:' \
    ' transcribe meeting.mp3 # Quick transcription' \
    ' transcribe -m medium interview.wav # Better accuracy' \
    ' transcribe -l en -f srt podcast.mp3 # English subtitles' \
    '' \
    'Models (speed vs accuracy):' \
    ' tiny - Fastest, lowest accuracy (~1GB VRAM)' \
    ' base - Fast, good accuracy (~1GB VRAM) [default]' \
    ' small - Balanced (~2GB VRAM)' \
    ' medium - Better accuracy (~5GB VRAM)' \
    ' large - Best accuracy (~10GB VRAM)' \
    '' \
    'Environment:' \
    ' WHISPER_MODEL Default model (default: base)'
}
#######################################
# Parse command-line arguments.
# Globals:   MODEL, LANGUAGE, OUTPUT_FORMAT, AUDIO_FILE (written)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   error messages on stderr; help text on stdout for -h/--help
# Returns:   0 when all arguments were consumed; exits the script with
#            status 1 on an unknown option or a missing option value, and
#            with status 0 after printing help.
# Notes:     options and the audio file may appear in any order; if several
#            positional arguments are given, the last one wins.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -m | --model | -l | --language | -f | --format)
        # These options take a value. Without this check a trailing
        # "-m" makes "shift 2" fail, which under "set -e" kills the script
        # with no message at all.
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires an argument" >&2
          exit 1
        fi
        case "$1" in
          -m | --model) MODEL="$2" ;;
          -l | --language) LANGUAGE="$2" ;;
          -f | --format) OUTPUT_FORMAT="$2" ;;
        esac
        shift 2
        ;;
      -h | --help)
        show_help
        exit 0
        ;;
      -*)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
      *)
        AUDIO_FILE="$1"
        shift
        ;;
    esac
  done
}

parse_args "$@"
# Validate the positional argument before any work is done:
#   - nothing supplied          -> error on stderr, usage text, exit 1
#   - path is not a regular file -> error on stderr, exit 1
if [[ -n "$AUDIO_FILE" ]]; then
  if ! [[ -f "$AUDIO_FILE" ]]; then
    printf '%s\n' "Error: File not found: $AUDIO_FILE" >&2
    exit 1
  fi
else
  printf '%s\n' "Error: No audio file provided" >&2
  show_help
  exit 1
fi
#######################################
# Run whisper on the selected audio file and report the output file name.
# Globals:   AUDIO_FILE, MODEL, OUTPUT_FORMAT, LANGUAGE (read)
# Arguments: none
# Outputs:   progress and the output file name on stderr; whatever whisper
#            itself prints is passed through unchanged.
# Returns:   whisper's failure aborts the script when "set -e" is active.
#######################################
run_transcription() {
  # Build the argument vector as an array and invoke whisper directly.
  # Assembling a string and running it through eval re-parses the values as
  # shell code, so a file name containing quotes, $(...) or backticks could
  # break the command or execute arbitrary commands.
  local -a whisper_args=(
    "$AUDIO_FILE"
    --model "$MODEL"
    --output_format "$OUTPUT_FORMAT"
  )
  # Only force a language when one was requested.
  if [[ -n "$LANGUAGE" ]]; then
    whisper_args+=(--language "$LANGUAGE")
  fi

  echo "Transcribing: $AUDIO_FILE (model: $MODEL)" >&2
  whisper "${whisper_args[@]}"

  # Reported name = input file name without its directory and last extension.
  # NOTE(review): presumably matches whisper's default output naming in the
  # current directory - confirm against the installed whisper version.
  local base="${AUDIO_FILE##*/}"
  base="${base%.*}"
  echo "" >&2
  echo "Output: ${base}.${OUTPUT_FORMAT}" >&2
}

run_transcription