- llm: Local LLM wrapper for llama-swap
- homelab-status: Quick K8s/cluster health check
- calc: Python/JS REPL for quick calculations
- transcribe: Whisper audio transcription wrapper

Added to fish PATH.
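A minimal sketch of the "added to fish PATH" step, assuming the scripts live in ~/.local/bin (the actual directory is not stated here):

    fish_add_path ~/.local/bin   # persists via the universal $fish_user_paths (fish 3.2+)

After that, llm, homelab-status, calc and transcribe resolve from any fish session.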
100 lines · 2.4 KiB · Bash · Executable File
#!/usr/bin/env bash
# Local LLM wrapper for llama-swap
# Usage: llm [model] "prompt"
#        llm -m model "prompt"
#        echo "prompt" | llm [model]

set -e

ENDPOINT="${LLAMA_SWAP_URL:-http://127.0.0.1:8080}"
DEFAULT_MODEL="${LLAMA_SWAP_MODEL:-gemma}"
MAX_TOKENS="${LLAMA_SWAP_MAX_TOKENS:-2048}"

# Parse args
MODEL="$DEFAULT_MODEL"
PROMPT=""

while [[ $# -gt 0 ]]; do
    case $1 in
        -m|--model)
            MODEL="$2"
            shift 2
            ;;
        -t|--tokens)
            MAX_TOKENS="$2"
            shift 2
            ;;
        -h|--help)
            echo "Usage: llm [-m model] [-t max_tokens] \"prompt\""
            echo ""
            echo "Models: gemma, qwen3, coder, glm, reasoning, gpt-oss"
            echo ""
            echo "Examples:"
            echo "  llm \"What is 2+2?\""
            echo "  llm -m coder \"Write a Python hello world\""
            echo "  echo \"Explain this\" | llm qwen3"
            echo ""
            echo "Environment:"
            echo "  LLAMA_SWAP_URL         Endpoint (default: http://127.0.0.1:8080)"
            echo "  LLAMA_SWAP_MODEL       Default model (default: gemma)"
            echo "  LLAMA_SWAP_MAX_TOKENS  Max tokens (default: 2048)"
            exit 0
            ;;
        -*)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
        *)
            if [[ -z "$PROMPT" ]]; then
                # Check if it's a model name
                if [[ "$1" =~ ^(gemma|qwen3|coder|glm|reasoning|gpt-oss)$ ]]; then
                    MODEL="$1"
                else
                    PROMPT="$1"
                fi
            else
                PROMPT="$PROMPT $1"
            fi
            shift
            ;;
    esac
done

# Read from stdin if no prompt
if [[ -z "$PROMPT" ]]; then
    if [[ ! -t 0 ]]; then
        PROMPT=$(cat)
    else
        echo "Error: No prompt provided" >&2
        exit 1
    fi
fi

# Check if llama-swap is running
if ! curl -sf "$ENDPOINT/health" >/dev/null 2>&1; then
    echo "Error: llama-swap not running at $ENDPOINT" >&2
    echo "Start with: systemctl --user start llama-swap" >&2
    exit 1
fi

# Build JSON payload
JSON=$(jq -n \
    --arg model "$MODEL" \
    --arg prompt "$PROMPT" \
    --argjson max_tokens "$MAX_TOKENS" \
    '{model: $model, messages: [{role: "user", content: $prompt}], max_tokens: $max_tokens}')

# Make request and extract response
RESPONSE=$(curl -s "$ENDPOINT/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "$JSON")

# Check for error
if echo "$RESPONSE" | jq -e '.error' >/dev/null 2>&1; then
    echo "Error: $(echo "$RESPONSE" | jq -r '.error.message')" >&2
    exit 1
fi

# Extract content
echo "$RESPONSE" | jq -r '.choices[0].message.content'
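For debugging, the two endpoints the wrapper relies on can be exercised by hand. This is only a sketch: the model name and max_tokens value are arbitrary example values, not requirements.

    #!/usr/bin/env bash
    # Manual equivalent of the wrapper's two HTTP calls, for debugging.
    ENDPOINT="${LLAMA_SWAP_URL:-http://127.0.0.1:8080}"

    # Same health check the wrapper performs before sending a prompt.
    curl -sf "$ENDPOINT/health" && echo "llama-swap is up"

    # Raw chat completion request; "gemma" and max_tokens=64 are example values.
    curl -s "$ENDPOINT/v1/chat/completions" \
        -H "Content-Type: application/json" \
        -d '{"model": "gemma", "messages": [{"role": "user", "content": "What is 2+2?"}], "max_tokens": 64}' \
        | jq .

The wrapper's final jq filter (.choices[0].message.content) pulls the reply text out of the same response shown raw here, which makes it easy to see why an extraction came back empty.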