#!/usr/bin/env bash
# Local LLM wrapper for llama-swap
# Usage: llm [model] "prompt"
#        llm -m model "prompt"
#        echo "prompt" | llm [model]

set -e

ENDPOINT="${LLAMA_SWAP_URL:-http://127.0.0.1:8080}"
DEFAULT_MODEL="${LLAMA_SWAP_MODEL:-gemma}"
MAX_TOKENS="${LLAMA_SWAP_MAX_TOKENS:-2048}"

# Parse args
MODEL="$DEFAULT_MODEL"
PROMPT=""

while [[ $# -gt 0 ]]; do
  case $1 in
    -m|--model)
      MODEL="$2"
      shift 2
      ;;
    -t|--tokens)
      MAX_TOKENS="$2"
      shift 2
      ;;
    -h|--help)
      echo "Usage: llm [-m model] [-t max_tokens] \"prompt\""
      echo ""
      echo "Models: gemma, qwen3, coder, glm, reasoning, gpt-oss"
      echo ""
      echo "Examples:"
      echo "  llm \"What is 2+2?\""
      echo "  llm -m coder \"Write a Python hello world\""
      echo "  echo \"Explain this\" | llm qwen3"
      echo ""
      echo "Environment:"
      echo "  LLAMA_SWAP_URL         Endpoint (default: http://127.0.0.1:8080)"
      echo "  LLAMA_SWAP_MODEL       Default model (default: gemma)"
      echo "  LLAMA_SWAP_MAX_TOKENS  Max tokens (default: 2048)"
      exit 0
      ;;
    -*)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
    *)
      if [[ -z "$PROMPT" ]]; then
        # Check if it's a model name
        if [[ "$1" =~ ^(gemma|qwen3|coder|glm|reasoning|gpt-oss)$ ]]; then
          MODEL="$1"
        else
          PROMPT="$1"
        fi
      else
        PROMPT="$PROMPT $1"
      fi
      shift
      ;;
  esac
done

# Read from stdin if no prompt
if [[ -z "$PROMPT" ]]; then
  if [[ ! -t 0 ]]; then
    PROMPT=$(cat)
  else
    echo "Error: No prompt provided" >&2
    exit 1
  fi
fi

# Check if llama-swap is running
if ! curl -sf "$ENDPOINT/health" >/dev/null 2>&1; then
  echo "Error: llama-swap not running at $ENDPOINT" >&2
  echo "Start with: systemctl --user start llama-swap" >&2
  exit 1
fi

# Build JSON payload
JSON=$(jq -n \
  --arg model "$MODEL" \
  --arg prompt "$PROMPT" \
  --argjson max_tokens "$MAX_TOKENS" \
  '{model: $model, messages: [{role: "user", content: $prompt}], max_tokens: $max_tokens}')

# Make request and extract response
RESPONSE=$(curl -s "$ENDPOINT/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d "$JSON")

# Check for error
if echo "$RESPONSE" | jq -e '.error' >/dev/null 2>&1; then
  echo "Error: $(echo "$RESPONSE" | jq -r '.error.message')" >&2
  exit 1
fi

# Extract content
echo "$RESPONSE" | jq -r '.choices[0].message.content'