#!/usr/bin/env bash
# Local LLM wrapper for llama-swap
# Usage: llm [model] "prompt"
#        llm -m model "prompt"
#        echo "prompt" | llm [model]

set -e

ENDPOINT="${LLAMA_SWAP_URL:-http://127.0.0.1:8080}"
DEFAULT_MODEL="${LLAMA_SWAP_MODEL:-gemma}"
MAX_TOKENS="${LLAMA_SWAP_MAX_TOKENS:-2048}"

# Parse args
MODEL="$DEFAULT_MODEL"
PROMPT=""

while [[ $# -gt 0 ]]; do
  case $1 in
    -m|--model)
      MODEL="$2"
      shift 2
      ;;
    -t|--tokens)
      MAX_TOKENS="$2"
      shift 2
      ;;
    -h|--help)
      echo "Usage: llm [-m model] [-t max_tokens] \"prompt\""
      echo ""
      echo "Models: gemma, qwen3, coder, glm, reasoning, gpt-oss"
      echo ""
      echo "Examples:"
      echo "  llm \"What is 2+2?\""
      echo "  llm -m coder \"Write a Python hello world\""
      echo "  echo \"Explain this\" | llm qwen3"
      echo ""
      echo "Environment:"
      echo "  LLAMA_SWAP_URL         Endpoint (default: http://127.0.0.1:8080)"
      echo "  LLAMA_SWAP_MODEL       Default model (default: gemma)"
      echo "  LLAMA_SWAP_MAX_TOKENS  Max tokens (default: 2048)"
      exit 0
      ;;
    -*)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
    *)
      if [[ -z "$PROMPT" ]]; then
        # Check if it's a model name
        if [[ "$1" =~ ^(gemma|qwen3|coder|glm|reasoning|gpt-oss)$ ]]; then
          MODEL="$1"
        else
          PROMPT="$1"
        fi
      else
        PROMPT="$PROMPT $1"
      fi
      shift
      ;;
  esac
done

# Read from stdin if no prompt
if [[ -z "$PROMPT" ]]; then
  if [[ ! -t 0 ]]; then
    PROMPT=$(cat)
  else
    echo "Error: No prompt provided" >&2
    exit 1
  fi
fi

# Check if llama-swap is running
if ! curl -sf "$ENDPOINT/health" >/dev/null 2>&1; then
  echo "Error: llama-swap not running at $ENDPOINT" >&2
  echo "Start with: systemctl --user start llama-swap" >&2
  exit 1
fi

# Build JSON payload
JSON=$(jq -n \
  --arg model "$MODEL" \
  --arg prompt "$PROMPT" \
  --argjson max_tokens "$MAX_TOKENS" \
  '{model: $model, messages: [{role: "user", content: $prompt}], max_tokens: $max_tokens}')

# Make request and extract response
RESPONSE=$(curl -s "$ENDPOINT/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d "$JSON")

# Check for error
if echo "$RESPONSE" | jq -e '.error' >/dev/null 2>&1; then
  echo "Error: $(echo "$RESPONSE" | jq -r '.error.message')" >&2
  exit 1
fi

# Extract content
echo "$RESPONSE" | jq -r '.choices[0].message.content'