Files
swarm-master/litellm-dedup.sh
William Valentin c94bbe5de8 Add LiteLLM maintenance scripts and systemd health-check timer
litellm-dedup.sh: removes duplicate model DB entries (idempotent, supports
--dry-run). Root cause of duplicates was litellm-init running multiple times
before the DB was populated, causing all entries to be inserted concurrently.

litellm-health-check.sh: runs every 6 hours via systemd user timer; checks
liveness (auto-restarts container if unresponsive) and duplicate entries
(auto-dedups when DEDUP=1). Logs to litellm-maintenance.log.

Systemd units: litellm-health-check.{service,timer} installed under
~/.config/systemd/user/.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 13:33:16 -07:00

74 lines
2.0 KiB
Bash
Executable File

#!/usr/bin/env bash
# Removes duplicate model entries from the LiteLLM DB.
# Keeps the first registered entry per model name; deletes the rest.
# Safe to run at any time — idempotent, no-op when no duplicates exist.
# Usage: ./litellm-dedup.sh [--dry-run]
#
# Environment:
#   LITELLM_URL         Base URL of the LiteLLM proxy
#                       (default: http://localhost:18804).
#   LITELLM_MASTER_KEY  Bearer token for the proxy. When unset or empty it is
#                       read from LITELLM_ENV_FILE.
#   LITELLM_ENV_FILE    dotenv file holding LITELLM_MASTER_KEY=...
#                       (default: /home/will/lab/swarm/.env).
set -euo pipefail

LITELLM_URL="${LITELLM_URL:-http://localhost:18804}"
LITELLM_ENV_FILE="${LITELLM_ENV_FILE:-/home/will/lab/swarm/.env}"
LITELLM_MASTER_KEY="${LITELLM_MASTER_KEY:-}"

# Only the first argument is inspected. Anything other than the known flags
# is rejected so that a mistyped --dry-run can never trigger real deletions.
DRY_RUN=0
case "${1:-}" in
  "") ;;
  --dry-run) DRY_RUN=1 ;;
  -h | --help)
    printf 'Usage: %s [--dry-run]\n' "${0##*/}"
    exit 0
    ;;
  *)
    printf 'ERROR: unknown argument: %s\nUsage: %s [--dry-run]\n' \
      "$1" "${0##*/}" >&2
    exit 2
    ;;
esac

if [[ -z "$LITELLM_MASTER_KEY" ]]; then
  if [[ ! -r "$LITELLM_ENV_FILE" ]]; then
    printf 'ERROR: LITELLM_MASTER_KEY is not set and %s is not readable\n' \
      "$LITELLM_ENV_FILE" >&2
    exit 1
  fi
  # Anchor on the assignment itself so comments and similarly named variables
  # are ignored, take only the first match, and keep everything after the
  # first "=" (-f2-) because keys may themselves contain "=".
  # "|| true": a missing entry is reported by the emptiness check below
  # instead of a silent exit caused by set -e/pipefail.
  LITELLM_MASTER_KEY=$(grep -m1 '^LITELLM_MASTER_KEY=' "$LITELLM_ENV_FILE" \
    | cut -d= -f2-) || true
  # Tolerate CRLF files and one pair of matching surrounding quotes.
  LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY%$'\r'}
  case "$LITELLM_MASTER_KEY" in
    \"*\" | \'*\')
      LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:1:${#LITELLM_MASTER_KEY}-2}
      ;;
  esac
  if [[ -z "$LITELLM_MASTER_KEY" ]]; then
    printf 'ERROR: no LITELLM_MASTER_KEY entry found in %s\n' \
      "$LITELLM_ENV_FILE" >&2
    exit 1
  fi
fi
# Writes one timestamped line to stdout: "[YYYY-MM-DD HH:MM:SS] <args>".
# All arguments are joined with single spaces into one message.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
# Fetch all DB model entries.
#   -s  no progress meter        -S  still print curl's own error message
#   -f  treat HTTP >= 400 as failure (non-zero exit)
#   --max-time  bound the whole request so an unresponsive server cannot
#               block this script forever (override with LITELLM_TIMEOUT,
#               in seconds).
# On any failure curl's reason goes to stderr and the script exits 1.
RESPONSE=$(curl -sS -f --max-time "${LITELLM_TIMEOUT:-30}" \
  -H "Authorization: Bearer $LITELLM_MASTER_KEY" \
  "$LITELLM_URL/v2/model/info") || {
  log "ERROR: failed to reach LiteLLM at $LITELLM_URL"
  exit 1
}
# Find duplicate IDs (keep first occurrence of each name, collect the rest).
#
# find_duplicate_ids reads the /v2/model/info JSON document on stdin and
# prints, one per line, the model_info.id of every entry flagged db_model
# whose model_name was already seen earlier in the list.
#   - Entries without a truthy model_info.db_model are ignored.
#   - A null or missing model_info is treated as empty instead of crashing.
#   - Duplicates that carry no id are skipped (nothing could be deleted).
# Exits non-zero (Python traceback on stderr) when stdin is not valid JSON.
#
# NOTE(review): entries are matched on model_name only. If several DB entries
# intentionally share a name (e.g. multiple deployments behind one alias),
# they would be reported as duplicates too; confirm this matches the setup.
find_duplicate_ids() {
  # The program is single-quoted so the shell never expands anything in it;
  # keep the Python free of single quotes.
  python3 -c '
import json
import sys

payload = json.load(sys.stdin)
entries = payload.get("data") if isinstance(payload, dict) else None
seen = set()
for entry in entries or []:
    if not isinstance(entry, dict):
        continue
    info = entry.get("model_info") or {}
    if not info.get("db_model"):
        continue
    name = entry.get("model_name", "")
    if name not in seen:
        # First DB entry for this name is the one that is kept.
        seen.add(name)
        continue
    db_id = info.get("id")
    if db_id:
        print(db_id)
'
}

DUPES=$(printf '%s\n' "$RESPONSE" | find_duplicate_ids)
# Number of non-empty lines in DUPES. grep -c prints 0 but exits 1 when
# nothing matches, hence the "|| true" to keep set -e from aborting.
TOTAL=$(grep -c . <<<"$DUPES" 2>/dev/null || true)

# Nothing to do: report and stop successfully.
if [ 0 -eq "$TOTAL" ]; then
  log "No duplicates found."
  exit 0
fi

log "Found $TOTAL duplicate entries."

# Dry run: list the IDs that would be removed, then stop before any deletion.
if (( DRY_RUN == 1 )); then
  log "Dry run — would delete:"
  printf '%s\n' "$DUPES"
  exit 0
fi
# Prints the JSON request body for /model/delete for the id given as $1.
# Backslashes and double quotes are escaped so an unusual id cannot produce
# malformed JSON or inject extra fields.
delete_payload() {
  local id=$1
  id=${id//\\/\\\\}
  id=${id//\"/\\\"}
  printf '{"id": "%s"}' "$id"
}

# Delete every id listed in DUPES (one per line, blank lines ignored).
# Each deletion is attempted independently; a failure is logged and counted
# but does not stop the remaining deletions.
ok=0
fail=0
while IFS= read -r id; do
  [[ -n "$id" ]] || continue
  # -sS -f with 2>&1: on failure curl's own message (e.g. the HTTP status)
  # ends up in $result so the WARN line below states the reason.
  # --max-time bounds each request (override with LITELLM_TIMEOUT, seconds).
  result=$(curl -sS -f --max-time "${LITELLM_TIMEOUT:-30}" -X POST \
    -H "Authorization: Bearer $LITELLM_MASTER_KEY" \
    -H "Content-Type: application/json" \
    -d "$(delete_payload "$id")" \
    "$LITELLM_URL/model/delete" 2>&1) || result=${result:-error}
  if [[ "$result" == *"deleted successfully"* ]]; then
    ok=$((ok + 1))
  else
    log "WARN: failed to delete $id: $result"
    fail=$((fail + 1))
  fi
done <<<"$DUPES"

log "Deleted: $ok Failed: $fail"

# Surface partial failure to the caller (systemd unit, wrapper script)
# instead of always reporting success.
if [ "$fail" -gt 0 ]; then
  exit 1
fi