Files
clawdbot/scripts/find-duplicates.sh
2026-01-27 02:53:42 -08:00

35 lines
1.0 KiB
Bash
Executable File

#!/usr/bin/env bash
# Find duplicate files in a directory tree by comparing MD5 checksums.
#
# Usage: find-duplicates.sh [DIR] [MIN_SIZE]
#   DIR       directory to scan (default: $HOME)
#   MIN_SIZE  value handed to find(1) as -size "+MIN_SIZE"; only files larger
#             than this are hashed (default: 1M)
#
# Output (stdout): two banner lines, then the path of every file whose
# checksum is shared with at least one other file. Identical files end up on
# adjacent lines because the list is sorted by checksum.
#
# Exit status: 1 when DIR is not a directory; otherwise the status of the
# final pipeline stage.
#
# Requires GNU md5sum and GNU uniq (-D / -w).
#
# Strict mode (set -e / pipefail) is deliberately not enabled: find returns
# non-zero whenever it meets an unreadable directory, and the scan is meant to
# be best-effort in that case.

# Directory names that are never descended into. This single list drives both
# the banner and the find expression, so the two cannot drift apart.
# Entries are find(1) -name patterns, hence the quoted glob.
SKIP_DIRS=(
  .cache
  node_modules
  .git
  .local
  .config
  tmp
  'tmp.*'
  .npm
  .cargo
  .ollama
)
readonly SKIP_DIRS

#######################################
# Print the paths of duplicate files below a directory.
# Globals:   SKIP_DIRS (read)
# Arguments: $1 - directory to scan
#            $2 - minimum size, in find(1) -size syntax without the "+"
# Outputs:   one path per line on stdout, grouped by identical checksum
# Returns:   status of the last pipeline stage
#######################################
find_duplicates() {
  local dir=$1
  local min_size=$2
  local -a prune_expr=()
  local name

  # Keep find from parsing a relative path such as "-foo" as an option.
  if [[ "$dir" == -* ]]; then
    dir="./$dir"
  fi

  # Build: -name A -o -name B -o ...
  for name in "${SKIP_DIRS[@]}"; do
    prune_expr+=(-o -name "$name")
  done
  prune_expr=("${prune_expr[@]:1}") # drop the leading -o

  # -mindepth 1 keeps the starting directory itself out of the name tests, so
  # a scan root that lives under (or is named like) a skipped directory, e.g.
  # /tmp/tmp.abc123, is still scanned. -prune stops find from walking the
  # skipped trees at all instead of merely filtering their files afterwards.
  #
  # stderr is silenced on purpose: permission-denied noise from find and
  # md5sum is expected when scanning a home directory.
  #
  # Known limitation: GNU md5sum prefixes the line with a backslash for file
  # names containing a backslash or newline, which shifts the fixed-width
  # columns used below; such files may be reported inaccurately.
  find "$dir" -mindepth 1 \
    -type d \( "${prune_expr[@]}" \) -prune \
    -o -type f -size "+${min_size}" -exec md5sum {} + 2>/dev/null \
    | LC_ALL=C sort \
    | uniq -D -w 32 \
    | cut -c 35-
}

#######################################
# Entry point: parse arguments, validate, print banner, run the scan.
# Globals:   SKIP_DIRS (read), HOME (read)
# Arguments: $1 - directory to scan (optional), $2 - minimum size (optional)
# Outputs:   banner and duplicate paths on stdout, errors on stderr
# Returns:   1 if the directory is invalid, else find_duplicates' status
#######################################
main() {
  local dir="${1:-${HOME:-}}"
  local min_size="${2:-1M}" # Only files larger than 1MB by default
  local skip_list

  # Without this check a bad path would be hidden by the 2>/dev/null on find
  # and the script would report "no duplicates" for a directory that was
  # never scanned.
  if [[ ! -d "$dir" ]]; then
    printf 'Error: not a directory: %s\n' "$dir" >&2
    return 1
  fi

  printf -v skip_list '%s, ' "${SKIP_DIRS[@]}"
  printf '%s\n' "Scanning $dir for duplicate files (min size: $min_size)..."
  printf '%s\n' "Skipping: ${skip_list%, }"

  find_duplicates "$dir" "$min_size"
}

main "$@"