#!/usr/bin/env bash
#
# Find duplicate files in a directory tree by comparing MD5 checksums.
#
# Usage: find-duplicates [DIR] [MIN_SIZE]
#   DIR       directory to scan                       (default: $HOME)
#   MIN_SIZE  find(1) -size threshold; only files
#             larger than this are hashed             (default: 1M)
#
# Output: the paths of all files whose checksum is shared with at least one
# other file, one per line, with files of equal content on adjacent lines.
#
# Requires GNU md5sum and a uniq that supports -D and -w.

# -e / pipefail are deliberately not enabled: find exits non-zero whenever it
# meets an unreadable directory, which is expected when scanning a home tree.
set -u

# Directory names (find -name patterns) whose whole subtree is skipped.
# Nested locations such as .local/share/Steam or .config/llama-swap are
# already covered by their parent entry.
readonly SKIP_DIRS=(
  .cache
  node_modules
  .git
  .local
  .config
  tmp
  'tmp.*'
  .npm
  .cargo
  .ollama
)

#######################################
# Print every file under a directory that has at least one byte-identical
# sibling (as judged by MD5), grouped by checksum.
# Globals:   SKIP_DIRS (read)
# Arguments: $1 - directory to scan
#            $2 - minimum size, in find(1) -size syntax (e.g. 1M, 500k)
# Outputs:   duplicate file paths on stdout, one per line
# Returns:   exit status of the final pipeline stage
#######################################
find_duplicates() {
  local dir=$1
  local min_size=$2
  local name
  local -a prune_expr=()

  # Build: -name A -o -name B -o ...
  for name in "${SKIP_DIRS[@]}"; do
    if (( ${#prune_expr[@]} > 0 )); then
      prune_expr+=(-o)
    fi
    prune_expr+=(-name "$name")
  done

  # -mindepth 1 keeps the starting directory itself from being tested, so a
  # scan rooted at e.g. /tmp/tmp.abc or ~/.local is not pruned away entirely.
  # -prune stops find from descending into skipped trees at all, rather than
  # walking them and discarding the results.
  # find's stderr is silenced on purpose: permission errors are expected.
  # NOTE: md5sum escapes file names containing backslashes or newlines and
  # prefixes those lines with '\', so such files may be mis-grouped.
  find "$dir" -mindepth 1 \
    \( -type d \( "${prune_expr[@]}" \) -prune \) -o \
    \( -type f -size "+${min_size}" -exec md5sum {} + \) 2>/dev/null \
    | LC_ALL=C sort \
    | uniq -D -w 32 \
    | cut -c 35-
}

#######################################
# Entry point: resolve arguments, announce the scan, run it.
# Arguments: $1 - directory to scan (optional)
#            $2 - minimum file size (optional)
# Returns:   1 if the directory does not exist, else find_duplicates' status
#######################################
main() {
  local dir=${1:-${HOME:-.}}
  local min_size=${2:-1M} # Only files larger than 1MB by default
  local skip_list

  if [[ ! -d "$dir" ]]; then
    printf 'error: not a directory: %s\n' "$dir" >&2
    return 1
  fi

  skip_list=$(printf '%s, ' "${SKIP_DIRS[@]}")
  echo "Scanning $dir for duplicate files (min size: $min_size)..."
  echo "Skipping: ${skip_list%, }"

  find_duplicates "$dir" "$min_size"
}

main "$@"