diff --git a/GUARDRAILS.md b/GUARDRAILS.md new file mode 100644 index 0000000..4d83856 --- /dev/null +++ b/GUARDRAILS.md @@ -0,0 +1,30 @@ +# GUARDRAILS.md + +These are enforcement-style rules for Flynn (Clawdbot assistant). If there’s any ambiguity, ask William. + +## Hard blocks (never do) +- `kubectl delete namespace ` +- `rm -rf /` (or anything equivalent that targets `/`) +- `rm -rf ~` + +If the user asks for these, refuse and propose a safer alternative (e.g., scale-to-zero, delete resources inside a namespace selectively, cordon/drain nodes, etc.). + +## Confirm-required actions +### External communications +Before sending anything externally, always: +1) present a draft +2) ask “Send it?” / “Approve?” +3) only send after explicit approval + +Applies to: +- `message` tool sends (any channel) +- email sends (Gmail via `gog`, IMAP/SMTP via `himalaya`) + +### Potentially destructive shell/K8s actions +- `kubectl delete ...` (anything other than namespaces) requires confirmation +- `rm` outside the workspace requires confirmation +- system service state changes (`systemctl stop/disable/mask`) require confirmation + +## Preferred safer patterns +- Prefer `trash` over `rm` when feasible +- For K8s “cleanup”, prefer labeling + ArgoCD sync/prune or deleting specific workloads, not entire namespaces diff --git a/dupes-report.txt b/dupes-report.txt new file mode 100644 index 0000000..d3f9f1b --- /dev/null +++ b/dupes-report.txt @@ -0,0 +1,115 @@ +🔍 Scanning /home/will for duplicates (>1MB)... + Skipping: .cache, .cargo, .config/llama-swap, .git, .local, .local/lib/docker, .local/share/Steam, .local/share/containers, .npm, .ollama, .thumbnails, node_modules, tmp, tmp.* + +📊 Results: + Files scanned: 3422 + Skipped: 302 + Duplicate files: 628 + Wasted space: 6.10 GB + +📁 ARCHIVE (10 groups): + 2 copies, 1.2 MB each + → /home/will/go/pkg/mod/github.com/klauspost/compress@v1.18.0/flate/testdata/fuzz/FuzzEncoding.zip + /home/will/go/pkg/mod/github.com/klauspost/compress@v1.18.1/flate/testdata/fuzz/FuzzEncoding.zip + 2 copies, 3.0 MB each + → /home/will/go/pkg/mod/github.com/klauspost/compress@v1.18.0/s2/testdata/fuzz/block-corpus-enc.zip + /home/will/go/pkg/mod/github.com/klauspost/compress@v1.18.1/s2/testdata/fuzz/block-corpus-enc.zip + 2 copies, 8.0 MB each + → /home/will/go/pkg/mod/github.com/klauspost/compress@v1.18.0/s2/testdata/fuzz/block-corpus-raw.zip + /home/will/go/pkg/mod/github.com/klauspost/compress@v1.18.1/s2/testdata/fuzz/block-corpus-raw.zip + 2 copies, 1.4 MB each + → /home/will/go/pkg/mod/github.com/klauspost/compress@v1.18.0/zstd/testdata/comp-crashers.zip + /home/will/go/pkg/mod/github.com/klauspost/compress@v1.18.1/zstd/testdata/comp-crashers.zip + 2 copies, 6.6 MB each + → /home/will/go/pkg/mod/github.com/klauspost/compress@v1.18.0/zstd/testdata/decoder.zip + /home/will/go/pkg/mod/github.com/klauspost/compress@v1.18.1/zstd/testdata/decoder.zip + ... and 5 more groups + +📁 IMAGE (46 groups): + 2 copies, 3.9 MB each + → /home/will/Downloads/20230514_095432.jpg + /home/will/Pictures/20230514_095432.jpg + 2 copies, 4.0 MB each + → /home/will/Downloads/20230514_111241.jpg + /home/will/Pictures/20230514_111241.jpg + 2 copies, 2.7 MB each + → /home/will/Downloads/20230514_122541.jpg + /home/will/Pictures/20230514_122541.jpg + 2 copies, 4.1 MB each + → /home/will/Downloads/20230526_084947.jpg + /home/will/Pictures/20230526_084947.jpg + 2 copies, 4.3 MB each + → /home/will/Downloads/20230603_073343.jpg + /home/will/Pictures/20230603_073343.jpg + ... 
and 41 more groups + +📁 OTHER (177 groups): + 2 copies, 72.2 MB each + → /home/will/.config/Code/User/workspaceStorage/420469884bb0bfd214adb3561809d46b/state.vscdb + /home/will/.config/Code/User/workspaceStorage/420469884bb0bfd214adb3561809d46b/state.vscdb.backup + 3 copies, 4.0 MB each + → /home/will/.config/google-chrome/BrowserMetrics-spare.pma + /home/will/.config/chromium/BrowserMetrics-spare.pma + /home/will/.zoom/data/cefcache/140.0.7339.185/BrowserMetrics-spare.pma + 2 copies, 1.8 MB each + → /home/will/.config/google-chrome/Safe Browsing/ChromeExtMalware.store.32_13413622236785599 + /home/will/.config/chromium/Safe Browsing/ChromeExtMalware.store.32_13413622513482255 + 2 copies, 1.1 MB each + → /home/will/.config/google-chrome/OpenCookieDatabase/2024.10.17.0/open_cookie_database.json + /home/will/.config/chromium/OpenCookieDatabase/2024.10.17.0/open_cookie_database.json + 2 copies, 4.4 MB each + → /home/will/.config/Newelle/pip/30fcd23745efe32ce681__mypyc.cpython-313-x86_64-linux-gnu.so + /home/will/Code/active/python/unitforge/.venv/lib/python3.13/site-packages/30fcd23745efe32ce681__mypyc.cpython-313-x86_64-linux-gnu.so + ... and 172 more groups + +📁 PDF (3 groups): + 3 copies, 2.6 MB each + → /home/will/Downloads/William_Valentin-Other_part-filling.pdf + /home/will/Downloads/Case/William_Valentin-Other_part-filling.pdf + /home/will/Documents/verbatim-key/print/Office Depot Scan 10-11-2023_12-40-29-272.pdf + 2 copies, 5.9 MB each + → /home/will/Downloads/Case/Petitioner/22-2-15707-1 SEA_TMORPRT.pdf + /home/will/Documents/Legal/22-2-15707-1 SEA_TMORPRT.pdf + 3 copies, 1.2 MB each + → /home/will/Documents/Arret_travail-William_Valentin.pdf + /home/will/.lmstudio/user-files/1768691090132 - 830.pdf + /home/will/.lmstudio/user-files/1768691109158 - 724.pdf + +📁 TEXT (18 groups): + 14 copies, 2.1 MB each + → /home/will/.config/Newelle/get-pip.py + /home/will/.config/Newelle/get-pip.py.1 + /home/will/.config/Newelle/get-pip.py.2 + /home/will/.config/Newelle/get-pip.py.3 + /home/will/.config/Newelle/get-pip.py.4 + /home/will/.config/Newelle/get-pip.py.5 + /home/will/.config/Newelle/get-pip.py.6 + /home/will/.config/Newelle/get-pip.py.7 + /home/will/.config/Newelle/get-pip.py.8 + /home/will/.config/Newelle/get-pip.py.9 + /home/will/.config/Newelle/get-pip.py.10 + /home/will/.config/Newelle/get-pip.py.11 + /home/will/.config/Newelle/get-pip.py.12 + /home/will/.antigravity/extensions/ms-python.python-2026.0.0-universal/python_files/get-pip.py + 2 copies, 2.2 MB each + → /home/will/.vscode/extensions/github.copilot-chat-0.36.1/ThirdPartyNotices.txt + /home/will/.vscode/extensions/github.copilot-chat-0.36.2/ThirdPartyNotices.txt + 2 copies, 10.4 MB each + → /home/will/.vscode/extensions/github.copilot-chat-0.36.1/dist/cli.js + /home/will/.vscode/extensions/github.copilot-chat-0.36.2/dist/cli.js + 5 copies, 4.1 MB each + → /home/will/Code/archive/testing-projects/copilot-test/.next/static/chunks/amp.js + /home/will/Code/archive/testing-projects/nextjs-blog/.next/static/chunks/amp.js + /home/will/Code/archive/testing-projects/test-projects/email-nextjs-app/.next/static/chunks/amp.js + /home/will/Code/archive/testing-projects/test-projects/next13-pokemons/.next/static/chunks/amp.js + /home/will/Code/archive/testing-projects/test-projects/pokemon-nextjs13-app/.next/static/chunks/amp.js + 2 copies, 3.6 MB each + → /home/will/Code/archive/testing-projects/copilot-test/.next/static/chunks/fallback/amp.js + 
/home/will/Code/archive/testing-projects/test-projects/pokemon-nextjs13-app/.next/static/chunks/fallback/amp.js + ... and 13 more groups + +📁 VIDEO (1 groups): + 2 copies, 6.4 MB each + → /home/will/repo/SwarmUI/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/comfyui_frontend_package/static/assets/video-BvOHf4P9.mp4 + /home/will/repo/SwarmUI/SwarmUI/dlbackend/ComfyUI/web_custom_versions/Comfy-Org_ComfyUI_frontend/1.35.9/assets/video-BvOHf4P9.mp4 + diff --git a/find_duplicates.py b/find_duplicates.py new file mode 100755 index 0000000..93a0609 --- /dev/null +++ b/find_duplicates.py @@ -0,0 +1,52 @@ +import os +import hashlib +from collections import defaultdict + +EXCLUDED_DIRS = {".cache", "node_modules", ".git", ".local", "tmp", ".npm", ".cargo", ".ollama", ".config/llama-swap"} +MIN_FILE_SIZE = 1 * 1024 * 1024 # 1MB + +def calculate_checksum(filepath): + sha256_hash = hashlib.sha256() + try: + with open(filepath, "rb") as f: + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + return sha256_hash.hexdigest() + except (OSError, IOError): + return None + +def find_duplicates(start_path): + checksums = defaultdict(list) + processed_files = 0 + for root, dirs, files in os.walk(start_path): + dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS] + for filename in files: + filepath = os.path.join(root, filename) + try: + if os.path.getsize(filepath) > MIN_FILE_SIZE: + checksum = calculate_checksum(filepath) + if checksum: + checksums[checksum].append(filepath) + processed_files += 1 + if processed_files % 100 == 0: + print(f"Processed {processed_files} files...") + except (OSError, IOError): + continue + return {k: v for k, v in checksums.items() if len(v) > 1} + +if __name__ == "__main__": + home_dir = os.path.expanduser("~") + print("Scanning for duplicate files. 
This may take some time...") + duplicates = find_duplicates(home_dir) + + grouped_by_extension = defaultdict(list) + for files in duplicates.values(): + for file in files: + ext = os.path.splitext(file)[-1].lower() + grouped_by_extension[ext].append(file) + + print("\nDuplicate Files Found:") + for ext, files in grouped_by_extension.items(): + print(f"\nFile Type: {ext if ext else 'No Extension'}") + for file in files: + print(f" {file}") \ No newline at end of file diff --git a/scripts/dupes.py b/scripts/dupes.py new file mode 100755 index 0000000..884e4a4 --- /dev/null +++ b/scripts/dupes.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +"""Find duplicate files by checksum.""" + +import hashlib +import os +from pathlib import Path +from collections import defaultdict +import mimetypes + +# Configuration +MIN_SIZE_MB = 1 +MIN_SIZE_BYTES = MIN_SIZE_MB * 1024 * 1024 + +# Directories to skip +SKIP_DIRS = { + '.cache', 'node_modules', '.git', '.local', 'tmp', 'tmp.*', + '.npm', '.cargo', '.ollama', '.config/llama-swap', + '.local/share/Steam', '.local/share/containers', + '.local/lib/docker', '.thumbnails' +} + +def should_skip(path): + """Check if path should be skipped.""" + parts = path.parts + for skip in SKIP_DIRS: + if '*' in skip: + pattern = skip.replace('*', '') + if any(pattern in p for p in parts): + return True + elif skip in parts: + return True + return False + +def sha256_file(filepath, block_size=65536): + """Calculate SHA256 checksum of a file.""" + hasher = hashlib.sha256() + with open(filepath, 'rb') as f: + for block in iter(lambda: f.read(block_size), b''): + hasher.update(block) + return hasher.hexdigest() + +def get_file_type(filepath): + """Guess file type from extension.""" + mime, _ = mimetypes.guess_type(filepath) + if mime: + if mime.startswith('image/'): + return 'image' + elif mime.startswith('video/'): + return 'video' + elif mime.startswith('audio/'): + return 'audio' + elif 'pdf' in mime: + return 'pdf' + elif 'zip' in mime or 'tar' in mime or 'compressed' in mime: + return 'archive' + elif mime.startswith('text/'): + return 'text' + return 'other' + +def main(): + home = Path.home() + duplicates = defaultdict(list) + total_files = 0 + skipped = 0 + + print(f"🔍 Scanning {home} for duplicates (>{MIN_SIZE_MB}MB)...") + print(f" Skipping: {', '.join(sorted(SKIP_DIRS))}\n") + + for root, dirs, files in os.walk(home): + # Modify dirs in-place to skip + dirs[:] = [d for d in dirs if not any(s in d for s in SKIP_DIRS)] + + for filename in files: + filepath = Path(root) / filename + + if should_skip(filepath): + skipped += 1 + continue + + try: + if filepath.stat().st_size >= MIN_SIZE_BYTES: + checksum = sha256_file(filepath) + duplicates[checksum].append(filepath) + total_files += 1 + except (OSError, PermissionError) as e: + skipped += 1 + continue + + # Find actual duplicates + dupes_by_type = defaultdict(list) + total_dupes = 0 + total_wasted = 0 + + for checksum, files in duplicates.items(): + if len(files) > 1: + file_size = files[0].stat().st_size + file_type = get_file_type(files[0]) + dupes_by_type[file_type].append({ + 'checksum': checksum, + 'files': files, + 'size': file_size + }) + total_dupes += len(files) + total_wasted += file_size * (len(files) - 1) + + # Report + if not dupes_by_type: + print("✅ No duplicates found!") + return + + print(f"📊 Results:") + print(f" Files scanned: {total_files}") + print(f" Skipped: {skipped}") + print(f" Duplicate files: {total_dupes}") + print(f" Wasted space: {total_wasted / (1024**3):.2f} GB\n") + + for ftype, groups in 
sorted(dupes_by_type.items()):
+        print(f"📁 {ftype.upper()} ({len(groups)} groups):")
+        for group in groups[:5]:  # Limit to 5 per type
+            print(f"  {len(group['files'])} copies, {group['size'] / (1024**2):.1f} MB each")
+            print(f"  → {group['files'][0]}")
+            for f in group['files'][1:]:
+                print(f"    {f}")
+        if len(groups) > 5:
+            print(f"  ... and {len(groups) - 5} more groups")
+        print()
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/find-duplicates.sh b/scripts/find-duplicates.sh
new file mode 100755
index 0000000..2dac93a
--- /dev/null
+++ b/scripts/find-duplicates.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# Find duplicate files in a directory using checksums
+
+DIR="${1:-$HOME}"
+MIN_SIZE="${2:-1M}"  # Only consider files larger than this (default: 1M)
+
+echo "Scanning $DIR for duplicate files (min size: $MIN_SIZE)..."
+echo "Skipping: .cache, .cargo, .config, .git, .local, .npm, .ollama, node_modules, tmp, tmp.*"
+
+# Find files, compute md5sum, group by hash, show duplicates
+find "$DIR" \
+  -type f \
+  -size "+$MIN_SIZE" \
+  \( \
+    -not -path "*/.cache/*" \
+    -not -path "*/node_modules/*" \
+    -not -path "*/.git/*" \
+    -not -path "*/.local/*" \
+    -not -path "*/.config/*" \
+    -not -path "*/tmp/*" \
+    -not -path "*/tmp.*/*" \
+    -not -path "*/.npm/*" \
+    -not -path "*/.cargo/*" \
+    -not -path "*/.ollama/*" \
+    -not -path "*/.local/share/Steam/*" \
+    -not -path "*/.local/share/containers/*" \
+    -not -path "*/.local/lib/docker/*" \
+  \) \
+  -exec md5sum {} + 2>/dev/null | \
+  sort | \
+  uniq -D -w 32 | \
+  cut -c 35-
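+
+# Example usage (a sketch, not a tested workflow): assumes GNU findutils/xargs
+# and a `trash` command (e.g. trash-cli), following the GUARDRAILS.md preference
+# for `trash` over `rm`. The /tmp file names below are hypothetical.
+#   ./scripts/find-duplicates.sh "$HOME/Downloads" 10M > /tmp/dupes.txt
+#   # Review /tmp/dupes.txt, copy the paths you want to discard into /tmp/to-remove.txt, then:
+#   xargs -d '\n' trash < /tmp/to-remove.txt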