#!/usr/bin/env python3
"""
RAG Search - Test Suite

Tests all components of the RAG search skill.

Each ``test_*`` function prints a PASS/FAIL line and returns a bool;
``main`` runs them all and returns a process exit code.
"""

import json
import subprocess
import sys
from pathlib import Path
from typing import Optional

# Only the runner entry points are exported. The test_* checks return bools
# for main() and are not meant to be star-imported into another module.
__all__ = ["main", "run_script"]

# Constants
SKILL_DIR = Path(__file__).parent.parent
SCRIPTS_DIR = SKILL_DIR / "scripts"
VENV_PYTHON = SKILL_DIR / "venv" / "bin" / "python"
DATA_DIR = Path.home() / ".claude" / "data" / "rag-search"


def run_script(script_name: str, args: Optional[list[str]] = None) -> tuple[int, str, str]:
    """Run a script and return (returncode, stdout, stderr).

    The script is looked up in SCRIPTS_DIR and executed with VENV_PYTHON.
    If the interpreter cannot be launched at all (e.g. the venv is missing),
    a non-zero return code (127) is returned with the OS error text as
    stderr instead of letting the exception abort the whole suite.
    """
    cmd = [str(VENV_PYTHON), str(SCRIPTS_DIR / script_name)]
    if args:
        cmd.extend(args)
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as e:
        return 127, "", f"Failed to launch {cmd[0]}: {e}"
    return result.returncode, result.stdout, result.stderr


def _require(condition: bool, message: str) -> None:
    """Raise AssertionError(message) unless *condition* holds.

    Used instead of ``assert`` so the checks still run under ``python -O``.
    """
    if not condition:
        raise AssertionError(message)


def _describe_error(error: BaseException) -> str:
    """Return the exception's message, or its type name when the message is empty."""
    return str(error) or type(error).__name__


def _ensure_venv_on_path() -> None:
    """Put the skill venv's site-packages directories at the front of sys.path.

    The interpreter version directory is discovered by globbing rather than
    hard-coded, and entries already on sys.path are not inserted twice.
    """
    lib_dir = SKILL_DIR / "venv" / "lib"
    if not lib_dir.is_dir():
        return
    for site_packages in sorted(lib_dir.glob("python*/site-packages")):
        entry = str(site_packages)
        if entry not in sys.path:
            sys.path.insert(0, entry)


def _collection_count(name: str) -> int:
    """Return the item count of the ChromaDB collection *name* under DATA_DIR."""
    _ensure_venv_on_path()
    import chromadb  # imported lazily: only available inside the skill venv

    client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
    return client.get_collection(name).count()


def _search_json(args: list[str], label: str) -> Optional[dict]:
    """Run search.py with *args* and return its parsed JSON object.

    Prints a FAIL line and returns None when the script exits non-zero or
    its stdout is not a JSON object. *label* names the search in the
    failure message.
    """
    returncode, stdout, stderr = run_script("search.py", args)
    if returncode != 0:
        print(f" FAIL: {label} search failed: {stderr}")
        return None
    try:
        payload = json.loads(stdout)
    except json.JSONDecodeError:
        payload = None
    # A non-object payload (list, string, ...) is as unusable as invalid JSON.
    if not isinstance(payload, dict):
        print(f" FAIL: Invalid JSON output: {stdout}")
        return None
    return payload


def test_chromadb_embeddings():
    """Test 1: ChromaDB + embeddings working."""
    print("Test 1: ChromaDB + embeddings...")

    # Add venv to path and test imports
    _ensure_venv_on_path()

    try:
        import chromadb
        from sentence_transformers import SentenceTransformer

        # Test ChromaDB
        client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
        _require(client is not None, "Failed to create ChromaDB client")

        # Test embedding model
        model = SentenceTransformer("all-MiniLM-L6-v2")
        embedding = model.encode("test query")
        _require(
            len(embedding) == 384,
            f"Expected 384 dimensions, got {len(embedding)}",
        )
    except Exception as e:
        print(f" FAIL: {_describe_error(e)}")
        return False

    print(" PASS: ChromaDB and embeddings working")
    return True


def test_personal_index():
    """Test 2: Personal index populated from ~/.claude/state."""
    print("Test 2: Personal index populated...")

    # Check if collection exists and has data
    try:
        count = _collection_count("personal")
        _require(count > 0, f"Personal collection is empty (count={count})")
    except Exception as e:
        print(f" FAIL: {_describe_error(e)}")
        return False

    print(f" PASS: Personal index has {count} chunks")
    return True


def test_docs_index():
    """Test 3: At least one external doc source indexed."""
    print("Test 3: External docs indexed...")

    # Check if collection exists and has data
    try:
        count = _collection_count("docs")
        _require(count > 0, f"Docs collection is empty (count={count})")

        # Also verify sources.json has at least one source
        sources_file = SKILL_DIR / "references" / "sources.json"
        with open(sources_file, encoding="utf-8") as f:
            sources = json.load(f)
        configured = sources.get("sources", [])
        _require(len(configured) > 0, "No sources configured")
    except Exception as e:
        print(f" FAIL: {_describe_error(e)}")
        return False

    print(f" PASS: Docs index has {count} chunks from {len(configured)} source(s)")
    return True


def test_search_returns_results():
    """Test 4: search.py returns relevant results."""
    print("Test 4: Search returns relevant results...")

    # Test personal search (an empty result set is only a warning)
    personal = _search_json(["--index", "personal", "decisions"], "Personal")
    if personal is None:
        return False
    if not personal.get("results", []):
        print(" WARN: No personal results found (may be expected if state is minimal)")

    # Test docs search (an empty result set is a failure)
    docs = _search_json(["--index", "docs", "kubernetes"], "Docs")
    if docs is None:
        return False
    if not docs.get("results", []):
        print(" FAIL: No docs results found for 'kubernetes'")
        return False

    # Test combined search: verify the shape of the output
    combined = _search_json(["configuration"], "Combined")
    if combined is None:
        return False
    for key in ("query", "results", "searched_collections"):
        if key not in combined:
            print(f" FAIL: Missing '{key}' in output")
            return False
    if len(combined["searched_collections"]) != 2:
        print(" FAIL: Should search both collections")
        return False

    print(" PASS: Search returns properly formatted results")
    return True


def test_skill_structure():
    """Test 5: All required files exist."""
    print("Test 5: Skill structure complete...")

    required_files = [
        SKILL_DIR / "SKILL.md",
        SCRIPTS_DIR / "search.py",
        SCRIPTS_DIR / "index_personal.py",
        SCRIPTS_DIR / "index_docs.py",
        SCRIPTS_DIR / "add_doc_source.py",
        SKILL_DIR / "references" / "sources.json",
    ]

    missing = [
        str(path.relative_to(SKILL_DIR))
        for path in required_files
        if not path.exists()
    ]

    if missing:
        print(f" FAIL: Missing files: {', '.join(missing)}")
        return False

    print(" PASS: All required files exist")
    return True


def main():
    """Run every check, print a summary, and return 0 if all passed, else 1."""
    print("=" * 60)
    print("RAG Search Test Suite")
    print("=" * 60)
    print()

    tests = [
        test_chromadb_embeddings,
        test_personal_index,
        test_docs_index,
        test_search_returns_results,
        test_skill_structure,
    ]

    results = []
    for test in tests:
        try:
            outcome = bool(test())
        except Exception as e:
            # A check that crashes counts as one failure; keep running the rest.
            print(f" FAIL: {test.__name__} raised {type(e).__name__}: {e}")
            outcome = False
        results.append(outcome)
        print()

    print("=" * 60)
    print("Summary")
    print("=" * 60)

    passed = sum(results)
    total = len(results)
    print(f"Passed: {passed}/{total}")

    if passed == total:
        print("\nAll tests passed!")
        return 0

    print(f"\n{total - passed} test(s) failed")
    return 1


if __name__ == "__main__":
    sys.exit(main())