Implement rag-search skill for semantic search

Add new skill for semantic search across personal state files and external documentation using ChromaDB and sentence-transformers.

Components:
- search.py: Main search interface (--index, --top-k flags)
- index_personal.py: Index ~/.claude/state files
- index_docs.py: Index external docs (git repos)
- add_doc_source.py: Manage doc sources
- test_rag.py: Test suite (5/5 passing)

Features:
- Two indexes: personal (116 chunks) and docs (k0s: 846 chunks)
- all-MiniLM-L6-v2 embeddings (384 dimensions)
- ChromaDB persistent storage
- JSON output with ranked results and metadata

Documentation:
- Added to component-registry.json with triggers
- Added /rag command alias
- Updated skills/README.md
- Resolved fc-013 (vector database for agent memory)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-04 23:41:38 -08:00
parent c21b152de8
commit 7ca8caeecb
11 changed files with 1781 additions and 155 deletions
--- a/skills/rag-search/scripts/test_rag.py
+++ b/skills/rag-search/scripts/test_rag.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+"""
+RAG Search - Test Suite
+
+Tests all components of the RAG search skill.
+"""
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+# Constants
+# Root of the skill: two directory levels above this file.
+SKILL_DIR = Path(__file__).parent.parent
+# Directory holding the skill's scripts that run_script() launches.
+SCRIPTS_DIR = SKILL_DIR / "scripts"
+# Interpreter inside the skill's virtualenv, used to launch those scripts.
+VENV_PYTHON = SKILL_DIR / "venv" / "bin" / "python"
+# Per-user data directory; the tests open the ChromaDB store at DATA_DIR / "chroma".
+DATA_DIR = Path.home() / ".claude" / "data" / "rag-search"
+
+
+def run_script(script_name: str, args: list[str] | None = None) -> tuple[int, str, str]:
+    """Run a skill script with the venv interpreter and capture its output.
+
+    Args:
+        script_name: File name of the script inside SCRIPTS_DIR.
+        args: Extra command-line arguments appended after the script path.
+
+    Returns:
+        A ``(returncode, stdout, stderr)`` tuple. If the process cannot be
+        started at all (for example the venv interpreter is missing), the
+        tuple is ``(1, "", <error message>)`` so callers report the problem
+        through their normal non-zero-exit path.
+    """
+    cmd = [str(VENV_PYTHON), str(SCRIPTS_DIR / script_name), *(args or [])]
+
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True)
+    except OSError as exc:
+        # A launch failure would otherwise abort the whole suite with a traceback.
+        return 1, "", f"Could not run {cmd[0]}: {exc}"
+    return result.returncode, result.stdout, result.stderr
+
+
+def test_chromadb_embeddings():
+    """Test 1: ChromaDB client and embedding model both load and work.
+
+    Opens the persistent ChromaDB store under DATA_DIR / "chroma" and encodes a
+    sample string with the "all-MiniLM-L6-v2" model, expecting a 384-element
+    embedding.
+
+    Returns:
+        True if every check passed, False otherwise (the reason is printed).
+    """
+    print("Test 1: ChromaDB + embeddings...")
+
+    # Make the venv's packages importable from the interpreter running this
+    # suite. Guard against duplicates, as the other index tests do, so repeated
+    # calls do not keep growing sys.path.
+    venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
+    if str(venv_path) not in sys.path:
+        sys.path.insert(0, str(venv_path))
+
+    try:
+        import chromadb
+        from sentence_transformers import SentenceTransformer
+
+        # Test ChromaDB
+        client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
+        assert client is not None, "Failed to create ChromaDB client"
+
+        # Test embedding model
+        model = SentenceTransformer("all-MiniLM-L6-v2")
+        embedding = model.encode("test query")
+        assert len(embedding) == 384, f"Expected 384 dimensions, got {len(embedding)}"
+
+        print("  PASS: ChromaDB and embeddings working")
+        return True
+    except Exception as e:
+        # Any import, storage, or model error counts as a test failure.
+        print(f"  FAIL: {e}")
+        return False
+
+
+def test_personal_index():
+    """Test 2: The "personal" ChromaDB collection exists and is non-empty.
+
+    Returns:
+        True when the collection holds at least one chunk, False otherwise
+        (the reason is printed).
+    """
+    print("Test 2: Personal index populated...")
+
+    # The venv's site-packages must be importable before chromadb is loaded.
+    site_packages = str(SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages")
+    if site_packages not in sys.path:
+        sys.path.insert(0, site_packages)
+
+    try:
+        import chromadb
+
+        store = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
+        count = store.get_collection("personal").count()
+
+        assert count > 0, f"Personal collection is empty (count={count})"
+        print(f"  PASS: Personal index has {count} chunks")
+        return True
+    except Exception as err:
+        # Missing collection, import problems and the empty-index assertion all land here.
+        print(f"  FAIL: {err}")
+        return False
+
+
+def test_docs_index():
+    """Test 3: At least one external doc source is configured and indexed.
+
+    Checks that the "docs" ChromaDB collection is non-empty and that
+    references/sources.json lists at least one entry under "sources".
+
+    Returns:
+        True if both checks pass, False otherwise (the reason is printed).
+    """
+    print("Test 3: External docs indexed...")
+
+    # Check if collection exists and has data
+    venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
+    if str(venv_path) not in sys.path:
+        sys.path.insert(0, str(venv_path))
+
+    try:
+        import chromadb
+
+        client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
+        collection = client.get_collection("docs")
+        count = collection.count()
+
+        assert count > 0, f"Docs collection is empty (count={count})"
+
+        # Also verify sources.json has at least one source.
+        # JSON text is UTF-8; do not depend on the locale's default encoding.
+        sources_file = SKILL_DIR / "references" / "sources.json"
+        with open(sources_file, encoding="utf-8") as f:
+            sources = json.load(f)
+        configured = sources.get("sources", [])
+        assert len(configured) > 0, "No sources configured"
+
+        print(f"  PASS: Docs index has {count} chunks from {len(configured)} source(s)")
+        return True
+    except Exception as e:
+        # Missing collection/file, bad JSON and failed assertions all count as FAIL.
+        print(f"  FAIL: {e}")
+        return False
+
+
+def _parse_search_output(stdout):
+    """Decode search.py stdout; print a FAIL line and return None unless it is a JSON object."""
+    try:
+        parsed = json.loads(stdout)
+    except json.JSONDecodeError:
+        parsed = None
+    if not isinstance(parsed, dict):
+        print(f"  FAIL: Invalid JSON output: {stdout}")
+        return None
+    return parsed
+
+
+def test_search_returns_results():
+    """Test 4: search.py runs and returns well-formed results.
+
+    Runs three searches through run_script(): one against the personal index,
+    one against the docs index, and one with no --index flag. An empty personal
+    result only produces a warning; an empty docs result is a failure. The
+    combined search must report "query", "results" and exactly two entries in
+    "searched_collections".
+
+    Returns:
+        True if every check passed, False otherwise (the reason is printed).
+    """
+    print("Test 4: Search returns relevant results...")
+
+    # Test personal search
+    returncode, stdout, stderr = run_script("search.py", ["--index", "personal", "decisions"])
+    if returncode != 0:
+        print(f"  FAIL: Personal search failed: {stderr}")
+        return False
+
+    result = _parse_search_output(stdout)
+    if result is None:
+        return False
+    if not result.get("results", []):
+        print("  WARN: No personal results found (may be expected if state is minimal)")
+
+    # Test docs search
+    returncode, stdout, stderr = run_script("search.py", ["--index", "docs", "kubernetes"])
+    if returncode != 0:
+        print(f"  FAIL: Docs search failed: {stderr}")
+        return False
+
+    result = _parse_search_output(stdout)
+    if result is None:
+        return False
+    if not result.get("results", []):
+        print("  FAIL: No docs results found for 'kubernetes'")
+        return False
+
+    # Test combined search
+    returncode, stdout, stderr = run_script("search.py", ["configuration"])
+    if returncode != 0:
+        print(f"  FAIL: Combined search failed: {stderr}")
+        return False
+
+    result = _parse_search_output(stdout)
+    if result is None:
+        return False
+
+    # Report schema problems as a FAIL line; a bare assert here would escape
+    # and abort the whole suite with a traceback.
+    required_keys = (
+        ("query", "Missing 'query' in output"),
+        ("results", "Missing 'results' in output"),
+        ("searched_collections", "Missing 'searched_collections'"),
+    )
+    for key, problem in required_keys:
+        if key not in result:
+            print(f"  FAIL: {problem}")
+            return False
+    if len(result["searched_collections"]) != 2:
+        print("  FAIL: Should search both collections")
+        return False
+
+    print("  PASS: Search returns properly formatted results")
+    return True
+
+
+def test_skill_structure():
+    """Test 5: Every file the skill requires is present on disk.
+
+    Returns:
+        True when nothing is missing, False otherwise (missing paths are
+        printed relative to SKILL_DIR).
+    """
+    print("Test 5: Skill structure complete...")
+
+    script_names = ("search.py", "index_personal.py", "index_docs.py", "add_doc_source.py")
+    expected_paths = (
+        SKILL_DIR / "SKILL.md",
+        *(SCRIPTS_DIR / name for name in script_names),
+        SKILL_DIR / "references" / "sources.json",
+    )
+
+    absent = [
+        str(path.relative_to(SKILL_DIR))
+        for path in expected_paths
+        if not path.exists()
+    ]
+
+    if not absent:
+        print("  PASS: All required files exist")
+        return True
+
+    print(f"  FAIL: Missing files: {', '.join(absent)}")
+    return False
+
+
+def main():
+    """Run every test in order, print a summary, and return the exit status.
+
+    Returns:
+        0 when all tests passed, 1 otherwise.
+    """
+    banner = "=" * 60
+
+    print(banner)
+    print("RAG Search Test Suite")
+    print(banner)
+    print()
+
+    suite = (
+        test_chromadb_embeddings,
+        test_personal_index,
+        test_docs_index,
+        test_search_returns_results,
+        test_skill_structure,
+    )
+
+    outcomes = []
+    for case in suite:
+        outcomes.append(case())
+        print()  # blank line after each test's output
+
+    print(banner)
+    print("Summary")
+    print(banner)
+
+    total = len(outcomes)
+    passed = sum(outcomes)  # each outcome is a bool, so this counts the passes
+    print(f"Passed: {passed}/{total}")
+
+    if passed != total:
+        print(f"\n{total - passed} test(s) failed")
+        return 1
+
+    print("\nAll tests passed!")
+    return 0
+
+
+if __name__ == "__main__":
+    # Use main()'s return value as the process exit status.
+    raise SystemExit(main())