Add new skill for semantic search across personal state files and external
documentation using ChromaDB and sentence-transformers.

Components:
- search.py: Main search interface (--index, --top-k flags)
- index_personal.py: Index ~/.claude/state files
- index_docs.py: Index external docs (git repos)
- add_doc_source.py: Manage doc sources
- test_rag.py: Test suite (5/5 passing)

Features:
- Two indexes: personal (116 chunks) and docs (k0s: 846 chunks)
- all-MiniLM-L6-v2 embeddings (384 dimensions)
- ChromaDB persistent storage
- JSON output with ranked results and metadata

Documentation:
- Added to component-registry.json with triggers
- Added /rag command alias
- Updated skills/README.md
- Resolved fc-013 (vector database for agent memory)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
231 lines
6.8 KiB
Python
Executable File
231 lines
6.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
RAG Search - Test Suite
|
|
|
|
Tests all components of the RAG search skill.
|
|
"""
|
|
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Constants
|
|
SKILL_DIR = Path(__file__).parent.parent
|
|
SCRIPTS_DIR = SKILL_DIR / "scripts"
|
|
VENV_PYTHON = SKILL_DIR / "venv" / "bin" / "python"
|
|
DATA_DIR = Path.home() / ".claude" / "data" / "rag-search"
|
|
|
|
|
|
def run_script(script_name: str, args: list[str] = None) -> tuple[int, str, str]:
|
|
"""Run a script and return (returncode, stdout, stderr)."""
|
|
cmd = [str(VENV_PYTHON), str(SCRIPTS_DIR / script_name)]
|
|
if args:
|
|
cmd.extend(args)
|
|
|
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
return result.returncode, result.stdout, result.stderr
|
|
|
|
|
|
def test_chromadb_embeddings():
|
|
"""Test 1: ChromaDB + embeddings working."""
|
|
print("Test 1: ChromaDB + embeddings...")
|
|
|
|
# Add venv to path and test imports
|
|
venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
|
|
sys.path.insert(0, str(venv_path))
|
|
|
|
try:
|
|
import chromadb
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
# Test ChromaDB
|
|
client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
|
|
assert client is not None, "Failed to create ChromaDB client"
|
|
|
|
# Test embedding model
|
|
model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
embedding = model.encode("test query")
|
|
assert len(embedding) == 384, f"Expected 384 dimensions, got {len(embedding)}"
|
|
|
|
print(" PASS: ChromaDB and embeddings working")
|
|
return True
|
|
except Exception as e:
|
|
print(f" FAIL: {e}")
|
|
return False
|
|
|
|
|
|
def test_personal_index():
|
|
"""Test 2: Personal index populated from ~/.claude/state."""
|
|
print("Test 2: Personal index populated...")
|
|
|
|
# Check if collection exists and has data
|
|
venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
|
|
if str(venv_path) not in sys.path:
|
|
sys.path.insert(0, str(venv_path))
|
|
|
|
try:
|
|
import chromadb
|
|
|
|
client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
|
|
collection = client.get_collection("personal")
|
|
count = collection.count()
|
|
|
|
assert count > 0, f"Personal collection is empty (count={count})"
|
|
print(f" PASS: Personal index has {count} chunks")
|
|
return True
|
|
except Exception as e:
|
|
print(f" FAIL: {e}")
|
|
return False
|
|
|
|
|
|
def test_docs_index():
|
|
"""Test 3: At least one external doc source indexed."""
|
|
print("Test 3: External docs indexed...")
|
|
|
|
# Check if collection exists and has data
|
|
venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
|
|
if str(venv_path) not in sys.path:
|
|
sys.path.insert(0, str(venv_path))
|
|
|
|
try:
|
|
import chromadb
|
|
|
|
client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
|
|
collection = client.get_collection("docs")
|
|
count = collection.count()
|
|
|
|
assert count > 0, f"Docs collection is empty (count={count})"
|
|
|
|
# Also verify sources.json has at least one source
|
|
sources_file = SKILL_DIR / "references" / "sources.json"
|
|
with open(sources_file) as f:
|
|
sources = json.load(f)
|
|
assert len(sources.get("sources", [])) > 0, "No sources configured"
|
|
|
|
print(f" PASS: Docs index has {count} chunks from {len(sources['sources'])} source(s)")
|
|
return True
|
|
except Exception as e:
|
|
print(f" FAIL: {e}")
|
|
return False
|
|
|
|
|
|
def test_search_returns_results():
|
|
"""Test 4: search.py returns relevant results."""
|
|
print("Test 4: Search returns relevant results...")
|
|
|
|
# Test personal search
|
|
returncode, stdout, stderr = run_script("search.py", ["--index", "personal", "decisions"])
|
|
if returncode != 0:
|
|
print(f" FAIL: Personal search failed: {stderr}")
|
|
return False
|
|
|
|
try:
|
|
result = json.loads(stdout)
|
|
personal_results = result.get("results", [])
|
|
if not personal_results:
|
|
print(" WARN: No personal results found (may be expected if state is minimal)")
|
|
except json.JSONDecodeError:
|
|
print(f" FAIL: Invalid JSON output: {stdout}")
|
|
return False
|
|
|
|
# Test docs search
|
|
returncode, stdout, stderr = run_script("search.py", ["--index", "docs", "kubernetes"])
|
|
if returncode != 0:
|
|
print(f" FAIL: Docs search failed: {stderr}")
|
|
return False
|
|
|
|
try:
|
|
result = json.loads(stdout)
|
|
docs_results = result.get("results", [])
|
|
if not docs_results:
|
|
print(" FAIL: No docs results found for 'kubernetes'")
|
|
return False
|
|
except json.JSONDecodeError:
|
|
print(f" FAIL: Invalid JSON output: {stdout}")
|
|
return False
|
|
|
|
# Test combined search
|
|
returncode, stdout, stderr = run_script("search.py", ["configuration"])
|
|
if returncode != 0:
|
|
print(f" FAIL: Combined search failed: {stderr}")
|
|
return False
|
|
|
|
try:
|
|
result = json.loads(stdout)
|
|
assert "query" in result, "Missing 'query' in output"
|
|
assert "results" in result, "Missing 'results' in output"
|
|
assert "searched_collections" in result, "Missing 'searched_collections'"
|
|
assert len(result["searched_collections"]) == 2, "Should search both collections"
|
|
except json.JSONDecodeError:
|
|
print(f" FAIL: Invalid JSON output: {stdout}")
|
|
return False
|
|
|
|
print(f" PASS: Search returns properly formatted results")
|
|
return True
|
|
|
|
|
|
def test_skill_structure():
|
|
"""Test 5: All required files exist."""
|
|
print("Test 5: Skill structure complete...")
|
|
|
|
required_files = [
|
|
SKILL_DIR / "SKILL.md",
|
|
SCRIPTS_DIR / "search.py",
|
|
SCRIPTS_DIR / "index_personal.py",
|
|
SCRIPTS_DIR / "index_docs.py",
|
|
SCRIPTS_DIR / "add_doc_source.py",
|
|
SKILL_DIR / "references" / "sources.json",
|
|
]
|
|
|
|
missing = []
|
|
for f in required_files:
|
|
if not f.exists():
|
|
missing.append(str(f.relative_to(SKILL_DIR)))
|
|
|
|
if missing:
|
|
print(f" FAIL: Missing files: {', '.join(missing)}")
|
|
return False
|
|
|
|
print(" PASS: All required files exist")
|
|
return True
|
|
|
|
|
|
def main():
|
|
print("=" * 60)
|
|
print("RAG Search Test Suite")
|
|
print("=" * 60)
|
|
print()
|
|
|
|
tests = [
|
|
test_chromadb_embeddings,
|
|
test_personal_index,
|
|
test_docs_index,
|
|
test_search_returns_results,
|
|
test_skill_structure,
|
|
]
|
|
|
|
results = []
|
|
for test in tests:
|
|
results.append(test())
|
|
print()
|
|
|
|
print("=" * 60)
|
|
print("Summary")
|
|
print("=" * 60)
|
|
|
|
passed = sum(results)
|
|
total = len(results)
|
|
print(f"Passed: {passed}/{total}")
|
|
|
|
if passed == total:
|
|
print("\nAll tests passed!")
|
|
return 0
|
|
else:
|
|
print(f"\n{total - passed} test(s) failed")
|
|
return 1
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|