Implement rag-search skill for semantic search
Add new skill for semantic search across personal state files and external
documentation using ChromaDB and sentence-transformers.

Components:
- search.py: Main search interface (--index, --top-k flags)
- index_personal.py: Index ~/.claude/state files
- index_docs.py: Index external docs (git repos)
- add_doc_source.py: Manage doc sources
- test_rag.py: Test suite (5/5 passing)

Features:
- Two indexes: personal (116 chunks) and docs (k0s: 846 chunks)
- all-MiniLM-L6-v2 embeddings (384 dimensions)
- ChromaDB persistent storage
- JSON output with ranked results and metadata

Documentation:
- Added to component-registry.json with triggers
- Added /rag command alias
- Updated skills/README.md
- Resolved fc-013 (vector database for agent memory)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
230
skills/rag-search/scripts/test_rag.py
Executable file
230
skills/rag-search/scripts/test_rag.py
Executable file
@@ -0,0 +1,230 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
RAG Search - Test Suite
|
||||
|
||||
Tests all components of the RAG search skill.
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Constants
# Root of the skill: two directory levels above this script file.
SKILL_DIR = Path(__file__).parent.parent
# Directory holding the skill's scripts.
SCRIPTS_DIR = SKILL_DIR.joinpath("scripts")
# Interpreter inside the skill's virtual environment, used to run the scripts.
VENV_PYTHON = SKILL_DIR.joinpath("venv", "bin", "python")
# Per-user data directory; the tests open the ChromaDB store at DATA_DIR / "chroma".
DATA_DIR = Path.home().joinpath(".claude", "data", "rag-search")
|
||||
|
||||
|
||||
def run_script(script_name: str, args: list[str] | None = None) -> tuple[int, str, str]:
    """Run a script with the venv interpreter and return (returncode, stdout, stderr).

    Args:
        script_name: File name of the script inside SCRIPTS_DIR.
        args: Optional extra command-line arguments appended after the
            script path.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple with output captured as text.
        If the interpreter itself cannot be launched (for example the venv
        is missing), returns ``(127, "", <OS error text>)`` instead of
        raising.
    """
    cmd = [str(VENV_PYTHON), str(SCRIPTS_DIR / script_name), *(args or [])]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as exc:
        # Launch failures (missing or non-executable interpreter) raise before
        # any process exists. Report them as a failed run so callers'
        # `returncode != 0` checks print a FAIL instead of the whole suite
        # aborting with a traceback.
        return 127, "", str(exc)

    return result.returncode, result.stdout, result.stderr
|
||||
|
||||
|
||||
def test_chromadb_embeddings():
    """Test 1: ChromaDB + embeddings working.

    Opens the persistent ChromaDB store under DATA_DIR / "chroma" and encodes
    a sample query with the "all-MiniLM-L6-v2" model, expecting a
    384-element embedding.

    Returns:
        True if every check succeeds; False otherwise (the reason is printed).
    """
    print("Test 1: ChromaDB + embeddings...")

    # Add venv to path and test imports
    # NOTE(review): the interpreter version in this path is hard-coded;
    # confirm it matches the version the venv was created with.
    venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
    # Guarded like the other tests so repeated calls do not pile up
    # duplicate sys.path entries.
    if str(venv_path) not in sys.path:
        sys.path.insert(0, str(venv_path))

    try:
        import chromadb
        from sentence_transformers import SentenceTransformer

        # Test ChromaDB
        client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
        # Explicit raises instead of `assert`: assert statements are removed
        # under `python -O`, which would make this test pass vacuously.
        if client is None:
            raise AssertionError("Failed to create ChromaDB client")

        # Test embedding model
        model = SentenceTransformer("all-MiniLM-L6-v2")
        embedding = model.encode("test query")
        if len(embedding) != 384:
            raise AssertionError(f"Expected 384 dimensions, got {len(embedding)}")

        print(" PASS: ChromaDB and embeddings working")
        return True
    except Exception as e:
        # Top-level test boundary: any import or runtime failure is a FAIL.
        print(f" FAIL: {e}")
        return False
|
||||
|
||||
|
||||
def test_personal_index():
    """Test 2: Personal index populated from ~/.claude/state.

    Looks up the "personal" collection in the ChromaDB store under
    DATA_DIR / "chroma" and requires it to contain at least one entry.

    Returns:
        True if the collection exists and is non-empty; False otherwise
        (the reason is printed).
    """
    print("Test 2: Personal index populated...")

    # Check if collection exists and has data
    venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
    if str(venv_path) not in sys.path:
        sys.path.insert(0, str(venv_path))

    try:
        import chromadb

        client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
        collection = client.get_collection("personal")
        count = collection.count()

        # Explicit raise instead of `assert`: assert statements are removed
        # under `python -O`, which would let an empty index pass.
        if not count > 0:
            raise AssertionError(f"Personal collection is empty (count={count})")

        print(f" PASS: Personal index has {count} chunks")
        return True
    except Exception as e:
        # Top-level test boundary: a missing collection or import error is a FAIL.
        print(f" FAIL: {e}")
        return False
|
||||
|
||||
|
||||
def test_docs_index():
    """Test 3: At least one external doc source indexed.

    Requires the "docs" collection in the ChromaDB store under
    DATA_DIR / "chroma" to be non-empty, and references/sources.json to list
    at least one entry under its "sources" key.

    Returns:
        True if both checks succeed; False otherwise (the reason is printed).
    """
    print("Test 3: External docs indexed...")

    # Check if collection exists and has data
    venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
    if str(venv_path) not in sys.path:
        sys.path.insert(0, str(venv_path))

    try:
        import chromadb

        client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
        collection = client.get_collection("docs")
        count = collection.count()

        # Explicit raises instead of `assert`: assert statements are removed
        # under `python -O`, which would let an empty index pass.
        if not count > 0:
            raise AssertionError(f"Docs collection is empty (count={count})")

        # Also verify sources.json has at least one source
        sources_file = SKILL_DIR / "references" / "sources.json"
        # Read as UTF-8 explicitly rather than relying on the locale default.
        with open(sources_file, encoding="utf-8") as f:
            sources = json.load(f)
        if not len(sources.get("sources", [])) > 0:
            raise AssertionError("No sources configured")

        print(f" PASS: Docs index has {count} chunks from {len(sources['sources'])} source(s)")
        return True
    except Exception as e:
        # Top-level test boundary: missing collection/file or bad JSON is a FAIL.
        print(f" FAIL: {e}")
        return False
|
||||
|
||||
|
||||
def test_search_returns_results():
    """Test 4: search.py returns relevant results.

    Runs search.py three times through run_script:
      1. ``--index personal decisions`` - must exit 0 and emit JSON; an empty
         result list only produces a warning.
      2. ``--index docs kubernetes`` - must exit 0, emit JSON, and return at
         least one result.
      3. ``configuration`` (no index flag) - must exit 0 and emit JSON with
         "query", "results" and "searched_collections" keys, the latter
         having exactly two entries.

    Returns:
        True if all three searches satisfy their checks; False otherwise
        (the reason is printed).
    """
    print("Test 4: Search returns relevant results...")

    # Test personal search
    returncode, stdout, stderr = run_script("search.py", ["--index", "personal", "decisions"])
    if returncode != 0:
        print(f" FAIL: Personal search failed: {stderr}")
        return False

    try:
        result = json.loads(stdout)
    except json.JSONDecodeError:
        print(f" FAIL: Invalid JSON output: {stdout}")
        return False
    if not result.get("results", []):
        print(" WARN: No personal results found (may be expected if state is minimal)")

    # Test docs search
    returncode, stdout, stderr = run_script("search.py", ["--index", "docs", "kubernetes"])
    if returncode != 0:
        print(f" FAIL: Docs search failed: {stderr}")
        return False

    try:
        result = json.loads(stdout)
    except json.JSONDecodeError:
        print(f" FAIL: Invalid JSON output: {stdout}")
        return False
    if not result.get("results", []):
        print(" FAIL: No docs results found for 'kubernetes'")
        return False

    # Test combined search
    returncode, stdout, stderr = run_script("search.py", ["configuration"])
    if returncode != 0:
        print(f" FAIL: Combined search failed: {stderr}")
        return False

    try:
        result = json.loads(stdout)
    except json.JSONDecodeError:
        print(f" FAIL: Invalid JSON output: {stdout}")
        return False

    # Structural checks are reported as a FAIL result rather than asserted:
    # an uncaught AssertionError here would abort the whole suite (and the
    # checks would vanish entirely under `python -O`).
    required_keys = (
        ("query", "Missing 'query' in output"),
        ("results", "Missing 'results' in output"),
        ("searched_collections", "Missing 'searched_collections'"),
    )
    for key, message in required_keys:
        if key not in result:
            print(f" FAIL: {message}")
            return False
    if len(result["searched_collections"]) != 2:
        print(" FAIL: Should search both collections")
        return False

    print(" PASS: Search returns properly formatted results")
    return True
|
||||
|
||||
|
||||
def test_skill_structure():
    """Test 5: All required files exist.

    Checks that SKILL.md, the four skill scripts and references/sources.json
    are present under SKILL_DIR.

    Returns:
        True if every file exists; False otherwise, after printing the
        missing paths relative to SKILL_DIR.
    """
    print("Test 5: Skill structure complete...")

    expected_paths = (
        SKILL_DIR / "SKILL.md",
        SCRIPTS_DIR / "search.py",
        SCRIPTS_DIR / "index_personal.py",
        SCRIPTS_DIR / "index_docs.py",
        SCRIPTS_DIR / "add_doc_source.py",
        SKILL_DIR / "references" / "sources.json",
    )

    absent = [
        str(path.relative_to(SKILL_DIR))
        for path in expected_paths
        if not path.exists()
    ]

    if not absent:
        print(" PASS: All required files exist")
        return True

    print(f" FAIL: Missing files: {', '.join(absent)}")
    return False
|
||||
|
||||
|
||||
def main():
    """Run every test in order, print a summary, and return an exit code.

    Returns:
        0 when all tests return True, 1 when at least one does not.
    """
    rule = "=" * 60

    print(rule)
    print("RAG Search Test Suite")
    print(rule)
    print()

    checks = (
        test_chromadb_embeddings,
        test_personal_index,
        test_docs_index,
        test_search_returns_results,
        test_skill_structure,
    )

    outcomes = []
    for check in checks:
        outcomes.append(check())
        # Blank line between the output of consecutive tests.
        print()

    print(rule)
    print("Summary")
    print(rule)

    total = len(outcomes)
    passed = sum(outcomes)  # True counts as 1
    print(f"Passed: {passed}/{total}")

    if passed != total:
        print(f"\n{total - passed} test(s) failed")
        return 1

    print("\nAll tests passed!")
    return 0
|
||||
|
||||
|
||||
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user