Files
claude-code/skills/rag-search/scripts/test_rag.py
OpenCode Test 7ca8caeecb Implement rag-search skill for semantic search
Add new skill for semantic search across personal state files and
external documentation using ChromaDB and sentence-transformers.

Components:
- search.py: Main search interface (--index, --top-k flags)
- index_personal.py: Index ~/.claude/state files
- index_docs.py: Index external docs (git repos)
- add_doc_source.py: Manage doc sources
- test_rag.py: Test suite (5/5 passing)

Features:
- Two indexes: personal (116 chunks) and docs (k0s: 846 chunks)
- all-MiniLM-L6-v2 embeddings (384 dimensions)
- ChromaDB persistent storage
- JSON output with ranked results and metadata

Documentation:
- Added to component-registry.json with triggers
- Added /rag command alias
- Updated skills/README.md
- Resolved fc-013 (vector database for agent memory)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-04 23:41:38 -08:00

231 lines
6.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
RAG Search - Test Suite
Tests all components of the RAG search skill.
"""
import json
import subprocess
import sys
from pathlib import Path
# Constants
# Path(__file__).parent is this file's directory; its parent is treated as the
# skill root (SCRIPTS_DIR below re-derives the "scripts" directory from it).
SKILL_DIR = Path(__file__).parent.parent
# Directory holding the skill's scripts that run_script() launches.
SCRIPTS_DIR = SKILL_DIR / "scripts"
# Interpreter used by run_script() to execute the skill's scripts.
# NOTE(review): "bin/python" is the POSIX venv layout - confirm other platforms are out of scope.
VENV_PYTHON = SKILL_DIR / "venv" / "bin" / "python"
# Per-user data directory; the tests open the ChromaDB store at DATA_DIR / "chroma".
DATA_DIR = Path.home() / ".claude" / "data" / "rag-search"
def run_script(script_name: str, args: list[str] | None = None) -> tuple[int, str, str]:
    """Run one of the skill's scripts with the venv interpreter.

    Args:
        script_name: File name of the script inside SCRIPTS_DIR.
        args: Extra command-line arguments appended after the script path.
            None or an empty list means no extra arguments.

    Returns:
        A (returncode, stdout, stderr) tuple; both streams are captured as text.
    """
    cmd = [str(VENV_PYTHON), str(SCRIPTS_DIR / script_name)]
    if args:
        cmd.extend(args)
    # check=False: a non-zero exit status is handed back to the caller, not raised.
    result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    return result.returncode, result.stdout, result.stderr
def test_chromadb_embeddings():
    """Test 1: ChromaDB + embeddings working.

    Creates a persistent ChromaDB client at DATA_DIR / "chroma" and encodes a
    sample query with the all-MiniLM-L6-v2 model, expecting 384 values back.

    Returns:
        True when both checks succeed; False, after printing the reason, when
        an import, the client, or the embedding check fails.
    """
    print("Test 1: ChromaDB + embeddings...")
    # Make the venv's packages importable from the interpreter running this
    # suite. The membership guard matches test_personal_index/test_docs_index
    # so repeated calls do not stack duplicate sys.path entries.
    # NOTE(review): the "python3.13" segment is hard-coded - confirm it tracks
    # the venv's actual Python version.
    venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
    if str(venv_path) not in sys.path:
        sys.path.insert(0, str(venv_path))
    try:
        import chromadb
        from sentence_transformers import SentenceTransformer

        # Test ChromaDB
        client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
        assert client is not None, "Failed to create ChromaDB client"

        # Test embedding model
        model = SentenceTransformer("all-MiniLM-L6-v2")
        embedding = model.encode("test query")
        assert len(embedding) == 384, f"Expected 384 dimensions, got {len(embedding)}"
        print(" PASS: ChromaDB and embeddings working")
        return True
    except Exception as e:
        # Deliberately broad: any failure here is reported as a failed test
        # rather than aborting the rest of the suite.
        print(f" FAIL: {e}")
        return False
def test_personal_index():
    """Test 2: the "personal" collection exists and is not empty.

    Returns:
        True when the collection reports a positive chunk count; False, after
        printing the reason, on any error or an empty collection.
    """
    print("Test 2: Personal index populated...")
    # Expose the venv's site-packages to this interpreter, at most once.
    site_packages = str(SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages")
    if site_packages not in sys.path:
        sys.path.insert(0, site_packages)
    try:
        import chromadb

        store = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
        # get_collection raises if "personal" is absent; that lands in the
        # except branch below and is reported as a failure.
        count = store.get_collection("personal").count()
        assert count > 0, f"Personal collection is empty (count={count})"
        print(f" PASS: Personal index has {count} chunks")
        return True
    except Exception as e:
        print(f" FAIL: {e}")
        return False
def test_docs_index():
    """Test 3: At least one external doc source indexed.

    Checks that the "docs" collection holds at least one chunk and that
    references/sources.json lists at least one entry under "sources".

    Returns:
        True when both checks pass; False, after printing the reason, on any
        error or failed check.
    """
    print("Test 3: External docs indexed...")
    # Make the venv's packages importable, without duplicating the entry.
    venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
    if str(venv_path) not in sys.path:
        sys.path.insert(0, str(venv_path))
    try:
        import chromadb

        client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
        collection = client.get_collection("docs")
        count = collection.count()
        assert count > 0, f"Docs collection is empty (count={count})"

        # Also verify sources.json has at least one source
        sources_file = SKILL_DIR / "references" / "sources.json"
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(sources_file, encoding="utf-8") as f:
            sources = json.load(f)
        assert len(sources.get("sources", [])) > 0, "No sources configured"
        print(f" PASS: Docs index has {count} chunks from {len(sources['sources'])} source(s)")
        return True
    except Exception as e:
        # Deliberately broad: any failure is reported as a failed test rather
        # than aborting the suite.
        print(f" FAIL: {e}")
        return False
def _run_search_json(label: str, args: list[str]) -> dict | None:
    """Run search.py with *args* and return its stdout parsed as a JSON object.

    Prints a FAIL line and returns None when the script exits non-zero or when
    its stdout is not a JSON object. *label* names the search in the exit-code
    failure message.
    """
    returncode, stdout, stderr = run_script("search.py", args)
    if returncode != 0:
        print(f" FAIL: {label} search failed: {stderr}")
        return None
    try:
        parsed = json.loads(stdout)
    except json.JSONDecodeError:
        parsed = None
    # Callers use dict methods on the result, so valid JSON of any other shape
    # (list, string, null) is reported the same way as unparseable output.
    if not isinstance(parsed, dict):
        print(f" FAIL: Invalid JSON output: {stdout}")
        return None
    return parsed


def test_search_returns_results():
    """Test 4: search.py returns relevant results.

    Runs three searches through search.py: the personal index, the docs index,
    and a combined search with no --index flag. An empty personal result only
    warns; an empty docs result fails. The combined output must contain
    "query", "results", and a two-entry "searched_collections".

    Returns:
        True when all three searches pass; False, after printing the reason,
        on the first failure.
    """
    print("Test 4: Search returns relevant results...")

    # Test personal search
    personal = _run_search_json("Personal", ["--index", "personal", "decisions"])
    if personal is None:
        return False
    if not personal.get("results", []):
        print(" WARN: No personal results found (may be expected if state is minimal)")

    # Test docs search
    docs = _run_search_json("Docs", ["--index", "docs", "kubernetes"])
    if docs is None:
        return False
    if not docs.get("results", []):
        print(" FAIL: No docs results found for 'kubernetes'")
        return False

    # Test combined search
    combined = _run_search_json("Combined", ["configuration"])
    if combined is None:
        return False
    try:
        assert "query" in combined, "Missing 'query' in output"
        assert "results" in combined, "Missing 'results' in output"
        assert "searched_collections" in combined, "Missing 'searched_collections'"
        assert len(combined["searched_collections"]) == 2, "Should search both collections"
    except (AssertionError, TypeError) as e:
        # A failed check must be recorded as a FAIL like the other tests do,
        # not escape and abort the whole suite. TypeError covers a
        # "searched_collections" value that has no length.
        print(f" FAIL: {e}")
        return False

    print(" PASS: Search returns properly formatted results")
    return True
def test_skill_structure():
    """Test 5: every file the skill requires is present on disk.

    Returns:
        True when all required files exist; otherwise False, after printing
        the missing paths relative to SKILL_DIR.
    """
    print("Test 5: Skill structure complete...")
    script_names = ("search.py", "index_personal.py", "index_docs.py", "add_doc_source.py")
    expected = (
        SKILL_DIR / "SKILL.md",
        *(SCRIPTS_DIR / name for name in script_names),
        SKILL_DIR / "references" / "sources.json",
    )
    # Report paths relative to the skill root to keep the message short.
    missing = [str(path.relative_to(SKILL_DIR)) for path in expected if not path.exists()]
    if not missing:
        print(" PASS: All required files exist")
        return True
    print(f" FAIL: Missing files: {', '.join(missing)}")
    return False
def main():
    """Run every test in order, print a summary, and return an exit code.

    Returns:
        0 when the number of passing results equals the number of tests,
        otherwise 1.
    """
    banner = "=" * 60
    print(banner)
    print("RAG Search Test Suite")
    print(banner)
    print()

    suite = (
        test_chromadb_embeddings,
        test_personal_index,
        test_docs_index,
        test_search_returns_results,
        test_skill_structure,
    )
    outcomes = []
    for run_test in suite:
        outcomes.append(run_test())
        print()  # blank line after each test's output

    print(banner)
    print("Summary")
    print(banner)

    total = len(outcomes)
    passed = sum(outcomes)  # each True result counts as 1
    print(f"Passed: {passed}/{total}")

    if passed != total:
        print(f"\n{total - passed} test(s) failed")
        return 1
    print("\nAll tests passed!")
    return 0
# Run the suite only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())