Add new skill for semantic search across personal state files and external
documentation using ChromaDB and sentence-transformers.

Components:
- search.py: Main search interface (--index, --top-k flags)
- index_personal.py: Index ~/.claude/state files
- index_docs.py: Index external docs (git repos)
- add_doc_source.py: Manage doc sources
- test_rag.py: Test suite (5/5 passing)

Features:
- Two indexes: personal (116 chunks) and docs (k0s: 846 chunks)
- all-MiniLM-L6-v2 embeddings (384 dimensions)
- ChromaDB persistent storage
- JSON output with ranked results and metadata

Documentation:
- Added to component-registry.json with triggers
- Added /rag command alias
- Updated skills/README.md
- Resolved fc-013 (vector database for agent memory)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
231 lines
6.8 KiB
Python
Executable File
231 lines
6.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
RAG Search - Test Suite
|
|
|
|
Tests all components of the RAG search skill.
|
|
"""
|
|
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Constants
|
|
SKILL_DIR = Path(__file__).parent.parent
|
|
SCRIPTS_DIR = SKILL_DIR / "scripts"
|
|
VENV_PYTHON = SKILL_DIR / "venv" / "bin" / "python"
|
|
DATA_DIR = Path.home() / ".claude" / "data" / "rag-search"
|
|
|
|
|
|
def run_script(script_name: str, args: list[str] = None) -> tuple[int, str, str]:
|
|
"""Run a script and return (returncode, stdout, stderr)."""
|
|
cmd = [str(VENV_PYTHON), str(SCRIPTS_DIR / script_name)]
|
|
if args:
|
|
cmd.extend(args)
|
|
|
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
return result.returncode, result.stdout, result.stderr
|
|
|
|
|
|
def test_chromadb_embeddings():
|
|
"""Test 1: ChromaDB + embeddings working."""
|
|
print("Test 1: ChromaDB + embeddings...")
|
|
|
|
# Add venv to path and test imports
|
|
venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
|
|
sys.path.insert(0, str(venv_path))
|
|
|
|
try:
|
|
import chromadb
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
# Test ChromaDB
|
|
client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
|
|
assert client is not None, "Failed to create ChromaDB client"
|
|
|
|
# Test embedding model
|
|
model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
embedding = model.encode("test query")
|
|
assert len(embedding) == 384, f"Expected 384 dimensions, got {len(embedding)}"
|
|
|
|
print(" PASS: ChromaDB and embeddings working")
|
|
return True
|
|
except Exception as e:
|
|
print(f" FAIL: {e}")
|
|
return False
|
|
|
|
|
|
def test_personal_index():
|
|
"""Test 2: Personal index populated from ~/.claude/state."""
|
|
print("Test 2: Personal index populated...")
|
|
|
|
# Check if collection exists and has data
|
|
venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
|
|
if str(venv_path) not in sys.path:
|
|
sys.path.insert(0, str(venv_path))
|
|
|
|
try:
|
|
import chromadb
|
|
|
|
client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
|
|
collection = client.get_collection("personal")
|
|
count = collection.count()
|
|
|
|
assert count > 0, f"Personal collection is empty (count={count})"
|
|
print(f" PASS: Personal index has {count} chunks")
|
|
return True
|
|
except Exception as e:
|
|
print(f" FAIL: {e}")
|
|
return False
|
|
|
|
|
|
def test_docs_index():
|
|
"""Test 3: At least one external doc source indexed."""
|
|
print("Test 3: External docs indexed...")
|
|
|
|
# Check if collection exists and has data
|
|
venv_path = SKILL_DIR / "venv" / "lib" / "python3.13" / "site-packages"
|
|
if str(venv_path) not in sys.path:
|
|
sys.path.insert(0, str(venv_path))
|
|
|
|
try:
|
|
import chromadb
|
|
|
|
client = chromadb.PersistentClient(path=str(DATA_DIR / "chroma"))
|
|
collection = client.get_collection("docs")
|
|
count = collection.count()
|
|
|
|
assert count > 0, f"Docs collection is empty (count={count})"
|
|
|
|
# Also verify sources.json has at least one source
|
|
sources_file = SKILL_DIR / "references" / "sources.json"
|
|
with open(sources_file) as f:
|
|
sources = json.load(f)
|
|
assert len(sources.get("sources", [])) > 0, "No sources configured"
|
|
|
|
print(f" PASS: Docs index has {count} chunks from {len(sources['sources'])} source(s)")
|
|
return True
|
|
except Exception as e:
|
|
print(f" FAIL: {e}")
|
|
return False
|
|
|
|
|
|
def test_search_returns_results():
|
|
"""Test 4: search.py returns relevant results."""
|
|
print("Test 4: Search returns relevant results...")
|
|
|
|
# Test personal search
|
|
returncode, stdout, stderr = run_script("search.py", ["--index", "personal", "decisions"])
|
|
if returncode != 0:
|
|
print(f" FAIL: Personal search failed: {stderr}")
|
|
return False
|
|
|
|
try:
|
|
result = json.loads(stdout)
|
|
personal_results = result.get("results", [])
|
|
if not personal_results:
|
|
print(" WARN: No personal results found (may be expected if state is minimal)")
|
|
except json.JSONDecodeError:
|
|
print(f" FAIL: Invalid JSON output: {stdout}")
|
|
return False
|
|
|
|
# Test docs search
|
|
returncode, stdout, stderr = run_script("search.py", ["--index", "docs", "kubernetes"])
|
|
if returncode != 0:
|
|
print(f" FAIL: Docs search failed: {stderr}")
|
|
return False
|
|
|
|
try:
|
|
result = json.loads(stdout)
|
|
docs_results = result.get("results", [])
|
|
if not docs_results:
|
|
print(" FAIL: No docs results found for 'kubernetes'")
|
|
return False
|
|
except json.JSONDecodeError:
|
|
print(f" FAIL: Invalid JSON output: {stdout}")
|
|
return False
|
|
|
|
# Test combined search
|
|
returncode, stdout, stderr = run_script("search.py", ["configuration"])
|
|
if returncode != 0:
|
|
print(f" FAIL: Combined search failed: {stderr}")
|
|
return False
|
|
|
|
try:
|
|
result = json.loads(stdout)
|
|
assert "query" in result, "Missing 'query' in output"
|
|
assert "results" in result, "Missing 'results' in output"
|
|
assert "searched_collections" in result, "Missing 'searched_collections'"
|
|
assert len(result["searched_collections"]) == 2, "Should search both collections"
|
|
except json.JSONDecodeError:
|
|
print(f" FAIL: Invalid JSON output: {stdout}")
|
|
return False
|
|
|
|
print(f" PASS: Search returns properly formatted results")
|
|
return True
|
|
|
|
|
|
def test_skill_structure():
|
|
"""Test 5: All required files exist."""
|
|
print("Test 5: Skill structure complete...")
|
|
|
|
required_files = [
|
|
SKILL_DIR / "SKILL.md",
|
|
SCRIPTS_DIR / "search.py",
|
|
SCRIPTS_DIR / "index_personal.py",
|
|
SCRIPTS_DIR / "index_docs.py",
|
|
SCRIPTS_DIR / "add_doc_source.py",
|
|
SKILL_DIR / "references" / "sources.json",
|
|
]
|
|
|
|
missing = []
|
|
for f in required_files:
|
|
if not f.exists():
|
|
missing.append(str(f.relative_to(SKILL_DIR)))
|
|
|
|
if missing:
|
|
print(f" FAIL: Missing files: {', '.join(missing)}")
|
|
return False
|
|
|
|
print(" PASS: All required files exist")
|
|
return True
|
|
|
|
|
|
def main():
|
|
print("=" * 60)
|
|
print("RAG Search Test Suite")
|
|
print("=" * 60)
|
|
print()
|
|
|
|
tests = [
|
|
test_chromadb_embeddings,
|
|
test_personal_index,
|
|
test_docs_index,
|
|
test_search_returns_results,
|
|
test_skill_structure,
|
|
]
|
|
|
|
results = []
|
|
for test in tests:
|
|
results.append(test())
|
|
print()
|
|
|
|
print("=" * 60)
|
|
print("Summary")
|
|
print("=" * 60)
|
|
|
|
passed = sum(results)
|
|
total = len(results)
|
|
print(f"Passed: {passed}/{total}")
|
|
|
|
if passed == total:
|
|
print("\nAll tests passed!")
|
|
return 0
|
|
else:
|
|
print(f"\n{total - passed} test(s) failed")
|
|
return 1
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|