Implement rag-search skill for semantic search
Add new skill for semantic search across personal state files and external documentation using ChromaDB and sentence-transformers. Components: - search.py: Main search interface (--index, --top-k flags) - index_personal.py: Index ~/.claude/state files - index_docs.py: Index external docs (git repos) - add_doc_source.py: Manage doc sources - test_rag.py: Test suite (5/5 passing) Features: - Two indexes: personal (116 chunks) and docs (k0s: 846 chunks) - all-MiniLM-L6-v2 embeddings (384 dimensions) - ChromaDB persistent storage - JSON output with ranked results and metadata Documentation: - Added to component-registry.json with triggers - Added /rag command alias - Updated skills/README.md - Resolved fc-013 (vector database for agent memory) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
205
skills/rag-search/scripts/add_doc_source.py
Executable file
205
skills/rag-search/scripts/add_doc_source.py
Executable file
@@ -0,0 +1,205 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
RAG Search - Add Documentation Source
|
||||
|
||||
Adds a new documentation source to the registry.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Constants
# Skill root: the parent of the directory that contains this script.
SKILL_DIR = Path(__file__).parent.parent
# JSON registry of documentation sources; read by load_sources() and
# rewritten by save_sources().
SOURCES_FILE = SKILL_DIR / "references" / "sources.json"
|
||||
|
||||
|
||||
def load_sources() -> list[dict]:
    """Load the configured documentation sources from the registry file.

    Returns:
        The value stored under the ``"sources"`` key of ``SOURCES_FILE``.
        An empty list is returned when the file does not exist, when the
        key is absent, or when the key holds ``null`` / an empty value.

    Raises:
        json.JSONDecodeError: If the registry file is not valid JSON.
    """
    if not SOURCES_FILE.exists():
        return []

    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, so the registry parses the same way on every machine.
    with open(SOURCES_FILE, encoding="utf-8") as f:
        data = json.load(f)

    # `or []` keeps the declared return type when the key is present but null.
    return data.get("sources") or []
|
||||
|
||||
|
||||
def save_sources(sources: list[dict]) -> None:
    """Write the documentation sources to the registry file.

    The parent directory of ``SOURCES_FILE`` is created if needed, and the
    file is overwritten with ``{"sources": sources}`` as 2-space-indented
    JSON.

    Args:
        sources: Source configurations to persist.

    Raises:
        TypeError: If an entry is not JSON-serializable. The existing
            registry file is left untouched in that case.
    """
    # Serialize before opening the file: opening with "w" truncates it, so a
    # serialization error raised mid-dump would otherwise destroy the registry.
    payload = json.dumps({"sources": sources}, indent=2)

    SOURCES_FILE.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8 so the file does not depend on the platform's locale.
    with open(SOURCES_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
|
||||
|
||||
|
||||
def add_source(
    source_id: str,
    name: str,
    source_type: str,
    url: str = None,
    path: str = None,
    glob: str = "**/*.md",
    version: str = None,
    base_url: str = None,
) -> dict:
    """Add a new documentation source to the registry and persist it.

    Args:
        source_id: Unique identifier for the source.
        name: Human-readable name.
        source_type: ``"git"`` or ``"local"``.
        url: Git repository URL (required for git sources).
        path: Path within the repo (git, stored as given) or local
            directory (local, required; stored with ``~`` expanded).
        glob: File pattern to match; always stored.
        version: Git tag/branch (git sources only; stored when given).
        base_url: Base URL for documentation links (stored when given).

    Returns:
        The created source configuration.

    Raises:
        ValueError: If ``source_id`` is already registered, a required
            option for the chosen type is missing, or ``source_type`` is
            unknown.
    """
    sources = load_sources()

    # Use .get() so a hand-edited entry lacking "id" is skipped instead of
    # raising a KeyError that callers (which catch ValueError) do not expect.
    if any(s.get("id") == source_id for s in sources):
        raise ValueError(f"Source already exists: {source_id}")

    # Key insertion order is the order the entry is written to the registry.
    source = {
        "id": source_id,
        "name": name,
        "type": source_type,
    }

    if source_type == "git":
        if not url:
            raise ValueError("Git sources require --url")
        source["url"] = url
        if version:
            source["version"] = version
        if path:
            # For git sources the path is relative to the repository, so it
            # is stored verbatim (no ~ expansion).
            source["path"] = path
    elif source_type == "local":
        if not path:
            raise ValueError("Local sources require --path")
        source["path"] = str(Path(path).expanduser())
    else:
        raise ValueError(f"Unknown source type: {source_type}")

    source["glob"] = glob
    if base_url:
        source["base_url"] = base_url

    sources.append(source)
    save_sources(sources)

    return source
|
||||
|
||||
|
||||
def remove_source(source_id: str) -> bool:
    """Remove a documentation source from the registry.

    Args:
        source_id: Identifier of the source to remove.

    Returns:
        True if a matching source was removed and the registry rewritten;
        False if no source has that id (the registry is not rewritten).
    """
    sources = load_sources()

    # .get() tolerates malformed entries without an "id" key; such entries
    # never match and are therefore preserved.
    remaining = [s for s in sources if s.get("id") != source_id]

    if len(remaining) == len(sources):
        return False

    save_sources(remaining)
    return True
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for managing documentation sources.

    Modes, checked in this order:
      * ``--list``: print the configured sources as JSON (or a notice when
        there are none).
      * ``--remove ID``: remove a source; exits with status 1 if not found.
      * otherwise: add a source; ``--id`` and ``--name`` are required.

    Registry and validation failures (``ValueError``, which includes invalid
    JSON in the registry, and ``OSError``) are reported on stderr with exit
    status 1 in every mode.
    """
    parser = argparse.ArgumentParser(
        description="Add or manage documentation sources for RAG search",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Add k0s documentation from GitHub
%(prog)s --id k0s --name "k0s Documentation" --type git \\
--url "https://github.com/k0sproject/k0s.git" \\
--path "docs/" --version "v1.30.0"

# Add local documentation directory
%(prog)s --id internal --name "Internal Docs" --type local \\
--path "~/docs/internal" --glob "**/*.md"

# Remove a source
%(prog)s --remove k0s

# List sources
%(prog)s --list
"""
    )
    parser.add_argument("--id", help="Unique source identifier")
    parser.add_argument("--name", help="Human-readable name")
    parser.add_argument(
        "--type", "-t",
        choices=["git", "local"],
        default="git",
        help="Source type (default: git)"
    )
    parser.add_argument("--url", help="Git repository URL")
    parser.add_argument("--path", help="Path within repo or local directory")
    parser.add_argument(
        "--glob", "-g",
        default="**/*.md",
        help="File pattern to match (default: **/*.md)"
    )
    parser.add_argument("--version", "-v", help="Git tag or branch")
    parser.add_argument("--base-url", help="Base URL for documentation links")
    parser.add_argument(
        "--remove", "-r",
        metavar="ID",
        help="Remove a source by ID"
    )
    parser.add_argument(
        "--list", "-l",
        action="store_true",
        help="List configured sources"
    )

    args = parser.parse_args()

    # One handler for all modes so a corrupt or unreadable registry is
    # reported the same way for --list and --remove as for adding.
    # sys.exit() and parser.error() raise SystemExit, which is not caught here.
    try:
        if args.list:
            sources = load_sources()
            if sources:
                print(json.dumps(sources, indent=2))
            else:
                print("No documentation sources configured")
            return

        if args.remove:
            if remove_source(args.remove):
                print(f"Removed source: {args.remove}")
            else:
                print(f"Source not found: {args.remove}", file=sys.stderr)
                sys.exit(1)
            return

        # Adding a new source
        if not args.id or not args.name:
            parser.error("--id and --name are required when adding a source")

        source = add_source(
            source_id=args.id,
            name=args.name,
            source_type=args.type,
            url=args.url,
            path=args.path,
            glob=args.glob,
            version=args.version,
            base_url=args.base_url,
        )
    except (ValueError, OSError) as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Added source: {args.id}")
    print(json.dumps(source, indent=2))
    print("\nTo index this source, run:")
    print(f" index_docs.py --source {args.id}")
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user