def run_semantic_search(query: str, top_k: int = 5) -> dict:
    """Query the local Obsidian Chroma index via the rag-search script.

    Args:
        query: Free-text search query. Surrounding whitespace is stripped;
            ``None`` is treated as an empty query.
        top_k: Requested number of results. Falsy values fall back to 5 and
            the value is clamped to the range 1..20.

    Returns:
        A dict that always carries ``ok`` and ``results``:

        * empty query -> ``{"ok": False, "error": "query is required",
          "results": []}`` (the search script is not run);
        * search script exits non-zero -> ``ok`` is False and ``error`` holds
          the tail of stderr (or stdout when stderr is empty);
        * success -> ``ok`` is True plus ``query``, ``index``, ``top_k``,
          ``result_count`` and ``results``.

    Raises:
        ValueError, TypeError: if ``top_k`` cannot be converted to ``int``.
        subprocess.TimeoutExpired: if the search script runs longer than 90 s.
        RuntimeError: if the script exits 0 but its stdout is not a JSON
            object.
    """
    query = (query or "").strip()
    if not query:
        return {"ok": False, "error": "query is required", "results": []}

    top_k = max(1, min(int(top_k or 5), 20))
    # Prefer the project virtualenv interpreter; fall back to the interpreter
    # running this server when the venv is absent.
    interpreter = VENV_PYTHON if Path(VENV_PYTHON).exists() else sys.executable
    result = subprocess.run(
        [
            interpreter,
            SEARCH_SCRIPT,
            "--index",
            "obsidian",
            "--top-k",
            str(top_k),
            "--raw",
            query,
        ],
        capture_output=True,
        text=True,
        timeout=90,
    )
    if result.returncode != 0:
        return {
            "ok": False,
            "query": query,
            "top_k": top_k,
            # Keep only the tail so a long traceback cannot bloat the response.
            "error": result.stderr.strip()[-2000:] or result.stdout.strip()[-2000:],
            "results": [],
        }

    try:
        payload = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        # Re-raise as RuntimeError: a caller that maps json.JSONDecodeError to
        # "the client sent invalid JSON" must not mistake a broken search
        # backend for a malformed request.
        raise RuntimeError(
            f"search script returned invalid JSON: {exc}"
        ) from exc
    if not isinstance(payload, dict):
        raise RuntimeError(
            "search script returned unexpected JSON "
            f"(expected an object, got {type(payload).__name__})"
        )

    results = payload.get("results") or []
    return {
        "ok": True,
        "query": query,
        "index": payload.get("index", "obsidian"),
        "top_k": top_k,
        "result_count": len(results),
        "results": results,
    }
== 0: - payload = json.loads(result.stdout) - health["search_ok"] = bool(payload.get("results")) - health["result_count"] = len(payload.get("results", [])) - else: - health["status"] = "degraded" - health["search_ok"] = False - health["search_error"] = result.stderr.strip()[-1000:] or result.stdout.strip()[-1000:] + payload = run_semantic_search("Obsidian reindex", top_k=1) + health["search_ok"] = bool(payload.get("results")) + health["result_count"] = len(payload.get("results", [])) + if not payload.get("ok"): + health["search_error"] = payload.get("error") except Exception as e: health["status"] = "degraded" health["search_ok"] = False @@ -163,6 +187,19 @@ class ReindexHandler(http.server.BaseHTTPRequestHandler): result = run_reindex(full=full) status = 200 if "error" not in result else 500 self._json_response(result, status=status) + elif path == "/semantic-search": + try: + length = int(self.headers.get("Content-Length") or 0) + body = self.rfile.read(length).decode("utf-8") if length else "{}" + payload = json.loads(body or "{}") + query = payload.get("query") or payload.get("q") or "" + top_k = payload.get("top_k") or payload.get("topK") or 5 + result = run_semantic_search(str(query), int(top_k)) + self._json_response(result, status=200 if result.get("ok") else 400) + except json.JSONDecodeError: + self._json_response({"ok": False, "error": "invalid json", "results": []}, status=400) + except Exception as exc: + self._json_response({"ok": False, "error": str(exc), "results": []}, status=500) else: self._json_response({"error": "not found"}, status=404)