#!/usr/bin/env python3
"""RAG/embedding health HTTP wrapper for n8n.

Listens on 0.0.0.0:18814 so the n8n container can call it via
http://172.19.0.1:18814.

Endpoints:
  GET  /healthz -> service liveness
  POST /check   -> run ~/.hermes/scripts/rag_embedding_health.py and return JSON
"""

from __future__ import annotations

import http.server
import json
import os
import subprocess
import time
from pathlib import Path

# TCP port the HTTP server binds to (override with the PORT environment variable).
PORT = int(os.environ.get("PORT", "18814"))

# Health-check script executed by POST /check (override with RAG_HEALTH_SCRIPT).
# expanduser() lets an override be written as "~/..."; absolute paths, including
# the default, are returned unchanged.
CHECK_SCRIPT = Path(
    os.environ.get("RAG_HEALTH_SCRIPT", "/home/will/.hermes/scripts/rag_embedding_health.py")
).expanduser()

# Maximum number of seconds the script may run before the check is reported as
# timed out (override with RAG_HEALTH_TIMEOUT).
TIMEOUT = int(os.environ.get("RAG_HEALTH_TIMEOUT", "180"))


class Handler(http.server.BaseHTTPRequestHandler):
    """Serve the liveness probe and the on-demand RAG/embedding health check.

    Routes (a trailing slash on the path is ignored):
      GET  /healthz -> static liveness payload
      POST /check   -> run the health script and report its result as JSON

    Any other path gets a 404 JSON error. Every /check outcome, including a
    failed or timed-out run, is returned with HTTP 200; the result is carried
    in the body's ``ok`` / ``status`` / ``exitCode`` fields.
    """

    # Applied to the script's environment only for variables that are not
    # already set in this process's environment.
    _DEFAULT_ENV = {
        "HERMES_HOME": "/home/will/.hermes",
        "OLLAMA_BASE_URL": "http://127.0.0.1:18817",
        "RAG_EMBED_MODEL": "bge-base-en-v1.5-int8-ov",
        "N8N_URL": "http://127.0.0.1:18808",
        "OBSIDIAN_REINDEX_URL": "http://127.0.0.1:18810",
    }

    # Maximum number of characters of script output (or error text) returned.
    _MAX_OUTPUT_CHARS = 4000

    def do_GET(self) -> None:
        """Answer ``GET /healthz`` with a liveness payload; 404 otherwise."""
        if self.path.rstrip("/") == "/healthz":
            self._json({"status": "ok", "service": "rag-embedding-health"})
        else:
            self._json({"error": "not found"}, status=404)

    def do_POST(self) -> None:
        """Answer ``POST /check`` with the health-check result; 404 otherwise."""
        if self.path.rstrip("/") != "/check":
            self._json({"error": "not found"}, status=404)
            return

        # Build the payload first and write it exactly once. Sending from
        # inside the try block would let a failed write (e.g. the client went
        # away) fall into the generic handler and attempt a second response.
        self._json(self._run_check(CHECK_SCRIPT, TIMEOUT), status=200)

    def _run_check(self, script: Path, timeout: int) -> dict:
        """Run *script* and describe the outcome as a JSON-serializable dict.

        Args:
            script: Path of the executable to run (invoked with no arguments).
            timeout: Seconds to wait before giving up on the script.

        Returns:
            A dict with keys ``ok``, ``status``, ``exitCode``, ``output`` and
            ``durationMs``. ``status`` is ``"ok"``/``"failed"`` according to
            the exit code, ``"timeout"`` (exit code 124) when the time limit
            is hit, or ``"error"`` (exit code 1) when the script could not be
            run at all. A missing script yields ``"failed"`` with exit code
            127 and a duration of 0.
        """
        # Monotonic clock: the duration must not be skewed by wall-clock jumps.
        started = time.monotonic()

        if not script.exists():
            return {
                "ok": False,
                "status": "failed",
                "exitCode": 127,
                "output": f"RAG health script missing: {script}",
                "durationMs": 0,
            }

        env = os.environ.copy()
        for key, value in self._DEFAULT_ENV.items():
            env.setdefault(key, value)

        try:
            proc = subprocess.run(
                [str(script)],
                text=True,
                # Undecodable bytes in the script's output must not turn a
                # real result into a decoding error.
                errors="replace",
                capture_output=True,
                timeout=timeout,
                check=False,
                env=env,
            )
        except subprocess.TimeoutExpired:
            result = {
                "ok": False,
                "status": "timeout",
                "exitCode": 124,
                "output": f"RAG/embedding health check timed out after {timeout}s",
            }
        except Exception as exc:  # noqa: BLE001 - boundary: always answer with JSON
            # E.g. the script exists but is not executable.
            result = {
                "ok": False,
                "status": "error",
                "exitCode": 1,
                "output": str(exc)[: self._MAX_OUTPUT_CHARS],
            }
        else:
            succeeded = proc.returncode == 0
            result = {
                "ok": succeeded,
                "status": "ok" if succeeded else "failed",
                "exitCode": proc.returncode,
                "output": self._pick_output(proc.stdout, proc.stderr),
            }

        result["durationMs"] = int((time.monotonic() - started) * 1000)
        return result

    @staticmethod
    def _pick_output(stdout, stderr, limit: int = 4000) -> str:
        """Return stripped stdout, or stripped stderr when stdout is blank.

        Each stream is stripped before the choice is made, so stdout that
        holds only whitespace does not hide a message on stderr. The result
        is cut to at most *limit* characters; ``None`` streams count as empty.
        """
        for stream in (stdout, stderr):
            text = (stream or "").strip()
            if text:
                return text[:limit]
        return ""

    def _json(self, data, status: int = 200) -> None:
        """Send *data* as an indented JSON response with the given status."""
        body = json.dumps(data, indent=2).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format, *args) -> None:  # noqa: A002 - base-class signature
        """Discard the base class's per-request log lines."""
        return


def main() -> None:
    """Bind the HTTP server on all interfaces at PORT and serve until stopped."""
    # NOTE(review): HTTPServer handles one request at a time, so GET /healthz
    # waits while a POST /check is still running its script. Kept as-is here
    # because a threaded server would also allow concurrent script runs.
    # The context manager closes the listening socket on the way out.
    with http.server.HTTPServer(("0.0.0.0", PORT), Handler) as server:
        print(f"rag-embedding-health listening on 0.0.0.0:{PORT}", flush=True)
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is a normal way to stop the service; exit without a traceback.
            pass


if __name__ == "__main__":
    main()