feat(rag): add OpenVINO NPU embedding services
This commit is contained in:
@@ -0,0 +1,117 @@
|
||||
#!/usr/bin/env python3
|
||||
"""RAG/embedding health HTTP wrapper for n8n.
|
||||
|
||||
Listens on 0.0.0.0:18814 so the n8n container can call it via
|
||||
http://172.19.0.1:18814.
|
||||
|
||||
Endpoints:
|
||||
GET /healthz -> service liveness
|
||||
POST /check -> run ~/.hermes/scripts/rag_embedding_health.py and return JSON
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import http.server
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# TCP port the wrapper listens on (override with the PORT env var).
PORT = int(os.getenv("PORT", "18814"))

# Script executed by POST /check (override with RAG_HEALTH_SCRIPT).
CHECK_SCRIPT = Path(
    os.getenv(
        "RAG_HEALTH_SCRIPT",
        "/home/will/.hermes/scripts/rag_embedding_health.py",
    )
)

# Seconds the check script may run before it is reported as a timeout
# (override with RAG_HEALTH_TIMEOUT).
TIMEOUT = int(os.getenv("RAG_HEALTH_TIMEOUT", "180"))
|
||||
|
||||
|
||||
class Handler(http.server.BaseHTTPRequestHandler):
    """HTTP handler exposing a liveness probe and the RAG health check.

    Routes (trailing slashes on the path are ignored):
        GET  /healthz -> static liveness payload
        POST /check   -> run ``CHECK_SCRIPT`` and report the outcome as JSON

    Every ``/check`` outcome (success, failure, timeout, error) is sent with
    HTTP 200; the caller inspects ``ok`` / ``status`` / ``exitCode`` in the
    JSON body. Unknown paths get a JSON 404.
    """

    # Upper bound on how many request-body bytes are read and thrown away.
    _MAX_DISCARD_BYTES = 1 << 20

    def do_GET(self):
        """Answer ``GET /healthz``; any other path gets a JSON 404."""
        if self.path.rstrip("/") == "/healthz":
            self._json({"status": "ok", "service": "rag-embedding-health"})
        else:
            self._json({"error": "not found"}, status=404)

    def do_POST(self):
        """Answer ``POST /check`` by running the health script."""
        # The body is never used, but it must be consumed: closing a socket
        # that still holds unread request bytes can reset the connection
        # before the client has read the response.
        self._discard_request_body()

        if self.path.rstrip("/") != "/check":
            self._json({"error": "not found"}, status=404)
            return

        # The payload is built first and written exactly once, so a failed
        # write can never trigger a second response on the same connection.
        self._json(self._run_check(), status=200)

    def _discard_request_body(self):
        """Read and drop the request body announced by ``Content-Length``."""
        try:
            remaining = int(self.headers.get("Content-Length") or 0)
        except (TypeError, ValueError):
            remaining = 0
        remaining = min(remaining, self._MAX_DISCARD_BYTES)
        while remaining > 0:
            chunk = self.rfile.read(min(remaining, 65536))
            if not chunk:
                # Client sent fewer bytes than announced; nothing left to drain.
                break
            remaining -= len(chunk)

    @staticmethod
    def _elapsed_ms(started):
        """Return whole milliseconds elapsed since the monotonic ``started``."""
        return int((time.monotonic() - started) * 1000)

    def _run_check(self):
        """Run ``CHECK_SCRIPT`` and return the JSON-serialisable result dict.

        Returns:
            A dict with the keys ``ok``, ``status``, ``exitCode``, ``output``
            and ``durationMs``. ``status`` is one of ``"ok"``, ``"failed"``,
            ``"timeout"`` or ``"error"``.
        """
        # Monotonic clock: the duration must not jump if the wall clock is
        # adjusted while the script is running.
        started = time.monotonic()

        if not CHECK_SCRIPT.exists():
            return {
                "ok": False,
                "status": "failed",
                "exitCode": 127,
                "output": f"RAG health script missing: {CHECK_SCRIPT}",
                "durationMs": 0,
            }

        # Inherit the service environment; fill in defaults only for
        # variables that are not already set.
        env = os.environ.copy()
        env.setdefault("HERMES_HOME", "/home/will/.hermes")
        env.setdefault("OLLAMA_BASE_URL", "http://127.0.0.1:18817")
        env.setdefault("RAG_EMBED_MODEL", "bge-base-en-v1.5-int8-ov")
        env.setdefault("N8N_URL", "http://127.0.0.1:18808")
        env.setdefault("OBSIDIAN_REINDEX_URL", "http://127.0.0.1:18810")

        try:
            proc = subprocess.run(
                [str(CHECK_SCRIPT)],
                text=True,
                capture_output=True,
                timeout=TIMEOUT,
                check=False,
                env=env,
            )
        except subprocess.TimeoutExpired:
            return {
                "ok": False,
                "status": "timeout",
                "exitCode": 124,
                "output": f"RAG/embedding health check timed out after {TIMEOUT}s",
                "durationMs": self._elapsed_ms(started),
            }
        except Exception as exc:
            # Boundary handler: launch failures (e.g. the script is not
            # executable) are reported to the caller instead of crashing.
            return {
                "ok": False,
                "status": "error",
                "exitCode": 1,
                "output": str(exc)[:4000],
                "durationMs": self._elapsed_ms(started),
            }

        # stdout wins; stderr is reported only when stdout is empty.
        output = (proc.stdout or proc.stderr or "").strip()
        succeeded = proc.returncode == 0
        return {
            "ok": succeeded,
            "status": "ok" if succeeded else "failed",
            "exitCode": proc.returncode,
            "output": output[:4000],
            "durationMs": self._elapsed_ms(started),
        }

    def _json(self, data, status=200):
        """Send ``data`` as an indented UTF-8 JSON response with ``status``."""
        body = json.dumps(data, indent=2).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format, *args):
        """Suppress the base class's per-request logging."""
        return
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Bind on all interfaces so callers outside the host loopback (the module
    # docstring names the n8n container) can reach the service.
    # NOTE(review): HTTPServer handles one request at a time, so GET /healthz
    # waits behind a running POST /check (up to TIMEOUT seconds). Confirm
    # that is acceptable for whatever polls /healthz.
    # The context manager closes the listening socket on every exit path.
    with http.server.HTTPServer(("0.0.0.0", PORT), Handler) as server:
        print(f"rag-embedding-health listening on 0.0.0.0:{PORT}", flush=True)
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop a foreground run; exit quietly
            # instead of printing a traceback.
            pass
|
||||
Reference in New Issue
Block a user