[verified] refresh OpenVINO NPU reranker prototype

This commit is contained in:
William Valentin
2026-06-04 12:16:15 -07:00
parent 4dc77bb0c7
commit 418be69f96
4 changed files with 93 additions and 3 deletions
+13 -2
View File
@@ -13,8 +13,9 @@ This service is intentionally not wired into live RAG by default.
## Files
- `SPEC.md` — endpoint/CLI contract, model/runtime recommendation, smoke/NPU proof plan, RAG integration plan, docs implications, and no-go criteria.
- `server.py` — stdlib HTTP OpenVINO Runtime service.
- `server.py` — stdlib HTTP OpenVINO Runtime service with fail-fast localhost listener conflict checks and request validation.
- `smoke.py` — non-private API/ranking/NPU busy-time smoke test.
- `tests/test_server_validation.py` — stdlib unit checks for request validation and listener conflict detection.
- `openvino-reranker.service` — optional user-systemd unit.
## One-time setup
@@ -62,7 +63,7 @@ OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco
python /home/will/lab/swarm/openvino-reranker-npu/server.py
```
Startup performs a non-private smoke inference and fails closed when `OPENVINO_RERANKER_DEVICE=NPU` but `npu_busy_time_us` does not increase.
Startup performs a non-private smoke inference and fails closed when `OPENVINO_RERANKER_DEVICE=NPU` but `npu_busy_time_us` does not increase. It also checks whether the requested listener can bind before compiling the OpenVINO model, so obvious port conflicts fail fast; the real server bind still happens immediately after model load.
## API
@@ -110,6 +111,16 @@ Expected:
- The top result matches the non-private fixture expectation.
- Response and sysfs `npu_busy_delta_us` are positive.
## Validation checks
```bash
source /home/will/.venvs/openvino-reranker/bin/activate
PYTHONPATH=/home/will/lab/swarm/openvino-reranker-npu \
python -m unittest discover -s /home/will/lab/swarm/openvino-reranker-npu/tests
```
These checks do not compile the OpenVINO model; they cover request validation and fail-fast listener conflict detection.
## Optional systemd user service
Install the unit only after the foreground command and smoke test pass:
+1 -1
View File
@@ -105,7 +105,7 @@ Error response shape:
Status behavior:
- 400: invalid JSON schema, empty query, missing/empty documents, invalid document text.
- 400: invalid JSON schema, empty query, missing/empty documents, invalid document text, or non-positive/non-integer `top_k`/`top_n`.
- 413: request body above `OPENVINO_RERANKER_MAX_BODY_BYTES`.
- 503: model not ready.
- 500: unexpected inference/runtime failure.
+24
View File
@@ -16,6 +16,7 @@ import argparse
import json
import math
import os
import socket
import sys
import threading
import time
@@ -251,6 +252,27 @@ def normalize_documents(value: Any, max_documents: int) -> list[dict[str, Any]]:
return docs
def parse_top_k(value: Any, document_count: int) -> int:
"""Validate top_k/top_n before inference so schema errors return HTTP 400."""
if value is None:
return document_count
if isinstance(value, bool) or not isinstance(value, int):
raise ValueError("top_k/top_n must be a positive integer")
if value < 1:
raise ValueError("top_k/top_n must be a positive integer")
return min(value, document_count)
def assert_port_available(host: str, port: int) -> None:
"""Fail fast on listener conflicts before compiling the OpenVINO model."""
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
try:
sock.bind((host, port))
except OSError as exc:
raise RuntimeError(f"cannot bind {host}:{port}; listener conflict or invalid bind: {exc}") from exc
class Handler(BaseHTTPRequestHandler):
server_version = "OpenVINOReranker/0.1"
@@ -293,6 +315,7 @@ class Handler(BaseHTTPRequestHandler):
raise ValueError("query is required")
top_k = payload.get("top_k", payload.get("top_n"))
documents = normalize_documents(payload.get("documents"), self.max_documents)
top_k = parse_top_k(top_k, len(documents))
return_documents = bool(payload.get("return_documents", True))
response = self.svc.rerank(query.strip(), documents, top_k=top_k, return_documents=return_documents)
self.write_json(response)
@@ -342,6 +365,7 @@ def main() -> int:
parser.add_argument("--skip-startup-smoke", action="store_true", default=os.environ.get("OPENVINO_RERANKER_SKIP_STARTUP_SMOKE", "").lower() in {"1", "true", "yes"})
args = parser.parse_args()
assert_port_available(args.host, args.port)
service = RerankerService(
Path(args.model_dir).expanduser(),
args.model,
@@ -0,0 +1,55 @@
#!/usr/bin/env python3
"""Unit checks for reranker request validation helpers.
These tests intentionally avoid loading an OpenVINO model; they only cover the
stdlib validation helpers used before inference.
"""
from __future__ import annotations
import socket
import unittest
from server import assert_port_available, normalize_documents, parse_top_k
class ValidationTests(unittest.TestCase):
def test_normalize_accepts_strings_and_objects(self) -> None:
docs = normalize_documents(
[
"plain text document",
{"id": "obj", "text": "object document", "metadata": {"source": "synthetic"}},
],
max_documents=2,
)
self.assertEqual(docs[0], {"text": "plain text document"})
self.assertEqual(docs[1]["id"], "obj")
self.assertEqual(docs[1]["metadata"], {"source": "synthetic"})
def test_normalize_rejects_empty_or_too_many_documents(self) -> None:
with self.assertRaisesRegex(ValueError, "non-empty"):
normalize_documents([], max_documents=2)
with self.assertRaisesRegex(ValueError, "max_documents"):
normalize_documents(["a", "b", "c"], max_documents=2)
with self.assertRaisesRegex(ValueError, "non-empty string"):
normalize_documents([{"id": "empty", "text": ""}], max_documents=2)
def test_parse_top_k_defaults_clamps_and_rejects_invalid_values(self) -> None:
self.assertEqual(parse_top_k(None, document_count=3), 3)
self.assertEqual(parse_top_k(2, document_count=3), 2)
self.assertEqual(parse_top_k(99, document_count=3), 3)
for value in (0, -1, True, False, 1.5, "2", "nope"):
with self.subTest(value=value):
with self.assertRaisesRegex(ValueError, "positive integer"):
parse_top_k(value, document_count=3)
def test_assert_port_available_detects_listener_conflict(self) -> None:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
listener.bind(("127.0.0.1", 0))
listener.listen(1)
port = listener.getsockname()[1]
with self.assertRaisesRegex(RuntimeError, "cannot bind"):
assert_port_available("127.0.0.1", port)
if __name__ == "__main__":
unittest.main()