diff --git a/openvino-reranker-npu/README.md b/openvino-reranker-npu/README.md index d6d6186..b0b7b70 100644 --- a/openvino-reranker-npu/README.md +++ b/openvino-reranker-npu/README.md @@ -13,8 +13,9 @@ This service is intentionally not wired into live RAG by default. ## Files - `SPEC.md` — endpoint/CLI contract, model/runtime recommendation, smoke/NPU proof plan, RAG integration plan, docs implications, and no-go criteria. -- `server.py` — stdlib HTTP OpenVINO Runtime service. +- `server.py` — stdlib HTTP OpenVINO Runtime service with fail-fast localhost listener conflict checks and request validation. - `smoke.py` — non-private API/ranking/NPU busy-time smoke test. +- `tests/test_server_validation.py` — stdlib unit checks for request validation and listener conflict detection. - `openvino-reranker.service` — optional user-systemd unit. ## One-time setup @@ -62,7 +63,7 @@ OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco python /home/will/lab/swarm/openvino-reranker-npu/server.py ``` -Startup performs a non-private smoke inference and fails closed when `OPENVINO_RERANKER_DEVICE=NPU` but `npu_busy_time_us` does not increase. +Startup performs a non-private smoke inference and fails closed when `OPENVINO_RERANKER_DEVICE=NPU` but `npu_busy_time_us` does not increase. It also checks whether the requested listener can bind before compiling the OpenVINO model, so obvious port conflicts fail fast; the real server bind still happens immediately after model load. ## API @@ -110,6 +111,16 @@ Expected: - The top result matches the non-private fixture expectation. - Response and sysfs `npu_busy_delta_us` are positive. +## Validation checks + +```bash +source /home/will/.venvs/openvino-reranker/bin/activate +PYTHONPATH=/home/will/lab/swarm/openvino-reranker-npu \ + python -m unittest discover -s /home/will/lab/swarm/openvino-reranker-npu/tests +``` + +These checks do not compile the OpenVINO model; they cover request validation and fail-fast listener conflict detection. + ## Optional systemd user service Install the unit only after the foreground command and smoke test pass: diff --git a/openvino-reranker-npu/SPEC.md b/openvino-reranker-npu/SPEC.md index de40a03..cd32c13 100644 --- a/openvino-reranker-npu/SPEC.md +++ b/openvino-reranker-npu/SPEC.md @@ -105,7 +105,7 @@ Error response shape: Status behavior: -- 400: invalid JSON schema, empty query, missing/empty documents, invalid document text. +- 400: invalid JSON schema, empty query, missing/empty documents, invalid document text, or non-positive/non-integer `top_k`/`top_n`. - 413: request body above `OPENVINO_RERANKER_MAX_BODY_BYTES`. - 503: model not ready. - 500: unexpected inference/runtime failure. diff --git a/openvino-reranker-npu/server.py b/openvino-reranker-npu/server.py index 74554eb..26cc1bd 100755 --- a/openvino-reranker-npu/server.py +++ b/openvino-reranker-npu/server.py @@ -16,6 +16,7 @@ import argparse import json import math import os +import socket import sys import threading import time @@ -251,6 +252,27 @@ def normalize_documents(value: Any, max_documents: int) -> list[dict[str, Any]]: return docs +def parse_top_k(value: Any, document_count: int) -> int: + """Validate top_k/top_n before inference so schema errors return HTTP 400.""" + if value is None: + return document_count + if isinstance(value, bool) or not isinstance(value, int): + raise ValueError("top_k/top_n must be a positive integer") + if value < 1: + raise ValueError("top_k/top_n must be a positive integer") + return min(value, document_count) + + +def assert_port_available(host: str, port: int) -> None: + """Fail fast on listener conflicts before compiling the OpenVINO model.""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + try: + sock.bind((host, port)) + except OSError as exc: + raise RuntimeError(f"cannot bind {host}:{port}; listener conflict or invalid bind: {exc}") from exc + + class Handler(BaseHTTPRequestHandler): server_version = "OpenVINOReranker/0.1" @@ -293,6 +315,7 @@ class Handler(BaseHTTPRequestHandler): raise ValueError("query is required") top_k = payload.get("top_k", payload.get("top_n")) documents = normalize_documents(payload.get("documents"), self.max_documents) + top_k = parse_top_k(top_k, len(documents)) return_documents = bool(payload.get("return_documents", True)) response = self.svc.rerank(query.strip(), documents, top_k=top_k, return_documents=return_documents) self.write_json(response) @@ -342,6 +365,7 @@ def main() -> int: parser.add_argument("--skip-startup-smoke", action="store_true", default=os.environ.get("OPENVINO_RERANKER_SKIP_STARTUP_SMOKE", "").lower() in {"1", "true", "yes"}) args = parser.parse_args() + assert_port_available(args.host, args.port) service = RerankerService( Path(args.model_dir).expanduser(), args.model, diff --git a/openvino-reranker-npu/tests/test_server_validation.py b/openvino-reranker-npu/tests/test_server_validation.py new file mode 100644 index 0000000..0852b55 --- /dev/null +++ b/openvino-reranker-npu/tests/test_server_validation.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +"""Unit checks for reranker request validation helpers. + +These tests intentionally avoid loading an OpenVINO model; they only cover the +stdlib validation helpers used before inference. +""" +from __future__ import annotations + +import socket +import unittest + +from server import assert_port_available, normalize_documents, parse_top_k + + +class ValidationTests(unittest.TestCase): + def test_normalize_accepts_strings_and_objects(self) -> None: + docs = normalize_documents( + [ + "plain text document", + {"id": "obj", "text": "object document", "metadata": {"source": "synthetic"}}, + ], + max_documents=2, + ) + self.assertEqual(docs[0], {"text": "plain text document"}) + self.assertEqual(docs[1]["id"], "obj") + self.assertEqual(docs[1]["metadata"], {"source": "synthetic"}) + + def test_normalize_rejects_empty_or_too_many_documents(self) -> None: + with self.assertRaisesRegex(ValueError, "non-empty"): + normalize_documents([], max_documents=2) + with self.assertRaisesRegex(ValueError, "max_documents"): + normalize_documents(["a", "b", "c"], max_documents=2) + with self.assertRaisesRegex(ValueError, "non-empty string"): + normalize_documents([{"id": "empty", "text": ""}], max_documents=2) + + def test_parse_top_k_defaults_clamps_and_rejects_invalid_values(self) -> None: + self.assertEqual(parse_top_k(None, document_count=3), 3) + self.assertEqual(parse_top_k(2, document_count=3), 2) + self.assertEqual(parse_top_k(99, document_count=3), 3) + for value in (0, -1, True, False, 1.5, "2", "nope"): + with self.subTest(value=value): + with self.assertRaisesRegex(ValueError, "positive integer"): + parse_top_k(value, document_count=3) + + def test_assert_port_available_detects_listener_conflict(self) -> None: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener: + listener.bind(("127.0.0.1", 0)) + listener.listen(1) + port = listener.getsockname()[1] + with self.assertRaisesRegex(RuntimeError, "cannot bind"): + assert_port_available("127.0.0.1", port) + + +if __name__ == "__main__": + unittest.main()