fix: harden OpenVINO doc triage prototype

This commit is contained in:
William Valentin
2026-06-04 12:10:18 -07:00
parent 6b1cae016c
commit 582e0ee553
4 changed files with 58 additions and 13 deletions
+2 -2
View File
@@ -2,7 +2,7 @@
Local-only, CLI-first prototype for triaging screenshots, photos/scans, and PDF page images. Local-only, CLI-first prototype for triaging screenshots, photos/scans, and PDF page images.
It returns structured JSON metadata and explicitly reports CPU vs NPU stages. It returns structured JSON metadata and explicitly reports CPU vs NPU stages.
Optional HTTP is a localhost-only prototype on `127.0.0.1:18829` when explicitly started; it is not a live Atlas/Hermes/RAG integration. Optional HTTP is a localhost/loopback-only prototype on `127.0.0.1:18829` when explicitly started; non-loopback binds are rejected and it is not a live Atlas/Hermes/RAG integration.
Location: `/home/will/lab/swarm/openvino-doc-image-triage-npu/` Location: `/home/will/lab/swarm/openvino-doc-image-triage-npu/`
@@ -121,7 +121,7 @@ cd /home/will/lab/swarm/openvino-doc-image-triage-npu
/home/will/.venvs/npu/bin/python tests/smoke_test.py /home/will/.venvs/npu/bin/python tests/smoke_test.py
``` ```
Expected: JSON ending with `"ok": true`. If the embeddings service is up, the result should show positive NPU busy-time delta and each embedded page should report `verified_npu: true`. Expected: JSON ending with `"ok": true`. The smoke test generates only synthetic fixtures, verifies non-loopback HTTP binds are rejected, starts its temporary server on a preflighted free localhost port, and terminates it before exit. If the embeddings service is up, the result should show positive NPU busy-time delta and each embedded page should report `verified_npu: true`.
## Example output shape ## Example output shape
+3 -3
View File
@@ -5,7 +5,7 @@ Status: CLI-first prototype specification; not a live Atlas/Hermes integration.
## Safety stance ## Safety stance
- Default workflow is local CLI execution against explicitly named files. - Default workflow is local CLI execution against explicitly named files.
- Optional HTTP is disabled unless a human starts it, binds to localhost, and is intended for `127.0.0.1:18829` only. - Optional HTTP is disabled unless a human starts it, is constrained to loopback (`127.0.0.1`, `::1`, or `localhost`), and is intended for `127.0.0.1:18829` only.
- No persistent systemd unit, Docker service, gateway hook, Atlas/Hermes route, RAG route, Chroma/vector collection mutation, or in-place reindexing is part of this spec. - No persistent systemd unit, Docker service, gateway hook, Atlas/Hermes route, RAG route, Chroma/vector collection mutation, or in-place reindexing is part of this spec.
- Smoke data must be synthetic/non-private only. Do not point this tool at Will's private document, image, screenshot, Downloads, Desktop, Obsidian, or photo-library directories without explicit approval. - Smoke data must be synthetic/non-private only. Do not point this tool at Will's private document, image, screenshot, Downloads, Desktop, Obsidian, or photo-library directories without explicit approval.
- NPU claims require `/sys/class/accel/accel0/device/npu_busy_time_us` before/after deltas. HTTP 200, JSON output, or model-load success alone is not NPU proof. - NPU claims require `/sys/class/accel/accel0/device/npu_busy_time_us` before/after deltas. HTTP 200, JSON output, or model-load success alone is not NPU proof.
@@ -107,10 +107,10 @@ Expected smoke coverage:
- Runs CLI triage against the synthetic invoice image/PDF under an explicit allowed root. - Runs CLI triage against the synthetic invoice image/PDF under an explicit allowed root.
- Asserts privacy flags (`external_uploads: false`, no full path by default). - Asserts privacy flags (`external_uploads: false`, no full path by default).
- Asserts invoice category/needs-attention behavior on synthetic text. - Asserts invoice category/needs-attention behavior on synthetic text.
- Starts a temporary localhost HTTP server on an ephemeral smoke port, calls `/healthz` and `/triage`, verifies no full path leakage, rejects attempts to widen allowed roots, and rejects external embedding URLs. - Starts a temporary localhost HTTP server on a preflighted free ephemeral port, calls `/healthz` and `/triage`, verifies no full path leakage, rejects attempts to widen allowed roots, rejects external embedding URLs, and verifies non-loopback binds are rejected.
- Terminates the temporary server. - Terminates the temporary server.
The smoke port in tests should stay ephemeral/non-live (currently `18828`) to avoid claiming `18829` as a persistent service. The smoke port in tests should stay OS-assigned ephemeral/non-live to avoid claiming `18829` as a persistent service.
## NPU busy-time verification plan ## NPU busy-time verification plan
+20 -2
View File
@@ -13,6 +13,7 @@ configured allowed roots. It never uploads document/image contents externally.
from __future__ import annotations from __future__ import annotations
import argparse import argparse
import ipaddress
import json import json
import os import os
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
@@ -23,6 +24,19 @@ from urllib.parse import urlparse
from triage import DEFAULT_EMBED_URL, TriageOptions, read_npu_busy, triage_batch, triage_file from triage import DEFAULT_EMBED_URL, TriageOptions, read_npu_busy, triage_batch, triage_file
def _validate_loopback_host(host: str) -> str:
"""Reject non-loopback binds; this prototype is never a LAN service."""
normalized = host.strip()
if normalized == "localhost":
return normalized
try:
if ipaddress.ip_address(normalized).is_loopback:
return normalized
except ValueError:
pass
raise ValueError("host must be localhost/loopback for this prototype")
def _roots_within_configured(requested_roots: list[Any], configured_roots: list[Path]) -> list[Path]: def _roots_within_configured(requested_roots: list[Any], configured_roots: list[Path]) -> list[Path]:
"""Return request roots only when they narrow the startup allowlist.""" """Return request roots only when they narrow the startup allowlist."""
narrowed: list[Path] = [] narrowed: list[Path] = []
@@ -166,10 +180,14 @@ def main() -> int:
parser.add_argument("--port", type=int, default=int(os.environ.get("DOC_IMAGE_TRIAGE_PORT", "18829"))) parser.add_argument("--port", type=int, default=int(os.environ.get("DOC_IMAGE_TRIAGE_PORT", "18829")))
parser.add_argument("--allowed-root", action="append", default=[], help="allowed local root; may repeat") parser.add_argument("--allowed-root", action="append", default=[], help="allowed local root; may repeat")
args = parser.parse_args() args = parser.parse_args()
try:
host = _validate_loopback_host(args.host)
except ValueError as exc:
parser.error(str(exc))
roots = [Path(p).expanduser().resolve() for p in args.allowed_root] or [Path.cwd().resolve()] roots = [Path(p).expanduser().resolve() for p in args.allowed_root] or [Path.cwd().resolve()]
httpd = ThreadingHTTPServer((args.host, args.port), Handler) httpd = ThreadingHTTPServer((host, args.port), Handler)
httpd.allowed_roots = roots # type: ignore[attr-defined] httpd.allowed_roots = roots # type: ignore[attr-defined]
print(json.dumps({"service": "openvino-doc-image-triage-npu", "host": args.host, "port": args.port, "allowed_roots": [str(p) for p in roots]}), flush=True) print(json.dumps({"service": "openvino-doc-image-triage-npu", "host": host, "port": args.port, "allowed_roots": [str(p) for p in roots]}), flush=True)
httpd.serve_forever() httpd.serve_forever()
return 0 return 0
@@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import json import json
import socket
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
@@ -42,6 +43,29 @@ def busy() -> int | None:
return None return None
def choose_free_loopback_port() -> int:
    """Pick a free 127.0.0.1 TCP port from the OS and confirm nothing listens on it.

    Returns:
        The port number the OS assigned to a throwaway socket bound to
        ``127.0.0.1`` with port ``0``.

    Raises:
        AssertionError: if a connection attempt to the chosen port succeeds,
            meaning something is already listening there.
    """
    loopback = "127.0.0.1"

    # Binding to port 0 asks the OS to choose the port. The socket is closed
    # when the block exits, so the port is only observed, not reserved.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as reserver:
        reserver.bind((loopback, 0))
        chosen = int(reserver.getsockname()[1])

    # connect_ex returns 0 only when the connection succeeds, i.e. when a
    # listener already exists on the chosen port.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as prober:
        prober.settimeout(0.25)
        connect_status = prober.connect_ex((loopback, chosen))

    assert connect_status != 0, f"selected port already has a listener: {chosen}"
    return chosen
def assert_loopback_bind_policy(timeout: float = 30.0) -> None:
    """Assert that ``server.py`` refuses to start on a non-loopback address.

    Runs ``server.py --host 0.0.0.0 --port 0`` from ``ROOT`` and expects it to
    exit with a non-zero status and mention "loopback" on stderr.

    Args:
        timeout: Seconds to wait for the server process to exit. A process
            still running after this long is treated as having accepted the
            bind.

    Raises:
        AssertionError: if the process exits with status 0, if stderr does not
            mention "loopback", or if the process does not exit within
            ``timeout`` seconds.
    """
    command = [
        sys.executable,
        "server.py",
        "--host",
        "0.0.0.0",
        "--port",
        "0",
        "--allowed-root",
        str(ROOT),
    ]
    try:
        blocked = subprocess.run(
            command,
            cwd=ROOT,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            # Without a bound, a regression that lets the bind through would
            # leave the server running and hang the smoke test indefinitely.
            # subprocess.run kills the child before raising TimeoutExpired.
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:
        raise AssertionError(
            f"server.py did not exit within {timeout}s for a non-loopback bind; "
            "the bind was likely accepted"
        ) from exc
    assert blocked.returncode != 0, blocked.stdout + blocked.stderr
    assert "loopback" in blocked.stderr.lower(), blocked.stderr
def main() -> int: def main() -> int:
run([sys.executable, "make_samples.py"]) run([sys.executable, "make_samples.py"])
invoice = SAMPLES / "synthetic_invoice.png" invoice = SAMPLES / "synthetic_invoice.png"
@@ -69,20 +93,23 @@ def main() -> int:
assert (emb.get("npu_busy_delta_us") or 0) > 0, emb assert (emb.get("npu_busy_delta_us") or 0) > 0, emb
assert after > before, {"before": before, "after": after, "embedding": emb} assert after > before, {"before": before, "after": after, "embedding": emb}
# HTTP smoke on an ephemeral localhost port so we do not collide with 18820 during tests. # HTTP smoke on a preflighted free localhost port so we do not collide with live/prototype ports.
proc = subprocess.Popen([sys.executable, "server.py", "--host", "127.0.0.1", "--port", "18828", "--allowed-root", str(ROOT)], cwd=ROOT, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) assert_loopback_bind_policy()
smoke_port = choose_free_loopback_port()
base_url = f"http://127.0.0.1:{smoke_port}"
proc = subprocess.Popen([sys.executable, "server.py", "--host", "127.0.0.1", "--port", str(smoke_port), "--allowed-root", str(ROOT)], cwd=ROOT, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
try: try:
deadline = time.time() + 5 deadline = time.time() + 5
while time.time() < deadline: while time.time() < deadline:
try: try:
health = urllib.request.urlopen("http://127.0.0.1:18828/healthz", timeout=1).read() health = urllib.request.urlopen(f"{base_url}/healthz", timeout=1).read()
assert b"openvino-doc-image-triage-npu" in health assert b"openvino-doc-image-triage-npu" in health
break break
except Exception: except Exception:
time.sleep(0.1) time.sleep(0.1)
else: else:
raise AssertionError("server did not become ready") raise AssertionError("server did not become ready")
resp = post_json("http://127.0.0.1:18828/triage", {"path": str(invoice), "options": {"allowed_roots": [str(ROOT)]}}) resp = post_json(f"{base_url}/triage", {"path": str(invoice), "options": {"allowed_roots": [str(ROOT)]}})
assert resp["ok"] is True, resp assert resp["ok"] is True, resp
assert resp["result"]["source_path_basename"] == "synthetic_invoice.png" assert resp["result"]["source_path_basename"] == "synthetic_invoice.png"
assert "source_path" not in resp["result"] assert "source_path" not in resp["result"]
@@ -92,7 +119,7 @@ def main() -> int:
outside.write(b"sensitive text outside configured artifact root") outside.write(b"sensitive text outside configured artifact root")
outside.flush() outside.flush()
status, blocked = post_json_status( status, blocked = post_json_status(
"http://127.0.0.1:18828/triage", f"{base_url}/triage",
{"path": outside.name, "options": {"allowed_roots": ["/tmp"], "dry_run": True, "use_embeddings": False}}, {"path": outside.name, "options": {"allowed_roots": ["/tmp"], "dry_run": True, "use_embeddings": False}},
) )
assert status == 400, blocked assert status == 400, blocked
@@ -101,7 +128,7 @@ def main() -> int:
# Request bodies must not redirect extracted text to caller-supplied endpoints. # Request bodies must not redirect extracted text to caller-supplied endpoints.
status, blocked = post_json_status( status, blocked = post_json_status(
"http://127.0.0.1:18828/triage", f"{base_url}/triage",
{"path": str(invoice), "options": {"embedding_url": "http://198.51.100.1:9/v1/embeddings"}}, {"path": str(invoice), "options": {"embedding_url": "http://198.51.100.1:9/v1/embeddings"}},
) )
assert status == 400, blocked assert status == 400, blocked