From 5b01b1bd113792aacca885b11299e40a2973a621 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Thu, 4 Jun 2026 11:41:55 -0700 Subject: [PATCH] feat: add OpenVINO NPU prototype services --- openvino-doc-image-triage-npu/README.md | 159 ++++++ openvino-doc-image-triage-npu/make_samples.py | 69 +++ .../samples/synthetic_blurry.png | Bin 0 -> 4620 bytes .../samples/synthetic_blurry.png.txt | 2 + .../samples/synthetic_conversation.png | Bin 0 -> 9325 bytes .../samples/synthetic_conversation.png.txt | 3 + .../samples/synthetic_invoice.pdf | Bin 0 -> 28071 bytes .../samples/synthetic_invoice.pdf.txt | 5 + .../samples/synthetic_invoice.png | Bin 0 -> 13347 bytes .../samples/synthetic_invoice.png.txt | 5 + .../samples/synthetic_receipt.png | Bin 0 -> 12268 bytes .../samples/synthetic_receipt.png.txt | 5 + .../samples/synthetic_sensitive_form.png | Bin 0 -> 12769 bytes .../samples/synthetic_sensitive_form.png.txt | 5 + openvino-doc-image-triage-npu/server.py | 178 +++++++ .../tests/smoke_test.py | 127 +++++ openvino-doc-image-triage-npu/triage.py | 459 ++++++++++++++++++ openvino-genai-npu-worker/README.md | 111 +++++ openvino-genai-npu-worker/smoke_llm_npu.py | 73 +++ .../systemd/openvino-genai-npu-worker.service | 16 + openvino-genai-npu-worker/worker.py | 251 ++++++++++ openvino-reranker-npu/README.md | 138 ++++++ .../openvino-reranker.service | 19 + openvino-reranker-npu/server.py | 369 ++++++++++++++ openvino-reranker-npu/smoke.py | 167 +++++++ scripts/npu-service-health.sh | 8 +- 26 files changed, 2165 insertions(+), 4 deletions(-) create mode 100644 openvino-doc-image-triage-npu/README.md create mode 100644 openvino-doc-image-triage-npu/make_samples.py create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_blurry.png create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_blurry.png.txt create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_conversation.png create mode 100644 
openvino-doc-image-triage-npu/samples/synthetic_conversation.png.txt create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_invoice.pdf create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_invoice.pdf.txt create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_invoice.png create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_invoice.png.txt create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_receipt.png create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_receipt.png.txt create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_sensitive_form.png create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_sensitive_form.png.txt create mode 100644 openvino-doc-image-triage-npu/server.py create mode 100644 openvino-doc-image-triage-npu/tests/smoke_test.py create mode 100644 openvino-doc-image-triage-npu/triage.py create mode 100644 openvino-genai-npu-worker/README.md create mode 100644 openvino-genai-npu-worker/smoke_llm_npu.py create mode 100644 openvino-genai-npu-worker/systemd/openvino-genai-npu-worker.service create mode 100644 openvino-genai-npu-worker/worker.py create mode 100644 openvino-reranker-npu/README.md create mode 100644 openvino-reranker-npu/openvino-reranker.service create mode 100755 openvino-reranker-npu/server.py create mode 100755 openvino-reranker-npu/smoke.py diff --git a/openvino-doc-image-triage-npu/README.md b/openvino-doc-image-triage-npu/README.md new file mode 100644 index 0000000..56890db --- /dev/null +++ b/openvino-doc-image-triage-npu/README.md @@ -0,0 +1,159 @@ +# OpenVINO NPU document/image triage prototype + +Local-only prototype for triaging screenshots, photos/scans, and PDF page images. +It returns structured JSON metadata and explicitly reports CPU vs NPU stages. + +Location: `/home/will/lab/swarm/openvino-doc-image-triage-npu/` + +## Privacy and safety + +- No external uploads. 
+- The only network call is optional localhost-only embeddings at `127.0.0.1:18817`. +- Raw OCR/sidecar text is redacted by default and is not logged. +- Full source paths are omitted by default; responses include basename and SHA-256. +- Allowed roots are enforced for CLI/server requests. +- This prototype does not mutate Obsidian, RAG, Chroma, vector collections, routing, or gateway services. + +## CPU vs NPU stages + +CPU: +- file intake, allowed-root checks, size checks, hashing +- image/PDF decoding/rendering and normalization +- optional local text extraction from sidecars or PDF text libraries +- regex metadata extraction and rule-based category fallback +- final needs-attention rules + +NPU: +- needs-attention semantic embedding, via existing local OpenVINO embeddings service on `:18817` +- verified with `/sys/class/accel/accel0/device/npu_busy_time_us` before/after each embedding call + +Not configured in v1: +- image category classifier on NPU. The JSON reports this as `CPU rule fallback (NPU model not configured in prototype v1)`. A future task can add a static-shape MobileNet/EfficientNet/ResNet OpenVINO IR model. +- OCR on NPU. OCR remains CPU/local plumbing in v1. + +## Files + +- `triage.py` — core library and CLI. +- `server.py` — stdlib HTTP server with `/healthz`, `/models`, `/triage`, `/triage/batch`. +- `make_samples.py` — creates synthetic non-private image/PDF samples. +- `tests/smoke_test.py` — end-to-end smoke test, including NPU busy-time verification when `:18817` is reachable. +- `samples/` — generated synthetic fixtures. + +## Requirements + +Use the existing NPU venv when available: + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +/home/will/.venvs/npu/bin/python -m pip install pillow +``` + +`pillow` is already present in the discovered `/home/will/.venvs/npu`. 
Optional local PDF text/rendering improves PDF support: + +```bash +/home/will/.venvs/npu/bin/python -m pip install pypdf pypdfium2 +``` + +The smoke tests do not require external services except the existing localhost `:18817` embeddings service for positive NPU verification. + +## CLI usage + +Generate synthetic samples: + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +/home/will/.venvs/npu/bin/python make_samples.py +``` + +Triage local files: + +```bash +/home/will/.venvs/npu/bin/python triage.py \ + --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu \ + --pretty \ + samples/synthetic_invoice.png samples/synthetic_invoice.pdf +``` + +Disable the local NPU embeddings call if needed: + +```bash +/home/will/.venvs/npu/bin/python triage.py --no-embeddings --allowed-root "$PWD" samples/synthetic_receipt.png +``` + +Include OCR/sidecar text in a single response only when explicitly requested: + +```bash +/home/will/.venvs/npu/bin/python triage.py --include-ocr-text --allowed-root "$PWD" samples/synthetic_invoice.png +``` + +## HTTP usage + +Check that port 18820 is free first: + +```bash +ss -ltnp | grep ':18820\b' || true +``` + +Start local-only server: + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18820 --allowed-root "$PWD" +``` + +Call it: + +```bash +curl -sS http://127.0.0.1:18820/healthz | jq +curl -sS http://127.0.0.1:18820/models | jq +curl -sS -X POST http://127.0.0.1:18820/triage \ + -H 'Content-Type: application/json' \ + -d '{"path":"/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png","options":{"allowed_roots":["/home/will/lab/swarm/openvino-doc-image-triage-npu"]}}' | jq +``` + +## Smoke test + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +/home/will/.venvs/npu/bin/python tests/smoke_test.py +``` + +Expected: JSON ending with `"ok": true`. 
If the embeddings service is up, the result should show positive NPU busy-time delta and each embedded page should report `verified_npu: true`. + +## Example output shape + +```json +{ + "file_id": "sha256:...", + "source_path_basename": "synthetic_invoice.png", + "media_type": "image", + "page_count": 1, + "pages": [ + { + "page_index": 0, + "classification": { + "label": "bill_or_invoice", + "confidence": 0.71, + "device": "CPU", + "method": "rule_based_fallback" + }, + "needs_attention": { + "value": true, + "device": "NPU+CPU", + "reasons": ["amount_due", "due_date_present"], + "embedding": {"verified_npu": true, "npu_busy_delta_us": 12345} + }, + "metadata": {"dates_count": 1, "amounts_count": 1, "raw_values_redacted": true}, + "ocr": {"available": true, "device": "CPU"} + } + ], + "processing_device_summary": { + "file_intake": "CPU", + "image_category_classification": "CPU rule fallback (NPU model not configured in prototype v1)", + "needs_attention_embedding": "NPU via local :18817", + "metadata_extraction": "CPU", + "npu_verified": true + }, + "privacy": {"external_uploads": false, "raw_text_logged": false} +} +``` diff --git a/openvino-doc-image-triage-npu/make_samples.py b/openvino-doc-image-triage-npu/make_samples.py new file mode 100644 index 0000000..41179bd --- /dev/null +++ b/openvino-doc-image-triage-npu/make_samples.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +from pathlib import Path + +from PIL import Image, ImageDraw, ImageFilter + +ROOT = Path(__file__).resolve().parent +SAMPLES = ROOT / "samples" + + +def make_doc(path: Path, lines: list[str], size=(900, 1200), rotate: int = 0, blur: bool = False) -> None: + img = Image.new("RGB", size, "white") + draw = ImageDraw.Draw(img) + y = 70 + for line in lines: + draw.text((70, y), line, fill="black") + y += 55 + draw.rectangle((55, 50, size[0] - 55, min(size[1] - 50, y + 30)), outline="gray", width=3) + if blur: + img = img.filter(ImageFilter.GaussianBlur(2.5)) 
+ if rotate: + img = img.rotate(rotate, expand=True, fillcolor="white") + img.save(path) + path.with_suffix(path.suffix + ".txt").write_text("\n".join(lines) + "\n") + + +def main() -> int: + SAMPLES.mkdir(exist_ok=True) + make_doc(SAMPLES / "synthetic_invoice.png", [ + "ACME Utilities Invoice", + "Invoice No: INV-2026-0604", + "Amount Due: $123.45", + "Payment due 2026-06-30", + "Please submit payment by the due date.", + ]) + make_doc(SAMPLES / "synthetic_receipt.png", [ + "Neighborhood Store Receipt", + "Subtotal $14.20", + "Tax $1.42", + "Total $15.62", + "Thank you for shopping", + ], size=(720, 1100), rotate=3) + make_doc(SAMPLES / "synthetic_conversation.png", [ + "Messages with Alex", + "Can you please respond by tomorrow?", + "Need signature on the form before Friday.", + ], size=(1200, 750)) + make_doc(SAMPLES / "synthetic_sensitive_form.png", [ + "Sample Government Form - Fake Data", + "Applicant: Test Person", + "SSN: 123-45-6789", + "Signature required", + "Submit by Jan 15, 2027", + ], blur=False) + make_doc(SAMPLES / "synthetic_blurry.png", [ + "Low resolution blurred sample", + "No action required", + ], size=(360, 250), blur=True) + # PIL can save a simple local PDF from a synthetic page. This is non-private. 
+ pdf_img = Image.open(SAMPLES / "synthetic_invoice.png").convert("RGB") + pdf_img.save(SAMPLES / "synthetic_invoice.pdf", "PDF") + (SAMPLES / "synthetic_invoice.pdf.txt").write_text((SAMPLES / "synthetic_invoice.png.txt").read_text()) + print(f"wrote samples under {SAMPLES}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/openvino-doc-image-triage-npu/samples/synthetic_blurry.png b/openvino-doc-image-triage-npu/samples/synthetic_blurry.png new file mode 100644 index 0000000000000000000000000000000000000000..cfa0c47a61894b19377af2c3088f6cb560837e4f GIT binary patch literal 4620 zcmdT|c`zI7w+@PaU1Hy3X|+`CMa5np8cS_ewJ(jT{Ud4@s`Y7zh@~xBTkS1vNbMmg zi6~#GDoPNNS}F)lEe++DxpU{v+`0eUzwZ3*KkvNr%)IBkbIy66^FAlT!47ivl;kM@ z0C3jI()=a>z!AvavrcldS1xbsWB@>5+{)baR$TE$$wp-5pddG~7IbjLSi=Hfe$jm^ zlUFToe?*1#J~si$NSJ1p=dG>PIDcfqIRS$XwXHmWYaqauK@JRXO_IO?2;%1jRGESR zIXwT)z>D2x-sSkCO$7j4+SxhmFv`UPZPoJ8WbTVAXVa~O@{(Gw;PI}mDC*RnjlFMEDplD28uAO zE0VmBxhai*XBraf0$MA*F|K{4pKK6`IKnNd02i!gcUr+Xj6vDx(3=SlSed_5vlOG& z64cPp(3AWd=cy(LF|4J`xu*0)yr!4gj{i(j4{0B*INKWYnjufy-=R;@>GY+@4(|qx z% z=q*q^vwIDS4V4!77{AHhpa zXGV~v)*9K7PXXKMhOGA8cJUXxcr2YRk25ndI@cCP-YA2nIDZiud$t~unCf&jMg=Ie z@>!Gj%&uJvSvfxQz74>aCI}le5akcJQR>g+!<152i6g<%Dtzhs_K>o!$JsjU|1#J@jgscf>`t$ObbK zBMk;;HF%4t@xaIKB~?Ie#U3=bBP+&>ClZgYWJHjWTB0s`ZXb=QM+E_+!p4yXzXN$T zzp8J%f3xWS6pqzT$h3*yxhmcU@5;08!jj}!nub>AFn-JXQOyl%iV|injGfU-K*oEY z-f}!KpERI;|HOkEEvhe8pGV0f`(-bb8f{EMP}vH>JFTh@!cT&HK*YVqL1}lE&UH(y zs7F`OfsDAz15dB^u$?kZn#13L(eK)ao_L7(8MitZ{FHgq1q1DLbR|QD5$Nh@tJx!e zUed?RF`1_FuNuVrxgEm4bN%zEKg|=FdS5)6J(SGq+rkbL%@#=hAQnqRHN7J9rY@XR zS~&>&-1-)(vq?(&JMQIvChb#^(o739omlQ%+GS=jd=oTg;H`O$%HOX+7&xfeV0%i| zQK*s8^w@p~I`(!mK=%Kc@!_d`nioxXz(nLnRwgk)!n+5ir0~@k!Vif;aNcN(J^oN$ z|25O){vlPRmUYBxkJCj@lki22E>iBu{zPShz}~`7l8t7_S&z3&Ov5ubq@VcXLYS-l zSi)FgaDXA^f|SS74Mof>qxn3t{~Z(P6@;9-UXtI<@bUJpu-@F6@}+^yLD)%tX4?L5 z6&9MiK#Dr?hU`mjZO!fmk7zEKsWkn3#Ye%CU_bF+W{{5G(mbe;2wb_UNY=YTg1~_R 
zlGz%DKlBv%jBQxdo#8aDC|T0L)U74p*CLF)M$DlQg?k_vO6eZR5o8_q)Sh_X4u>@W zUPM#LuAs|ke8sTCRJ2NEu*uB)F04Tjcul4L0`}GSg{Xq}yw2Z*eNNFDVf)135LAD= zN;R;2g9JlSBjQ4AbmWq^vpX;UW#$<6yyin4(nsLJosR5fk@|{SsH|c#KmGNpRx;h0 ztF}xH?N7SX(zH7waI z>0tM7T1~~4g4u#5?*BAWOYzSM-pV+|f{LLk5Nz48^{ZJIqu7aUvpoMY`7$UIP!*q^4RyYyffg_d-?udQ!| zatf2$zceINoi|laxH~0e=Eu|R`De$VybAE3qFqXi*bzl+S2J-)IfX;?-(LreyeY z^DbU7${4L8$UsarPYB5eL{npKfbxauq|s}z z-rHAm1E|VEC$Uac`u%xU6vx7e0J&oAa28 z{g+hIX+KKsSF=08Yn6%L*oO9+FXi%*P4;U$#6q)+a)aWgQzr5Q9sHtC#_vSbb9Br` zMk9zm1$DR;ojoBuEO$pZu~)KlnSD}>8a`_7S;Iw%(^8RXK2|m`Oaj6#&%BFZ60;sQ zEP|=~nPN-V)#zE;G_%bQ3F;dN%0db?rGXc6s2!&v@Lw(FfApOHYoPg8TWCpqX{JkE z9jm4l0D7_;*ZoPIrR4t)Gc?vvJ-0gLUE;nOlE=_^GdM(30WQcZfbfUmy;ovEdd_mz z-}><)oGhwCR{&;0mEpz{+ZzzU4=c8-r5lWETgfKi>*{bA|Fg&M&eZz%K|{+{U;1-V z?-4PjK|K`jF6HE$M253L!&JR)eK>sodWx(nFzutZEe#Cp*O>cMCj4i#m#%H4&)uB; z`Tfjfce8nw8AtBHZsyR=Ie<>5b&SeF%9d8aJ z@8R8dZZOU)kTUw;YoTdmGMP@VpLI;HOoD7Ws7wPj!7K5%N3PgD;W9?{3)F9jYz-UP zc+X_)IsNo6u8Lb3PRL5eW*_bbN~}Ows3ZzRh~v-;EeC{*XxsK{e3hnwS-@Fs+o|X< z@>;D~l$(&iX9rY#?PZJBiiPIF=I0hQC9WL9Y8lGv(Ozv~ba1v(c9F{k$foG8Gq;>4 zqDvyr>L}y~0ggl={0Mn#y+?DJXHuU4DTs^cnxMM6n|Fh?!oCX*7>;T#GoqbAzih_? zS40t~)#^IhWf##I>-@uNqKGKD?GighMPoZ<&yhz|_Mz-adEVeN&%+`=Eyp~mPDVI82$`RKrEtpvu>fW{g2mk)&P7t{hSsW^gmErb!s zZZ;1&1oC|*F{|C!$wWf~`=336hW4osIEwp&)LvREV=W!mr?G2wf2&j$#@a*hB6|tc zLX5eukX2Yh=Y7$z^}PXa`}I0uF8zH@K z1|#3U00&eR>fO}7M%_||nJd`EsaDCo7ICuqGULx(u*L)DVb>~GI~~XT73u~q`lLF? 
zr3}<^0nJu!^I)fF>8huSlQV-sn4LTd+1AjfxhPqE(=D4NC0l zn%cj-(~mr}wE{8)buSGdUtXYo@Ew6PyS;(F0tdsuNn2r*i;vHrZhYHphNw?emLxd3 z5O=1)C$FIW4o})t67!JOiZImGV9Oq5vVPg9j3Lxq)Hqc>y`MwDZM7^fJ$whI_BfA| z?`C_X#1a%1*aVFULIsC~&k>B%6jKRf&%qmgG=&%kVT9EBS?EK*7OR`i59IAGbL@9E z^D0MxIV*Kk(5^`l&(7HaGSmKG9D6?%6>5X($D3^FbX^0lahl>L?W z*zXxfF08jUlKoQho>^OhhSPWQ(Hn_;O{&AMhz#r%Ng8}7{s}m>eh>`ZApkGOhCDB$OW}tLN)zlJmfqtLMg*SbB^p6sv*cet@af3kv@q3$5fq8xCYl(i8KQ%`?^;))r9cS-~MBRV`)(72=HIAVUo#s< z%jhA=JBGQQo&() zF|^55+$P-EW`Ip!CnuFY+_P8i)%u4CB00RM`F?P;w6p{_j`eEuj}3JM^6X|8Tf^f^+5+ue;RDaK<9f#eA z;DsFOtMGO(ZYjum;$Eu8$YQ}2Ppg^GB90^YkB2`O?S`(-VZC4k8Q(W1@<6c6M5i=T z0>qZDY-t)ZW|`7MyqAR4f&qGKe9y`LcNxi^alno5{u>WDB8GmLRCk|BcDcAe%AWE7 NtSs!zF@O3#`WIm(a6$k8 literal 0 HcmV?d00001 diff --git a/openvino-doc-image-triage-npu/samples/synthetic_blurry.png.txt b/openvino-doc-image-triage-npu/samples/synthetic_blurry.png.txt new file mode 100644 index 0000000..e640029 --- /dev/null +++ b/openvino-doc-image-triage-npu/samples/synthetic_blurry.png.txt @@ -0,0 +1,2 @@ +Low resolution blurred sample +No action required diff --git a/openvino-doc-image-triage-npu/samples/synthetic_conversation.png b/openvino-doc-image-triage-npu/samples/synthetic_conversation.png new file mode 100644 index 0000000000000000000000000000000000000000..cf28316add97006732a5447633448251cb61b614 GIT binary patch literal 9325 zcmeHN_g9l=`%jCCRROJ5R0Q-GK}AJD*^t+&rBQ~;k{J--`9P8*7fA)a~7s@>vpU| zAP{os?@wJoAXYs`AikVi`z5?`R55ff0)ae+K6S!6G<9|`!ncbfT3)c!%sRB~>1NG| zJL{hahjVTukGS-1tlanbyL%VZ?p%xc;k#6|_3N1LP9Hfq5wK4FyzErf9qXMZ1ALxa zWqnYsScR4OLTQU)^vl)RcZ?}QZj;3$T=Uz319)Qg4`Lcdf5r z84Fv=cx&LNm+Fm;2*j>^C)Oem8xwY|LLk#~_p}L|0?gQ|;W`ngm`EPJrDR*| zB2vc6L04qEt#KtG@dBzdRD#5Tk4uI{*96o7ID-yJzoP<+O0- zh9U;a{Lmz)fV$PXj8Hcca=QCj^)$B z+@9&FDK)c)9W+{rYyB-GdTMIwyXYN>L+;nFb67-);MlH|l$7f0)7%fgXx8!=5%sqf zD5IpaCH37!&W3@bjSO-GcBtw)vlgFbaQzH*oaI_~W3{4lP-l~Rh7$&Zy7u8WZp8uu z@p@BM{hHxBoA%6AvugWFJt^~V)?4LSm|GS(R|nH`%_kS;hN>yT#g@)2Q^^NK%0q+e 
zQ?#73>gJCUjQxFl@}EAnhuXQhxvjXf>SR^TNTYAoYaTpsz+lvRk{`FnwCLiC$8cF@ zm|$!99#&RX&dx)cM1G5oZfcXenq_(_vS~6egVE8^v65k1wa9DwO5iSfS1KHGRs+yKQNFhod9lv_! z>{&+#2bb{49Qfy^Dq-10MRVy9b78`T21ivQyVEquIk=BKNvC6pedSn;!O62{&#J}i zk6J6BgaIVv$=mXxHY{POS)+Kvj$=;>3w@!MZEbDpsR7@nD9ycpYQ-(Myl&mPl5uHi zp!}Nn*A>CD{Wb-*SE7y(70{%a{#OMR6`?&v&ddclAk3IhWiECtT{cMDvwf#YWGJ`w z^^Y=i6gA>wvggOvWO_%(IeYs)SiH39X0gN;m0}n2O0+Ix8;T%=!-Uh0M{YlI>2-6W zMm+NAvuEZ}^Si9BP*!pZ3uk`YZ0Ypk@>{s0tlW-cM6s~BxjEImOxPeHs;?U$Xu1&i zV|VY~Wdm;N`cUUunI}m|z#0QbH7@t*%J-({<_M$=&bxU8O*-oKt~FJEDMKRkfGda;T^2efe!Sfy0;*;W$bPASu*edeh;Q-ZKpEKE_AQ-%|3wSQRvxZ&TpW`mi%* z0DQ!r?`1cT9X#`F&P>*#GtUICK6DU zn3(v@H{ZN^^{Ow4H%a$|qeYXB>FA^f_j{}2*_{)eS)e^y;p_=z!{AY)!!1NoaLb|c z4bX*+_jfy%`8K6XW=g5~`T4gLR88C)@AqJc#%NNiXfT3B#zU_Rl(V3vQcx8fVZ6Gf z%kxVo#1Ff*M_KM5Qhd8O9AA|vTu@1J*th#;`jYdz$!}5UlW0)&j({Szjlh4_gElv2NsjpxegYVy4!{y*m0jEmbAOLg z07bqGy&G$?GLeFOgH%57W zPgY(AVda*b5^cmx4F`t=evLntq>k1CBplNrAt$A(D3h^z^=d#HKcOMe9C(NcL@U%R~0c4d)9qs@r0ICt)s zj;iAulnjCy)rHN5LeMBY3%}+&En3_M(apceiVMWLqXf4a%TuNc00uj7iW$2Rp1E40Jp) zZ01FSLy~*HH=x;qH2Sl8`ZgWki(H{p=qdF734MHN#;seoK%Zgbnm{p}sV;uh(%8MG z>Abkt0Hq4m+!aPy7mIu&2A55j^16 z0NfTfp2mAEqwtd88WzR}I$#hbqQ%9mzJ;x}0yCbMb4pVLLv{?v5iw{PG6 zqNq6sn%qvaCH5Ng21~(TKzz_S zeq0&XdKC7pbj<7AO1?A)U`Hfx;8{S}_)YcLfN1LedDLcHY#l^BLR=7Gw zu=NYXe{S31M_0K*H79i0Y`SGE=meuCY?d6y(kurnU~uaLm+_j{-YZ!SFBYX6sR)Zg z7R~)P0S}3Wzxvc-?>`cLwwyHfIEM^%fwgzt3SIell`FLFbJzM3)PtlXVl-Nwoew8; z*r{S(Y36V;P^E4dzsGS}W9TLjQg&fsVMWDq%0axI1|j|@AT=X4Ec5O|Ku&}hfY9z++mb~&KaRsn(an(dmgpHs*^8Tw@+T(_u zxK=QXT)#siBv0cnI85L z%ygRA0+PYi-|lSErxunpfQI~^ao}Ds44&C$9L~PK`{Z}ADrC2Y7#&6PS1uZ8Qc$bC zXIf&6eur?q;$dGOpB+GKAKOxv6wmM;s9Vh<9FE@F}4Lx;gTymgxR zJAk!J)jMkFniAF0+zbbn(d4d!WejGS#)548!;N6cq#0q7sO7wnv`cR{w) zPHvSZObEXpANyIP#DwvNLg+_jAE5;K!k|M}aZ z#)U(olX59v3|4cmqm&O`L3=qLY}AFvFx!z!g=8uq-6k zp(W(x)4j+}l-&>lNC5PzLv&``rb?+4&I zbZDs5(-Jhb-BSyI^iLatTCP)AOfnq(R<_T5ygiL-$f{v4|0t_60Uly{R(Zm>Im{4f z7YcsP&MpM(-%$AUGtlgII|*kM_-@XgEarsc$=Y!Mm4mf48v=edYK}=EwmmwYlam7~ 
ztVw2HfMtu5p;9?f5WF4o7Wc#&U4YvGs~Q1#q_H}@q^%By5zU0<(#*_VXc7#Pge5J9 zQ%vyT;-1u(FJETM!rEKgZJnG_)y&0Q+82De42oB+P7xUMAfw+@fFP3u^W4@N( zsdq#)PzjLqfHdis{`XkjW_#x3$&&zHiCC0$$gwP(Gc_~nC*N+FC7mn@mlwcR`}zA@ zP+tm7SF-&_-bJHHM%-RE{_Z3S`8}{3n1Xa<)8beQ9oyzz+7Q^?tcuIzE0lEDl@KLj zcChinZ=36H$r&~V%GyudPcf-WD?Eot@umA!N zO>aE^T1G!(PaD(4VD`2uK@Y?l+D01Yf4!|T1%nCk<37hYXANR2w57aW>re9R)44!v zP?!jbh}Y86k_6d2gvAh!$yavl)(y+#8IKz`*Z#`KHb9dH($MMvM8~}J@F^HV(O7a{ zB7Rt_x_$QHiDk-BZ(253u2ej?Z@Q+&^SB6F#McXtDx*$plk Ofj(_?1_7f=N0 zMT#PXCPhT07e$b4@ZP(>pLh4V|NrmVy?gIIndD5K_nb2`?|bGmlXK=I&zIj=OGivf z97fF#aYV!IF>oh{0~TV8@bEzSLkMLMCp5wjj;4k|Bq1mVH)UmN2~(^W93r7mvzAq?Ry8IAL5N3X+o45_)iiiz{Kb&;vsU;emmp zAre}erdn`EloO$<1_I+_3`c9CkX|THIKct=15Z3q=yP87j)YjW;C={4_*q>IYKhZu zPZz>&p>k3(vIMVuFoZ@yQsdv?r-4JKbo6xq5C{N3gcpFv0h$0MIXMM686^b;1r-$~ zH7yezEe#DV>)|5|Ovl-{IFGY&a6tIP1tC15yc`@ta>AmLQZN{dOHe^Y9;z%Z4TB!+ z1VlweMN31=LPy5}<>ugq{(mp{X8(hq+{(?502)a<;ug{76F6WrOw z72)QC@%8f$zy?NMz7lmcIwm$H^?KTk^qaRb?%vDGzyF}1@L|Q{%Bt#`+PeDZEv+xw z+B-VC28V`Uy&icpI`(#Ec5eRN!u!P!YwH`ETic(%?0h|l3j~0F35&4)C9vPc#Yl*Y zgp?FaN^uYuh{T_ez>K71+*0I+)lDeuy^ruf!zr0Gl5@+Osd%N&t};8I2dIzo!QS$( z9fbBhvj4ZhBL1bw{yngN#5D%cfa=-jq%g>|FS@h^Hr;04YASOrH^{uJH!Me*=*_xW`q&l^BnO?t17-T5n_$a* zDuSuezCRZFzAc9q4}4nTzymLx+xNe4?Rj4N!Fob11irT4x7PoS+#_@)91pa;|H1nB z=x&%iaSv_8cKC%3VmlDq;h*moL>~UH^N?HKw8oqrFN!HKH;%<*$R@~3|it%PPW-jE&JdX$eG66l*`V$80a!4AOxEA>rX?1#;C>LSQx=bkL1S@j;3LO#SoMV7%?@=$0PU57+;GhQiw38Mucjwy z1A(^8@2@}7jqE!HwpaeyQagU-CxNkkUD=Bp%l;Nk@9ETZyM!svkdq^=ER%%(2npj^cmOTF4~D zjpa*@cqqimm--CwFBAgq#=0i6>BK9h8`B6WhXp!aA`d&R%}jwqUK;nfG{k@-z2IH5 zL2+w-fxUVQmnA&}1F0}`9BJ`I9e>eKC<@I9m`jKhDP$#OY$o+DSVonIehzu~fl{`0 zBU@IjHbJU!XPA^OE$s8K@K?hE(ZzC7RIq#3RGR>$W3GN8y=rBHc+_ zYr~q@Ny{o4z1cd5+6pvWSVcDFlE)k-hvvE&eApxF+oz-J6J?D4RkdlSW1I4Q?w}Pu z=WekJt<09EtWTi$Y&$&%3zDxYU+L4E-Zup~$KJ26&RD_}7FbDJ`JhR=PODtP1F905 zBZK)hjg?O-kNJ%l_j;)FxX#gDrhh6qTb@JpzF<0wy(pkmY+gSn4cyM>!D_450ZF9f zZPfp3U*7+G!T!%*zH;$)86TV3zBM)&i4HE-P;qz{I{QgI0zjR7+ri7sDHxBE`DkF7 
zm(kYNd+*KVZ@HxDCov))aIAV*~oC+oWi0U~)emPNO!h9st=CYQ;Aj@UyhWy@bEYRRT`Kf-Ee z*5RHlc%B-!UV4eF>`su~p}#v4$JYrLz5lw;a&)hOC$qH6L`@zSpIr~yv=RbG4te;> zD{CYtE2kvrZ2L=(?!RN1>XFi`2$QR3gNZb(>OI3{SjUN}xuyH1;yPuP`idN}%;T~> zkD?t9?PSYZ2!5^885}+7!rJSPF{VfxLnuB(oc9~Fp0rg-%Tq;ec${`qM~bHah*vqv|#{_CcRO*kedYK9+(a0`!%0 zmefmI&hl8oy$b?&@+Or}Cnc$kwTF#=by2V$3r-i#~Zu?%NTt4%|?f;Y(?CX*P%z=G?R(Bql+_=JWK`pc-gmCal|l0 zL%-Xa4-dGAF(J7(f~Q1W%}7(<1vk)F_1b>9fRM4ZHZ@?98+1P}jWeqAkHc8QuNU@r zNv4v>i;_#Cg$r{gMgz{g>l@L-DY`%3)TD|Det+d_59^s}>3Kl1l`Ea|I_TBn*ZlkU zD{fnIR2o+3&Bffs$al~tf@vctJLb%~W;`n=tY%wf_j%b&?{(H)4!PPJ*XKIOeqACH zC+AtW_+U#b+!GpZ4fbfGU)mQ=r~=oMuyB!jNdANdFA8iK4j0&FYBP ztZBUIx6Rbb;sJk8lPrxZ6Me4Ws`?SwM==c%_L#1IVKpEjwh(P5C&_N^O-k456i++k zQMDvz(%^1wIhs%1C#Utk;>4r#PR@WlY1pY0a@~UB2`hHo6yxZ#vC}L0t!w@Kt%mgc zY1Tq4$LPu_8J_Xoam*#BQyeipDPVj4qrr`1N3-GC-?(j$_rye9KBjbt%-McDb$zwf zn7r|GbY+R`+Nb5x=QjC8wv`6#N|cdgVpk}>+yoS}o=v~%R(ex5y^*W`$|v3k{Cw(j zcUUNgIn~1eT`Q#b3T^qt7SUs3Qdds)ONog+;jMChDxIUcCa{wh_Z`IG4m`iNMe9Fo6ts{N+Be<~`K z@F*-{FTeEp4`vSi^Phx9`^|q}d&y|Ok)JT#L+A2Ea9^8-YZrkB?y*(MOzj-2QvJG_ zjtA6avZu3mm_$RD9_+`v0q=iy7XA)pKS1^8AXfPBgXAKc%JKuR(wC#SPH&zzkV8R! 
zG%LgfJZwhUU}9Hu80V%iSjxNJpB5r|%0|DI`)p|!+N4c7`}&$1yQ!JJudN~_?Z(Bi zc1jmb-kw}dh{G)3mT|n^QsqK!K}ppF&o(Av#H>8ag|?f+oU*6+3`-b;yh?}=SR*zk z<6Sa;UOM8TMOEb)-V}JPMs#Z!Q?(ZrA8k-<_@3e2dx&OV5leI9TG`kAOb@%($((s1 zc;NB1LU1GCZer=r$vq^t6=W!~WYQVpBXwDwBlZH62MhgpC%lS5{)29*+sJlic$7lZ z1ajU0RGYB(s-;!x7O4M(oJ8-?JI+dwTkKumpdu@ESBs$v@Ws0`+{Nx<#P@xRxC4h8Dn#5 z+QV+jc^fzn%TCf|c@A)V{50EbgEg|IBJcSYq%Q}GHDeNc2%Cb~H~m1R4IbR>?Oclt*kP1d3RRABxA*kLD~DOn7B{vpa#*4!^er(oZ< zOGq>!vsgAop?Ow;WsVvcyj!;zZ0~{+O&%Y;sPM24cpZM#&(d z6UojE6&aJVNlCex)De25vx0+l*-T$}X7;;WF^-3VZtE{Cq^vKW;}*~1-k|h9@l^I4 z5kE|mxs0&cW}6VZS1ZE)>I^saG8YV&!V$Cjp6@awcbSg` zm!*3iasdKClaH4$bZ)Jdlg-CthTj@Qzk^OFiSGN~=oR*6QL52;Dn5>!W^7Yp69T73 zstqrR!<6_93=+6b)kMJ-V~+=hq@M1GQQhPHmvxQn+ONKijPdDN zROm(z${4qjT(THW4eyjG`sXkQF%I|Qvu=LS6*2M8wzIGVg5)lc)T4^6PU>*pVO~}nsdl4(a z`{@(qvuAcbW7RUfa=R`qUvH$p+fhYU-I(gZJ#pR?x#Q%*+6NU8u~>D^ELdE%7<)mU z?UAa_SBN4hNK!tm))H^GbUg_>M^6V>x4R5JlXkfZfquFQL-XM|C>m;jc8wn$g915T$6|*iIv`{_!WG=7GV7Vda%^sQb~m zX^b|=Waj4JTQul5U1cmCFx=b1AQ2I{SB{c6brj=^o8&&!JUQ1 zmt!21Hc7|lW*(V$l6mrALCaechO1?;)*w>%!pnWULj!glxb+>&IeUPwdKt~)9goWf zU+m7mFsfa)B=;5js4m6Y`TD0R5%HeRG~jf1l&Ig81zs5FF+*&zwN2AtDY#jtYRu~1 zCqI3f7aVanXsW2hO@$Sd0zSqj_P29#K6Ur4X-sY!n8(6PoYl0hGNzmgW4;F5Awg5{ zGIL}dt4%j=%-mwyC7akgw&EeS@3oqzu9oU=__ByA9hX>jVWgSMxuU9O|K-G-)uJD+ zA>MPBsm(0HpQZhW_ z0e%vZZ|ZUVmW7nb#_ zx?>)tC3v7xc4xajziEY6=@L#dT{S54W}9kU*>NGTw@mLzLp=-89vAU}LMeG2U($-A zS%e*^D>6Cqz59uaoUYk#`~&mzQpelL;-+|L5XQZ1iKmX5)H)ULxp?Y$>)TJi7gbjC z<#UHVbhhVN+oH2Zg>$B+~X3D zwZ!#Ler$)Zv0aNr!g_U$YHDM>>E`oBM0lEc6?dG)Y3&W@`S}*u{WeD8{Ob491I2o) z#=6A$0~}5i`$Hpoap6}41tXIq(9-8j27E;tb&ZCRzItNIOGxMHq^jIxzKzpbkJy`S zSZY~Suzd>m-mcJwI=QDBaG-S^k?zb@54*Yy9%<>OMg)j3L0l+K?D=$%XwKrO#Fu7* ztZQd)P6@0$?&_}}GmQ@mr;+6Db_$uu&oMndhTvC^%NE^f&&iAxqsI;161$#Zld(~1`W&FS&d%BS>2>K_1pyakmiG~m z;S_d*xR2I5Z{0&a4QdQZ@#n!mxgrT)i z&vtTu-ucDmek5{pW|B1|#a`KnN(++IDo=qJo02=jkXu|EjJ$6UilxV0 ziJD}`GWS+CIVA{RC$p&blxOtppnznbLSBo^i8OfSQOedMo{!o_Txxcz&JlQRGkw?~ z_!Ih!q8+`D)ok=~i=x4S8^F9ipdN7msDG*JE$lo!+gVW;(9R1PTd;=ga18 
z=RR=Sh=93w+6j9M0E+B&ky*sgoYU zk{)uurpmz7a=&e)dMJ*a-2iTITS8WWlPpcMVuh8=C3XIjUsAxr{Q+3@z$e}cONgt9 zJ`Hp2WeR6!2a-EvJ5l6NNwhT#l>f6X!v97Yv0L&lP2c?K-4f!o$shk#jyOU0pY`O4 z6LkL_Q^B?U^j}5{2Vo-a?9t+57B!wychlx$W<^!p4<2i%?jJpQ4G)|gnfm-Zbj>vM zdHVn!SQy*g|6;U%;`{#$@W7R8go)|?s?Xn6v-ixnR?9a-ceb1GKojBDp0TCRV}V&b z@YeA?9@tn4!2`j9cz{`VPc(#UKcwvG%4Iw-Q}oN<5x!V_Ro&aY7&=Axqr(dmJV3*x ziwCmZS9ag3etGywi0BKkU5LC8uOs69g?Qin?fb|q)4c@eKE(*@E;3DHgamY42hc|~ z-(*{DvK{<(m6(WaKxBZ(0FePA14IUh3=kP0GC*X2$N-T6A_GJQhzt-JATmH?fXD!m z0U`rL28aw086Yx1WPr#3kpUtDL1H~+3G|KTD97E8%k~ommlK8G* zrAd&zlK55K>U%B3fvy$gKmhANNbBG&VF-6Z*lVBy2wO@LUIei#amnv8SA-A3)|x0! z3_;Gy2O|A_qhFP|4y3jYWVz4>N>{(r&Z%Mm0GmhT5VP}CCt zZvJ9BA|VpLxT%GLm{9-6CXoHd{!I}W4>&|<9}oBdSilp&0ImR>utormz(vB^6Yv92 z-z~T>A(R90F$Bs}iy%P;5zdsOnOYJe zNBxV_-vg1Ak&%{#I75ERBS>&Tz86D0f6ar+!DQwCL!PWO;aq&*!*BHn28_@Uzt5AG z`&}MXP7eA{c^I@k!h@ipc7VcJ6bb`55K}u#P&7jX!U?WQ=zBV&AO|h?{k;2N?9rI- RaX{tdWn`%N`L&I7{ta+#A$tG- literal 0 HcmV?d00001 diff --git a/openvino-doc-image-triage-npu/samples/synthetic_invoice.pdf.txt b/openvino-doc-image-triage-npu/samples/synthetic_invoice.pdf.txt new file mode 100644 index 0000000..5b7f6be --- /dev/null +++ b/openvino-doc-image-triage-npu/samples/synthetic_invoice.pdf.txt @@ -0,0 +1,5 @@ +ACME Utilities Invoice +Invoice No: INV-2026-0604 +Amount Due: $123.45 +Payment due 2026-06-30 +Please submit payment by the due date. 
diff --git a/openvino-doc-image-triage-npu/samples/synthetic_invoice.png b/openvino-doc-image-triage-npu/samples/synthetic_invoice.png new file mode 100644 index 0000000000000000000000000000000000000000..ad5aa76de7bfd423394771237fb297096e3af983 GIT binary patch literal 13347 zcmeHOcT|(^ZY?JRcg8?|aMh{Mw5*PgEAjX92Tho(UdTMVZ z{&MQ-)2jzaIfzcfD~bPZTz>Lt1_xFlZutUg^c8aAM2 zjT_x{E1=kM4RU$?$L~ww+xB(H-|nkFLy)VR|6g2q_Z8OV=jS)l5yWpSBO~Lu8Y$)W z8sxiUvdhSqONo;2LR&T>-(@d-5#D(Azqc^FyhEW4Cd)zm&c&?4-s%X)yo-$)=_y8+ zX8Po{iuxAYi49id>E7_kjyFQI(%utez2#JL_cOEDac(KUpli;hx~4`x*0+4IEv~8Z zjxy@N*)L0Wq=qdqhTBPnRATS+F$DQuw$&@2GPAPk6g)jW6#}YXoO%|6lL*+tMD}A{ zkEU3fc(6Ed6uZ>n(o;$y=6kj-v!$!*sbPmL%3WejmT;0moVmGqKykO>XwrNQQ4%0p z;&Yfek>t*l1~rrd_K}XD2m zOAM!g)OGb+rFzVC*~^zN-BZJlFoLR7C`kd;==7A(@dQ1u4HUGtcB%<=sqbB+#{T`| z6B9P(tDSojgMzbmh7?D9FztF4=6!ibr!=mQyL2ljCZWcH%fXe7TfM^7alJdlD6| zNw6)dfW4$Hj_1Qtz}F5Px94$M#Y6Ya)WUTcc|JWQ{qLiQwAIDIs#srd?_IbEad5j6 zJ=Gv^r1p?YrLL~-Z8aPVR?3fvT>ilc@1FO|$6F}pOXr1i!-@L7&g_U)R=8+MKLD1I z(i3cFX;}wLtrtArA{LfYmu>Cr+^Ygd>rF(Hxwvi($MVp{G|R-CK^QN;tI+qujm>mo zgs>&UW|}RPRLo{&Wj)wyX^vimA7*5r8M0=9`+55D#>RC-eA2yo}q0pL|q>C5# zIhGX_8ORsK$H!|qVFf*29v&eBH`PmmC)zqYOU}Q)4$l%zl``MFnU%u{-W^pm37Rfn zE>I>qI3!3n{&4ElDRUm|a0h%KMg@~XlL%!6K}%ijDfA^SWx}U(a&judaWfVOA{Y#= zDCE)R9;BW=efmPx!-o&O+Fm_23LdxA@5ys-NQuKcIXRK<>W2wtIJxerl->}zT)__a zq}f3=sX+qk#dSy*ax*seZN+z(aQWn z{r%-=8K<5d>j|C6TKm_H`}XhOdHBMox#1cc^BTH#1FqQ;y=nWwb02SQ<>0japvVil zmd`mKM&71csfQgdfl7FCG|H{YBy_=G?$d5-*ok+Ma;fA_57>DuyfR%fkzFvTUDS64 z#wr~^u`g0{_+1OoS!7n%RMB8eW_HPb(IKh`W6^tWx0xF;zIqp>+aKzse6~_~-o5kA zV;oHqhql?K<=DZnpqoZ-g9UNA)PK!AIoQ_AYobn{8+Ml&RQu?VtfgUn@#ga*b&19y zb3GJpxS&7EeY)M<2O#Y819dLcwX3V^GM6T;PsVgji6W*nkhf9HW=XRZf~XPlqpUEp zwf_b&IDO+3f>ivOFG2O&HMO;+=alrJpU%CzvaH9TE>9i3zA=PI7_AC2ptQZRk49Pyr2#ADGZ>5~ zPx{w-xA%8Lm-@B7Jj3x144n63mfM~>b#GPB#g&kuLnqpq%*@QhLoUb6dGQ3FjF&Gv zU!Onf>N?zA9EjPK>(^gl9X6nZ6LA_?@OWncB+~N!Bxi#nzY}hw^~r!~GDO9`m6t;V zXFpsF2@y_ma^)0N9ic~w#8mt^mn~KHExU*B1c05bQe%hAj{u6FK6B>I_Cvqa#A^zx 
z9!!_b*MCG29i5y^ygObOawDi+1tj211NZ@eehN&VT-pL_(bAGLu*bxeS%9lF5qNAfl3%pzal}XMJgYKMwFIi}^P@_)1 zQK&K&^hN#-j81;<*2!wWt9(N`NW`39Ke9ru_WI%!XyoEHmb z_4M@Gff4dNn%lCSN`b-9HfecXu47-GaITQj3!TUS{lCv<&z?OnmbPYVTrxg%@=evT zJ3AjrTSu|W#lRfTFeA z2Z^Hn$3~fZyuZE)sKuvnC-PPlkJ%z#omLqV07Opy25Sb3T@w>)FmS_BYdMp+Ixe1X zXc!K#Nw->Erb(AwVhl&1_3MTMrdgt?Lsh#Tq>6afw9tkWlVZng2E!F31#QuerYBg& zLX{Nc<>hsG^^^nytO(WQ>HL{Fqol-|VaXy?A4_%xThV{8SYs#EE6jeplc$c3tVDgc*8aH(STyODykUi(rboM2Y#PkZRs?N zI`Y_^1pr54?m!HJy3i-pX@?zTTv2fHpK!&mYvo1)iWSt|gbz*94`^uo2m9>Yxf3)= z#>Z$BJuWU!nFzFi$IrFcSi9n;d^rmFiHZ1~*f7BZKxJ5>RM0R^B|)&yR!5J%r?VCq zlQz!>EAJ4>F8PCmvN1PP2|m5Vy8?JR|KeFpbs8#Whe>FVxyQm-BgiOiyt#0l{O=Vv z|7myZWOwm=PK{P(cD9d?4@*2(!yQdshxAFCKfC0;x7Ll>sW;w6@Krf@4!E!J?)OJ? zmcP)oiOx<=$!a?1-d^6SVf#F83e*_1nt$KhUpZ!0c3BRsOBTi=(Sm!aYhq%eVpuXC zYv|zY+zTQU`2K>^BUr<|R!K5lqSIikgV6zvRZ>zS%>`Kt zECmX>=MR>W=@&m{TocSu)SN0=fXbwGX=3~bK1>qrjouCqkGkP{$z3F9H9~>6p?K!~ zyb?%;UXXIh5i2}2BgKT8?O2?Xk>N4$;U)l!71OyK#D{~et*yZb2*?!UkbuCzK)RNb zj2f9lw)o1mW_dYO3~$DEEfVE}SvQ_^Xah3iZ%ZA^vr(rdsv?(T55p@zZ2z-fRyGGu zWECDGt^s-Cn0}{`I#rwwcp)<_saYuM!q_Q5>&1(GqS-?BD!++g(at0i?dNa$*PkOI_Il#yR;(;6KiR+UL1F;>ZYml&g zM(($#zzno!I}t^l?Y7yDkCVa9u;;2ZM2i!JK%Ur@0xMLH<+aMF1krKl6U)ZP=1 z{CY!1K@7;`Kbisdh0R}ZPYH2@qBb=()p4se=)bCz^FR?No&mapZS{P&8&IF^KQmN` z_|WAZigd|WN(P(*>64k{}vSH-H)dV<^ezVBcYmd0DXR##w# zi<}3aIy*aI(r&)@P)6W1^hup9{mg%2yEKWcGXCZ7k6w2S8Gb11W$<$Fu!LlBOFFid zT>Bricp*@>vvE!8IN@ig24=)!=CfmW9E$wTcfC^vtyLB>-|@yJR}FBuu1b~a{pGhG zcbmn8@fdW4l{DBy0c>LS**Cv{+mmrG%mS)HN@M002^N~IrM2!@8wZEb6<)ebIDq1^ zLhI@3UU+zfJlO$dmrqE~&)1_J^k{k>hX+@bn2;dpTL&b#1)EZ9O8|E!E4A_Q@fvM* z7}N=Hn&^W%wvpTssfaymZ(j_4Tglq^jZ=ETJRo3QqCS(KzD^YkMU-mjMG$C09dNC>mTZr$3Xe8kcJ<5oMT;8wLTO$4DCB_BMt%GoHO}n92|^Doq6w}rU1pE_(voK7kzwGLFf*Aj0R1}&im9cvMP;5fP`Ld18eiZKDhCbx0>7ht-z-ly&D4FiTgi7C$^lOTY$ zIDL9SCW3rFKx>PI^XVrZ=Kx@6Ih7o6$lJ#Vf^?%$a|F~stzVwl0WD;Ta>@f=p-{*I zA!ZBg1=R=ArxZXIf|?{9_xcYvm8QW0LK940%Of0_F6FLHJwN_nq#>1?MJSwv0Q2dV ztt{)+&8G>3L=`X?&ts$J=H`G|&4}v|zn@>41a)R8u`IcU`%WQ!gFpNuSNlC&`}1AW z0xK`Zs+zpa%#7N&>^p>U 
z9TGBlq?0nA_Qjm;rMO!LLLK(i%Bt1LHgb| z8M^3BKYw5qDjcRm#~Q#i0dM?)kv#+PI!2@dB#7&KhEXC-cYZhPxoNhc8>n0QL#Vg4l1Cm z(WIV|#ZE7#rk2)Jb9w_Hl8w16IQoY*%T0fH%|$hA=H2lJdl_*Mqh86aH_GH&?n{ce z{7wdT3%q6w^%4Z*8M^qTUO z@k}6PP6CJ&HYB#-TP(E8Thgsl$rx}CK+kQWgyZZ-7vXBIB_gH8YpE^-#Es9)*dXyqRa>lZZ~Md+P*GK#`Cf`;803p#C;BA4 zWe2~fw6LVasD=faYgAi0r#&$~4)UTHP$jMj_?12Pkm(3!T*LO&BR#J+sfn7F7It-^ z*#hET3kw`3R#T3wBfk(!(Rv8A1KmQ1Nkb zTNEymNIl#E;UVqvg(iSD*d)qjI)_{fGDlX#@O~M%&kcQ+g;TN!4YbOnrMN;F$YbBS+)r?rvkqV8F*MPxW9JK`p1uNSvJEM=miT^L6^MM&mD5@BG7x*%8Bo z%{_n_IG+-IS;ajIYd5AGA;|!_G>8`fi{cF8L(~PGbhPm^PA5SqY@lI*`eC0bz!aRv z(Bt6~1nvrCahLjTLL55}l*@GmmClZiM|e+f?^iEg zT%?k&H7g!0%E;hF;l$_7dFi-S-nZ&WU;2ZP%Ze7KSn=3uPcN_BNYIgyJBZHCu+2;2 zSkU7X)sUfkz_GGeFTdS+qBDO&mYLQbsuIg=KHv$bV6aQ2OtLZop*IL;ip&aWIrbw3j?3Johbpv^8O(_d=&~IXy~4q!uErGQ>D|r+yOL$I$cib z%=3V53N_F8gQUDYJrfN42jIEQnRZM-yZDC>x5k8s_=W1$Dc$e|FI|&IElspT$;+p! zotuHU>Iir|*nD6$3m70OSSEt>xd<<80UNje&*|VE)$e!)f#Qqo zY*h%MAy))?D~`iMMwdFjMj@gU^uN~G*MGyX7TX9mFUTmryUf4iCL()=aC6qVY+k>< z5+V>(Ui9$r08#-k-FD#YK{$q3*t`TyAW87b7T#EBmMgtHQ-i&lz2W!#ulKlm^1@t= z_9(3zIOD$_{ZJ1Y{%7)$?PeS>&|-@X#Gf_>2F>IXrRssCSXq`-naq6ZS5;xpNl;G?!WWE4ruqrK<$6#g@K89nc>2DSuA(MhAnU8s&WH%# zAc7a4{t3zG)7)erh+7TPqEmrI;P48tzSWM{)6r1`(o9x(^qR$~9%@yLiVX7DE3M3J z3iWv^&17al6*wo*s1S0=&=LqBu-FmYK`KNQfJpf-UW`2k=?Mo|IVxTCo~5Vz|IXK1 zq;f^Q0LgGD2no$A`&@WAB!9j5N zhag;f^|KEob8N{3TT5FTD1@2{puIRjzY_xI7vQ#t{ND0X&@7-NWam|&_P5$OHnz0r zLak_r^~+z21XjYEld3K>%oKq0*n}Clu^YDOp3_^%2EaBfPjUjm{QGcn7-q4m7|PTE zG@93z)dhYJBHVc;dBr*AQ~uw<1xuTk@Uo*dX4tUCojZ5HUvOX@RzRJxHI2f)hehqm zakW@Na$dc91rVv|{1zCf-KWa?Y=mupf~YhKJP0Sx!v>Zb5@XnJ5DcyT$;JX6Jk^^T zIG4%ym92*-*B$bn`$p65$K)Fd^YV;mKPbV?y?`4b9Y22j^P@v9tUqW>Q?#uStX6wR zhagf--1j6(b^op;%_ed!7}l=N&V*!GF%ghw?jSI-SOBZ9 z%;`(&iV>luiGh9X3P@N`&|Jgq0v*4;x0AWIGi|dHFccqi-Y8Bg1oO*<0m2b0*q%+Q z#sZM45q%Jq(8RM9@6ZggOODxP&{QYTCXk5>UqTxCIYKn^UQuz!;i122GZ!gyxB|j+ zKErJ-98@o7`G!wK6U76f@K*baEUtV9H%4fG&5K+VY|aCovw!o8sd)yAg;jfYjWXAl zj_OUCe7V8yQ%d3*L~V2O{O5~L24gTjaC|mD+K_tKwHjn5Fm2AoMzHX3YS{oM)xc69 
zz9B9Aujf+6;2X)2x#v7Fdl^B%8L=vuC5TRUK`6TPoi6o=5dKjM{<*9y42-K>h)jNh z(|0TmaE*!%8_`MqOdN;YA;+BoF-g&IJVBw%yFXKM4t3Y$@`3I*J+C4;D+OIXiNH{@ zEU4aabja7?_u?Q)V>or6vpqqv@NJ6(1JSv@Dc>M!>yqbno+TeGwf5T#v!pnNorcbDbFEqkgTs;g;X?i^p6skrqHD-wQ1!mmj96$!s0;a4R5iiH2aknrbI61B7cN=L6R UdWeJn{DT}fwfZUh-_Bk6ca!siCIA2c literal 0 HcmV?d00001 diff --git a/openvino-doc-image-triage-npu/samples/synthetic_invoice.png.txt b/openvino-doc-image-triage-npu/samples/synthetic_invoice.png.txt new file mode 100644 index 0000000..5b7f6be --- /dev/null +++ b/openvino-doc-image-triage-npu/samples/synthetic_invoice.png.txt @@ -0,0 +1,5 @@ +ACME Utilities Invoice +Invoice No: INV-2026-0604 +Amount Due: $123.45 +Payment due 2026-06-30 +Please submit payment by the due date. diff --git a/openvino-doc-image-triage-npu/samples/synthetic_receipt.png b/openvino-doc-image-triage-npu/samples/synthetic_receipt.png new file mode 100644 index 0000000000000000000000000000000000000000..e0bf86f50db43c73c03ed155148269d1586e8313 GIT binary patch literal 12268 zcmeHNc|6qn-v4Pi+T`|h$}Qn^8x)}vl4Xoew=@w+age3cVkeB<7@bb5nA1p;brhjw zi4Z~?8M5!oP}Z?C!!TxhKEG4vzFs}gxzD}NJ~?fbcL+{92~ z&DJ#tf=J+x{PhHaEQjA$c&}Op|8o6S^hpF!%ftWmz{!C4Nd}2vIZs@ijXQE_)zOJR zeO;rMH#xWNYa3GA?R)$0@7ucKjuWzy5X3)_w8k3ik^1!elRCCCQQYP0clr^o-cP_i zy7i}vF7p9F1y3Js%-^edvjy3JKdIkmK2=5=-t^;2{5rfWxqON^BH)HhjZ`-4@yZt1 z+ZChe7zE*yiaJ#gWN-swU}Ql?kY#}@kUv%JK*9`U#gG-ORme9#8zRUhy=BNM^gZAF zkG%y}dzod0{nyq-;3{)m|GU?lWZ1J3;cP#3>Qo7?C^t73OMWO#r=Qy^LtKY6L{FOa zPsm?sQFtON_EO(co3J}@g~38ok&>g96rS;p%NLNDjrMg41ViT&-%NalEEi0CZ zULK+!xPoAzB<`fhTE#q)u+u9<`5SaCr{zlG=vp(uO5z5x2ijA-5p=B%f8Phyv!=ew zY;WlO=$Gb==uqR06`C90AqkhO#dIn}hplQE!4_nu-^uj|%~ZD0>d`i&>xSNg%kaw4 zWL+<~?~O3`4sN(qErtZPtn_{Gc6RkzgmZx9)c4RENjg}SjINsGbK9`QXb3UTYMf-H zZ#)&5ja?bGzR`+(WC%efQ=*2<#4T>;Z$p?0I##li*y!a5p;qsNHe#Slch6S*3Sq{3 z>yAD!Q!Lh$MwqAJJz~f+tU%Mbe-c{qf)rP0wJifu9xQf zO?CychbPCsmY8T`@SarLiqGHG4 zTi=(53hc5P<|aDxQd2LqK0Xz`S>uALtr)(_J#6dU9%pgr~mTB zYIY%`52*_Hy=Nu5s$n4#Vgv6Xa z&Fg2e+2iBmQJj;Xz&I-kuc9ykH`S>~udS_};*7Os 
zFih4V&vpdPk=n9sZES3UxkJ=K`_+t#ZzNpXo^U5UNx{v>v?8{sr)FiT+NiJHy!#0C zN^Xg^wl?*&Hyu7N7rOXS-sr9dDN#7mv^C7B%Bt>1~Kn@mT@m++m$ZOj~9jtmJUi{pK*VB{rdIN($d`5 z=>APHc=xZ#DygWHa);v-O%qSs+mk+A5^rm79~riOyduemxkP%(VzF+9hl@TO%G!SL z>cr!STodek3v69bXYTpV+pbwfsYyvmIXN>CTD|ER8OpnM?b^M2_pV*eUhc}JyV6J^ zVTMJfbunRdZnU{M++t{GC_g_RexlV>#;dUxdRN}sB3PPtb#vQ>4I&&oc#x(~%vI9~ zWDWZF5f>D3TzZ^mnYLR;j+}7jnsb8Q;)l!aw*Izf&kn4VIz6*Ao-OCsaR!k*wJ zsd?OwqQW>fZ+=`WWN`)><$^ynx+jU(4bPHaVb4Ew;J_<(J+6;$=EwV6k`SU%jJ@{WU)c~#aXC!l0sCGNXSz( zzW1|906h9;dTG%75_Ed=#V%KWb?;p4YmD6mZ>&__ljWHh1Z@1H(Z!urHcCC?-5sp9erm#0Cqv9tE znnW0pZ;B~t;LIQsoBNT z&jB>HWmsm<8I}ewcx=#$F;-Ah%YORwTZ!09%f$Vj;7Uzam*ud-#gw7|@3cw%N>Y{&b@?pFJJ*K;(2d3AmxSy?m?_0Zvhw&7r%w3}L}*=LGn-9uC@ADD zNNI@{PnnyW7e6KPIDMHplMPVZ2{bnP2L9|jcvx}(4E_E6ruy8JLsKDT+CIZ6n&{@P zT)R2N)w+`bh%2De_;{VX3d02>KPP=w$5~JwB`)6}4qCp+dP+)4U7qCv;k>;_lW}ZX zO;t^e)PF;9xGurw`|s~11Xk}#(_>ZbIDh_p&0Uq6VY3Xm7e_r0CaOD{0Ne4oY#etm zYM9vkgE#wpG$FVl>41vM;U2``S`A&uo&M|N$B&yL0O%PpG{N_w?(UNkT0Jvp>A=O0 zw`Z5nH5nCpFuIZr$-#ix*LOr#)rG(oC>)EugQ5GEu@kuL5z@>nar?T$ZDKE}d_~Gs zSr9ydGiT1!Ct`8J#Q|GJc+^+Nf$MsTe8}3v_oVCsw)0x+yH+8^H#}||*4A98xVedR zG4#-(ik3&_Y!)jvHg>9bJj+jC2XW|YQeion%J}zqb()5XA&g_~qBb`q4EvXVUT8L4 zVWq=eCQFJTOc$N=vZpL8{N}bJh^hWp$)_V|vq!24cZ7WeFV{_SKMATSP3fuEwM z?_|5f--sjD?Z?;v1$ZbWS9X|p)$mq;P>P|V$e?;|6 z3O-67+6BPF&Z}b6s6*`j+DxmwVUS+Cl$Ble16s@s5jAH2GkdGx)sKllS7V|;4 zs&$UTtLkWFwUUY4N9Jh--|uuOBi3rRe+_>&Z!VjylpSrwzsv_#i*yXwyUVBmcg3P2Upk2Slik%#^OOd#;7|A1$`o71`a#@ zf<0VML)%UfzxU#Ym&L`!7HE$(`6nn{*0)qiN8Bxx)q?JMsJRC1?4j+s2dHm>+^UE+X5NN}z72{_>iQqbC6C5vyLoqCICgbSm*6;ri z2Hm2Mr<4(SC`HBXbC1<RnZ&uSsXMOeW`{8}u<<1A}EDl*?l;zEX^ zEoHr{w$IPccPa&qXBGy5t^invGU7~j`!}Z^L-j+gXQ2H5x3{ecS|#%62+E(btgL7y(4EkuSl@@=FOXT)$GfB$J?5knoyfFEJg;k%w-L& zl~Q$ca&pq|&9*PUCKvi#1>M?Sk6Jp$RPUFNA1jKF0G z$5_i6TTQ1!`vh}39mOSO^#Ce>`Y?F!kp%y{(a{or-tz$%ccL}JQYQ~2wGmY{;cwiP zw}nuTT>!b2&h@!a9>`3M^Kj7@zFw=Q(I-=KkXL>PNs=HW>y7o5i?9nl>fmtzuqH}d z&-xuH^aM9zo5@^Ub{Jvm>&#ZT3mzExpgZ20$(N`1pYG1CiA|Cn1KoKj|K7 
z1cYi*8Utgdlluk2AEBI^=y)oL<6op;Py-H1{%7I`l&GAWQotpTHmBL6nEwHIrD-e! z*zGgy8~KxU#?nDM%H#Js*2T}Uwjnw7V~vgC4YQ!Zbb~n);M3LtjXciCDrVQ|!qrFu zIsyHn#&*ggp) zr?8kY)>!9JGFd>i59PMmcrsoD`cN?TysmcD=O2CqUCqlXnR3yed?hXikIBi!WvdQ> zd25Th{>W)%s$3*AR#1lx+oFEq>rI;yeA}#o7nGMH zT~xiJb|JGBkeUJRJ(O!F&9_?8LYGDj<%oo_mPg<(f>Qy{fYK-!ZGdhlp!TuOZIF^= z3;>#iKt?bHIvoBVKJknZ8iyE2Gus01&}cLa9Uyo{i^SG~{DPxB60Z)72dJxOt7Dz- z(i&}P7YM%fVN`SO7EmH#V#`fXD5^IH2Jx8SN0Ud%sm3u)4GqW5&DrNm)sYY3HO^%Q zO@Tl`52d4{qbCK{pm;iEmxU2KsoaI9%A;)=hW5AiEep#!_G=G6nb6FiIJM93+V2fI zlaz0M_ zaa;$0qCw&F!b#yiB6=d_%jCA}y-HzEu5L3tCgE7NP^qK@9F22xDse=&4Wh>T$aaac_wPQwgDUjfO*y0w=eZFkYSXf=~R2ReKDFk zu)sR(R8j&Sj?r|r;5-%0NQy3kv()2F^sKDj+LibDeEx9BXZ>hrXXoKD1qL~8{oT8FWx*>z3^eKix(WWx z#@xKQ|B-CS%qyTIAf4eD$HWj89ZzxVdF}<3SyWU6Q2@Zn+3r$H#G#PTr)OQ@W>tK* zxzMvOR;CVUMa{2tf;&Q^X*i$w&r-FjE^vY2FNvLN_$4;W2W>d9n2}w!6mHLdke<$b z!LCzQRyLvxrYA>k)$%w^!L+uvLN4Jx=O-4%XeF|pKr^Ydd4n`Pdi3Z&mZ^`x?5Y(r zAAp@aH{O;BeZ`& zi=JRcp_4E2Ucf$62OFm$r%a(1eioiG<&y>7cA?xMh!K3*!?l@-Z@&N|^$YKveCxpf zsDi?tJKED|D3`GBI0ciI;Aidb?oPqLRf6s@qF|!v#-Q6Ey;Q1Otfkii$F6Sq~mONKJ)`SPxhY-&CM80+u6quF(+8(P(Pi6gUhFzA{oKZmJg| zK>%@_aFz@~)f5K5S@rZ{i@7NS1oxpsha3}pE#mmG@HE@cN)G$^!du}HZ&WabQdLz_ zI$D0miN~2#D_iWZCkfdS()-Tgo&l&qkem&f7a_RonKR_e9mT$`&d#GR0@>C?mYjfT zXr5|ZG!UtqtEKm_&shUv7c^&e*7vDiKgm5T7FHjXmmwcjhdSGe^3p=x&uK0u5LA^a zd>2Hm=^)vG$M?@Q9E42nJ#ZYFlYzdHfoBFm5h(mjkk1JhU$0hsG4nbrQ5-3*q*Zy; zj5SV|m4Azsy8M+y5F`z;v;=UVyBW`6TVl!6J%>jZ3<^d(*C30O=L@Tlr14=hzgK%o zep-bzWU^Nw4%44yh(Y^^FzUbK)PMV6iP;o*(#k5v<0OPHb59W@;e=%k=$uj%H__uL z#*aKxKicmG=8Taqfmz>n4}1aV>ZgTEo_#uM&_00Jl6j{3G`+6&Uu{zAu{aLX3^*iM z)C5YP2;5ViX}}u^t@xm^M^KYCfG3|UYr0wTnl)=s4i^e|(}EhYnTL$Tnvp0&UNMC9 zgF{2}%*L~7w6v_Otdx|LoSYo^2}Pum_(kg5(RyTQ25J}9$OZ<4SQz3YQ_8vF_c8GB zdqIrX14}~F6$E4>Ii^1wx<5H~*F6@55-4_SNPC+Y!(Qs=tM5Q2r!FHZDd&}V631=& zgq%V!qlu9z;aHTmi#yamnZvHGF0hDTKNavz|H(qy$k4mMFx}Z@4#eJneMi&7CqUvp z^;j)DC9h7$wOqh)?e>>IWRA!ROd8i?gXWHCfrjjU;ceX!xLuK#JlqZ4-kp2i2#!mN zeaD-wX^`VUm64z=M&F~20F8~~b=Gf&+)}7vJ7HB!a4znE#Le8o;w}bnWz`9`I|@Sw 
zC{n**Ou>NLL7fI%*BCfP&P&KEn&wjo{?LOl#;LZZQgHk+XqM~NnHwvEIzNw;@V}HK zS4{4=0wW{*_T9ETc+!Lgo4%zsh&}FN=pa?fxwW2XN~p5g+i!?fqh?ATtCaRMmZL<( z2{_%J;DS11;Cum+t&foXqwBzdON?=G2Rn_lls5W9ecj5axVSjm`B88=89Qnf3j-Fx zQbB4*#^{TdzDg+@GZ(4#x`d`H61G+JN3U;IhHU7#URA&FNA zWrjBd&_)d?2A>$vf8*)dcXjdFc@^K8_<;89<2vNJ;;dk|of3D0zIrkCI0Np3YSCC1 zj{O8sc*;pjOR(?qc-CkdO&@|fJVg(n0{%BMG9(g`a^h4^(UmJ#)Jo@0aJu9Wf>?0y z8&AvV(*5&e8QF;-?w~wkbp59Q$*!&2vKKBF1h7(V(&oZ}lq|^DA(4-VOtUau-!25Y_AOz}aYl%Eah{4sIC1dwR(P5KJ@9o*% ztpnMWO^M&+&)W~8Vq(HvlbR&}|9)n+zrwjk17uD)l>0awgY-f>*o)@v3zs6llGr+j zrpbRk{ruv`5wb@1gUw)NQcV(KA(LxqA;7uHq{~)>pug5p*`nceJg)34;`9!8 zn-S5P%CZL-5R9BrVLo; zm$?JRH{@LZ0mtcjaRki53S{OB2VA@X(%ki*4f1dK;(s`>z`PI$tkc?f4M7MBw);>` zt-JImB+TSd%`nU-Nc|ceGmJh($G@uhzZ>lSm!lHcepQ$=09)cax1F#|8$8qKSE3Ck z(YX34g3cTW^$V)U;DG$kuG%~Wl=;%VcS}v8cPKI(`WfUrK~w1(%8MO_kGAQ+#Uv5o zRR#tXs7JT?%jLo8x`EaMZwLRX9bt8bLk1=A5t+k7FU#;Sm5^LL8y%LTJGkMc$V}Vd zsH~W6M70=)qPL7=1|tVMVf4TrUI~L6a%@hnZy;9fVA`Er{}LWdk~y64Pb#AGB8-9T z>z+IZIdm%FfCV{8T0T-lL(oO}a?#6S(Z<$vm^Yy4{U8TZ1+eIKPU{V&ceOkuWpG2u zp&-xll=a2HEAN}YmakxTfgsOfF2HL;Da%Rji5v|h9!5N_Rh`U9 zhUo+pL}5xH3}pV^m|Gjgke{P@yknoHSJwX5(;(;+OUZ@#f8JqzG4cYo>i>FJ<=6Qe z^mQV(^|ynce^}uE!$Uj&XVXD{e9`~r?A0H;`^WD7fegRxM*rBCzwHY;25OrZ^e+yC z!qJFBI}B6)OH-s_;L1BRGrkNk|LN&ebV}8)7L?S#rqh1UY0=*{?+=9eqm%w9hX1YU i@&6wMgQ*qZX1|V5STjI+3sd|E{*cLEv%Yiq@!tU^#BpZ; literal 0 HcmV?d00001 diff --git a/openvino-doc-image-triage-npu/samples/synthetic_receipt.png.txt b/openvino-doc-image-triage-npu/samples/synthetic_receipt.png.txt new file mode 100644 index 0000000..f85b281 --- /dev/null +++ b/openvino-doc-image-triage-npu/samples/synthetic_receipt.png.txt @@ -0,0 +1,5 @@ +Neighborhood Store Receipt +Subtotal $14.20 +Tax $1.42 +Total $15.62 +Thank you for shopping diff --git a/openvino-doc-image-triage-npu/samples/synthetic_sensitive_form.png b/openvino-doc-image-triage-npu/samples/synthetic_sensitive_form.png new file mode 100644 index 0000000000000000000000000000000000000000..1a5afc144d90361293581e39a8980b2c425506ed GIT 
binary patch literal 12769 zcmeHNd010tx<9S8Tw9gt6-7nhcI*@cHC5CI0WuwmOhu%KNR>^cipmn%2_Z>aTd5#q zsv@$4v`|1mK-K^u35tla1OZtjgs_F31VTax+3tHf_jc~wJM-s#W~Pt-I=9eX?FxNx#~>~A>DOnv7M=9k9&f(aF0c8)_;)!utlyvd zQ_zd_5468+>bX|*-FAyJ+V7e^SgN5%y3xKmvin9|_*$LKay{p`Qo_{ENu zbc%jm&wAuHSaJvPNhkgP>DJj+M@*^e#qW1I9l~O1TlbxxAE+XPSJ&0m(e-O)3X869 zG`0!qf9RHTM5CbUqRUw6*5MZ?Q%Mqbbhj?ejd-gEcvUGnm)0NGS0PWSUg zqKCGT7n=;3L9b32I66DGKDn{Uv&7pvZe^iyBQA)SX)SsqGBR@0hsO@D3}2gU&%j2_ z^*_3%7fqQp60RI^aB%SV9}DlW>)SWkis9PAn;m3vcv4al&3^S#vzDrzq-vL$Ty~!? zX|rP}&w5fe+b5Ct`T21vn)_}!+l`EzlD1b-G^;z451$v*MLN1wGMQ9aj%jdU;HDJk3PsE#2R$5Xlqa%noegK2Kmo4j}DpRS``@Nc-_?@SbF9n{LT|b)F&5f=w z)Z=C<30L>cx=kiA!(W{!h=5(fcvU|rE)E`U_)7FfPfw5KsRy4|Ty$}CY$~P5i-a0+ zv@xOQ<;&gPH4jf(2(C76Fk{4p6`U+AF5Z)Jw79q!K5w3KG_dzUL20Sva9=!LJ+jBE zI^uMLCr6sH_T4Raj^@H^oV>`7J$}oXq8>9%J(^1r*9LU`^r_u(4|n&9i0LkA6ztT+ zjw~OGlvAa?c9&lKKrgLreifVfqxxf;2;!>??LK!aZ(H`}?7bbW+w`)xx0=bsVzEN1 zPJ^qh_sJWmi=3sX=KIEi?6TG(Lm7GO=TYwz!n&G52&=MbQBr|>zIgO~3|B3kX9~5A zrrL!&?#F9~zkYYiUhkSKJ%zR5lOaR(wd$>-mZkMw`L2xg^mM3Zad~-pNl8fK^$jBC zp`iZHJUn`ahA1yS)+_eoa@4&)e0*)A{mQU(e73ikv^t%yl1;lCG-+XrFSe)8%*?#1 z2oMq{>9c*MRCpJ5Mjx-5tAI_OpP#>`r%xIFN{@$=)aN>uZQP(f=aH2czv6*TI)78&X@gGB@O?Bo#(@ypF_GX($MG#rLRaBjV zieK%d*(=9y3j+8L&o!c3Z~>3hV_m3A^LM)5Wo#o)=cZCreBnl;LqY15MYGJSkN@<( z19Zw_E{1#=D(UGR>PS51;o(smdF9HLma?87Vdr*UD=aF)Efe_F!P8y2a5H=+!H*M1 zY`*8P(Rv)OTf)<9X6`R_;Hi(lS+_=S<8C2AY&-d5yvYC*_;PoiZiS|+QA=A%nS}%) z$JnxP>l=hp^ByiW+pu&MKegV9ifBs6Ug1S^Dwdve7rwH4S| zPd5Y6Rzas|ezeoc)Oi0F&tL~wZKtYpG)O~8k)gfS88^3m7u)VhX;b`xfsM8c4b3K` zptgJaysCpP69~GBcv%0um?hbpFnZT3#l# z^YPk6nxk7A9mn)`Yj1C74#ft#8KAH3?(S{|X7q@zwlpiWaBgm8IR!QmrEL0CXthZ| zzxKLudpEgV@`ITAH8c)7)6ra&pvTjXeY=(a3Q$MgPM|D3eit)m@1@LP7vK*d4{=l9 zMjz2zzx8U5hi8(BT^+y;c~zE%r)s$xK6iuBp$8ler=+BW&1NrYcw#8bmOW<4Wic+A7JJiahR_y#nfMz|@xQFHm#xC;Sxz(Tz8#e6Ov&Z@F?&(E=IFO=859e76 zbZ@1kq+kUR-B1D_K*igxWs$SJd3kvW2?^+y3DpzWjP&%nj#Ju8=m7WA39FGvt)3~S|q0LSH<+dh?QfdNu7ZuwRv@QtKS;>_RAfh z8xpcVuMHDz_Vix7+kt~}?Imr&_zudhbI7W@*buh77kltIkfPw_%a?IwaY=8PQ!Vw~qsFYhi3~uYQ 
zhGKVDJWaq!R~z^WWaH}#Yges$aouPksS&u~=Y*?T{!zH+#}o+z%JtTXlX zZTm0&E-3+`QURmi8i3Wy{xV--XM;D`&N*RD7u&`8~#J-lf|8 zGl@E#GFTJJRkjmgkm0K<&OZA3y`4wq*oq-Q<@<$&a69;^AFjd5gk;xdCfvZ9jfaO{ z1u%*W3u86%J}hC#`}FDBt0Rv+icYoDanVZ??T&8HGcx*D3-(-j>-O#2>vjwQVQMuh!a!v~d3hMD z(%7B6(r8Knbx~AQ#OL$%^iAqR1a&5T>k--hZbfG4v}y~RH(Scpwpn$+@N;)1j6w(d zlz;L3yZ3flSX!b?YVw(nk0xKwpl*KJ-nI56`X?_h)wh_q5ofLOeY2wwDtXl3@i%BY2TJbE17*@G`7!ju5B_`^mw8%GcB zYu*vwxB+`7^<>r;hWZlqsnmk>jEq+9AXnb&1$Y=$c@g>Yj&DqWEEy+0tX&yvMR{KV z{jYlI?uv@CGnJR16F|CxRtZ)*FYhG*vHl5`5I5^3v@peymgj)!LEOOW-p?PNEoCon zKNR#inN0Ta@gb2&G^~58lHb);4y6uzOK}O5Z~#5B7v^zwd7dQi@$BpE&AR${os%h9 zwmi?*Et=}e{BI@u`OT{rc!bEb8CXYfa4;KGhyof-xira5N_+=dFzm!Jw#}EQaxx{) zsN-)T4~^0}leShf&R*cohAj6DXg<8p&qk0?NeXzlv)B)Qb^a|j;HEuPY^PIlP)LZ$ zzVlB8ssc%a-GCYs6BEfOs@HW?oNK%ex~`|UH);-8o8sWm_@2{_P-wNyj*9@d^r(T$ zE-o$%tt?+sH(O$27_JaRD?m+}ConW~0iYm5VjG(b{RW9`P#oy3LH}gyyt1-ID8-(H z(N;fg7;zl*+}rE7erC|5Q3&y1)8O*guU~gEA^v2Z0QyC|#L&gLyLYcM%Ul3;QJ<6g z8)R7Uf;pA+;jw!U0p|fj&UR)y-ZpdPC?_&ll7f5<{Yvg4jztaQQ@YYjt(?jZJ2!PMz8a zFb`m8o={4j4}`r7aWk;=EZJ=sVCbHAOu5>}^#vT6RjZde^miP7*t}L4GnO>gOv5CM zqWb~bM^MH+X#vQ~tRL2o8(t&2U0qd$aw-rUih-#SP>419whPr5Bcy><9Q|}k!BMcG zTrj?()>Q0)1G1WY^Mo-Po@1VX%C4~nF9N%2$rc<3bu!gd)!%%CX5wQKHG0Eri{qkJ z!Q}$b-L&Ezp8n}JD)26BYfZ)I!2W!MD&E#f231?a5^uMUivd$S(^G__#Z9CIh`~63 zIO{%+EB23bGr()6T~VP^pUw6 z(EG_oSTS(v4*R&3KCV^{VqRX8-<**nftqUP#)~{lvknIKf*8gv1M6-;wIS-44Vut) zhQ5kf99>>setz35wW#btc6K(0Ls3dafSam;K29%jp|{ve-)8CuF#eb3`pcjw#_{T5 zJ$wx~x)wg#tBX&K7%`(aj6!Ngs`(+7)N5RzOXz*LDsTpSljlZgbB}>d$5eRM{uQ`t z+{#Eiu3Et-aK)sfxhDYCh^|l$=pW%?Ig3X?F%F$Xl-eoQp`%Yw;hKBmGMF2x+>_d) zyl+IPUmP>lt%MS$y7N1>1!OJ`hIa(>tBY-88oqpku4fwd?0tdoOxbxzAe@`;DWHXc z0Aq=5diD$8#bp8sg$Yvqq=ns5n-!Rlxf1Uk$T0itK*r_#2fzZA+1clTJw%K)W>u9j z_;-OK!bGT?n1aei?+-6HX#@Xl)la8#)gqMYhzqD8PqFm0mt6<0 z#sC|%+}Q;}9jMonyV?VO&PE@FtkgYTrwp1v>def}w&KuO?j5kbDZ@jRmp-99T)cv;|$?>GE-$P{{)nQG!?nc~jV0Dewt< z{yUf`hE>3E7ngQ$M*?LrfvVy|0|+63Q2}X*QarafmVu>5?Ka3jV}IBlPi@&ru#E@% zyxmKB0CLGXl@Kp|wi8o3ev26jgumYcYF@_AuqVe7b6ROaa2h%M0z(S)b3@i2e#H^v 
z-wO8t)apkQEQfgCK*+YcYv^(Q{h}gO;vPL@*!b@<1Ag&_Ydw7(q;l35TJHRIW)tCA z$s5Qpy$2_`2_7I3?DVdL?A+W7qfc%aEB?(dX5?Fl=>XdRo%~@wM2|qmK={=e+HC9z z9uW{gYWHlUG2S$){~VZ}T?yxBUKV6v2`1KIU!;u z*Fw4y80eha1QlQ~x_Qqw7oRJBh5kyo-=>wfl0i_I+dba#F#Ht#sSE-}6y zvJR$@qSB|XG`b(RzA#XE8SW9p{ni7QUKDy1L7Ku0=AQx)JX2H(u@GcukSR)Cn!%JL z8DY^Z2%L;})s>#X+Hh3ub2in2#NZhLzi@R5-66=83>^PEU^;CLG21nL`x>NKe3JAUFX+R)(^7tJhwVvG~c1tATj78zD}d>1N-Q+;EnnOx00sEVgFQ|jY&X6v`@ z{{j{&VpfL;&J6W)$Rww4Jv}@k<1{{QZf*um5Vvq;xfqx90|hGm>!8bhhx#9$b7&4C ztCou&sDbYQLA~lCKjkAYJxt1AtL83UhkPRdZU_nuwF21>>E16HNGXekQrAX_%G z(s;ClH<9{-S=$6Iu(Ow8rT}F5P)8b7Jthl7kfGVwkv)^rNCUauKst(1 zzp)8HNbme>pNssGmi*-es2k@mTWmHp1#k!tUjh@^TjImYIR`ai9Ug%IqO@i-LAnfr zLpbmOAsAE*I#$>aZ{T^Q2=(C*_hZ>3qlp-?Jw>ZlLJdKRKLO3-Y0v~^aD{jjz=#ni zeGbGr7utsMs`ML3hK)Wt+mPY&KGNXT{r6O}aN-+Aj02H0(Ut~Eao3O|DpbJ-hHy2@ z(pLFwABu*M-AB=``NdA>qQ2OWyV7LUVH@7EE8kZB(jCp=jf{*s@H(|A2pyP&dvM62 z*oP(;rj+)BeQFSUR73m@WFf2fZy>IZ*ePmA`xMLReDUH%fBy<7lN4e4N9P_t(lfOs z4AldAZZxrt0q~~N(tkN^c%_V>n<>FD*8-nrn*)ACJbOPL5`7cTnlMqze}OZ7k~@{1 z2o|+wb#=8V(JTrWGdHMgUXiyF+)T1buCw-y?J!ALEPZS)8 z77t1{uR&rAWxTDLAqA%o);lmz2VNei!gxaAV zQPlcDK7+FQXY~y@7z&3BlBxU7pxLJL8Cq2SgYZuFNHRtJ0(zch`>Hm_su#|dAclvS z#t+nV?2ZbsPLSPC`xx z0fCL)6!iYjpkMOf!5nNekp%!}o&ZWsngz!vEiFEEhkZWWcHq)y7cPj<0~!TOXdL?F zJ>y0=5W|^zaQ0cl(Lr(r={aJ8<`Cx;q`g~Qe zFo>Bo%X|Vroyez*E5MZ z>t;|@RfW2>gwdx)gt#EigUvVBmBaTxAB&3I3

yjw8}zC?7sRXWSwlY4JYko6-tN=oKV{Ou6M>QjxjG%gJZV7(o-p*j z`Y6DDDNULMN24iJMkOR~@RR&-UR*_Qup2V$^LKS}Vs_16Q%BsU$~aXx`0$F0?H~hK z%L9S1W*7<3P+=RF4?hQ*?SP{s32ZW*RtF_R0Sn@F;4H(Rk0jUs&*K39Pg$e4ocC_S zQ`DK(;gBbma8_pkl(N+^fl2`?<{^!P!$}rM3Y9{_g&ZJpZ`~Axb@lVre8>~7LY}BN ze6gp{1GI{1-5_Tzvf^CS-`T;AwuDx|5Pm+bo`=n=R)5pL3a^7`;VRH193Dzp;Wj|( zaAtiuA6(6WSvUgAw28c;jGAMObva|meY~~E0`K2*BoFUm>$@x7L}bU=3wp;8WNz25 z(}dyZ6`kbl2;ezOLzC4D4%&ehtyTuBTUD list[Path]: + """Return request roots only when they narrow the startup allowlist.""" + narrowed: list[Path] = [] + configured = [root.expanduser().resolve() for root in configured_roots] + for raw in requested_roots: + candidate = Path(str(raw)).expanduser().resolve() + if any(candidate == root or candidate.is_relative_to(root) for root in configured): + narrowed.append(candidate) + else: + raise ValueError("requested allowed_roots must be within configured allowed roots") + return narrowed + + +def _validated_embedding_url(raw_url: Any) -> str: + """Allow only the configured local loopback embeddings service.""" + url = str(raw_url) + parsed = urlparse(url) + host = parsed.hostname or "" + if ( + parsed.scheme == "http" + and host in {"127.0.0.1", "localhost", "::1"} + and (parsed.port or 80) == 18817 + and parsed.path == "/v1/embeddings" + and not parsed.username + and not parsed.password + ): + return url + raise ValueError("embedding_url override must target the configured local loopback embeddings service") + + +def make_options(payload: dict[str, Any], default_roots: list[Path]) -> TriageOptions: + opts = payload.get("options") or {} + requested_roots = opts.get("allowed_roots", []) + if requested_roots: + if not isinstance(requested_roots, list): + raise ValueError("allowed_roots must be a list") + roots = _roots_within_configured(requested_roots, default_roots) + else: + roots = default_roots + embedding_url = DEFAULT_EMBED_URL + if "embedding_url" in opts: + embedding_url = _validated_embedding_url(opts["embedding_url"]) + return 
TriageOptions( + max_pages=int(opts.get("max_pages", 3)), + include_ocr_text=bool(opts.get("include_ocr_text", False)), + dry_run=bool(opts.get("dry_run", False)), + use_embeddings=bool(opts.get("use_embeddings", True)), + embedding_url=embedding_url, + allowed_roots=roots, + include_full_path=bool(opts.get("include_full_path", False)), + ) + + +class Handler(BaseHTTPRequestHandler): + server_version = "openvino-doc-image-triage-npu/0.1" + + def _json(self, status: int, body: dict[str, Any]) -> None: + data = json.dumps(body, sort_keys=True).encode() + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(data))) + self.end_headers() + self.wfile.write(data) + + def log_message(self, format: str, *args: Any) -> None: + # Do not log request bodies, OCR text, or file paths. + return + + @property + def allowed_roots(self) -> list[Path]: + return self.server.allowed_roots # type: ignore[attr-defined] + + def do_GET(self) -> None: # noqa: N802 + if self.path in ("/", "/healthz", "/health"): + self._json(200, { + "ok": True, + "service": "openvino-doc-image-triage-npu", + "bind_policy": "localhost-default", + "npu_busy_time_us": read_npu_busy(), + "npu_busy_check_enabled": True, + "allowed_roots": [str(p) for p in self.allowed_roots], + "privacy": {"external_uploads": False, "raw_text_logged": False}, + }) + return + if self.path == "/models": + self._json(200, { + "models": [ + { + "stage": "needs_attention_embedding", + "model": "bge-base-en-v1.5-int8-ov via local :18817", + "target_device": "NPU", + "verification": "sysfs npu_busy_time_us before/after embedding call", + }, + { + "stage": "image_category_classification", + "model": "rule-based fallback in prototype v1", + "target_device": "CPU", + "npu_status": "not configured; future static-shape MobileNet/EfficientNet/ResNet OV IR", + }, + {"stage": "ocr_text_extraction", "model": "optional local sidecar/PDF text", "target_device": "CPU"}, + ] 
+ }) + return + self._json(404, {"ok": False, "error": "not_found"}) + + def _read_payload(self) -> dict[str, Any]: + length = int(self.headers.get("Content-Length", "0")) + if length > 512 * 1024: + raise ValueError("request JSON too large") + raw = self.rfile.read(length) + if not raw: + return {} + return json.loads(raw.decode()) + + def do_POST(self) -> None: # noqa: N802 + try: + payload = self._read_payload() + options = make_options(payload, self.allowed_roots) + if self.path == "/triage": + path = payload.get("path") + if not path: + self._json(400, {"ok": False, "error": "missing_path"}) + return + self._json(200, {"ok": True, "result": triage_file(path, options)}) + return + if self.path == "/triage/batch": + paths = payload.get("paths") or [] + if not isinstance(paths, list) or not paths: + self._json(400, {"ok": False, "error": "missing_paths"}) + return + self._json(200, triage_batch([str(p) for p in paths], options)) + return + self._json(404, {"ok": False, "error": "not_found"}) + except Exception as exc: + self._json(400, {"ok": False, "error": type(exc).__name__, "message": str(exc)}) + + +def main() -> int: + parser = argparse.ArgumentParser(description="Local-only doc/image triage HTTP server") + parser.add_argument("--host", default=os.environ.get("DOC_IMAGE_TRIAGE_HOST", "127.0.0.1")) + parser.add_argument("--port", type=int, default=int(os.environ.get("DOC_IMAGE_TRIAGE_PORT", "18820"))) + parser.add_argument("--allowed-root", action="append", default=[], help="allowed local root; may repeat") + args = parser.parse_args() + roots = [Path(p).expanduser().resolve() for p in args.allowed_root] or [Path.cwd().resolve()] + httpd = ThreadingHTTPServer((args.host, args.port), Handler) + httpd.allowed_roots = roots # type: ignore[attr-defined] + print(json.dumps({"service": "openvino-doc-image-triage-npu", "host": args.host, "port": args.port, "allowed_roots": [str(p) for p in roots]}), flush=True) + httpd.serve_forever() + return 0 + + +if __name__ == 
"__main__": + raise SystemExit(main()) diff --git a/openvino-doc-image-triage-npu/tests/smoke_test.py b/openvino-doc-image-triage-npu/tests/smoke_test.py new file mode 100644 index 0000000..d4fc4af --- /dev/null +++ b/openvino-doc-image-triage-npu/tests/smoke_test.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import json +import subprocess +import sys +import tempfile +import time +import urllib.error +import urllib.request +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +SAMPLES = ROOT / "samples" +BUSY = Path("/sys/class/accel/accel0/device/npu_busy_time_us") + + +def run(cmd: list[str]) -> None: + print("+", " ".join(cmd)) + subprocess.run(cmd, cwd=ROOT, check=True) + + +def post_json(url: str, payload: dict) -> dict: + req = urllib.request.Request(url, data=json.dumps(payload).encode(), headers={"Content-Type": "application/json"}) + with urllib.request.urlopen(req, timeout=10) as resp: + return json.loads(resp.read().decode()) + + +def post_json_status(url: str, payload: dict) -> tuple[int, dict]: + req = urllib.request.Request(url, data=json.dumps(payload).encode(), headers={"Content-Type": "application/json"}) + try: + with urllib.request.urlopen(req, timeout=10) as resp: + return resp.status, json.loads(resp.read().decode()) + except urllib.error.HTTPError as exc: + return exc.code, json.loads(exc.read().decode()) + + +def busy() -> int | None: + try: + return int(BUSY.read_text().strip()) + except Exception: + return None + + +def main() -> int: + run([sys.executable, "make_samples.py"]) + invoice = SAMPLES / "synthetic_invoice.png" + pdf = SAMPLES / "synthetic_invoice.pdf" + + before = busy() + raw = subprocess.check_output([ + sys.executable, "triage.py", "--allowed-root", str(ROOT), "--pretty", str(invoice), str(pdf) + ], cwd=ROOT, text=True) + data = json.loads(raw) + assert data["ok"], data + first = data["files"][0]["result"] + assert first["privacy"]["external_uploads"] is False + assert 
first["pages"][0]["classification"]["label"] == "bill_or_invoice" + assert first["pages"][0]["needs_attention"]["value"] is True + assert "amount_due" in first["pages"][0]["needs_attention"]["reasons"] + assert first["processing_device_summary"]["file_intake"] == "CPU" + assert "NPU" in first["processing_device_summary"]["needs_attention_embedding"] or first["pages"][0]["needs_attention"]["device"] == "CPU" + after = busy() + if before is not None and after is not None: + # If :18817 is reachable and text was embedded, NPU delta must be positive. + emb = first["pages"][0]["needs_attention"]["embedding"] + if emb.get("used"): + assert emb.get("verified_npu") is True, emb + assert (emb.get("npu_busy_delta_us") or 0) > 0, emb + assert after > before, {"before": before, "after": after, "embedding": emb} + + # HTTP smoke on an ephemeral localhost port so we do not collide with 18820 during tests. + proc = subprocess.Popen([sys.executable, "server.py", "--host", "127.0.0.1", "--port", "18828", "--allowed-root", str(ROOT)], cwd=ROOT, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + try: + deadline = time.time() + 5 + while time.time() < deadline: + try: + health = urllib.request.urlopen("http://127.0.0.1:18828/healthz", timeout=1).read() + assert b"openvino-doc-image-triage-npu" in health + break + except Exception: + time.sleep(0.1) + else: + raise AssertionError("server did not become ready") + resp = post_json("http://127.0.0.1:18828/triage", {"path": str(invoice), "options": {"allowed_roots": [str(ROOT)]}}) + assert resp["ok"] is True, resp + assert resp["result"]["source_path_basename"] == "synthetic_invoice.png" + assert "source_path" not in resp["result"] + + # Request bodies may narrow but must not widen the startup --allowed-root policy. 
+ with tempfile.NamedTemporaryFile(suffix=".txt") as outside: + outside.write(b"sensitive text outside configured artifact root") + outside.flush() + status, blocked = post_json_status( + "http://127.0.0.1:18828/triage", + {"path": outside.name, "options": {"allowed_roots": ["/tmp"], "dry_run": True, "use_embeddings": False}}, + ) + assert status == 400, blocked + assert blocked["ok"] is False, blocked + assert "allowed_roots" in blocked.get("message", ""), blocked + + # Request bodies must not redirect extracted text to caller-supplied endpoints. + status, blocked = post_json_status( + "http://127.0.0.1:18828/triage", + {"path": str(invoice), "options": {"embedding_url": "http://198.51.100.1:9/v1/embeddings"}}, + ) + assert status == 400, blocked + assert blocked["ok"] is False, blocked + assert "embedding_url" in blocked.get("message", ""), blocked + finally: + proc.terminate() + proc.wait(timeout=5) + + print(json.dumps({ + "ok": True, + "samples": len(list(SAMPLES.glob("synthetic_*"))), + "npu_busy_before": before, + "npu_busy_after": after, + "npu_delta_observed": None if before is None or after is None else after - before, + "triage_label": first["pages"][0]["classification"]["label"], + "needs_attention": first["pages"][0]["needs_attention"]["value"], + }, indent=2)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/openvino-doc-image-triage-npu/triage.py b/openvino-doc-image-triage-npu/triage.py new file mode 100644 index 0000000..e2764aa --- /dev/null +++ b/openvino-doc-image-triage-npu/triage.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python3 +"""Local-only document/image triage prototype. 
+ +CPU stages: +- local file intake, hashing, MIME/extension checks +- image/PDF-page decoding and normalization +- optional sidecar/native-text extraction +- regex metadata extraction and rule-based category fallback + +NPU stages: +- needs-attention semantic embedding via the existing local OpenVINO NPU + embeddings service on 127.0.0.1:18817, verified by sysfs busy-time delta. + +No external uploads are performed. The only network call is localhost to the +embedding service when enabled. +""" +from __future__ import annotations + +import argparse +import base64 +import dataclasses +import datetime as dt +import hashlib +import io +import json +import mimetypes +import os +import re +import sys +import time +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + +try: + from PIL import Image, ImageOps +except Exception as exc: # pragma: no cover - caught in CLI smoke + raise SystemExit("Pillow is required: install pillow in the active Python env") from exc + +NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us") +DEFAULT_EMBED_URL = "http://127.0.0.1:18817/v1/embeddings" +DEFAULT_ALLOWED_ROOTS = [Path.cwd()] +MAX_FILE_BYTES = 25 * 1024 * 1024 +CATEGORY_LABELS = [ + "receipt", + "bill_or_invoice", + "tax_or_financial", + "medical_or_insurance", + "legal_or_government", + "form_or_application", + "travel_or_ticket", + "screenshot_conversation", + "screenshot_web_or_app", + "identity_or_sensitive", + "photo_misc", + "unknown_or_low_confidence", +] + +DATE_PATTERNS = [ + re.compile(r"\b(20\d{2}[-/](?:0?[1-9]|1[0-2])[-/](?:0?[1-9]|[12]\d|3[01]))\b"), + re.compile(r"\b((?:0?[1-9]|1[0-2])[-/](?:0?[1-9]|[12]\d|3[01])[-/](?:20)?\d{2})\b"), + re.compile(r"\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2},?\s+20\d{2})\b", re.I), +] +AMOUNT_RE = re.compile(r"(? 
int | None: + try: + return int(NPU_BUSY_PATH.read_text().strip()) + except Exception: + return None + + +def sha256_file(path: Path) -> str: + h = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(1024 * 1024), b""): + h.update(chunk) + return h.hexdigest() + + +def under_allowed_root(path: Path, roots: list[Path]) -> bool: + resolved = path.resolve() + for root in roots: + try: + resolved.relative_to(root.resolve()) + return True + except ValueError: + continue + return False + + +def sidecar_text(path: Path) -> tuple[str, str | None]: + for suffix in (path.suffix + ".txt", ".txt"): + candidate = path.with_suffix(suffix) if suffix.startswith(path.suffix) else path.with_suffix(suffix) + if candidate.exists() and candidate.is_file(): + try: + return candidate.read_text(errors="replace")[:12000], f"sidecar:{candidate.name}" + except Exception: + return "", "sidecar_unreadable" + return "", None + + +def extract_pdf_text(path: Path, max_pages: int) -> tuple[str, str | None]: + # Optional dependency; tests do not require it. Keeps PDF support local-only when installed. 
+ try: + import pypdf # type: ignore + except Exception: + return "", "pypdf_not_installed" + try: + reader = pypdf.PdfReader(str(path)) + if getattr(reader, "is_encrypted", False): + return "", "pdf_encrypted" + chunks = [] + for page in reader.pages[:max_pages]: + chunks.append(page.extract_text() or "") + return "\n".join(chunks)[:12000], "pypdf_cpu" + except Exception as exc: + return "", f"pdf_text_error:{type(exc).__name__}" + + +def load_image_pages(path: Path, max_pages: int) -> tuple[list[Image.Image], str | None]: + ext = path.suffix.lower() + if ext == ".pdf": + try: + import pypdfium2 as pdfium # type: ignore + except Exception: + return [], "pypdfium2_not_installed" + try: + pdf = pdfium.PdfDocument(str(path)) + pages = [] + for i in range(min(len(pdf), max_pages)): + bitmap = pdf[i].render(scale=1.5) + pages.append(bitmap.to_pil().convert("RGB")) + return pages, None + except Exception as exc: + return [], f"pdf_render_error:{type(exc).__name__}" + try: + img = Image.open(path) + img = ImageOps.exif_transpose(img).convert("RGB") + return [img], None + except Exception as exc: + return [], f"image_decode_error:{type(exc).__name__}" + + +def normalize_for_hash_features(img: Image.Image) -> dict[str, Any]: + small = ImageOps.contain(img.copy(), (224, 224)) + gray = small.convert("L") + hist = gray.histogram() + pixels = max(1, gray.width * gray.height) + mean = sum(i * c for i, c in enumerate(hist)) / pixels + variance = sum(((i - mean) ** 2) * c for i, c in enumerate(hist)) / pixels + return { + "mean_luma": round(mean, 2), + "contrast": round(variance ** 0.5, 2), + "aspect_ratio": round(img.width / max(1, img.height), 3), + } + + +def classify_rule(text: str, image_features: dict[str, Any]) -> dict[str, Any]: + t = text.lower() + best_label = "unknown_or_low_confidence" + best_score = 0 + for label, words in CATEGORY_KEYWORDS.items(): + score = sum(1 for word in words if word in t) + if score > best_score: + best_label, best_score = label, score + if 
best_score == 0: + ar = image_features.get("aspect_ratio", 1.0) + if ar > 1.3: + best_label, best_score = "screenshot_web_or_app", 1 + else: + best_label, best_score = "unknown_or_low_confidence", 0 + confidence = min(0.35 + 0.18 * best_score, 0.92) if best_score else 0.2 + if confidence < 0.45: + best_label = "unknown_or_low_confidence" + return { + "label": best_label, + "confidence": round(confidence, 3), + "device": "CPU", + "stage": "category_classification", + "method": "rule_based_fallback", + "npu_status": "not_configured_for_prototype_v1", + "candidate_labels": CATEGORY_LABELS, + } + + +def extract_metadata(text: str) -> dict[str, Any]: + dates = [] + for pat in DATE_PATTERNS: + dates.extend(m.group(1) for m in pat.finditer(text)) + amounts = AMOUNT_RE.findall(text) + flags = { + "org_present": bool(re.search(r"\b(?:inc|llc|clinic|department|bank|insurance|store)\b", text, re.I)), + "address_present": bool(re.search(r"\b\d{2,5}\s+[A-Za-z0-9 .]+\s+(?:st|street|ave|avenue|rd|road|blvd|drive|dr)\b", text, re.I)), + "phone_present": bool(PHONE_RE.search(text)), + "email_present": bool(EMAIL_RE.search(text)), + "policy_or_account_id_present": bool(ACCOUNT_RE.search(text)), + "identity_number_like_present": bool(SSN_LIKE_RE.search(text)), + } + return { + "dates_count": len(set(dates)), + "amounts_count": len(set(amounts)), + "detected_entities": flags, + "raw_values_redacted": True, + } + + +def call_embeddings(text: str, url: str, timeout: float) -> dict[str, Any]: + if not text.strip(): + return {"used": False, "device": "NPU", "status": "skipped_no_text", "npu_busy_delta_us": 0} + before = read_npu_busy() + payload = json.dumps({"input": text[:2048], "purpose": "document"}).encode() + req = urllib.request.Request(url, data=payload, headers={"Content-Type": "application/json"}) + t0 = time.perf_counter() + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + body = resp.read(1024 * 1024) + status = resp.status + parsed = 
json.loads(body.decode()) + dim = None + if isinstance(parsed, dict) and parsed.get("data"): + emb = parsed["data"][0].get("embedding", []) + dim = len(emb) if isinstance(emb, list) else None + after = read_npu_busy() + delta = (after - before) if before is not None and after is not None else None + return { + "used": True, + "device": "NPU", + "status": "ok" if status == 200 else f"http_{status}", + "embedding_dim": dim, + "wall_ms": round((time.perf_counter() - t0) * 1000, 2), + "npu_busy_delta_us": delta, + "verified_npu": bool(delta and delta > 0), + "endpoint": "127.0.0.1:18817", + } + except (urllib.error.URLError, TimeoutError, json.JSONDecodeError) as exc: + after = read_npu_busy() + delta = (after - before) if before is not None and after is not None else None + return { + "used": False, + "device": "NPU", + "status": f"embedding_service_error:{type(exc).__name__}", + "npu_busy_delta_us": delta, + "verified_npu": False, + "endpoint": "127.0.0.1:18817", + } + + +def needs_attention(text: str, embedding_result: dict[str, Any]) -> dict[str, Any]: + t = text.lower() + reasons = [] + for reason, words in ATTENTION_KEYWORDS.items(): + if any(word in t for word in words): + reasons.append(reason) + meta = extract_metadata(text) + if meta["amounts_count"]: + reasons.append("amount_due") + if meta["dates_count"]: + reasons.append("due_date_present") + reasons = sorted(set(reasons)) + value = bool(reasons) + confidence = min(0.45 + 0.1 * len(reasons), 0.9) if value else 0.35 + if embedding_result.get("verified_npu"): + confidence = min(confidence + 0.05, 0.95) + return { + "value": value, + "confidence": round(confidence, 3), + "reasons": reasons or (["low_confidence"] if not text.strip() else []), + "device": "NPU+CPU" if embedding_result.get("used") else "CPU", + "stage": "needs_attention", + "method": "NPU embedding verification + CPU rules" if embedding_result.get("used") else "CPU rules fallback", + "embedding": embedding_result, + } + + +def 
infer_media_type(path: Path, is_pdf_page: bool = False) -> str: + if is_pdf_page: + return "pdf_page" + mt, _ = mimetypes.guess_type(path.name) + if path.suffix.lower() == ".pdf": + return "pdf" + if mt and mt.startswith("image/"): + return "image" + return "unknown" + + +def triage_file(path_like: str | Path, options: TriageOptions | None = None) -> dict[str, Any]: + options = options or TriageOptions() + path = Path(path_like).expanduser() + resolved = path.resolve() + if not under_allowed_root(resolved, options.allowed_roots): + raise ValueError(f"path is outside allowed roots: {path}") + if not resolved.exists() or not resolved.is_file(): + raise FileNotFoundError(str(path)) + size = resolved.stat().st_size + if size > MAX_FILE_BYTES: + raise ValueError(f"file too large for prototype limit: {size} bytes") + + file_hash = sha256_file(resolved) + text, text_source = sidecar_text(resolved) + pdf_text_status = None + if resolved.suffix.lower() == ".pdf" and not text: + text, pdf_text_status = extract_pdf_text(resolved, options.max_pages) + text_source = pdf_text_status + + pages: list[dict[str, Any]] = [] + render_error = None + if not options.dry_run: + images, render_error = load_image_pages(resolved, options.max_pages) + else: + images = [] + + if not images and options.dry_run: + images = [] + elif not images: + # Return a file-level record even if PDF rendering is unavailable. 
+ images = [] + + embedding_result = call_embeddings(text, options.embedding_url, options.timeout_seconds) if options.use_embeddings else {"used": False, "device": "NPU", "status": "disabled", "npu_busy_delta_us": 0, "verified_npu": False} + attn = needs_attention(text, embedding_result) + meta = extract_metadata(text) + + if images: + for idx, img in enumerate(images): + features = normalize_for_hash_features(img) + classification = classify_rule(text, features) + pages.append({ + "page_index": idx, + "media_type": infer_media_type(resolved, resolved.suffix.lower() == ".pdf"), + "image": {"width": img.width, "height": img.height, "orientation": "portrait" if img.height >= img.width else "landscape", **features}, + "classification": classification, + "needs_attention": attn, + "metadata": meta, + "ocr": {"available": bool(text), "quality": 0.7 if text else 0.0, "device": "CPU", "text_source": text_source}, + }) + else: + classification = classify_rule(text, {"aspect_ratio": 1.0}) + pages.append({ + "page_index": 0, + "media_type": infer_media_type(resolved, resolved.suffix.lower() == ".pdf"), + "image": {"width": None, "height": None, "orientation": None, "render_error": render_error}, + "classification": classification, + "needs_attention": attn, + "metadata": meta, + "ocr": {"available": bool(text), "quality": 0.7 if text else 0.0, "device": "CPU", "text_source": text_source}, + }) + + result: dict[str, Any] = { + "file_id": f"sha256:{file_hash}", + "source_path_basename": resolved.name, + "media_type": infer_media_type(resolved), + "file_size_bytes": size, + "page_count": len(pages), + "pages": pages, + "processing_device_summary": { + "file_intake": "CPU", + "pdf_rendering": "CPU" if resolved.suffix.lower() == ".pdf" else "not_applicable", + "image_category_classification": "CPU rule fallback (NPU model not configured in prototype v1)", + "ocr_text_extraction": "CPU/local sidecar or optional local PDF text extractor", + "needs_attention_embedding": "NPU via 
def triage_batch(paths: list[str], options: TriageOptions | None = None) -> dict[str, Any]:
    """Triage several files, isolating failures per file.

    Args:
        paths: Local paths handed one by one to ``triage_file``.
        options: Options forwarded unchanged to every ``triage_file`` call.

    Returns:
        A dict with ``ok`` (true only when every file succeeded), ``files``
        (one entry per input path, in order) and ``generated_at`` (UTC ISO
        timestamp). A failed entry carries only the basename, the exception
        class name and its message.
    """
    entries: list[dict[str, Any]] = []
    every_ok = True
    for raw_path in paths:
        try:
            entry = {"ok": True, "result": triage_file(raw_path, options)}
        except Exception as err:
            # One bad file must not abort the batch; report it and continue.
            every_ok = False
            entry = {
                "ok": False,
                "source_path_basename": Path(raw_path).name,
                "error": type(err).__name__,
                "message": str(err),
            }
        entries.append(entry)
    return {
        "ok": every_ok,
        "files": entries,
        "generated_at": dt.datetime.now(dt.UTC).isoformat(),
    }
options = TriageOptions( + max_pages=args.max_pages, + include_ocr_text=args.include_ocr_text, + dry_run=args.dry_run, + use_embeddings=not args.no_embeddings, + allowed_roots=roots, + include_full_path=args.include_full_path, + ) + out = triage_batch(args.paths, options) + print(json.dumps(out, indent=2 if args.pretty else None, sort_keys=True)) + return 0 if out["ok"] else 2 + + +if __name__ == "__main__": + raise SystemExit(cli()) diff --git a/openvino-genai-npu-worker/README.md b/openvino-genai-npu-worker/README.md new file mode 100644 index 0000000..c7b241b --- /dev/null +++ b/openvino-genai-npu-worker/README.md @@ -0,0 +1,111 @@ +# OpenVINO GenAI NPU worker prototype + +Local-only prototype for cheap bounded background generation on Will's Intel NPU. It is intentionally isolated from primary Atlas/Hermes routing. + +## What it does + +- Model: `OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov`. +- Runtime: `/home/will/.venvs/npu` with `openvino-genai==2026.2.0.0`. +- Device: OpenVINO GenAI `NPU`. +- Default bind: `127.0.0.1:18820`. +- Jobs: `title`, `summary`, `notification`, `memory_candidate`. +- Prompt/input limits: 6000 chars, `MAX_PROMPT_LEN=1024`, max 256 generated tokens. + +The worker does not write memory, does not restart Atlas/Hermes, does not change primary routing, and does not log raw prompt bodies by default. + +## Files + +- `worker.py` — stdlib HTTP API plus CLI wrapper. +- `smoke_llm_npu.py` — direct GenAI smoke test with NPU busy-time verification. +- `systemd/openvino-genai-npu-worker.service` — optional user-service template; not installed by this prototype. 
+ +## Model/cache + +Downloaded model path: + +```text +/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov +``` + +OpenVINO compile cache path: + +```text +/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4 +``` + +NPU pipeline config used by the prototype: + +```python +CACHE_DIR=/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4 +MAX_PROMPT_LEN=1024 +MIN_RESPONSE_LEN=64 +PREFILL_HINT=DYNAMIC +GENERATE_HINT=FAST_COMPILE +``` + +AOT/blob note: first milestone uses `CACHE_DIR` only. Do not switch to manual `EXPORT_BLOB`/`BLOB_PATH` until compile latency is proven to be the bottleneck. If explicit blobs are used later, record OpenVINO version, NPU compiler version, driver version, model id, quantization flags, and source weights path; invalidate blobs after OpenVINO/NPU driver upgrades. + +## Direct smoke test + +```bash +cd /home/will/lab/swarm/openvino-genai-npu-worker +/home/will/.venvs/npu/bin/python smoke_llm_npu.py +``` + +Acceptance requires `npu_busy_delta_us > 0`. + +Observed cold-ish smoke after download/cache setup: + +```json +{ + "text": "\"Atlas Summarizes NPU Worker Options Requested by User\"", + "timing_ms": {"load": 10989.08, "generate": 3157.94, "total": 14147.02}, + "npu_busy_delta_us": 2650724 +} +``` + +## CLI usage + +```bash +/home/will/.venvs/npu/bin/python worker.py \ + --job title \ + --input 'Kanban task asks for a small OpenVINO GenAI NPU worker prototype.' 
+``` + +## HTTP usage + +Start locally only: + +```bash +cd /home/will/lab/swarm/openvino-genai-npu-worker +/home/will/.venvs/npu/bin/python worker.py --host 127.0.0.1 --port 18820 +``` + +Endpoints: + +```text +GET /healthz +GET /models +POST /v1/worker/generate +POST /v1/worker/extract-memory-candidates +POST /v1/worker/condense-notification +``` + +Example: + +```bash +curl -s http://127.0.0.1:18820/v1/worker/generate \ + -H 'Content-Type: application/json' \ + -d '{"job":"summary","input":"Build a bounded local NPU worker for small generation tasks, no primary routing changes.","max_new_tokens":80}' \ + | python -m json.tool +``` + +Response includes `npu_busy_delta_us`; treat zero as failure even if HTTP status is 200. + +## Safety boundaries + +- Binds only to `127.0.0.1` by default; non-local bind is refused in code. +- No raw request-body logging. +- No private external uploads. +- No Atlas/Hermes gateway restarts or primary model routing changes. +- NPU access is serialized with a process lock because the NPU is a shared resource with existing services. diff --git a/openvino-genai-npu-worker/smoke_llm_npu.py b/openvino-genai-npu-worker/smoke_llm_npu.py new file mode 100644 index 0000000..aba039f --- /dev/null +++ b/openvino-genai-npu-worker/smoke_llm_npu.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +"""Smoke-test OpenVINO GenAI LLMPipeline on Intel NPU. + +This verifies NPU execution by reading /sys/class/accel/accel0/device/npu_busy_time_us +before and after generation. HTTP 200/service success is not considered proof. 
def main() -> int:
    """Load the model on the NPU, generate once, and print a JSON report.

    The busy-time counter is sampled before the pipeline is constructed and
    again after generation, so the reported delta covers load plus generate.

    Returns:
        0 when the busy counter increased, otherwise 2.

    Raises:
        SystemExit: If the model path does not exist.
    """
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument("--model", default=DEFAULT_MODEL)
    cli_parser.add_argument("--cache-dir", default=DEFAULT_CACHE)
    cli_parser.add_argument(
        "--prompt",
        default="Write a concise title for: User asked Atlas to summarize NPU worker options.",
    )
    cli_parser.add_argument("--max-new-tokens", type=int, default=24)
    opts = cli_parser.parse_args()

    model_path, cache_dir = Path(opts.model), Path(opts.cache_dir)
    cache_dir.mkdir(parents=True, exist_ok=True)
    if not model_path.exists():
        raise SystemExit(f"model path does not exist: {model_path}")

    def elapsed_ms(since: float) -> float:
        # Milliseconds since *since*, rounded like every timing in the report.
        return round((time.monotonic() - since) * 1000, 2)

    pipeline_config = {
        "CACHE_DIR": str(cache_dir),
        "MAX_PROMPT_LEN": 1024,
        "MIN_RESPONSE_LEN": 64,
        "PREFILL_HINT": "DYNAMIC",
        "GENERATE_HINT": "FAST_COMPILE",
    }

    busy_before = read_busy()
    load_started = time.monotonic()
    pipe = ov_genai.LLMPipeline(str(model_path), "NPU", pipeline_config)
    load_ms = elapsed_ms(load_started)

    gen_started = time.monotonic()
    output = pipe.generate(opts.prompt, max_new_tokens=opts.max_new_tokens)
    gen_ms = elapsed_ms(gen_started)
    busy_after = read_busy()

    report = {
        "model": str(model_path),
        "device": "NPU",
        "cache_dir": str(cache_dir),
        "prompt_chars": len(opts.prompt),
        "max_new_tokens": opts.max_new_tokens,
        "text": str(output).strip(),
        "timing_ms": {"load": load_ms, "generate": gen_ms, "total": round(load_ms + gen_ms, 2)},
        "npu_busy_before_us": busy_before,
        "npu_busy_after_us": busy_after,
        "npu_busy_delta_us": busy_after - busy_before,
    }
    print(json.dumps(report, indent=2))
    # A successful call is not enough; the counter must have moved.
    return 2 if busy_after <= busy_before else 0
+""" +from __future__ import annotations + +import argparse +import json +import os +import re +import threading +import time +from dataclasses import dataclass +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from pathlib import Path +from typing import Any, cast +from urllib.parse import urlparse + +import openvino_genai as ov_genai # type: ignore[import-not-found] + +MODEL_ID = "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov" +DEFAULT_MODEL_PATH = "/home/will/models/openvino-genai/Qwen2.5-1.5B-Instruct-int4-ov" +DEFAULT_CACHE_DIR = "/home/will/.cache/openvino/genai-npu/qwen2.5-1.5b-int4" +BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us") +HOST = "127.0.0.1" +PORT = 18820 +MAX_INPUT_CHARS = 6000 +DEFAULTS = { + "title": 32, + "summary": 160, + "memory_candidate": 192, + "notification": 96, +} +PROMPTS = { + "title": "Write one concise title, 8 words or fewer. Return only the title.\n\nInput:\n{input}", + "summary": "Summarize the input in one short paragraph or up to 4 bullets. Be factual and concise.\n\nInput:\n{input}", + "memory_candidate": ( + "Extract durable memory candidates from the conversation excerpt. " + "Return strict JSON with keys: candidates (array of objects with fact, confidence, reason), notes. " + "Do not write memory; only propose candidates.\n\nInput:\n{input}" + ), + "notification": ( + "Condense this notification or log excerpt for a human. 
def coerce_json(text: str) -> Any | None:
    """Parse model output as JSON, tolerating prose around the JSON value.

    The whole stripped text is tried first. If that fails, every ``{`` or
    ``[`` is tried in order as the start of a JSON value and the first one
    that decodes is returned. Unlike a greedy "first opener to last closer"
    match, text after the JSON (including stray braces or brackets) does not
    prevent extraction.

    Args:
        text: Raw text produced by the model.

    Returns:
        The decoded JSON value, or ``None`` when the text is empty or holds
        no decodable JSON object/array.
    """
    text = text.strip()
    if not text:
        return None
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass
    decoder = json.JSONDecoder()
    for opener in re.finditer(r"[\[{]", text):
        try:
            # raw_decode stops at the end of the first complete value and
            # ignores whatever follows it.
            value, _end = decoder.raw_decode(text, opener.start())
        except json.JSONDecodeError:
            continue
        return value
    return None
+ self._pipe = ov_genai.LLMPipeline( + str(self.model_path), + "NPU", + CACHE_DIR=str(self.cache_dir), + MAX_PROMPT_LEN=1024, + MIN_RESPONSE_LEN=64, + PREFILL_HINT="DYNAMIC", + GENERATE_HINT="FAST_COMPILE", + ) + self._load_ms = round((time.monotonic() - start) * 1000, 2) + self._loaded_at = time.time() + + def generate(self, job: str, user_input: str, max_new_tokens: int | None = None) -> GenerationResult: + if job not in PROMPTS: + raise ValueError(f"unsupported job: {job}") + if not isinstance(user_input, str) or not user_input.strip(): + raise ValueError("input must be a non-empty string") + if len(user_input) > MAX_INPUT_CHARS: + raise ValueError(f"input too long: {len(user_input)} chars > {MAX_INPUT_CHARS}") + max_new_tokens = int(max_new_tokens or DEFAULTS[job]) + if max_new_tokens < 1 or max_new_tokens > 256: + raise ValueError("max_new_tokens must be between 1 and 256") + prompt = PROMPTS[job].format(input=user_input.strip()) + with self._lock: + load_start = time.monotonic() + self.load() + load_ms = round((time.monotonic() - load_start) * 1000, 2) + before = read_busy() + gen_start = time.monotonic() + pipe = cast(Any, self._pipe) + text = str(pipe.generate(prompt, max_new_tokens=max_new_tokens)).strip() + generate_ms = round((time.monotonic() - gen_start) * 1000, 2) + after = read_busy() + parsed = coerce_json(text) if job in {"memory_candidate", "notification"} else None + if job == "memory_candidate" and isinstance(parsed, list): + parsed = {"candidates": parsed, "notes": "model returned a top-level array; worker wrapped it to preserve the API contract"} + return GenerationResult( + text=text, + parsed_json=parsed, + timing_ms={"load": load_ms, "initial_load": self._load_ms or 0.0, "generate": generate_ms, "total": round(load_ms + generate_ms, 2)}, + npu_busy_delta_us=after - before, + npu_busy_before_us=before, + npu_busy_after_us=after, + ) + + def health(self) -> dict[str, Any]: + return { + "ok": True, + "model": MODEL_ID, + "model_path": 
def make_handler(worker: NpuWorker):
    """Build the HTTP request-handler class bound to *worker*.

    Args:
        worker: Object providing ``health()``, ``generate(job, input,
            max_new_tokens)``, ``model_path`` and ``cache_dir``.

    Returns:
        A ``BaseHTTPRequestHandler`` subclass serving ``GET /healthz``,
        ``GET /models`` and the three ``POST /v1/worker/*`` routes.

    Status codes for POST: 400 for malformed or invalid requests, 413 for
    oversized bodies, 500 when generation itself fails, 404 for unknown paths.
    """
    # Inputs are capped at 6000 characters by the worker, so 1 MiB leaves
    # ample room for JSON escaping while preventing unbounded reads.
    max_body_bytes = 1024 * 1024

    class Handler(BaseHTTPRequestHandler):
        server_version = "openvino-genai-npu-worker/0.1"

        def log_message(self, format: str, *args: Any) -> None:
            # Log only method/path/status metadata, not raw request bodies.
            print(f"{self.client_address[0]} {format % args}")

        def send_json(self, status: int, payload: Any) -> None:
            """Serialize *payload* and send it with the given HTTP status."""
            body = json.dumps(payload, indent=2).encode("utf-8")
            self.send_response(status)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def do_GET(self) -> None:  # noqa: N802
            path = urlparse(self.path).path
            if path == "/healthz":
                try:
                    health = worker.health()
                except Exception as exc:
                    # health() reads the busy counter; report a failure as
                    # JSON instead of dropping the connection.
                    self.send_json(500, {"ok": False, "error": str(exc), "error_type": type(exc).__name__})
                    return
                self.send_json(200, health)
            elif path == "/models":
                self.send_json(200, {"models": [{"id": MODEL_ID, "path": str(worker.model_path), "device": "NPU"}]})
            else:
                self.send_json(404, {"error": "not found"})

        def do_POST(self) -> None:  # noqa: N802
            path = urlparse(self.path).path
            # None means "take the job from the payload"; the sentinel marks
            # an unknown route.
            route_job = {
                "/v1/worker/generate": None,
                "/v1/worker/extract-memory-candidates": "memory_candidate",
                "/v1/worker/condense-notification": "notification",
            }.get(path, "__missing__")
            if route_job == "__missing__":
                self.send_json(404, {"error": "not found"})
                return
            try:
                length = int(self.headers.get("Content-Length", "0"))
            except ValueError:
                self.send_json(400, {"error": "invalid Content-Length"})
                return
            if length < 0:
                # rfile.read(-1) would block until the client disconnects.
                self.send_json(400, {"error": "invalid Content-Length"})
                return
            if length > max_body_bytes:
                self.send_json(413, {"error": f"request body exceeds {max_body_bytes} bytes"})
                return
            try:
                payload = json.loads(self.rfile.read(length) or b"{}")
                if not isinstance(payload, dict):
                    raise ValueError("request body must be a JSON object")
                job = route_job or str(payload.get("job", "summary"))
                if job == "memory":
                    job = "memory_candidate"
                result = worker.generate(job, str(payload.get("input", "")), payload.get("max_new_tokens"))
            except (ValueError, TypeError) as exc:
                # Bad JSON, bad encoding, or a validation error from the worker.
                self.send_json(400, {"error": str(exc)})
                return
            except Exception as exc:
                # Anything else is a server-side failure, not a client error.
                self.send_json(500, {"error": str(exc), "error_type": type(exc).__name__})
                return
            self.send_json(200, response_payload(worker, job, result))

    return Handler
parser.add_argument("--job", choices=sorted(PROMPTS), help="Run one CLI job instead of serving HTTP") + parser.add_argument("--input", help="Input text for --job") + parser.add_argument("--max-new-tokens", type=int) + args = parser.parse_args(argv) + + worker = NpuWorker(args.model_path, args.cache_dir) + if args.job: + result = worker.generate(args.job, args.input or "", args.max_new_tokens) + print(json.dumps(response_payload(worker, args.job, result), indent=2)) + return 0 if result.npu_busy_delta_us > 0 else 2 + + if args.host != "127.0.0.1": + raise SystemExit("Refusing non-local bind without code change/explicit approval") + server = ThreadingHTTPServer((args.host, args.port), make_handler(worker)) + print(f"serving {MODEL_ID} on http://{args.host}:{args.port}; raw prompts are not logged") + server.serve_forever() + return 0 + + +if __name__ == "__main__": + raise SystemExit(cli()) diff --git a/openvino-reranker-npu/README.md b/openvino-reranker-npu/README.md new file mode 100644 index 0000000..30194a4 --- /dev/null +++ b/openvino-reranker-npu/README.md @@ -0,0 +1,138 @@ +# OpenVINO NPU reranker service + +Local-first cross-encoder reranker prototype for second-stage RAG ranking. + +- Default bind: `127.0.0.1:18818` +- Default model: `cross-encoder/ms-marco-MiniLM-L6-v2` +- Default device: `NPU` +- Model cache: `/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov/` +- NPU proof: `/sys/class/accel/accel0/device/npu_busy_time_us` delta before/after inference + +This service is intentionally not wired into live RAG by default. + +## Files + +- `server.py` — stdlib HTTP OpenVINO Runtime service. +- `smoke.py` — non-private API/ranking/NPU busy-time smoke test. +- `openvino-reranker.service` — optional user-systemd unit. 
+ +## One-time setup + +Use a separate venv so the existing Whisper/embeddings NPU venv is not perturbed: + +```bash +python -m venv /home/will/.venvs/openvino-reranker +source /home/will/.venvs/openvino-reranker/bin/activate +python -m pip install -U pip +python -m pip install "openvino>=2026.2" "optimum-intel[openvino]" transformers tokenizers nncf numpy +``` + +Export the model: + +```bash +source /home/will/.venvs/openvino-reranker/bin/activate +optimum-cli export openvino \ + --model cross-encoder/ms-marco-MiniLM-L6-v2 \ + --task text-classification \ + --weight-format int8 \ + --trust-remote-code false \ + /home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov +``` + +If INT8 export or NPU compile fails, export an FP16/FP32 IR to a separate directory and point `OPENVINO_RERANKER_MODEL_DIR` at it while debugging. Do not overwrite existing vector/RAG/Chroma collections. + +## Run in foreground + +Check the port and NPU counter first: + +```bash +ss -ltnp | grep ':18818 ' || true +cat /sys/class/accel/accel0/device/npu_busy_time_us +``` + +Start locally: + +```bash +source /home/will/.venvs/openvino-reranker/bin/activate +OPENVINO_RERANKER_HOST=127.0.0.1 \ +OPENVINO_RERANKER_PORT=18818 \ +OPENVINO_RERANKER_DEVICE=NPU \ +OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov \ +python /home/will/lab/swarm/openvino-reranker-npu/server.py +``` + +Startup performs a non-private smoke inference and fails closed when `OPENVINO_RERANKER_DEVICE=NPU` but `npu_busy_time_us` does not increase. 
+ +## API + +Health: + +```bash +curl -sS http://127.0.0.1:18818/healthz | jq +curl -sS http://127.0.0.1:18818/readyz | jq +``` + +Rerank: + +```bash +curl -sS http://127.0.0.1:18818/rerank \ + -H 'Content-Type: application/json' \ + -d '{ + "query":"how do I verify OpenVINO NPU usage?", + "documents":[ + {"id":"good","text":"Check /sys/class/accel/accel0/device/npu_busy_time_us before and after inference."}, + {"id":"bad","text":"This note is about making sourdough starter."} + ], + "top_k":2 + }' | jq +``` + +Compatibility alias: + +```bash +curl -sS http://127.0.0.1:18818/v1/rerank \ + -H 'Content-Type: application/json' \ + -d '{"model":"local-reranker","query":"npu busy time","documents":["OpenVINO NPU busy time proves accelerator use."],"top_n":1}' | jq +``` + +## Smoke test + +```bash +source /home/will/.venvs/openvino-reranker/bin/activate +python /home/will/lab/swarm/openvino-reranker-npu/smoke.py --url http://127.0.0.1:18818 +``` + +Expected: + +- `/readyz` is HTTP 200 and reports `device=NPU`. +- Each fixture returns `ok=true` and a sorted `results` list. +- The top result matches the non-private fixture expectation. +- Response and sysfs `npu_busy_delta_us` are positive. + +## Optional systemd user service + +Install the unit only after the foreground command and smoke test pass: + +```bash +cp /home/will/lab/swarm/openvino-reranker-npu/openvino-reranker.service /home/will/.config/systemd/user/openvino-reranker.service +systemctl --user daemon-reload +systemctl --user start openvino-reranker.service +systemctl --user status openvino-reranker.service --no-pager +journalctl --user -u openvino-reranker.service -n 100 --no-pager +``` + +Do not enable or integrate it into live RAG without explicit approval. + +## Optional RAG integration plan (disabled by default) + +RAG should keep vector search against `obsidian_bge_npu` unchanged, retrieve a larger candidate set, and call this service as a read-only request-time second stage. 
Suggested disabled-by-default knobs: + +```text +RAG_RERANK_ENABLED=false +RAG_RERANK_URL=http://127.0.0.1:18818/rerank +RAG_RERANK_INITIAL_K=20 +RAG_RERANK_TOP_K=5 +RAG_RERANK_TIMEOUT_MS=3000 +``` + +On reranker timeout/error, fall back to vector order and include metadata such as `rerank_error`; do not mutate or reindex Chroma collections. diff --git a/openvino-reranker-npu/openvino-reranker.service b/openvino-reranker-npu/openvino-reranker.service new file mode 100644 index 0000000..f979b9a --- /dev/null +++ b/openvino-reranker-npu/openvino-reranker.service @@ -0,0 +1,19 @@ +[Unit] +Description=OpenVINO NPU Reranker HTTP Service (port 18818) +After=network-online.target + +[Service] +Type=simple +WorkingDirectory=/home/will/lab/swarm/openvino-reranker-npu +Environment=OPENVINO_RERANKER_HOST=127.0.0.1 +Environment=OPENVINO_RERANKER_PORT=18818 +Environment=OPENVINO_RERANKER_MODEL=cross-encoder/ms-marco-MiniLM-L6-v2 +Environment=OPENVINO_RERANKER_MODEL_DIR=/home/will/.cache/openvino-models/rerankers/ms-marco-MiniLM-L6-v2-int8-ov +Environment=OPENVINO_RERANKER_DEVICE=NPU +Environment=OPENVINO_RERANKER_MAX_LENGTH=512 +ExecStart=/home/will/.venvs/openvino-reranker/bin/python /home/will/lab/swarm/openvino-reranker-npu/server.py +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=default.target diff --git a/openvino-reranker-npu/server.py b/openvino-reranker-npu/server.py new file mode 100755 index 0000000..74554eb --- /dev/null +++ b/openvino-reranker-npu/server.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python3 +"""OpenVINO NPU cross-encoder reranker HTTP service. 
def softmax_prob(logits: np.ndarray, index: int = 1) -> float:
    """Return the softmax probability of one class from a logits array.

    Args:
        logits: Array of logits; it is flattened to one dimension first.
        index: Position (after flattening) whose probability is returned.

    Returns:
        The softmax probability at *index* as a Python float.
    """
    row = np.asarray(logits, dtype=np.float64).reshape(-1)
    # Subtracting the maximum keeps exp() from overflowing on large logits.
    exps = np.exp(row - np.max(row))
    return float(exps[index] / np.sum(exps))
{self.model_dir}") + + self.core = ov.Core() + self.available_devices = list(self.core.available_devices) + if self.device not in self.available_devices: + raise RuntimeError(f"OpenVINO device {self.device!r} unavailable; available={self.available_devices}") + + xml_path = self.model_dir / "openvino_model.xml" + if not xml_path.exists(): + raise FileNotFoundError(f"OpenVINO IR not found: {xml_path}") + + self.tokenizer = AutoTokenizer.from_pretrained(str(self.model_dir), local_files_only=True) + model = self.core.read_model(str(xml_path)) + self._reshape_static(model) + self.compiled = self.core.compile_model(model, self.device) + self.input_names = {inp.get_any_name() for inp in self.compiled.inputs} + self.output = self.compiled.output(0) + + if startup_smoke: + try: + smoke = self.rerank( + "npu busy time", + [{"id": "smoke", "text": "OpenVINO NPU usage is verified by npu_busy_time_us."}], + top_k=1, + return_documents=False, + ) + self.startup_smoke = { + "ok": bool(smoke.get("ok")), + "duration_ms": smoke.get("duration_ms"), + "npu_busy_delta_us": smoke.get("npu_busy_delta_us"), + } + if self.device == "NPU" and int(smoke.get("npu_busy_delta_us") or 0) <= 0: + raise RuntimeError("startup smoke did not increase npu_busy_time_us") + except Exception as exc: + self.ready_error = f"startup smoke failed: {type(exc).__name__}: {exc}" + raise + + self.ready = True + + def _reshape_static(self, model: ov.Model) -> None: + shape_by_name: dict[str, list[int]] = {} + for inp in model.inputs: + name = inp.get_any_name() + if name in {"input_ids", "attention_mask", "token_type_ids"}: + shape_by_name[name] = [1, self.max_length] + if shape_by_name: + model.reshape(shape_by_name) + + def _tokenize(self, query: str, document: str) -> dict[str, np.ndarray]: + tokens = self.tokenizer( + query, + document, + max_length=self.max_length, + padding="max_length", + truncation=True, + return_tensors="np", + ) + return {name: np.asarray(value) for name, value in tokens.items() if name 
in self.input_names} + + def _score_pair(self, query: str, document: str) -> dict[str, float | None]: + inputs = self._tokenize(query, document) + missing = self.input_names - set(inputs) + # Some exported BERT models do not use token_type_ids. input_ids and attention_mask are required. + required_missing = missing & {"input_ids", "attention_mask"} + if required_missing: + raise RuntimeError(f"tokenizer did not produce required inputs: {sorted(required_missing)}") + outputs = self.compiled(inputs) + logits = np.asarray(outputs[self.output]) + flat = logits.reshape(-1) + if flat.size == 1: + raw = float(flat[0]) + return {"score": raw, "raw_score": raw, "probability": sigmoid(raw)} + if flat.size >= 2: + raw = float(flat[1]) + return {"score": raw, "raw_score": raw, "probability": softmax_prob(flat, 1)} + raise RuntimeError(f"unexpected empty logits shape: {list(logits.shape)}") + + def rerank( + self, + query: str, + documents: list[dict[str, Any]], + *, + top_k: int | None, + return_documents: bool = True, + ) -> dict[str, Any]: + before = npu_busy_time_us() + started = time.perf_counter() + results: list[dict[str, Any]] = [] + with self.lock: + for idx, doc in enumerate(documents): + scored = self._score_pair(query, str(doc["text"])) + item: dict[str, Any] = { + "index": idx, + "score": scored["score"], + "raw_score": scored["raw_score"], + "probability": scored["probability"], + } + if doc.get("id") is not None: + item["id"] = doc.get("id") + if return_documents: + item["text"] = doc["text"] + item["metadata"] = doc.get("metadata") if isinstance(doc.get("metadata"), dict) else {} + results.append(item) + after = npu_busy_time_us() + results.sort(key=lambda item: (-float(item["score"]), int(item["index"]))) + clamped_top_k = len(results) if top_k is None else max(1, min(int(top_k), len(results))) + duration_ms = round((time.perf_counter() - started) * 1000, 3) + npu_delta = None if before is None or after is None else after - before + payload = { + "ok": True, + 
def normalize_documents(value: Any, max_documents: int) -> list[dict[str, Any]]:
    """Validate the request's ``documents`` field and normalise each entry.

    A plain string becomes ``{"text": <string>}``. A dict becomes
    ``{"id", "text", "metadata"}``, where ``metadata`` falls back to ``{}``
    unless the supplied value is itself a dict.

    Raises:
        ValueError: if ``value`` is not a non-empty list, holds more than
            ``max_documents`` entries, contains an entry that is neither a
            string nor a dict, or contains an entry whose text is not a
            non-blank string.
    """
    if not (isinstance(value, list) and value):
        raise ValueError("documents must be a non-empty list")
    if len(value) > max_documents:
        raise ValueError(f"documents exceeds max_documents={max_documents}")

    normalized: list[dict[str, Any]] = []
    for position, entry in enumerate(value):
        if isinstance(entry, dict):
            raw_metadata = entry.get("metadata")
            candidate: dict[str, Any] = {
                "id": entry.get("id"),
                "text": entry.get("text"),
                "metadata": raw_metadata if isinstance(raw_metadata, dict) else {},
            }
        elif isinstance(entry, str):
            candidate = {"text": entry}
        else:
            raise ValueError(f"documents[{position}] must be a string or object")

        content = candidate["text"]
        if not (isinstance(content, str) and content.strip()):
            raise ValueError(f"documents[{position}].text must be a non-empty string")
        normalized.append(candidate)
    return normalized
def do_GET(self) -> None:
    """Serve the GET endpoints: service index, health, and readiness.

    The query string and any trailing slash are ignored when matching the
    path. ``/healthz`` and ``/health`` always answer 200 with the service's
    health report; ``/readyz`` answers 200 only when that report has a
    truthy ``ok`` and 503 otherwise. Unknown paths get a 404 JSON error.
    """
    route = self.path.partition("?")[0].rstrip("/") or "/"

    if route == "/":
        index = {
            "ok": True,
            "service": "openvino-reranker",
            "endpoints": ["/healthz", "/readyz", "/rerank", "/v1/rerank"],
        }
        self.write_json(index)
        return

    if route in ("/healthz", "/health"):
        self.write_json(self.svc.health(), status=200)
        return

    if route == "/readyz":
        report = self.svc.health()
        code = 200 if report.get("ok") else 503
        self.write_json(report, status=code)
        return

    self.write_json({"ok": False, "error": "not found", "results": []}, status=404)
def read_json(self) -> dict[str, Any]:
    """Read the request body and decode it as a JSON object.

    Returns:
        The parsed JSON object. A missing or zero ``Content-Length`` (or an
        empty body) yields ``{}``.

    Raises:
        RequestTooLarge: if the declared length exceeds
            ``self.max_body_bytes``.
        ValueError: if ``Content-Length`` is not a non-negative integer, the
            body is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass), or the top-level JSON value is not an
            object.
    """
    declared = self.headers.get("Content-Length")
    try:
        length = int(declared or 0)
    except ValueError:
        raise ValueError("Content-Length must be a non-negative integer") from None
    # A negative size makes rfile.read() read until EOF, which would bypass
    # the max_body_bytes cap and block on a connection the client keeps open.
    if length < 0:
        raise ValueError("Content-Length must be a non-negative integer")
    if length > self.max_body_bytes:
        raise RequestTooLarge(f"request body exceeds {self.max_body_bytes} bytes")

    body = self.rfile.read(length).decode("utf-8", "replace") if length else "{}"
    payload = json.loads(body or "{}")
    if not isinstance(payload, dict):
        raise ValueError("JSON body must be an object")
    return payload
default=int(os.environ.get("OPENVINO_RERANKER_MAX_BODY_BYTES", str(DEFAULT_MAX_BODY_BYTES)))) + parser.add_argument("--skip-startup-smoke", action="store_true", default=os.environ.get("OPENVINO_RERANKER_SKIP_STARTUP_SMOKE", "").lower() in {"1", "true", "yes"}) + args = parser.parse_args() + + service = RerankerService( + Path(args.model_dir).expanduser(), + args.model, + args.device, + args.max_length, + startup_smoke=not args.skip_startup_smoke, + ) + httpd = ThreadingHTTPServer((args.host, args.port), Handler) + httpd.reranker_service = service # type: ignore[attr-defined] + httpd.max_body_bytes = args.max_body_bytes # type: ignore[attr-defined] + httpd.max_documents = args.max_documents # type: ignore[attr-defined] + print( + f"openvino-reranker listening on {args.host}:{args.port} model={args.model} " + f"model_dir={args.model_dir} device={args.device} max_length={args.max_length}", + flush=True, + ) + try: + httpd.serve_forever() + except KeyboardInterrupt: + pass + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/openvino-reranker-npu/smoke.py b/openvino-reranker-npu/smoke.py new file mode 100755 index 0000000..3710160 --- /dev/null +++ b/openvino-reranker-npu/smoke.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +"""Smoke/benchmark checks for the OpenVINO reranker service. + +Prints a JSON summary and exits non-zero on schema/ranking/NPU verification failure. +Uses only non-private fixture text. 
def post_json(url: str, payload: dict[str, Any], timeout: float) -> tuple[int, dict[str, Any]]:
    """POST ``payload`` as JSON to ``url`` and return ``(status, body)``.

    The body is always a dict so callers can use ``.get`` unconditionally:

    * HTTP error responses return their status code and parsed body.
    * A body that is not valid JSON, or whose top-level value is not an
      object, is returned as ``{"error": <raw body text>}``.
    * Transport failures (connection refused, timeout, unknown URL scheme,
      ...) return status ``0`` and ``{"error": "<ExceptionType>: <message>"}``
      instead of raising, so the caller can report a failure rather than
      crash with a traceback.
    """
    data = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            status = resp.status
            body = resp.read().decode("utf-8", "replace")
    except urllib.error.HTTPError as exc:
        # HTTPError must be handled before OSError: it is a URLError (and so
        # an OSError) subclass, but it carries a real status and body.
        status = exc.code
        body = exc.read().decode("utf-8", "replace")
    except OSError as exc:
        # URLError and socket timeouts are OSError subclasses.
        return 0, {"error": f"{type(exc).__name__}: {exc}"}

    try:
        parsed = json.loads(body)
    except ValueError:
        parsed = None
    if not isinstance(parsed, dict):
        parsed = {"error": body}
    return status, parsed
dict[str, Any]]: + try: + with urllib.request.urlopen(url, timeout=timeout) as resp: + body = resp.read().decode("utf-8", "replace") + return resp.status, json.loads(body) + except urllib.error.HTTPError as exc: + body = exc.read().decode("utf-8", "replace") + try: + parsed = json.loads(body) + except Exception: + parsed = {"error": body} + return exc.code, parsed + + +def percentile(values: list[float], pct: float) -> float | None: + if not values: + return None + ordered = sorted(values) + idx = min(len(ordered) - 1, max(0, round((pct / 100.0) * (len(ordered) - 1)))) + return round(ordered[idx], 3) + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument("--url", default="http://127.0.0.1:18818") + parser.add_argument("--timeout", type=float, default=20.0) + parser.add_argument("--allow-cpu", action="store_true", help="do not fail when health reports a non-NPU device") + args = parser.parse_args() + + base = args.url.rstrip("/") + failures: list[str] = [] + health_status, health = get_json(f"{base}/readyz", args.timeout) + if health_status != 200 or not health.get("ok"): + failures.append(f"readyz failed status={health_status} error={health.get('ready_error') or health.get('error')}") + device = health.get("device") + if device != "NPU" and not args.allow_cpu: + failures.append(f"device is {device!r}, expected 'NPU'") + + latencies: list[float] = [] + response_npu_total = 0 + sysfs_npu_total = 0 + top1_passed = 0 + + for case in FIXTURES: + before = npu_busy_time_us() + started = time.perf_counter() + status, payload = post_json( + f"{base}/rerank", + {"query": case["query"], "documents": case["documents"], "top_k": len(case["documents"]), "return_documents": False}, + args.timeout, + ) + wall_ms = (time.perf_counter() - started) * 1000 + after = npu_busy_time_us() + latencies.append(float(payload.get("duration_ms") or wall_ms)) + response_delta = payload.get("npu_busy_delta_us") + sysfs_delta = None if before is None or after is None 
else after - before + if isinstance(response_delta, int): + response_npu_total += response_delta + if isinstance(sysfs_delta, int): + sysfs_npu_total += sysfs_delta + results = payload.get("results") if isinstance(payload, dict) else None + top_id = results[0].get("id") if isinstance(results, list) and results else None + if status != 200 or not payload.get("ok"): + failures.append(f"case {case['expected_top_id']} HTTP/status failed: status={status} error={payload.get('error')}") + if not isinstance(results, list) or len(results) != len(case["documents"]): + failures.append(f"case {case['expected_top_id']} returned invalid results") + if top_id == case["expected_top_id"]: + top1_passed += 1 + else: + failures.append(f"case {case['expected_top_id']} top_id={top_id!r}") + if device == "NPU": + if not isinstance(response_delta, int) or response_delta <= 0: + failures.append(f"case {case['expected_top_id']} response npu delta not positive: {response_delta}") + if not isinstance(sysfs_delta, int) or sysfs_delta <= 0: + failures.append(f"case {case['expected_top_id']} sysfs npu delta not positive: {sysfs_delta}") + + summary = { + "ok": not failures, + "url": base, + "model": health.get("model"), + "device": device, + "cases": len(FIXTURES), + "top1_passed": top1_passed, + "p50_ms": percentile(latencies, 50), + "p95_ms": percentile(latencies, 95), + "mean_ms": round(statistics.mean(latencies), 3) if latencies else None, + "npu_busy_delta_us_total": sysfs_npu_total, + "response_npu_busy_delta_us_total": response_npu_total, + "failures": failures, + } + print(json.dumps(summary, indent=2, sort_keys=True)) + return 0 if not failures else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/npu-service-health.sh b/scripts/npu-service-health.sh index d574ecb..460e0b2 100755 --- a/scripts/npu-service-health.sh +++ b/scripts/npu-service-health.sh @@ -68,7 +68,7 @@ section "HTTP health" http_json "RAG endpoint" "http://127.0.0.1:18810/healthz" || 
true http_json "RAG/embedding health wrapper" "http://127.0.0.1:18814/healthz" || true http_json "Whisper NPU" "http://127.0.0.1:18816/health" || true -http_json "OpenVINO embeddings" "http://127.0.0.1:18817/health" || true +http_json "OpenVINO embeddings" "http://127.0.0.1:18817/healthz" || true # Prototypes are expected to be unavailable until explicitly started/approved. http_json "NPU reranker prototype" "http://127.0.0.1:18818/readyz" || true http_json "NPU router classifier prototype" "http://127.0.0.1:18819/healthz" || true @@ -91,10 +91,10 @@ if [[ -z "$response" ]]; then fi delta=$((after - before)) printf 'sysfs_before_us=%s\nsysfs_after_us=%s\nsysfs_delta_us=%s\n' "$before" "$after" "$delta" -printf '%s' "$response" | python - <<'PY' || true -import json, sys +RESPONSE_JSON="$response" python - <<'PY' || true +import json, os try: - data = json.load(sys.stdin) + data = json.loads(os.environ.get('RESPONSE_JSON', '')) except Exception as exc: print(f'response_parse_error={type(exc).__name__}: {exc}') raise SystemExit(0)