From d3373e7234a55cb9a7e049786c7f70e81583642f Mon Sep 17 00:00:00 2001 From: William Valentin Date: Thu, 4 Jun 2026 13:07:51 -0700 Subject: [PATCH] feat(npu): add document image triage prototype --- openvino-doc-image-triage-npu/README.md | 164 +++++++ openvino-doc-image-triage-npu/SPEC.md | 146 ++++++ openvino-doc-image-triage-npu/make_samples.py | 69 +++ .../samples/synthetic_blurry.png | Bin 0 -> 4620 bytes .../samples/synthetic_blurry.png.txt | 2 + .../samples/synthetic_conversation.png | Bin 0 -> 9325 bytes .../samples/synthetic_conversation.png.txt | 3 + .../samples/synthetic_invoice.pdf | Bin 0 -> 28071 bytes .../samples/synthetic_invoice.pdf.txt | 5 + .../samples/synthetic_invoice.png | Bin 0 -> 13347 bytes .../samples/synthetic_invoice.png.txt | 5 + .../samples/synthetic_receipt.png | Bin 0 -> 12268 bytes .../samples/synthetic_receipt.png.txt | 5 + .../samples/synthetic_sensitive_form.png | Bin 0 -> 12769 bytes .../samples/synthetic_sensitive_form.png.txt | 5 + openvino-doc-image-triage-npu/server.py | 196 ++++++++ .../tests/smoke_test.py | 154 ++++++ openvino-doc-image-triage-npu/triage.py | 459 ++++++++++++++++++ 18 files changed, 1213 insertions(+) create mode 100644 openvino-doc-image-triage-npu/README.md create mode 100644 openvino-doc-image-triage-npu/SPEC.md create mode 100644 openvino-doc-image-triage-npu/make_samples.py create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_blurry.png create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_blurry.png.txt create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_conversation.png create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_conversation.png.txt create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_invoice.pdf create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_invoice.pdf.txt create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_invoice.png create mode 100644 
openvino-doc-image-triage-npu/samples/synthetic_invoice.png.txt create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_receipt.png create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_receipt.png.txt create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_sensitive_form.png create mode 100644 openvino-doc-image-triage-npu/samples/synthetic_sensitive_form.png.txt create mode 100644 openvino-doc-image-triage-npu/server.py create mode 100644 openvino-doc-image-triage-npu/tests/smoke_test.py create mode 100644 openvino-doc-image-triage-npu/triage.py diff --git a/openvino-doc-image-triage-npu/README.md b/openvino-doc-image-triage-npu/README.md new file mode 100644 index 0000000..d7e8af4 --- /dev/null +++ b/openvino-doc-image-triage-npu/README.md @@ -0,0 +1,164 @@ +# OpenVINO NPU document/image triage prototype + +Local-only, CLI-first prototype for triaging screenshots, photos/scans, and PDF page images. +It returns structured JSON metadata and explicitly reports CPU vs NPU stages. +Optional HTTP is a localhost/loopback-only prototype on `127.0.0.1:18829` when explicitly started; non-loopback binds are rejected and it is not a live Atlas/Hermes/RAG integration. + +Location: `/home/will/lab/swarm/openvino-doc-image-triage-npu/` + +## Privacy and safety + +- No external uploads. +- The only network call is optional localhost-only embeddings at `127.0.0.1:18817`. +- Raw OCR/sidecar text is redacted by default and is not logged. +- Full source paths are omitted by default; responses include basename and SHA-256. +- Allowed roots are enforced for CLI/server requests. +- This prototype does not mutate Obsidian, RAG, Chroma, vector collections, routing, or gateway services. +- Do not process broad private document/image directories; use generated synthetic fixtures unless Will explicitly approves a narrow source root. 
+- See `SPEC.md` for the full CLI contract, smoke-test plan, NPU verification plan, docs implications, and no-go/defer criteria. + +## CPU vs NPU stages + +CPU: +- file intake, allowed-root checks, size checks, hashing +- image/PDF decoding/rendering and normalization +- optional local text extraction from sidecars or PDF text libraries +- regex metadata extraction and rule-based category fallback +- final needs-attention rules + +NPU: +- needs-attention semantic embedding, via existing local OpenVINO embeddings service on `:18817` +- verified with `/sys/class/accel/accel0/device/npu_busy_time_us` before/after each embedding call + +Not configured in v1: +- image category classifier on NPU. The JSON reports this as `CPU rule fallback (NPU model not configured in prototype v1)`. A future task can add a static-shape MobileNet/EfficientNet/ResNet OpenVINO IR model. +- OCR on NPU. OCR remains CPU/local plumbing in v1. + +## Files + +- `triage.py` — core library and CLI. +- `server.py` — stdlib HTTP server with `/healthz`, `/models`, `/triage`, `/triage/batch`. +- `make_samples.py` — creates synthetic non-private image/PDF samples. +- `tests/smoke_test.py` — end-to-end smoke test, including NPU busy-time verification when `:18817` is reachable. +- `samples/` — generated synthetic fixtures. + +## Requirements + +Use the existing NPU venv when available: + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +/home/will/.venvs/npu/bin/python -m pip install pillow +``` + +`pillow` is already present in the discovered `/home/will/.venvs/npu`. Optional local PDF text/rendering improves PDF support: + +```bash +/home/will/.venvs/npu/bin/python -m pip install pypdf pypdfium2 +``` + +The smoke tests do not require external services except the existing localhost `:18817` embeddings service for positive NPU verification. 
+ +## CLI usage + +Generate synthetic samples: + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +/home/will/.venvs/npu/bin/python make_samples.py +``` + +Triage local files: + +```bash +/home/will/.venvs/npu/bin/python triage.py \ + --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu \ + --pretty \ + samples/synthetic_invoice.png samples/synthetic_invoice.pdf +``` + +Disable the local NPU embeddings call if needed: + +```bash +/home/will/.venvs/npu/bin/python triage.py --no-embeddings --allowed-root "$PWD" samples/synthetic_receipt.png +``` + +Include OCR/sidecar text in a single response only when explicitly requested: + +```bash +/home/will/.venvs/npu/bin/python triage.py --include-ocr-text --allowed-root "$PWD" samples/synthetic_invoice.png +``` + +## HTTP usage + +The prototype is CLI-first. HTTP is optional and not enabled by default. If a foreground HTTP server is needed for review, prefer optional port `18829` so it does not collide with the GenAI worker prototype on `18820`. Check the port first: + +```bash +ss -ltnp | grep ':18829\b' || true +``` + +Start a local-only server and stop it after the smoke: + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18829 --allowed-root "$PWD" +``` + +Call it with synthetic/non-private fixtures only: + +```bash +curl -sS http://127.0.0.1:18829/healthz | jq +curl -sS http://127.0.0.1:18829/models | jq +curl -sS -X POST http://127.0.0.1:18829/triage \ + -H 'Content-Type: application/json' \ + -d '{"path":"/home/will/lab/swarm/openvino-doc-image-triage-npu/samples/synthetic_invoice.png","options":{"allowed_roots":["/home/will/lab/swarm/openvino-doc-image-triage-npu"]}}' | jq +``` + +Do not install or enable a persistent service for this prototype without explicit approval, and do not point it at private document/image directories during smoke tests. 
+ +## Smoke test + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +/home/will/.venvs/npu/bin/python tests/smoke_test.py +``` + +Expected: JSON ending with `"ok": true`. The smoke test generates only synthetic fixtures, verifies non-loopback HTTP binds are rejected, starts its temporary server on a preflighted free localhost port, and terminates it before exit. If the embeddings service is up, the result should show positive NPU busy-time delta and each embedded page should report `verified_npu: true`. + +## Example output shape + +```json +{ + "file_id": "sha256:...", + "source_path_basename": "synthetic_invoice.png", + "media_type": "image", + "page_count": 1, + "pages": [ + { + "page_index": 0, + "classification": { + "label": "bill_or_invoice", + "confidence": 0.71, + "device": "CPU", + "method": "rule_based_fallback" + }, + "needs_attention": { + "value": true, + "device": "NPU+CPU", + "reasons": ["amount_due", "due_date_present"], + "embedding": {"verified_npu": true, "npu_busy_delta_us": 12345} + }, + "metadata": {"dates_count": 1, "amounts_count": 1, "raw_values_redacted": true}, + "ocr": {"available": true, "device": "CPU"} + } + ], + "processing_device_summary": { + "file_intake": "CPU", + "image_category_classification": "CPU rule fallback (NPU model not configured in prototype v1)", + "needs_attention_embedding": "NPU via local :18817", + "metadata_extraction": "CPU", + "npu_verified": true + }, + "privacy": {"external_uploads": false, "raw_text_logged": false} +} +``` diff --git a/openvino-doc-image-triage-npu/SPEC.md b/openvino-doc-image-triage-npu/SPEC.md new file mode 100644 index 0000000..d0f7cf4 --- /dev/null +++ b/openvino-doc-image-triage-npu/SPEC.md @@ -0,0 +1,146 @@ +# OpenVINO NPU document/image triage spec + +Status: CLI-first prototype specification; not a live Atlas/Hermes integration. + +## Safety stance + +- Default workflow is local CLI execution against explicitly named files. 
+- Optional HTTP is disabled unless a human starts it, is constrained to loopback (`127.0.0.1`, `::1`, or `localhost`), and is intended for `127.0.0.1:18829` only. +- No persistent systemd unit, Docker service, gateway hook, Atlas/Hermes route, RAG route, Chroma/vector collection mutation, or in-place reindexing is part of this spec. +- Smoke data must be synthetic/non-private only. Do not point this tool at Will's private document, image, screenshot, Downloads, Desktop, Obsidian, or photo-library directories without explicit approval. +- NPU claims require `/sys/class/accel/accel0/device/npu_busy_time_us` before/after deltas. HTTP 200, JSON output, or model-load success alone is not NPU proof. + +## Recommended model/runtime + +Recommended v1 runtime: + +- File intake, hashing, MIME/extension checks, image/PDF rendering, sidecar/native PDF text extraction, metadata extraction, and category fallback: local Python CPU path using Pillow plus optional `pypdf`/`pypdfium2`. +- Needs-attention semantic check: reuse the live localhost OpenVINO embeddings service on `127.0.0.1:18817`, currently `bge-base-en-v1.5-int8-ov`, and verify each embedding call with `npu_busy_time_us` deltas. +- Category classification in v1: CPU rule fallback, explicitly reported as not an NPU image model. + +Why this is the recommended v1: + +- It avoids private-data exposure: no external upload path and no broader local file scanning. +- It avoids collection/routing risk by using the existing embeddings API as a stateless feature extractor only; it does not write to RAG or Chroma. +- It gives a real NPU verification hook for the semantic stage without overclaiming that OCR/image classification are NPU-backed. +- It keeps the prototype useful even when optional PDF dependencies or the embeddings service are unavailable: it can fall back to CPU-only metadata/rule output and mark NPU verification false. 
+ +Deferred model work: + +- NPU image category classifier: defer until a static-shape OpenVINO IR image model such as MobileNet/EfficientNet/ResNet is selected, calibrated for the label set, and smoke-tested with busy-time deltas. +- NPU OCR/VLM: defer; OCR remains local CPU text plumbing in v1. + +## CLI contract + +Command: + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +/home/will/.venvs/npu/bin/python triage.py \ + --allowed-root /home/will/lab/swarm/openvino-doc-image-triage-npu \ + --max-pages 3 \ + --pretty \ + samples/synthetic_invoice.png samples/synthetic_invoice.pdf +``` + +Inputs: + +- Positional `paths`: one or more local image/PDF paths. +- `--allowed-root ROOT`: may repeat; every requested path must resolve under one of these roots. Default is current directory. +- `--max-pages N`: maximum rendered/extracted PDF pages; default 3. +- `--no-embeddings`: disables the localhost `:18817` embedding/NPU check and reports CPU fallback/no text. +- `--dry-run`: skip image/PDF rendering while still checking intake/hash/text/metadata where available. +- `--include-ocr-text`: include raw extracted/sidecar text in this single response only; off by default. +- `--include-full-path`: include resolved full paths; off by default. +- `--pretty`: pretty-print JSON. + +Output: + +- Batch JSON: `{ "ok": bool, "files": [...], "generated_at": "..." }`. +- Per file result includes `file_id` as `sha256:`, `source_path_basename`, media type, file size, pages, classification, needs-attention result, metadata counts/flags, privacy flags, and processing-device summary. +- Raw OCR/text and full paths are omitted unless explicitly requested. +- NPU evidence is per embedding call: `used`, `verified_npu`, `npu_busy_delta_us`, endpoint, and wall time. + +Exit behavior: + +- Exit 0 when all files triage successfully. +- Exit 2 when one or more files fail policy/intake/processing checks. 
+ +## Optional localhost HTTP contract + +HTTP is optional and not enabled by this spec. If explicitly started for a smoke or local demo, use localhost and port 18829: + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +ss -ltnp | grep ':18829\b' || true +/home/will/.venvs/npu/bin/python server.py --host 127.0.0.1 --port 18829 --allowed-root "$PWD" +``` + +Endpoints: + +- `GET /healthz` or `/health`: service name, bind policy, configured allowed roots, privacy flags, and current `npu_busy_time_us`. +- `GET /models`: reports v1 stages and whether each is CPU or NPU-backed. +- `POST /triage`: `{ "path": "/local/file", "options": {...} }` -> `{ "ok": true, "result": ... }`. +- `POST /triage/batch`: `{ "paths": ["/local/file"], "options": {...} }` -> batch JSON. + +HTTP privacy/policy rules: + +- Server startup `--allowed-root` is the outer allowlist. +- Request `options.allowed_roots` may narrow that allowlist but must not widen it. +- Request `options.embedding_url` may only target the configured local loopback embeddings route `http://127.0.0.1:18817/v1/embeddings` (or localhost equivalent); external or alternate endpoints are rejected. +- Request bodies and raw text are not logged by the stdlib handler. +- Stop the temporary server after the smoke/demo. + +## Synthetic smoke-test plan + +Use only generated fixtures under the prototype directory: + +```bash +cd /home/will/lab/swarm/openvino-doc-image-triage-npu +/home/will/.venvs/npu/bin/python make_samples.py +/home/will/.venvs/npu/bin/python tests/smoke_test.py +``` + +Expected smoke coverage: + +- Creates synthetic invoice/receipt/form-like image/PDF fixtures. +- Runs CLI triage against the synthetic invoice image/PDF under an explicit allowed root. +- Asserts privacy flags (`external_uploads: false`, no full path by default). +- Asserts invoice category/needs-attention behavior on synthetic text. 
+- Starts a temporary localhost HTTP server on a preflighted free ephemeral port, calls `/healthz` and `/triage`, verifies no full path leakage, rejects attempts to widen allowed roots, rejects external embedding URLs, and verifies non-loopback binds are rejected. +- Terminates the temporary server. + +The smoke port in tests should stay OS-assigned ephemeral/non-live to avoid claiming `18829` as a persistent service. + +## NPU busy-time verification plan + +For every test that claims NPU use: + +1. Read `/sys/class/accel/accel0/device/npu_busy_time_us` before the operation. +2. Perform an operation that should call the live embeddings service on `127.0.0.1:18817` with non-empty synthetic text. +3. Read `npu_busy_time_us` after the operation. +4. Require both: + - the per-result embedding object reports `used: true`, `verified_npu: true`, and `npu_busy_delta_us > 0`; and + - the outer before/after sysfs value increased. +5. If sysfs is missing or `:18817` is unavailable, do not claim NPU success; report CPU fallback / embedding unavailable and keep the smoke result honest. + +## Docs and diagram implications + +- Service maps should list document/image triage as CLI-first and optional prototype `127.0.0.1:18829`, not live unless explicitly started. +- Diagrams must not draw live Atlas/Hermes/gateway/RAG routing to this triage lane. +- If shown with other candidate sidecars, label it separately from live services: live baseline remains RAG `:18810`, Whisper NPU `:18816`, and embeddings `:18817`; prototype sidecars are reranker `:18818`, classifier/router `:18819`, GenAI worker `:18820`, and optional doc/image triage `:18829`. +- Runbooks should include CLI smoke, localhost listener checks, busy-time delta verification, and server shutdown instructions. +- Documentation should state CPU vs NPU stages explicitly so the prototype does not imply NPU OCR or NPU image classification. 
+ +## No-go / defer criteria + +Do not proceed to implementation, live integration, or persistent service enablement if any of these are true: + +- Will has not explicitly approved live routing or persistent service enablement. +- The requested source path is a private document/image directory or broad home-directory scan rather than synthetic fixtures or an explicitly approved narrow root. +- The workflow would mutate Obsidian, RAG, Chroma/vector collections, or reindex in place. +- The optional server would need to bind anywhere other than localhost. +- NPU busy-time does not increase for an operation being described as NPU-backed. +- Raw OCR text or full paths would be logged, uploaded, stored durably, or returned without explicit request. +- PDF/image dependencies are missing and the task requires rendered page analysis rather than metadata/text-only fallback. +- A future image classifier/OCR/VLM model has not been selected, converted/quantized to OpenVINO, calibrated for the task, and verified on synthetic fixtures with busy-time deltas. 
diff --git a/openvino-doc-image-triage-npu/make_samples.py b/openvino-doc-image-triage-npu/make_samples.py new file mode 100644 index 0000000..41179bd --- /dev/null +++ b/openvino-doc-image-triage-npu/make_samples.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +from pathlib import Path + +from PIL import Image, ImageDraw, ImageFilter + +ROOT = Path(__file__).resolve().parent +SAMPLES = ROOT / "samples" + + +def make_doc(path: Path, lines: list[str], size=(900, 1200), rotate: int = 0, blur: bool = False) -> None: + img = Image.new("RGB", size, "white") + draw = ImageDraw.Draw(img) + y = 70 + for line in lines: + draw.text((70, y), line, fill="black") + y += 55 + draw.rectangle((55, 50, size[0] - 55, min(size[1] - 50, y + 30)), outline="gray", width=3) + if blur: + img = img.filter(ImageFilter.GaussianBlur(2.5)) + if rotate: + img = img.rotate(rotate, expand=True, fillcolor="white") + img.save(path) + path.with_suffix(path.suffix + ".txt").write_text("\n".join(lines) + "\n") + + +def main() -> int: + SAMPLES.mkdir(exist_ok=True) + make_doc(SAMPLES / "synthetic_invoice.png", [ + "ACME Utilities Invoice", + "Invoice No: INV-2026-0604", + "Amount Due: $123.45", + "Payment due 2026-06-30", + "Please submit payment by the due date.", + ]) + make_doc(SAMPLES / "synthetic_receipt.png", [ + "Neighborhood Store Receipt", + "Subtotal $14.20", + "Tax $1.42", + "Total $15.62", + "Thank you for shopping", + ], size=(720, 1100), rotate=3) + make_doc(SAMPLES / "synthetic_conversation.png", [ + "Messages with Alex", + "Can you please respond by tomorrow?", + "Need signature on the form before Friday.", + ], size=(1200, 750)) + make_doc(SAMPLES / "synthetic_sensitive_form.png", [ + "Sample Government Form - Fake Data", + "Applicant: Test Person", + "SSN: 123-45-6789", + "Signature required", + "Submit by Jan 15, 2027", + ], blur=False) + make_doc(SAMPLES / "synthetic_blurry.png", [ + "Low resolution blurred sample", + "No action required", + ], 
size=(360, 250), blur=True) + # PIL can save a simple local PDF from a synthetic page. This is non-private. + pdf_img = Image.open(SAMPLES / "synthetic_invoice.png").convert("RGB") + pdf_img.save(SAMPLES / "synthetic_invoice.pdf", "PDF") + (SAMPLES / "synthetic_invoice.pdf.txt").write_text((SAMPLES / "synthetic_invoice.png.txt").read_text()) + print(f"wrote samples under {SAMPLES}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/openvino-doc-image-triage-npu/samples/synthetic_blurry.png b/openvino-doc-image-triage-npu/samples/synthetic_blurry.png new file mode 100644 index 0000000000000000000000000000000000000000..cfa0c47a61894b19377af2c3088f6cb560837e4f GIT binary patch literal 4620 zcmdT|c`zI7w+@PaU1Hy3X|+`CMa5np8cS_ewJ(jT{Ud4@s`Y7zh@~xBTkS1vNbMmg zi6~#GDoPNNS}F)lEe++DxpU{v+`0eUzwZ3*KkvNr%)IBkbIy66^FAlT!47ivl;kM@ z0C3jI()=a>z!AvavrcldS1xbsWB@>5+{)baR$TE$$wp-5pddG~7IbjLSi=Hfe$jm^ zlUFToe?*1#J~si$NSJ1p=dG>PIDcfqIRS$XwXHmWYaqauK@JRXO_IO?2;%1jRGESR zIXwT)z>D2x-sSkCO$7j4+SxhmFv`UPZPoJ8WbTVAXVa~O@{(Gw;PI}mDC*RnjlFMEDplD28uAO zE0VmBxhai*XBraf0$MA*F|K{4pKK6`IKnNd02i!gcUr+Xj6vDx(3=SlSed_5vlOG& z64cPp(3AWd=cy(LF|4J`xu*0)yr!4gj{i(j4{0B*INKWYnjufy-=R;@>GY+@4(|qx z% z=q*q^vwIDS4V4!77{AHhpa zXGV~v)*9K7PXXKMhOGA8cJUXxcr2YRk25ndI@cCP-YA2nIDZiud$t~unCf&jMg=Ie z@>!Gj%&uJvSvfxQz74>aCI}le5akcJQR>g+!<152i6g<%Dtzhs_K>o!$JsjU|1#J@jgscf>`t$ObbK zBMk;;HF%4t@xaIKB~?Ie#U3=bBP+&>ClZgYWJHjWTB0s`ZXb=QM+E_+!p4yXzXN$T zzp8J%f3xWS6pqzT$h3*yxhmcU@5;08!jj}!nub>AFn-JXQOyl%iV|injGfU-K*oEY z-f}!KpERI;|HOkEEvhe8pGV0f`(-bb8f{EMP}vH>JFTh@!cT&HK*YVqL1}lE&UH(y zs7F`OfsDAz15dB^u$?kZn#13L(eK)ao_L7(8MitZ{FHgq1q1DLbR|QD5$Nh@tJx!e zUed?RF`1_FuNuVrxgEm4bN%zEKg|=FdS5)6J(SGq+rkbL%@#=hAQnqRHN7J9rY@XR zS~&>&-1-)(vq?(&JMQIvChb#^(o739omlQ%+GS=jd=oTg;H`O$%HOX+7&xfeV0%i| zQK*s8^w@p~I`(!mK=%Kc@!_d`nioxXz(nLnRwgk)!n+5ir0~@k!Vif;aNcN(J^oN$ z|25O){vlPRmUYBxkJCj@lki22E>iBu{zPShz}~`7l8t7_S&z3&Ov5ubq@VcXLYS-l zSi)FgaDXA^f|SS74Mof>qxn3t{~Z(P6@;9-UXtI<@bUJpu-@F6@}+^yLD)%tX4?L5 
z6&9MiK#Dr?hU`mjZO!fmk7zEKsWkn3#Ye%CU_bF+W{{5G(mbe;2wb_UNY=YTg1~_R zlGz%DKlBv%jBQxdo#8aDC|T0L)U74p*CLF)M$DlQg?k_vO6eZR5o8_q)Sh_X4u>@W zUPM#LuAs|ke8sTCRJ2NEu*uB)F04Tjcul4L0`}GSg{Xq}yw2Z*eNNFDVf)135LAD= zN;R;2g9JlSBjQ4AbmWq^vpX;UW#$<6yyin4(nsLJosR5fk@|{SsH|c#KmGNpRx;h0 ztF}xH?N7SX(zH7waI z>0tM7T1~~4g4u#5?*BAWOYzSM-pV+|f{LLk5Nz48^{ZJIqu7aUvpoMY`7$UIP!*q^4RyYyffg_d-?udQ!| zatf2$zceINoi|laxH~0e=Eu|R`De$VybAE3qFqXi*bzl+S2J-)IfX;?-(LreyeY z^DbU7${4L8$UsarPYB5eL{npKfbxauq|s}z z-rHAm1E|VEC$Uac`u%xU6vx7e0J&oAa28 z{g+hIX+KKsSF=08Yn6%L*oO9+FXi%*P4;U$#6q)+a)aWgQzr5Q9sHtC#_vSbb9Br` zMk9zm1$DR;ojoBuEO$pZu~)KlnSD}>8a`_7S;Iw%(^8RXK2|m`Oaj6#&%BFZ60;sQ zEP|=~nPN-V)#zE;G_%bQ3F;dN%0db?rGXc6s2!&v@Lw(FfApOHYoPg8TWCpqX{JkE z9jm4l0D7_;*ZoPIrR4t)Gc?vvJ-0gLUE;nOlE=_^GdM(30WQcZfbfUmy;ovEdd_mz z-}><)oGhwCR{&;0mEpz{+ZzzU4=c8-r5lWETgfKi>*{bA|Fg&M&eZz%K|{+{U;1-V z?-4PjK|K`jF6HE$M253L!&JR)eK>sodWx(nFzutZEe#Cp*O>cMCj4i#m#%H4&)uB; z`Tfjfce8nw8AtBHZsyR=Ie<>5b&SeF%9d8aJ z@8R8dZZOU)kTUw;YoTdmGMP@VpLI;HOoD7Ws7wPj!7K5%N3PgD;W9?{3)F9jYz-UP zc+X_)IsNo6u8Lb3PRL5eW*_bbN~}Ows3ZzRh~v-;EeC{*XxsK{e3hnwS-@Fs+o|X< z@>;D~l$(&iX9rY#?PZJBiiPIF=I0hQC9WL9Y8lGv(Ozv~ba1v(c9F{k$foG8Gq;>4 zqDvyr>L}y~0ggl={0Mn#y+?DJXHuU4DTs^cnxMM6n|Fh?!oCX*7>;T#GoqbAzih_? zS40t~)#^IhWf##I>-@uNqKGKD?GighMPoZ<&yhz|_Mz-adEVeN&%+`=Eyp~mPDVI82$`RKrEtpvu>fW{g2mk)&P7t{hSsW^gmErb!s zZZ;1&1oC|*F{|C!$wWf~`=336hW4osIEwp&)LvREV=W!mr?G2wf2&j$#@a*hB6|tc zLX5eukX2Yh=Y7$z^}PXa`}I0uF8zH@K z1|#3U00&eR>fO}7M%_||nJd`EsaDCo7ICuqGULx(u*L)DVb>~GI~~XT73u~q`lLF? 
zr3}<^0nJu!^I)fF>8huSlQV-sn4LTd+1AjfxhPqE(=D4NC0l zn%cj-(~mr}wE{8)buSGdUtXYo@Ew6PyS;(F0tdsuNn2r*i;vHrZhYHphNw?emLxd3 z5O=1)C$FIW4o})t67!JOiZImGV9Oq5vVPg9j3Lxq)Hqc>y`MwDZM7^fJ$whI_BfA| z?`C_X#1a%1*aVFULIsC~&k>B%6jKRf&%qmgG=&%kVT9EBS?EK*7OR`i59IAGbL@9E z^D0MxIV*Kk(5^`l&(7HaGSmKG9D6?%6>5X($D3^FbX^0lahl>L?W z*zXxfF08jUlKoQho>^OhhSPWQ(Hn_;O{&AMhz#r%Ng8}7{s}m>eh>`ZApkGOhCDB$OW}tLN)zlJmfqtLMg*SbB^p6sv*cet@af3kv@q3$5fq8xCYl(i8KQ%`?^;))r9cS-~MBRV`)(72=HIAVUo#s< z%jhA=JBGQQo&() zF|^55+$P-EW`Ip!CnuFY+_P8i)%u4CB00RM`F?P;w6p{_j`eEuj}3JM^6X|8Tf^f^+5+ue;RDaK<9f#eA z;DsFOtMGO(ZYjum;$Eu8$YQ}2Ppg^GB90^YkB2`O?S`(-VZC4k8Q(W1@<6c6M5i=T z0>qZDY-t)ZW|`7MyqAR4f&qGKe9y`LcNxi^alno5{u>WDB8GmLRCk|BcDcAe%AWE7 NtSs!zF@O3#`WIm(a6$k8 literal 0 HcmV?d00001 diff --git a/openvino-doc-image-triage-npu/samples/synthetic_blurry.png.txt b/openvino-doc-image-triage-npu/samples/synthetic_blurry.png.txt new file mode 100644 index 0000000..e640029 --- /dev/null +++ b/openvino-doc-image-triage-npu/samples/synthetic_blurry.png.txt @@ -0,0 +1,2 @@ +Low resolution blurred sample +No action required diff --git a/openvino-doc-image-triage-npu/samples/synthetic_conversation.png b/openvino-doc-image-triage-npu/samples/synthetic_conversation.png new file mode 100644 index 0000000000000000000000000000000000000000..cf28316add97006732a5447633448251cb61b614 GIT binary patch literal 9325 zcmeHN_g9l=`%jCCRROJ5R0Q-GK}AJD*^t+&rBQ~;k{J--`9P8*7fA)a~7s@>vpU| zAP{os?@wJoAXYs`AikVi`z5?`R55ff0)ae+K6S!6G<9|`!ncbfT3)c!%sRB~>1NG| zJL{hahjVTukGS-1tlanbyL%VZ?p%xc;k#6|_3N1LP9Hfq5wK4FyzErf9qXMZ1ALxa zWqnYsScR4OLTQU)^vl)RcZ?}QZj;3$T=Uz319)Qg4`Lcdf5r z84Fv=cx&LNm+Fm;2*j>^C)Oem8xwY|LLk#~_p}L|0?gQ|;W`ngm`EPJrDR*| zB2vc6L04qEt#KtG@dBzdRD#5Tk4uI{*96o7ID-yJzoP<+O0- zh9U;a{Lmz)fV$PXj8Hcca=QCj^)$B z+@9&FDK)c)9W+{rYyB-GdTMIwyXYN>L+;nFb67-);MlH|l$7f0)7%fgXx8!=5%sqf zD5IpaCH37!&W3@bjSO-GcBtw)vlgFbaQzH*oaI_~W3{4lP-l~Rh7$&Zy7u8WZp8uu z@p@BM{hHxBoA%6AvugWFJt^~V)?4LSm|GS(R|nH`%_kS;hN>yT#g@)2Q^^NK%0q+e 
zQ?#73>gJCUjQxFl@}EAnhuXQhxvjXf>SR^TNTYAoYaTpsz+lvRk{`FnwCLiC$8cF@ zm|$!99#&RX&dx)cM1G5oZfcXenq_(_vS~6egVE8^v65k1wa9DwO5iSfS1KHGRs+yKQNFhod9lv_! z>{&+#2bb{49Qfy^Dq-10MRVy9b78`T21ivQyVEquIk=BKNvC6pedSn;!O62{&#J}i zk6J6BgaIVv$=mXxHY{POS)+Kvj$=;>3w@!MZEbDpsR7@nD9ycpYQ-(Myl&mPl5uHi zp!}Nn*A>CD{Wb-*SE7y(70{%a{#OMR6`?&v&ddclAk3IhWiECtT{cMDvwf#YWGJ`w z^^Y=i6gA>wvggOvWO_%(IeYs)SiH39X0gN;m0}n2O0+Ix8;T%=!-Uh0M{YlI>2-6W zMm+NAvuEZ}^Si9BP*!pZ3uk`YZ0Ypk@>{s0tlW-cM6s~BxjEImOxPeHs;?U$Xu1&i zV|VY~Wdm;N`cUUunI}m|z#0QbH7@t*%J-({<_M$=&bxU8O*-oKt~FJEDMKRkfGda;T^2efe!Sfy0;*;W$bPASu*edeh;Q-ZKpEKE_AQ-%|3wSQRvxZ&TpW`mi%* z0DQ!r?`1cT9X#`F&P>*#GtUICK6DU zn3(v@H{ZN^^{Ow4H%a$|qeYXB>FA^f_j{}2*_{)eS)e^y;p_=z!{AY)!!1NoaLb|c z4bX*+_jfy%`8K6XW=g5~`T4gLR88C)@AqJc#%NNiXfT3B#zU_Rl(V3vQcx8fVZ6Gf z%kxVo#1Ff*M_KM5Qhd8O9AA|vTu@1J*th#;`jYdz$!}5UlW0)&j({Szjlh4_gElv2NsjpxegYVy4!{y*m0jEmbAOLg z07bqGy&G$?GLeFOgH%57W zPgY(AVda*b5^cmx4F`t=evLntq>k1CBplNrAt$A(D3h^z^=d#HKcOMe9C(NcL@U%R~0c4d)9qs@r0ICt)s zj;iAulnjCy)rHN5LeMBY3%}+&En3_M(apceiVMWLqXf4a%TuNc00uj7iW$2Rp1E40Jp) zZ01FSLy~*HH=x;qH2Sl8`ZgWki(H{p=qdF734MHN#;seoK%Zgbnm{p}sV;uh(%8MG z>Abkt0Hq4m+!aPy7mIu&2A55j^16 z0NfTfp2mAEqwtd88WzR}I$#hbqQ%9mzJ;x}0yCbMb4pVLLv{?v5iw{PG6 zqNq6sn%qvaCH5Ng21~(TKzz_S zeq0&XdKC7pbj<7AO1?A)U`Hfx;8{S}_)YcLfN1LedDLcHY#l^BLR=7Gw zu=NYXe{S31M_0K*H79i0Y`SGE=meuCY?d6y(kurnU~uaLm+_j{-YZ!SFBYX6sR)Zg z7R~)P0S}3Wzxvc-?>`cLwwyHfIEM^%fwgzt3SIell`FLFbJzM3)PtlXVl-Nwoew8; z*r{S(Y36V;P^E4dzsGS}W9TLjQg&fsVMWDq%0axI1|j|@AT=X4Ec5O|Ku&}hfY9z++mb~&KaRsn(an(dmgpHs*^8Tw@+T(_u zxK=QXT)#siBv0cnI85L z%ygRA0+PYi-|lSErxunpfQI~^ao}Ds44&C$9L~PK`{Z}ADrC2Y7#&6PS1uZ8Qc$bC zXIf&6eur?q;$dGOpB+GKAKOxv6wmM;s9Vh<9FE@F}4Lx;gTymgxR zJAk!J)jMkFniAF0+zbbn(d4d!WejGS#)548!;N6cq#0q7sO7wnv`cR{w) zPHvSZObEXpANyIP#DwvNLg+_jAE5;K!k|M}aZ z#)U(olX59v3|4cmqm&O`L3=qLY}AFvFx!z!g=8uq-6k zp(W(x)4j+}l-&>lNC5PzLv&``rb?+4&I zbZDs5(-Jhb-BSyI^iLatTCP)AOfnq(R<_T5ygiL-$f{v4|0t_60Uly{R(Zm>Im{4f z7YcsP&MpM(-%$AUGtlgII|*kM_-@XgEarsc$=Y!Mm4mf48v=edYK}=EwmmwYlam7~ 
ztVw2HfMtu5p;9?f5WF4o7Wc#&U4YvGs~Q1#q_H}@q^%By5zU0<(#*_VXc7#Pge5J9 zQ%vyT;-1u(FJETM!rEKgZJnG_)y&0Q+82De42oB+P7xUMAfw+@fFP3u^W4@N( zsdq#)PzjLqfHdis{`XkjW_#x3$&&zHiCC0$$gwP(Gc_~nC*N+FC7mn@mlwcR`}zA@ zP+tm7SF-&_-bJHHM%-RE{_Z3S`8}{3n1Xa<)8beQ9oyzz+7Q^?tcuIzE0lEDl@KLj zcChinZ=36H$r&~V%GyudPcf-WD?Eot@umA!N zO>aE^T1G!(PaD(4VD`2uK@Y?l+D01Yf4!|T1%nCk<37hYXANR2w57aW>re9R)44!v zP?!jbh}Y86k_6d2gvAh!$yavl)(y+#8IKz`*Z#`KHb9dH($MMvM8~}J@F^HV(O7a{ zB7Rt_x_$QHiDk-BZ(253u2ej?Z@Q+&^SB6F#McXtDx*$plk Ofj(_?1_7f=N0 zMT#PXCPhT07e$b4@ZP(>pLh4V|NrmVy?gIIndD5K_nb2`?|bGmlXK=I&zIj=OGivf z97fF#aYV!IF>oh{0~TV8@bEzSLkMLMCp5wjj;4k|Bq1mVH)UmN2~(^W93r7mvzAq?Ry8IAL5N3X+o45_)iiiz{Kb&;vsU;emmp zAre}erdn`EloO$<1_I+_3`c9CkX|THIKct=15Z3q=yP87j)YjW;C={4_*q>IYKhZu zPZz>&p>k3(vIMVuFoZ@yQsdv?r-4JKbo6xq5C{N3gcpFv0h$0MIXMM686^b;1r-$~ zH7yezEe#DV>)|5|Ovl-{IFGY&a6tIP1tC15yc`@ta>AmLQZN{dOHe^Y9;z%Z4TB!+ z1VlweMN31=LPy5}<>ugq{(mp{X8(hq+{(?502)a<;ug{76F6WrOw z72)QC@%8f$zy?NMz7lmcIwm$H^?KTk^qaRb?%vDGzyF}1@L|Q{%Bt#`+PeDZEv+xw z+B-VC28V`Uy&icpI`(#Ec5eRN!u!P!YwH`ETic(%?0h|l3j~0F35&4)C9vPc#Yl*Y zgp?FaN^uYuh{T_ez>K71+*0I+)lDeuy^ruf!zr0Gl5@+Osd%N&t};8I2dIzo!QS$( z9fbBhvj4ZhBL1bw{yngN#5D%cfa=-jq%g>|FS@h^Hr;04YASOrH^{uJH!Me*=*_xW`q&l^BnO?t17-T5n_$a* zDuSuezCRZFzAc9q4}4nTzymLx+xNe4?Rj4N!Fob11irT4x7PoS+#_@)91pa;|H1nB z=x&%iaSv_8cKC%3VmlDq;h*moL>~UH^N?HKw8oqrFN!HKH;%<*$R@~3|it%PPW-jE&JdX$eG66l*`V$80a!4AOxEA>rX?1#;C>LSQx=bkL1S@j;3LO#SoMV7%?@=$0PU57+;GhQiw38Mucjwy z1A(^8@2@}7jqE!HwpaeyQagU-CxNkkUD=Bp%l;Nk@9ETZyM!svkdq^=ER%%(2npj^cmOTF4~D zjpa*@cqqimm--CwFBAgq#=0i6>BK9h8`B6WhXp!aA`d&R%}jwqUK;nfG{k@-z2IH5 zL2+w-fxUVQmnA&}1F0}`9BJ`I9e>eKC<@I9m`jKhDP$#OY$o+DSVonIehzu~fl{`0 zBU@IjHbJU!XPA^OE$s8K@K?hE(ZzC7RIq#3RGR>$W3GN8y=rBHc+_ zYr~q@Ny{o4z1cd5+6pvWSVcDFlE)k-hvvE&eApxF+oz-J6J?D4RkdlSW1I4Q?w}Pu z=WekJt<09EtWTi$Y&$&%3zDxYU+L4E-Zup~$KJ26&RD_}7FbDJ`JhR=PODtP1F905 zBZK)hjg?O-kNJ%l_j;)FxX#gDrhh6qTb@JpzF<0wy(pkmY+gSn4cyM>!D_450ZF9f zZPfp3U*7+G!T!%*zH;$)86TV3zBM)&i4HE-P;qz{I{QgI0zjR7+ri7sDHxBE`DkF7 
zm(kYNd+*KVZ@HxDCov))aIAV*~oC+oWi0U~)emPNO!h9st=CYQ;Aj@UyhWy@bEYRRT`Kf-Ee z*5RHlc%B-!UV4eF>`su~p}#v4$JYrLz5lw;a&)hOC$qH6L`@zSpIr~yv=RbG4te;> zD{CYtE2kvrZ2L=(?!RN1>XFi`2$QR3gNZb(>OI3{SjUN}xuyH1;yPuP`idN}%;T~> zkD?t9?PSYZ2!5^885}+7!rJSPF{VfxLnuB(oc9~Fp0rg-%Tq;ec${`qM~bHah*vqv|#{_CcRO*kedYK9+(a0`!%0 zmefmI&hl8oy$b?&@+Or}Cnc$kwTF#=by2V$3r-i#~Zu?%NTt4%|?f;Y(?CX*P%z=G?R(Bql+_=JWK`pc-gmCal|l0 zL%-Xa4-dGAF(J7(f~Q1W%}7(<1vk)F_1b>9fRM4ZHZ@?98+1P}jWeqAkHc8QuNU@r zNv4v>i;_#Cg$r{gMgz{g>l@L-DY`%3)TD|Det+d_59^s}>3Kl1l`Ea|I_TBn*ZlkU zD{fnIR2o+3&Bffs$al~tf@vctJLb%~W;`n=tY%wf_j%b&?{(H)4!PPJ*XKIOeqACH zC+AtW_+U#b+!GpZ4fbfGU)mQ=r~=oMuyB!jNdANdFA8iK4j0&FYBP ztZBUIx6Rbb;sJk8lPrxZ6Me4Ws`?SwM==c%_L#1IVKpEjwh(P5C&_N^O-k456i++k zQMDvz(%^1wIhs%1C#Utk;>4r#PR@WlY1pY0a@~UB2`hHo6yxZ#vC}L0t!w@Kt%mgc zY1Tq4$LPu_8J_Xoam*#BQyeipDPVj4qrr`1N3-GC-?(j$_rye9KBjbt%-McDb$zwf zn7r|GbY+R`+Nb5x=QjC8wv`6#N|cdgVpk}>+yoS}o=v~%R(ex5y^*W`$|v3k{Cw(j zcUUNgIn~1eT`Q#b3T^qt7SUs3Qdds)ONog+;jMChDxIUcCa{wh_Z`IG4m`iNMe9Fo6ts{N+Be<~`K z@F*-{FTeEp4`vSi^Phx9`^|q}d&y|Ok)JT#L+A2Ea9^8-YZrkB?y*(MOzj-2QvJG_ zjtA6avZu3mm_$RD9_+`v0q=iy7XA)pKS1^8AXfPBgXAKc%JKuR(wC#SPH&zzkV8R! 
zG%LgfJZwhUU}9Hu80V%iSjxNJpB5r|%0|DI`)p|!+N4c7`}&$1yQ!JJudN~_?Z(Bi zc1jmb-kw}dh{G)3mT|n^QsqK!K}ppF&o(Av#H>8ag|?f+oU*6+3`-b;yh?}=SR*zk z<6Sa;UOM8TMOEb)-V}JPMs#Z!Q?(ZrA8k-<_@3e2dx&OV5leI9TG`kAOb@%($((s1 zc;NB1LU1GCZer=r$vq^t6=W!~WYQVpBXwDwBlZH62MhgpC%lS5{)29*+sJlic$7lZ z1ajU0RGYB(s-;!x7O4M(oJ8-?JI+dwTkKumpdu@ESBs$v@Ws0`+{Nx<#P@xRxC4h8Dn#5 z+QV+jc^fzn%TCf|c@A)V{50EbgEg|IBJcSYq%Q}GHDeNc2%Cb~H~m1R4IbR>?Oclt*kP1d3RRABxA*kLD~DOn7B{vpa#*4!^er(oZ< zOGq>!vsgAop?Ow;WsVvcyj!;zZ0~{+O&%Y;sPM24cpZM#&(d z6UojE6&aJVNlCex)De25vx0+l*-T$}X7;;WF^-3VZtE{Cq^vKW;}*~1-k|h9@l^I4 z5kE|mxs0&cW}6VZS1ZE)>I^saG8YV&!V$Cjp6@awcbSg` zm!*3iasdKClaH4$bZ)Jdlg-CthTj@Qzk^OFiSGN~=oR*6QL52;Dn5>!W^7Yp69T73 zstqrR!<6_93=+6b)kMJ-V~+=hq@M1GQQhPHmvxQn+ONKijPdDN zROm(z${4qjT(THW4eyjG`sXkQF%I|Qvu=LS6*2M8wzIGVg5)lc)T4^6PU>*pVO~}nsdl4(a z`{@(qvuAcbW7RUfa=R`qUvH$p+fhYU-I(gZJ#pR?x#Q%*+6NU8u~>D^ELdE%7<)mU z?UAa_SBN4hNK!tm))H^GbUg_>M^6V>x4R5JlXkfZfquFQL-XM|C>m;jc8wn$g915T$6|*iIv`{_!WG=7GV7Vda%^sQb~m zX^b|=Waj4JTQul5U1cmCFx=b1AQ2I{SB{c6brj=^o8&&!JUQ1 zmt!21Hc7|lW*(V$l6mrALCaechO1?;)*w>%!pnWULj!glxb+>&IeUPwdKt~)9goWf zU+m7mFsfa)B=;5js4m6Y`TD0R5%HeRG~jf1l&Ig81zs5FF+*&zwN2AtDY#jtYRu~1 zCqI3f7aVanXsW2hO@$Sd0zSqj_P29#K6Ur4X-sY!n8(6PoYl0hGNzmgW4;F5Awg5{ zGIL}dt4%j=%-mwyC7akgw&EeS@3oqzu9oU=__ByA9hX>jVWgSMxuU9O|K-G-)uJD+ zA>MPBsm(0HpQZhW_ z0e%vZZ|ZUVmW7nb#_ zx?>)tC3v7xc4xajziEY6=@L#dT{S54W}9kU*>NGTw@mLzLp=-89vAU}LMeG2U($-A zS%e*^D>6Cqz59uaoUYk#`~&mzQpelL;-+|L5XQZ1iKmX5)H)ULxp?Y$>)TJi7gbjC z<#UHVbhhVN+oH2Zg>$B+~X3D zwZ!#Ler$)Zv0aNr!g_U$YHDM>>E`oBM0lEc6?dG)Y3&W@`S}*u{WeD8{Ob491I2o) z#=6A$0~}5i`$Hpoap6}41tXIq(9-8j27E;tb&ZCRzItNIOGxMHq^jIxzKzpbkJy`S zSZY~Suzd>m-mcJwI=QDBaG-S^k?zb@54*Yy9%<>OMg)j3L0l+K?D=$%XwKrO#Fu7* ztZQd)P6@0$?&_}}GmQ@mr;+6Db_$uu&oMndhTvC^%NE^f&&iAxqsI;161$#Zld(~1`W&FS&d%BS>2>K_1pyakmiG~m z;S_d*xR2I5Z{0&a4QdQZ@#n!mxgrT)i z&vtTu-ucDmek5{pW|B1|#a`KnN(++IDo=qJo02=jkXu|EjJ$6UilxV0 ziJD}`GWS+CIVA{RC$p&blxOtppnznbLSBo^i8OfSQOedMo{!o_Txxcz&JlQRGkw?~ z_!Ih!q8+`D)ok=~i=x4S8^F9ipdN7msDG*JE$lo!+gVW;(9R1PTd;=ga18 
z=RR=Sh=93w+6j9M0E+B&ky*sgoYU zk{)uurpmz7a=&e)dMJ*a-2iTITS8WWlPpcMVuh8=C3XIjUsAxr{Q+3@z$e}cONgt9 zJ`Hp2WeR6!2a-EvJ5l6NNwhT#l>f6X!v97Yv0L&lP2c?K-4f!o$shk#jyOU0pY`O4 z6LkL_Q^B?U^j}5{2Vo-a?9t+57B!wychlx$W<^!p4<2i%?jJpQ4G)|gnfm-Zbj>vM zdHVn!SQy*g|6;U%;`{#$@W7R8go)|?s?Xn6v-ixnR?9a-ceb1GKojBDp0TCRV}V&b z@YeA?9@tn4!2`j9cz{`VPc(#UKcwvG%4Iw-Q}oN<5x!V_Ro&aY7&=Axqr(dmJV3*x ziwCmZS9ag3etGywi0BKkU5LC8uOs69g?Qin?fb|q)4c@eKE(*@E;3DHgamY42hc|~ z-(*{DvK{<(m6(WaKxBZ(0FePA14IUh3=kP0GC*X2$N-T6A_GJQhzt-JATmH?fXD!m z0U`rL28aw086Yx1WPr#3kpUtDL1H~+3G|KTD97E8%k~ommlK8G* zrAd&zlK55K>U%B3fvy$gKmhANNbBG&VF-6Z*lVBy2wO@LUIei#amnv8SA-A3)|x0! z3_;Gy2O|A_qhFP|4y3jYWVz4>N>{(r&Z%Mm0GmhT5VP}CCt zZvJ9BA|VpLxT%GLm{9-6CXoHd{!I}W4>&|<9}oBdSilp&0ImR>utormz(vB^6Yv92 z-z~T>A(R90F$Bs}iy%P;5zdsOnOYJe zNBxV_-vg1Ak&%{#I75ERBS>&Tz86D0f6ar+!DQwCL!PWO;aq&*!*BHn28_@Uzt5AG z`&}MXP7eA{c^I@k!h@ipc7VcJ6bb`55K}u#P&7jX!U?WQ=zBV&AO|h?{k;2N?9rI- RaX{tdWn`%N`L&I7{ta+#A$tG- literal 0 HcmV?d00001 diff --git a/openvino-doc-image-triage-npu/samples/synthetic_invoice.pdf.txt b/openvino-doc-image-triage-npu/samples/synthetic_invoice.pdf.txt new file mode 100644 index 0000000..5b7f6be --- /dev/null +++ b/openvino-doc-image-triage-npu/samples/synthetic_invoice.pdf.txt @@ -0,0 +1,5 @@ +ACME Utilities Invoice +Invoice No: INV-2026-0604 +Amount Due: $123.45 +Payment due 2026-06-30 +Please submit payment by the due date. 
diff --git a/openvino-doc-image-triage-npu/samples/synthetic_invoice.png b/openvino-doc-image-triage-npu/samples/synthetic_invoice.png new file mode 100644 index 0000000000000000000000000000000000000000..ad5aa76de7bfd423394771237fb297096e3af983 GIT binary patch literal 13347 zcmeHOcT|(^ZY?JRcg8?|aMh{Mw5*PgEAjX92Tho(UdTMVZ z{&MQ-)2jzaIfzcfD~bPZTz>Lt1_xFlZutUg^c8aAM2 zjT_x{E1=kM4RU$?$L~ww+xB(H-|nkFLy)VR|6g2q_Z8OV=jS)l5yWpSBO~Lu8Y$)W z8sxiUvdhSqONo;2LR&T>-(@d-5#D(Azqc^FyhEW4Cd)zm&c&?4-s%X)yo-$)=_y8+ zX8Po{iuxAYi49id>E7_kjyFQI(%utez2#JL_cOEDac(KUpli;hx~4`x*0+4IEv~8Z zjxy@N*)L0Wq=qdqhTBPnRATS+F$DQuw$&@2GPAPk6g)jW6#}YXoO%|6lL*+tMD}A{ zkEU3fc(6Ed6uZ>n(o;$y=6kj-v!$!*sbPmL%3WejmT;0moVmGqKykO>XwrNQQ4%0p z;&Yfek>t*l1~rrd_K}XD2m zOAM!g)OGb+rFzVC*~^zN-BZJlFoLR7C`kd;==7A(@dQ1u4HUGtcB%<=sqbB+#{T`| z6B9P(tDSojgMzbmh7?D9FztF4=6!ibr!=mQyL2ljCZWcH%fXe7TfM^7alJdlD6| zNw6)dfW4$Hj_1Qtz}F5Px94$M#Y6Ya)WUTcc|JWQ{qLiQwAIDIs#srd?_IbEad5j6 zJ=Gv^r1p?YrLL~-Z8aPVR?3fvT>ilc@1FO|$6F}pOXr1i!-@L7&g_U)R=8+MKLD1I z(i3cFX;}wLtrtArA{LfYmu>Cr+^Ygd>rF(Hxwvi($MVp{G|R-CK^QN;tI+qujm>mo zgs>&UW|}RPRLo{&Wj)wyX^vimA7*5r8M0=9`+55D#>RC-eA2yo}q0pL|q>C5# zIhGX_8ORsK$H!|qVFf*29v&eBH`PmmC)zqYOU}Q)4$l%zl``MFnU%u{-W^pm37Rfn zE>I>qI3!3n{&4ElDRUm|a0h%KMg@~XlL%!6K}%ijDfA^SWx}U(a&judaWfVOA{Y#= zDCE)R9;BW=efmPx!-o&O+Fm_23LdxA@5ys-NQuKcIXRK<>W2wtIJxerl->}zT)__a zq}f3=sX+qk#dSy*ax*seZN+z(aQWn z{r%-=8K<5d>j|C6TKm_H`}XhOdHBMox#1cc^BTH#1FqQ;y=nWwb02SQ<>0japvVil zmd`mKM&71csfQgdfl7FCG|H{YBy_=G?$d5-*ok+Ma;fA_57>DuyfR%fkzFvTUDS64 z#wr~^u`g0{_+1OoS!7n%RMB8eW_HPb(IKh`W6^tWx0xF;zIqp>+aKzse6~_~-o5kA zV;oHqhql?K<=DZnpqoZ-g9UNA)PK!AIoQ_AYobn{8+Ml&RQu?VtfgUn@#ga*b&19y zb3GJpxS&7EeY)M<2O#Y819dLcwX3V^GM6T;PsVgji6W*nkhf9HW=XRZf~XPlqpUEp zwf_b&IDO+3f>ivOFG2O&HMO;+=alrJpU%CzvaH9TE>9i3zA=PI7_AC2ptQZRk49Pyr2#ADGZ>5~ zPx{w-xA%8Lm-@B7Jj3x144n63mfM~>b#GPB#g&kuLnqpq%*@QhLoUb6dGQ3FjF&Gv zU!Onf>N?zA9EjPK>(^gl9X6nZ6LA_?@OWncB+~N!Bxi#nzY}hw^~r!~GDO9`m6t;V zXFpsF2@y_ma^)0N9ic~w#8mt^mn~KHExU*B1c05bQe%hAj{u6FK6B>I_Cvqa#A^zx 
z9!!_b*MCG29i5y^ygObOawDi+1tj211NZ@eehN&VT-pL_(bAGLu*bxeS%9lF5qNAfl3%pzal}XMJgYKMwFIi}^P@_)1 zQK&K&^hN#-j81;<*2!wWt9(N`NW`39Ke9ru_WI%!XyoEHmb z_4M@Gff4dNn%lCSN`b-9HfecXu47-GaITQj3!TUS{lCv<&z?OnmbPYVTrxg%@=evT zJ3AjrTSu|W#lRfTFeA z2Z^Hn$3~fZyuZE)sKuvnC-PPlkJ%z#omLqV07Opy25Sb3T@w>)FmS_BYdMp+Ixe1X zXc!K#Nw->Erb(AwVhl&1_3MTMrdgt?Lsh#Tq>6afw9tkWlVZng2E!F31#QuerYBg& zLX{Nc<>hsG^^^nytO(WQ>HL{Fqol-|VaXy?A4_%xThV{8SYs#EE6jeplc$c3tVDgc*8aH(STyODykUi(rboM2Y#PkZRs?N zI`Y_^1pr54?m!HJy3i-pX@?zTTv2fHpK!&mYvo1)iWSt|gbz*94`^uo2m9>Yxf3)= z#>Z$BJuWU!nFzFi$IrFcSi9n;d^rmFiHZ1~*f7BZKxJ5>RM0R^B|)&yR!5J%r?VCq zlQz!>EAJ4>F8PCmvN1PP2|m5Vy8?JR|KeFpbs8#Whe>FVxyQm-BgiOiyt#0l{O=Vv z|7myZWOwm=PK{P(cD9d?4@*2(!yQdshxAFCKfC0;x7Ll>sW;w6@Krf@4!E!J?)OJ? zmcP)oiOx<=$!a?1-d^6SVf#F83e*_1nt$KhUpZ!0c3BRsOBTi=(Sm!aYhq%eVpuXC zYv|zY+zTQU`2K>^BUr<|R!K5lqSIikgV6zvRZ>zS%>`Kt zECmX>=MR>W=@&m{TocSu)SN0=fXbwGX=3~bK1>qrjouCqkGkP{$z3F9H9~>6p?K!~ zyb?%;UXXIh5i2}2BgKT8?O2?Xk>N4$;U)l!71OyK#D{~et*yZb2*?!UkbuCzK)RNb zj2f9lw)o1mW_dYO3~$DEEfVE}SvQ_^Xah3iZ%ZA^vr(rdsv?(T55p@zZ2z-fRyGGu zWECDGt^s-Cn0}{`I#rwwcp)<_saYuM!q_Q5>&1(GqS-?BD!++g(at0i?dNa$*PkOI_Il#yR;(;6KiR+UL1F;>ZYml&g zM(($#zzno!I}t^l?Y7yDkCVa9u;;2ZM2i!JK%Ur@0xMLH<+aMF1krKl6U)ZP=1 z{CY!1K@7;`Kbisdh0R}ZPYH2@qBb=()p4se=)bCz^FR?No&mapZS{P&8&IF^KQmN` z_|WAZigd|WN(P(*>64k{}vSH-H)dV<^ezVBcYmd0DXR##w# zi<}3aIy*aI(r&)@P)6W1^hup9{mg%2yEKWcGXCZ7k6w2S8Gb11W$<$Fu!LlBOFFid zT>Bricp*@>vvE!8IN@ig24=)!=CfmW9E$wTcfC^vtyLB>-|@yJR}FBuu1b~a{pGhG zcbmn8@fdW4l{DBy0c>LS**Cv{+mmrG%mS)HN@M002^N~IrM2!@8wZEb6<)ebIDq1^ zLhI@3UU+zfJlO$dmrqE~&)1_J^k{k>hX+@bn2;dpTL&b#1)EZ9O8|E!E4A_Q@fvM* z7}N=Hn&^W%wvpTssfaymZ(j_4Tglq^jZ=ETJRo3QqCS(KzD^YkMU-mjMG$C09dNC>mTZr$3Xe8kcJ<5oMT;8wLTO$4DCB_BMt%GoHO}n92|^Doq6w}rU1pE_(voK7kzwGLFf*Aj0R1}&im9cvMP;5fP`Ld18eiZKDhCbx0>7ht-z-ly&D4FiTgi7C$^lOTY$ zIDL9SCW3rFKx>PI^XVrZ=Kx@6Ih7o6$lJ#Vf^?%$a|F~stzVwl0WD;Ta>@f=p-{*I zA!ZBg1=R=ArxZXIf|?{9_xcYvm8QW0LK940%Of0_F6FLHJwN_nq#>1?MJSwv0Q2dV ztt{)+&8G>3L=`X?&ts$J=H`G|&4}v|zn@>41a)R8u`IcU`%WQ!gFpNuSNlC&`}1AW z0xK`Zs+zpa%#7N&>^p>U 
z9TGBlq?0nA_Qjm;rMO!LLLK(i%Bt1LHgb| z8M^3BKYw5qDjcRm#~Q#i0dM?)kv#+PI!2@dB#7&KhEXC-cYZhPxoNhc8>n0QL#Vg4l1Cm z(WIV|#ZE7#rk2)Jb9w_Hl8w16IQoY*%T0fH%|$hA=H2lJdl_*Mqh86aH_GH&?n{ce z{7wdT3%q6w^%4Z*8M^qTUO z@k}6PP6CJ&HYB#-TP(E8Thgsl$rx}CK+kQWgyZZ-7vXBIB_gH8YpE^-#Es9)*dXyqRa>lZZ~Md+P*GK#`Cf`;803p#C;BA4 zWe2~fw6LVasD=faYgAi0r#&$~4)UTHP$jMj_?12Pkm(3!T*LO&BR#J+sfn7F7It-^ z*#hET3kw`3R#T3wBfk(!(Rv8A1KmQ1Nkb zTNEymNIl#E;UVqvg(iSD*d)qjI)_{fGDlX#@O~M%&kcQ+g;TN!4YbOnrMN;F$YbBS+)r?rvkqV8F*MPxW9JK`p1uNSvJEM=miT^L6^MM&mD5@BG7x*%8Bo z%{_n_IG+-IS;ajIYd5AGA;|!_G>8`fi{cF8L(~PGbhPm^PA5SqY@lI*`eC0bz!aRv z(Bt6~1nvrCahLjTLL55}l*@GmmClZiM|e+f?^iEg zT%?k&H7g!0%E;hF;l$_7dFi-S-nZ&WU;2ZP%Ze7KSn=3uPcN_BNYIgyJBZHCu+2;2 zSkU7X)sUfkz_GGeFTdS+qBDO&mYLQbsuIg=KHv$bV6aQ2OtLZop*IL;ip&aWIrbw3j?3Johbpv^8O(_d=&~IXy~4q!uErGQ>D|r+yOL$I$cib z%=3V53N_F8gQUDYJrfN42jIEQnRZM-yZDC>x5k8s_=W1$Dc$e|FI|&IElspT$;+p! zotuHU>Iir|*nD6$3m70OSSEt>xd<<80UNje&*|VE)$e!)f#Qqo zY*h%MAy))?D~`iMMwdFjMj@gU^uN~G*MGyX7TX9mFUTmryUf4iCL()=aC6qVY+k>< z5+V>(Ui9$r08#-k-FD#YK{$q3*t`TyAW87b7T#EBmMgtHQ-i&lz2W!#ulKlm^1@t= z_9(3zIOD$_{ZJ1Y{%7)$?PeS>&|-@X#Gf_>2F>IXrRssCSXq`-naq6ZS5;xpNl;G?!WWE4ruqrK<$6#g@K89nc>2DSuA(MhAnU8s&WH%# zAc7a4{t3zG)7)erh+7TPqEmrI;P48tzSWM{)6r1`(o9x(^qR$~9%@yLiVX7DE3M3J z3iWv^&17al6*wo*s1S0=&=LqBu-FmYK`KNQfJpf-UW`2k=?Mo|IVxTCo~5Vz|IXK1 zq;f^Q0LgGD2no$A`&@WAB!9j5N zhag;f^|KEob8N{3TT5FTD1@2{puIRjzY_xI7vQ#t{ND0X&@7-NWam|&_P5$OHnz0r zLak_r^~+z21XjYEld3K>%oKq0*n}Clu^YDOp3_^%2EaBfPjUjm{QGcn7-q4m7|PTE zG@93z)dhYJBHVc;dBr*AQ~uw<1xuTk@Uo*dX4tUCojZ5HUvOX@RzRJxHI2f)hehqm zakW@Na$dc91rVv|{1zCf-KWa?Y=mupf~YhKJP0Sx!v>Zb5@XnJ5DcyT$;JX6Jk^^T zIG4%ym92*-*B$bn`$p65$K)Fd^YV;mKPbV?y?`4b9Y22j^P@v9tUqW>Q?#uStX6wR zhagf--1j6(b^op;%_ed!7}l=N&V*!GF%ghw?jSI-SOBZ9 z%;`(&iV>luiGh9X3P@N`&|Jgq0v*4;x0AWIGi|dHFccqi-Y8Bg1oO*<0m2b0*q%+Q z#sZM45q%Jq(8RM9@6ZggOODxP&{QYTCXk5>UqTxCIYKn^UQuz!;i122GZ!gyxB|j+ zKErJ-98@o7`G!wK6U76f@K*baEUtV9H%4fG&5K+VY|aCovw!o8sd)yAg;jfYjWXAl zj_OUCe7V8yQ%d3*L~V2O{O5~L24gTjaC|mD+K_tKwHjn5Fm2AoMzHX3YS{oM)xc69 
zz9B9Aujf+6;2X)2x#v7Fdl^B%8L=vuC5TRUK`6TPoi6o=5dKjM{<*9y42-K>h)jNh z(|0TmaE*!%8_`MqOdN;YA;+BoF-g&IJVBw%yFXKM4t3Y$@`3I*J+C4;D+OIXiNH{@ zEU4aabja7?_u?Q)V>or6vpqqv@NJ6(1JSv@Dc>M!>yqbno+TeGwf5T#v!pnNorcbDbFEqkgTs;g;X?i^p6skrqHD-wQ1!mmj96$!s0;a4R5iiH2aknrbI61B7cN=L6R UdWeJn{DT}fwfZUh-_Bk6ca!siCIA2c literal 0 HcmV?d00001 diff --git a/openvino-doc-image-triage-npu/samples/synthetic_invoice.png.txt b/openvino-doc-image-triage-npu/samples/synthetic_invoice.png.txt new file mode 100644 index 0000000..5b7f6be --- /dev/null +++ b/openvino-doc-image-triage-npu/samples/synthetic_invoice.png.txt @@ -0,0 +1,5 @@ +ACME Utilities Invoice +Invoice No: INV-2026-0604 +Amount Due: $123.45 +Payment due 2026-06-30 +Please submit payment by the due date. diff --git a/openvino-doc-image-triage-npu/samples/synthetic_receipt.png b/openvino-doc-image-triage-npu/samples/synthetic_receipt.png new file mode 100644 index 0000000000000000000000000000000000000000..e0bf86f50db43c73c03ed155148269d1586e8313 GIT binary patch literal 12268 zcmeHNc|6qn-v4Pi+T`|h$}Qn^8x)}vl4Xoew=@w+age3cVkeB<7@bb5nA1p;brhjw zi4Z~?8M5!oP}Z?C!!TxhKEG4vzFs}gxzD}NJ~?fbcL+{92~ z&DJ#tf=J+x{PhHaEQjA$c&}Op|8o6S^hpF!%ftWmz{!C4Nd}2vIZs@ijXQE_)zOJR zeO;rMH#xWNYa3GA?R)$0@7ucKjuWzy5X3)_w8k3ik^1!elRCCCQQYP0clr^o-cP_i zy7i}vF7p9F1y3Js%-^edvjy3JKdIkmK2=5=-t^;2{5rfWxqON^BH)HhjZ`-4@yZt1 z+ZChe7zE*yiaJ#gWN-swU}Ql?kY#}@kUv%JK*9`U#gG-ORme9#8zRUhy=BNM^gZAF zkG%y}dzod0{nyq-;3{)m|GU?lWZ1J3;cP#3>Qo7?C^t73OMWO#r=Qy^LtKY6L{FOa zPsm?sQFtON_EO(co3J}@g~38ok&>g96rS;p%NLNDjrMg41ViT&-%NalEEi0CZ zULK+!xPoAzB<`fhTE#q)u+u9<`5SaCr{zlG=vp(uO5z5x2ijA-5p=B%f8Phyv!=ew zY;WlO=$Gb==uqR06`C90AqkhO#dIn}hplQE!4_nu-^uj|%~ZD0>d`i&>xSNg%kaw4 zWL+<~?~O3`4sN(qErtZPtn_{Gc6RkzgmZx9)c4RENjg}SjINsGbK9`QXb3UTYMf-H zZ#)&5ja?bGzR`+(WC%efQ=*2<#4T>;Z$p?0I##li*y!a5p;qsNHe#Slch6S*3Sq{3 z>yAD!Q!Lh$MwqAJJz~f+tU%Mbe-c{qf)rP0wJifu9xQf zO?CychbPCsmY8T`@SarLiqGHG4 zTi=(53hc5P<|aDxQd2LqK0Xz`S>uALtr)(_J#6dU9%pgr~mTB zYIY%`52*_Hy=Nu5s$n4#Vgv6Xa z&Fg2e+2iBmQJj;Xz&I-kuc9ykH`S>~udS_};*7Os 
zFih4V&vpdPk=n9sZES3UxkJ=K`_+t#ZzNpXo^U5UNx{v>v?8{sr)FiT+NiJHy!#0C zN^Xg^wl?*&Hyu7N7rOXS-sr9dDN#7mv^C7B%Bt>1~Kn@mT@m++m$ZOj~9jtmJUi{pK*VB{rdIN($d`5 z=>APHc=xZ#DygWHa);v-O%qSs+mk+A5^rm79~riOyduemxkP%(VzF+9hl@TO%G!SL z>cr!STodek3v69bXYTpV+pbwfsYyvmIXN>CTD|ER8OpnM?b^M2_pV*eUhc}JyV6J^ zVTMJfbunRdZnU{M++t{GC_g_RexlV>#;dUxdRN}sB3PPtb#vQ>4I&&oc#x(~%vI9~ zWDWZF5f>D3TzZ^mnYLR;j+}7jnsb8Q;)l!aw*Izf&kn4VIz6*Ao-OCsaR!k*wJ zsd?OwqQW>fZ+=`WWN`)><$^ynx+jU(4bPHaVb4Ew;J_<(J+6;$=EwV6k`SU%jJ@{WU)c~#aXC!l0sCGNXSz( zzW1|906h9;dTG%75_Ed=#V%KWb?;p4YmD6mZ>&__ljWHh1Z@1H(Z!urHcCC?-5sp9erm#0Cqv9tE znnW0pZ;B~t;LIQsoBNT z&jB>HWmsm<8I}ewcx=#$F;-Ah%YORwTZ!09%f$Vj;7Uzam*ud-#gw7|@3cw%N>Y{&b@?pFJJ*K;(2d3AmxSy?m?_0Zvhw&7r%w3}L}*=LGn-9uC@ADD zNNI@{PnnyW7e6KPIDMHplMPVZ2{bnP2L9|jcvx}(4E_E6ruy8JLsKDT+CIZ6n&{@P zT)R2N)w+`bh%2De_;{VX3d02>KPP=w$5~JwB`)6}4qCp+dP+)4U7qCv;k>;_lW}ZX zO;t^e)PF;9xGurw`|s~11Xk}#(_>ZbIDh_p&0Uq6VY3Xm7e_r0CaOD{0Ne4oY#etm zYM9vkgE#wpG$FVl>41vM;U2``S`A&uo&M|N$B&yL0O%PpG{N_w?(UNkT0Jvp>A=O0 zw`Z5nH5nCpFuIZr$-#ix*LOr#)rG(oC>)EugQ5GEu@kuL5z@>nar?T$ZDKE}d_~Gs zSr9ydGiT1!Ct`8J#Q|GJc+^+Nf$MsTe8}3v_oVCsw)0x+yH+8^H#}||*4A98xVedR zG4#-(ik3&_Y!)jvHg>9bJj+jC2XW|YQeion%J}zqb()5XA&g_~qBb`q4EvXVUT8L4 zVWq=eCQFJTOc$N=vZpL8{N}bJh^hWp$)_V|vq!24cZ7WeFV{_SKMATSP3fuEwM z?_|5f--sjD?Z?;v1$ZbWS9X|p)$mq;P>P|V$e?;|6 z3O-67+6BPF&Z}b6s6*`j+DxmwVUS+Cl$Ble16s@s5jAH2GkdGx)sKllS7V|;4 zs&$UTtLkWFwUUY4N9Jh--|uuOBi3rRe+_>&Z!VjylpSrwzsv_#i*yXwyUVBmcg3P2Upk2Slik%#^OOd#;7|A1$`o71`a#@ zf<0VML)%UfzxU#Ym&L`!7HE$(`6nn{*0)qiN8Bxx)q?JMsJRC1?4j+s2dHm>+^UE+X5NN}z72{_>iQqbC6C5vyLoqCICgbSm*6;ri z2Hm2Mr<4(SC`HBXbC1<RnZ&uSsXMOeW`{8}u<<1A}EDl*?l;zEX^ zEoHr{w$IPccPa&qXBGy5t^invGU7~j`!}Z^L-j+gXQ2H5x3{ecS|#%62+E(btgL7y(4EkuSl@@=FOXT)$GfB$J?5knoyfFEJg;k%w-L& zl~Q$ca&pq|&9*PUCKvi#1>M?Sk6Jp$RPUFNA1jKF0G z$5_i6TTQ1!`vh}39mOSO^#Ce>`Y?F!kp%y{(a{or-tz$%ccL}JQYQ~2wGmY{;cwiP zw}nuTT>!b2&h@!a9>`3M^Kj7@zFw=Q(I-=KkXL>PNs=HW>y7o5i?9nl>fmtzuqH}d z&-xuH^aM9zo5@^Ub{Jvm>&#ZT3mzExpgZ20$(N`1pYG1CiA|Cn1KoKj|K7 
z1cYi*8Utgdlluk2AEBI^=y)oL<6op;Py-H1{%7I`l&GAWQotpTHmBL6nEwHIrD-e! z*zGgy8~KxU#?nDM%H#Js*2T}Uwjnw7V~vgC4YQ!Zbb~n);M3LtjXciCDrVQ|!qrFu zIsyHn#&*ggp) zr?8kY)>!9JGFd>i59PMmcrsoD`cN?TysmcD=O2CqUCqlXnR3yed?hXikIBi!WvdQ> zd25Th{>W)%s$3*AR#1lx+oFEq>rI;yeA}#o7nGMH zT~xiJb|JGBkeUJRJ(O!F&9_?8LYGDj<%oo_mPg<(f>Qy{fYK-!ZGdhlp!TuOZIF^= z3;>#iKt?bHIvoBVKJknZ8iyE2Gus01&}cLa9Uyo{i^SG~{DPxB60Z)72dJxOt7Dz- z(i&}P7YM%fVN`SO7EmH#V#`fXD5^IH2Jx8SN0Ud%sm3u)4GqW5&DrNm)sYY3HO^%Q zO@Tl`52d4{qbCK{pm;iEmxU2KsoaI9%A;)=hW5AiEep#!_G=G6nb6FiIJM93+V2fI zlaz0M_ zaa;$0qCw&F!b#yiB6=d_%jCA}y-HzEu5L3tCgE7NP^qK@9F22xDse=&4Wh>T$aaac_wPQwgDUjfO*y0w=eZFkYSXf=~R2ReKDFk zu)sR(R8j&Sj?r|r;5-%0NQy3kv()2F^sKDj+LibDeEx9BXZ>hrXXoKD1qL~8{oT8FWx*>z3^eKix(WWx z#@xKQ|B-CS%qyTIAf4eD$HWj89ZzxVdF}<3SyWU6Q2@Zn+3r$H#G#PTr)OQ@W>tK* zxzMvOR;CVUMa{2tf;&Q^X*i$w&r-FjE^vY2FNvLN_$4;W2W>d9n2}w!6mHLdke<$b z!LCzQRyLvxrYA>k)$%w^!L+uvLN4Jx=O-4%XeF|pKr^Ydd4n`Pdi3Z&mZ^`x?5Y(r zAAp@aH{O;BeZ`& zi=JRcp_4E2Ucf$62OFm$r%a(1eioiG<&y>7cA?xMh!K3*!?l@-Z@&N|^$YKveCxpf zsDi?tJKED|D3`GBI0ciI;Aidb?oPqLRf6s@qF|!v#-Q6Ey;Q1Otfkii$F6Sq~mONKJ)`SPxhY-&CM80+u6quF(+8(P(Pi6gUhFzA{oKZmJg| zK>%@_aFz@~)f5K5S@rZ{i@7NS1oxpsha3}pE#mmG@HE@cN)G$^!du}HZ&WabQdLz_ zI$D0miN~2#D_iWZCkfdS()-Tgo&l&qkem&f7a_RonKR_e9mT$`&d#GR0@>C?mYjfT zXr5|ZG!UtqtEKm_&shUv7c^&e*7vDiKgm5T7FHjXmmwcjhdSGe^3p=x&uK0u5LA^a zd>2Hm=^)vG$M?@Q9E42nJ#ZYFlYzdHfoBFm5h(mjkk1JhU$0hsG4nbrQ5-3*q*Zy; zj5SV|m4Azsy8M+y5F`z;v;=UVyBW`6TVl!6J%>jZ3<^d(*C30O=L@Tlr14=hzgK%o zep-bzWU^Nw4%44yh(Y^^FzUbK)PMV6iP;o*(#k5v<0OPHb59W@;e=%k=$uj%H__uL z#*aKxKicmG=8Taqfmz>n4}1aV>ZgTEo_#uM&_00Jl6j{3G`+6&Uu{zAu{aLX3^*iM z)C5YP2;5ViX}}u^t@xm^M^KYCfG3|UYr0wTnl)=s4i^e|(}EhYnTL$Tnvp0&UNMC9 zgF{2}%*L~7w6v_Otdx|LoSYo^2}Pum_(kg5(RyTQ25J}9$OZ<4SQz3YQ_8vF_c8GB zdqIrX14}~F6$E4>Ii^1wx<5H~*F6@55-4_SNPC+Y!(Qs=tM5Q2r!FHZDd&}V631=& zgq%V!qlu9z;aHTmi#yamnZvHGF0hDTKNavz|H(qy$k4mMFx}Z@4#eJneMi&7CqUvp z^;j)DC9h7$wOqh)?e>>IWRA!ROd8i?gXWHCfrjjU;ceX!xLuK#JlqZ4-kp2i2#!mN zeaD-wX^`VUm64z=M&F~20F8~~b=Gf&+)}7vJ7HB!a4znE#Le8o;w}bnWz`9`I|@Sw 
zC{n**Ou>NLL7fI%*BCfP&P&KEn&wjo{?LOl#;LZZQgHk+XqM~NnHwvEIzNw;@V}HK zS4{4=0wW{*_T9ETc+!Lgo4%zsh&}FN=pa?fxwW2XN~p5g+i!?fqh?ATtCaRMmZL<( z2{_%J;DS11;Cum+t&foXqwBzdON?=G2Rn_lls5W9ecj5axVSjm`B88=89Qnf3j-Fx zQbB4*#^{TdzDg+@GZ(4#x`d`H61G+JN3U;IhHU7#URA&FNA zWrjBd&_)d?2A>$vf8*)dcXjdFc@^K8_<;89<2vNJ;;dk|of3D0zIrkCI0Np3YSCC1 zj{O8sc*;pjOR(?qc-CkdO&@|fJVg(n0{%BMG9(g`a^h4^(UmJ#)Jo@0aJu9Wf>?0y z8&AvV(*5&e8QF;-?w~wkbp59Q$*!&2vKKBF1h7(V(&oZ}lq|^DA(4-VOtUau-!25Y_AOz}aYl%Eah{4sIC1dwR(P5KJ@9o*% ztpnMWO^M&+&)W~8Vq(HvlbR&}|9)n+zrwjk17uD)l>0awgY-f>*o)@v3zs6llGr+j zrpbRk{ruv`5wb@1gUw)NQcV(KA(LxqA;7uHq{~)>pug5p*`nceJg)34;`9!8 zn-S5P%CZL-5R9BrVLo; zm$?JRH{@LZ0mtcjaRki53S{OB2VA@X(%ki*4f1dK;(s`>z`PI$tkc?f4M7MBw);>` zt-JImB+TSd%`nU-Nc|ceGmJh($G@uhzZ>lSm!lHcepQ$=09)cax1F#|8$8qKSE3Ck z(YX34g3cTW^$V)U;DG$kuG%~Wl=;%VcS}v8cPKI(`WfUrK~w1(%8MO_kGAQ+#Uv5o zRR#tXs7JT?%jLo8x`EaMZwLRX9bt8bLk1=A5t+k7FU#;Sm5^LL8y%LTJGkMc$V}Vd zsH~W6M70=)qPL7=1|tVMVf4TrUI~L6a%@hnZy;9fVA`Er{}LWdk~y64Pb#AGB8-9T z>z+IZIdm%FfCV{8T0T-lL(oO}a?#6S(Z<$vm^Yy4{U8TZ1+eIKPU{V&ceOkuWpG2u zp&-xll=a2HEAN}YmakxTfgsOfF2HL;Da%Rji5v|h9!5N_Rh`U9 zhUo+pL}5xH3}pV^m|Gjgke{P@yknoHSJwX5(;(;+OUZ@#f8JqzG4cYo>i>FJ<=6Qe z^mQV(^|ynce^}uE!$Uj&XVXD{e9`~r?A0H;`^WD7fegRxM*rBCzwHY;25OrZ^e+yC z!qJFBI}B6)OH-s_;L1BRGrkNk|LN&ebV}8)7L?S#rqh1UY0=*{?+=9eqm%w9hX1YU i@&6wMgQ*qZX1|V5STjI+3sd|E{*cLEv%Yiq@!tU^#BpZ; literal 0 HcmV?d00001 diff --git a/openvino-doc-image-triage-npu/samples/synthetic_receipt.png.txt b/openvino-doc-image-triage-npu/samples/synthetic_receipt.png.txt new file mode 100644 index 0000000..f85b281 --- /dev/null +++ b/openvino-doc-image-triage-npu/samples/synthetic_receipt.png.txt @@ -0,0 +1,5 @@ +Neighborhood Store Receipt +Subtotal $14.20 +Tax $1.42 +Total $15.62 +Thank you for shopping diff --git a/openvino-doc-image-triage-npu/samples/synthetic_sensitive_form.png b/openvino-doc-image-triage-npu/samples/synthetic_sensitive_form.png new file mode 100644 index 0000000000000000000000000000000000000000..1a5afc144d90361293581e39a8980b2c425506ed GIT 
binary patch literal 12769 zcmeHNd010tx<9S8Tw9gt6-7nhcI*@cHC5CI0WuwmOhu%KNR>^cipmn%2_Z>aTd5#q zsv@$4v`|1mK-K^u35tla1OZtjgs_F31VTax+3tHf_jc~wJM-s#W~Pt-I=9eX?FxNx#~>~A>DOnv7M=9k9&f(aF0c8)_;)!utlyvd zQ_zd_5468+>bX|*-FAyJ+V7e^SgN5%y3xKmvin9|_*$LKay{p`Qo_{ENu zbc%jm&wAuHSaJvPNhkgP>DJj+M@*^e#qW1I9l~O1TlbxxAE+XPSJ&0m(e-O)3X869 zG`0!qf9RHTM5CbUqRUw6*5MZ?Q%Mqbbhj?ejd-gEcvUGnm)0NGS0PWSUg zqKCGT7n=;3L9b32I66DGKDn{Uv&7pvZe^iyBQA)SX)SsqGBR@0hsO@D3}2gU&%j2_ z^*_3%7fqQp60RI^aB%SV9}DlW>)SWkis9PAn;m3vcv4al&3^S#vzDrzq-vL$Ty~!? zX|rP}&w5fe+b5Ct`T21vn)_}!+l`EzlD1b-G^;z451$v*MLN1wGMQ9aj%jdU;HDJk3PsE#2R$5Xlqa%noegK2Kmo4j}DpRS``@Nc-_?@SbF9n{LT|b)F&5f=w z)Z=C<30L>cx=kiA!(W{!h=5(fcvU|rE)E`U_)7FfPfw5KsRy4|Ty$}CY$~P5i-a0+ zv@xOQ<;&gPH4jf(2(C76Fk{4p6`U+AF5Z)Jw79q!K5w3KG_dzUL20Sva9=!LJ+jBE zI^uMLCr6sH_T4Raj^@H^oV>`7J$}oXq8>9%J(^1r*9LU`^r_u(4|n&9i0LkA6ztT+ zjw~OGlvAa?c9&lKKrgLreifVfqxxf;2;!>??LK!aZ(H`}?7bbW+w`)xx0=bsVzEN1 zPJ^qh_sJWmi=3sX=KIEi?6TG(Lm7GO=TYwz!n&G52&=MbQBr|>zIgO~3|B3kX9~5A zrrL!&?#F9~zkYYiUhkSKJ%zR5lOaR(wd$>-mZkMw`L2xg^mM3Zad~-pNl8fK^$jBC zp`iZHJUn`ahA1yS)+_eoa@4&)e0*)A{mQU(e73ikv^t%yl1;lCG-+XrFSe)8%*?#1 z2oMq{>9c*MRCpJ5Mjx-5tAI_OpP#>`r%xIFN{@$=)aN>uZQP(f=aH2czv6*TI)78&X@gGB@O?Bo#(@ypF_GX($MG#rLRaBjV zieK%d*(=9y3j+8L&o!c3Z~>3hV_m3A^LM)5Wo#o)=cZCreBnl;LqY15MYGJSkN@<( z19Zw_E{1#=D(UGR>PS51;o(smdF9HLma?87Vdr*UD=aF)Efe_F!P8y2a5H=+!H*M1 zY`*8P(Rv)OTf)<9X6`R_;Hi(lS+_=S<8C2AY&-d5yvYC*_;PoiZiS|+QA=A%nS}%) z$JnxP>l=hp^ByiW+pu&MKegV9ifBs6Ug1S^Dwdve7rwH4S| zPd5Y6Rzas|ezeoc)Oi0F&tL~wZKtYpG)O~8k)gfS88^3m7u)VhX;b`xfsM8c4b3K` zptgJaysCpP69~GBcv%0um?hbpFnZT3#l# z^YPk6nxk7A9mn)`Yj1C74#ft#8KAH3?(S{|X7q@zwlpiWaBgm8IR!QmrEL0CXthZ| zzxKLudpEgV@`ITAH8c)7)6ra&pvTjXeY=(a3Q$MgPM|D3eit)m@1@LP7vK*d4{=l9 zMjz2zzx8U5hi8(BT^+y;c~zE%r)s$xK6iuBp$8ler=+BW&1NrYcw#8bmOW<4Wic+A7JJiahR_y#nfMz|@xQFHm#xC;Sxz(Tz8#e6Ov&Z@F?&(E=IFO=859e76 zbZ@1kq+kUR-B1D_K*igxWs$SJd3kvW2?^+y3DpzWjP&%nj#Ju8=m7WA39FGvt)3~S|q0LSH<+dh?QfdNu7ZuwRv@QtKS;>_RAfh z8xpcVuMHDz_Vix7+kt~}?Imr&_zudhbI7W@*buh77kltIkfPw_%a?IwaY=8PQ!Vw~qsFYhi3~uYQ 
zhGKVDJWaq!R~z^WWaH}#Yges$aouPksS&u~=Y*?T{!zH+#}o+z%JtTXlX zZTm0&E-3+`QURmi8i3Wy{xV--XM;D`&N*RD7u&`8~#J-lf|8 zGl@E#GFTJJRkjmgkm0K<&OZA3y`4wq*oq-Q<@<$&a69;^AFjd5gk;xdCfvZ9jfaO{ z1u%*W3u86%J}hC#`}FDBt0Rv+icYoDanVZ??T&8HGcx*D3-(-j>-O#2>vjwQVQMuh!a!v~d3hMD z(%7B6(r8Knbx~AQ#OL$%^iAqR1a&5T>k--hZbfG4v}y~RH(Scpwpn$+@N;)1j6w(d zlz;L3yZ3flSX!b?YVw(nk0xKwpl*KJ-nI56`X?_h)wh_q5ofLOeY2wwDtXl3@i%BY2TJbE17*@G`7!ju5B_`^mw8%GcB zYu*vwxB+`7^<>r;hWZlqsnmk>jEq+9AXnb&1$Y=$c@g>Yj&DqWEEy+0tX&yvMR{KV z{jYlI?uv@CGnJR16F|CxRtZ)*FYhG*vHl5`5I5^3v@peymgj)!LEOOW-p?PNEoCon zKNR#inN0Ta@gb2&G^~58lHb);4y6uzOK}O5Z~#5B7v^zwd7dQi@$BpE&AR${os%h9 zwmi?*Et=}e{BI@u`OT{rc!bEb8CXYfa4;KGhyof-xira5N_+=dFzm!Jw#}EQaxx{) zsN-)T4~^0}leShf&R*cohAj6DXg<8p&qk0?NeXzlv)B)Qb^a|j;HEuPY^PIlP)LZ$ zzVlB8ssc%a-GCYs6BEfOs@HW?oNK%ex~`|UH);-8o8sWm_@2{_P-wNyj*9@d^r(T$ zE-o$%tt?+sH(O$27_JaRD?m+}ConW~0iYm5VjG(b{RW9`P#oy3LH}gyyt1-ID8-(H z(N;fg7;zl*+}rE7erC|5Q3&y1)8O*guU~gEA^v2Z0QyC|#L&gLyLYcM%Ul3;QJ<6g z8)R7Uf;pA+;jw!U0p|fj&UR)y-ZpdPC?_&ll7f5<{Yvg4jztaQQ@YYjt(?jZJ2!PMz8a zFb`m8o={4j4}`r7aWk;=EZJ=sVCbHAOu5>}^#vT6RjZde^miP7*t}L4GnO>gOv5CM zqWb~bM^MH+X#vQ~tRL2o8(t&2U0qd$aw-rUih-#SP>419whPr5Bcy><9Q|}k!BMcG zTrj?()>Q0)1G1WY^Mo-Po@1VX%C4~nF9N%2$rc<3bu!gd)!%%CX5wQKHG0Eri{qkJ z!Q}$b-L&Ezp8n}JD)26BYfZ)I!2W!MD&E#f231?a5^uMUivd$S(^G__#Z9CIh`~63 zIO{%+EB23bGr()6T~VP^pUw6 z(EG_oSTS(v4*R&3KCV^{VqRX8-<**nftqUP#)~{lvknIKf*8gv1M6-;wIS-44Vut) zhQ5kf99>>setz35wW#btc6K(0Ls3dafSam;K29%jp|{ve-)8CuF#eb3`pcjw#_{T5 zJ$wx~x)wg#tBX&K7%`(aj6!Ngs`(+7)N5RzOXz*LDsTpSljlZgbB}>d$5eRM{uQ`t z+{#Eiu3Et-aK)sfxhDYCh^|l$=pW%?Ig3X?F%F$Xl-eoQp`%Yw;hKBmGMF2x+>_d) zyl+IPUmP>lt%MS$y7N1>1!OJ`hIa(>tBY-88oqpku4fwd?0tdoOxbxzAe@`;DWHXc z0Aq=5diD$8#bp8sg$Yvqq=ns5n-!Rlxf1Uk$T0itK*r_#2fzZA+1clTJw%K)W>u9j z_;-OK!bGT?n1aei?+-6HX#@Xl)la8#)gqMYhzqD8PqFm0mt6<0 z#sC|%+}Q;}9jMonyV?VO&PE@FtkgYTrwp1v>def}w&KuO?j5kbDZ@jRmp-99T)cv;|$?>GE-$P{{)nQG!?nc~jV0Dewt< z{yUf`hE>3E7ngQ$M*?LrfvVy|0|+63Q2}X*QarafmVu>5?Ka3jV}IBlPi@&ru#E@% zyxmKB0CLGXl@Kp|wi8o3ev26jgumYcYF@_AuqVe7b6ROaa2h%M0z(S)b3@i2e#H^v 
z-wO8t)apkQEQfgCK*+YcYv^(Q{h}gO;vPL@*!b@<1Ag&_Ydw7(q;l35TJHRIW)tCA z$s5Qpy$2_`2_7I3?DVdL?A+W7qfc%aEB?(dX5?Fl=>XdRo%~@wM2|qmK={=e+HC9z z9uW{gYWHlUG2S$){~VZ}T?yxBUKV6v2`1KIU!;u z*Fw4y80eha1QlQ~x_Qqw7oRJBh5kyo-=>wfl0i_I+dba#F#Ht#sSE-}6y zvJR$@qSB|XG`b(RzA#XE8SW9p{ni7QUKDy1L7Ku0=AQx)JX2H(u@GcukSR)Cn!%JL z8DY^Z2%L;})s>#X+Hh3ub2in2#NZhLzi@R5-66=83>^PEU^;CLG21nL`x>NKe3JAUFX+R)(^7tJhwVvG~c1tATj78zD}d>1N-Q+;EnnOx00sEVgFQ|jY&X6v`@ z{{j{&VpfL;&J6W)$Rww4Jv}@k<1{{QZf*um5Vvq;xfqx90|hGm>!8bhhx#9$b7&4C ztCou&sDbYQLA~lCKjkAYJxt1AtL83UhkPRdZU_nuwF21>>E16HNGXekQrAX_%G z(s;ClH<9{-S=$6Iu(Ow8rT}F5P)8b7Jthl7kfGVwkv)^rNCUauKst(1 zzp)8HNbme>pNssGmi*-es2k@mTWmHp1#k!tUjh@^TjImYIR`ai9Ug%IqO@i-LAnfr zLpbmOAsAE*I#$>aZ{T^Q2=(C*_hZ>3qlp-?Jw>ZlLJdKRKLO3-Y0v~^aD{jjz=#ni zeGbGr7utsMs`ML3hK)Wt+mPY&KGNXT{r6O}aN-+Aj02H0(Ut~Eao3O|DpbJ-hHy2@ z(pLFwABu*M-AB=``NdA>qQ2OWyV7LUVH@7EE8kZB(jCp=jf{*s@H(|A2pyP&dvM62 z*oP(;rj+)BeQFSUR73m@WFf2fZy>IZ*ePmA`xMLReDUH%fBy<7lN4e4N9P_t(lfOs z4AldAZZxrt0q~~N(tkN^c%_V>n<>FD*8-nrn*)ACJbOPL5`7cTnlMqze}OZ7k~@{1 z2o|+wb#=8V(JTrWGdHMgUXiyF+)T1buCw-y?J!ALEPZS)8 z77t1{uR&rAWxTDLAqA%o);lmz2VNei!gxaAV zQPlcDK7+FQXY~y@7z&3BlBxU7pxLJL8Cq2SgYZuFNHRtJ0(zch`>Hm_su#|dAclvS z#t+nV?2ZbsPLSPC`xx z0fCL)6!iYjpkMOf!5nNekp%!}o&ZWsngz!vEiFEEhkZWWcHq)y7cPj<0~!TOXdL?F zJ>y0=5W|^zaQ0cl(Lr(r={aJ8<`Cx;q`g~Qe zFo>Bo%X|Vroyez*E5MZ z>t;|@RfW2>gwdx)gt#EigUvVBmBaTxAB&3I3

yjw8}zC?7sRXWSwlY4JYko6-tN=oKV{Ou6M>QjxjG%gJZV7(o-p*j z`Y6DDDNULMN24iJMkOR~@RR&-UR*_Qup2V$^LKS}Vs_16Q%BsU$~aXx`0$F0?H~hK z%L9S1W*7<3P+=RF4?hQ*?SP{s32ZW*RtF_R0Sn@F;4H(Rk0jUs&*K39Pg$e4ocC_S zQ`DK(;gBbma8_pkl(N+^fl2`?<{^!P!$}rM3Y9{_g&ZJpZ`~Axb@lVre8>~7LY}BN ze6gp{1GI{1-5_Tzvf^CS-`T;AwuDx|5Pm+bo`=n=R)5pL3a^7`;VRH193Dzp;Wj|( zaAtiuA6(6WSvUgAw28c;jGAMObva|meY~~E0`K2*BoFUm>$@x7L}bU=3wp;8WNz25 z(}dyZ6`kbl2;ezOLzC4D4%&ehtyTuBTUD str: + """Reject non-loopback binds; this prototype is never a LAN service.""" + normalized = host.strip() + if normalized == "localhost": + return normalized + try: + if ipaddress.ip_address(normalized).is_loopback: + return normalized + except ValueError: + pass + raise ValueError("host must be localhost/loopback for this prototype") + + +def _roots_within_configured(requested_roots: list[Any], configured_roots: list[Path]) -> list[Path]: + """Return request roots only when they narrow the startup allowlist.""" + narrowed: list[Path] = [] + configured = [root.expanduser().resolve() for root in configured_roots] + for raw in requested_roots: + candidate = Path(str(raw)).expanduser().resolve() + if any(candidate == root or candidate.is_relative_to(root) for root in configured): + narrowed.append(candidate) + else: + raise ValueError("requested allowed_roots must be within configured allowed roots") + return narrowed + + +def _validated_embedding_url(raw_url: Any) -> str: + """Allow only the configured local loopback embeddings service.""" + url = str(raw_url) + parsed = urlparse(url) + host = parsed.hostname or "" + if ( + parsed.scheme == "http" + and host in {"127.0.0.1", "localhost", "::1"} + and (parsed.port or 80) == 18817 + and parsed.path == "/v1/embeddings" + and not parsed.username + and not parsed.password + ): + return url + raise ValueError("embedding_url override must target the configured local loopback embeddings service") + + +def make_options(payload: dict[str, Any], default_roots: list[Path]) -> TriageOptions: + opts = 
payload.get("options") or {} + requested_roots = opts.get("allowed_roots", []) + if requested_roots: + if not isinstance(requested_roots, list): + raise ValueError("allowed_roots must be a list") + roots = _roots_within_configured(requested_roots, default_roots) + else: + roots = default_roots + embedding_url = DEFAULT_EMBED_URL + if "embedding_url" in opts: + embedding_url = _validated_embedding_url(opts["embedding_url"]) + return TriageOptions( + max_pages=int(opts.get("max_pages", 3)), + include_ocr_text=bool(opts.get("include_ocr_text", False)), + dry_run=bool(opts.get("dry_run", False)), + use_embeddings=bool(opts.get("use_embeddings", True)), + embedding_url=embedding_url, + allowed_roots=roots, + include_full_path=bool(opts.get("include_full_path", False)), + ) + + +class Handler(BaseHTTPRequestHandler): + server_version = "openvino-doc-image-triage-npu/0.1" + + def _json(self, status: int, body: dict[str, Any]) -> None: + data = json.dumps(body, sort_keys=True).encode() + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(data))) + self.end_headers() + self.wfile.write(data) + + def log_message(self, format: str, *args: Any) -> None: + # Do not log request bodies, OCR text, or file paths. 
+ return + + @property + def allowed_roots(self) -> list[Path]: + return self.server.allowed_roots # type: ignore[attr-defined] + + def do_GET(self) -> None: # noqa: N802 + if self.path in ("/", "/healthz", "/health"): + self._json(200, { + "ok": True, + "service": "openvino-doc-image-triage-npu", + "bind_policy": "localhost-default", + "npu_busy_time_us": read_npu_busy(), + "npu_busy_check_enabled": True, + "allowed_roots": [str(p) for p in self.allowed_roots], + "privacy": {"external_uploads": False, "raw_text_logged": False}, + }) + return + if self.path == "/models": + self._json(200, { + "models": [ + { + "stage": "needs_attention_embedding", + "model": "bge-base-en-v1.5-int8-ov via local :18817", + "target_device": "NPU", + "verification": "sysfs npu_busy_time_us before/after embedding call", + }, + { + "stage": "image_category_classification", + "model": "rule-based fallback in prototype v1", + "target_device": "CPU", + "npu_status": "not configured; future static-shape MobileNet/EfficientNet/ResNet OV IR", + }, + {"stage": "ocr_text_extraction", "model": "optional local sidecar/PDF text", "target_device": "CPU"}, + ] + }) + return + self._json(404, {"ok": False, "error": "not_found"}) + + def _read_payload(self) -> dict[str, Any]: + length = int(self.headers.get("Content-Length", "0")) + if length > 512 * 1024: + raise ValueError("request JSON too large") + raw = self.rfile.read(length) + if not raw: + return {} + return json.loads(raw.decode()) + + def do_POST(self) -> None: # noqa: N802 + try: + payload = self._read_payload() + options = make_options(payload, self.allowed_roots) + if self.path == "/triage": + path = payload.get("path") + if not path: + self._json(400, {"ok": False, "error": "missing_path"}) + return + self._json(200, {"ok": True, "result": triage_file(path, options)}) + return + if self.path == "/triage/batch": + paths = payload.get("paths") or [] + if not isinstance(paths, list) or not paths: + self._json(400, {"ok": False, "error": 
"missing_paths"}) + return + self._json(200, triage_batch([str(p) for p in paths], options)) + return + self._json(404, {"ok": False, "error": "not_found"}) + except Exception as exc: + self._json(400, {"ok": False, "error": type(exc).__name__, "message": str(exc)}) + + +def main() -> int: + parser = argparse.ArgumentParser(description="Local-only doc/image triage HTTP server") + parser.add_argument("--host", default=os.environ.get("DOC_IMAGE_TRIAGE_HOST", "127.0.0.1")) + parser.add_argument("--port", type=int, default=int(os.environ.get("DOC_IMAGE_TRIAGE_PORT", "18829"))) + parser.add_argument("--allowed-root", action="append", default=[], help="allowed local root; may repeat") + args = parser.parse_args() + try: + host = _validate_loopback_host(args.host) + except ValueError as exc: + parser.error(str(exc)) + roots = [Path(p).expanduser().resolve() for p in args.allowed_root] or [Path.cwd().resolve()] + httpd = ThreadingHTTPServer((host, args.port), Handler) + httpd.allowed_roots = roots # type: ignore[attr-defined] + print(json.dumps({"service": "openvino-doc-image-triage-npu", "host": host, "port": args.port, "allowed_roots": [str(p) for p in roots]}), flush=True) + httpd.serve_forever() + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/openvino-doc-image-triage-npu/tests/smoke_test.py b/openvino-doc-image-triage-npu/tests/smoke_test.py new file mode 100644 index 0000000..b504bbc --- /dev/null +++ b/openvino-doc-image-triage-npu/tests/smoke_test.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import json +import socket +import subprocess +import sys +import tempfile +import time +import urllib.error +import urllib.request +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +SAMPLES = ROOT / "samples" +BUSY = Path("/sys/class/accel/accel0/device/npu_busy_time_us") + + +def run(cmd: list[str]) -> None: + print("+", " ".join(cmd)) + subprocess.run(cmd, cwd=ROOT, check=True) + 
+ +def post_json(url: str, payload: dict) -> dict: + req = urllib.request.Request(url, data=json.dumps(payload).encode(), headers={"Content-Type": "application/json"}) + with urllib.request.urlopen(req, timeout=10) as resp: + return json.loads(resp.read().decode()) + + +def post_json_status(url: str, payload: dict) -> tuple[int, dict]: + req = urllib.request.Request(url, data=json.dumps(payload).encode(), headers={"Content-Type": "application/json"}) + try: + with urllib.request.urlopen(req, timeout=10) as resp: + return resp.status, json.loads(resp.read().decode()) + except urllib.error.HTTPError as exc: + return exc.code, json.loads(exc.read().decode()) + + +def busy() -> int | None: + try: + return int(BUSY.read_text().strip()) + except Exception: + return None + + +def choose_free_loopback_port() -> int: + """Ask the OS for a free localhost port and verify it is not listening yet.""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.bind(("127.0.0.1", 0)) + port = int(sock.getsockname()[1]) + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe: + probe.settimeout(0.25) + assert probe.connect_ex(("127.0.0.1", port)) != 0, f"selected port already has a listener: {port}" + return port + + +def assert_loopback_bind_policy() -> None: + blocked = subprocess.run( + [sys.executable, "server.py", "--host", "0.0.0.0", "--port", "0", "--allowed-root", str(ROOT)], + cwd=ROOT, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + assert blocked.returncode != 0, blocked.stdout + blocked.stderr + assert "loopback" in blocked.stderr.lower(), blocked.stderr + + +def main() -> int: + run([sys.executable, "make_samples.py"]) + invoice = SAMPLES / "synthetic_invoice.png" + pdf = SAMPLES / "synthetic_invoice.pdf" + + before = busy() + raw = subprocess.check_output([ + sys.executable, "triage.py", "--allowed-root", str(ROOT), "--pretty", str(invoice), str(pdf) + ], cwd=ROOT, text=True) + data = json.loads(raw) + assert data["ok"], 
data + first = data["files"][0]["result"] + assert first["privacy"]["external_uploads"] is False + assert first["pages"][0]["classification"]["label"] == "bill_or_invoice" + assert first["pages"][0]["needs_attention"]["value"] is True + assert "amount_due" in first["pages"][0]["needs_attention"]["reasons"] + assert first["processing_device_summary"]["file_intake"] == "CPU" + assert "NPU" in first["processing_device_summary"]["needs_attention_embedding"] or first["pages"][0]["needs_attention"]["device"] == "CPU" + after = busy() + if before is not None and after is not None: + # If :18817 is reachable and text was embedded, NPU delta must be positive. + emb = first["pages"][0]["needs_attention"]["embedding"] + if emb.get("used"): + assert emb.get("verified_npu") is True, emb + assert (emb.get("npu_busy_delta_us") or 0) > 0, emb + assert after > before, {"before": before, "after": after, "embedding": emb} + + # HTTP smoke on a preflighted free localhost port so we do not collide with live/prototype ports. 
+ assert_loopback_bind_policy() + smoke_port = choose_free_loopback_port() + base_url = f"http://127.0.0.1:{smoke_port}" + proc = subprocess.Popen([sys.executable, "server.py", "--host", "127.0.0.1", "--port", str(smoke_port), "--allowed-root", str(ROOT)], cwd=ROOT, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + try: + deadline = time.time() + 5 + while time.time() < deadline: + try: + health = urllib.request.urlopen(f"{base_url}/healthz", timeout=1).read() + assert b"openvino-doc-image-triage-npu" in health + break + except Exception: + time.sleep(0.1) + else: + raise AssertionError("server did not become ready") + resp = post_json(f"{base_url}/triage", {"path": str(invoice), "options": {"allowed_roots": [str(ROOT)]}}) + assert resp["ok"] is True, resp + assert resp["result"]["source_path_basename"] == "synthetic_invoice.png" + assert "source_path" not in resp["result"] + + # Request bodies may narrow but must not widen the startup --allowed-root policy. + with tempfile.NamedTemporaryFile(suffix=".txt") as outside: + outside.write(b"sensitive text outside configured artifact root") + outside.flush() + status, blocked = post_json_status( + f"{base_url}/triage", + {"path": outside.name, "options": {"allowed_roots": ["/tmp"], "dry_run": True, "use_embeddings": False}}, + ) + assert status == 400, blocked + assert blocked["ok"] is False, blocked + assert "allowed_roots" in blocked.get("message", ""), blocked + + # Request bodies must not redirect extracted text to caller-supplied endpoints. 
+ status, blocked = post_json_status( + f"{base_url}/triage", + {"path": str(invoice), "options": {"embedding_url": "http://198.51.100.1:9/v1/embeddings"}}, + ) + assert status == 400, blocked + assert blocked["ok"] is False, blocked + assert "embedding_url" in blocked.get("message", ""), blocked + finally: + proc.terminate() + proc.wait(timeout=5) + + print(json.dumps({ + "ok": True, + "samples": len(list(SAMPLES.glob("synthetic_*"))), + "npu_busy_before": before, + "npu_busy_after": after, + "npu_delta_observed": None if before is None or after is None else after - before, + "triage_label": first["pages"][0]["classification"]["label"], + "needs_attention": first["pages"][0]["needs_attention"]["value"], + }, indent=2)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/openvino-doc-image-triage-npu/triage.py b/openvino-doc-image-triage-npu/triage.py new file mode 100644 index 0000000..e2764aa --- /dev/null +++ b/openvino-doc-image-triage-npu/triage.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python3 +"""Local-only document/image triage prototype. + +CPU stages: +- local file intake, hashing, MIME/extension checks +- image/PDF-page decoding and normalization +- optional sidecar/native-text extraction +- regex metadata extraction and rule-based category fallback + +NPU stages: +- needs-attention semantic embedding via the existing local OpenVINO NPU + embeddings service on 127.0.0.1:18817, verified by sysfs busy-time delta. + +No external uploads are performed. The only network call is localhost to the +embedding service when enabled. 
+""" +from __future__ import annotations + +import argparse +import base64 +import dataclasses +import datetime as dt +import hashlib +import io +import json +import mimetypes +import os +import re +import sys +import time +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + +try: + from PIL import Image, ImageOps +except Exception as exc: # pragma: no cover - caught in CLI smoke + raise SystemExit("Pillow is required: install pillow in the active Python env") from exc + +NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us") +DEFAULT_EMBED_URL = "http://127.0.0.1:18817/v1/embeddings" +DEFAULT_ALLOWED_ROOTS = [Path.cwd()] +MAX_FILE_BYTES = 25 * 1024 * 1024 +CATEGORY_LABELS = [ + "receipt", + "bill_or_invoice", + "tax_or_financial", + "medical_or_insurance", + "legal_or_government", + "form_or_application", + "travel_or_ticket", + "screenshot_conversation", + "screenshot_web_or_app", + "identity_or_sensitive", + "photo_misc", + "unknown_or_low_confidence", +] + +DATE_PATTERNS = [ + re.compile(r"\b(20\d{2}[-/](?:0?[1-9]|1[0-2])[-/](?:0?[1-9]|[12]\d|3[01]))\b"), + re.compile(r"\b((?:0?[1-9]|1[0-2])[-/](?:0?[1-9]|[12]\d|3[01])[-/](?:20)?\d{2})\b"), + re.compile(r"\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2},?\s+20\d{2})\b", re.I), +] +AMOUNT_RE = re.compile(r"(? 
def read_npu_busy() -> int | None:
    """Return the integer stored at ``NPU_BUSY_PATH``, or ``None`` if unreadable.

    The value is only used to compute a before/after delta around the
    embedding call, so any failure (missing sysfs node, permissions,
    non-numeric content) degrades to ``None`` instead of raising.
    """
    try:
        return int(NPU_BUSY_PATH.read_text().strip())
    except Exception:
        # Deliberate best-effort: telemetry must never break triage.
        return None


def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of *path*, read in 1 MiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def under_allowed_root(path: Path, roots: list[Path]) -> bool:
    """Return True when the resolved *path* lies inside any resolved root.

    Both sides are resolved first, so symlinks and ``..`` segments cannot be
    used to step outside an allowed root.
    """
    resolved = path.resolve()
    for root in roots:
        try:
            resolved.relative_to(root.resolve())
        except ValueError:
            continue
        return True
    return False


def sidecar_text(path: Path) -> tuple[str, str | None]:
    """Load text from a sidecar file next to *path*.

    Two candidate names are tried in order: ``<name><ext>.txt`` (for example
    ``scan.png.txt``) and ``<name>.txt``.

    Returns:
        ``(text, source)``. ``text`` holds at most 12000 characters.
        ``source`` is ``"sidecar:<filename>"`` on success,
        ``"sidecar_unreadable"`` when the file exists but cannot be read, and
        ``None`` when no sidecar exists.
    """
    for suffix in (path.suffix + ".txt", ".txt"):
        candidate = path.with_suffix(suffix)
        if not candidate.is_file():
            continue
        try:
            # Bounded read: a sidecar is not covered by the MAX_FILE_BYTES
            # check, so never pull the whole file into memory.
            with candidate.open(errors="replace") as handle:
                return handle.read(12000), f"sidecar:{candidate.name}"
        except (OSError, ValueError):
            return "", "sidecar_unreadable"
    return "", None


def extract_pdf_text(path: Path, max_pages: int) -> tuple[str, str | None]:
    """Extract native text from the first *max_pages* pages of a PDF.

    ``pypdf`` is an optional dependency imported lazily; tests do not require
    it, and PDF support stays local-only when it is installed.

    Returns:
        ``(text, status)``. ``text`` holds at most 12000 characters.
        ``status`` is ``"pypdf_cpu"`` on success, otherwise one of
        ``"pypdf_not_installed"``, ``"pdf_encrypted"`` or
        ``"pdf_text_error:<ExceptionName>"`` with empty text.
    """
    try:
        import pypdf  # type: ignore
    except Exception:
        return "", "pypdf_not_installed"
    try:
        reader = pypdf.PdfReader(str(path))
        if getattr(reader, "is_encrypted", False):
            return "", "pdf_encrypted"
        # Clamp: a negative limit would slice from the end and silently drop
        # trailing pages instead of limiting the page count.
        limit = max(0, max_pages)
        chunks = [page.extract_text() or "" for page in reader.pages[:limit]]
        return "\n".join(chunks)[:12000], "pypdf_cpu"
    except Exception as exc:
        return "", f"pdf_text_error:{type(exc).__name__}"


def load_image_pages(path: Path, max_pages: int) -> tuple[list[Image.Image], str | None]:
    """Decode *path* into a list of RGB page images.

    PDFs are rendered with the optional ``pypdfium2`` package (at most
    *max_pages* pages, scale 1.5). Any other file is opened with Pillow and
    EXIF-transposed.

    Returns:
        ``(images, error)``. On failure ``images`` is empty and ``error`` is
        ``"pypdfium2_not_installed"``, ``"pdf_render_error:<ExceptionName>"``
        or ``"image_decode_error:<ExceptionName>"``.
    """
    if path.suffix.lower() == ".pdf":
        try:
            import pypdfium2 as pdfium  # type: ignore
        except Exception:
            return [], "pypdfium2_not_installed"
        try:
            pdf = pdfium.PdfDocument(str(path))
            pages = []
            for index in range(min(len(pdf), max_pages)):
                bitmap = pdf[index].render(scale=1.5)
                pages.append(bitmap.to_pil().convert("RGB"))
            return pages, None
        except Exception as exc:
            return [], f"pdf_render_error:{type(exc).__name__}"
    try:
        # The context manager closes the underlying file handle; the
        # transposed/converted copy returned here is fully loaded already.
        with Image.open(path) as img:
            return [ImageOps.exif_transpose(img).convert("RGB")], None
    except Exception as exc:
        return [], f"image_decode_error:{type(exc).__name__}"


def normalize_for_hash_features(img: Image.Image) -> dict[str, Any]:
    """Compute coarse luminance statistics and the aspect ratio of *img*.

    The statistics come from the grayscale histogram of a copy shrunk to fit
    224x224. ``contrast`` is the standard deviation of luma; ``aspect_ratio``
    uses the original image dimensions.
    """
    thumbnail = ImageOps.contain(img.copy(), (224, 224))
    gray = thumbnail.convert("L")
    histogram = gray.histogram()
    pixel_count = max(1, gray.width * gray.height)
    mean = sum(level * count for level, count in enumerate(histogram)) / pixel_count
    variance = sum(((level - mean) ** 2) * count for level, count in enumerate(histogram)) / pixel_count
    return {
        "mean_luma": round(mean, 2),
        "contrast": round(variance ** 0.5, 2),
        "aspect_ratio": round(img.width / max(1, img.height), 3),
    }


def classify_rule(text: str, image_features: dict[str, Any]) -> dict[str, Any]:
    """Pick a category label from keyword hits in *text* (CPU rule fallback).

    The label with the most ``CATEGORY_KEYWORDS`` substring hits wins. With
    no hits at all, a wide image (aspect ratio above 1.3) is treated as a
    web/app screenshot. Results with confidence below 0.45 are reported as
    ``"unknown_or_low_confidence"``.
    """
    lowered = text.lower()
    best_label, best_score = "unknown_or_low_confidence", 0
    for label, words in CATEGORY_KEYWORDS.items():
        score = sum(1 for word in words if word in lowered)
        if score > best_score:
            best_label, best_score = label, score
    if best_score == 0 and image_features.get("aspect_ratio", 1.0) > 1.3:
        best_label, best_score = "screenshot_web_or_app", 1
    confidence = min(0.35 + 0.18 * best_score, 0.92) if best_score else 0.2
    if confidence < 0.45:
        best_label = "unknown_or_low_confidence"
    return {
        "label": best_label,
        "confidence": round(confidence, 3),
        "device": "CPU",
        "stage": "category_classification",
        "method": "rule_based_fallback",
        "npu_status": "not_configured_for_prototype_v1",
        "candidate_labels": CATEGORY_LABELS,
    }


def extract_metadata(text: str) -> dict[str, Any]:
    """Summarise regex-detected metadata in *text* without exposing raw values.

    Only counts of distinct dates/amounts and boolean presence flags are
    returned; the matched strings themselves are never included.
    """
    dates = [match.group(1) for pattern in DATE_PATTERNS for match in pattern.finditer(text)]
    amounts = AMOUNT_RE.findall(text)
    flags = {
        "org_present": bool(re.search(r"\b(?:inc|llc|clinic|department|bank|insurance|store)\b", text, re.I)),
        "address_present": bool(re.search(r"\b\d{2,5}\s+[A-Za-z0-9 .]+\s+(?:st|street|ave|avenue|rd|road|blvd|drive|dr)\b", text, re.I)),
        "phone_present": bool(PHONE_RE.search(text)),
        "email_present": bool(EMAIL_RE.search(text)),
        "policy_or_account_id_present": bool(ACCOUNT_RE.search(text)),
        "identity_number_like_present": bool(SSN_LIKE_RE.search(text)),
    }
    return {
        "dates_count": len(set(dates)),
        "amounts_count": len(set(amounts)),
        "detected_entities": flags,
        "raw_values_redacted": True,
    }


def _endpoint_label(url: str) -> str:
    """Return ``host[:port]`` for *url*, without path, query or credentials."""
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(url)
        host, port = parts.hostname, parts.port
    except ValueError:
        return "unknown"
    if not host:
        return "unknown"
    return host if port is None else f"{host}:{port}"


def _embedding_dim(parsed: Any) -> int | None:
    """Return the length of the first embedding in an embeddings response.

    Returns ``None`` when the payload does not have the expected
    ``{"data": [{"embedding": [...]}]}`` shape.
    """
    data = parsed.get("data") if isinstance(parsed, dict) else None
    first = data[0] if isinstance(data, list) and data else None
    if not isinstance(first, dict):
        return None
    embedding = first.get("embedding", [])
    return len(embedding) if isinstance(embedding, list) else None


def call_embeddings(text: str, url: str, timeout: float) -> dict[str, Any]:
    """POST *text* to the local embeddings service and report NPU activity.

    Only the first 2048 characters are sent and at most 1 MiB of the response
    is read. The NPU busy counter is sampled before and after the call;
    ``verified_npu`` is true only when that counter increased.

    Args:
        text: Text to embed. Blank text skips the call entirely.
        url: Embeddings endpoint URL.
        timeout: Socket timeout in seconds.

    Returns:
        A status dict. Transport, protocol and decoding failures are reported
        as ``status="embedding_service_error:<ExceptionName>"`` with
        ``used=False``; they never raise.
    """
    import http.client

    if not text.strip():
        return {
            "used": False,
            "device": "NPU",
            "status": "skipped_no_text",
            "npu_busy_delta_us": 0,
            "verified_npu": False,
        }
    # Report the endpoint actually contacted rather than a fixed default.
    endpoint = _endpoint_label(url)
    before = read_npu_busy()
    payload = json.dumps({"input": text[:2048], "purpose": "document"}).encode()
    t0 = time.perf_counter()
    try:
        req = urllib.request.Request(url, data=payload, headers={"Content-Type": "application/json"})
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            body = resp.read(1024 * 1024)
            status = resp.status
        parsed = json.loads(body.decode())
    except (OSError, ValueError, http.client.HTTPException) as exc:
        # OSError covers URLError/TimeoutError/connection resets; ValueError
        # covers JSONDecodeError, UnicodeDecodeError and unparseable URLs.
        after = read_npu_busy()
        delta = (after - before) if before is not None and after is not None else None
        return {
            "used": False,
            "device": "NPU",
            "status": f"embedding_service_error:{type(exc).__name__}",
            "npu_busy_delta_us": delta,
            "verified_npu": False,
            "endpoint": endpoint,
        }
    dim = _embedding_dim(parsed)
    after = read_npu_busy()
    delta = (after - before) if before is not None and after is not None else None
    return {
        "used": True,
        "device": "NPU",
        "status": "ok" if status == 200 else f"http_{status}",
        "embedding_dim": dim,
        "wall_ms": round((time.perf_counter() - t0) * 1000, 2),
        "npu_busy_delta_us": delta,
        "verified_npu": bool(delta and delta > 0),
        "endpoint": endpoint,
    }


def needs_attention(text: str, embedding_result: dict[str, Any]) -> dict[str, Any]:
    """Decide whether a document needs attention, using CPU keyword rules.

    Reasons come from ``ATTENTION_KEYWORDS`` substring hits plus the presence
    of any amount (``"amount_due"``) or date (``"due_date_present"``). The
    embedding result only adds a small confidence bonus when NPU use was
    verified, and is echoed back under ``"embedding"``.
    """
    lowered = text.lower()
    reasons = [
        reason
        for reason, words in ATTENTION_KEYWORDS.items()
        if any(word in lowered for word in words)
    ]
    meta = extract_metadata(text)
    if meta["amounts_count"]:
        reasons.append("amount_due")
    if meta["dates_count"]:
        reasons.append("due_date_present")
    reasons = sorted(set(reasons))
    value = bool(reasons)
    confidence = min(0.45 + 0.1 * len(reasons), 0.9) if value else 0.35
    if embedding_result.get("verified_npu"):
        confidence = min(confidence + 0.05, 0.95)
    used = embedding_result.get("used")
    return {
        "value": value,
        "confidence": round(confidence, 3),
        "reasons": reasons or (["low_confidence"] if not text.strip() else []),
        "device": "NPU+CPU" if used else "CPU",
        "stage": "needs_attention",
        "method": "NPU embedding verification + CPU rules" if used else "CPU rules fallback",
        "embedding": embedding_result,
    }


def infer_media_type(path: Path, is_pdf_page: bool = False) -> str:
    """Map *path* to ``"pdf_page"``, ``"pdf"``, ``"image"`` or ``"unknown"``.

    ``is_pdf_page`` takes precedence; otherwise the ``.pdf`` extension is
    checked before the guessed MIME type.
    """
    if is_pdf_page:
        return "pdf_page"
    if path.suffix.lower() == ".pdf":
        return "pdf"
    guessed, _ = mimetypes.guess_type(path.name)
    if guessed and guessed.startswith("image/"):
        return "image"
    return "unknown"


def _page_record(
    page_index: int,
    media_type: str,
    image_info: dict[str, Any],
    classification: dict[str, Any],
    attn: dict[str, Any],
    meta: dict[str, Any],
    text: str,
    text_source: str | None,
) -> dict[str, Any]:
    """Assemble one per-page entry of the triage result."""
    return {
        "page_index": page_index,
        "media_type": media_type,
        "image": image_info,
        "classification": classification,
        "needs_attention": attn,
        "metadata": meta,
        "ocr": {"available": bool(text), "quality": 0.7 if text else 0.0, "device": "CPU", "text_source": text_source},
    }


def triage_file(path_like: str | Path, options: TriageOptions | None = None) -> dict[str, Any]:
    """Triage one local image or PDF and return a structured, redacted record.

    Args:
        path_like: File to triage; ``~`` is expanded.
        options: Triage settings; a default ``TriageOptions()`` when omitted.

    Returns:
        A dict with file identity (SHA-256, basename), per-page
        classification/attention/metadata records, a device summary, privacy
        flags and an ``errors`` list. A file whose pages cannot be rendered
        (or a dry run) still yields one file-level page record.

    Raises:
        ValueError: The path is outside the allowed roots or the file exceeds
            ``MAX_FILE_BYTES``.
        FileNotFoundError: The path does not exist or is not a regular file.
    """
    options = options or TriageOptions()
    path = Path(path_like).expanduser()
    resolved = path.resolve()
    if not under_allowed_root(resolved, options.allowed_roots):
        raise ValueError(f"path is outside allowed roots: {path}")
    if not resolved.exists() or not resolved.is_file():
        raise FileNotFoundError(str(path))
    size = resolved.stat().st_size
    if size > MAX_FILE_BYTES:
        raise ValueError(f"file too large for prototype limit: {size} bytes")

    is_pdf = resolved.suffix.lower() == ".pdf"
    file_hash = sha256_file(resolved)
    text, text_source = sidecar_text(resolved)
    pdf_text_status = None
    if is_pdf and not text:
        # No sidecar text: fall back to the PDF's native text layer.
        text, pdf_text_status = extract_pdf_text(resolved, options.max_pages)
        text_source = pdf_text_status

    images: list[Any] = []
    render_error = None
    if not options.dry_run:
        images, render_error = load_image_pages(resolved, options.max_pages)

    if options.use_embeddings:
        embedding_result = call_embeddings(text, options.embedding_url, options.timeout_seconds)
    else:
        embedding_result = {"used": False, "device": "NPU", "status": "disabled", "npu_busy_delta_us": 0, "verified_npu": False}
    attn = needs_attention(text, embedding_result)
    meta = extract_metadata(text)

    page_media_type = infer_media_type(resolved, is_pdf)
    pages: list[dict[str, Any]] = []
    for idx, img in enumerate(images):
        features = normalize_for_hash_features(img)
        image_info = {
            "width": img.width,
            "height": img.height,
            "orientation": "portrait" if img.height >= img.width else "landscape",
            **features,
        }
        pages.append(_page_record(idx, page_media_type, image_info, classify_rule(text, features), attn, meta, text, text_source))
    if not pages:
        # Return a file-level record even if rendering is unavailable or skipped.
        image_info = {"width": None, "height": None, "orientation": None, "render_error": render_error}
        pages.append(_page_record(0, page_media_type, image_info, classify_rule(text, {"aspect_ratio": 1.0}), attn, meta, text, text_source))

    # "pypdf_cpu" is the success token of extract_pdf_text; a PDF that simply
    # has no text layer is not an extraction error.
    pdf_text_error = pdf_text_status if pdf_text_status and pdf_text_status != "pypdf_cpu" and not text else None

    result: dict[str, Any] = {
        "file_id": f"sha256:{file_hash}",
        "source_path_basename": resolved.name,
        "media_type": infer_media_type(resolved),
        "file_size_bytes": size,
        "page_count": len(pages),
        "pages": pages,
        "processing_device_summary": {
            "file_intake": "CPU",
            "pdf_rendering": "CPU" if is_pdf else "not_applicable",
            "image_category_classification": "CPU rule fallback (NPU model not configured in prototype v1)",
            "ocr_text_extraction": "CPU/local sidecar or optional local PDF text extractor",
            "needs_attention_embedding": "NPU via local :18817" if embedding_result.get("used") else "CPU fallback/no text",
            "metadata_extraction": "CPU",
            "npu_verified": bool(embedding_result.get("verified_npu")),
            "npu_busy_delta_us": embedding_result.get("npu_busy_delta_us"),
        },
        "privacy": {
            "external_uploads": False,
            "localhost_only_embedding_call": bool(options.use_embeddings),
            "raw_text_logged": False,
            "raw_values_redacted": True,
            "full_path_included": options.include_full_path,
        },
        "errors": [e for e in (render_error, pdf_text_error) if e],
    }
    if options.include_full_path:
        result["source_path"] = str(resolved)
    if options.include_ocr_text:
        result["ocr_text"] = text
    return result


def triage_batch(paths: list[str], options: TriageOptions | None = None) -> dict[str, Any]:
    """Triage each path independently and collect per-file outcomes.

    A failure on one file is recorded as ``{"ok": False, ...}`` with the
    exception type and message and does not stop the batch. The top-level
    ``ok`` is true only when every file succeeded.
    """
    items = []
    for p in paths:
        try:
            items.append({"ok": True, "result": triage_file(p, options)})
        except Exception as exc:
            # Batch boundary: report the failure per file and keep going.
            items.append({"ok": False, "source_path_basename": Path(p).name, "error": type(exc).__name__, "message": str(exc)})
    return {
        "ok": all(item["ok"] for item in items),
        "files": items,
        # timezone.utc is equivalent to dt.UTC and also works before 3.11.
        "generated_at": dt.datetime.now(dt.timezone.utc).isoformat(),
    }


def cli() -> int:
    """Parse command-line arguments, triage the given paths and print JSON.

    Returns:
        ``0`` when every file was triaged successfully, ``2`` otherwise.
    """
    parser = argparse.ArgumentParser(description="Local document/image triage prototype")
    parser.add_argument("paths", nargs="+", help="local image/PDF paths")
    parser.add_argument("--allowed-root", action="append", default=[], help="allowed local root; defaults to cwd")
    parser.add_argument("--max-pages", type=int, default=3)
    parser.add_argument("--include-ocr-text", action="store_true")
    parser.add_argument("--include-full-path", action="store_true")
    parser.add_argument("--no-embeddings", action="store_true", help="disable local NPU embedding call")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--pretty", action="store_true")
    args = parser.parse_args()

    if args.allowed_root:
        roots = [Path(p) for p in args.allowed_root]
    else:
        roots = [Path.cwd()]
    options = TriageOptions(
        max_pages=args.max_pages,
        include_ocr_text=args.include_ocr_text,
        dry_run=args.dry_run,
        use_embeddings=not args.no_embeddings,
        allowed_roots=roots,
        include_full_path=args.include_full_path,
    )
    out = triage_batch(args.paths, options)
    print(json.dumps(out, indent=2 if args.pretty else None, sort_keys=True))
    return 0 if out["ok"] else 2


if __name__ == "__main__":
    raise SystemExit(cli())