feat(npu): add explicit-root batch triage wrapper

This commit is contained in:
William Valentin
2026-06-05 15:52:43 -07:00
parent 6155b54ab5
commit 6906c2079b
7 changed files with 888 additions and 0 deletions
+202
View File
@@ -0,0 +1,202 @@
from __future__ import annotations
import importlib.util
import json
import os
import subprocess
import sys
from pathlib import Path
import pytest
import yaml
# Directory one level above the folder that contains this test file.
ROOT = Path(__file__).resolve().parents[1]
# Location of the CLI script that these tests run and import.
SCRIPT = ROOT.joinpath("scripts", "npu-batch-triage-dry-run.py")
def load_script_module():
    """Import the triage script from its file path and return the module.

    The script's file name contains hyphens, so it cannot be imported with a
    plain ``import`` statement; it is loaded from ``SCRIPT`` under the module
    name ``npu_batch_triage_dry_run`` instead.

    The module is registered in ``sys.modules`` before it is executed, as in
    the importlib "importing a source file directly" recipe. Standard-library
    code that resolves a class's defining module through ``sys.modules`` can
    misbehave when the entry is missing. Whether the script relies on this is
    not visible from this file, so the registration is defensive.

    Returns:
        The freshly executed module object.

    Raises:
        AssertionError: if no import spec or loader can be built for ``SCRIPT``.
    """
    module_name = "npu_batch_triage_dry_run"
    spec = importlib.util.spec_from_file_location(module_name, SCRIPT)
    assert spec and spec.loader
    module = importlib.util.module_from_spec(spec)

    previous = sys.modules.get(module_name)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Never leave a half-initialised module importable by name; put back
        # whatever was registered before this call.
        if previous is None:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous
        raise
    return module
def run_cli(*args: str) -> subprocess.CompletedProcess[str]:
    """Run the triage script in a child process and return the completed process.

    The script is started with the interpreter running the tests, with ``ROOT``
    as the working directory. stdout and stderr are captured as text. A
    non-zero exit status does not raise; callers inspect ``returncode``.

    Args:
        *args: Command-line arguments passed to the script unchanged.
    """
    command = [sys.executable, str(SCRIPT)]
    command.extend(args)
    return subprocess.run(
        command,
        cwd=ROOT,
        capture_output=True,
        text=True,
        check=False,
    )
def manifest(
    tmp_path: Path,
    lane: str = "screenshots",
    approved: bool = True,
    root: Path | None = None,
    exts: list[str] | None = None,
) -> Path:
    """Write a single-lane triage manifest under ``tmp_path`` and return its path.

    Args:
        tmp_path: Directory that receives ``triage-roots.yaml``.
        lane: Key of the only entry written under ``roots``.
        approved: Value of the lane's ``approved`` flag.
        root: Directory recorded as the lane root. Defaults to
            ``tmp_path / "approved"``. It is created if it does not exist.
        exts: Extensions recorded as ``allowed_extensions``. ``None`` selects
            ``[".png", ".txt", ".wav"]``; an explicit empty list is written
            as given rather than being replaced by that default.

    Returns:
        Path of the YAML manifest file that was written.
    """
    # Compare against None so that a deliberately empty ``exts`` list is not
    # mistaken for "argument omitted".
    if root is None:
        root = tmp_path / "approved"
    if exts is None:
        exts = [".png", ".txt", ".wav"]

    root.mkdir(parents=True, exist_ok=True)
    data = {
        "version": 1,
        "policy": {
            "default_mode": "dry_run",
            "require_explicit_root": True,
            "allow_external_uploads": False,
            "allow_mutations": False,
            "log_raw_text": False,
        },
        "roots": {
            lane: {
                "approved": approved,
                "root": str(root),
                "allowed_extensions": exts,
                "max_files": 10,
                "max_file_mb": 5,
            },
        },
    }
    path = tmp_path / "triage-roots.yaml"
    path.write_text(yaml.safe_dump(data))
    return path
def test_missing_manifest_fails_closed(tmp_path: Path) -> None:
    """A manifest path that does not exist yields exit code 2 and a fail-closed report."""
    absent_manifest = tmp_path / "missing.yaml"  # never created
    proc = run_cli(
        "--manifest",
        str(absent_manifest),
        "--lane",
        "screenshots",
        "--dry-run",
        "--no-npu",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 2
    assert payload["ok"] is False
    assert payload["error"] == "fail_closed"
    assert "manifest_missing" in payload["reason"]
    # Every reported mutation flag must be the literal False.
    assert all(flag is False for flag in payload["mutations"].values())
def test_unapproved_lane_fails_closed(tmp_path: Path) -> None:
    """A lane whose manifest entry has ``approved: false`` is rejected with exit code 2."""
    manifest_path = manifest(tmp_path, approved=False)
    proc = run_cli(
        "--manifest",
        str(manifest_path),
        "--lane",
        "screenshots",
        "--dry-run",
        "--no-npu",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 2
    assert payload["ok"] is False
    assert "lane_unapproved:screenshots" in payload["reason"]
def test_request_root_cannot_broaden_manifest_root(tmp_path: Path) -> None:
    """Passing ``--root`` as the parent of the approved directory is rejected."""
    narrow_root = tmp_path / "approved" / "narrow"
    manifest_path = manifest(tmp_path, root=narrow_root)
    # The requested root is the parent of the root recorded in the manifest.
    wider_root = tmp_path / "approved"

    proc = run_cli(
        "--manifest",
        str(manifest_path),
        "--lane",
        "screenshots",
        "--root",
        str(wider_root),
        "--dry-run",
        "--no-npu",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 2
    assert payload["ok"] is False
    assert "request_root_broadens_approval:screenshots" in payload["reason"]
def test_symlink_escape_is_skipped_and_output_redacted(tmp_path: Path) -> None:
    """A symlink leaving the approved root is skipped and the report stays redacted.

    The approved directory holds one real ``.png``, a ``.png.txt`` file with
    invoice-like text (presumably sidecar text for the image; confirm against
    the script), and a symlink to a file outside the approved directory.
    Only ``.png`` is an allowed extension.
    """
    approved_dir = tmp_path / "approved"
    outside_dir = tmp_path / "outside"
    approved_dir.mkdir()
    outside_dir.mkdir()

    (approved_dir / "note.png").write_bytes(b"fake image")
    (approved_dir / "note.png.txt").write_text("Invoice payment due 2026-06-10 $42.00")
    (outside_dir / "secret.png").write_bytes(b"secret")
    # Link inside the approved directory whose target lives outside it.
    os.symlink(outside_dir / "secret.png", approved_dir / "escape.png")

    manifest_path = manifest(tmp_path, root=approved_dir, exts=[".png"])
    proc = run_cli(
        "--manifest",
        str(manifest_path),
        "--lane",
        "screenshots",
        "--dry-run",
        "--no-npu",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 0
    assert payload["ok"] is True
    assert payload["files_processed"] == 1
    assert payload["skipped"]["symlink_escape"] == 1

    first = payload["items"][0]
    assert first["basename"] == "note.png"
    assert first["raw_text_redacted"] is True
    assert first["full_path_included"] is False
    assert "full_path" not in first
    # The text written next to the image must not appear anywhere in the report.
    assert "Invoice" not in json.dumps(payload)

    assert payload["npu"]["claimed"] is False
    assert all(flag is False for flag in payload["mutations"].values())
def test_committed_sample_manifest_cpu_smoke() -> None:
    """Smoke-test the CLI against the manifest and samples committed to the repo.

    The manifest and root are given as relative paths. ``run_cli`` starts the
    script with ``ROOT`` as its working directory, so they are expected to
    resolve relative to the repository root.
    """
    result = run_cli(
        "--manifest",
        "config/triage-roots.test.yaml",
        "--lane",
        "receipts",
        "--root",
        "openvino-doc-image-triage-npu/samples",
        "--limit",
        "2",
        "--dry-run",
        "--no-npu",
        "--json",
    )
    out = json.loads(result.stdout)
    assert result.returncode == 0
    assert out["ok"] is True
    assert out["lane"] == "receipts"
    assert out["dry_run"] is True
    assert out["files_processed"] == 2
    # The earlier form was `out["npu"] == {...} or out["npu"]["claimed"] is False`.
    # The exact-dict comparison was implied by the second operand, so it never
    # changed the outcome; only the effective check is kept.
    assert out["npu"]["claimed"] is False
    assert all(v is False for v in out["mutations"].values())
    # Items that do not report `raw_text_redacted` at all are treated as passing.
    assert all(item.get("raw_text_redacted", True) for item in out["items"])
def test_audio_lane_no_npu_does_not_transcribe_or_claim(tmp_path: Path) -> None:
    """With ``--no-npu`` an audio item is reported as neither transcribed nor logged."""
    voice_dir = tmp_path / "voice"
    voice_dir.mkdir()
    # Deliberately not valid WAV data.
    (voice_dir / "memo.wav").write_bytes(b"not really wav; no-npu mode must not decode")

    manifest_path = manifest(tmp_path, lane="voice_memos", root=voice_dir, exts=[".wav"])
    proc = run_cli(
        "--manifest",
        str(manifest_path),
        "--lane",
        "voice_memos",
        "--dry-run",
        "--no-npu",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 0
    first = payload["items"][0]
    assert first["transcribed"] is False
    assert first["raw_transcript_logged"] is False
    assert payload["npu"]["claimed"] is False
    assert all(flag is False for flag in payload["mutations"].values())
def test_external_whisper_url_fails_closed_before_audio_read(tmp_path: Path) -> None:
    """``multipart_transcribe`` raises ``FailClosed`` for a non-loopback host.

    The audio path passed in is never created, so the test only passes if the
    expected ``FailClosed`` is raised instead of an error from opening the file.
    """
    script = load_script_module()
    nonexistent_audio = tmp_path / "missing.wav"
    external_url = "http://example.com:18816/v1/audio/transcriptions"

    with pytest.raises(script.FailClosed, match="whisper_url_not_loopback"):
        # The third positional argument is presumably a timeout in seconds;
        # confirm against the script.
        script.multipart_transcribe(nonexistent_audio, external_url, 0.01)
def test_audio_lane_rejects_external_whisper_url(tmp_path: Path) -> None:
    """An ``https`` whisper URL on the CLI is rejected with a fail-closed report.

    ``--no-npu`` is not passed here, unlike the other CLI tests in this file.
    """
    voice_dir = tmp_path / "voice"
    voice_dir.mkdir()
    (voice_dir / "memo.wav").write_bytes(b"synthetic audio bytes")
    manifest_path = manifest(tmp_path, lane="voice_memos", root=voice_dir, exts=[".wav"])

    argv = [
        "--manifest",
        str(manifest_path),
        "--lane",
        "voice_memos",
        "--dry-run",
        "--whisper-url",
        "https://example.com/v1/audio/transcriptions",
        "--json",
    ]
    proc = run_cli(*argv)
    payload = json.loads(proc.stdout)

    assert proc.returncode == 2
    assert payload["ok"] is False
    assert payload["error"] == "fail_closed"
    assert payload["reason"] == "whisper_url_scheme_not_http"

    # The report must list exactly these mutation kinds, all set to False.
    mutation_kinds = (
        "obsidian",
        "rag",
        "vector_db",
        "sends",
        "file_moves",
        "routing",
        "memory",
        "service_restarts",
    )
    assert payload["mutations"] == dict.fromkeys(mutation_kinds, False)
def test_localhost_whisper_url_is_allowed() -> None:
    """``validate_local_whisper_url`` returns a truthy value for loopback hosts."""
    script = load_script_module()
    loopback_urls = (
        "http://localhost:18816/v1/audio/transcriptions",
        "http://127.0.0.1:18816/v1/audio/transcriptions",
        "http://[::1]:18816/v1/audio/transcriptions",
    )
    for url in loopback_urls:
        assert script.validate_local_whisper_url(url)