feat(npu): add explicit-root batch triage wrapper
This commit is contained in:
@@ -0,0 +1,202 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
# Directory one level above the folder that contains this test file.
ROOT = Path(__file__).resolve().parents[1]

# CLI script exercised by every test in this module.
SCRIPT = ROOT.joinpath("scripts", "npu-batch-triage-dry-run.py")
|
||||
|
||||
|
||||
def load_script_module():
    """Load the triage script from ``SCRIPT`` and return it as a module object.

    The script's file name contains hyphens, so it is loaded through an
    explicit file-location spec under the name ``npu_batch_triage_dry_run``
    rather than a regular ``import`` statement. The module is executed on
    every call and is not inserted into ``sys.modules`` here.
    """
    module_spec = importlib.util.spec_from_file_location("npu_batch_triage_dry_run", SCRIPT)
    # Both the spec and its loader are needed to execute the file below.
    assert module_spec and module_spec.loader
    loaded = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded)
    return loaded
|
||||
|
||||
|
||||
def run_cli(*args: str) -> subprocess.CompletedProcess[str]:
    """Run the triage script in a child interpreter and return the finished process.

    Args:
        *args: Command-line arguments appended after the script path.

    Returns:
        The completed process, with stdout and stderr captured as text.
        A non-zero exit status does not raise; callers check ``returncode``.
    """
    command = [sys.executable, str(SCRIPT)]
    command.extend(args)
    # Run from ROOT so relative paths passed by tests resolve against it.
    return subprocess.run(
        command,
        cwd=ROOT,
        capture_output=True,
        text=True,
        check=False,
    )
|
||||
|
||||
|
||||
def manifest(
    tmp_path: Path,
    lane: str = "screenshots",
    approved: bool = True,
    root: Path | None = None,
    exts: list[str] | None = None,
) -> Path:
    """Write a single-lane triage manifest into *tmp_path* and return its path.

    Args:
        tmp_path: Directory that receives ``triage-roots.yaml``.
        lane: Name of the only lane listed under ``roots``.
        approved: Value stored in the lane's ``approved`` flag.
        root: Lane root directory; defaults to ``tmp_path / "approved"``.
            The directory is created if it does not exist.
        exts: Allowed extensions for the lane; a falsy value selects
            ``[".png", ".txt", ".wav"]``.

    Returns:
        Path of the YAML manifest that was written.
    """
    lane_root = root if root else tmp_path / "approved"
    lane_root.mkdir(parents=True, exist_ok=True)

    policy = {
        "default_mode": "dry_run",
        "require_explicit_root": True,
        "allow_external_uploads": False,
        "allow_mutations": False,
        "log_raw_text": False,
    }
    lane_entry = {
        "approved": approved,
        "root": str(lane_root),
        "allowed_extensions": exts if exts else [".png", ".txt", ".wav"],
        "max_files": 10,
        "max_file_mb": 5,
    }
    document = {"version": 1, "policy": policy, "roots": {lane: lane_entry}}

    manifest_path = tmp_path / "triage-roots.yaml"
    manifest_path.write_text(yaml.safe_dump(document))
    return manifest_path
|
||||
|
||||
|
||||
def test_missing_manifest_fails_closed(tmp_path: Path) -> None:
    """A manifest path that does not exist must produce a fail-closed JSON error."""
    absent = tmp_path / "missing.yaml"
    proc = run_cli(
        "--manifest",
        str(absent),
        "--lane",
        "screenshots",
        "--dry-run",
        "--no-npu",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 2
    assert payload["ok"] is False
    assert payload["error"] == "fail_closed"
    assert "manifest_missing" in payload["reason"]
    # Every reported mutation flag must be exactly False.
    assert all(flag is False for flag in payload["mutations"].values())
|
||||
|
||||
|
||||
def test_unapproved_lane_fails_closed(tmp_path: Path) -> None:
    """A lane whose manifest entry has ``approved: false`` must be refused."""
    manifest_path = manifest(tmp_path, approved=False)
    proc = run_cli(
        "--manifest",
        str(manifest_path),
        "--lane",
        "screenshots",
        "--dry-run",
        "--no-npu",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 2
    assert payload["ok"] is False
    assert "lane_unapproved:screenshots" in payload["reason"]
|
||||
|
||||
|
||||
def test_request_root_cannot_broaden_manifest_root(tmp_path: Path) -> None:
    """Passing ``--root`` as the parent of the approved root must be rejected."""
    wide_root = tmp_path / "approved"
    narrow_root = wide_root / "narrow"
    # The manifest approves only the nested directory.
    manifest_path = manifest(tmp_path, root=narrow_root)

    proc = run_cli(
        "--manifest",
        str(manifest_path),
        "--lane",
        "screenshots",
        "--root",
        str(wide_root),
        "--dry-run",
        "--no-npu",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 2
    assert payload["ok"] is False
    assert "request_root_broadens_approval:screenshots" in payload["reason"]
|
||||
|
||||
|
||||
def test_symlink_escape_is_skipped_and_output_redacted(tmp_path: Path) -> None:
    """A symlink pointing outside the approved root is skipped; output stays redacted."""
    inside = tmp_path / "approved"
    inside.mkdir()
    elsewhere = tmp_path / "outside"
    elsewhere.mkdir()

    (inside / "note.png").write_bytes(b"fake image")
    # Text file next to the image; its content must not appear in the JSON output.
    (inside / "note.png.txt").write_text("Invoice payment due 2026-06-10 $42.00")
    (elsewhere / "secret.png").write_bytes(b"secret")
    # Link inside the approved root whose target lives outside it.
    os.symlink(elsewhere / "secret.png", inside / "escape.png")

    manifest_path = manifest(tmp_path, root=inside, exts=[".png"])
    proc = run_cli(
        "--manifest",
        str(manifest_path),
        "--lane",
        "screenshots",
        "--dry-run",
        "--no-npu",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 0
    assert payload["ok"] is True
    assert payload["files_processed"] == 1
    assert payload["skipped"]["symlink_escape"] == 1

    first = payload["items"][0]
    assert first["basename"] == "note.png"
    assert first["raw_text_redacted"] is True
    assert first["full_path_included"] is False
    assert "full_path" not in first

    # Search the whole serialized payload, not only the first item.
    assert "Invoice" not in json.dumps(payload)
    assert payload["npu"]["claimed"] is False
    assert all(flag is False for flag in payload["mutations"].values())
|
||||
|
||||
|
||||
def test_committed_sample_manifest_cpu_smoke() -> None:
    """Smoke-test the CLI against the committed sample manifest with ``--no-npu``.

    The manifest and root are given as relative paths; they resolve against
    the working directory that ``run_cli`` uses for the child process.
    """
    result = run_cli(
        "--manifest",
        "config/triage-roots.test.yaml",
        "--lane",
        "receipts",
        "--root",
        "openvino-doc-image-triage-npu/samples",
        "--limit",
        "2",
        "--dry-run",
        "--no-npu",
        "--json",
    )
    # Check the exit status first and attach stderr, so a crashed CLI is
    # reported with its own error output instead of a JSON decode failure.
    assert result.returncode == 0, result.stderr
    out = json.loads(result.stdout)

    assert out["ok"] is True
    assert out["lane"] == "receipts"
    assert out["dry_run"] is True
    assert out["files_processed"] == 2
    # Previously written as `out["npu"] == {...exact dict...} or claimed is False`.
    # The exact-dict alternative already implies `claimed is False`, so the
    # disjunction only ever enforced this single condition.
    assert out["npu"]["claimed"] is False
    assert all(v is False for v in out["mutations"].values())
    # Items without a "raw_text_redacted" key count as redacted (default True).
    assert all(item.get("raw_text_redacted", True) for item in out["items"])
|
||||
|
||||
|
||||
def test_audio_lane_no_npu_does_not_transcribe_or_claim(tmp_path: Path) -> None:
    """With ``--no-npu`` an audio lane reports no transcription and no NPU claim."""
    voice_root = tmp_path / "voice"
    voice_root.mkdir()
    # Deliberately not valid audio data.
    (voice_root / "memo.wav").write_bytes(b"not really wav; no-npu mode must not decode")

    manifest_path = manifest(tmp_path, lane="voice_memos", root=voice_root, exts=[".wav"])
    proc = run_cli(
        "--manifest",
        str(manifest_path),
        "--lane",
        "voice_memos",
        "--dry-run",
        "--no-npu",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 0
    first = payload["items"][0]
    assert first["transcribed"] is False
    assert first["raw_transcript_logged"] is False
    assert payload["npu"]["claimed"] is False
    assert all(flag is False for flag in payload["mutations"].values())
|
||||
|
||||
|
||||
def test_external_whisper_url_fails_closed_before_audio_read(tmp_path: Path) -> None:
    """A non-loopback whisper URL raises ``FailClosed`` from ``multipart_transcribe``.

    The audio path handed in does not exist, so the expected error has to come
    from the URL check, not from reading the file.
    """
    script = load_script_module()
    nonexistent_audio = tmp_path / "missing.wav"
    external_url = "http://example.com:18816/v1/audio/transcriptions"

    with pytest.raises(script.FailClosed, match="whisper_url_not_loopback"):
        script.multipart_transcribe(nonexistent_audio, external_url, 0.01)
|
||||
|
||||
|
||||
def test_audio_lane_rejects_external_whisper_url(tmp_path: Path) -> None:
    """The CLI fails closed when ``--whisper-url`` is an ``https`` external endpoint."""
    voice_root = tmp_path / "voice"
    voice_root.mkdir()
    (voice_root / "memo.wav").write_bytes(b"synthetic audio bytes")
    manifest_path = manifest(tmp_path, lane="voice_memos", root=voice_root, exts=[".wav"])

    proc = run_cli(
        "--manifest",
        str(manifest_path),
        "--lane",
        "voice_memos",
        "--dry-run",
        "--whisper-url",
        "https://example.com/v1/audio/transcriptions",
        "--json",
    )
    payload = json.loads(proc.stdout)

    assert proc.returncode == 2
    assert payload["ok"] is False
    assert payload["error"] == "fail_closed"
    assert payload["reason"] == "whisper_url_scheme_not_http"

    # The full set of mutation flags is pinned here, each required to be False.
    mutation_names = (
        "obsidian",
        "rag",
        "vector_db",
        "sends",
        "file_moves",
        "routing",
        "memory",
        "service_restarts",
    )
    assert payload["mutations"] == dict.fromkeys(mutation_names, False)
|
||||
|
||||
|
||||
def test_localhost_whisper_url_is_allowed() -> None:
    """``validate_local_whisper_url`` returns a truthy value for loopback hosts."""
    script = load_script_module()
    # Hostname, IPv4 and bracketed IPv6 spellings of the loopback address.
    loopback_urls = (
        "http://localhost:18816/v1/audio/transcriptions",
        "http://127.0.0.1:18816/v1/audio/transcriptions",
        "http://[::1]:18816/v1/audio/transcriptions",
    )
    for url in loopback_urls:
        assert script.validate_local_whisper_url(url)
|
||||
Reference in New Issue
Block a user