"""Tests for the ``scripts/npu-batch-triage-dry-run.py`` command-line script.

Most tests launch the script in a subprocess with ``--dry-run --json`` and
inspect the JSON document it prints on stdout. A few import the script as a
module to call its URL-validation helpers directly.
"""

from __future__ import annotations

import importlib.util
import json
import os
import subprocess
import sys
from pathlib import Path

import pytest
import yaml

ROOT = Path(__file__).resolve().parents[1]
SCRIPT = ROOT / "scripts" / "npu-batch-triage-dry-run.py"


def load_script_module():
    """Import the triage script from ``SCRIPT`` and return the module object.

    The file name contains hyphens, so it cannot be imported with a normal
    ``import`` statement; it is loaded by path instead.
    """
    spec = importlib.util.spec_from_file_location("npu_batch_triage_dry_run", SCRIPT)
    assert spec and spec.loader
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


def run_cli(*args: str) -> subprocess.CompletedProcess[str]:
    """Run the triage script with ``args`` from ``ROOT`` and capture its output.

    A non-zero exit status does not raise (``check=False``); callers assert on
    ``returncode`` themselves.
    """
    return subprocess.run(
        [sys.executable, str(SCRIPT), *args],
        cwd=ROOT,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )


def parse_stdout(result: subprocess.CompletedProcess[str]) -> dict:
    """Decode the CLI's stdout as JSON, failing with full context otherwise.

    Without this, a crash in the script surfaces as a bare ``JSONDecodeError``
    on empty stdout and the traceback printed on stderr is lost.
    """
    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError as err:
        raise AssertionError(
            "CLI did not print valid JSON on stdout "
            f"(exit code {result.returncode}).\n"
            f"stdout:\n{result.stdout}\n"
            f"stderr:\n{result.stderr}"
        ) from err


def manifest(
    tmp_path: Path,
    lane: str = "screenshots",
    approved: bool = True,
    root: Path | None = None,
    exts: list[str] | None = None,
) -> Path:
    """Write a single-lane triage manifest under ``tmp_path`` and return its path.

    Args:
        tmp_path: Directory that receives ``triage-roots.yaml``.
        lane: Name of the only lane listed under ``roots``.
        approved: Value of the lane's ``approved`` flag.
        root: Lane root directory; defaults to ``tmp_path / "approved"``.
            The directory is created if it does not exist.
        exts: Allowed extensions; defaults to ``.png``, ``.txt`` and ``.wav``.
    """
    root = root or tmp_path / "approved"
    root.mkdir(parents=True, exist_ok=True)
    data = {
        "version": 1,
        "policy": {
            "default_mode": "dry_run",
            "require_explicit_root": True,
            "allow_external_uploads": False,
            "allow_mutations": False,
            "log_raw_text": False,
        },
        "roots": {
            lane: {
                "approved": approved,
                "root": str(root),
                "allowed_extensions": exts or [".png", ".txt", ".wav"],
                "max_files": 10,
                "max_file_mb": 5,
            }
        },
    }
    path = tmp_path / "triage-roots.yaml"
    # Explicit encoding so the file does not depend on the platform locale.
    path.write_text(yaml.safe_dump(data), encoding="utf-8")
    return path


def test_missing_manifest_fails_closed(tmp_path: Path) -> None:
    """A manifest path that does not exist yields exit code 2 and ``fail_closed``."""
    result = run_cli(
        "--manifest", str(tmp_path / "missing.yaml"),
        "--lane", "screenshots",
        "--dry-run", "--no-npu", "--json",
    )
    out = parse_stdout(result)
    assert result.returncode == 2
    assert out["ok"] is False
    assert out["error"] == "fail_closed"
    assert "manifest_missing" in out["reason"]
    assert all(v is False for v in out["mutations"].values())


def test_unapproved_lane_fails_closed(tmp_path: Path) -> None:
    """A lane whose manifest entry has ``approved: false`` is rejected."""
    man = manifest(tmp_path, approved=False)
    result = run_cli(
        "--manifest", str(man),
        "--lane", "screenshots",
        "--dry-run", "--no-npu", "--json",
    )
    out = parse_stdout(result)
    assert result.returncode == 2
    assert out["ok"] is False
    assert "lane_unapproved:screenshots" in out["reason"]


def test_request_root_cannot_broaden_manifest_root(tmp_path: Path) -> None:
    """``--root`` pointing at the parent of the approved root is rejected."""
    approved = tmp_path / "approved" / "narrow"
    man = manifest(tmp_path, root=approved)
    broad = tmp_path / "approved"
    result = run_cli(
        "--manifest", str(man),
        "--lane", "screenshots",
        "--root", str(broad),
        "--dry-run", "--no-npu", "--json",
    )
    out = parse_stdout(result)
    assert result.returncode == 2
    assert out["ok"] is False
    assert "request_root_broadens_approval:screenshots" in out["reason"]


def test_symlink_escape_is_skipped_and_output_redacted(tmp_path: Path) -> None:
    """A symlink leaving the approved root is skipped; sidecar text stays out of the output."""
    approved = tmp_path / "approved"
    approved.mkdir()
    outside = tmp_path / "outside"
    outside.mkdir()
    (approved / "note.png").write_bytes(b"fake image")
    (approved / "note.png.txt").write_text("Invoice payment due 2026-06-10 $42.00")
    (outside / "secret.png").write_bytes(b"secret")
    try:
        os.symlink(outside / "secret.png", approved / "escape.png")
    except (OSError, NotImplementedError) as err:
        # Symlink creation can be unsupported or unprivileged on some
        # platforms; the scenario cannot be set up there.
        pytest.skip(f"cannot create symlinks on this platform: {err}")
    man = manifest(tmp_path, root=approved, exts=[".png"])
    result = run_cli(
        "--manifest", str(man),
        "--lane", "screenshots",
        "--dry-run", "--no-npu", "--json",
    )
    out = parse_stdout(result)
    assert result.returncode == 0
    assert out["ok"] is True
    assert out["files_processed"] == 1
    assert out["skipped"]["symlink_escape"] == 1
    item = out["items"][0]
    assert item["basename"] == "note.png"
    assert item["raw_text_redacted"] is True
    assert item["full_path_included"] is False
    assert "full_path" not in item
    # The sidecar text must not appear anywhere in the emitted document.
    assert "Invoice" not in json.dumps(out)
    assert out["npu"]["claimed"] is False
    assert all(v is False for v in out["mutations"].values())


def test_committed_sample_manifest_cpu_smoke() -> None:
    """Smoke-run the committed test manifest and sample directory without the NPU."""
    result = run_cli(
        "--manifest", "config/triage-roots.test.yaml",
        "--lane", "receipts",
        "--root", "openvino-doc-image-triage-npu/samples",
        "--limit", "2",
        "--dry-run", "--no-npu", "--json",
    )
    out = parse_stdout(result)
    assert result.returncode == 0
    assert out["ok"] is True
    assert out["lane"] == "receipts"
    assert out["dry_run"] is True
    assert out["files_processed"] == 2
    # Only the ``claimed`` flag is pinned; other keys of ``npu`` may vary.
    assert out["npu"]["claimed"] is False
    assert all(v is False for v in out["mutations"].values())
    assert all(item.get("raw_text_redacted", True) for item in out["items"])


def test_audio_lane_no_npu_does_not_transcribe_or_claim(tmp_path: Path) -> None:
    """With ``--no-npu`` an audio item is reported as not transcribed and no NPU use is claimed."""
    approved = tmp_path / "voice"
    approved.mkdir()
    (approved / "memo.wav").write_bytes(b"not really wav; no-npu mode must not decode")
    man = manifest(tmp_path, lane="voice_memos", root=approved, exts=[".wav"])
    result = run_cli(
        "--manifest", str(man),
        "--lane", "voice_memos",
        "--dry-run", "--no-npu", "--json",
    )
    out = parse_stdout(result)
    assert result.returncode == 0
    item = out["items"][0]
    assert item["transcribed"] is False
    assert item["raw_transcript_logged"] is False
    assert out["npu"]["claimed"] is False
    assert all(v is False for v in out["mutations"].values())


def test_external_whisper_url_fails_closed_before_audio_read(tmp_path: Path) -> None:
    """``multipart_transcribe`` raises ``FailClosed`` for a non-loopback host.

    The audio path passed in is never created, so the expected error can only
    come from URL validation rather than from handling the file.
    """
    module = load_script_module()
    with pytest.raises(module.FailClosed, match="whisper_url_not_loopback"):
        module.multipart_transcribe(
            tmp_path / "missing.wav",
            "http://example.com:18816/v1/audio/transcriptions",
            0.01,
        )


def test_audio_lane_rejects_external_whisper_url(tmp_path: Path) -> None:
    """An ``https://`` whisper URL makes the CLI fail closed with every mutation flag false."""
    approved = tmp_path / "voice"
    approved.mkdir()
    (approved / "memo.wav").write_bytes(b"synthetic audio bytes")
    man = manifest(tmp_path, lane="voice_memos", root=approved, exts=[".wav"])
    result = run_cli(
        "--manifest", str(man),
        "--lane", "voice_memos",
        "--dry-run",
        "--whisper-url", "https://example.com/v1/audio/transcriptions",
        "--json",
    )
    out = parse_stdout(result)
    assert result.returncode == 2
    assert out["ok"] is False
    assert out["error"] == "fail_closed"
    assert out["reason"] == "whisper_url_scheme_not_http"
    assert out["mutations"] == {
        "obsidian": False,
        "rag": False,
        "vector_db": False,
        "sends": False,
        "file_moves": False,
        "routing": False,
        "memory": False,
        "service_restarts": False,
    }


def test_localhost_whisper_url_is_allowed() -> None:
    """Plain-HTTP URLs on ``localhost``, ``127.0.0.1`` and ``[::1]`` pass validation."""
    module = load_script_module()
    assert module.validate_local_whisper_url("http://localhost:18816/v1/audio/transcriptions")
    assert module.validate_local_whisper_url("http://127.0.0.1:18816/v1/audio/transcriptions")
    assert module.validate_local_whisper_url("http://[::1]:18816/v1/audio/transcriptions")