feat(npu): add local context gate advisory

2026-06-05 15:52:42 -07:00
parent 24d620e9c9
commit b7b4edf0f5
7 changed files with 884 additions and 0 deletions
@@ -37,6 +37,8 @@ For the current host-side AI/search/voice automation stack, n8n watchdogs, and a
 - [`docs/swarm-infrastructure.md`](docs/swarm-infrastructure.md) — operational overview and quick checks
 - [`docs/swarm-infrastructure.html`](docs/swarm-infrastructure.html) — dark SVG architecture diagram
 - [`docs/diagram-maintenance.md`](docs/diagram-maintenance.md) — diagram upkeep conventions
+- [`docs/npu-utilization-digest.md`](docs/npu-utilization-digest.md) — compact on-demand NPU proof/utilization digest runbook
+- [`docs/npu-integrated-health-ops.md`](docs/npu-integrated-health-ops.md) — integrated operator health-check workflow combining `npu-service-health.sh` and the utilization digest
 - OpenVINO NPU services and prototypes are documented in `swarm-common/obsidian-vault/will/will-shared-zap/Runbooks/OpenVINO NPU Services Runbook.md` and the component READMEs under `openvino-*-npu*/`. Live baseline ports are RAG `:18810`, Whisper NPU `:18816`, and embeddings `:18817`; sidecar ports `:18818`, `:18819`, `:18820`, and optional doc/image triage `:18829` are approved prototypes only, not live Atlas/Hermes routing.

 ## VM: zap
@@ -0,0 +1,89 @@
+# OpenVINO Context Gate
+
+Local-only Atlas/Hermes context-gate advisory prototype.
+
+This first slice is CLI-only and dry-run by design. It takes a non-private query,
+optionally asks the localhost classifier on `127.0.0.1:18819` for advisory labels,
+and emits a compact typed context bundle plan. It does not retrieve private
+content or change live Atlas/Hermes behavior.
+
+## Safety invariants
+
+Closed in v1:
+
+- live Atlas/Hermes routing changes
+- memory writes
+- outbound sends
+- tool execution by the sidecar
+- service restarts
+- vector DB mutation or reindexing
+- private root broadening
+- live config changes
+
+The CLI only plans which source classes an authoritative Atlas/Hermes agent might
+use later: `durable_memory`, `session_search`, `rag_search`, `repo_files`,
+`live_system`, `web`, or `no_retrieval`.
+
+NPU proof is strict: `npu_verified=true` is only emitted when a live classifier
+request reports a positive endpoint NPU delta and a positive sysfs/endpoint sysfs
+busy delta. HTTP 200 alone is never treated as proof. Offline and fallback modes
+set `npu_verified=false` and include a warning.
+
+## Usage
+
+Live classifier path, with compact terminal output:
+
+```bash
+python scripts/context-gate-advisory.py \
+  --query "How do I check whether the RAG reranker is using the NPU?" \
+  --format compact
+```
+
+Deterministic offline smoke, safe for unit-test hosts without NPU services:
+
+```bash
+python scripts/context-gate-advisory.py \
+  --offline \
+  --query "Write a haiku about Seattle rain." \
+  --format compact-json
+```
+
+Fallback plan if the classifier is down:
+
+```bash
+python scripts/context-gate-advisory.py \
+  --allow-offline-fallback \
+  --query "Where did we leave the NPU context gate implementation plan?" \
+  --context platform=kanban \
+  --context repo_path=/home/will/lab/swarm \
+  --format compact-json
+```
+
+## Output shape
+
+Full JSON includes:
+
+- `schema=atlas_context_gate_plan_v1`
+- `dry_run=true`
+- `query_class`
+- `source_plan`
+- `bundle_plan`
+- `npu_proof`
+- closed `authority`
+- closed approval `gates`
+- compact `warnings`
+
+Compact output intentionally avoids raw private snippets and raw JSON dumps:
+
+```text
+ok=true schema=atlas_context_gate_plan_v1 bundle=OpsDebugBundle sources=live_system,repo_files,rag_search source_count=3 npu_verified=false classifier_delta_us=None outer_sysfs_delta_us=None gates=closed:route,memory,send,tools,restart,vector,private_roots,config warnings=offline_heuristic_classifier_no_npu_claim,npu_proof_inconclusive
+```
+
+## Notes for reviewers
+
+- No HTTP service or systemd unit is added in this slice.
+- The prototype does not call RAG, memory, session search, web, filesystem tools,
+  or the advisory gateway. It only emits a plan.
+- Unit tests use fake/offline classifier results and do not require live NPU.
+- Optional live smoke may call only the local classifier endpoint and read
+  `/sys/class/accel/accel0/device/npu_busy_time_us` for positive delta proof.
@@ -0,0 +1,5 @@
+"""Atlas/Hermes local advisory context-gate prototype."""
+
+from .context_gate import SCHEMA, ContextGateError, build_plan, compact_json, compact_line, validate_plan
+
+__all__ = ["SCHEMA", "ContextGateError", "build_plan", "compact_json", "compact_line", "validate_plan"]
@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from typing import Any
+
+from .context_gate import (
+    DEFAULT_CLASSIFIER_URL,
+    ContextGateError,
+    build_plan,
+    classify_live,
+    classify_offline,
+    compact_json,
+    compact_line,
+)
+
+
+def _parse_context(raw_items: list[str]) -> dict[str, Any]:
+    context: dict[str, Any] = {}
+    for item in raw_items:
+        if "=" not in item:
+            raise ContextGateError(f"invalid_context_item:{item}")
+        key, value = item.split("=", 1)
+        if not key:
+            raise ContextGateError("invalid_context_key")
+        if value.lower() == "true":
+            parsed: Any = True
+        elif value.lower() == "false":
+            parsed = False
+        else:
+            parsed = value
+        context[key] = parsed
+    return context
+
+
+def build_arg_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="Emit a local-only Atlas/Hermes advisory context bundle plan. No routing, retrieval, memory writes, sends, restarts, or vector mutations are performed.",
+    )
+    parser.add_argument("--query", required=True, help="Non-private query to plan for")
+    parser.add_argument("--format", choices=["compact", "compact-json", "json"], default="compact")
+    parser.add_argument("--context", action="append", default=[], metavar="KEY=VALUE", help="Optional compact request context, e.g. platform=kanban repo_path=/path")
+    parser.add_argument("--max-sources", type=int, default=4)
+    parser.add_argument("--trace-id")
+    parser.add_argument("--classifier-url", default=DEFAULT_CLASSIFIER_URL)
+    parser.add_argument("--classifier-timeout", type=float, default=8.0)
+    parser.add_argument("--offline", action="store_true", help="Use deterministic heuristic labels; makes no NPU claim")
+    parser.add_argument("--allow-offline-fallback", action="store_true", help="If live classifier is unavailable, emit an advisory fallback plan with npu_verified=false")
+    parser.add_argument("--no-require-npu-proof", action="store_true", help="Do not add npu_proof_inconclusive warning when running offline/fallback")
+    return parser
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = build_arg_parser()
+    args = parser.parse_args(argv)
+    try:
+        context = _parse_context(args.context)
+        options = {
+            "dry_run": True,
+            "max_sources": args.max_sources,
+            "include_private_text": False,
+            "require_npu_proof": not args.no_require_npu_proof,
+            "trace_id": args.trace_id,
+        }
+        if args.offline:
+            classifier = classify_offline(args.query, context)
+        else:
+            try:
+                classifier = classify_live(args.query, context, classifier_url=args.classifier_url, timeout=args.classifier_timeout)
+            except ContextGateError as exc:
+                if not args.allow_offline_fallback:
+                    raise
+                classifier = classify_offline(args.query, context, warning=str(exc))
+        plan = build_plan(args.query, context=context, options=options, classifier=classifier)
+    except ContextGateError as exc:
+        print(f"error={exc}", file=sys.stderr)
+        return 2
+
+    if args.format == "json":
+        print(json.dumps(plan, indent=2, sort_keys=True))
+    elif args.format == "compact-json":
+        print(compact_json(plan))
+    else:
+        print(compact_line(plan))
+    return 0
+
+
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())
@@ -0,0 +1,482 @@
+"""Local-only advisory context bundle planner for Atlas/Hermes.
+
+This module intentionally emits a retrieval/authority plan only. It does not call
+Hermes memory/session/RAG/web tools, mutate vector stores, broaden private roots,
+or change live routing.
+"""
+
+from __future__ import annotations
+
+import json
+import ipaddress
+import re
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Mapping, Sequence
+
+SCHEMA = "atlas_context_gate_plan_v1"
+NPU_BUSY_PATH = Path("/sys/class/accel/accel0/device/npu_busy_time_us")
+DEFAULT_CLASSIFIER_URL = "http://127.0.0.1:18819/v1/classify"
+
+AUTHORITY = {
+    "may_route": False,
+    "may_write_memory": False,
+    "may_send_external": False,
+    "may_process_private_dirs": False,
+    "may_execute_tools": False,
+    "may_restart_services": False,
+    "may_mutate_vector_db": False,
+    "may_change_live_config": False,
+}
+
+GATES = {
+    "live_routing_change": "closed_requires_explicit_approval",
+    "memory_write": "closed_requires_explicit_approval",
+    "outbound_send": "closed_requires_explicit_approval",
+    "tool_execution": "closed_requires_explicit_approval",
+    "service_restart": "closed_requires_explicit_approval",
+    "vector_mutation": "closed_requires_explicit_approval",
+    "private_root_broadening": "closed_requires_explicit_approval",
+}
+
+_ALLOWED_SOURCES = {
+    "durable_memory",
+    "session_search",
+    "rag_search",
+    "repo_files",
+    "live_system",
+    "web",
+    "no_retrieval",
+}
+
+
+class ContextGateError(ValueError):
+    """Raised for invalid requests or unavailable required local stages."""
+
+
+@dataclass(frozen=True)
+class ClassifierResult:
+    labels: Mapping[str, Any]
+    npu_busy_delta_us: int | None
+    sysfs_npu_busy_delta_us: int | None
+    outer_sysfs_delta_us: int | None
+    live: bool
+    warning: str | None = None
+
+
+def read_npu_busy_time_us(path: Path = NPU_BUSY_PATH) -> int | None:
+    try:
+        return int(path.read_text(encoding="utf-8").strip())
+    except (FileNotFoundError, PermissionError, ValueError, OSError):
+        return None
+
+
+def _label_value(labels: Mapping[str, Any], name: str, default: Any) -> Any:
+    value = labels.get(name, default)
+    if isinstance(value, Mapping) and "value" in value:
+        return value.get("value", default)
+    return value
+
+
+def _label_confidence(labels: Mapping[str, Any], name: str, default: float = 0.5) -> float:
+    value = labels.get(name)
+    if isinstance(value, Mapping):
+        try:
+            return float(value.get("confidence", default))
+        except (TypeError, ValueError):
+            return default
+    return default
+
+
+def heuristic_labels(query: str, context: Mapping[str, Any] | None = None) -> dict[str, Any]:
+    """Small transparent fallback used by tests and explicit offline smoke mode."""
+    text = query.lower()
+    platform = str((context or {}).get("platform", "unknown")).lower()
+
+    current_words = ["current", "now", "health", "port", "process", "systemd", "status", "npu", "listening", "logs"]
+    prior_words = ["where did we leave", "what did we decide", "previous", "earlier", "handoff", "plan"]
+    coding_words = ["implement", "code", "repo", "test", "pytest", "diff", "branch", "hermes"]
+    research_words = ["research", "compare", "summarize", "explain", "what is", "how do i"]
+    unsafe_words = ["change live routing", "live routing", "restart", "send", "write memory", "reindex", "mutate", "delete"]
+
+    safety = any(w in text for w in unsafe_words)
+    tool_needed = any(w in text for w in current_words + coding_words) or safety
+
+    if platform == "kanban" or "kanban" in text or any(w in text for w in coding_words):
+        category = "coding"
+    elif any(w in text for w in current_words):
+        category = "devops"
+    elif any(w in text for w in research_words + prior_words):
+        category = "research"
+    else:
+        category = "chat"
+
+    if "remember" in text or "preference" in text:
+        memory_candidate = "durable_user_fact"
+    elif "convention" in text or "workflow" in text:
+        memory_candidate = "workflow_convention"
+    else:
+        memory_candidate = "none"
+
+    urgency = "high" if any(w in text for w in ["urgent", "critical", "down", "broken"]) else "normal"
+    return {
+        "tool_needed": {"value": tool_needed, "confidence": 0.76 if tool_needed else 0.68},
+        "memory_candidate": {"value": memory_candidate, "confidence": 0.8 if memory_candidate != "none" else 0.35},
+        "urgency": {"value": urgency, "confidence": 0.8 if urgency == "high" else 0.65},
+        "workflow_category": {"value": category, "confidence": 0.78 if category != "chat" else 0.7},
+        "safety_confirmation_required": {"value": safety, "confidence": 0.9 if safety else 0.2},
+    }
+
+
+class _NoClassifierRedirectHandler(urllib.request.HTTPRedirectHandler):
+    """Fail closed instead of following redirects away from a validated local URL."""
+
+    def redirect_request(self, req, fp, code, msg, headers, newurl):  # type: ignore[no-untyped-def]
+        return None
+
+
+_CLASSIFIER_OPENER = urllib.request.build_opener(_NoClassifierRedirectHandler)
+
+
+def classify_live(
+    query: str,
+    context: Mapping[str, Any] | None = None,
+    classifier_url: str = DEFAULT_CLASSIFIER_URL,
+    timeout: float = 8.0,
+) -> ClassifierResult:
+    classifier_url = validate_classifier_url(classifier_url)
+    before = read_npu_busy_time_us()
+    payload = {
+        "id": f"context-gate-{int(time.time())}",
+        "text": query,
+        "context": {"platform": (context or {}).get("platform", "cli"), "source": "context_gate"},
+        "options": {"include_evidence": False, "include_embedding_debug": False, "dry_run": True},
+    }
+    req = urllib.request.Request(
+        classifier_url,
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    try:
+        with _CLASSIFIER_OPENER.open(req, timeout=timeout) as resp:  # noqa: S310 - local configured endpoint only
+            raw = resp.read(256_000)
+    except (urllib.error.URLError, TimeoutError, OSError) as exc:
+        raise ContextGateError(f"classifier_unavailable: {exc}") from exc
+    after = read_npu_busy_time_us()
+    try:
+        data = json.loads(raw.decode("utf-8"))
+    except json.JSONDecodeError as exc:
+        raise ContextGateError("classifier_invalid_json") from exc
+    labels = data.get("labels")
+    if not isinstance(labels, Mapping):
+        raise ContextGateError("classifier_missing_labels")
+    outer = after - before if before is not None and after is not None else None
+    return ClassifierResult(
+        labels=labels,
+        npu_busy_delta_us=_as_int_or_none(data.get("npu_busy_delta_us")),
+        sysfs_npu_busy_delta_us=_as_int_or_none(data.get("sysfs_npu_busy_delta_us")),
+        outer_sysfs_delta_us=outer,
+        live=True,
+    )
+
+
+def validate_classifier_url(classifier_url: str) -> str:
+    """Validate the local-only classifier endpoint before any POST is attempted."""
+    parsed = urllib.parse.urlparse(classifier_url)
+    if parsed.scheme not in {"http", "https"}:
+        raise ContextGateError("invalid_classifier_url:scheme_must_be_http_or_https")
+    host = parsed.hostname
+    if not host:
+        raise ContextGateError("invalid_classifier_url:missing_host")
+    host_normalized = host.lower().rstrip(".")
+    if host_normalized == "localhost":
+        return classifier_url
+    try:
+        address = ipaddress.ip_address(host_normalized)
+    except ValueError as exc:
+        raise ContextGateError("invalid_classifier_url:host_must_be_loopback") from exc
+    if not address.is_loopback:
+        raise ContextGateError("invalid_classifier_url:host_must_be_loopback")
+    return classifier_url
+
+
+def _as_int_or_none(value: Any) -> int | None:
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return None
+
+
+def classify_offline(query: str, context: Mapping[str, Any] | None = None, warning: str | None = None) -> ClassifierResult:
+    return ClassifierResult(
+        labels=heuristic_labels(query, context),
+        npu_busy_delta_us=None,
+        sysfs_npu_busy_delta_us=None,
+        outer_sysfs_delta_us=None,
+        live=False,
+        warning=warning or "offline_heuristic_classifier_no_npu_claim",
+    )
+
+
+def _has_any(text: str, needles: list[str]) -> bool:
+    return any(n in text for n in needles)
+
+
+def _source(source: str, action: str, reason: str, priority: int, freshness: str, confidence: float) -> dict[str, Any]:
+    assert source in _ALLOWED_SOURCES
+    return {
+        "source": source,
+        "action": action,
+        "reason": reason,
+        "priority": priority,
+        "freshness": freshness,
+        "permission": "tool_required_by_authoritative_agent" if source != "no_retrieval" else "none",
+        "missing_behavior": "retrieve_or_mark_missing" if source != "no_retrieval" else "skip_retrieval",
+        "confidence": round(confidence, 2),
+    }
+
+
+def select_sources(query: str, labels: Mapping[str, Any], context: Mapping[str, Any], max_sources: int) -> list[dict[str, Any]]:
+    text = query.lower()
+    sources: list[dict[str, Any]] = []
+    category = str(_label_value(labels, "workflow_category", "unknown"))
+    memory_candidate = str(_label_value(labels, "memory_candidate", "none"))
+    tool_needed = bool(_label_value(labels, "tool_needed", False))
+
+    if tool_needed or _has_any(text, ["current", "now", "health", "port", "process", "systemd", "status", "npu", "listening", "logs", "time", "date"]):
+        sources.append(_source("live_system", "inspect_with_terminal_or_domain_tool", "current service/system state requested", 1, "live_required", 0.9))
+
+    if context.get("repo_path") or category == "coding" or _has_any(text, ["repo", "code", "file", "test", "pytest", "diff", "implementation", "hermes", "atlas"]):
+        sources.append(_source("repo_files", "inspect_explicit_repo_paths", "repo-specific implementation or config context", 2, "current_filesystem", 0.84))
+
+    if _has_any(text, ["where did we leave", "what did we decide", "previous", "earlier", "handoff", "prior", "last time"]):
+        sources.append(_source("session_search", "search_prior_sessions_or_kanban_handoffs", "prior decision or handoff requested", 3, "session-era", 0.82))
+
+    if _has_any(text, ["runbook", "note", "obsidian", "rag", "docs", "knowledge", "plan"]):
+        sources.append(_source("rag_search", "query_local_index_read_only", "local docs or indexed knowledge likely useful", 4, "cached_index", 0.76))
+
+    if memory_candidate != "none" or _has_any(text, ["preference", "remember", "profile", "durable fact"]):
+        sources.append(_source("durable_memory", "read_stable_facts_only", "stable preference/environment facts may be relevant", 5, "static", 0.72))
+
+    if _has_any(text, ["latest", "news", "version", "release", "public", "web"]):
+        sources.append(_source("web", "search_public_current_sources", "current external public fact requested", 6, "live_external", 0.7))
+
+    if not sources:
+        sources.append(_source("no_retrieval", "answer_directly", "no factual retrieval dependency detected", 1, "none", 0.78))
+
+    # Stable priority order and bounded compact plan.
+    seen: set[str] = set()
+    deduped = []
+    for item in sorted(sources, key=lambda x: x["priority"]):
+        if item["source"] not in seen:
+            seen.add(item["source"])
+            deduped.append(item)
+    return deduped[:max_sources]
+
+
+def select_bundle_name(query: str, labels: Mapping[str, Any], context: Mapping[str, Any]) -> str:
+    text = query.lower()
+    category = str(_label_value(labels, "workflow_category", "unknown"))
+    if context.get("platform") == "kanban" or context.get("task_id") or category == "coding":
+        return "CodingTaskBundle"
+    if category in {"devops", "debugging"} or _has_any(text, ["health", "port", "systemd", "npu", "service", "logs"]):
+        return "OpsDebugBundle"
+    if category in {"note_taking", "productivity"} or _has_any(text, ["preference", "remember", "profile"]):
+        return "PersonalAssistantBundle"
+    if "no_retrieval" in [s["source"] for s in select_sources(query, labels, context, 1)]:
+        return "SimpleResponseBundle"
+    return "ResearchBundle"
+
+
+def _field(field: str, shape: str, source: str, freshness: str, missing: str, privacy: str, confidence: float = 0.8) -> dict[str, Any]:
+    return {
+        "field": field,
+        "shape": shape,
+        "source_of_truth": source,
+        "freshness": freshness,
+        "provenance_required": True,
+        "missing_behavior": missing,
+        "privacy": privacy,
+        "confidence": round(confidence, 2),
+    }
+
+
+def build_bundle_plan(bundle_name: str, sources: Sequence[Mapping[str, Any]], query: str, labels: Mapping[str, Any]) -> dict[str, Any]:
+    safety_required = bool(_label_value(labels, "safety_confirmation_required", False))
+    source_names = {s["source"] for s in sources}
+    if bundle_name == "OpsDebugBundle":
+        required = [
+            _field("problem_statement", "compact_text", "user", "request", "mark_missing", "query_text_only"),
+            _field("target_scope", "service_repo_or_host", "query_or_classifier", "request", "ask_or_infer_low_confidence", "no_private_paths_beyond_explicit"),
+            _field("live_state", "status_table", "live_system", "live_required", "retrieve_or_fail_closed", "no_raw_logs_by_default"),
+            _field("safety_gates", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
+            _field("provenance", "tool_names_and_paths", "executing_agent", "run", "mark_missing", "paths_only"),
+        ]
+    elif bundle_name == "CodingTaskBundle":
+        required = [
+            _field("repo_root", "absolute_path", "task_or_context", "current", "ask_or_fail", "explicit_path_only"),
+            _field("git_state", "branch_dirty_counts", "live_system", "live_required", "retrieve_or_fail_closed", "no_diff_dump_by_default"),
+            _field("requirements", "bullet_summary", "user_kanban_files", "current", "retrieve_or_mark_missing", "no_private_snippets"),
+            _field("relevant_paths", "path_list", "repo_files", "current_filesystem", "search_narrowly", "paths_only"),
+            _field("tests_or_smokes", "command_list", "repo_files", "current_filesystem", "mark_missing", "commands_only"),
+            _field("review_gates", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
+        ]
+    elif bundle_name == "PersonalAssistantBundle":
+        required = [
+            _field("user_intent", "compact_text", "user", "request", "mark_missing", "query_text_only"),
+            _field("durable_facts_needed", "fact_keys", "durable_memory", "static", "retrieve_or_mark_missing", "no_raw_memory_dump"),
+            _field("prior_decisions_needed", "session_refs", "session_search", "session-era", "retrieve_or_mark_missing", "summaries_only"),
+            _field("privacy_boundary", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
+            _field("action_authority", "closed_gate_map", "policy", "static", "fail_closed", "no_private_data"),
+        ]
+    elif bundle_name == "SimpleResponseBundle":
+        required = []
+    else:
+        required = [
+            _field("research_question", "compact_text", "user", "request", "mark_missing", "query_text_only"),
+            _field("source_plan", "ordered_source_list", "context_gate", "run", "mark_missing", "no_private_snippets"),
+            _field("evidence_requirements", "provenance_rules", "policy", "static", "fail_closed", "no_private_data"),
+            _field("freshness_cutoff", "freshness_policy", "classifier_query", "request", "mark_missing", "no_private_data"),
+            _field("missing_data_behavior", "policy_enum", "policy", "static", "fail_closed", "no_private_data"),
+        ]
+
+    blocked = []
+    if safety_required or re.search(r"\b(route|routing|restart|send|write memory|reindex|delete|mutate)\b", query.lower()):
+        blocked.append(_field("authority_side_effect", "approval_required", "policy", "static", "fail_closed", "no_side_effects_in_v1", 0.95))
+    if "rag_search" in source_names:
+        blocked.append(_field("vector_db_mutation", "not_allowed", "policy", "static", "fail_closed", "read_only_query_plan", 0.95))
+    return {"bundle_name": bundle_name, "required_fields": required, "optional_fields": [], "blocked_fields": blocked}
+
+
+def summarize_query_class(labels: Mapping[str, Any]) -> dict[str, Any]:
+    return {
+        "workflow_category": _label_value(labels, "workflow_category", "unknown"),
+        "urgency": _label_value(labels, "urgency", "normal"),
+        "tool_needed": bool(_label_value(labels, "tool_needed", False)),
+        "memory_candidate": _label_value(labels, "memory_candidate", "none"),
+        "safety_confirmation_required": bool(_label_value(labels, "safety_confirmation_required", False)),
+        "confidence": round(max(
+            _label_confidence(labels, "workflow_category", 0.5),
+            _label_confidence(labels, "tool_needed", 0.5),
+            _label_confidence(labels, "safety_confirmation_required", 0.5),
+        ), 2),
+    }
+
+
+def npu_proof_from_classifier(result: ClassifierResult, require_npu_proof: bool) -> tuple[dict[str, Any], list[str]]:
+    endpoint_delta = result.npu_busy_delta_us
+    endpoint_sysfs_delta = result.sysfs_npu_busy_delta_us
+    outer_delta = result.outer_sysfs_delta_us
+    positive_endpoint_sysfs = endpoint_sysfs_delta is not None and endpoint_sysfs_delta > 0
+    positive_outer = outer_delta is not None and outer_delta > 0
+    verified = bool(result.live and (positive_endpoint_sysfs or positive_outer))
+    warnings: list[str] = []
+    if result.warning:
+        warnings.append(result.warning)
+    if require_npu_proof and not verified:
+        warnings.append("npu_proof_inconclusive")
+    return {
+        "classifier_delta_us": endpoint_delta,
+        "classifier_sysfs_delta_us": endpoint_sysfs_delta,
+        "outer_sysfs_delta_us": outer_delta,
+        "rerank_delta_us": None,
+        "verified": verified,
+        "required": require_npu_proof,
+        "classifier_live": result.live,
+    }, warnings
+
+
+def build_plan(
+    query: str,
+    *,
+    context: Mapping[str, Any] | None = None,
+    options: Mapping[str, Any] | None = None,
+    classifier: ClassifierResult | None = None,
+) -> dict[str, Any]:
+    if not query or not query.strip():
+        raise ContextGateError("query_required")
+    context = dict(context or {})
+    options = dict(options or {})
+    if options.get("dry_run", True) is not True:
+        raise ContextGateError("dry_run_must_remain_true_in_v1")
+    if options.get("include_private_text", False):
+        raise ContextGateError("include_private_text_not_allowed_in_v1")
+    max_sources = max(1, min(6, int(options.get("max_sources", 4))))
+    require_npu = bool(options.get("require_npu_proof", True))
+    if classifier is None:
+        classifier = classify_offline(query, context)
+    labels = classifier.labels
+    source_plan = select_sources(query, labels, context, max_sources)
+    bundle_name = select_bundle_name(query, labels, context)
+    npu_proof, warnings = npu_proof_from_classifier(classifier, require_npu)
+    plan = {
+        "schema": SCHEMA,
+        "trace_id": options.get("trace_id") or context.get("trace_id"),
+        "dry_run": True,
+        "ok": True,
+        "query_class": summarize_query_class(labels),
+        "source_plan": source_plan,
+        "bundle_plan": build_bundle_plan(bundle_name, source_plan, query, labels),
+        "npu_proof": npu_proof,
+        "authority": dict(AUTHORITY),
+        "gates": dict(GATES),
+        "warnings": warnings,
+    }
+    validate_plan(plan)
+    return plan
+
+
+def validate_plan(plan: Mapping[str, Any]) -> None:
+    if plan.get("schema") != SCHEMA:
+        raise ContextGateError("invalid_schema")
+    if plan.get("dry_run") is not True:
+        raise ContextGateError("dry_run_missing")
+    if plan.get("authority") != AUTHORITY:
+        raise ContextGateError("authority_not_closed")
+    sources = plan.get("source_plan")
+    if not isinstance(sources, list) or not sources:
+        raise ContextGateError("source_plan_required")
+    for item in sources:
+        if item.get("source") not in _ALLOWED_SOURCES:
+            raise ContextGateError(f"invalid_source:{item.get('source')}")
+    required_blocks = ["query_class", "bundle_plan", "npu_proof", "gates"]
+    for block in required_blocks:
+        if block not in plan:
+            raise ContextGateError(f"missing_block:{block}")
+
+
+def compact_line(plan: Mapping[str, Any]) -> str:
+    sources = ",".join(str(s["source"]) for s in plan["source_plan"])
+    closed = "route,memory,send,tools,restart,vector,private_roots,config"
+    warnings = ",".join(plan.get("warnings") or []) or "none"
+    return (
+        f"ok={str(plan['ok']).lower()} schema={plan['schema']} "
+        f"bundle={plan['bundle_plan']['bundle_name']} sources={sources} "
+        f"source_count={len(plan['source_plan'])} "
+        f"npu_verified={str(plan['npu_proof']['verified']).lower()} "
+        f"classifier_delta_us={plan['npu_proof'].get('classifier_delta_us')} "
+        f"outer_sysfs_delta_us={plan['npu_proof'].get('outer_sysfs_delta_us')} "
+        f"gates=closed:{closed} warnings={warnings}"
+    )
+
+
+def compact_json(plan: Mapping[str, Any]) -> str:
+    compact = {
+        "schema": plan["schema"],
+        "ok": plan["ok"],
+        "dry_run": plan["dry_run"],
+        "bundle_name": plan["bundle_plan"]["bundle_name"],
+        "sources": [s["source"] for s in plan["source_plan"]],
+        "source_count": len(plan["source_plan"]),
+        "query_class": plan["query_class"],
+        "npu_proof": plan["npu_proof"],
+        "authority": plan["authority"],
+        "gates_closed": list(plan["gates"].keys()),
+        "warnings": plan.get("warnings", []),
+    }
+    return json.dumps(compact, sort_keys=True, separators=(",", ":"))
@@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+"""Thin repo-local wrapper for the Atlas/Hermes context-gate advisory CLI."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+from openvino_context_gate.cli import main  # noqa: E402
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,200 @@
+from __future__ import annotations
+
+import json
+import socket
+import subprocess
+import sys
+import threading
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+from openvino_context_gate.context_gate import (  # noqa: E402
+    AUTHORITY,
+    ClassifierResult,
+    ContextGateError,
+    build_plan,
+    classify_live,
+    compact_json,
+    compact_line,
+)
+
+
+def fake_classifier(
+    labels: dict,
+    *,
+    endpoint_delta: int | None = 120,
+    sysfs_delta: int | None = 120,
+    outer_delta: int | None = 80,
+) -> ClassifierResult:
+    return ClassifierResult(
+        labels=labels,
+        npu_busy_delta_us=endpoint_delta,
+        sysfs_npu_busy_delta_us=sysfs_delta,
+        outer_sysfs_delta_us=outer_delta,
+        live=True,
+    )
+
+
+def labels(category: str, *, tool: bool = False, safety: bool = False, memory: str = "none") -> dict:
+    return {
+        "tool_needed": {"value": tool, "confidence": 0.8 if tool else 0.4},
+        "memory_candidate": {"value": memory, "confidence": 0.8 if memory != "none" else 0.3},
+        "urgency": {"value": "normal", "confidence": 0.6},
+        "workflow_category": {"value": category, "confidence": 0.86},
+        "safety_confirmation_required": {"value": safety, "confidence": 0.9 if safety else 0.1},
+    }
+
+
+def test_current_npu_debug_query_selects_ops_live_and_repo_sources() -> None:
+    plan = build_plan(
+        "How do I check whether the RAG reranker is using the NPU?",
+        context={"platform": "cli", "repo_path": "/home/will/lab/swarm"},
+        classifier=fake_classifier(labels("devops", tool=True)),
+    )
+    assert plan["schema"] == "atlas_context_gate_plan_v1"
+    assert plan["bundle_plan"]["bundle_name"] == "OpsDebugBundle"
+    assert [s["source"] for s in plan["source_plan"]][:2] == ["live_system", "repo_files"]
+    assert plan["npu_proof"]["verified"] is True
+    assert plan["authority"] == AUTHORITY
+    assert all(value.startswith("closed_") for value in plan["gates"].values())
+
+
+def test_prior_plan_query_uses_session_or_rag_and_coding_for_kanban() -> None:
+    plan = build_plan(
+        "Where did we leave the NPU context gate implementation plan?",
+        context={"platform": "kanban", "task_id": "t_example", "repo_path": "/home/will/lab/swarm"},
+        classifier=fake_classifier(labels("coding", tool=True)),
+    )
+    sources = [s["source"] for s in plan["source_plan"]]
+    assert plan["bundle_plan"]["bundle_name"] == "CodingTaskBundle"
+    assert "repo_files" in sources
+    assert "session_search" in sources
+    assert "rag_search" in sources
+
+
+def test_simple_creative_query_no_retrieval_offline_no_npu_claim() -> None:
+    plan = build_plan("Write a haiku about Seattle rain.")
+    assert plan["bundle_plan"]["bundle_name"] == "SimpleResponseBundle"
+    assert [s["source"] for s in plan["source_plan"]] == ["no_retrieval"]
+    assert plan["npu_proof"]["verified"] is False
+    assert "npu_proof_inconclusive" in plan["warnings"]
+    assert "offline_heuristic_classifier_no_npu_claim" in plan["warnings"]
+
+
+def test_unsafe_live_routing_request_keeps_authority_closed_and_blocks_side_effect() -> None:
+    plan = build_plan(
+        "Change Hermes live routing to use the classifier automatically.",
+        context={"repo_path": "/home/will/lab/swarm"},
+        classifier=fake_classifier(labels("coding", tool=True, safety=True)),
+    )
+    assert plan["authority"] == AUTHORITY
+    assert plan["authority"]["may_route"] is False
+    assert any(field["field"] == "authority_side_effect" for field in plan["bundle_plan"]["blocked_fields"])
+    assert plan["gates"]["live_routing_change"] == "closed_requires_explicit_approval"
+
+
+def test_rejects_non_dry_run_and_private_text_options() -> None:
+    with pytest.raises(ContextGateError, match="dry_run_must_remain_true"):
+        build_plan("hello", options={"dry_run": False})
+    with pytest.raises(ContextGateError, match="include_private_text"):
+        build_plan("hello", options={"include_private_text": True})
+
+
+def test_compact_outputs_are_small_and_parseable() -> None:
+    plan = build_plan("How do I check whether port 18819 is healthy?")
+    line = compact_line(plan)
+    assert "schema=atlas_context_gate_plan_v1" in line
+    assert "gates=closed:" in line
+    parsed = json.loads(compact_json(plan))
+    assert parsed["schema"] == "atlas_context_gate_plan_v1"
+    assert isinstance(parsed["sources"], list)
+    assert "authority" in parsed
+
+
+def test_cli_offline_compact_json_smoke() -> None:
+    script = REPO_ROOT / "scripts" / "context-gate-advisory.py"
+    result = subprocess.run(
+        [sys.executable, str(script), "--offline", "--query", "Write a haiku about Seattle rain.", "--format", "compact-json"],
+        check=True,
+        text=True,
+        capture_output=True,
+        cwd=REPO_ROOT,
+    )
+    parsed = json.loads(result.stdout)
+    assert parsed["ok"] is True
+    assert parsed["bundle_name"] == "SimpleResponseBundle"
+    assert parsed["sources"] == ["no_retrieval"]
+    assert parsed["npu_proof"]["verified"] is False
+
+
+def test_npu_proof_requires_positive_sysfs_delta() -> None:
+    classifier = fake_classifier(labels("devops", tool=True), endpoint_delta=120, sysfs_delta=0, outer_delta=None)
+    plan = build_plan("How do I check whether the RAG reranker is using the NPU?", classifier=classifier)
+    assert plan["npu_proof"]["verified"] is False
+    assert "npu_proof_inconclusive" in plan["warnings"]
+
+    endpoint_sysfs_plan = build_plan(
+        "How do I check whether the RAG reranker is using the NPU?",
+        classifier=fake_classifier(labels("devops", tool=True), endpoint_delta=120, sysfs_delta=1, outer_delta=None),
+    )
+    assert endpoint_sysfs_plan["npu_proof"]["verified"] is True
+
+    outer_sysfs_plan = build_plan(
+        "How do I check whether the RAG reranker is using the NPU?",
+        classifier=fake_classifier(labels("devops", tool=True), endpoint_delta=120, sysfs_delta=0, outer_delta=1),
+    )
+    assert outer_sysfs_plan["npu_proof"]["verified"] is True
+
+
+def test_classifier_url_must_be_loopback_or_localhost() -> None:
+    for url in [
+        "http://example.com/v1/classify",
+        "https://10.0.0.5/v1/classify",
+        "http://0.0.0.0:18819/v1/classify",
+        "ftp://127.0.0.1/v1/classify",
+    ]:
+        with pytest.raises(ContextGateError, match="invalid_classifier_url"):
+            classify_live("hello", classifier_url=url, timeout=0.01)
+
+
+def test_classifier_url_redirect_to_non_loopback_is_not_followed(monkeypatch: pytest.MonkeyPatch) -> None:
+    requests: list[str] = []
+
+    class RedirectHandler(BaseHTTPRequestHandler):
+        def do_POST(self) -> None:  # noqa: N802 - stdlib callback name
+            requests.append(self.path)
+            self.send_response(302)
+            self.send_header("Location", "http://example.com/v1/classify")
+            self.end_headers()
+
+        def log_message(self, format: str, *args: object) -> None:
+            return
+
+    original_create_connection = socket.create_connection
+
+    def guarded_create_connection(address, *args, **kwargs):  # type: ignore[no-untyped-def]
+        host = address[0]
+        if host not in {"127.0.0.1", "localhost"}:
+            raise AssertionError(f"attempted non-loopback redirect connection to {host}")
+        return original_create_connection(address, *args, **kwargs)
+
+    server = ThreadingHTTPServer(("127.0.0.1", 0), RedirectHandler)
+    thread = threading.Thread(target=server.serve_forever, daemon=True)
+    thread.start()
+    monkeypatch.setattr(socket, "create_connection", guarded_create_connection)
+    try:
+        url = f"http://127.0.0.1:{server.server_port}/v1/classify"
+        with pytest.raises(ContextGateError, match="classifier_unavailable"):
+            classify_live("hello", classifier_url=url, timeout=1.0)
+    finally:
+        server.shutdown()
+        server.server_close()
+        thread.join(timeout=2)
+
+    assert requests == ["/v1/classify"]