feat(audit): replace probe baseline workflow with live anonymized capture

This commit is contained in:
William Valentin
2026-02-26 23:41:13 -08:00
parent c96aca5f1d
commit 4b07a1f166
13 changed files with 968 additions and 10 deletions
+31 -9
View File
@@ -66,20 +66,42 @@
"phase0-ticket-0.5-docs-diagram-state-sync": {
"status": "completed",
"date": "2026-02-25",
"updated": "2026-02-25",
"summary": "Updated protocol/docs/diagrams for phase-0 telemetry fields, documented baseline workflow, and generated phase-0 baseline artifacts using a probe log with representative channel + gateway run/reaction events.",
"updated": "2026-02-27",
"summary": "Updated protocol/docs/diagrams for phase-0 telemetry fields, documented baseline workflow, and replaced the original probe-only baseline workflow with anonymized live channel-session audit artifacts (`phase0_baseline_live_2026-02-27.*`).",
"files_modified": [
"README.md",
"docs/api/PROTOCOL.md",
"docs/architecture/AGENT_DIAGRAM.md",
"docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md",
"docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md",
"docs/plans/artifacts/phase0_baseline_probe_2026-02-25.jsonl",
"docs/plans/artifacts/phase0_baseline_2026-02-25.md",
"docs/plans/artifacts/phase0_baseline_2026-02-25.json",
"docs/plans/artifacts/phase0_baseline_live_2026-02-27.jsonl",
"docs/plans/artifacts/phase0_baseline_live_2026-02-27.md",
"docs/plans/artifacts/phase0_baseline_live_2026-02-27.json",
"docs/plans/state.json"
],
"test_status": "pnpm audit:phase0-baseline --audit docs/plans/artifacts/phase0_baseline_probe_2026-02-25.jsonl --format markdown/json (probe log with representative events)"
"test_status": "pnpm audit:phase0-baseline:live + pnpm test:run src/audit/phase0LiveBaseline.test.ts src/audit/phase0BaselineSummary.test.ts + pnpm typecheck passing"
},
"phase0-live-baseline-capture-tooling": {
"status": "completed",
"date": "2026-02-27",
"updated": "2026-02-27",
"summary": "Added a dedicated live phase-0 baseline capture flow that reads audit logs, filters run/reaction telemetry, excludes probe sessions, anonymizes session/sender/request IDs, and writes sample + summary artifacts for operational refreshes.",
"files_modified": [
"src/audit/phase0LiveBaseline.ts",
"src/audit/phase0LiveBaseline.test.ts",
"scripts/capture-phase0-live-baseline.ts",
"package.json",
"README.md",
"docs/api/PROTOCOL.md",
"docs/architecture/AGENT_DIAGRAM.md",
"docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md",
"docs/plans/2026-02-25-phase0-instrumentation-ticket-checklist.md",
"docs/plans/artifacts/phase0_baseline_live_2026-02-27.jsonl",
"docs/plans/artifacts/phase0_baseline_live_2026-02-27.md",
"docs/plans/artifacts/phase0_baseline_live_2026-02-27.json",
"docs/plans/state.json"
],
"test_status": "pnpm audit:phase0-baseline:live + pnpm test:run src/audit/phase0LiveBaseline.test.ts src/audit/phase0BaselineSummary.test.ts + pnpm typecheck passing"
},
"phase0-instrumentation-ticket-checklist": {
"status": "completed",
@@ -6786,7 +6808,7 @@
"test_status": "docs only"
},
"personal-assistant-productization-plan-2026-02-26": {
"status": "in_progress",
"status": "completed",
"date": "2026-02-26",
"updated": "2026-02-27",
"summary": "Rebaselined Flynn's OpenClaw-style personal-assistant gaps and defined an execution-ready 8-10 week roadmap. Phase 3 browser reliability, Phase 1 companion reconnect/handoff reliability, Phase 2 voice daily-driver reliability (talk controls + TTS provider fallback/health + interruption-safe voice cancel semantics), and Phase 4 onboarding first-success funnel improvements are now shipped.",
@@ -7266,8 +7288,8 @@
"deeper_surfaces_phase0_ticket_02": "completed — gateway + daemon routing emit run lifecycle/cancel telemetry and reaction match/skip audit events with filter summaries and cancellation latency, plus focused tests",
"deeper_surfaces_phase0_ticket_03": "completed — gateway metrics now track run-state outcomes, cancel latency samples, and reaction decision counters with routing/gateway emitters",
"deeper_surfaces_phase0_ticket_04": "completed — added phase-0 baseline summary tooling for run outcomes, cancel latency, and reaction decisions with markdown/json CLI output",
"deeper_surfaces_phase0_ticket_05": "completed — documented phase-0 telemetry fields/workflow, refreshed architecture/protocol docs, and generated baseline artifacts from a probe log with representative channel + gateway events",
"next_up": "Replace probe baseline artifacts with live audit samples once gateway/channel sessions emit real run/reaction events",
"deeper_surfaces_phase0_ticket_05": "completed — documented phase-0 telemetry fields/workflow, refreshed architecture/protocol docs, and generated anonymized live baseline artifacts from real channel audit traffic (probe-only artifact workflow superseded)",
"next_up": "Capture a gateway-origin live phase-0 baseline sample (including run.cancel/cancelled paths) and append as a second live artifact window alongside the channel sample",
"pi_embedded_canary_spike": "completed — added optional pi_embedded backend adapter, canary-safe no-tools routing guard, backend success/fallback latency audit events, and docs/diagram updates while native remains default",
"pi_embedded_evaluation_phase": "completed — final decision rollback (applied in runtime config): Window A failed latency/fallback gates (p50 +259ms, p95 +5695ms, fallback 25%, categories: pi_module_interface/empty_assistant_text); Window B remained sample-insufficient; controlled probes verified guard coverage (pi_no_tools_mode/capability_query/attachments_present each hit once)",
"pi_embedded_manual_mode": "completed — added persisted runtime backend controls for manual Pi activation/deactivation (`/runtime` preferred, `/backend` alias; `status`, `activate pi`, `deactivate pi`, `use config`) while keeping config-driven default routing",