Document browser reliability layer and roadmap progress

2026-02-26 14:06:53 -08:00
parent 7c904ef0fd
commit e9873ad22b
6 changed files with 52 additions and 8 deletions
@@ -6786,15 +6786,37 @@
      "test_status": "docs only"
    },
    "personal-assistant-productization-plan-2026-02-26": {
-      "status": "proposed",
+      "status": "in_progress",
      "date": "2026-02-26",
      "updated": "2026-02-26",
-      "summary": "Rebaselined Flynn's OpenClaw-style personal-assistant gaps and defined an execution-ready 8-10 week productization roadmap focused on shipped companion apps, voice daily-driver reliability, browser workflow reliability, and onboarding first-success funnel metrics.",
+      "summary": "Rebaselined Flynn's OpenClaw-style personal-assistant gaps and defined an execution-ready 8-10 week roadmap. Phase 3 browser reliability work is now shipped (workflow primitives, retries/budgets/guardrails, checkpoints), with companion/voice/onboarding phases remaining.",
      "files_modified": [
        "docs/plans/2026-02-26-personal-assistant-productization-plan.md",
        "docs/plans/state.json"
      ],
-      "test_status": "planning/docs update only; no runtime code changes"
+      "test_status": "roadmap status updated; implementation tracked in phase-specific entries"
+    },
+    "personal-assistant-productization-phase3-browser-reliability": {
+      "status": "completed",
+      "date": "2026-02-26",
+      "updated": "2026-02-26",
+      "summary": "Implemented Phase 3 browser workflow reliability layer: added `browser.wait_for`, `browser.assert`, `browser.extract`, checkpoint save/resume tools, retry wrappers, domain allowlist + high-risk confirmation guardrails, and bounded workflow-step budgets wired through config and daemon registration.",
+      "files_modified": [
+        "src/tools/builtin/browser/tools.ts",
+        "src/tools/builtin/browser/tools.test.ts",
+        "src/daemon/tools.ts",
+        "src/tools/policy.ts",
+        "src/config/schema.ts",
+        "src/config/schema.test.ts",
+        "config/default.yaml",
+        "README.md",
+        "docs/api/TOOLS.md",
+        "docs/api/PROTOCOL.md",
+        "docs/architecture/AGENT_DIAGRAM.md",
+        "docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md",
+        "docs/plans/state.json"
+      ],
+      "test_status": "`pnpm test:run src/tools/builtin/browser/tools.test.ts src/config/schema.test.ts src/tools/policy.test.ts` and `pnpm typecheck` passing"
    },
    "subagents-support-phase1": {
      "status": "completed",
@@ -6830,7 +6852,7 @@
    }
  },
  "overall_progress": {
-    "total_test_count": 2534,
+    "total_test_count": 2544,
    "all_tests_passing": true,
    "p0_completion": "3/3 (100%)",
    "p1_completion": "4/4 (100%)",
@@ -6845,7 +6867,7 @@
    "tier2_completion": "4/4 (100%) \u2014 inbound webhooks, vector memory search, Dockerfile, heartbeat monitor",
    "tier3_completion": "5/5 (100%) \u2014 lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings",
    "tier4_completion": "4/4 (100%) \u2014 gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes",
-    "feature_gap_scorecard": "rebaselined 2026-02-26 — channel breadth, setup wizard, baseline browser automation, and full subagent support (`subagent.*` + queue modes + budgets + trace/audit + `/subagents` inspection) are implemented; remaining high-impact personal-assistant gaps center on shipped companion apps (desktop/mobile), voice UX polish, browser workflow reliability primitives, and first-success onboarding funnel optimization.",
+    "feature_gap_scorecard": "rebaselined 2026-02-26; updated the same day for Phase 3 — channel breadth, setup wizard, baseline browser automation, subagent controls, and browser workflow reliability primitives (wait/assert/extract/retries/checkpoints/guardrails/budgets) are implemented; remaining high-impact personal-assistant gaps center on shipped companion apps (desktop/mobile), voice UX polish, and first-success onboarding funnel optimization.",
    "operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete \u2014 milestone done",
    "dashboard_observability": "completed \u2014 service health graphs + core service log viewer added to web UI via observability RPCs and bounded backend sampling",
    "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
@@ -6878,7 +6900,7 @@
    "deeper_surfaces_phase3_companion_canvas_voice": "completed \u2014 companion reconnect resilience (auto-reconnect with backoff, pending-wait cancellation on disconnect), canvas artifact persistence (SQLite-backed store, daemon-restart durability), voice TTS fallback coverage (text-only reply on TTS failure, no dropped responses)",
    "deeper_surfaces_phase4_rollout": "completed \u2014 phase 4 rollout and operator readiness plan documented: canary rollout plan by feature flag/surface, explicit rollback playbook, operator docs and architecture/protocol docs synchronized",
    "post_phase_test_fixes": "completed \u2014 fixed 4 test failures introduced by phases 1-3: iOS/Android push listNodes (missing publishHeartbeat before platform-filtered query), server.test agent.send (run_state events now precede done; added sendAndWaitForDone helper), httpBody 413 (req.destroy() closed socket before response could be sent; replaced with Connection: close header on 413 responses)",
-    "personal_assistant_productization_plan": "proposed \u2014 8-10 week phased roadmap defined (companion MVP surfaces, voice reliability hardening, browser workflow reliability layer, onboarding 2.0 first-success funnel) with measurable exit gates.",
+    "personal_assistant_productization_plan": "in_progress \u2014 8-10 week phased roadmap active; Phase 3 browser workflow reliability layer shipped (wait/assert/extract/checkpoints + guardrails/retries/budgets). Remaining phases: companion MVP surfaces, voice reliability hardening, and onboarding 2.0 first-success funnel.",
    "subagents_support": "completed \u2014 subagent phases 1-3 shipped with `subagent.spawn/send/list/cancel/delete/summary`, per-child queue mode (`followup|interrupt`), budgets (`max_turns`, `max_total_tokens`, `turn_timeout_ms`), tool-profile overrides, trace-linked audit events, `/subagents` inspection commands, and focused regression tests."
  },
  "soul_md_and_cron_create": {