Document browser reliability layer and roadmap progress

This commit is contained in:
William Valentin
2026-02-26 14:06:53 -08:00
parent 7c904ef0fd
commit e9873ad22b
6 changed files with 52 additions and 8 deletions
+28 -6
View File
@@ -6786,15 +6786,37 @@
"test_status": "docs only"
},
"personal-assistant-productization-plan-2026-02-26": {
"status": "proposed",
"status": "in_progress",
"date": "2026-02-26",
"updated": "2026-02-26",
"summary": "Rebaselined Flynn's OpenClaw-style personal-assistant gaps and defined an execution-ready 8-10 week productization roadmap focused on shipped companion apps, voice daily-driver reliability, browser workflow reliability, and onboarding first-success funnel metrics.",
"summary": "Rebaselined Flynn's OpenClaw-style personal-assistant gaps and defined an execution-ready 8-10 week roadmap. Phase 3 browser reliability work is now shipped (workflow primitives, retry/budget/guardrails, checkpoints), with companion/voice/onboarding phases remaining.",
"files_modified": [
"docs/plans/2026-02-26-personal-assistant-productization-plan.md",
"docs/plans/state.json"
],
"test_status": "planning/docs update only; no runtime code changes"
"test_status": "roadmap status updated; implementation tracked in phase-specific entries"
},
"personal-assistant-productization-phase3-browser-reliability": {
"status": "completed",
"date": "2026-02-26",
"updated": "2026-02-26",
"summary": "Implemented Phase 3 browser workflow reliability layer: added `browser.wait_for`, `browser.assert`, `browser.extract`, checkpoint save/resume tools, retry wrappers, domain allowlist + high-risk confirmation guardrails, and bounded workflow-step budgets wired through config and daemon registration.",
"files_modified": [
"src/tools/builtin/browser/tools.ts",
"src/tools/builtin/browser/tools.test.ts",
"src/daemon/tools.ts",
"src/tools/policy.ts",
"src/config/schema.ts",
"src/config/schema.test.ts",
"config/default.yaml",
"README.md",
"docs/api/TOOLS.md",
"docs/api/PROTOCOL.md",
"docs/architecture/AGENT_DIAGRAM.md",
"docs/architecture/GATEWAY_SESSIONS_AND_QUEUE.md",
"docs/plans/state.json"
],
"test_status": "pnpm test:run src/tools/builtin/browser/tools.test.ts src/config/schema.test.ts src/tools/policy.test.ts + pnpm typecheck passing"
},
"subagents-support-phase1": {
"status": "completed",
@@ -6830,7 +6852,7 @@
}
},
"overall_progress": {
"total_test_count": 2534,
"total_test_count": 2544,
"all_tests_passing": true,
"p0_completion": "3/3 (100%)",
"p1_completion": "4/4 (100%)",
@@ -6845,7 +6867,7 @@
"tier2_completion": "4/4 (100%) \u2014 inbound webhooks, vector memory search, Dockerfile, heartbeat monitor",
"tier3_completion": "5/5 (100%) \u2014 lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings",
"tier4_completion": "4/4 (100%) \u2014 gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes",
"feature_gap_scorecard": "rebaselined 2026-02-26 — channel breadth, setup wizard, baseline browser automation, and full subagent support (`subagent.*` + queue modes + budgets + trace/audit + `/subagents` inspection) are implemented; remaining high-impact personal-assistant gaps center on shipped companion apps (desktop/mobile), voice UX polish, browser workflow reliability primitives, and first-success onboarding funnel optimization.",
"feature_gap_scorecard": "rebaselined 2026-02-26 and updated 2026-02-26 (phase 3) — channel breadth, setup wizard, baseline browser automation, subagent controls, and browser workflow reliability primitives (wait/assert/extract/retries/checkpoints/guardrails/budgets) are implemented; remaining high-impact personal-assistant gaps center on shipped companion apps (desktop/mobile), voice UX polish, and first-success onboarding funnel optimization.",
"operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete \u2014 milestone done",
"dashboard_observability": "completed \u2014 service health graphs + core service log viewer added to web UI via observability RPCs and bounded backend sampling",
"gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram",
@@ -6878,7 +6900,7 @@
"deeper_surfaces_phase3_companion_canvas_voice": "completed \u2014 companion reconnect resilience (auto-reconnect with backoff, pending-wait cancellation on disconnect), canvas artifact persistence (SQLite-backed store, daemon-restart durability), voice TTS fallback coverage (text-only reply on TTS failure, no dropped responses)",
"deeper_surfaces_phase4_rollout": "completed \u2014 phase 4 rollout and operator readiness plan documented: canary rollout plan by feature flag/surface, explicit rollback playbook, operator docs and architecture/protocol docs synchronized",
"post_phase_test_fixes": "completed \u2014 fixed 4 test failures introduced by phases 1-3: iOS/Android push listNodes (missing publishHeartbeat before platform-filtered query), server.test agent.send (run_state events now precede done; added sendAndWaitForDone helper), httpBody 413 (req.destroy() closed socket before response could be sent; replaced with Connection: close header on 413 responses)",
"personal_assistant_productization_plan": "proposed \u2014 8-10 week phased roadmap defined (companion MVP surfaces, voice reliability hardening, browser workflow reliability layer, onboarding 2.0 first-success funnel) with measurable exit gates.",
"personal_assistant_productization_plan": "in_progress \u2014 8-10 week phased roadmap active; Phase 3 browser workflow reliability layer shipped (wait/assert/extract/checkpoints + guardrails/retries/budgets). Remaining phases: companion MVP surfaces, voice reliability hardening, and onboarding 2.0 first-success funnel.",
"subagents_support": "completed \u2014 subagent phases 1-3 shipped with `subagent.spawn/send/list/cancel/delete/summary`, per-child queue mode (`followup|interrupt`), budgets (`max_turns`, `max_total_tokens`, `turn_timeout_ms`), tool-profile overrides, trace-linked audit events, `/subagents` inspection commands, and focused regression tests."
},
"soul_md_and_cron_create": {