diff --git a/docs/plans/2026-02-06-openclaw-feature-gap-analysis.md b/docs/plans/2026-02-06-openclaw-feature-gap-analysis.md index 61004a3..d6ec66d 100644 --- a/docs/plans/2026-02-06-openclaw-feature-gap-analysis.md +++ b/docs/plans/2026-02-06-openclaw-feature-gap-analysis.md @@ -174,8 +174,8 @@ Flynn actually has MCP support that OpenClaw doesn't emphasise — OpenClaw reli | macOS menu bar app | Full | -- | **MISSING** | | iOS node | Full (Canvas, Voice, Camera) | -- | **MISSING** | | Android node | Full (Canvas, Talk, Camera) | -- | **MISSING** | -| Voice Wake / Talk Mode | Full (ElevenLabs) | -- | **MISSING** | -| Camera / screen capture | Via nodes | -- | **MISSING** | +| Voice Wake / Talk Mode | Full (ElevenLabs) | Wake phrase + timed talk mode | **MATCH** | +| Camera / screen capture | Via nodes | `screen.capture` + `camera.capture` tools | **MATCH** | | Location access | Via nodes | -- | **MISSING** | --- @@ -299,7 +299,6 @@ All five Tier 3 items implemented: Lane Queue (per-session FIFO in gateway), cre - Companion apps (macOS/iOS/Android) — massive scope - LINE, Feishu, Mattermost — niche audience - Canvas/A2UI — experimental visual workspace -- Voice Wake / Talk Mode — ElevenLabs TTS integration - Nix/Fly.io/Railway deployment — platform-specific - OAuth subscription auth — complex - Skill/plugin safety scanner — static analysis diff --git a/docs/plans/2026-02-15-openclaw-gap-roadmap.md b/docs/plans/2026-02-15-openclaw-gap-roadmap.md index c53b836..ed5daee 100644 --- a/docs/plans/2026-02-15-openclaw-gap-roadmap.md +++ b/docs/plans/2026-02-15-openclaw-gap-roadmap.md @@ -62,8 +62,6 @@ A gap item is considered implemented when: - macOS menu bar app - iOS node - Android node -- Voice Wake / Talk Mode -- Camera / screen capture - Location access ## Roadmap Overview (Milestones) @@ -323,7 +321,7 @@ These are substantial UX/ecosystem projects or highly platform-specific; defer u - Canvas/A2UI - Companion apps (macOS/iOS/Android) -- Voice wake/talk mode + camera/screen capture/location +- Location access - Presence tracking - Bonjour/mDNS discovery - QMD backend diff --git a/docs/plans/2026-02-16-voice-wake-and-capture-checklist.md b/docs/plans/2026-02-16-voice-wake-and-capture-checklist.md new file mode 100644 index 0000000..a2cd803 --- /dev/null +++ b/docs/plans/2026-02-16-voice-wake-and-capture-checklist.md @@ -0,0 +1,41 @@ +# Voice Wake + Capture Checklist + +Date: 2026-02-16 +Status: completed + +## Scope + +- Implement a practical Voice Wake / Talk Mode slice. +- Implement Camera / Screen Capture via host tools. + +## Completed + +- Added `audio.talk_mode` config schema: + - `enabled` (default `false`) + - `wake_phrase` (default `hey flynn`) + - `timeout_ms` (default `120000`) + - `allow_manual_toggle` (default `true`) +- Wired talk mode gating in `src/daemon/routing.ts`: + - wake phrase activation with timed listen window + - manual controls: `/talk on`, `/talk off`, `/talk status` + - idle mode drops non-wake text when talk mode is enabled +- Added built-in capture tools in `src/tools/builtin/capture.ts`: + - `screen.capture` + - `camera.capture` + - returns base64 image payloads (`image/png` or `image/jpeg`) +- Registered capture tools in builtin index and tool policy runtime/coding sets. +- Updated docs: + - `README.md` + - `config/default.yaml` + - gap/roadmap docs in `docs/plans/` + +## Tests + +- `src/daemon/routing.test.ts` (wake phrase/talk mode behavior) +- `src/tools/builtin/capture.test.ts` (capture tool command wrapping behavior) +- `src/config/schema.test.ts` (talk mode schema defaults/overrides) + +## Verification + +- `pnpm test:run src/daemon/routing.test.ts src/tools/builtin/capture.test.ts src/config/schema.test.ts src/tools/policy.test.ts` +- `pnpm typecheck` diff --git a/docs/plans/state.json b/docs/plans/state.json index b3e966e..ace713f 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -581,6 +581,39 @@ ], "test_status": "pnpm test:run src/channels/bluebubbles/adapter.test.ts src/config/schema.test.ts src/gateway/handlers/services.test.ts + pnpm typecheck passing" }, + "voice-wake-talk-mode": { + "file": "2026-02-16-voice-wake-and-capture-checklist.md", + "status": "completed", + "date": "2026-02-16", + "updated": "2026-02-16", + "summary": "Added configurable wake-phrase talk mode (`audio.talk_mode`) with timed listen windows and `/talk on|off|status` controls in the message router.", + "files_modified": [ + "src/config/schema.ts", + "src/config/schema.test.ts", + "src/daemon/routing.ts", + "src/daemon/routing.test.ts", + "README.md", + "config/default.yaml" + ], + "test_status": "pnpm test:run src/daemon/routing.test.ts src/config/schema.test.ts + pnpm typecheck passing" + }, + "camera-screen-capture": { + "file": "2026-02-16-voice-wake-and-capture-checklist.md", + "status": "completed", + "date": "2026-02-16", + "updated": "2026-02-16", + "summary": "Added host capture tools (`screen.capture`, `camera.capture`) with platform command wrappers and base64 image payload output for camera/screen workflows.", + "files_created": [ + "src/tools/builtin/capture.ts", + "src/tools/builtin/capture.test.ts" + ], + "files_modified": [ + "src/tools/builtin/index.ts", + "src/tools/policy.ts", + "README.md" + ], + "test_status": "pnpm test:run src/tools/builtin/capture.test.ts src/tools/policy.test.ts + pnpm typecheck passing" + }, "openclaw-style-personal-agent-without-openclaw-risks": { "file": "2026-02-14-openclaw-style-personal-agent-without-openclaw-risks-plan.md", "status": "completed", @@ -2903,12 +2936,12 @@ "tier2_completion": "4/4 (100%) — inbound webhooks, vector memory search, Dockerfile, heartbeat monitor", "tier3_completion": "5/5 (100%) — lane queue, credential redaction, web UI token dashboard, xAI (Grok) provider, Voyage AI embeddings", "tier4_completion": "4/4 (100%) — gateway lock, shell completion, Tailscale Serve/Funnel, DM pairing codes", - "feature_gap_scorecard": "113/128 match (88%), 0 partial (0%), 15 missing (12%)", + "feature_gap_scorecard": "115/128 match (90%), 0 partial (0%), 13 missing (10%)", "operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 2/2 plans complete — milestone done", "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram", "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback", "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 3/3 (100%) — component registry, confidence routing, history index. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening", - "next_up": "OpenClaw gap: Zalo channel adapter (open next scoped implementation checklist)" + "next_up": "OpenClaw gap: Location access (open next scoped implementation checklist)" }, "soul_md_and_cron_create": { "date": "2026-02-11",