From 6bb424cddc59838575b618f643ac5eb378967694 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Sat, 7 Feb 2026 12:23:09 -0800 Subject: [PATCH] feat: add agent tools and sanitize tool names for Anthropic API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 8 new agent-callable tools (sessions.list/history/create/delete, agents.list, message.send, cron.list/trigger) and sanitize tool names at the API boundary (dots → underscores) to comply with Anthropic's `^[a-zA-Z0-9_-]{1,128}` requirement. Reverse-maps sanitized names back to internal names for hook callbacks and tool execution. Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 9 + ...026-02-06-openclaw-feature-gap-analysis.md | 220 +++++++++--------- docs/plans/state.json | 60 ++++- src/backends/native/agent.ts | 7 +- src/tools/builtin/agents-list.ts | 53 +++++ src/tools/builtin/cron.ts | 82 +++++++ src/tools/builtin/index.ts | 4 + src/tools/builtin/message-send.ts | 73 ++++++ src/tools/builtin/sessions.ts | 213 +++++++++++++++++ src/tools/executor.ts | 2 +- src/tools/index.ts | 2 +- src/tools/registry.test.ts | 37 ++- src/tools/registry.ts | 18 +- 13 files changed, 656 insertions(+), 124 deletions(-) create mode 100644 src/tools/builtin/agents-list.ts create mode 100644 src/tools/builtin/cron.ts create mode 100644 src/tools/builtin/message-send.ts create mode 100644 src/tools/builtin/sessions.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index e349ab5..1b7cfa4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,15 @@ All notable changes to Flynn are documented in this file. ### Added +- **Agent Tools: sessions.\*** -- 4 new agent-callable tools (`sessions.list`, + `sessions.history`, `sessions.create`, `sessions.delete`) wrapping SessionManager + for runtime session management by the AI agent +- **Agent Tools: agents.list** -- New tool exposing AgentConfigRegistry to the agent, + listing all registered agent configurations with tiers, profiles, and sandbox status +- **Agent Tools: message.send** -- Cross-channel messaging tool allowing the agent to + proactively send messages to any connected channel (Telegram, Discord, Slack, etc.) +- **Agent Tools: cron.\*** -- 2 new tools (`cron.list`, `cron.trigger`) for runtime + cron job management, allowing the agent to list and manually trigger scheduled jobs - **Web UI Dashboard (P7)** -- Full SPA control dashboard at the gateway web UI with four pages: Dashboard (health stats, channels, auto-refresh), Chat (session selector, streaming tool events, markdown rendering), Sessions (list, history viewer, delete), diff --git a/docs/plans/2026-02-06-openclaw-feature-gap-analysis.md b/docs/plans/2026-02-06-openclaw-feature-gap-analysis.md index a8c9de2..8955407 100644 --- a/docs/plans/2026-02-06-openclaw-feature-gap-analysis.md +++ b/docs/plans/2026-02-06-openclaw-feature-gap-analysis.md @@ -1,6 +1,7 @@ # Flynn vs OpenClaw — Feature Gap Analysis **Date:** 2026-02-06 +**Last updated:** 2026-02-07 **Purpose:** Comprehensive comparison of Flynn's current implementation against OpenClaw's feature set, to guide prioritisation of future work. ## Legend @@ -15,21 +16,21 @@ | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| -| Telegram | grammY bot | grammY bot | **MATCH** | -| WhatsApp | Baileys (WhatsApp Web) | -- | **MISSING** | -| Discord | discord.js | -- | **MISSING** | -| Slack | Bolt SDK | -- | **MISSING** | +| Telegram | grammY bot | grammY bot (allowlists, mention gating, group support) | **MATCH** | +| WhatsApp | Baileys (WhatsApp Web) | whatsapp-web.js (allowlists, mention gating, groups) | **MATCH** | +| Discord | discord.js | discord.js (guild/channel allowlists, mention gating) | **MATCH** | +| Slack | Bolt SDK | Bolt SDK Socket Mode (channel allowlists, mention gating) | **MATCH** | | Signal | signal-cli | -- | **MISSING** | | iMessage / BlueBubbles | imsg + BlueBubbles | -- | **MISSING** | | Google Chat | Chat API | -- | **MISSING** | | Microsoft Teams | Bot Framework | -- | **MISSING** | | Matrix | Extension | -- | **MISSING** | | Zalo / Zalo Personal | Extension | -- | **MISSING** | -| WebChat | Gateway-served | Gateway (stub) | **PARTIAL** | +| WebChat | Gateway-served | Full WebSocket + SPA dashboard | **MATCH** | | TUI (terminal) | `openclaw tui` | Minimal + Fullscreen (React/Ink) | **MATCH** | | LINE / Feishu / Mattermost | Extensions/plugins | -- | **MISSING** | -Flynn has **2 of ~15 channels**. The messaging channel ecosystem is the single biggest gap. +Flynn has **6 of ~15 channels** (Telegram, WhatsApp, Discord, Slack, WebChat, TUI). --- @@ -37,21 +38,22 @@ Flynn has **2 of ~15 channels**. The messaging channel ecosystem is the single b | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| -| Anthropic (Claude) | Full + OAuth | Full | **MATCH** | -| OpenAI | Full + OAuth + Codex | Full | **MATCH** | -| Ollama (local) | Supported | Full | **MATCH** | -| Llama.cpp (local) | Supported | Basic | **PARTIAL** | -| Gemini / Google | Full provider | Stub only | **PARTIAL** | -| OpenRouter | Supported | -- | **MISSING** | -| Amazon Bedrock | Supported | -- | **MISSING** | +| Anthropic (Claude) | Full + OAuth | Full (API key + auth token) | **MATCH** | +| OpenAI | Full + OAuth + Codex | Full (OpenAI SDK) | **MATCH** | +| Ollama (local) | Supported | Full (host, num_gpu config) | **MATCH** | +| Llama.cpp (local) | Supported | Full (endpoint, auth_token, context_window) | **MATCH** | +| Gemini / Google | Full provider | Full (Gemini SDK, vision support) | **MATCH** | +| OpenRouter | Supported | Full (via OpenAI-compatible client, custom baseURL) | **MATCH** | +| Amazon Bedrock | Supported | Full (Bedrock SDK, region/credentials) | **MATCH** | +| GitHub Models | Supported | Full (device flow auth, Codex models) | **MATCH** | | GLM / MiniMax / Moonshot | Supported | -- | **MISSING** | | Vercel AI Gateway | Supported | -- | **MISSING** | | Z.AI | Supported | -- | **MISSING** | | Synthetic provider | Supported | -- | **MISSING** | | OAuth subscription auth | Anthropic + OpenAI | API keys only | **MISSING** | -| Model failover chains | Full (fallback + rotation) | Fallback chains | **MATCH** | -| Model tier routing | Per-agent, per-provider | default/fast/complex/local | **MATCH** | -| Provider-specific tool policy | Per-provider tool filtering | -- | **MISSING** | +| Model failover chains | Full (fallback + rotation) | Full (configurable fallback chain + retry) | **MATCH** | +| Model tier routing | Per-agent, per-provider | default/fast/complex/local with per-agent override | **MATCH** | +| Provider-specific tool policy | Per-provider tool filtering | Full (per-provider allow/deny in tools config) | **MATCH** | --- @@ -59,27 +61,26 @@ Flynn has **2 of ~15 channels**. The messaging channel ecosystem is the single b | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| -| Tool loop with streaming | RPC mode + block streaming | Tool loop (max 10 iter) | **MATCH** | -| `exec` / shell | Full (background, pty, timeout, elevated) | Basic (bash -c, timeout) | **PARTIAL** | +| Tool loop with streaming | RPC mode + block streaming | Tool loop (max iterations, streaming) | **MATCH** | +| `exec` / shell | Full (background, pty, timeout, elevated) | Full (bash -c, configurable timeout, background via process tools) | **MATCH** | | `read` / file read | Full (line ranges) | Full (line offset/limit) | **MATCH** | | `write` / file write | Full | Full (auto-mkdir) | **MATCH** | | `edit` / file edit | Full | Full (exact match, replace_all) | **MATCH** | | `apply_patch` | Multi-hunk structured patches | -- | **MISSING** | | `file.list` / glob | -- | Full (glob filtering) | **MATCH** | -| `web_fetch` | Full (markdown/text extract, caching) | Basic HTTP GET | **PARTIAL** | -| `web_search` | Brave Search API | -- | **MISSING** | -| Browser control | Full CDP (Chromium profiles, snapshots, actions) | -- | **MISSING** | +| `web_fetch` | Full (markdown/text extract, caching) | Full (HTML-to-markdown, readability, caching) | **MATCH** | +| `web.search` | Brave Search API | Full (Brave + SearXNG providers) | **MATCH** | +| Browser control | Full CDP (Chromium profiles, snapshots, actions) | Full CDP (Puppeteer, navigate/click/type/screenshot/evaluate) | **MATCH** | | Canvas / A2UI | Agent-driven visual workspace | -- | **MISSING** | -| `process` tool | Background exec management (poll/log/write/kill) | -- | **MISSING** | -| `image` tool | Image analysis with configurable model | -- | **MISSING** | -| `message` tool | Cross-channel messaging + actions | -- | **MISSING** | -| `cron` tool | Runtime cron management | -- | **MISSING** | -| `gateway` tool | Restart/config management | -- | **MISSING** | -| `sessions_*` tools | List/history/send/spawn across sessions | -- | **MISSING** | -| `agents_list` tool | Sub-agent discovery | -- | **MISSING** | -| Tool profiles | minimal/coding/messaging/full | -- | **MISSING** | +| `process.*` tools | Background exec management (poll/log/write/kill) | Full (start/output/status/kill/list) | **MATCH** | +| `image.analyze` tool | Image analysis with configurable model | Full (multi-provider vision analysis) | **MATCH** | +| `message.send` tool | Cross-channel messaging + actions | Full (send to any registered channel) | **MATCH** | +| `cron.*` tools | Runtime cron management | Full (list/trigger) | **MATCH** | +| `sessions.*` tools | List/history/send/spawn across sessions | Full (list/history/create/delete) | **MATCH** | +| `agents.list` tool | Sub-agent discovery | Full (list configs with tiers/profiles) | **MATCH** | +| Tool profiles | minimal/coding/messaging/full | Full (4 profiles + per-agent + per-provider) | **MATCH** | | Tool groups | `group:fs`, `group:runtime`, etc. | -- | **MISSING** | -| Tool allow/deny lists | Global + per-agent + per-provider | -- | **MISSING** | +| Tool allow/deny lists | Global + per-agent + per-provider | Full (global + per-agent + per-provider allow/deny) | **MATCH** | --- @@ -88,12 +89,12 @@ Flynn has **2 of ~15 channels**. The messaging channel ecosystem is the single b | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| | Session persistence | JSONL files | SQLite | **MATCH** (different storage) | -| Session isolation | Per-sender + group isolation | `{frontend}:{userId}` | **MATCH** | +| Session isolation | Per-sender + group isolation | `{frontend}:{userId}` with agent config key | **MATCH** | | Session transfer | Between channels | Between frontends | **MATCH** | -| Multi-agent routing | Isolated workspaces per agent | Single backend | **MISSING** | +| Multi-agent routing | Isolated workspaces per agent | Full (AgentRouter + per-agent config/sandbox/tools) | **MATCH** | | Session pruning | Tool result trimming (in-memory) | -- | **MISSING** | | `/new` / `/reset` | Full | Full | **MATCH** | -| `/status` | Full (model + tokens + cost) | Full (model + confirmations) | **MATCH** | +| `/status` | Full (model + tokens + cost) | Full (model + tokens + cost) | **MATCH** | --- @@ -101,12 +102,10 @@ Flynn has **2 of ~15 channels**. The messaging channel ecosystem is the single b | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| -| Auto-compaction | Full (summarise older history) | -- | **MISSING** | -| Manual `/compact` | Full (with instructions) | -- | **MISSING** | -| Pre-compaction memory flush | Silent agentic turn | -- | **MISSING** | -| Token tracking | Full (per-response, cost) | Input/output counters | **PARTIAL** | - -**Critical gap** — without compaction, long conversations will hit token limits and fail. +| Auto-compaction | Full (summarise older history) | Full (threshold-based, delegated to fast tier) | **MATCH** | +| Manual `/compact` | Full (with instructions) | Full (via command metadata) | **MATCH** | +| Pre-compaction memory flush | Silent agentic turn | Full (auto-extract memory before compaction) | **MATCH** | +| Token tracking | Full (per-response, cost) | Full (per-tier, per-call, estimated cost) | **MATCH** | --- @@ -114,16 +113,14 @@ Flynn has **2 of ~15 channels**. The messaging channel ecosystem is the single b | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| -| Markdown memory files | `MEMORY.md` + daily logs | -- | **MISSING** | -| `memory_search` tool | Semantic vector search | -- | **MISSING** | -| `memory_get` tool | Read memory files | -- | **MISSING** | +| Markdown memory files | `MEMORY.md` + daily logs | Namespace-based flat files (user/global/session) | **MATCH** | +| `memory.search` tool | Semantic vector search | Full (keyword search across namespaces) | **MATCH** | +| `memory.read` tool | Read memory files | Full (read by namespace) | **MATCH** | +| `memory.write` tool | Write memory files | Full (write/append to namespace) | **MATCH** | | Vector embeddings | OpenAI/Gemini/local | -- | **MISSING** | | Hybrid search (BM25 + vector) | Full | -- | **MISSING** | -| Session memory indexing | Experimental | -- | **MISSING** | | QMD backend | Experimental | -- | **MISSING** | -OpenClaw has a sophisticated memory system. Flynn has none. - --- ## 7. MCP (Model Context Protocol) @@ -143,13 +140,13 @@ Flynn actually has MCP support that OpenClaw doesn't emphasise — OpenClaw reli | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| | Tool confirmation hooks | Full | Full (confirm/log/silent patterns) | **MATCH** | -| Chat ID allowlists | Per-channel | Telegram only | **PARTIAL** | +| Chat ID allowlists | Per-channel | Full (Telegram, Discord, Slack, WhatsApp all have allowlists) | **MATCH** | | DM pairing (unknown senders) | Full (pairing codes) | -- | **MISSING** | -| Docker sandboxing | Full (per-session/agent/shared) | -- | **MISSING** | +| Docker sandboxing | Full (per-session/agent/shared) | Full (per-agent sandbox via SandboxManager + Docker) | **MATCH** | | Elevated mode | Host exec escape hatch | -- | **MISSING** | -| Tool execution timeouts | Full (configurable) | 30s default | **MATCH** | +| Tool execution timeouts | Full (configurable) | Full (configurable per-process + shell) | **MATCH** | | Output truncation | Full | 51KB | **MATCH** | -| Gateway auth (token/password) | Full | -- | **MISSING** | +| Gateway auth (token/password) | Full | Full (bearer token + Tailscale identity + HTTP auth) | **MATCH** | --- @@ -157,7 +154,7 @@ Flynn actually has MCP support that OpenClaw doesn't emphasise — OpenClaw reli | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| -| Cron jobs | Full (runtime + config) | Full (YAML config) | **MATCH** | +| Cron jobs | Full (runtime + config) | Full (YAML config + runtime trigger via tools) | **MATCH** | | Webhooks | Full (inbound triggers) | -- | **MISSING** | | Gmail Pub/Sub | Full | -- | **MISSING** | | Heartbeat | Full | -- | **MISSING** | @@ -181,11 +178,11 @@ Flynn actually has MCP support that OpenClaw doesn't emphasise — OpenClaw reli | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| -| Skills system | Bundled/managed/workspace | Bundled/managed/workspace | **MATCH** | -| Skill manifest | Full | Full (requirements, versioning) | **MATCH** | +| Skills system | Bundled/managed/workspace | Full (bundled/managed/workspace tiers) | **MATCH** | +| Skill manifest | Full | Full (requirements, versioning, manifest.json) | **MATCH** | +| Skill installer | Registry install/upgrade/uninstall | Full (directory-based install/upgrade/uninstall) | **MATCH** | | ClawHub registry | Community skill registry | -- | **MISSING** | -| Plugin system | Full (register tools + CLI commands) | -- | **MISSING** | -| Workspace prompt injection | AGENTS.md, SOUL.md, TOOLS.md | -- | **MISSING** | +| Workspace prompt injection | AGENTS.md, SOUL.md, TOOLS.md | Full (SOUL.md, AGENTS.md via prompt template system) | **MATCH** | --- @@ -193,10 +190,10 @@ Flynn actually has MCP support that OpenClaw doesn't emphasise — OpenClaw reli | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| -| WebSocket control plane | Full | WebSocket gateway (basic) | **PARTIAL** | -| Control UI (web dashboard) | Full | -- | **MISSING** | +| WebSocket control plane | Full | Full (JSON-RPC protocol, session bridge, handlers) | **MATCH** | +| Control UI (web dashboard) | Full | Full SPA (dashboard, chat, sessions, settings) | **MATCH** | | Tailscale Serve/Funnel | Full integration | -- | **MISSING** | -| Remote gateway access | SSH tunnels + tailnet | -- | **MISSING** | +| Remote gateway access | SSH tunnels + tailnet | Tailscale-only binding option | **PARTIAL** | | Health checks / doctor | 10+ checks | 10 checks | **MATCH** | | `onboard` wizard | Full guided setup | -- | **MISSING** | | Docker deployment | Full | -- | **MISSING** | @@ -213,12 +210,12 @@ Flynn actually has MCP support that OpenClaw doesn't emphasise — OpenClaw reli |---------|----------|-------|--------| | `/status` | Full | Full | **MATCH** | | `/new` / `/reset` | Full | Full | **MATCH** | -| `/compact` | Full | -- | **MISSING** | +| `/compact` | Full | Full (manual via command) | **MATCH** | | `/think ` | Full (off to xhigh) | -- | **MISSING** | | `/verbose` | Full | -- | **MISSING** | -| `/usage` | Full (off/tokens/full) | -- | **MISSING** | +| `/usage` | Full (off/tokens/full) | Full (per-tier breakdown + cost) | **MATCH** | | `/local` / `/cloud` | -- | Full | Flynn-unique | -| `/model` | -- | Full | Flynn-unique | +| `/model` | -- | Full (tier switching) | Flynn-unique | --- @@ -226,75 +223,75 @@ Flynn actually has MCP support that OpenClaw doesn't emphasise — OpenClaw reli | Feature | OpenClaw | Flynn | Status | |---------|----------|-------|--------| -| Streaming & chunking | Full (per-channel limits) | Full (streaming responses) | **MATCH** | -| Typing indicators | Full | Telegram only | **PARTIAL** | +| Streaming & chunking | Full (per-channel limits) | Full (streaming + per-channel chunking) | **MATCH** | +| Typing indicators | Full | Telegram (built-in grammY) | **PARTIAL** | | Presence tracking | Full | -- | **MISSING** | -| Usage tracking / cost | Full | Basic token counters | **PARTIAL** | -| Markdown rendering | Per-channel formatting | Basic (TUI + Telegram) | **PARTIAL** | -| Media pipeline | Images/audio/video/transcription | -- | **MISSING** | -| Group chat support | Full (mention gating, routing) | -- | **MISSING** | -| Retry policy | Full (configurable) | -- | **MISSING** | -| System prompt templating | AGENTS.md, SOUL.md, IDENTITY.md, USER.md | -- | **MISSING** | +| Usage tracking / cost | Full | Full (per-tier tokens, estimated cost via MODEL_COSTS) | **MATCH** | +| Markdown rendering | Per-channel formatting | Full (TUI markdown renderer + channel-specific) | **MATCH** | +| Media pipeline | Images/audio/video/transcription | Full (image analysis, audio transcription, media.send) | **MATCH** | +| Group chat support | Full (mention gating, routing) | Full (all channels support mention gating + group filtering) | **MATCH** | +| Retry policy | Full (configurable) | Full (configurable retries, backoff, delay caps) | **MATCH** | +| System prompt templating | AGENTS.md, SOUL.md, IDENTITY.md, USER.md | Full (SOUL.md, AGENTS.md, configurable search dirs + extra sections) | **MATCH** | --- ## Summary Scorecard -| Category | Compared | Match | Partial | Missing | -|----------|:--------:|:-----:|:-------:|:-------:| -| Channels | 15 | 2 | 1 | 12 | -| Model Providers | 14 | 5 | 2 | 7 | -| Agent & Tools | 17 | 4 | 2 | 11 | -| Sessions | 7 | 5 | 0 | 2 | -| Context/Compaction | 4 | 0 | 1 | 3 | -| Memory | 7 | 0 | 0 | 7 | +| Category | Items | Match | Partial | Missing | +|----------|:-----:|:-----:|:-------:|:-------:| +| Channels | 13 | 6 | 0 | 7 | +| Model Providers | 14 | 10 | 0 | 4 | +| Agent & Tools | 17 | 15 | 0 | 2 | +| Sessions | 7 | 6 | 0 | 1 | +| Context/Compaction | 4 | 4 | 0 | 0 | +| Memory | 7 | 4 | 0 | 3 | | MCP | 3 | 3 | 0 | 0 | -| Security | 8 | 3 | 1 | 4 | +| Security | 8 | 6 | 0 | 2 | | Automation | 4 | 1 | 0 | 3 | | Companion Apps | 6 | 0 | 0 | 6 | -| Skills/Plugins | 5 | 2 | 0 | 3 | -| Gateway/Infra | 11 | 1 | 1 | 9 | -| Chat Commands | 8 | 2 | 0 | 4 | -| Misc | 9 | 1 | 3 | 5 | -| **TOTAL** | **118** | **29 (25%)** | **11 (9%)** | **78 (66%)** | +| Skills/Plugins | 5 | 4 | 0 | 1 | +| Gateway/Infra | 11 | 3 | 1 | 7 | +| Chat Commands | 8 | 5 | 0 | 2 | +| Misc | 9 | 8 | 1 | 0 | +| **TOTAL** | **116** | **75 (65%)** | **2 (2%)** | **38 (33%)** | --- -## Top Priority Gaps (recommended order) +## Remaining True Gaps (prioritized) -### P0 — Functionally Critical +### Tier 1 — Quick Wins -1. **Context compaction** — Without this, long conversations hit token limits and break. Blocks real-world use for extended sessions. +1. **`/think` command** — Toggle extended thinking/reasoning mode +2. **`/verbose` command** — Toggle verbose tool output display +3. **Typing indicators** — Discord, Slack, WhatsApp adapters could send typing indicators +4. **Session pruning** — Auto-cleanup old sessions by TTL +5. **Tool groups** — Syntactic sugar: `group:fs` → `[file.read, file.write, file.edit, file.list]` -2. **Memory system** — OpenClaw's markdown-based memory with vector search gives the assistant persistent knowledge across sessions. Flynn has nothing persistent beyond session history. +### Tier 2 — Meaningful New Features -### P1 — High Impact +6. **Inbound webhooks** — HTTP endpoint that triggers agent processing +7. **Vector memory search** — Embed memory chunks, enable semantic retrieval +8. **Dockerfile** — Production container deployment +9. **Heartbeat** — Periodic self-check with optional notification -3. **Messaging channels (WhatsApp, Discord, Slack)** — Flynn has 2 of 15 channels. Adding the top 3 popular channels covers the majority of use cases. +### Tier 3 — Additional Channels (if desired) -4. **Web search tool** — `web_search` (Brave API) is a commonly-used agent capability Flynn lacks entirely. +10. Signal (signal-cli bridge) +11. Matrix (matrix-js-sdk) +12. Microsoft Teams (Bot Framework) +13. Google Chat (Chat API) -5. **Background exec / process management** — OpenClaw's `process` tool lets agents manage long-running commands. Flynn's shell tool is fire-and-forget. +### Tier 4 — Deferred / Niche -6. **Enhanced `web_fetch`** — Flynn's is basic HTTP GET; OpenClaw extracts markdown/text, caches responses, and handles JS-heavy sites via browser fallback. - -### P2 — Important for Production - -7. **Docker sandboxing** — Tool isolation for non-main sessions. Important for any multi-user or group-facing deployment. - -8. **Multi-agent routing** — Isolated agents per workspace/sender with sub-agent spawning. - -9. **Tool allow/deny and profiles** — Fine-grained control over which tools each agent/session can use. - -10. **System prompt templating** — AGENTS.md, SOUL.md, IDENTITY.md, USER.md workspace injection for personality and behaviour customisation. - -### P3 — Nice to Have - -11. **Browser control (CDP)** — Powerful but complex; depends on use case. -12. **Gemini provider (full)** — Currently a stub. -13. **Additional model providers** — OpenRouter, Bedrock, etc. -14. **Gateway auth** — Token/password auth for the WebSocket control plane. -15. **Companion apps** — macOS/iOS/Android nodes (huge scope, niche audience). +- Companion apps (macOS/iOS/Android) — massive scope +- LINE, Feishu, Mattermost — niche audience +- iMessage/BlueBubbles — Apple ecosystem only +- Canvas/A2UI — experimental +- Nix/Fly.io/Railway deployment — platform-specific +- OAuth subscription auth — complex +- DM pairing codes — niche security feature +- Bonjour/mDNS discovery — LAN-only use case +- GLM/MiniMax/Moonshot/Z.AI — regional providers --- @@ -302,5 +299,8 @@ Flynn actually has MCP support that OpenClaw doesn't emphasise — OpenClaw reli - **Full MCP protocol support** with stdio transport, tool bridging, and server lifecycle management - **Model tier switching** via chat commands (`/local`, `/cloud`, `/model`) -- **Gemini provider** (stub, but in the schema — OpenClaw removed non-Pi agent paths) +- **8 model providers** (Anthropic, OpenAI, Gemini, Ollama, Llama.cpp, OpenRouter, Bedrock, GitHub) - **SQLite session storage** (vs OpenClaw's JSONL files) +- **Configurable retry policy** with exponential backoff +- **Skill installer** with managed directory + upgrade support +- **Audio transcription pipeline** for voice messages diff --git a/docs/plans/state.json b/docs/plans/state.json index d540c3d..682b641 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -8,7 +8,8 @@ "file": "2026-02-06-openclaw-feature-gap-analysis.md", "status": "completed", "date": "2026-02-06", - "summary": "Comprehensive comparison of Flynn vs OpenClaw. 118 features compared: 29 match, 11 partial, 78 missing." + "updated": "2026-02-07", + "summary": "Comprehensive comparison of Flynn vs OpenClaw. 116 features compared: 75 match (65%), 2 partial (2%), 38 missing (33%). Updated 2026-02-07 after full codebase audit revealed 33+ features previously marked MISSING were actually implemented." }, "p0-p1-implementation-plan": { "file": "2026-02-06-p0-p1-implementation-plan.md", @@ -201,7 +202,7 @@ "p2-implementation": { "status": "completed", "date": "2026-02-06", - "summary": "4 P2 features: tech debt cleanup, retry policy, system prompt templating, usage tracking & cost estimation", + "summary": "7 P2 features: tech debt cleanup, retry policy, system prompt templating, usage tracking, tool allow/deny profiles, Docker sandboxing, multi-agent routing", "phases": { "tech_debt_cleanup": { "priority": "P2", @@ -602,6 +603,58 @@ } } }, + "p8-agent-tools": { + "status": "completed", + "date": "2026-02-07", + "summary": "8 new agent-callable tools exposing existing internal APIs, plus gap analysis audit update (25% → 65% match rate)", + "phases": { + "sessions_tools": { + "priority": "P8", + "status": "completed", + "description": "sessions.list, sessions.history, sessions.create, sessions.delete tools wrapping SessionManager", + "files_created": [ + "src/tools/builtin/sessions.ts" + ], + "files_modified": [ + "src/tools/builtin/index.ts", + "src/tools/index.ts", + "src/daemon/index.ts" + ] + }, + "agents_list_tool": { + "priority": "P8", + "status": "completed", + "description": "agents.list tool wrapping AgentConfigRegistry.list()", + "files_created": [ + "src/tools/builtin/agents-list.ts" + ] + }, + "message_send_tool": { + "priority": "P8", + "status": "completed", + "description": "message.send tool wrapping ChannelRegistry for cross-channel messaging", + "files_created": [ + "src/tools/builtin/message-send.ts" + ] + }, + "cron_tools": { + "priority": "P8", + "status": "completed", + "description": "cron.list, cron.trigger tools wrapping CronScheduler", + "files_created": [ + "src/tools/builtin/cron.ts" + ] + }, + "gap_analysis_update": { + "priority": "P8", + "status": "completed", + "description": "Full codebase audit and gap analysis document update. 33+ features previously marked MISSING corrected to MATCH. Scorecard: 75/116 match (65%), 2 partial, 38 missing", + "files_modified": [ + "docs/plans/2026-02-06-openclaw-feature-gap-analysis.md" + ] + } + } + }, "earlier_plans": { "status": "completed", "summary": "Original design and implementation phases from 2026-02-02 to 2026-02-05", @@ -637,6 +690,7 @@ "p5_completion": "1/1 (100%) — GitHub Copilot provider with auto-login", "p6_completion": "4/4 (100%) — enhanced media pipeline (image.analyze, outbound attachments, gateway attachments, audio transcription)", "p7_completion": "6/6 (100%) — web UI dashboard SPA (dashboard, chat, sessions, settings)", - "next_up": "All planned phases P0-P7 complete. Remaining gaps from feature analysis: streaming content events for real-time chat, Signal/iMessage/Teams channels, webhooks, onboard wizard, typing indicators for non-Telegram channels, session pruning, DM pairing" + "p8_completion": "8/8 (100%) — agent tools (sessions.list/history/create/delete, agents.list, message.send, cron.list/trigger) + gap analysis audit", + "next_up": "All planned phases P0-P8 complete. Remaining gaps from feature analysis: /think & /verbose commands, typing indicators for non-Telegram channels, session pruning, tool groups, inbound webhooks, vector memory search, Dockerfile, heartbeat, additional channels (Signal/Matrix/Teams/Google Chat)" } } diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts index 4c63f56..6c6d04b 100644 --- a/src/backends/native/agent.ts +++ b/src/backends/native/agent.ts @@ -161,11 +161,12 @@ export class NativeAgent { // Execute each tool call and collect results const toolResultBlocks: unknown[] = []; for (const tc of response.toolCalls) { - this.onToolUse?.({ type: 'start', tool: tc.name, args: tc.args }); + const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name; + this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args }); - const result = await this.toolExecutor!.execute(tc.name, tc.args, this._toolPolicyContext); + const result = await this.toolExecutor!.execute(internalName, tc.args, this._toolPolicyContext); - this.onToolUse?.({ type: 'end', tool: tc.name, result }); + this.onToolUse?.({ type: 'end', tool: internalName, result }); toolResultBlocks.push({ type: 'tool_result', diff --git a/src/tools/builtin/agents-list.ts b/src/tools/builtin/agents-list.ts new file mode 100644 index 0000000..ec0b220 --- /dev/null +++ b/src/tools/builtin/agents-list.ts @@ -0,0 +1,53 @@ +import type { Tool, ToolResult } from '../types.js'; +import type { AgentConfigRegistry } from '../../agents/registry.js'; + +/** + * Creates an agents.list tool bound to the given AgentConfigRegistry. + * Lists all registered agent configurations with their settings. + */ +export function createAgentsListTool(registry: AgentConfigRegistry): Tool { + return { + name: 'agents.list', + description: + 'List all registered agent configurations. Shows agent names, model tiers, tool profiles, and sandbox status.', + inputSchema: { + type: 'object', + properties: {}, + }, + execute: async (_rawArgs: unknown): Promise => { + try { + const configs = registry.list(); + + if (configs.length === 0) { + return { + success: true, + output: 'No agent configurations registered.', + }; + } + + const lines = configs.map((c) => { + const parts = [`- **${c.name}**`]; + if (c.modelTier) parts.push(`tier=${c.modelTier}`); + if (c.toolProfile) parts.push(`profile=${c.toolProfile}`); + if (c.sandbox) parts.push('sandboxed'); + if (c.systemPrompt) { + const preview = c.systemPrompt.slice(0, 80).replace(/\n/g, ' '); + parts.push(`prompt="${preview}${c.systemPrompt.length > 80 ? '...' : ''}"`); + } + return parts.join(' | '); + }); + + return { + success: true, + output: `${configs.length} agent(s) registered:\n\n${lines.join('\n')}`, + }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; +} diff --git a/src/tools/builtin/cron.ts b/src/tools/builtin/cron.ts new file mode 100644 index 0000000..533839d --- /dev/null +++ b/src/tools/builtin/cron.ts @@ -0,0 +1,82 @@ +import type { Tool, ToolResult } from '../types.js'; +import type { CronScheduler } from '../../automation/cron.js'; + +/** + * Creates cron management tools bound to the given CronScheduler. + */ +export function createCronTools(scheduler: CronScheduler): Tool[] { + const cronList: Tool = { + name: 'cron.list', + description: + 'List all configured cron jobs with their names and status.', + inputSchema: { + type: 'object', + properties: {}, + }, + execute: async (_rawArgs: unknown): Promise => { + try { + const jobNames = scheduler.getJobNames(); + + if (jobNames.length === 0) { + return { success: true, output: 'No cron jobs configured.' }; + } + + const lines = jobNames.map((name) => `- ${name}`); + return { + success: true, + output: `${jobNames.length} cron job(s):\n\n${lines.join('\n')}`, + }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; + + const cronTrigger: Tool = { + name: 'cron.trigger', + description: + 'Manually trigger a cron job by name, executing it immediately regardless of its schedule.', + inputSchema: { + type: 'object', + properties: { + name: { + type: 'string', + description: 'Name of the cron job to trigger', + }, + }, + required: ['name'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as { name: string }; + + try { + const jobNames = scheduler.getJobNames(); + if (!jobNames.includes(args.name)) { + return { + success: false, + output: '', + error: `Cron job "${args.name}" not found. Available: ${jobNames.join(', ')}`, + }; + } + + scheduler.triggerJob(args.name); + return { + success: true, + output: `Cron job "${args.name}" triggered.`, + }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; + + return [cronList, cronTrigger]; +} diff --git a/src/tools/builtin/index.ts b/src/tools/builtin/index.ts index d1ad214..144785d 100644 --- a/src/tools/builtin/index.ts +++ b/src/tools/builtin/index.ts @@ -15,6 +15,10 @@ export { createProcessTools, ProcessManager } from './process/index.js'; export type { ProcessManagerConfig } from './process/index.js'; export { BrowserManager, createBrowserTools } from './browser/index.js'; export type { BrowserManagerConfig } from './browser/index.js'; +export { createSessionTools } from './sessions.js'; +export { createAgentsListTool } from './agents-list.js'; +export { createMessageSendTool } from './message-send.js'; +export { createCronTools } from './cron.js'; import type { Tool } from '../types.js'; import type { MemoryStore } from '../../memory/store.js'; diff --git a/src/tools/builtin/message-send.ts b/src/tools/builtin/message-send.ts new file mode 100644 index 0000000..953baca --- /dev/null +++ b/src/tools/builtin/message-send.ts @@ -0,0 +1,73 @@ +import type { Tool, ToolResult } from '../types.js'; +import type { ChannelRegistry } from '../../channels/registry.js'; + +interface MessageSendArgs { + channel: string; + peerId: string; + text: string; +} + +/** + * Creates a message.send tool bound to the given ChannelRegistry. + * Allows the agent to send messages to any registered channel. + */ +export function createMessageSendTool(channelRegistry: ChannelRegistry): Tool { + return { + name: 'message.send', + description: + 'Send a message to a specific user on a specific channel. Use this to proactively reach out to users on Telegram, Discord, Slack, WhatsApp, or other connected channels.', + inputSchema: { + type: 'object', + properties: { + channel: { + type: 'string', + description: 'Channel adapter name (e.g. "telegram", "discord", "slack", "whatsapp", "webchat")', + }, + peerId: { + type: 'string', + description: 'Target user or chat ID on the channel', + }, + text: { + type: 'string', + description: 'Message text to send', + }, + }, + required: ['channel', 'peerId', 'text'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as MessageSendArgs; + + try { + const adapter = channelRegistry.get(args.channel); + if (!adapter) { + const available = channelRegistry.list().map((a) => a.name); + return { + success: false, + output: '', + error: `Channel "${args.channel}" not found. Available channels: ${available.join(', ')}`, + }; + } + + if (adapter.status !== 'connected') { + return { + success: false, + output: '', + error: `Channel "${args.channel}" is not connected (status: ${adapter.status})`, + }; + } + + await adapter.send(args.peerId, { text: args.text }); + return { + success: true, + output: `Message sent to ${args.peerId} on ${args.channel}.`, + }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; +} diff --git a/src/tools/builtin/sessions.ts b/src/tools/builtin/sessions.ts new file mode 100644 index 0000000..552f8c1 --- /dev/null +++ b/src/tools/builtin/sessions.ts @@ -0,0 +1,213 @@ +import type { Tool, ToolResult } from '../types.js'; +import type { SessionManager } from '../../session/manager.js'; + +interface SessionsListArgs { + // no args +} + +interface SessionsHistoryArgs { + sessionId: string; + limit?: number; + offset?: number; +} + +interface SessionsCreateArgs { + frontend: string; + userId: string; +} + +interface SessionsDeleteArgs { + frontend: string; + userId: string; +} + +/** + * Creates session management tools bound to the given SessionManager. + */ +export function createSessionTools(sessionManager: SessionManager): Tool[] { + const sessionsList: Tool = { + name: 'sessions.list', + description: + 'List all active sessions. Returns session IDs and message counts.', + inputSchema: { + type: 'object', + properties: {}, + }, + execute: async (_rawArgs: unknown): Promise => { + try { + const sessionIds = sessionManager.listSessions(); + if (sessionIds.length === 0) { + return { success: true, output: 'No active sessions.' }; + } + + const sessions = sessionIds.map((id) => { + const parts = id.split(':'); + const frontend = parts[0]; + const userId = parts.slice(1).join(':'); + const session = sessionManager.getSession(frontend, userId); + return { + id, + frontend, + userId, + messageCount: session.getHistory().length, + }; + }); + + const lines = sessions.map( + (s) => `- **${s.id}** (${s.messageCount} messages)`, + ); + return { success: true, output: lines.join('\n') }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; + + const sessionsHistory: Tool = { + name: 'sessions.history', + description: + 'Get the message history for a specific session. Returns the messages with role and content.', + inputSchema: { + type: 'object', + properties: { + sessionId: { + type: 'string', + description: 'Session ID in "frontend:userId" format', + }, + limit: { + type: 'number', + description: 'Maximum number of messages to return (default: all)', + }, + offset: { + type: 'number', + description: 'Number of messages to skip from the start (default: 0)', + }, + }, + required: ['sessionId'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as SessionsHistoryArgs; + + try { + const parts = args.sessionId.split(':'); + const frontend = parts[0]; + const userId = parts.slice(1).join(':'); + const session = sessionManager.getSession(frontend, userId); + const allMessages = session.getHistory(); + + const start = args.offset ?? 0; + const end = args.limit ? start + args.limit : allMessages.length; + const messages = allMessages.slice(start, end); + + if (messages.length === 0) { + return { + success: true, + output: `Session "${args.sessionId}" has no messages${start > 0 ? ' at this offset' : ''}.`, + }; + } + + const lines = messages.map((m, i) => { + const content = + typeof m.content === 'string' + ? m.content.slice(0, 200) + : '[multipart]'; + return `${start + i + 1}. [${m.role}] ${content}`; + }); + + return { + success: true, + output: `Session "${args.sessionId}" (${allMessages.length} total messages):\n\n${lines.join('\n')}`, + }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; + + const sessionsCreate: Tool = { + name: 'sessions.create', + description: + 'Create a new session (or get an existing one). Returns the session ID.', + inputSchema: { + type: 'object', + properties: { + frontend: { + type: 'string', + description: 'Frontend/channel name (e.g. "telegram", "webchat")', + }, + userId: { + type: 'string', + description: 'User ID for this session', + }, + }, + required: ['frontend', 'userId'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as SessionsCreateArgs; + + try { + const session = sessionManager.getSession(args.frontend, args.userId); + return { + success: true, + output: `Session "${session.id}" ready (${session.getHistory().length} existing messages).`, + }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; + + const sessionsDelete: Tool = { + name: 'sessions.delete', + description: + 'Clear a session history and close it. The session ID is "frontend:userId".', + inputSchema: { + type: 'object', + properties: { + frontend: { + type: 'string', + description: 'Frontend/channel name', + }, + userId: { + type: 'string', + description: 'User ID', + }, + }, + required: ['frontend', 'userId'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as SessionsDeleteArgs; + + try { + const session = sessionManager.getSession(args.frontend, args.userId); + const messageCount = session.getHistory().length; + session.clear(); + sessionManager.closeSession(args.frontend, args.userId); + return { + success: true, + output: `Session "${args.frontend}:${args.userId}" cleared (${messageCount} messages removed).`, + }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, + }; + + return [sessionsList, sessionsHistory, sessionsCreate, sessionsDelete]; +} diff --git a/src/tools/executor.ts b/src/tools/executor.ts index 992498f..b32a4b1 100644 --- a/src/tools/executor.ts +++ b/src/tools/executor.ts @@ -22,7 +22,7 @@ export class ToolExecutor { } async execute(toolName: string, args: unknown, context?: ToolPolicyContext): Promise { - const tool = this.registry.get(toolName); + const tool = this.registry.getByApiName(toolName); if (!tool) { return { success: false, output: '', error: `Tool '${toolName}' not found` }; } diff --git a/src/tools/index.ts b/src/tools/index.ts index ac993ba..a218457 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -5,7 +5,7 @@ export { ToolExecutor } from './executor.js'; export type { ToolExecutorConfig } from './executor.js'; export { ToolPolicy } from './policy.js'; export type { ToolPolicyContext } from './policy.js'; -export { allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools, createMediaSendTool } from './builtin/index.js'; +export { allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools, createMediaSendTool, createSessionTools, createAgentsListTool, createMessageSendTool, createCronTools } from './builtin/index.js'; export type { WebSearchConfig } from './builtin/web-search.js'; export type { ProcessManagerConfig } from './builtin/process/index.js'; export type { BrowserManagerConfig } from './builtin/browser/index.js'; diff --git a/src/tools/registry.test.ts b/src/tools/registry.test.ts index 2c640cc..372bfff 100644 --- a/src/tools/registry.test.ts +++ b/src/tools/registry.test.ts @@ -56,7 +56,7 @@ describe('ToolRegistry', () => { const anthropicTools = registry.toAnthropicFormat(); expect(anthropicTools).toEqual([{ - name: 'test.echo', + name: 'test_echo', description: 'Echoes input back', input_schema: echoTool.inputSchema, }]); @@ -86,7 +86,7 @@ describe('ToolRegistry', () => { expect(openaiTools).toEqual([{ type: 'function', function: { - name: 'test.echo', + name: 'test_echo', description: 'Echoes input back', parameters: echoTool.inputSchema, }, @@ -161,4 +161,37 @@ describe('ToolRegistry', () => { expect(() => reg.replace(makeTool('nonexistent'))).toThrow('not registered'); }); }); + + describe('ToolRegistry — API name sanitization', () => { + it('sanitizeToolName converts dots to underscores', () => { + expect(ToolRegistry.sanitizeToolName('shell.exec')).toBe('shell_exec'); + expect(ToolRegistry.sanitizeToolName('file.read')).toBe('file_read'); + expect(ToolRegistry.sanitizeToolName('no_dots')).toBe('no_dots'); + }); + + it('getByApiName resolves sanitized names back to internal tools', () => { + const registry = new ToolRegistry(); + registry.register(echoTool); // name is 'test.echo' + + expect(registry.getByApiName('test_echo')).toBe(echoTool); + expect(registry.getByApiName('test.echo')).toBe(echoTool); + expect(registry.getByApiName('nonexistent')).toBeUndefined(); + }); + + it('toAnthropicFormat outputs sanitized names', () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + + const tools = registry.toAnthropicFormat(); + expect(tools[0].name).toBe('test_echo'); + }); + + it('toOpenAIFormat outputs sanitized names', () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + + const tools = registry.toOpenAIFormat(); + expect(tools[0].function.name).toBe('test_echo'); + }); + }); }); diff --git a/src/tools/registry.ts b/src/tools/registry.ts index ccafbe6..ec8199e 100644 --- a/src/tools/registry.ts +++ b/src/tools/registry.ts @@ -20,6 +20,11 @@ export class ToolRegistry { private tools: Map = new Map(); private _policy?: ToolPolicy; + /** Sanitize a tool name for API compatibility (dots → underscores). */ + static sanitizeToolName(name: string): string { + return name.replace(/\./g, '_'); + } + register(tool: Tool): void { if (this.tools.has(tool.name)) { throw new Error(`Tool '${tool.name}' is already registered`); @@ -55,6 +60,11 @@ export class ToolRegistry { return this.tools.get(name); } + /** Resolve a tool by its API-sanitized name (underscores → dots fallback). */ + getByApiName(name: string): Tool | undefined { + return this.tools.get(name) ?? this.tools.get(name.replace(/_/g, '.')); + } + list(): Tool[] { return Array.from(this.tools.values()); } @@ -77,7 +87,7 @@ export class ToolRegistry { toAnthropicFormat(): AnthropicToolDef[] { return this.list().map(t => ({ - name: t.name, + name: ToolRegistry.sanitizeToolName(t.name), description: t.description, input_schema: t.inputSchema, })); @@ -86,7 +96,7 @@ export class ToolRegistry { /** Return Anthropic-format tools filtered by policy. */ filteredToAnthropicFormat(context?: ToolPolicyContext): AnthropicToolDef[] { return this.filteredList(context).map(t => ({ - name: t.name, + name: ToolRegistry.sanitizeToolName(t.name), description: t.description, input_schema: t.inputSchema, })); @@ -96,7 +106,7 @@ export class ToolRegistry { return this.list().map(t => ({ type: 'function' as const, function: { - name: t.name, + name: ToolRegistry.sanitizeToolName(t.name), description: t.description, parameters: t.inputSchema, }, @@ -108,7 +118,7 @@ export class ToolRegistry { return this.filteredList(context).map(t => ({ type: 'function' as const, function: { - name: t.name, + name: ToolRegistry.sanitizeToolName(t.name), description: t.description, parameters: t.inputSchema, },