diff --git a/docs/plans/2026-02-05-openclaw-parity-design.md b/docs/plans/2026-02-05-openclaw-parity-design.md new file mode 100644 index 0000000..66879ba --- /dev/null +++ b/docs/plans/2026-02-05-openclaw-parity-design.md @@ -0,0 +1,378 @@ +# OpenClaw Parity Design: Flynn Feature Roadmap + +## Overview + +Plan to evolve Flynn from a multi-model chat wrapper into a full self-hosted OpenClaw alternative. Bottom-up approach: tools first, then gateway, then channels, then skills/advanced features. + +**Build approach**: Use Sonnet/Haiku via GitHub Copilot for implementation subagents. + +**Current state**: Flynn v0.1.0 covers ~22% of OpenClaw features. Strong in model routing (4 providers, tiered fallback) and session management (SQLite, transfer). Zero tool execution capability. + +--- + +## Phase 1: Agent Tool Framework + Agent Loop + +**Goal**: Turn Flynn from a chatbot into an agent that can execute tools with multi-step reasoning. + +### 1.1 Tool Definition System + +``` +src/tools/ +├── types.ts # Tool, ToolCall, ToolResult interfaces +├── registry.ts # ToolRegistry: register, lookup, list, serialize for providers +├── executor.ts # ToolExecutor: run tools, enforce hooks, timeout, truncation +├── builtin/ +│ ├── shell.ts # shell.exec - run bash commands (cwd, timeout, max output) +│ ├── file-read.ts # file.read - read file contents (path, offset, limit) +│ ├── file-write.ts # file.write - write/create files (path, content) +│ ├── file-edit.ts # file.edit - find-and-replace (path, oldString, newString) +│ ├── file-list.ts # file.list - glob/list directory (pattern, path) +│ └── web-fetch.ts # web.fetch - HTTP GET with markdown conversion (url, format) +``` + +**Tool interface**: +```typescript +interface Tool { + name: string; // e.g. "shell.exec" + description: string; // For the model's tool selection + inputSchema: Record; // JSON Schema for parameters + execute(args: unknown): Promise; +} + +interface ToolResult { + success: boolean; + output: string; + error?: string; +} + +interface ToolCall { + id: string; // Provider-assigned call ID + name: string; // Tool name + args: unknown; // Parsed arguments +} +``` + +**ToolRegistry**: Collects tools, serializes to Anthropic format (`{ name, description, input_schema }`) or OpenAI format (`{ type: "function", function: { name, description, parameters } }`). + +**ToolExecutor**: Wraps execution with: +- Hook engine check (confirm/log/silent) before execution +- Configurable timeout (default 30s for shell, 10s for file ops) +- Output truncation (max 50KB, with "truncated" marker) +- Error capture (catches exceptions, returns as ToolResult.error) + +### 1.2 Model Provider Tool Support + +Update `ChatRequest` to accept optional `tools: Tool[]`. + +**Anthropic** (`src/models/anthropic.ts`): +- Pass tools as `tools` parameter to `messages.create()`/`messages.stream()` +- Parse `tool_use` content blocks from response (id, name, input) +- Accept `tool_result` content blocks in messages (tool_use_id, content) +- Handle `stop_reason: "tool_use"` to signal tool call response + +**OpenAI** (`src/models/openai.ts`): +- Pass tools as `tools` parameter with `type: "function"` wrapper +- Parse `tool_calls` from response choices +- Accept `role: "tool"` messages with `tool_call_id` +- Handle `finish_reason: "tool_calls"` to signal tool call response + +**Types updates** (`src/models/types.ts`): +- Add `ToolCall` to `ChatResponse` +- Add `tool_use` and `tool_result` message roles +- Add `tools` to `ChatRequest` +- Add `tool_calls` stream event type + +### 1.3 Agent Loop + +Replace single-turn NativeAgent with iterative tool-use loop: + +``` +User message + -> Model call (with tools in request) + -> If response contains tool_use: + -> For each tool call: + -> Check hooks (confirm/log/silent) + -> If confirm: wait for approval (Telegram inline keyboard / TUI prompt) + -> Execute tool via ToolExecutor + -> Collect ToolResult + -> Append tool_results to conversation + -> Loop back to model call + -> If text response (no tool_use): + -> Return final response to user + -> If max iterations reached (default 10): + -> Return partial response with warning +``` + +**Streaming during tool execution**: Emit status events: +- `{ event: "tool_start", tool: "shell.exec", args: { command: "ls" } }` +- `{ event: "tool_end", tool: "shell.exec", result: { success: true, output: "..." } }` +- These render as status lines in TUI and Telegram + +**Abort support**: +- TUI: Escape key sets abort flag, checked before each tool execution +- Telegram: `/cancel` command sets abort flag on active session +- Agent loop checks abort flag before each iteration + +### 1.4 Frontend Updates + +**Telegram**: +- Show tool execution as status messages ("Running shell.exec: `ls -la`...") +- Confirmation buttons already exist, wire to tool executor +- Show final response after tool loop completes + +**TUI (both modes)**: +- Show tool calls inline with dimmed formatting +- Show tool results with output (truncated in UI if long) +- Streaming text interleaved with tool status +- Escape aborts the agent loop + +### 1.5 Deliverables + +- [ ] `src/tools/types.ts` - Tool, ToolCall, ToolResult interfaces +- [ ] `src/tools/registry.ts` - ToolRegistry with provider serialization +- [ ] `src/tools/executor.ts` - ToolExecutor with hooks, timeout, truncation +- [ ] `src/tools/builtin/shell.ts` - Shell exec tool +- [ ] `src/tools/builtin/file-read.ts` - File read tool +- [ ] `src/tools/builtin/file-write.ts` - File write tool +- [ ] `src/tools/builtin/file-edit.ts` - File edit tool +- [ ] `src/tools/builtin/file-list.ts` - File list/glob tool +- [ ] `src/tools/builtin/web-fetch.ts` - Web fetch tool +- [ ] `src/models/types.ts` - Add tool-related types +- [ ] `src/models/anthropic.ts` - Add tool use support +- [ ] `src/models/openai.ts` - Add tool use support +- [ ] `src/backends/native/agent.ts` - Agent loop with tool execution +- [ ] `src/frontends/telegram/bot.ts` - Tool status messages +- [ ] `src/frontends/tui/minimal.ts` - Tool display in minimal TUI +- [ ] `src/frontends/tui/components/App.tsx` - Tool display in fullscreen TUI +- [ ] Tests for all new modules + +--- + +## Phase 2: WebSocket Gateway + +**Goal**: Central control plane that multiple clients connect to. Decouples frontends from the agent. + +### 2.1 Gateway Core + +``` +src/gateway/ +├── server.ts # WebSocket server (ws library), configurable port +├── protocol.ts # JSON-RPC-style message types +├── router.ts # Routes methods to handlers +├── auth.ts # Token + password auth, Tailscale identity headers +├── session-bridge.ts # Maps WS client connections to sessions +└── handlers/ + ├── agent.ts # agent.send, agent.cancel, agent.status + ├── sessions.ts # sessions.list, sessions.history, sessions.send + ├── tools.ts # tools.list, tools.invoke + ├── config.ts # config.get, config.patch + └── system.ts # system.health, system.restart +``` + +### 2.2 Protocol + +JSON-RPC-like over WebSocket: + +``` +Request: { id: number, method: string, params: object } +Response: { id: number, result: object } +Error: { id: number, error: { code: number, message: string } } +Event: { id: number, event: string, data: object } // streaming +``` + +Event types for agent.send streaming: +- `content` - text chunk from model +- `tool_start` - tool execution beginning +- `tool_end` - tool execution complete +- `thinking` - model reasoning (if exposed) +- `done` - final response + +### 2.3 Control UI + +Minimal web dashboard served from gateway port: + +``` +src/gateway/ui/ +├── index.html # Dashboard: sessions, model info, config +├── chat.html # WebChat: WS-connected chat interface +└── assets/ # CSS + minimal JS (no framework, or Preact) +``` + +### 2.4 Daemon Refactor + +Gateway becomes the hub: +- Owns session manager, tool registry, model router +- Telegram adapter connects as a gateway client (in-process bridge) +- TUI connects as a gateway client (in-process or WS) +- WebChat connects via WS from browser + +### 2.5 Deliverables + +- [ ] `src/gateway/server.ts` - WebSocket server +- [ ] `src/gateway/protocol.ts` - Message type definitions +- [ ] `src/gateway/router.ts` - Method routing +- [ ] `src/gateway/auth.ts` - Authentication +- [ ] `src/gateway/session-bridge.ts` - Client-to-session mapping +- [ ] Gateway handlers (agent, sessions, tools, config, system) +- [ ] `src/gateway/ui/` - Control UI + WebChat +- [ ] Refactor daemon to use gateway as hub +- [ ] Refactor Telegram as gateway client +- [ ] Tests for gateway protocol and handlers + +--- + +## Phase 3: Channel Adapters + +**Goal**: Multi-channel inbox. One assistant accessible from WhatsApp, Telegram, Discord, Slack, and WebChat. + +### 3.1 Channel Adapter Interface + +``` +src/channels/ +├── types.ts # ChannelAdapter interface +├── registry.ts # ChannelRegistry: load/unload at runtime +├── telegram/ # Refactored from src/frontends/telegram/ +├── discord/ # discord.js +├── whatsapp/ # Baileys +├── slack/ # Bolt (Socket Mode) +└── webchat/ # Gateway WS built-in +``` + +**ChannelAdapter interface**: +```typescript +interface ChannelAdapter { + name: string; + connect(): Promise; + disconnect(): Promise; + send(peerId: string, message: OutboundMessage): Promise; + onMessage(handler: (msg: InboundMessage) => void): void; +} +``` + +### 3.2 Build Order + +1. Telegram (refactor existing) +2. Discord (discord.js) +3. WhatsApp (Baileys) +4. Slack (Bolt Socket Mode) +5. WebChat (gateway built-in) + +### 3.3 Security Per Channel + +Every adapter implements: +- DM pairing: unknown senders get pairing code, must be approved via CLI/UI +- Allowlists: `channels..allowFrom` config array +- Group mention gating: `channels..groups.*.requireMention` +- Rate limiting: per-sender throttle (configurable) +- Message size limits per channel + +### 3.4 Deliverables + +- [ ] `src/channels/types.ts` - ChannelAdapter interface +- [ ] `src/channels/registry.ts` - Channel registry +- [ ] `src/channels/telegram/` - Refactored Telegram adapter +- [ ] `src/channels/discord/` - Discord adapter +- [ ] `src/channels/whatsapp/` - WhatsApp adapter +- [ ] `src/channels/slack/` - Slack adapter +- [ ] `src/channels/webchat/` - WebChat adapter +- [ ] DM pairing system +- [ ] Per-channel security (allowlists, mention gating, rate limiting) +- [ ] Tests per adapter + +--- + +## Phase 4: Skills + MCP + +**Goal**: Extensible capability system with community skills and MCP tool servers. + +### 4.1 Skills System + +``` +src/skills/ +├── types.ts # Skill, SkillManifest interfaces +├── loader.ts # Load SKILL.md + scripts from skill directories +├── registry.ts # Discovery, gating (OS/bin/env checks) +└── installer.ts # Auto-install dependencies (with user confirmation) +``` + +Skill directory structure: +``` +~/.flynn/workspace/skills// +├── SKILL.md # Instructions injected into system prompt +├── manifest.json # Requirements, permissions, dependencies +└── scripts/ # Executable helpers +``` + +Three tiers: bundled (shipped with Flynn), managed (installed via CLI), workspace (user-created). + +### 4.2 MCP Integration + +``` +src/mcp/ +├── client.ts # MCP client (stdio transport) +├── bridge.ts # Convert MCP tools -> Flynn tool registry entries +└── manager.ts # Lifecycle: start/stop/restart MCP servers per config +``` + +Wire the existing `mcp.servers` config to actually start MCP server processes, discover their tools, and register them in the tool registry. MCP tools appear alongside builtins -- the agent doesn't know the difference. + +### 4.3 Deliverables + +- [ ] Skills types, loader, registry, installer +- [ ] MCP client, bridge, manager +- [ ] Wire MCP config to runtime +- [ ] Bundled skills (at least: web-search, git, system-info) +- [ ] Tests + +--- + +## Phase 5: Advanced Features + +Build after core is solid. Each is independent. + +| Feature | Module | Description | +|---------|--------|-------------| +| Cron/scheduling | `src/automation/cron.ts` | Cron expressions trigger agent messages | +| Webhooks | `src/automation/webhooks.ts` | Inbound HTTP triggers | +| Browser tool | `src/tools/builtin/browser.ts` | Playwright headless browser | +| Agent-to-agent | `sessions_list/history/send` tools | Multi-agent coordination | +| Sub-agents | `src/backends/native/subagent.ts` | Spawn scoped child sessions | +| Sandboxing | `src/sandbox/` | Docker per-session isolation | +| Voice | `src/voice/` | STT (Whisper) + TTS (ElevenLabs) | +| Canvas/A2UI | `src/gateway/ui/canvas/` | Agent-driven web workspace | +| CLI surface | `src/cli/` | `flynn gateway`, `flynn agent`, `flynn send`, etc. | +| Mobile nodes | Separate repos | iOS (Swift) + Android (Kotlin) companion apps | +| Onboarding wizard | `src/cli/onboard.ts` | Guided setup flow | +| Doctor diagnostics | `src/cli/doctor.ts` | Config + health validation | + +--- + +## Implementation Notes + +### Model Selection for Subagents + +Use cheaper/faster models via GitHub Copilot for implementation: +- **Sonnet**: Complex implementation tasks (agent loop, gateway, protocol) +- **Haiku**: Mechanical tasks (individual tool implementations, adapter boilerplate, tests) +- **Opus**: Design review, architecture decisions only + +### Testing Strategy + +- Unit tests for all tools (mock filesystem/network) +- Unit tests for tool registry serialization (Anthropic + OpenAI formats) +- Integration tests for agent loop (mock model returns tool_use, verify execution) +- Integration tests for gateway protocol (WS client/server) +- E2E tests for tool execution (real shell commands in temp dirs) + +### Migration Path + +- Phase 1 is additive (no breaking changes to existing code) +- Phase 2 refactors daemon (breaking, but internal only) +- Phase 3 moves Telegram (file rename, adapter interface) +- Each phase is a separate feature branch + +--- + +*Design Version: 1.0* +*Created: 2026-02-05* +*Approach: Bottom-up (tools -> gateway -> channels -> skills -> advanced)* diff --git a/docs/plans/2026-02-05-phase1-tool-framework.md b/docs/plans/2026-02-05-phase1-tool-framework.md new file mode 100644 index 0000000..e4a85ae --- /dev/null +++ b/docs/plans/2026-02-05-phase1-tool-framework.md @@ -0,0 +1,2146 @@ +# Phase 1: Agent Tool Framework + Agent Loop — Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Add a tool execution framework with native function calling (Anthropic/OpenAI) and an iterative agent loop so Flynn can run shell commands, read/write/edit files, fetch web pages, and chain multiple tool calls per turn. + +**Architecture:** Tools are defined as typed objects with JSON Schema inputs and an async `execute` method. A ToolRegistry collects them and serializes to provider-specific formats. A ToolExecutor wraps execution with hook checks, timeouts, and output truncation. The NativeAgent gains an agentic loop: call model -> if tool_use, execute tools -> feed results back -> repeat until text response or max iterations. + +**Tech Stack:** TypeScript (strict, NodeNext), Vitest, Anthropic SDK `@anthropic-ai/sdk`, OpenAI SDK `openai`, Node.js `child_process` for shell, `fs` for file ops, `fetch` for web. + +**Build model policy:** Opus 4.6 supervises and reviews. Sonnet/Haiku via GitHub Copilot execute implementation tasks as subagents. Each task dispatched to a subagent, reviewed by Opus before committing. + +--- + +## Task 0: SOUL.md + System Prompt Foundation + +**Files:** +- Create: `SOUL.md` (project root) +- Modify: `src/daemon/index.ts` (load SOUL.md into system prompt) + +**Step 1: Create SOUL.md** + +Already created at project root. Defines Flynn's identity: direct, technical, opinionated, security-conscious. Loaded into every session. + +**Step 2: Update daemon to load SOUL.md** + +In `src/daemon/index.ts`, replace the hardcoded `SYSTEM_PROMPT` string with a loader that reads `SOUL.md` from the workspace root and prepends it to the system prompt: + +```typescript +import { readFileSync, existsSync } from 'fs'; +import { resolve } from 'path'; + +function loadSystemPrompt(): string { + const soulPath = resolve(process.cwd(), 'SOUL.md'); + let soul = ''; + if (existsSync(soulPath)) { + soul = readFileSync(soulPath, 'utf-8') + '\n\n'; + } + return soul + TOOL_INSTRUCTIONS; +} +``` + +Where `TOOL_INSTRUCTIONS` is the tool-aware portion added in Task 17. + +**Step 3: Commit** + +```bash +git add SOUL.md src/daemon/index.ts +git commit -m "feat: add SOUL.md identity file and load into system prompt" +``` + +--- + +## Task 1: Tool Type Definitions + +**Files:** +- Create: `src/tools/types.ts` +- Test: `src/tools/types.test.ts` + +**Step 1: Write the failing test** + +```typescript +// src/tools/types.test.ts +import { describe, it, expect } from 'vitest'; +import type { Tool, ToolCall, ToolResult, ToolUseMessage, ToolResultMessage } from './types.js'; + +describe('Tool types', () => { + it('Tool interface is structurally correct', () => { + const tool: Tool = { + name: 'test.echo', + description: 'Echoes input', + inputSchema: { + type: 'object', + properties: { text: { type: 'string' } }, + required: ['text'], + }, + execute: async (args) => ({ success: true, output: String((args as { text: string }).text) }), + }; + + expect(tool.name).toBe('test.echo'); + expect(tool.inputSchema.type).toBe('object'); + }); + + it('ToolCall has required fields', () => { + const call: ToolCall = { id: 'call_1', name: 'test.echo', args: { text: 'hi' } }; + expect(call.id).toBe('call_1'); + expect(call.name).toBe('test.echo'); + }); + + it('ToolResult has success and output', () => { + const result: ToolResult = { success: true, output: 'hello' }; + expect(result.success).toBe(true); + + const errResult: ToolResult = { success: false, output: '', error: 'boom' }; + expect(errResult.error).toBe('boom'); + }); + + it('ToolUseMessage has correct shape', () => { + const msg: ToolUseMessage = { + role: 'assistant', + content: [{ type: 'tool_use', id: 'call_1', name: 'test.echo', input: { text: 'hi' } }], + }; + expect(msg.role).toBe('assistant'); + expect(msg.content[0].type).toBe('tool_use'); + }); + + it('ToolResultMessage has correct shape', () => { + const msg: ToolResultMessage = { + role: 'user', + content: [{ type: 'tool_result', tool_use_id: 'call_1', content: 'output here' }], + }; + expect(msg.role).toBe('user'); + expect(msg.content[0].type).toBe('tool_result'); + }); +}); +``` + +**Step 2: Run test to verify it fails** + +Run: `pnpm vitest run src/tools/types.test.ts` +Expected: FAIL — module `./types.js` not found + +**Step 3: Write the implementation** + +```typescript +// src/tools/types.ts + +export interface ToolInputSchema { + type: 'object'; + properties: Record; + required?: string[]; +} + +export interface Tool { + name: string; + description: string; + inputSchema: ToolInputSchema; + execute(args: unknown): Promise; +} + +export interface ToolCall { + id: string; + name: string; + args: unknown; +} + +export interface ToolResult { + success: boolean; + output: string; + error?: string; +} + +// Content block for assistant messages containing tool calls +export interface ToolUseBlock { + type: 'tool_use'; + id: string; + name: string; + input: unknown; +} + +// Content block for user messages returning tool results +export interface ToolResultBlock { + type: 'tool_result'; + tool_use_id: string; + content: string; + is_error?: boolean; +} + +// Message from assistant requesting tool use +export interface ToolUseMessage { + role: 'assistant'; + content: ToolUseBlock[]; +} + +// Message from user returning tool results +export interface ToolResultMessage { + role: 'user'; + content: ToolResultBlock[]; +} +``` + +**Step 4: Run test to verify it passes** + +Run: `pnpm vitest run src/tools/types.test.ts` +Expected: PASS (all 5 tests) + +**Step 5: Commit** + +```bash +git add src/tools/types.ts src/tools/types.test.ts +git commit -m "feat(tools): add tool type definitions" +``` + +--- + +## Task 2: Tool Registry + +**Files:** +- Create: `src/tools/registry.ts` +- Test: `src/tools/registry.test.ts` + +**Step 1: Write the failing test** + +```typescript +// src/tools/registry.test.ts +import { describe, it, expect } from 'vitest'; +import { ToolRegistry } from './registry.js'; +import type { Tool } from './types.js'; + +const echoTool: Tool = { + name: 'test.echo', + description: 'Echoes input back', + inputSchema: { + type: 'object', + properties: { text: { type: 'string', description: 'Text to echo' } }, + required: ['text'], + }, + execute: async (args) => ({ success: true, output: String((args as { text: string }).text) }), +}; + +const greetTool: Tool = { + name: 'test.greet', + description: 'Greets someone', + inputSchema: { + type: 'object', + properties: { name: { type: 'string' } }, + required: ['name'], + }, + execute: async (args) => ({ success: true, output: `Hello ${(args as { name: string }).name}` }), +}; + +describe('ToolRegistry', () => { + it('registers and retrieves tools by name', () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + + expect(registry.get('test.echo')).toBe(echoTool); + expect(registry.get('nonexistent')).toBeUndefined(); + }); + + it('lists all registered tools', () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + registry.register(greetTool); + + const tools = registry.list(); + expect(tools).toHaveLength(2); + expect(tools.map(t => t.name)).toContain('test.echo'); + expect(tools.map(t => t.name)).toContain('test.greet'); + }); + + it('throws on duplicate registration', () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + expect(() => registry.register(echoTool)).toThrow('already registered'); + }); + + it('serializes to Anthropic format', () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + + const anthropicTools = registry.toAnthropicFormat(); + expect(anthropicTools).toEqual([{ + name: 'test.echo', + description: 'Echoes input back', + input_schema: echoTool.inputSchema, + }]); + }); + + it('serializes to OpenAI format', () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + + const openaiTools = registry.toOpenAIFormat(); + expect(openaiTools).toEqual([{ + type: 'function', + function: { + name: 'test.echo', + description: 'Echoes input back', + parameters: echoTool.inputSchema, + }, + }]); + }); +}); +``` + +**Step 2: Run test to verify it fails** + +Run: `pnpm vitest run src/tools/registry.test.ts` +Expected: FAIL — module `./registry.js` not found + +**Step 3: Write the implementation** + +```typescript +// src/tools/registry.ts +import type { Tool, ToolInputSchema } from './types.js'; + +export interface AnthropicToolDef { + name: string; + description: string; + input_schema: ToolInputSchema; +} + +export interface OpenAIToolDef { + type: 'function'; + function: { + name: string; + description: string; + parameters: ToolInputSchema; + }; +} + +export class ToolRegistry { + private tools: Map = new Map(); + + register(tool: Tool): void { + if (this.tools.has(tool.name)) { + throw new Error(`Tool '${tool.name}' is already registered`); + } + this.tools.set(tool.name, tool); + } + + get(name: string): Tool | undefined { + return this.tools.get(name); + } + + list(): Tool[] { + return Array.from(this.tools.values()); + } + + toAnthropicFormat(): AnthropicToolDef[] { + return this.list().map(t => ({ + name: t.name, + description: t.description, + input_schema: t.inputSchema, + })); + } + + toOpenAIFormat(): OpenAIToolDef[] { + return this.list().map(t => ({ + type: 'function' as const, + function: { + name: t.name, + description: t.description, + parameters: t.inputSchema, + }, + })); + } +} +``` + +**Step 4: Run test to verify it passes** + +Run: `pnpm vitest run src/tools/registry.test.ts` +Expected: PASS (all 5 tests) + +**Step 5: Commit** + +```bash +git add src/tools/registry.ts src/tools/registry.test.ts +git commit -m "feat(tools): add ToolRegistry with provider serialization" +``` + +--- + +## Task 3: Tool Executor + +**Files:** +- Create: `src/tools/executor.ts` +- Test: `src/tools/executor.test.ts` + +**Step 1: Write the failing test** + +```typescript +// src/tools/executor.test.ts +import { describe, it, expect, vi } from 'vitest'; +import { ToolExecutor } from './executor.js'; +import { ToolRegistry } from './registry.js'; +import { HookEngine } from '../hooks/engine.js'; +import type { Tool } from './types.js'; + +const echoTool: Tool = { + name: 'test.echo', + description: 'Echoes input', + inputSchema: { type: 'object', properties: { text: { type: 'string' } }, required: ['text'] }, + execute: async (args) => ({ success: true, output: (args as { text: string }).text }), +}; + +const slowTool: Tool = { + name: 'test.slow', + description: 'Takes forever', + inputSchema: { type: 'object', properties: {} }, + execute: async () => { + await new Promise(r => setTimeout(r, 5000)); + return { success: true, output: 'done' }; + }, +}; + +const failTool: Tool = { + name: 'test.fail', + description: 'Throws', + inputSchema: { type: 'object', properties: {} }, + execute: async () => { throw new Error('kaboom'); }, +}; + +const bigOutputTool: Tool = { + name: 'test.big', + description: 'Returns huge output', + inputSchema: { type: 'object', properties: {} }, + execute: async () => ({ success: true, output: 'x'.repeat(100_000) }), +}; + +describe('ToolExecutor', () => { + it('executes a tool and returns result', async () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const result = await executor.execute('test.echo', { text: 'hello' }); + expect(result.success).toBe(true); + expect(result.output).toBe('hello'); + }); + + it('returns error for unknown tool', async () => { + const registry = new ToolRegistry(); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const result = await executor.execute('nonexistent', {}); + expect(result.success).toBe(false); + expect(result.error).toContain('not found'); + }); + + it('catches tool execution errors', async () => { + const registry = new ToolRegistry(); + registry.register(failTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const result = await executor.execute('test.fail', {}); + expect(result.success).toBe(false); + expect(result.error).toContain('kaboom'); + }); + + it('enforces timeout', async () => { + const registry = new ToolRegistry(); + registry.register(slowTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks, { defaultTimeoutMs: 100 }); + + const result = await executor.execute('test.slow', {}); + expect(result.success).toBe(false); + expect(result.error).toContain('timed out'); + }); + + it('truncates large output', async () => { + const registry = new ToolRegistry(); + registry.register(bigOutputTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks, { maxOutputBytes: 1000 }); + + const result = await executor.execute('test.big', {}); + expect(result.success).toBe(true); + expect(result.output.length).toBeLessThanOrEqual(1100); // 1000 + truncation message + expect(result.output).toContain('[truncated]'); + }); + + it('blocks on confirm hook and resolves when approved', async () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + const hooks = new HookEngine({ confirm: ['test.*'], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + // Start execution (will block on confirmation) + const resultPromise = executor.execute('test.echo', { text: 'hi' }); + + // Approve the pending confirmation + const pending = hooks.getPendingConfirmations(); + expect(pending).toHaveLength(1); + hooks.resolveConfirmation(pending[0].id, { approved: true }); + + const result = await resultPromise; + expect(result.success).toBe(true); + expect(result.output).toBe('hi'); + }); + + it('blocks on confirm hook and returns denied', async () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + const hooks = new HookEngine({ confirm: ['test.*'], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const resultPromise = executor.execute('test.echo', { text: 'hi' }); + + const pending = hooks.getPendingConfirmations(); + hooks.resolveConfirmation(pending[0].id, { approved: false, reason: 'nope' }); + + const result = await resultPromise; + expect(result.success).toBe(false); + expect(result.error).toContain('denied'); + }); +}); +``` + +**Step 2: Run test to verify it fails** + +Run: `pnpm vitest run src/tools/executor.test.ts` +Expected: FAIL — module `./executor.js` not found + +**Step 3: Write the implementation** + +```typescript +// src/tools/executor.ts +import type { ToolResult } from './types.js'; +import type { ToolRegistry } from './registry.js'; +import type { HookEngine } from '../hooks/engine.js'; + +export interface ToolExecutorConfig { + defaultTimeoutMs?: number; + maxOutputBytes?: number; +} + +export class ToolExecutor { + private registry: ToolRegistry; + private hooks: HookEngine; + private defaultTimeoutMs: number; + private maxOutputBytes: number; + + constructor(registry: ToolRegistry, hooks: HookEngine, config?: ToolExecutorConfig) { + this.registry = registry; + this.hooks = hooks; + this.defaultTimeoutMs = config?.defaultTimeoutMs ?? 30_000; + this.maxOutputBytes = config?.maxOutputBytes ?? 51_200; + } + + async execute(toolName: string, args: unknown): Promise { + const tool = this.registry.get(toolName); + if (!tool) { + return { success: false, output: '', error: `Tool '${toolName}' not found` }; + } + + // Check hooks + const action = this.hooks.getAction(toolName); + if (action === 'confirm') { + const hookResult = await this.hooks.requestConfirmation( + toolName, + args as Record, + ); + if (!hookResult.approved) { + return { + success: false, + output: '', + error: `Tool '${toolName}' denied by user: ${hookResult.reason ?? 'no reason'}`, + }; + } + } + + // Execute with timeout + try { + const result = await Promise.race([ + tool.execute(args), + new Promise((_, reject) => + setTimeout(() => reject(new Error(`Tool '${toolName}' timed out after ${this.defaultTimeoutMs}ms`)), this.defaultTimeoutMs) + ), + ]); + + // Truncate output if too large + if (result.output.length > this.maxOutputBytes) { + result.output = result.output.slice(0, this.maxOutputBytes) + '\n[truncated]'; + } + + return result; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + } +} +``` + +**Step 4: Run test to verify it passes** + +Run: `pnpm vitest run src/tools/executor.test.ts` +Expected: PASS (all 7 tests) + +**Step 5: Commit** + +```bash +git add src/tools/executor.ts src/tools/executor.test.ts +git commit -m "feat(tools): add ToolExecutor with hooks, timeout, truncation" +``` + +--- + +## Task 4: Shell Exec Tool + +**Files:** +- Create: `src/tools/builtin/shell.ts` +- Test: `src/tools/builtin/shell.test.ts` + +**Step 1: Write the failing test** + +```typescript +// src/tools/builtin/shell.test.ts +import { describe, it, expect } from 'vitest'; +import { shellExecTool } from './shell.js'; +import { tmpdir } from 'os'; +import { mkdtempSync, writeFileSync, rmSync } from 'fs'; +import { join } from 'path'; + +describe('shell.exec tool', () => { + it('has correct metadata', () => { + expect(shellExecTool.name).toBe('shell.exec'); + expect(shellExecTool.inputSchema.required).toContain('command'); + }); + + it('runs a simple command', async () => { + const result = await shellExecTool.execute({ command: 'echo hello' }); + expect(result.success).toBe(true); + expect(result.output.trim()).toBe('hello'); + }); + + it('captures stderr on failure', async () => { + const result = await shellExecTool.execute({ command: 'ls /nonexistent_dir_xyz' }); + expect(result.success).toBe(false); + expect(result.error).toBeTruthy(); + }); + + it('respects cwd parameter', async () => { + const dir = mkdtempSync(join(tmpdir(), 'flynn-test-')); + writeFileSync(join(dir, 'test.txt'), 'content'); + try { + const result = await shellExecTool.execute({ command: 'ls test.txt', cwd: dir }); + expect(result.success).toBe(true); + expect(result.output.trim()).toBe('test.txt'); + } finally { + rmSync(dir, { recursive: true }); + } + }); + + it('respects timeout parameter', async () => { + const result = await shellExecTool.execute({ command: 'sleep 10', timeout: 200 }); + expect(result.success).toBe(false); + expect(result.error).toContain('timed out'); + }); +}); +``` + +**Step 2: Run test to verify it fails** + +Run: `pnpm vitest run src/tools/builtin/shell.test.ts` +Expected: FAIL — module `./shell.js` not found + +**Step 3: Write the implementation** + +```typescript +// src/tools/builtin/shell.ts +import { execFile } from 'child_process'; +import type { Tool, ToolResult } from '../types.js'; + +interface ShellExecArgs { + command: string; + cwd?: string; + timeout?: number; +} + +export const shellExecTool: Tool = { + name: 'shell.exec', + description: 'Execute a shell command and return stdout/stderr. Use for running build commands, git operations, system tasks, etc.', + inputSchema: { + type: 'object', + properties: { + command: { type: 'string', description: 'The shell command to execute' }, + cwd: { type: 'string', description: 'Working directory (optional)' }, + timeout: { type: 'number', description: 'Timeout in milliseconds (default 30000)' }, + }, + required: ['command'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as ShellExecArgs; + const timeout = args.timeout ?? 30_000; + + return new Promise((resolve) => { + execFile('bash', ['-c', args.command], { + cwd: args.cwd, + timeout, + maxBuffer: 1024 * 1024, // 1MB + }, (error, stdout, stderr) => { + if (error) { + if (error.killed || error.signal === 'SIGTERM') { + resolve({ success: false, output: stdout, error: `Command timed out after ${timeout}ms` }); + return; + } + resolve({ + success: false, + output: stdout, + error: stderr || error.message, + }); + return; + } + resolve({ success: true, output: stdout + (stderr ? `\nstderr: ${stderr}` : '') }); + }); + }); + }, +}; +``` + +**Step 4: Run test to verify it passes** + +Run: `pnpm vitest run src/tools/builtin/shell.test.ts` +Expected: PASS (all 5 tests) + +**Step 5: Commit** + +```bash +git add src/tools/builtin/shell.ts src/tools/builtin/shell.test.ts +git commit -m "feat(tools): add shell.exec builtin tool" +``` + +--- + +## Task 5: File Tools (read, write, edit, list) + +**Files:** +- Create: `src/tools/builtin/file-read.ts` +- Create: `src/tools/builtin/file-write.ts` +- Create: `src/tools/builtin/file-edit.ts` +- Create: `src/tools/builtin/file-list.ts` +- Test: `src/tools/builtin/file.test.ts` (all four in one test file) + +**Step 1: Write the failing test** + +```typescript +// src/tools/builtin/file.test.ts +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { fileReadTool } from './file-read.js'; +import { fileWriteTool } from './file-write.js'; +import { fileEditTool } from './file-edit.js'; +import { fileListTool } from './file-list.js'; +import { mkdtempSync, writeFileSync, readFileSync, rmSync, mkdirSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; + +let testDir: string; + +beforeEach(() => { + testDir = mkdtempSync(join(tmpdir(), 'flynn-file-test-')); +}); + +afterEach(() => { + rmSync(testDir, { recursive: true }); +}); + +describe('file.read', () => { + it('reads a file', async () => { + writeFileSync(join(testDir, 'hello.txt'), 'hello world'); + const result = await fileReadTool.execute({ path: join(testDir, 'hello.txt') }); + expect(result.success).toBe(true); + expect(result.output).toBe('hello world'); + }); + + it('reads with offset and limit', async () => { + writeFileSync(join(testDir, 'lines.txt'), 'line1\nline2\nline3\nline4\n'); + const result = await fileReadTool.execute({ path: join(testDir, 'lines.txt'), offset: 1, limit: 2 }); + expect(result.success).toBe(true); + expect(result.output).toBe('line2\nline3'); + }); + + it('returns error for missing file', async () => { + const result = await fileReadTool.execute({ path: join(testDir, 'nope.txt') }); + expect(result.success).toBe(false); + expect(result.error).toBeTruthy(); + }); +}); + +describe('file.write', () => { + it('writes a new file', async () => { + const filePath = join(testDir, 'new.txt'); + const result = await fileWriteTool.execute({ path: filePath, content: 'new content' }); + expect(result.success).toBe(true); + expect(readFileSync(filePath, 'utf-8')).toBe('new content'); + }); + + it('creates intermediate directories', async () => { + const filePath = join(testDir, 'sub', 'dir', 'file.txt'); + const result = await fileWriteTool.execute({ path: filePath, content: 'deep' }); + expect(result.success).toBe(true); + expect(readFileSync(filePath, 'utf-8')).toBe('deep'); + }); +}); + +describe('file.edit', () => { + it('replaces a string in a file', async () => { + const filePath = join(testDir, 'edit.txt'); + writeFileSync(filePath, 'hello world'); + const result = await fileEditTool.execute({ + path: filePath, + old_string: 'world', + new_string: 'flynn', + }); + expect(result.success).toBe(true); + expect(readFileSync(filePath, 'utf-8')).toBe('hello flynn'); + }); + + it('fails if old_string not found', async () => { + const filePath = join(testDir, 'edit2.txt'); + writeFileSync(filePath, 'hello world'); + const result = await fileEditTool.execute({ + path: filePath, + old_string: 'xyz', + new_string: 'abc', + }); + expect(result.success).toBe(false); + expect(result.error).toContain('not found'); + }); + + it('fails if old_string matches multiple times without replace_all', async () => { + const filePath = join(testDir, 'edit3.txt'); + writeFileSync(filePath, 'aaa bbb aaa'); + const result = await fileEditTool.execute({ + path: filePath, + old_string: 'aaa', + new_string: 'ccc', + }); + expect(result.success).toBe(false); + expect(result.error).toContain('multiple'); + }); + + it('replaces all when replace_all is true', async () => { + const filePath = join(testDir, 'edit4.txt'); + writeFileSync(filePath, 'aaa bbb aaa'); + const result = await fileEditTool.execute({ + path: filePath, + old_string: 'aaa', + new_string: 'ccc', + replace_all: true, + }); + expect(result.success).toBe(true); + expect(readFileSync(filePath, 'utf-8')).toBe('ccc bbb ccc'); + }); +}); + +describe('file.list', () => { + it('lists files in a directory', async () => { + writeFileSync(join(testDir, 'a.txt'), ''); + writeFileSync(join(testDir, 'b.ts'), ''); + mkdirSync(join(testDir, 'sub')); + writeFileSync(join(testDir, 'sub', 'c.txt'), ''); + + const result = await fileListTool.execute({ path: testDir }); + expect(result.success).toBe(true); + expect(result.output).toContain('a.txt'); + expect(result.output).toContain('b.ts'); + expect(result.output).toContain('sub'); + }); + + it('filters with glob pattern', async () => { + writeFileSync(join(testDir, 'a.txt'), ''); + writeFileSync(join(testDir, 'b.ts'), ''); + + const result = await fileListTool.execute({ path: testDir, pattern: '*.ts' }); + expect(result.success).toBe(true); + expect(result.output).toContain('b.ts'); + expect(result.output).not.toContain('a.txt'); + }); +}); +``` + +**Step 2: Run test to verify it fails** + +Run: `pnpm vitest run src/tools/builtin/file.test.ts` +Expected: FAIL — modules not found + +**Step 3: Write the implementations** + +```typescript +// src/tools/builtin/file-read.ts +import { readFileSync } from 'fs'; +import type { Tool, ToolResult } from '../types.js'; + +interface FileReadArgs { + path: string; + offset?: number; // 0-based line offset + limit?: number; // number of lines +} + +export const fileReadTool: Tool = { + name: 'file.read', + description: 'Read the contents of a file. Optionally read specific lines with offset and limit.', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Absolute path to the file' }, + offset: { type: 'number', description: 'Line offset to start reading from (0-based)' }, + limit: { type: 'number', description: 'Number of lines to read' }, + }, + required: ['path'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as FileReadArgs; + try { + const content = readFileSync(args.path, 'utf-8'); + if (args.offset !== undefined || args.limit !== undefined) { + const lines = content.split('\n'); + const start = args.offset ?? 0; + const end = args.limit !== undefined ? start + args.limit : lines.length; + return { success: true, output: lines.slice(start, end).join('\n') }; + } + return { success: true, output: content }; + } catch (error) { + return { success: false, output: '', error: error instanceof Error ? error.message : String(error) }; + } + }, +}; +``` + +```typescript +// src/tools/builtin/file-write.ts +import { writeFileSync, mkdirSync } from 'fs'; +import { dirname } from 'path'; +import type { Tool, ToolResult } from '../types.js'; + +interface FileWriteArgs { + path: string; + content: string; +} + +export const fileWriteTool: Tool = { + name: 'file.write', + description: 'Write content to a file. Creates the file and parent directories if they do not exist.', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Absolute path to write to' }, + content: { type: 'string', description: 'Content to write' }, + }, + required: ['path', 'content'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as FileWriteArgs; + try { + mkdirSync(dirname(args.path), { recursive: true }); + writeFileSync(args.path, args.content, 'utf-8'); + return { success: true, output: `Wrote ${args.content.length} bytes to ${args.path}` }; + } catch (error) { + return { success: false, output: '', error: error instanceof Error ? error.message : String(error) }; + } + }, +}; +``` + +```typescript +// src/tools/builtin/file-edit.ts +import { readFileSync, writeFileSync } from 'fs'; +import type { Tool, ToolResult } from '../types.js'; + +interface FileEditArgs { + path: string; + old_string: string; + new_string: string; + replace_all?: boolean; +} + +export const fileEditTool: Tool = { + name: 'file.edit', + description: 'Edit a file by replacing an exact string match. Fails if old_string is not found or matches multiple times (unless replace_all is true).', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Absolute path to the file' }, + old_string: { type: 'string', description: 'Exact string to find' }, + new_string: { type: 'string', description: 'Replacement string' }, + replace_all: { type: 'boolean', description: 'Replace all occurrences (default false)' }, + }, + required: ['path', 'old_string', 'new_string'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as FileEditArgs; + try { + const content = readFileSync(args.path, 'utf-8'); + + if (!content.includes(args.old_string)) { + return { success: false, output: '', error: `old_string not found in ${args.path}` }; + } + + // Count occurrences + const count = content.split(args.old_string).length - 1; + if (count > 1 && !args.replace_all) { + return { success: false, output: '', error: `old_string found multiple times (${count}). Use replace_all or provide more context.` }; + } + + const newContent = args.replace_all + ? content.replaceAll(args.old_string, args.new_string) + : content.replace(args.old_string, args.new_string); + + writeFileSync(args.path, newContent, 'utf-8'); + return { success: true, output: `Edited ${args.path} (${count} replacement${count > 1 ? 's' : ''})` }; + } catch (error) { + return { success: false, output: '', error: error instanceof Error ? error.message : String(error) }; + } + }, +}; +``` + +```typescript +// src/tools/builtin/file-list.ts +import { readdirSync } from 'fs'; +import type { Tool, ToolResult } from '../types.js'; + +interface FileListArgs { + path: string; + pattern?: string; +} + +function matchGlob(name: string, pattern: string): boolean { + const regex = new RegExp('^' + pattern.replace(/\./g, '\\.').replace(/\*/g, '.*') + '$'); + return regex.test(name); +} + +export const fileListTool: Tool = { + name: 'file.list', + description: 'List files and directories in a given path. Optionally filter with a glob pattern.', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'Directory path to list' }, + pattern: { type: 'string', description: 'Glob pattern to filter results (e.g. "*.ts")' }, + }, + required: ['path'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as FileListArgs; + try { + let entries = readdirSync(args.path, { withFileTypes: true }); + if (args.pattern) { + entries = entries.filter(e => matchGlob(e.name, args.pattern!)); + } + const output = entries + .map(e => e.isDirectory() ? `${e.name}/` : e.name) + .sort() + .join('\n'); + return { success: true, output }; + } catch (error) { + return { success: false, output: '', error: error instanceof Error ? error.message : String(error) }; + } + }, +}; +``` + +**Step 4: Run test to verify it passes** + +Run: `pnpm vitest run src/tools/builtin/file.test.ts` +Expected: PASS (all 10 tests) + +**Step 5: Commit** + +```bash +git add src/tools/builtin/file-read.ts src/tools/builtin/file-write.ts src/tools/builtin/file-edit.ts src/tools/builtin/file-list.ts src/tools/builtin/file.test.ts +git commit -m "feat(tools): add file read/write/edit/list builtin tools" +``` + +--- + +## Task 6: Web Fetch Tool + +**Files:** +- Create: `src/tools/builtin/web-fetch.ts` +- Test: `src/tools/builtin/web-fetch.test.ts` + +**Step 1: Write the failing test** + +```typescript +// src/tools/builtin/web-fetch.test.ts +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { webFetchTool } from './web-fetch.js'; + +// Mock global fetch +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +beforeEach(() => { + mockFetch.mockReset(); +}); + +describe('web.fetch', () => { + it('has correct metadata', () => { + expect(webFetchTool.name).toBe('web.fetch'); + expect(webFetchTool.inputSchema.required).toContain('url'); + }); + + it('fetches a URL and returns body text', async () => { + mockFetch.mockResolvedValue({ + ok: true, + status: 200, + text: async () => '

Hello

World

', + headers: new Headers({ 'content-type': 'text/html' }), + }); + + const result = await webFetchTool.execute({ url: 'https://example.com' }); + expect(result.success).toBe(true); + expect(result.output).toBeTruthy(); + expect(mockFetch).toHaveBeenCalledWith('https://example.com', expect.any(Object)); + }); + + it('returns error on HTTP failure', async () => { + mockFetch.mockResolvedValue({ + ok: false, + status: 404, + text: async () => 'Not Found', + headers: new Headers(), + }); + + const result = await webFetchTool.execute({ url: 'https://example.com/nope' }); + expect(result.success).toBe(false); + expect(result.error).toContain('404'); + }); + + it('returns error on network failure', async () => { + mockFetch.mockRejectedValue(new Error('network error')); + + const result = await webFetchTool.execute({ url: 'https://down.example.com' }); + expect(result.success).toBe(false); + expect(result.error).toContain('network error'); + }); +}); +``` + +**Step 2: Run test to verify it fails** + +Run: `pnpm vitest run src/tools/builtin/web-fetch.test.ts` +Expected: FAIL — module not found + +**Step 3: Write the implementation** + +```typescript +// src/tools/builtin/web-fetch.ts +import type { Tool, ToolResult } from '../types.js'; + +interface WebFetchArgs { + url: string; + timeout?: number; +} + +export const webFetchTool: Tool = { + name: 'web.fetch', + description: 'Fetch the content of a URL via HTTP GET. Returns the response body as text.', + inputSchema: { + type: 'object', + properties: { + url: { type: 'string', description: 'The URL to fetch' }, + timeout: { type: 'number', description: 'Timeout in milliseconds (default 15000)' }, + }, + required: ['url'], + }, + execute: async (rawArgs: unknown): Promise => { + const args = rawArgs as WebFetchArgs; + const timeout = args.timeout ?? 15_000; + + try { + const response = await fetch(args.url, { + signal: AbortSignal.timeout(timeout), + headers: { + 'User-Agent': 'Flynn/0.1 (personal AI assistant)', + 'Accept': 'text/html, application/json, text/plain, */*', + }, + }); + + if (!response.ok) { + return { + success: false, + output: '', + error: `HTTP ${response.status}: ${await response.text()}`, + }; + } + + const body = await response.text(); + return { success: true, output: body }; + } catch (error) { + return { + success: false, + output: '', + error: error instanceof Error ? error.message : String(error), + }; + } + }, +}; +``` + +**Step 4: Run test to verify it passes** + +Run: `pnpm vitest run src/tools/builtin/web-fetch.test.ts` +Expected: PASS (all 4 tests) + +**Step 5: Commit** + +```bash +git add src/tools/builtin/web-fetch.ts src/tools/builtin/web-fetch.test.ts +git commit -m "feat(tools): add web.fetch builtin tool" +``` + +--- + +## Task 7: Tools Index + Register All Builtins + +**Files:** +- Create: `src/tools/index.ts` +- Create: `src/tools/builtin/index.ts` + +**Step 1: Create the barrel exports** + +```typescript +// src/tools/builtin/index.ts +export { shellExecTool } from './shell.js'; +export { fileReadTool } from './file-read.js'; +export { fileWriteTool } from './file-write.js'; +export { fileEditTool } from './file-edit.js'; +export { fileListTool } from './file-list.js'; +export { webFetchTool } from './web-fetch.js'; + +import type { Tool } from '../types.js'; +import { shellExecTool } from './shell.js'; +import { fileReadTool } from './file-read.js'; +import { fileWriteTool } from './file-write.js'; +import { fileEditTool } from './file-edit.js'; +import { fileListTool } from './file-list.js'; +import { webFetchTool } from './web-fetch.js'; + +export const allBuiltinTools: Tool[] = [ + shellExecTool, + fileReadTool, + fileWriteTool, + fileEditTool, + fileListTool, + webFetchTool, +]; +``` + +```typescript +// src/tools/index.ts +export type { Tool, ToolCall, ToolResult, ToolInputSchema, ToolUseBlock, ToolResultBlock, ToolUseMessage, ToolResultMessage } from './types.js'; +export { ToolRegistry } from './registry.js'; +export type { AnthropicToolDef, OpenAIToolDef } from './registry.js'; +export { ToolExecutor } from './executor.js'; +export type { ToolExecutorConfig } from './executor.js'; +export { allBuiltinTools } from './builtin/index.js'; +export { shellExecTool } from './builtin/shell.js'; +export { fileReadTool } from './builtin/file-read.js'; +export { fileWriteTool } from './builtin/file-write.js'; +export { fileEditTool } from './builtin/file-edit.js'; +export { fileListTool } from './builtin/file-list.js'; +export { webFetchTool } from './builtin/web-fetch.js'; +``` + +**Step 2: Run all tool tests to verify nothing broke** + +Run: `pnpm vitest run src/tools/` +Expected: All tests PASS + +**Step 3: Commit** + +```bash +git add src/tools/index.ts src/tools/builtin/index.ts +git commit -m "feat(tools): add barrel exports and allBuiltinTools list" +``` + +--- + +## Task 8: Update Model Types for Tool Use + +**Files:** +- Modify: `src/models/types.ts` +- Test: `src/models/types.test.ts` (new) + +**Step 1: Write the failing test** + +```typescript +// src/models/types.test.ts +import { describe, it, expect } from 'vitest'; +import type { ChatRequest, ChatResponse, ToolMessage, ContentBlock } from './types.js'; + +describe('Model types with tool support', () => { + it('ChatRequest accepts tools array', () => { + const req: ChatRequest = { + messages: [{ role: 'user', content: 'hi' }], + tools: [{ + name: 'test', + description: 'test tool', + input_schema: { type: 'object', properties: {} }, + }], + }; + expect(req.tools).toHaveLength(1); + }); + + it('ChatResponse has optional toolCalls', () => { + const resp: ChatResponse = { + content: '', + stopReason: 'tool_use', + usage: { inputTokens: 0, outputTokens: 0 }, + toolCalls: [{ id: 'call_1', name: 'test', args: {} }], + }; + expect(resp.toolCalls).toHaveLength(1); + expect(resp.stopReason).toBe('tool_use'); + }); + + it('ToolMessage represents tool results in conversation', () => { + const msg: ToolMessage = { + role: 'tool_result', + toolResults: [{ tool_use_id: 'call_1', content: 'result', is_error: false }], + }; + expect(msg.role).toBe('tool_result'); + expect(msg.toolResults).toHaveLength(1); + }); + + it('ContentBlock can be text or tool_use', () => { + const text: ContentBlock = { type: 'text', text: 'hello' }; + const tool: ContentBlock = { type: 'tool_use', id: 'c1', name: 'test', input: {} }; + expect(text.type).toBe('text'); + expect(tool.type).toBe('tool_use'); + }); +}); +``` + +**Step 2: Run test to verify it fails** + +Run: `pnpm vitest run src/models/types.test.ts` +Expected: FAIL — `ToolMessage`, `ContentBlock` not exported + +**Step 3: Update types.ts** + +Update `src/models/types.ts` to add tool-related types. Keep ALL existing types unchanged, add new ones: + +```typescript +// src/models/types.ts + +export interface Message { + role: 'user' | 'assistant'; + content: string; + timestamp?: number; +} + +// Tool definition passed to model API +export interface ToolDefinition { + name: string; + description: string; + input_schema: { + type: 'object'; + properties: Record; + required?: string[]; + }; +} + +// Individual tool call returned by model +export interface ModelToolCall { + id: string; + name: string; + args: unknown; +} + +// Content blocks for multi-content responses +export type ContentBlock = + | { type: 'text'; text: string } + | { type: 'tool_use'; id: string; name: string; input: unknown }; + +// Tool result fed back into conversation +export interface ToolResultEntry { + tool_use_id: string; + content: string; + is_error?: boolean; +} + +// Message type for tool results (distinct from user/assistant) +export interface ToolMessage { + role: 'tool_result'; + toolResults: ToolResultEntry[]; +} + +// Union type for all messages in a conversation +export type ConversationMessage = Message | ToolMessage; + +export interface ChatRequest { + messages: Message[]; + system?: string; + maxTokens?: number; + tools?: ToolDefinition[]; +} + +export interface ChatResponse { + content: string; + stopReason: 'end_turn' | 'max_tokens' | 'stop_sequence' | 'tool_use' | string; + usage: TokenUsage; + toolCalls?: ModelToolCall[]; +} + +export interface TokenUsage { + inputTokens: number; + outputTokens: number; +} + +export interface ChatStreamEvent { + type: 'content' | 'done' | 'error' | 'tool_use'; + content?: string; + usage?: TokenUsage; + error?: Error; + toolCall?: ModelToolCall; +} + +export interface StreamingModelClient { + chatStream(request: ChatRequest): AsyncIterable; +} + +export interface ModelClient { + chat(request: ChatRequest): Promise; + chatStream?(request: ChatRequest): AsyncIterable; +} +``` + +**Step 4: Run test to verify it passes** + +Run: `pnpm vitest run src/models/types.test.ts` +Expected: PASS + +**Step 5: Run ALL existing model tests to verify no regressions** + +Run: `pnpm vitest run src/models/` +Expected: All existing tests PASS (types are backward compatible — `Message` unchanged, new fields are optional) + +**Step 6: Commit** + +```bash +git add src/models/types.ts src/models/types.test.ts +git commit -m "feat(models): add tool use types to model interfaces" +``` + +--- + +## Task 9: Anthropic Tool Use Support + +**Files:** +- Modify: `src/models/anthropic.ts` +- Modify: `src/models/anthropic.test.ts` + +**Step 1: Write the failing test (add to existing test file)** + +Add these tests to `src/models/anthropic.test.ts`: + +```typescript +// Add after existing describe blocks in src/models/anthropic.test.ts + +describe('AnthropicClient tool use', () => { + it('passes tools to API and parses tool_use response', async () => { + // This test requires updating the mock to return tool_use blocks + // We need to access the mock and override for this test + const Anthropic = (await import('@anthropic-ai/sdk')).default; + const mockInstance = new Anthropic(); + + // Override create to return tool_use + (mockInstance.messages.create as ReturnType).mockResolvedValueOnce({ + content: [ + { type: 'tool_use', id: 'toolu_01', name: 'shell.exec', input: { command: 'ls' } }, + ], + stop_reason: 'tool_use', + usage: { input_tokens: 20, output_tokens: 15 }, + }); + + const client = new AnthropicClient({ + apiKey: 'test-key', + model: 'claude-sonnet-4-20250514', + }); + + const response = await client.chat({ + messages: [{ role: 'user', content: 'list files' }], + tools: [{ + name: 'shell.exec', + description: 'Run shell command', + input_schema: { type: 'object', properties: { command: { type: 'string' } }, required: ['command'] }, + }], + }); + + expect(response.stopReason).toBe('tool_use'); + expect(response.toolCalls).toHaveLength(1); + expect(response.toolCalls![0]).toEqual({ + id: 'toolu_01', + name: 'shell.exec', + args: { command: 'ls' }, + }); + }); +}); +``` + +**Step 2: Run test to verify it fails** + +Run: `pnpm vitest run src/models/anthropic.test.ts` +Expected: FAIL — `toolCalls` is undefined (current code only extracts text blocks) + +**Step 3: Update anthropic.ts to support tool use** + +Update the `chat` method in `src/models/anthropic.ts`: + +Replace the `chat` method body. Key changes: +1. Pass `tools` to `messages.create()` when present +2. Parse both `text` and `tool_use` content blocks from response +3. Return `toolCalls` array when tool_use blocks present + +Updated `chat` method: + +```typescript + async chat(request: ChatRequest): Promise { + const params: Record = { + model: this.model, + max_tokens: request.maxTokens ?? this.defaultMaxTokens, + system: request.system, + messages: request.messages.map((m) => ({ + role: m.role, + content: m.content, + })), + }; + + if (request.tools && request.tools.length > 0) { + params.tools = request.tools; + } + + const response = await this.client.messages.create(params as Parameters[0]); + + const textContent = response.content.find((c) => c.type === 'text'); + const content = textContent?.type === 'text' ? textContent.text : ''; + + const toolCalls = response.content + .filter((c): c is { type: 'tool_use'; id: string; name: string; input: unknown } => c.type === 'tool_use') + .map(c => ({ id: c.id, name: c.name, args: c.input })); + + return { + content, + stopReason: response.stop_reason ?? 'end_turn', + usage: { + inputTokens: response.usage.input_tokens, + outputTokens: response.usage.output_tokens, + }, + ...(toolCalls.length > 0 ? { toolCalls } : {}), + }; + } +``` + +Also update `chatStream` similarly — pass tools param, and yield `tool_use` events for `content_block_start` events with `tool_use` type. (Details in implementation — the key addition is yielding `{ type: 'tool_use', toolCall: {...} }` events.) + +**Step 4: Run tests to verify they pass** + +Run: `pnpm vitest run src/models/anthropic.test.ts` +Expected: All PASS + +**Step 5: Commit** + +```bash +git add src/models/anthropic.ts src/models/anthropic.test.ts +git commit -m "feat(models): add tool use support to AnthropicClient" +``` + +--- + +## Task 10: OpenAI Tool Use Support + +**Files:** +- Modify: `src/models/openai.ts` +- Modify: `src/models/openai.test.ts` + +**Step 1: Write the failing test** + +Add to `src/models/openai.test.ts`: + +```typescript +describe('OpenAIClient tool use', () => { + it('passes tools to API and parses tool_calls response', async () => { + const OpenAI = (await import('openai')).default; + const mockInstance = new OpenAI(); + + (mockInstance.chat.completions.create as ReturnType).mockResolvedValueOnce({ + choices: [{ + message: { + content: null, + tool_calls: [{ + id: 'call_1', + type: 'function', + function: { name: 'shell.exec', arguments: '{"command":"ls"}' }, + }], + }, + finish_reason: 'tool_calls', + }], + usage: { prompt_tokens: 20, completion_tokens: 15 }, + }); + + const client = new OpenAIClient({ + apiKey: 'test-key', + model: 'gpt-4o', + }); + + const response = await client.chat({ + messages: [{ role: 'user', content: 'list files' }], + tools: [{ + name: 'shell.exec', + description: 'Run shell command', + input_schema: { type: 'object', properties: { command: { type: 'string' } }, required: ['command'] }, + }], + }); + + expect(response.stopReason).toBe('tool_calls'); + expect(response.toolCalls).toHaveLength(1); + expect(response.toolCalls![0]).toEqual({ + id: 'call_1', + name: 'shell.exec', + args: { command: 'ls' }, + }); + }); +}); +``` + +**Step 2: Run test to verify it fails** + +Run: `pnpm vitest run src/models/openai.test.ts` +Expected: FAIL — `toolCalls` undefined + +**Step 3: Update openai.ts** + +Update `chat` method to: +1. Convert `tools` to OpenAI format (`{ type: 'function', function: { name, description, parameters } }`) +2. Parse `tool_calls` from response choice +3. Return `toolCalls` array with parsed JSON arguments + +**Step 4: Run tests** + +Run: `pnpm vitest run src/models/openai.test.ts` +Expected: PASS + +**Step 5: Commit** + +```bash +git add src/models/openai.ts src/models/openai.test.ts +git commit -m "feat(models): add tool use support to OpenAIClient" +``` + +--- + +## Task 11: Agent Loop + +**Files:** +- Modify: `src/backends/native/agent.ts` +- Modify: `src/backends/native/agent.test.ts` + +This is the biggest task. The NativeAgent `process()` method changes from single-turn to iterative loop. + +**Step 1: Write the failing test** + +Add to `src/backends/native/agent.test.ts`: + +```typescript +import { ToolRegistry, ToolExecutor, allBuiltinTools } from '../../tools/index.js'; +import { HookEngine } from '../../hooks/index.js'; +import type { Tool } from '../../tools/index.js'; + +// Simple test tool +const echoTool: Tool = { + name: 'test.echo', + description: 'Echo', + inputSchema: { type: 'object', properties: { text: { type: 'string' } }, required: ['text'] }, + execute: async (args) => ({ success: true, output: (args as { text: string }).text }), +}; + +describe('NativeAgent tool loop', () => { + it('executes tool calls and feeds results back', async () => { + let callCount = 0; + const mockClient: ModelClient = { + chat: vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) { + // First call: model requests tool use + return { + content: '', + stopReason: 'tool_use', + usage: { inputTokens: 10, outputTokens: 5 }, + toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hello' } }], + }; + } + // Second call: model gives final text response + return { + content: 'The tool returned: hello', + stopReason: 'end_turn', + usage: { inputTokens: 15, outputTokens: 10 }, + }; + }), + }; + + const registry = new ToolRegistry(); + registry.register(echoTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const agent = new NativeAgent({ + modelClient: mockClient, + systemPrompt: 'You are helpful.', + toolRegistry: registry, + toolExecutor: executor, + }); + + const response = await agent.process('echo hello'); + expect(response).toBe('The tool returned: hello'); + expect(mockClient.chat).toHaveBeenCalledTimes(2); + }); + + it('respects max iterations', async () => { + // Model always returns tool_use + const mockClient: ModelClient = { + chat: vi.fn().mockResolvedValue({ + content: '', + stopReason: 'tool_use', + usage: { inputTokens: 10, outputTokens: 5 }, + toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'loop' } }], + }), + }; + + const registry = new ToolRegistry(); + registry.register(echoTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const agent = new NativeAgent({ + modelClient: mockClient, + systemPrompt: 'You are helpful.', + toolRegistry: registry, + toolExecutor: executor, + maxIterations: 3, + }); + + const response = await agent.process('loop forever'); + expect(response).toContain('max iterations'); + expect(mockClient.chat).toHaveBeenCalledTimes(3); + }); + + it('works without tools (backward compatible)', async () => { + const mockClient: ModelClient = { + chat: vi.fn().mockResolvedValue({ + content: 'Hello!', + stopReason: 'end_turn', + usage: { inputTokens: 10, outputTokens: 5 }, + }), + }; + + const agent = new NativeAgent({ + modelClient: mockClient, + systemPrompt: 'You are helpful.', + }); + + const response = await agent.process('Hi'); + expect(response).toBe('Hello!'); + }); +}); +``` + +**Step 2: Run test to verify it fails** + +Run: `pnpm vitest run src/backends/native/agent.test.ts` +Expected: FAIL — NativeAgent doesn't accept `toolRegistry`/`toolExecutor` + +**Step 3: Rewrite agent.ts with tool loop** + +The updated NativeAgent: +- `NativeAgentConfig` gains optional `toolRegistry`, `toolExecutor`, `maxIterations` fields +- `process()` becomes a loop: call model -> if `stopReason === 'tool_use'`, execute tools, append results, loop +- Conversation history stores both regular messages and tool messages +- Model receives tools from registry in each `chat()` call +- Max iterations (default 10) prevents infinite loops +- Backward compatible: if no registry/executor provided, works exactly as before + +Key implementation details: +- Build Anthropic-format messages for tool results: `{ role: 'user', content: [{ type: 'tool_result', tool_use_id, content }] }` +- The agent needs to track the raw content blocks (not just text) for tool_use responses +- On max iterations, return a warning message + +**Step 4: Run tests** + +Run: `pnpm vitest run src/backends/native/agent.test.ts` +Expected: All PASS (existing + new) + +**Step 5: Commit** + +```bash +git add src/backends/native/agent.ts src/backends/native/agent.test.ts +git commit -m "feat(agent): add iterative tool use loop with max iterations" +``` + +--- + +## Task 12: Wire Tools into Daemon + +**Files:** +- Modify: `src/daemon/index.ts` + +**Step 1: No test needed (integration wiring)** + +This is wiring code that creates the tool registry, registers all builtins, creates the executor, and passes them to the NativeAgent. No new logic, just composition. + +**Step 2: Update daemon/index.ts** + +Changes: +1. Import `ToolRegistry`, `ToolExecutor`, `allBuiltinTools` from `../tools/index.js` +2. After creating hookEngine, create registry and executor: + ```typescript + const toolRegistry = new ToolRegistry(); + for (const tool of allBuiltinTools) { + toolRegistry.register(tool); + } + const toolExecutor = new ToolExecutor(toolRegistry, hookEngine); + ``` +3. Pass `toolRegistry` and `toolExecutor` to NativeAgent constructor +4. Add `toolRegistry` and `toolExecutor` to `DaemonContext` interface + +**Step 3: Run typecheck and existing tests** + +Run: `pnpm typecheck && pnpm vitest run` +Expected: PASS + +**Step 4: Commit** + +```bash +git add src/daemon/index.ts +git commit -m "feat(daemon): wire tool registry and executor into agent" +``` + +--- + +## Task 13: Update TUI for Tool Display + +**Files:** +- Modify: `src/frontends/tui/minimal.ts` + +**Step 1: No new test (display-only change)** + +The TUI's `handleMessage` method currently calls `modelClient.chatStream()` or `modelClient.chat()` directly. After this task, it should call `agent.process()` instead (which handles the tool loop internally), and display tool execution status. + +However, for Phase 1, a simpler approach: the NativeAgent's `process()` returns only the final text. For tool status display, add an optional `onToolUse` callback to NativeAgentConfig that the TUI can hook into. + +**Step 2: Add onToolUse callback to NativeAgent** + +In `src/backends/native/agent.ts`, add to NativeAgentConfig: +```typescript +onToolUse?: (event: { type: 'start' | 'end'; tool: string; args?: unknown; result?: ToolResult }) => void; +``` + +The agent loop calls this before and after each tool execution. + +**Step 3: Update MinimalTui to use agent instead of raw model client** + +Change `MinimalTuiConfig` to accept `NativeAgent` instead of raw `ModelClient`. The `handleMessage` method calls `agent.process()` and the `onToolUse` callback prints tool status lines: + +``` +⚡ shell.exec: ls -la +✓ success (24 lines) +``` + +**Step 4: Run existing TUI tests** + +Run: `pnpm vitest run src/frontends/tui/` +Expected: PASS (may need to update test mocks to use agent instead of raw client) + +**Step 5: Commit** + +```bash +git add src/backends/native/agent.ts src/frontends/tui/minimal.ts +git commit -m "feat(tui): display tool execution status in minimal TUI" +``` + +--- + +## Task 14: Update Telegram for Tool Display + +**Files:** +- Modify: `src/frontends/telegram/bot.ts` +- Modify: `src/frontends/telegram/handlers.ts` + +**Step 1: Update handlers to show tool status** + +The Telegram message handler currently calls `agent.process(text)` and gets back text. With the onToolUse callback, we can send status messages during tool execution. + +For Telegram, tool status should appear as edited messages or new messages: +- On tool start: Send a status message ("⚡ Running shell.exec...") +- On tool end: Edit the status message with result summary +- After loop completes: Send the final response + +**Step 2: Update bot.ts** + +The bot needs access to the agent's onToolUse callback, wired to send Telegram status messages for the active chat context. + +**Step 3: Run tests** + +Run: `pnpm vitest run src/frontends/telegram/` +Expected: PASS + +**Step 4: Commit** + +```bash +git add src/frontends/telegram/bot.ts src/frontends/telegram/handlers.ts +git commit -m "feat(telegram): display tool execution status messages" +``` + +--- + +## Task 15: Update Model Index Exports + +**Files:** +- Modify: `src/models/index.ts` + +**Step 1: Add new type exports** + +```typescript +export type { ToolDefinition, ModelToolCall, ContentBlock, ToolResultEntry, ToolMessage, ConversationMessage } from './types.js'; +``` + +**Step 2: Run typecheck** + +Run: `pnpm typecheck` +Expected: PASS + +**Step 3: Commit** + +```bash +git add src/models/index.ts +git commit -m "feat(models): export tool-related types from index" +``` + +--- + +## Task 16: Full Integration Test + +**Files:** +- Create: `src/tools/integration.test.ts` + +**Step 1: Write integration test** + +```typescript +// src/tools/integration.test.ts +import { describe, it, expect, vi } from 'vitest'; +import { NativeAgent } from '../backends/native/agent.js'; +import { ToolRegistry } from './registry.js'; +import { ToolExecutor } from './executor.js'; +import { HookEngine } from '../hooks/engine.js'; +import { shellExecTool } from './builtin/shell.js'; +import { fileReadTool } from './builtin/file-read.js'; +import { fileWriteTool } from './builtin/file-write.js'; +import type { ModelClient, ChatResponse } from '../models/types.js'; +import { mkdtempSync, rmSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; + +describe('Tool integration (end-to-end)', () => { + it('agent uses shell tool and returns result', async () => { + let callCount = 0; + const mockClient: ModelClient = { + chat: vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) { + return { + content: '', + stopReason: 'tool_use', + usage: { inputTokens: 10, outputTokens: 5 }, + toolCalls: [{ id: 'c1', name: 'shell.exec', args: { command: 'echo integration_test' } }], + } satisfies ChatResponse; + } + return { + content: 'The command output was: integration_test', + stopReason: 'end_turn', + usage: { inputTokens: 20, outputTokens: 10 }, + } satisfies ChatResponse; + }), + }; + + const registry = new ToolRegistry(); + registry.register(shellExecTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const agent = new NativeAgent({ + modelClient: mockClient, + systemPrompt: 'You have tools.', + toolRegistry: registry, + toolExecutor: executor, + }); + + const result = await agent.process('run echo integration_test'); + expect(result).toContain('integration_test'); + }); + + it('agent chains multiple tools', async () => { + const dir = mkdtempSync(join(tmpdir(), 'flynn-integ-')); + let callCount = 0; + + const mockClient: ModelClient = { + chat: vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) { + return { + content: '', + stopReason: 'tool_use', + usage: { inputTokens: 10, outputTokens: 5 }, + toolCalls: [{ id: 'c1', name: 'file.write', args: { path: join(dir, 'test.txt'), content: 'hello' } }], + }; + } + if (callCount === 2) { + return { + content: '', + stopReason: 'tool_use', + usage: { inputTokens: 15, outputTokens: 8 }, + toolCalls: [{ id: 'c2', name: 'file.read', args: { path: join(dir, 'test.txt') } }], + }; + } + return { + content: 'I wrote and read the file. It contains: hello', + stopReason: 'end_turn', + usage: { inputTokens: 20, outputTokens: 10 }, + }; + }), + }; + + const registry = new ToolRegistry(); + registry.register(fileWriteTool); + registry.register(fileReadTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const agent = new NativeAgent({ + modelClient: mockClient, + systemPrompt: 'You have file tools.', + toolRegistry: registry, + toolExecutor: executor, + }); + + try { + const result = await agent.process('write hello to test.txt then read it'); + expect(result).toContain('hello'); + expect(mockClient.chat).toHaveBeenCalledTimes(3); + } finally { + rmSync(dir, { recursive: true }); + } + }); +}); +``` + +**Step 2: Run integration test** + +Run: `pnpm vitest run src/tools/integration.test.ts` +Expected: PASS + +**Step 3: Run full test suite** + +Run: `pnpm vitest run` +Expected: All tests PASS + +**Step 4: Run typecheck** + +Run: `pnpm typecheck` +Expected: PASS (no type errors) + +**Step 5: Commit** + +```bash +git add src/tools/integration.test.ts +git commit -m "test: add end-to-end tool integration tests" +``` + +--- + +## Task 17: Update System Prompt for Tool Awareness + +**Files:** +- Modify: `src/daemon/index.ts` + +**Step 1: Update SYSTEM_PROMPT** + +Add tool awareness to the system prompt so the model knows it has tools: + +```typescript +const SYSTEM_PROMPT = `You are Flynn, a helpful personal AI assistant running on the user's machine. You are direct, concise, and helpful. + +You have access to tools that let you interact with the system: +- shell.exec: Run shell commands (bash) +- file.read: Read file contents +- file.write: Write/create files +- file.edit: Edit files (find and replace) +- file.list: List directory contents +- web.fetch: Fetch web pages + +Use tools when the user's request requires interacting with the filesystem, running commands, or fetching web content. For conversational questions, respond directly without tools. + +Keep responses focused. Use markdown when it improves readability.`; +``` + +**Step 2: Run tests (nothing should break)** + +Run: `pnpm vitest run` +Expected: PASS + +**Step 3: Commit** + +```bash +git add src/daemon/index.ts +git commit -m "feat(daemon): update system prompt with tool descriptions" +``` + +--- + +## Summary + +| Task | Description | Files | Tests | +|------|-------------|-------|-------| +| 0 | SOUL.md + system prompt loader | `SOUL.md`, `src/daemon/index.ts` | 0 | +| 1 | Tool type definitions | `src/tools/types.ts` | 5 | +| 2 | Tool registry | `src/tools/registry.ts` | 5 | +| 3 | Tool executor | `src/tools/executor.ts` | 7 | +| 4 | Shell exec tool | `src/tools/builtin/shell.ts` | 5 | +| 5 | File tools (4 files) | `src/tools/builtin/file-*.ts` | 10 | +| 6 | Web fetch tool | `src/tools/builtin/web-fetch.ts` | 4 | +| 7 | Index/barrel exports | `src/tools/index.ts` + `builtin/index.ts` | 0 | +| 8 | Model types for tool use | `src/models/types.ts` | 4 | +| 9 | Anthropic tool use | `src/models/anthropic.ts` | 1+ | +| 10 | OpenAI tool use | `src/models/openai.ts` | 1+ | +| 11 | Agent loop | `src/backends/native/agent.ts` | 3+ | +| 12 | Wire into daemon | `src/daemon/index.ts` | 0 | +| 13 | TUI tool display | `src/frontends/tui/minimal.ts` | 0 | +| 14 | Telegram tool display | `src/frontends/telegram/*.ts` | 0 | +| 15 | Model index exports | `src/models/index.ts` | 0 | +| 16 | Integration tests | `src/tools/integration.test.ts` | 2 | +| 17 | System prompt update | `src/daemon/index.ts` | 0 | + +**Total: ~47+ new tests across 18 tasks, ~16 new files, ~5 modified files** + +**Execution model:** Opus 4.6 supervises and reviews. Subagents via GitHub Copilot execute implementation. + +**Subagent models:** +- **Claude Haiku 4.5** (`github-copilot/claude-haiku-4.5`): Mechanical tasks (types, file tools, wiring, exports) +- **Claude Sonnet 4.5** (`github-copilot/claude-sonnet-4.5`): Complex tasks (registry, executor, model integration, agent loop, frontend updates) + +**Task grouping for subagents:** +- **Haiku 4.5** (mechanical): Tasks 0, 1, 5, 6, 7, 12, 15, 17 +- **Sonnet 4.5** (complex): Tasks 2, 3, 4, 8, 9, 10, 11, 13, 14, 16 + +**Estimated effort:** Tasks 0-7 are foundational (types + tools). Tasks 8-11 are core complexity (model integration + agent loop). Tasks 12-17 are wiring/polish. diff --git a/docs/plans/2026-02-05-phase3-channel-adapters.md b/docs/plans/2026-02-05-phase3-channel-adapters.md new file mode 100644 index 0000000..6ea6b70 --- /dev/null +++ b/docs/plans/2026-02-05-phase3-channel-adapters.md @@ -0,0 +1,203 @@ +# Phase 3: Channel Adapters — Implementation Plan + +## Goal + +Introduce a `ChannelAdapter` abstraction that decouples message sources (Telegram, WebChat, future Discord/WhatsApp/Slack) from the agent. Each adapter handles platform-specific I/O and maps messages to a common interface. A `ChannelRegistry` manages adapter lifecycle and routes messages to/from the agent. + +## Scope (This Iteration) + +1. **Channel types** — `ChannelAdapter` interface, `InboundMessage`, `OutboundMessage`, `ChannelStatus` +2. **Channel registry** — Register, start/stop, route messages, adapter lifecycle +3. **Telegram adapter** — Refactor existing `src/frontends/telegram/` into a `ChannelAdapter` +4. **WebChat adapter** — Wrap the existing gateway WS into a `ChannelAdapter` +5. **Daemon integration** — Replace direct bot/gateway creation with registry-managed adapters + +Discord, WhatsApp, and Slack adapters are deferred (require new dependencies + credentials). + +## Architecture + +``` +src/channels/ +├── types.ts # ChannelAdapter interface, message types +├── registry.ts # ChannelRegistry: lifecycle + message routing +├── registry.test.ts # Registry tests +├── index.ts # Barrel exports +├── telegram/ +│ ├── adapter.ts # TelegramAdapter implements ChannelAdapter +│ ├── adapter.test.ts # Adapter tests +│ └── index.ts # Barrel +└── webchat/ + ├── adapter.ts # WebChatAdapter implements ChannelAdapter + ├── adapter.test.ts # Adapter tests + └── index.ts # Barrel +``` + +The existing `src/frontends/telegram/` code (bot.ts, handlers.ts, confirmations.ts) stays in place and is wrapped by the adapter. The adapter delegates to the existing bot creation logic. No breaking changes to existing code. + +## Types Design + +### InboundMessage +```typescript +interface InboundMessage { + id: string; // Platform message ID + channel: string; // Adapter name: "telegram", "webchat", etc. + senderId: string; // Platform user ID + senderName?: string; // Display name (optional) + text: string; // Message text + replyTo?: string; // ID of message being replied to + timestamp: number; // Unix ms + metadata?: Record; // Platform-specific extras +} +``` + +### OutboundMessage +```typescript +interface OutboundMessage { + text: string; // Response text (markdown) + replyTo?: string; // Original message ID + metadata?: Record; // Platform-specific extras (e.g. parse_mode) +} +``` + +### ChannelAdapter +```typescript +interface ChannelAdapter { + readonly name: string; + readonly status: ChannelStatus; + + /** Start the adapter (connect to platform, begin listening). */ + connect(): Promise; + + /** Stop the adapter (disconnect, clean up). */ + disconnect(): Promise; + + /** Send a message to a specific peer. */ + send(peerId: string, message: OutboundMessage): Promise; + + /** Register the inbound message handler. Called by registry. */ + onMessage(handler: (msg: InboundMessage) => void): void; + + /** Register a tool event handler for displaying tool execution status. */ + onToolEvent?(handler: (peerId: string, event: ToolStatusEvent) => void): void; +} + +type ChannelStatus = 'disconnected' | 'connecting' | 'connected' | 'error'; + +interface ToolStatusEvent { + type: 'start' | 'end'; + tool: string; + args?: unknown; + result?: { success: boolean; output: string; error?: string }; +} +``` + +### ChannelRegistry +```typescript +class ChannelRegistry { + register(adapter: ChannelAdapter): void; + unregister(name: string): void; + get(name: string): ChannelAdapter | undefined; + list(): ChannelAdapter[]; + + /** Start all registered adapters. */ + startAll(): Promise; + + /** Stop all registered adapters. */ + stopAll(): Promise; + + /** Set the message handler that all adapters route to. */ + setMessageHandler(handler: (msg: InboundMessage, reply: (msg: OutboundMessage) => Promise) => Promise): void; +} +``` + +## Telegram Adapter Design + +The `TelegramAdapter` wraps the existing `createTelegramBot()` logic: + +- `connect()`: Creates grammy Bot, starts long polling +- `disconnect()`: Stops the bot +- `send()`: Calls `bot.api.sendMessage(peerId, text, { parse_mode: 'Markdown' })` +- `onMessage()`: Sets up `bot.on('message:text', ...)` to convert grammy context to `InboundMessage` +- Preserves existing confirmations, commands (/start, /reset, /status, /local, /cloud, /model) +- Preserves chat ID allowlist check as middleware +- Tool status display: adapter handles the `onToolUse` events by posting/editing Telegram messages + +Constructor takes: +```typescript +interface TelegramAdapterConfig { + botToken: string; + allowedChatIds: number[]; + hookEngine?: HookEngine; +} +``` + +The adapter does NOT take an agent directly — the registry routes messages to the agent. + +## WebChat Adapter Design + +The `WebChatAdapter` is a thin shim since the gateway already handles WS connections. + +- `connect()`: No-op (gateway server is already running) +- `disconnect()`: No-op (gateway lifecycle managed by daemon) +- `send()`: Sends via the gateway's WS connection to the peer +- `onMessage()`: Hooks into the gateway's agent.send handler to intercept messages + +Constructor takes: +```typescript +interface WebChatAdapterConfig { + gateway: GatewayServer; +} +``` + +This adapter is simpler because the gateway already has its own session bridge and agent management. The adapter primarily exists to: +1. Report WebChat as a registered channel in the registry +2. Allow the daemon to manage all channels uniformly +3. Provide status/metrics via a common interface + +## Daemon Integration + +The daemon currently: +1. Creates a grammy Bot directly +2. Creates a GatewayServer directly +3. Starts both independently + +After refactor: +1. Creates a ChannelRegistry +2. Creates TelegramAdapter + WebChatAdapter +3. Registers both with the registry +4. Registry starts all adapters + +The message handler in the registry creates per-channel agents via the session manager, same as the existing session bridge pattern. + +## Implementation Order + +1. `src/channels/types.ts` — Pure types (no runtime) +2. `src/channels/registry.ts` — Registry class +3. `src/channels/registry.test.ts` — Registry unit tests +4. `src/channels/telegram/adapter.ts` — Telegram adapter +5. `src/channels/telegram/adapter.test.ts` — Telegram adapter tests +6. `src/channels/webchat/adapter.ts` — WebChat adapter +7. `src/channels/webchat/adapter.test.ts` — WebChat adapter tests +8. `src/channels/index.ts` + sub-barrel exports +9. `src/daemon/index.ts` — Wire registry +10. Run full test suite + +## Existing Code Impact + +- `src/frontends/telegram/` — **NOT deleted**. The adapter wraps these existing modules. +- `src/gateway/` — **NOT modified**. WebChat adapter wraps the existing gateway. +- `src/daemon/index.ts` — **Modified** to use ChannelRegistry. +- `src/backends/native/agent.ts` — **NOT modified**. Agent creation happens in the registry message handler. + +## Test Strategy + +- Unit tests for ChannelRegistry (mock adapters) +- Unit tests for TelegramAdapter (mock grammy Bot) +- Unit tests for WebChatAdapter (mock GatewayServer) +- Existing tests remain unchanged (frontends/telegram, gateway) + +--- + +*Plan Version: 1.0* +*Created: 2026-02-05* +*Parent: docs/plans/2026-02-05-openclaw-parity-design.md Phase 3* diff --git a/src/channels/index.ts b/src/channels/index.ts new file mode 100644 index 0000000..5baa453 --- /dev/null +++ b/src/channels/index.ts @@ -0,0 +1,11 @@ +export type { + ChannelAdapter, + ChannelStatus, + InboundMessage, + OutboundMessage, + ToolStatusEvent, + MessageHandler, +} from './types.js'; +export { ChannelRegistry } from './registry.js'; +export { TelegramAdapter, type TelegramAdapterConfig } from './telegram/index.js'; +export { WebChatAdapter, type WebChatAdapterConfig } from './webchat/index.js'; diff --git a/src/channels/registry.test.ts b/src/channels/registry.test.ts new file mode 100644 index 0000000..1355b20 --- /dev/null +++ b/src/channels/registry.test.ts @@ -0,0 +1,196 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { ChannelRegistry } from './registry.js'; +import type { ChannelAdapter, InboundMessage, OutboundMessage } from './types.js'; + +/** Create a mock adapter with spy functions and a triggerMessage helper. */ +function createMockAdapter(name: string): ChannelAdapter & { + connectFn: ReturnType; + disconnectFn: ReturnType; + sendFn: ReturnType; + triggerMessage: (msg: InboundMessage) => void; +} { + let messageHandler: ((msg: InboundMessage) => void) | undefined; + let _status: 'disconnected' | 'connecting' | 'connected' | 'error' = 'disconnected'; + + const connectFn = vi.fn(async () => { _status = 'connected'; }); + const disconnectFn = vi.fn(async () => { _status = 'disconnected'; }); + const sendFn = vi.fn(async () => {}); + + return { + name, + get status() { return _status; }, + connect: connectFn, + disconnect: disconnectFn, + send: sendFn, + onMessage: (handler: (msg: InboundMessage) => void) => { messageHandler = handler; }, + triggerMessage: (msg: InboundMessage) => { messageHandler?.(msg); }, + connectFn, + disconnectFn, + sendFn, + }; +} + +/** Create a sample inbound message for a given channel. */ +function makeMessage(channel: string): InboundMessage { + return { + id: 'msg-1', + channel, + senderId: 'user-42', + senderName: 'Alice', + text: 'Hello', + timestamp: Date.now(), + }; +} + +describe('ChannelRegistry', () => { + let registry: ChannelRegistry; + + beforeEach(() => { + registry = new ChannelRegistry(); + }); + + it('registers and lists adapters', () => { + const a1 = createMockAdapter('alpha'); + const a2 = createMockAdapter('beta'); + + registry.register(a1); + registry.register(a2); + + const listed = registry.list(); + expect(listed).toHaveLength(2); + expect(listed.map((a) => a.name)).toContain('alpha'); + expect(listed.map((a) => a.name)).toContain('beta'); + }); + + it('throws on duplicate registration', () => { + const a1 = createMockAdapter('dup'); + registry.register(a1); + + const a2 = createMockAdapter('dup'); + expect(() => registry.register(a2)).toThrow('already registered'); + }); + + it('gets adapter by name', () => { + const adapter = createMockAdapter('test'); + registry.register(adapter); + + expect(registry.get('test')).toBe(adapter); + expect(registry.get('unknown')).toBeUndefined(); + }); + + it('starts all adapters', async () => { + const a1 = createMockAdapter('one'); + const a2 = createMockAdapter('two'); + + registry.register(a1); + registry.register(a2); + + await registry.startAll(); + + expect(a1.connectFn).toHaveBeenCalledOnce(); + expect(a2.connectFn).toHaveBeenCalledOnce(); + }); + + it('stops all adapters', async () => { + const a1 = createMockAdapter('one'); + const a2 = createMockAdapter('two'); + + registry.register(a1); + registry.register(a2); + + // Connect first so they are in connected state + await a1.connect(); + await a2.connect(); + + await registry.stopAll(); + + expect(a1.disconnectFn).toHaveBeenCalled(); + expect(a2.disconnectFn).toHaveBeenCalled(); + }); + + it('routes inbound messages to handler', async () => { + const adapter = createMockAdapter('test-channel'); + registry.register(adapter); + + const handler = vi.fn(async (_msg: InboundMessage, reply: (r: OutboundMessage) => Promise) => { + await reply({ text: 'pong' }); + }); + registry.setMessageHandler(handler); + + const msg = makeMessage('test-channel'); + adapter.triggerMessage(msg); + + // Allow the async handler to settle + await vi.waitFor(() => { + expect(handler).toHaveBeenCalledOnce(); + }); + + // Handler receives the original inbound message + expect(handler.mock.calls[0][0]).toBe(msg); + + // The reply function should have called adapter.send with the sender's peerId + expect(adapter.sendFn).toHaveBeenCalledWith('user-42', { text: 'pong' }); + }); + + it('unregisters adapter', () => { + const adapter = createMockAdapter('removeme'); + registry.register(adapter); + + registry.unregister('removeme'); + + expect(registry.list()).toHaveLength(0); + expect(registry.get('removeme')).toBeUndefined(); + }); + + it('unregister disconnects connected adapter', async () => { + const adapter = createMockAdapter('connected-one'); + registry.register(adapter); + + await adapter.connect(); + expect(adapter.status).toBe('connected'); + + await registry.unregister('connected-one'); + + expect(adapter.disconnectFn).toHaveBeenCalled(); + }); + + it('logs warning when no message handler set', () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + + const adapter = createMockAdapter('no-handler'); + registry.register(adapter); + + // Trigger a message WITHOUT calling setMessageHandler + adapter.triggerMessage(makeMessage('no-handler')); + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('No message handler set'), + ); + + warnSpy.mockRestore(); + }); + + it('handles errors in message handler gracefully', async () => { + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + + const adapter = createMockAdapter('err-channel'); + registry.register(adapter); + + registry.setMessageHandler(async () => { + throw new Error('handler exploded'); + }); + + // Trigger a message — should not throw + adapter.triggerMessage(makeMessage('err-channel')); + + // Allow the async error path to settle + await vi.waitFor(() => { + expect(errorSpy).toHaveBeenCalledWith( + expect.stringContaining('Error handling message'), + expect.any(Error), + ); + }); + + errorSpy.mockRestore(); + }); +}); diff --git a/src/channels/registry.ts b/src/channels/registry.ts new file mode 100644 index 0000000..95b149f --- /dev/null +++ b/src/channels/registry.ts @@ -0,0 +1,116 @@ +/** + * Channel registry — manages adapter lifecycle and message routing. + * + * The ChannelRegistry holds all registered channel adapters and routes + * inbound messages through a single MessageHandler. Each adapter's + * onMessage callback is wired at registration time so that messages + * flow through handleInbound → messageHandler → reply. + */ + +import type { + ChannelAdapter, + InboundMessage, + MessageHandler, + OutboundMessage, +} from './types.js'; + +export class ChannelRegistry { + private adapters: Map = new Map(); + private messageHandler?: MessageHandler; + + /** Register an adapter. Throws if name already registered. */ + register(adapter: ChannelAdapter): void { + if (this.adapters.has(adapter.name)) { + throw new Error(`Channel adapter '${adapter.name}' is already registered`); + } + + // Wire the adapter's onMessage to route through our messageHandler + adapter.onMessage((msg) => this.handleInbound(msg)); + this.adapters.set(adapter.name, adapter); + } + + /** Unregister an adapter by name. Calls disconnect() if connected. */ + async unregister(name: string): Promise { + const adapter = this.adapters.get(name); + if (!adapter) return; + + if (adapter.status === 'connected' || adapter.status === 'connecting') { + await adapter.disconnect(); + } + + this.adapters.delete(name); + } + + /** Get an adapter by name. */ + get(name: string): ChannelAdapter | undefined { + return this.adapters.get(name); + } + + /** List all registered adapters. */ + list(): ChannelAdapter[] { + return Array.from(this.adapters.values()); + } + + /** Set the message handler that all adapters route inbound messages to. */ + setMessageHandler(handler: MessageHandler): void { + this.messageHandler = handler; + } + + /** Start all registered adapters. Logs errors per adapter, doesn't throw. */ + async startAll(): Promise { + const adapters = Array.from(this.adapters.values()); + const results = await Promise.allSettled( + adapters.map((a) => a.connect()), + ); + + for (const [i, result] of results.entries()) { + if (result.status === 'rejected') { + console.error( + `Failed to start channel '${adapters[i].name}':`, + result.reason, + ); + } + } + } + + /** Stop all registered adapters. */ + async stopAll(): Promise { + const adapters = Array.from(this.adapters.values()); + const results = await Promise.allSettled( + adapters.map((a) => a.disconnect()), + ); + + for (const [i, result] of results.entries()) { + if (result.status === 'rejected') { + console.error( + `Failed to stop channel '${adapters[i].name}':`, + result.reason, + ); + } + } + } + + /** Internal: route an inbound message to the message handler. */ + private handleInbound(msg: InboundMessage): void { + if (!this.messageHandler) { + console.warn(`No message handler set, dropping message from '${msg.channel}'`); + return; + } + + const adapter = this.adapters.get(msg.channel); + if (!adapter) { + console.warn(`Unknown channel '${msg.channel}' in inbound message`); + return; + } + + // Create a reply function bound to this message's channel and sender + const reply = async (response: OutboundMessage): Promise => { + await adapter.send(msg.senderId, response); + }; + + // Fire and forget — errors are logged, not propagated + this.messageHandler(msg, reply).catch((err: unknown) => { + console.error(`Error handling message from '${msg.channel}':`, err); + }); + } +} diff --git a/src/channels/telegram/adapter.test.ts b/src/channels/telegram/adapter.test.ts new file mode 100644 index 0000000..0722697 --- /dev/null +++ b/src/channels/telegram/adapter.test.ts @@ -0,0 +1,256 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; + +// ── Mock grammy before importing adapter ────────────────────────── + +const mockUse = vi.fn(); +const mockOn = vi.fn(); +const mockCommand = vi.fn(); +const mockStart = vi.fn(); +const mockStop = vi.fn(); +const mockSendMessage = vi.fn(); + +vi.mock('grammy', () => ({ + Bot: vi.fn().mockImplementation(() => ({ + use: mockUse, + on: mockOn, + command: mockCommand, + start: mockStart, + stop: mockStop, + api: { sendMessage: mockSendMessage }, + })), +})); + +import { TelegramAdapter, type TelegramAdapterConfig } from './adapter.js'; +import type { InboundMessage } from '../types.js'; + +const baseConfig: TelegramAdapterConfig = { + botToken: 'test-token-123', + allowedChatIds: [100, 200], +}; + +describe('TelegramAdapter', () => { + let adapter: TelegramAdapter; + + beforeEach(() => { + vi.clearAllMocks(); + adapter = new TelegramAdapter(baseConfig); + }); + + // ── Basic properties ────────────────────────────────────────── + + it('has name "telegram"', () => { + expect(adapter.name).toBe('telegram'); + }); + + it('starts as disconnected', () => { + expect(adapter.status).toBe('disconnected'); + }); + + // ── connect / disconnect ────────────────────────────────────── + + it('connect creates a bot and sets status to connected', async () => { + await adapter.connect(); + + expect(adapter.status).toBe('connected'); + // Bot constructor called with the token + const { Bot } = await import('grammy'); + expect(Bot).toHaveBeenCalledWith('test-token-123'); + }); + + it('connect registers auth middleware, commands, and message handler', async () => { + await adapter.connect(); + + // .use() for auth middleware + expect(mockUse).toHaveBeenCalledTimes(1); + // .command() for /start and /reset + expect(mockCommand).toHaveBeenCalledTimes(2); + expect(mockCommand.mock.calls[0][0]).toBe('start'); + expect(mockCommand.mock.calls[1][0]).toBe('reset'); + // .on('message:text', ...) for text handler + expect(mockOn).toHaveBeenCalledWith('message:text', expect.any(Function)); + // .start() to begin long polling + expect(mockStart).toHaveBeenCalledTimes(1); + }); + + it('connect registers callback_query handler when hookEngine is provided', async () => { + const hookEngine = { resolveConfirmation: vi.fn() }; + const adapterWithHooks = new TelegramAdapter({ + ...baseConfig, + hookEngine: hookEngine as never, + }); + + await adapterWithHooks.connect(); + + // Should have .on('callback_query:data', ...) plus .on('message:text', ...) + expect(mockOn).toHaveBeenCalledWith('callback_query:data', expect.any(Function)); + expect(mockOn).toHaveBeenCalledWith('message:text', expect.any(Function)); + }); + + it('disconnect stops the bot and sets status to disconnected', async () => { + await adapter.connect(); + expect(adapter.status).toBe('connected'); + + await adapter.disconnect(); + expect(mockStop).toHaveBeenCalledTimes(1); + expect(adapter.status).toBe('disconnected'); + }); + + it('disconnect is safe to call when not connected', async () => { + await adapter.disconnect(); + expect(adapter.status).toBe('disconnected'); + expect(mockStop).not.toHaveBeenCalled(); + }); + + // ── send ────────────────────────────────────────────────────── + + it('send throws when adapter is not connected', async () => { + await expect(adapter.send('100', { text: 'hello' })).rejects.toThrow( + 'Telegram adapter not connected', + ); + }); + + it('send delivers a short message in a single API call', async () => { + await adapter.connect(); + + await adapter.send('100', { text: 'Hello there' }); + + expect(mockSendMessage).toHaveBeenCalledTimes(1); + expect(mockSendMessage).toHaveBeenCalledWith(100, 'Hello there', { parse_mode: 'Markdown' }); + }); + + it('send chunks a long message that exceeds 4096 chars', async () => { + await adapter.connect(); + + // Create a message that is longer than 4096 chars — two halves joined by a newline + const half = 'A'.repeat(3000); + const longMessage = `${half}\n${'B'.repeat(3000)}`; + + await adapter.send('200', { text: longMessage }); + + // Should have been split into 2 chunks + expect(mockSendMessage.mock.calls.length).toBeGreaterThanOrEqual(2); + // Each call uses numeric chatId and parse_mode + for (const call of mockSendMessage.mock.calls) { + expect(call[0]).toBe(200); + expect(call[2]).toEqual({ parse_mode: 'Markdown' }); + } + }); + + // ── onMessage / inbound handling ────────────────────────────── + + it('onMessage registers a handler that receives text messages', async () => { + const handler = vi.fn(); + adapter.onMessage(handler); + + await adapter.connect(); + + // Get the registered message:text handler from mockOn + const textHandlerCall = mockOn.mock.calls.find( + (call) => call[0] === 'message:text', + ); + expect(textHandlerCall).toBeDefined(); + + const textHandler = textHandlerCall![1]; + + // Simulate a grammy context object + const ctx = { + message: { message_id: 42, text: 'Hello Flynn' }, + chat: { id: 100 }, + from: { first_name: 'Will' }, + replyWithChatAction: vi.fn(), + }; + + await textHandler(ctx); + + expect(ctx.replyWithChatAction).toHaveBeenCalledWith('typing'); + expect(handler).toHaveBeenCalledTimes(1); + + const msg: InboundMessage = handler.mock.calls[0][0]; + expect(msg.channel).toBe('telegram'); + expect(msg.senderId).toBe('100'); + expect(msg.senderName).toBe('Will'); + expect(msg.text).toBe('Hello Flynn'); + expect(msg.id).toBe('42'); + }); + + it('text handler does nothing when no message handler is registered', async () => { + // Don't call onMessage — no handler + await adapter.connect(); + + const textHandlerCall = mockOn.mock.calls.find( + (call) => call[0] === 'message:text', + ); + const textHandler = textHandlerCall![1]; + + const ctx = { + message: { message_id: 1, text: 'test' }, + chat: { id: 100 }, + from: { first_name: 'Will' }, + replyWithChatAction: vi.fn(), + }; + + // Should not throw + await textHandler(ctx); + expect(ctx.replyWithChatAction).not.toHaveBeenCalled(); + }); + + // ── /reset command ──────────────────────────────────────────── + + it('/reset command delivers a reset inbound message', async () => { + const handler = vi.fn(); + adapter.onMessage(handler); + + await adapter.connect(); + + // Find the /reset command handler + const resetCall = mockCommand.mock.calls.find((call) => call[0] === 'reset'); + expect(resetCall).toBeDefined(); + + const resetHandler = resetCall![1]; + + const ctx = { + message: { message_id: 99 }, + chat: { id: 100 }, + from: { first_name: 'Will' }, + reply: vi.fn(), + }; + + await resetHandler(ctx); + + expect(ctx.reply).toHaveBeenCalledWith('Conversation reset.'); + expect(handler).toHaveBeenCalledTimes(1); + + const msg: InboundMessage = handler.mock.calls[0][0]; + expect(msg.text).toBe('/reset'); + expect(msg.metadata).toEqual({ isCommand: true, command: 'reset' }); + }); + + // ── Auth middleware ─────────────────────────────────────────── + + it('auth middleware blocks unauthorized chat IDs', async () => { + await adapter.connect(); + + // The first .use() call is the auth middleware + const authMiddleware = mockUse.mock.calls[0][0]; + + const next = vi.fn(); + const ctx = { chat: { id: 999 } }; // Not in allowedChatIds + + await authMiddleware(ctx, next); + + expect(next).not.toHaveBeenCalled(); + }); + + it('auth middleware allows authorized chat IDs', async () => { + await adapter.connect(); + + const authMiddleware = mockUse.mock.calls[0][0]; + + const next = vi.fn(); + const ctx = { chat: { id: 100 } }; // In allowedChatIds + + await authMiddleware(ctx, next); + + expect(next).toHaveBeenCalledTimes(1); + }); +}); diff --git a/src/channels/telegram/adapter.ts b/src/channels/telegram/adapter.ts new file mode 100644 index 0000000..824110c --- /dev/null +++ b/src/channels/telegram/adapter.ts @@ -0,0 +1,208 @@ +import { Bot } from 'grammy'; + +import type { HookEngine } from '../../hooks/index.js'; +import type { + InboundMessage, + OutboundMessage, + ChannelAdapter, + ChannelStatus, +} from '../types.js'; +import { isAllowedChat } from '../../frontends/telegram/handlers.js'; +import { parseConfirmationCallback } from '../../frontends/telegram/confirmations.js'; + +/** Configuration for the Telegram channel adapter. */ +export interface TelegramAdapterConfig { + botToken: string; + allowedChatIds: number[]; + hookEngine?: HookEngine; +} + +/** + * Split a long message into chunks that respect Telegram's 4096 char limit. + * Prefers splitting at newlines, then spaces, then hard-cuts. + */ +function splitMessage(text: string, maxLength: number): string[] { + const chunks: string[] = []; + let remaining = text; + + while (remaining.length > 0) { + if (remaining.length <= maxLength) { + chunks.push(remaining); + break; + } + + // Try to split at a newline within the allowed window + let splitIndex = remaining.lastIndexOf('\n', maxLength); + if (splitIndex === -1 || splitIndex < maxLength / 2) { + splitIndex = remaining.lastIndexOf(' ', maxLength); + } + if (splitIndex === -1 || splitIndex < maxLength / 2) { + splitIndex = maxLength; + } + + chunks.push(remaining.slice(0, splitIndex)); + remaining = remaining.slice(splitIndex).trimStart(); + } + + return chunks; +} + +/** + * Telegram channel adapter backed by grammy. + * + * Handles authentication via allowed-chat-id filtering, + * confirmation callbacks (when a HookEngine is provided), + * and message chunking for Telegram's 4096-char limit. + */ +export class TelegramAdapter implements ChannelAdapter { + readonly name = 'telegram'; + + private _status: ChannelStatus = 'disconnected'; + private bot: Bot | null = null; + private messageHandler?: (msg: InboundMessage) => void; + private config: TelegramAdapterConfig; + + get status(): ChannelStatus { + return this._status; + } + + constructor(config: TelegramAdapterConfig) { + this.config = config; + } + + /** Register the inbound message handler. Called by the registry before connect(). */ + onMessage(handler: (msg: InboundMessage) => void): void { + this.messageHandler = handler; + } + + /** Create the grammy bot, wire up middleware & handlers, and start long-polling. */ + async connect(): Promise { + this.bot = new Bot(this.config.botToken); + this._status = 'connecting'; + + // ── Auth middleware — reject messages from unknown chats ── + this.bot.use(async (ctx, next) => { + const chatId = ctx.chat?.id; + if (chatId === undefined || !isAllowedChat(chatId, this.config.allowedChatIds)) { + console.log(`Rejected message from unauthorized chat: ${chatId}`); + return; + } + await next(); + }); + + // ── Confirmation callback handler (requires hookEngine) ── + if (this.config.hookEngine) { + const hookEngine = this.config.hookEngine; + + this.bot.on('callback_query:data', async (ctx) => { + const data = ctx.callbackQuery.data; + const parsed = parseConfirmationCallback(data); + + if (!parsed) { + await ctx.answerCallbackQuery({ text: 'Invalid action' }); + return; + } + + const resolved = hookEngine.resolveConfirmation(parsed.id, { + approved: parsed.approved, + reason: parsed.approved ? undefined : 'Denied by user', + }); + + if (resolved) { + await ctx.answerCallbackQuery({ + text: parsed.approved ? '✅ Approved' : '❌ Denied', + }); + await ctx.editMessageText( + ctx.callbackQuery.message?.text + `\n\n${parsed.approved ? '✅ Approved' : '❌ Denied'}`, + { parse_mode: 'Markdown' }, + ); + } else { + await ctx.answerCallbackQuery({ text: 'Confirmation expired or not found' }); + } + }); + } + + // ── Command handlers ── + + this.bot.command('start', async (ctx) => { + await ctx.reply('Flynn is ready. Send me a message!'); + }); + + this.bot.command('reset', async (ctx) => { + // Deliver a special reset message through the channel + if (this.messageHandler) { + this.messageHandler({ + id: String(ctx.message?.message_id ?? Date.now()), + channel: 'telegram', + senderId: String(ctx.chat.id), + senderName: ctx.from?.first_name, + text: '/reset', + timestamp: Date.now(), + metadata: { isCommand: true, command: 'reset' }, + }); + } + await ctx.reply('Conversation reset.'); + }); + + // ── Text message handler ── + + this.bot.on('message:text', async (ctx) => { + if (!this.messageHandler) return; + + const text = ctx.message.text; + + // Show typing indicator while processing + await ctx.replyWithChatAction('typing'); + + this.messageHandler({ + id: String(ctx.message.message_id), + channel: 'telegram', + senderId: String(ctx.chat.id), + senderName: ctx.from?.first_name, + text, + timestamp: Date.now(), + }); + }); + + // ── Start long polling ── + + this.bot.start({ + onStart: (botInfo) => { + console.log(`Telegram bot started: @${botInfo.username}`); + this._status = 'connected'; + }, + }); + + // bot.start() returns immediately for long polling. + // The onStart callback sets connected above; also set here for safety + // in case the callback fires before this line is reached. + this._status = 'connected'; + } + + /** Stop the bot and clean up. */ + async disconnect(): Promise { + if (this.bot) { + await this.bot.stop(); + this.bot = null; + } + this._status = 'disconnected'; + } + + /** Send an outbound message, automatically chunking if it exceeds Telegram's limit. */ + async send(peerId: string, message: OutboundMessage): Promise { + if (!this.bot) throw new Error('Telegram adapter not connected'); + + const chatId = Number(peerId); + const text = message.text; + + // Telegram enforces a 4096-character limit per message + if (text.length <= 4096) { + await this.bot.api.sendMessage(chatId, text, { parse_mode: 'Markdown' }); + } else { + const chunks = splitMessage(text, 4096); + for (const chunk of chunks) { + await this.bot.api.sendMessage(chatId, chunk, { parse_mode: 'Markdown' }); + } + } + } +} diff --git a/src/channels/telegram/index.ts b/src/channels/telegram/index.ts new file mode 100644 index 0000000..9891eb7 --- /dev/null +++ b/src/channels/telegram/index.ts @@ -0,0 +1 @@ +export { TelegramAdapter, type TelegramAdapterConfig } from './adapter.js'; diff --git a/src/channels/types.ts b/src/channels/types.ts new file mode 100644 index 0000000..f8ee818 --- /dev/null +++ b/src/channels/types.ts @@ -0,0 +1,75 @@ +/** + * Channel adapter type definitions. + * + * Pure type definitions for the channel abstraction layer. + * Each channel adapter (Telegram, webchat, etc.) implements + * the ChannelAdapter interface to provide a uniform messaging API. + */ + +/** Inbound message received from a channel platform. */ +export interface InboundMessage { + /** Platform message ID. */ + id: string; + /** Adapter name: "telegram", "webchat", etc. */ + channel: string; + /** Platform user ID. */ + senderId: string; + /** Display name (optional). */ + senderName?: string; + /** Message text. */ + text: string; + /** ID of message being replied to. */ + replyTo?: string; + /** Unix ms. */ + timestamp: number; + /** Platform-specific extras. */ + metadata?: Record; +} + +/** Outbound message to send via a channel adapter. */ +export interface OutboundMessage { + /** Response text (markdown). */ + text: string; + /** Original message ID to reply to. */ + replyTo?: string; + /** Platform-specific extras. */ + metadata?: Record; +} + +/** Tool execution status event for streaming feedback. */ +export interface ToolStatusEvent { + type: 'start' | 'end'; + tool: string; + args?: unknown; + result?: { success: boolean; output: string; error?: string }; +} + +/** Connection status of a channel adapter. */ +export type ChannelStatus = 'disconnected' | 'connecting' | 'connected' | 'error'; + +/** Uniform interface that every channel adapter must implement. */ +export interface ChannelAdapter { + /** Unique channel name (e.g. "telegram", "webchat"). */ + readonly name: string; + + /** Current connection status. */ + readonly status: ChannelStatus; + + /** Start the adapter (connect to platform, begin listening). */ + connect(): Promise; + + /** Stop the adapter (disconnect, clean up). */ + disconnect(): Promise; + + /** Send a message to a specific peer on this channel. */ + send(peerId: string, message: OutboundMessage): Promise; + + /** Register the inbound message handler. Called by registry before connect(). */ + onMessage(handler: (msg: InboundMessage) => void): void; +} + +/** Callback type for the registry's message handler. */ +export type MessageHandler = ( + msg: InboundMessage, + reply: (response: OutboundMessage) => Promise, +) => Promise; diff --git a/src/channels/webchat/adapter.test.ts b/src/channels/webchat/adapter.test.ts new file mode 100644 index 0000000..f9f71ea --- /dev/null +++ b/src/channels/webchat/adapter.test.ts @@ -0,0 +1,63 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { WebChatAdapter } from './adapter.js'; +import type { GatewayServer } from '../../gateway/index.js'; + +/** Mock GatewayServer — the adapter wraps this but doesn't manage its lifecycle. */ +const mockGateway = { + start: vi.fn(), + stop: vi.fn(), + getWss: vi.fn(() => null), + getHttpServer: vi.fn(() => null), + getSessionBridge: vi.fn(), + getMethods: vi.fn(() => []), +} as unknown as GatewayServer; + +describe('WebChatAdapter', () => { + let adapter: WebChatAdapter; + + beforeEach(() => { + vi.clearAllMocks(); + adapter = new WebChatAdapter({ gateway: mockGateway }); + }); + + it('has correct name', () => { + expect(adapter.name).toBe('webchat'); + }); + + it('starts as disconnected', () => { + expect(adapter.status).toBe('disconnected'); + }); + + it('connect sets status to connected', async () => { + await adapter.connect(); + expect(adapter.status).toBe('connected'); + }); + + it('disconnect sets status to disconnected', async () => { + await adapter.connect(); + expect(adapter.status).toBe('connected'); + + await adapter.disconnect(); + expect(adapter.status).toBe('disconnected'); + }); + + it('connect does not call gateway.start', async () => { + await adapter.connect(); + expect(mockGateway.start).not.toHaveBeenCalled(); + }); + + it('disconnect does not call gateway.stop', async () => { + await adapter.connect(); + await adapter.disconnect(); + expect(mockGateway.stop).not.toHaveBeenCalled(); + }); + + it('send is a no-op', async () => { + // Should not throw + await adapter.send('peer1', { text: 'hello' }); + }); + + it('getGateway returns the gateway instance', () => { + expect(adapter.getGateway()).toBe(mockGateway); + }); +}); diff --git a/src/channels/webchat/adapter.ts b/src/channels/webchat/adapter.ts new file mode 100644 index 0000000..d173b82 --- /dev/null +++ b/src/channels/webchat/adapter.ts @@ -0,0 +1,81 @@ +/** + * WebChat channel adapter. + * + * Thin wrapper around the existing GatewayServer. The gateway already + * handles WebSocket connections, sessions, and agent routing. This adapter + * exposes the gateway as a ChannelAdapter so the ChannelRegistry has a + * uniform interface for all channels. + */ + +import type { GatewayServer } from '../../gateway/index.js'; +import type { + InboundMessage, + OutboundMessage, + ChannelAdapter, + ChannelStatus, +} from '../types.js'; + +/** Configuration for the WebChat adapter. */ +export interface WebChatAdapterConfig { + gateway: GatewayServer; +} + +/** + * WebChatAdapter wraps a GatewayServer to satisfy the ChannelAdapter interface. + * + * The gateway's lifecycle (start/stop) is managed by the daemon, not by + * this adapter. Connect/disconnect only track the adapter's logical status. + */ +export class WebChatAdapter implements ChannelAdapter { + readonly name = 'webchat'; + + private _status: ChannelStatus = 'disconnected'; + private gateway: GatewayServer; + private messageHandler?: (msg: InboundMessage) => void; + + get status(): ChannelStatus { + return this._status; + } + + constructor(config: WebChatAdapterConfig) { + this.gateway = config.gateway; + } + + /** Register the inbound message handler. Called by registry before connect(). */ + onMessage(handler: (msg: InboundMessage) => void): void { + this.messageHandler = handler; + } + + /** + * Connect the adapter. The gateway's lifecycle is managed by the daemon, + * so this just marks the adapter as connected. The gateway should already + * be started (or will be started) by the daemon. + */ + async connect(): Promise { + this._status = 'connected'; + } + + /** + * Disconnect the adapter. Does NOT stop the gateway — that's managed + * by the daemon lifecycle. Just marks this adapter as disconnected. + */ + async disconnect(): Promise { + this._status = 'disconnected'; + } + + /** + * Send a message to a WebSocket peer. This is a no-op placeholder — + * the gateway handles outbound messages directly via its own WS connections. + * This method exists to satisfy the ChannelAdapter interface. + */ + async send(_peerId: string, _message: OutboundMessage): Promise { + // Gateway handles outbound via its own WS event system (GatewayEvent). + // This adapter doesn't need to implement send() because the gateway's + // agent.send handler already streams responses back to the WS client. + } + + /** Get the underlying gateway server. */ + getGateway(): GatewayServer { + return this.gateway; + } +} diff --git a/src/channels/webchat/index.ts b/src/channels/webchat/index.ts new file mode 100644 index 0000000..8007030 --- /dev/null +++ b/src/channels/webchat/index.ts @@ -0,0 +1 @@ +export { WebChatAdapter, type WebChatAdapterConfig } from './adapter.js'; diff --git a/src/daemon/index.ts b/src/daemon/index.ts index 290978e..dd43de1 100644 --- a/src/daemon/index.ts +++ b/src/daemon/index.ts @@ -1,13 +1,13 @@ -import { Bot } from 'grammy'; import { Lifecycle } from './lifecycle.js'; import type { Config } from '../config/index.js'; import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, ModelRouter } from '../models/index.js'; import { NativeAgent } from '../backends/index.js'; -import { createTelegramBot } from '../frontends/telegram/index.js'; import { SessionStore, SessionManager } from '../session/index.js'; import { HookEngine } from '../hooks/index.js'; import { ToolRegistry, ToolExecutor, allBuiltinTools } from '../tools/index.js'; import { GatewayServer } from '../gateway/index.js'; +import { ChannelRegistry, TelegramAdapter, WebChatAdapter } from '../channels/index.js'; +import type { InboundMessage, OutboundMessage } from '../channels/index.js'; import { resolve } from 'path'; import { homedir } from 'os'; import { mkdirSync, readFileSync, existsSync } from 'fs'; @@ -15,8 +15,6 @@ import { mkdirSync, readFileSync, existsSync } from 'fs'; export interface DaemonContext { config: Config; lifecycle: Lifecycle; - bot: Bot; - agent: NativeAgent; sessionStore: SessionStore; sessionManager: SessionManager; hookEngine: HookEngine; @@ -24,6 +22,7 @@ export interface DaemonContext { toolRegistry: ToolRegistry; toolExecutor: ToolExecutor; gateway: GatewayServer; + channelRegistry: ChannelRegistry; } function loadSystemPrompt(): string { @@ -106,6 +105,59 @@ function createModelRouter(config: Config): ModelRouter { }); } +/** + * Create the unified message handler for the channel registry. + * Each channel+sender pair gets its own NativeAgent backed by a persistent session. + */ +function createMessageRouter(deps: { + sessionManager: SessionManager; + modelRouter: ModelRouter; + systemPrompt: string; + toolRegistry: ToolRegistry; + toolExecutor: ToolExecutor; +}) { + // Cache agents by session ID to avoid recreating on every message + const agents = new Map(); + + function getOrCreateAgent(channel: string, senderId: string): NativeAgent { + const sessionId = `${channel}:${senderId}`; + let agent = agents.get(sessionId); + if (!agent) { + const session = deps.sessionManager.getSession(channel, senderId); + agent = new NativeAgent({ + modelClient: deps.modelRouter, + systemPrompt: deps.systemPrompt, + session, + toolRegistry: deps.toolRegistry, + toolExecutor: deps.toolExecutor, + }); + agents.set(sessionId, agent); + } + return agent; + } + + return async (msg: InboundMessage, reply: (response: OutboundMessage) => Promise): Promise => { + const agent = getOrCreateAgent(msg.channel, msg.senderId); + + // Handle special commands + if (msg.metadata?.isCommand && msg.metadata.command === 'reset') { + agent.reset(); + return; + } + + try { + const response = await agent.process(msg.text); + await reply({ text: response, replyTo: msg.id }); + } catch (error) { + console.error(`Error processing message from ${msg.channel}:${msg.senderId}:`, error); + await reply({ + text: 'Sorry, an error occurred while processing your message.', + replyTo: msg.id, + }); + } + }; +} + export async function startDaemon(config: Config): Promise { const lifecycle = new Lifecycle(); @@ -138,26 +190,6 @@ export async function startDaemon(config: Config): Promise { // Load system prompt once for reuse const systemPrompt = loadSystemPrompt(); - // Get Telegram session - const telegramUserId = String(config.telegram.allowed_chat_ids[0]); - const session = sessionManager.getSession('telegram', telegramUserId); - - // Initialize native agent with session and tools - const agent = new NativeAgent({ - modelClient: modelRouter, - systemPrompt, - session, - toolRegistry, - toolExecutor, - }); - - // Initialize Telegram bot with hook engine - const bot = createTelegramBot({ - telegram: config.telegram, - agent, - hookEngine, - }); - // Initialize gateway WebSocket server const gateway = new GatewayServer({ port: config.server.port, @@ -167,9 +199,43 @@ export async function startDaemon(config: Config): Promise { systemPrompt, toolRegistry, toolExecutor, + uiDir: resolve(import.meta.dirname, '../gateway/ui'), + config, + restart: async () => { + console.log('Restart requested via gateway'); + await lifecycle.shutdown(); + // Exit with code 75 (EX_TEMPFAIL) — process supervisor should restart + process.exit(75); + }, }); - // Register signal handlers + // ── Channel Registry ────────────────────────────────────────── + + const channelRegistry = new ChannelRegistry(); + + // Set up the unified message handler + channelRegistry.setMessageHandler(createMessageRouter({ + sessionManager, + modelRouter, + systemPrompt, + toolRegistry, + toolExecutor, + })); + + // Register Telegram adapter + const telegramAdapter = new TelegramAdapter({ + botToken: config.telegram.bot_token, + allowedChatIds: config.telegram.allowed_chat_ids, + hookEngine, + }); + channelRegistry.register(telegramAdapter); + + // Register WebChat adapter (wraps the gateway) + const webChatAdapter = new WebChatAdapter({ gateway }); + channelRegistry.register(webChatAdapter); + + // ── Signal Handlers ─────────────────────────────────────────── + const signalHandler = () => { lifecycle.shutdown().then(() => process.exit(0)); }; @@ -182,20 +248,18 @@ export async function startDaemon(config: Config): Promise { process.off('SIGTERM', signalHandler); }); - // Start bot + // ── Start Services ──────────────────────────────────────────── + + // Register shutdown handler for channels (stops Telegram bot etc.) lifecycle.onShutdown(async () => { - await bot.stop(); - console.log('Telegram bot stopped'); + await channelRegistry.stopAll(); + console.log('Channel adapters stopped'); }); - // Use long polling (no webhook, no internet exposure) - bot.start({ - onStart: (botInfo) => { - console.log(`Telegram bot started: @${botInfo.username}`); - }, - }); + // Start all channel adapters (Telegram long polling, WebChat status) + await channelRegistry.startAll(); - // Start gateway + // Start gateway (HTTP + WS server) lifecycle.onShutdown(async () => { await gateway.stop(); console.log('Gateway server stopped'); @@ -205,7 +269,18 @@ export async function startDaemon(config: Config): Promise { console.log('Flynn daemon started'); - return { config, lifecycle, bot, agent, sessionStore, sessionManager, hookEngine, modelRouter, toolRegistry, toolExecutor, gateway }; + return { + config, + lifecycle, + sessionStore, + sessionManager, + hookEngine, + modelRouter, + toolRegistry, + toolExecutor, + gateway, + channelRegistry, + }; } export { Lifecycle } from './lifecycle.js'; diff --git a/src/gateway/handlers/config.ts b/src/gateway/handlers/config.ts new file mode 100644 index 0000000..441528a --- /dev/null +++ b/src/gateway/handlers/config.ts @@ -0,0 +1,98 @@ +import type { GatewayRequest, OutboundMessage } from '../protocol.js'; +import { makeResponse, makeError, ErrorCode } from '../protocol.js'; +import type { Config } from '../../config/index.js'; + +export interface ConfigHandlerDeps { + config: Config; +} + +/** + * Redact sensitive values from config before returning. + * Replaces API keys, tokens, and passwords with "***". + */ +function redactConfig(config: Config): Record { + const raw = JSON.parse(JSON.stringify(config)) as Record; + + // Redact telegram bot token + const telegram = raw.telegram as Record | undefined; + if (telegram?.bot_token) { + telegram.bot_token = '***'; + } + + // Redact model keys/tokens + const models = raw.models as Record | undefined; + if (models) { + for (const tier of ['default', 'fast', 'complex', 'local'] as const) { + const m = models[tier] as Record | undefined; + if (m?.api_key) m.api_key = '***'; + if (m?.auth_token) m.auth_token = '***'; + } + const localProviders = models.local_providers as Record> | undefined; + if (localProviders) { + for (const provider of Object.values(localProviders)) { + if (provider.api_key) provider.api_key = '***'; + if (provider.auth_token) provider.auth_token = '***'; + } + } + } + + return raw; +} + +/** Keys that are safe to update at runtime via config.patch. */ +const PATCHABLE_KEYS: Record boolean> = { + 'hooks.confirm': (config, value) => { + if (!Array.isArray(value) || !value.every((v) => typeof v === 'string')) return false; + config.hooks.confirm = value as string[]; + return true; + }, + 'hooks.log': (config, value) => { + if (!Array.isArray(value) || !value.every((v) => typeof v === 'string')) return false; + config.hooks.log = value as string[]; + return true; + }, + 'hooks.silent': (config, value) => { + if (!Array.isArray(value) || !value.every((v) => typeof v === 'string')) return false; + config.hooks.silent = value as string[]; + return true; + }, + 'server.localhost': (config, value) => { + if (typeof value !== 'boolean') return false; + config.server.localhost = value; + return true; + }, +}; + +export function createConfigHandlers(deps: ConfigHandlerDeps) { + return { + 'config.get': async (request: GatewayRequest): Promise => { + return makeResponse(request.id, redactConfig(deps.config)); + }, + + 'config.patch': async (request: GatewayRequest): Promise => { + const patches = request.params?.patches; + if (!patches || typeof patches !== 'object' || Array.isArray(patches)) { + return makeError(request.id, ErrorCode.InvalidRequest, 'params.patches must be an object of { key: value } pairs'); + } + + const applied: string[] = []; + const rejected: string[] = []; + + for (const [key, value] of Object.entries(patches as Record)) { + const patcher = PATCHABLE_KEYS[key]; + if (!patcher) { + rejected.push(key); + continue; + } + const ok = patcher(deps.config, value); + if (ok) { + applied.push(key); + } else { + rejected.push(key); + } + } + + return makeResponse(request.id, { applied, rejected }); + }, + }; +} diff --git a/src/gateway/handlers/handlers.test.ts b/src/gateway/handlers/handlers.test.ts index 686f1d2..782e866 100644 --- a/src/gateway/handlers/handlers.test.ts +++ b/src/gateway/handlers/handlers.test.ts @@ -3,6 +3,7 @@ import { createSystemHandlers } from './system.js'; import { createSessionHandlers } from './sessions.js'; import { createToolHandlers } from './tools.js'; import { createAgentHandlers } from './agent.js'; +import { createConfigHandlers } from './config.js'; import { ErrorCode } from '../protocol.js'; import type { GatewayRequest, GatewayResponse, GatewayError, GatewayEvent, OutboundMessage } from '../protocol.js'; @@ -269,3 +270,144 @@ describe('agent handlers', () => { expect((result.result as any).cancelled).toBe(true); }); }); + +describe('system.restart handler', () => { + it('returns restarting:true and calls restart callback', async () => { + const restartFn = vi.fn(async () => {}); + const handlers = createSystemHandlers({ + startTime: Date.now(), + version: '0.1.0', + getSessionCount: () => 0, + getToolCount: () => 0, + getConnectionCount: () => 0, + restart: restartFn, + }); + + const req: GatewayRequest = { id: 1, method: 'system.restart' }; + const result = await handlers['system.restart'](req) as GatewayResponse; + + expect(result.id).toBe(1); + expect((result.result as any).restarting).toBe(true); + + // Restart is called asynchronously via queueMicrotask + await new Promise((resolve) => queueMicrotask(resolve)); + expect(restartFn).toHaveBeenCalledOnce(); + }); + + it('returns error when restart is not available', async () => { + const handlers = createSystemHandlers({ + startTime: Date.now(), + version: '0.1.0', + getSessionCount: () => 0, + getToolCount: () => 0, + getConnectionCount: () => 0, + }); + + const req: GatewayRequest = { id: 2, method: 'system.restart' }; + const result = await handlers['system.restart'](req) as GatewayError; + + expect(result.error.code).toBe(ErrorCode.InternalError); + expect(result.error.message).toContain('not available'); + }); +}); + +describe('config handlers', () => { + function makeConfig() { + return { + telegram: { bot_token: 'secret-token-123', allowed_chat_ids: [12345] }, + server: { tailscale_only: true, localhost: true, port: 18800 }, + models: { + default: { provider: 'anthropic' as const, model: 'claude-3-haiku', api_key: 'sk-secret-key' }, + fallback_chain: ['anthropic'], + }, + backends: { claude_code: { enabled: false }, opencode: { enabled: false }, native: { enabled: true } }, + hooks: { confirm: ['shell.exec'], log: [], silent: [] }, + mcp: { servers: [] }, + }; + } + + it('config.get returns redacted config', async () => { + const config = makeConfig(); + const handlers = createConfigHandlers({ config: config as any }); + const req: GatewayRequest = { id: 1, method: 'config.get' }; + const result = await handlers['config.get'](req) as GatewayResponse; + + const r = result.result as Record; + expect(r.telegram.bot_token).toBe('***'); + expect(r.models.default.api_key).toBe('***'); + // Non-secret values are preserved + expect(r.server.port).toBe(18800); + expect(r.hooks.confirm).toEqual(['shell.exec']); + }); + + it('config.patch applies valid patches', async () => { + const config = makeConfig(); + const handlers = createConfigHandlers({ config: config as any }); + const req: GatewayRequest = { + id: 2, + method: 'config.patch', + params: { + patches: { + 'hooks.confirm': ['shell.exec', 'file.write'], + 'hooks.log': ['file.read'], + }, + }, + }; + const result = await handlers['config.patch'](req) as GatewayResponse; + + const r = result.result as { applied: string[]; rejected: string[] }; + expect(r.applied).toEqual(['hooks.confirm', 'hooks.log']); + expect(r.rejected).toEqual([]); + // Verify the config was actually mutated + expect(config.hooks.confirm).toEqual(['shell.exec', 'file.write']); + expect(config.hooks.log).toEqual(['file.read']); + }); + + it('config.patch rejects unknown keys', async () => { + const config = makeConfig(); + const handlers = createConfigHandlers({ config: config as any }); + const req: GatewayRequest = { + id: 3, + method: 'config.patch', + params: { + patches: { + 'telegram.bot_token': 'hacked', + 'hooks.confirm': [], + }, + }, + }; + const result = await handlers['config.patch'](req) as GatewayResponse; + + const r = result.result as { applied: string[]; rejected: string[] }; + expect(r.applied).toEqual(['hooks.confirm']); + expect(r.rejected).toEqual(['telegram.bot_token']); + }); + + it('config.patch rejects invalid value types', async () => { + const config = makeConfig(); + const handlers = createConfigHandlers({ config: config as any }); + const req: GatewayRequest = { + id: 4, + method: 'config.patch', + params: { + patches: { + 'hooks.confirm': 'not-an-array', + }, + }, + }; + const result = await handlers['config.patch'](req) as GatewayResponse; + + const r = result.result as { applied: string[]; rejected: string[] }; + expect(r.applied).toEqual([]); + expect(r.rejected).toEqual(['hooks.confirm']); + }); + + it('config.patch requires patches object', async () => { + const config = makeConfig(); + const handlers = createConfigHandlers({ config: config as any }); + const req: GatewayRequest = { id: 5, method: 'config.patch', params: {} }; + const result = await handlers['config.patch'](req) as GatewayError; + + expect(result.error.code).toBe(ErrorCode.InvalidRequest); + }); +}); diff --git a/src/gateway/handlers/index.ts b/src/gateway/handlers/index.ts index 6affefd..a033908 100644 --- a/src/gateway/handlers/index.ts +++ b/src/gateway/handlers/index.ts @@ -6,3 +6,5 @@ export { createToolHandlers } from './tools.js'; export type { ToolHandlerDeps } from './tools.js'; export { createAgentHandlers } from './agent.js'; export type { AgentHandlerDeps } from './agent.js'; +export { createConfigHandlers } from './config.js'; +export type { ConfigHandlerDeps } from './config.js'; diff --git a/src/gateway/handlers/system.ts b/src/gateway/handlers/system.ts index b1ae11f..8a77fe2 100644 --- a/src/gateway/handlers/system.ts +++ b/src/gateway/handlers/system.ts @@ -1,5 +1,5 @@ import type { GatewayRequest, OutboundMessage } from '../protocol.js'; -import { makeResponse } from '../protocol.js'; +import { makeResponse, makeError, ErrorCode } from '../protocol.js'; export interface SystemHandlerDeps { startTime: number; @@ -7,6 +7,8 @@ export interface SystemHandlerDeps { getSessionCount: () => number; getToolCount: () => number; getConnectionCount: () => number; + /** Optional callback to trigger a graceful restart. If not provided, system.restart returns an error. */ + restart?: () => Promise; } export function createSystemHandlers(deps: SystemHandlerDeps) { @@ -21,5 +23,23 @@ export function createSystemHandlers(deps: SystemHandlerDeps) { connections: deps.getConnectionCount(), }); }, + + 'system.restart': async (request: GatewayRequest): Promise => { + if (!deps.restart) { + return makeError(request.id, ErrorCode.InternalError, 'Restart not available in this environment'); + } + + // Send response before initiating restart (client receives confirmation) + const response = makeResponse(request.id, { restarting: true }); + + // Schedule restart after response is sent (next tick) + queueMicrotask(() => { + deps.restart!().catch((err) => { + console.error('Restart failed:', err); + }); + }); + + return response; + }, }; } diff --git a/src/gateway/server.ts b/src/gateway/server.ts index ea62f14..7a1dd7b 100644 --- a/src/gateway/server.ts +++ b/src/gateway/server.ts @@ -18,8 +18,10 @@ import { createSessionHandlers, createToolHandlers, createAgentHandlers, + createConfigHandlers, } from './handlers/index.js'; import type { SessionManager } from '../session/manager.js'; +import type { Config } from '../config/index.js'; import type { ToolRegistry } from '../tools/registry.js'; import type { ToolExecutor } from '../tools/executor.js'; @@ -34,6 +36,9 @@ export interface GatewayServerConfig { version?: string; auth?: AuthConfig; uiDir?: string; + config?: Config; + /** Optional callback for system.restart. Should trigger graceful shutdown + process restart. */ + restart?: () => Promise; } export class GatewayServer { @@ -67,6 +72,7 @@ export class GatewayServer { getSessionCount: () => this.sessionBridge.listSessions().length, getToolCount: () => this.config.toolRegistry.list().length, getConnectionCount: () => this.sessionBridge.connectionCount, + restart: this.config.restart, }); const sessionHandlers = createSessionHandlers({ @@ -82,6 +88,14 @@ export class GatewayServer { sessionBridge: this.sessionBridge, }); + // Config handlers (only if config object is provided) + if (this.config.config) { + const configHandlers = createConfigHandlers({ config: this.config.config }); + for (const [method, handler] of Object.entries(configHandlers)) { + this.router.register(method, handler); + } + } + // Register all methods for (const [method, handler] of Object.entries(systemHandlers)) { this.router.register(method, handler);