From 430cb3f96e733a7e21ea6cc4d5dce0cf9118ed96 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Fri, 6 Feb 2026 15:52:21 -0800 Subject: [PATCH] feat: add sandboxed tool wrappers for shell.exec and process.start --- src/sandbox/tools.test.ts | 93 +++++++++++++++++++++++++++++++++++++++ src/sandbox/tools.ts | 92 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 src/sandbox/tools.test.ts create mode 100644 src/sandbox/tools.ts diff --git a/src/sandbox/tools.test.ts b/src/sandbox/tools.test.ts new file mode 100644 index 0000000..45b2a89 --- /dev/null +++ b/src/sandbox/tools.test.ts @@ -0,0 +1,93 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { createSandboxedShellTool, createSandboxedProcessStartTool } from './tools.js'; +import type { DockerSandbox } from './docker.js'; + +/** Builds a test double cast to DockerSandbox: exec is a mock resolving to stdout output with empty stderr, create and destroy are bare mocks. */ function mockSandbox(): DockerSandbox { + return { + exec: vi.fn().mockResolvedValue({ stdout: 'output', stderr: '' }), + create: vi.fn(), + destroy: vi.fn(), + containerId: 'test-container', + containerName: 'flynn-test', + config: {}, + } as unknown as DockerSandbox; +} + +/** Suite for the shell.exec wrapper: tool name, forwarding of command, cwd and timeout to sandbox.exec, failure reporting, and stderr formatting. */ describe('createSandboxedShellTool', () => { + let sandbox: DockerSandbox; + + beforeEach(() => { + sandbox = mockSandbox(); + }); + + it('has the same name as shell.exec', () => { + const tool = createSandboxedShellTool(sandbox); + expect(tool.name).toBe('shell.exec'); + }); + + it('delegates to sandbox.exec', async () => { + const tool = createSandboxedShellTool(sandbox); + const result = await tool.execute({ command: 'echo hello' }); + + expect(sandbox.exec).toHaveBeenCalledWith('echo hello', { cwd: undefined, timeout: 30000 }); + expect(result.success).toBe(true); + expect(result.output).toBe('output'); + }); + + it('passes cwd to sandbox.exec', async () => { + const tool = createSandboxedShellTool(sandbox); + await tool.execute({ command: 'ls', cwd: '/workspace/project' }); + + expect(sandbox.exec).toHaveBeenCalledWith('ls', { cwd: 
'/workspace/project', timeout: 30000 }); + }); + + it('passes timeout to sandbox.exec', async () => { + const tool = createSandboxedShellTool(sandbox); + await tool.execute({ command: 'sleep 10', timeout: 5000 }); + + expect(sandbox.exec).toHaveBeenCalledWith('sleep 10', { cwd: undefined, timeout: 5000 }); + }); + + it('returns error on sandbox.exec failure', async () => { + /* NOTE(review): ReturnType is written without a type argument here and in the stderr test below; it presumably lost one (such as typeof vi.fn) during extraction - confirm against the original commit. */ (sandbox.exec as ReturnType).mockRejectedValue(new Error('container dead')); + const tool = createSandboxedShellTool(sandbox); + const result = await tool.execute({ command: 'fail' }); + + expect(result.success).toBe(false); + expect(result.error).toBe('container dead'); + }); + + it('includes stderr in output', async () => { + (sandbox.exec as ReturnType).mockResolvedValue({ stdout: 'out', stderr: 'warn' }); + const tool = createSandboxedShellTool(sandbox); + const result = await tool.execute({ command: 'cmd' }); + + expect(result.output).toContain('out'); + expect(result.output).toContain('stderr: warn'); + }); +}); + +/** Suite for the process.start wrapper: tool name, and that the command text reaches sandbox.exec and a success message is returned. */ describe('createSandboxedProcessStartTool', () => { + let sandbox: DockerSandbox; + + beforeEach(() => { + sandbox = mockSandbox(); + }); + + it('has the same name as process.start', () => { + const tool = createSandboxedProcessStartTool(sandbox); + expect(tool.name).toBe('process.start'); + }); + + it('runs detached command via sandbox', async () => { + const tool = createSandboxedProcessStartTool(sandbox); + const result = await tool.execute({ command: 'npm run dev' }); + + expect(sandbox.exec).toHaveBeenCalledWith( + expect.stringContaining('npm run dev'), + expect.any(Object), + ); + expect(result.success).toBe(true); + expect(result.output).toContain('Started sandboxed background process'); + }); +}); diff --git a/src/sandbox/tools.ts b/src/sandbox/tools.ts new file mode 100644 index 0000000..3528491 --- /dev/null +++ b/src/sandbox/tools.ts @@ -0,0 +1,92 @@ +import type { Tool, ToolResult } from '../tools/types.js'; +import type { DockerSandbox } from './docker.js'; + 
/** Arguments accepted by the sandboxed `shell.exec` tool. */
interface ShellExecArgs {
  command: string;
  cwd?: string;
  timeout?: number;
}

/** Arguments accepted by the sandboxed `process.start` tool. */
interface ProcessStartArgs {
  command: string;
  cwd?: string;
}

/** Timeout, in milliseconds, used by `shell.exec` when the caller gives no usable value. */
const DEFAULT_SHELL_TIMEOUT_MS = 30_000;

/** Error text returned when tool input does not carry a usable `command`. */
const INVALID_COMMAND_ERROR = 'Invalid arguments: "command" must be a non-empty string';

/** Builds the failure result shape shared by both sandboxed tools. */
function sandboxFailure(error: string): ToolResult {
  return { success: false, output: '', error };
}

/** Converts any thrown value into a message; non-Error values are stringified instead of dropped. */
function sandboxErrorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Wraps text in single quotes for a POSIX shell, rewriting each embedded `'` as `'\''`. */
function shellSingleQuote(text: string): string {
  return `'${text.replace(/'/g, "'\\''")}'`;
}

/**
 * Validates raw tool input for `process.start`.
 *
 * @param rawArgs - Untrusted value handed to the tool's `execute`.
 * @returns The checked arguments, or `undefined` when `rawArgs` is not an
 *   object or `command` is not a non-empty string. A non-string `cwd` is
 *   treated as absent.
 */
function parseProcessStartArgs(rawArgs: unknown): ProcessStartArgs | undefined {
  if (typeof rawArgs !== 'object' || rawArgs === null) {
    return undefined;
  }
  const { command, cwd } = rawArgs as Record<string, unknown>;
  if (typeof command !== 'string' || command.length === 0) {
    return undefined;
  }
  return { command, cwd: typeof cwd === 'string' ? cwd : undefined };
}

/**
 * Validates raw tool input for `shell.exec`.
 *
 * @param rawArgs - Untrusted value handed to the tool's `execute`.
 * @returns The checked arguments with `timeout` always set: the caller's value
 *   when it is a finite number, otherwise {@link DEFAULT_SHELL_TIMEOUT_MS}.
 *   `undefined` when `command` is unusable.
 */
function parseShellExecArgs(rawArgs: unknown): ShellExecArgs | undefined {
  const base = parseProcessStartArgs(rawArgs);
  if (base === undefined) {
    return undefined;
  }
  const { timeout } = rawArgs as Record<string, unknown>;
  const usable = typeof timeout === 'number' && Number.isFinite(timeout);
  return { ...base, timeout: usable ? timeout : DEFAULT_SHELL_TIMEOUT_MS };
}

/**
 * Create a sandboxed version of shell.exec that delegates to `sandbox.exec`.
 * Same Tool interface — drop-in replacement for the host shell.exec.
 *
 * @param sandbox - Sandbox whose `exec(command, { cwd, timeout })` runs the command.
 * @returns A tool named `shell.exec`. Its `execute` never rejects: invalid
 *   input and `sandbox.exec` failures both come back as
 *   `{ success: false, output: '', error }`.
 */
export function createSandboxedShellTool(sandbox: DockerSandbox): Tool {
  return {
    name: 'shell.exec',
    description: 'Execute a shell command inside a sandboxed container and return stdout/stderr.',
    inputSchema: {
      type: 'object',
      properties: {
        command: { type: 'string', description: 'The shell command to execute' },
        cwd: { type: 'string', description: 'Working directory inside the container (optional)' },
        timeout: { type: 'number', description: 'Timeout in milliseconds (default 30000)' },
      },
      required: ['command'],
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      // Validate before touching any property so null/undefined input yields a
      // failure result rather than a rejected promise.
      const args = parseShellExecArgs(rawArgs);
      if (args === undefined) {
        return sandboxFailure(INVALID_COMMAND_ERROR);
      }

      try {
        const result = await sandbox.exec(args.command, {
          cwd: args.cwd,
          timeout: args.timeout,
        });

        // stderr, when non-empty, is appended after stdout on its own labelled line.
        const output = result.stdout + (result.stderr ? `\nstderr: ${result.stderr}` : '');
        return { success: true, output };
      } catch (error) {
        return sandboxFailure(sandboxErrorMessage(error));
      }
    },
  };
}

/**
 * Create a sandboxed version of process.start that runs in the container.
 * Uses `nohup ... &` via `sandbox.exec` since we can't spawn detached inside containers.
 *
 * @param sandbox - Sandbox whose `exec(command, { cwd })` runs the wrapper command.
 * @returns A tool named `process.start`. On success the output reports the
 *   trimmed stdout of the wrapper (the shell's `$!`) as the PID. Invalid input
 *   and `sandbox.exec` failures come back as `{ success: false, output: '', error }`.
 */
export function createSandboxedProcessStartTool(sandbox: DockerSandbox): Tool {
  return {
    name: 'process.start',
    description: 'Start a command in the background inside a sandboxed container.',
    inputSchema: {
      type: 'object',
      properties: {
        command: { type: 'string', description: 'The shell command to run in the background' },
        cwd: { type: 'string', description: 'Working directory inside the container (optional)' },
      },
      required: ['command'],
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      const args = parseProcessStartArgs(rawArgs);
      if (args === undefined) {
        return sandboxFailure(INVALID_COMMAND_ERROR);
      }

      try {
        // NOTE(review): every started process redirects to the same /tmp/proc.log,
        // so a later start truncates the earlier log. The path is kept as-is because
        // other code may read it; confirm before changing it.
        const wrappedCmd = `nohup bash -c ${shellSingleQuote(args.command)} > /tmp/proc.log 2>&1 & echo $!`;
        const result = await sandbox.exec(wrappedCmd, { cwd: args.cwd });

        const pid = result.stdout.trim();
        return {
          success: true,
          output: `Started sandboxed background process (PID ${pid})\nCommand: ${args.command}`,
        };
      } catch (error) {
        return sandboxFailure(sandboxErrorMessage(error));
      }
    },
  };
}