feat: add sandboxed tool wrappers for shell.exec and process.start

This commit is contained in:
William Valentin
2026-02-06 15:52:21 -08:00
parent daf8cac3fe
commit 430cb3f96e
2 changed files with 185 additions and 0 deletions
+93
View File
@@ -0,0 +1,93 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { createSandboxedShellTool, createSandboxedProcessStartTool } from './tools.js';
import type { DockerSandbox } from './docker.js';
/**
 * Build a stand-in DockerSandbox for the suites below.
 *
 * `exec` is a vitest mock that resolves with fixed stdout ('output') and empty
 * stderr; `create` and `destroy` are bare mocks. Only the members listed here
 * are stubbed, so the result is forced through `unknown` to satisfy the
 * DockerSandbox type.
 */
function mockSandbox(): DockerSandbox {
  const exec = vi.fn().mockResolvedValue({ stdout: 'output', stderr: '' });
  const stub = {
    exec,
    create: vi.fn(),
    destroy: vi.fn(),
    containerId: 'test-container',
    containerName: 'flynn-test',
    config: {},
  };
  return stub as unknown as DockerSandbox;
}
// Behavioural checks for the sandboxed shell.exec wrapper: tool identity,
// argument forwarding to sandbox.exec, and mapping of results and failures.
describe('createSandboxedShellTool', () => {
  let sandbox: DockerSandbox;

  // Every case gets its own mock so recorded calls and overrides stay isolated.
  beforeEach(() => {
    sandbox = mockSandbox();
  });

  it('has the same name as shell.exec', () => {
    const { name } = createSandboxedShellTool(sandbox);
    expect(name).toBe('shell.exec');
  });

  it('delegates to sandbox.exec', async () => {
    const outcome = await createSandboxedShellTool(sandbox).execute({ command: 'echo hello' });

    // With no overrides, cwd is forwarded as undefined and the timeout is 30000.
    const expectedOptions = { cwd: undefined, timeout: 30000 };
    expect(sandbox.exec).toHaveBeenCalledWith('echo hello', expectedOptions);
    expect(outcome.success).toBe(true);
    expect(outcome.output).toBe('output');
  });

  it('passes cwd to sandbox.exec', async () => {
    const shellTool = createSandboxedShellTool(sandbox);
    const input = { command: 'ls', cwd: '/workspace/project' };

    await shellTool.execute(input);

    expect(sandbox.exec).toHaveBeenCalledWith('ls', { cwd: '/workspace/project', timeout: 30000 });
  });

  it('passes timeout to sandbox.exec', async () => {
    const shellTool = createSandboxedShellTool(sandbox);
    const input = { command: 'sleep 10', timeout: 5000 };

    await shellTool.execute(input);

    expect(sandbox.exec).toHaveBeenCalledWith('sleep 10', { cwd: undefined, timeout: 5000 });
  });

  it('returns error on sandbox.exec failure', async () => {
    // Override the default resolved value so exec rejects for this case only.
    const execMock = sandbox.exec as ReturnType<typeof vi.fn>;
    execMock.mockRejectedValue(new Error('container dead'));

    const outcome = await createSandboxedShellTool(sandbox).execute({ command: 'fail' });

    expect(outcome.success).toBe(false);
    expect(outcome.error).toBe('container dead');
  });

  it('includes stderr in output', async () => {
    const execMock = sandbox.exec as ReturnType<typeof vi.fn>;
    execMock.mockResolvedValue({ stdout: 'out', stderr: 'warn' });

    const { output } = await createSandboxedShellTool(sandbox).execute({ command: 'cmd' });

    expect(output).toContain('out');
    expect(output).toContain('stderr: warn');
  });
});
// Behavioural checks for the sandboxed process.start wrapper: tool identity
// and that the requested command reaches sandbox.exec.
describe('createSandboxedProcessStartTool', () => {
  let sandbox: DockerSandbox;

  // Fresh mock per case so recorded exec calls never leak between tests.
  beforeEach(() => {
    sandbox = mockSandbox();
  });

  it('has the same name as process.start', () => {
    const { name } = createSandboxedProcessStartTool(sandbox);
    expect(name).toBe('process.start');
  });

  it('runs detached command via sandbox', async () => {
    const outcome = await createSandboxedProcessStartTool(sandbox).execute({ command: 'npm run dev' });

    // The tool wraps the command before calling exec, so only require that the
    // original text appears somewhere in the first argument.
    const commandMatcher = expect.stringContaining('npm run dev');
    const optionsMatcher = expect.any(Object);
    expect(sandbox.exec).toHaveBeenCalledWith(commandMatcher, optionsMatcher);
    expect(outcome.success).toBe(true);
    expect(outcome.output).toContain('Started sandboxed background process');
  });
});
+92
View File
@@ -0,0 +1,92 @@
import type { Tool, ToolResult } from '../tools/types.js';
import type { DockerSandbox } from './docker.js';
/** Arguments read by the sandboxed `shell.exec` tool's `execute` handler. */
interface ShellExecArgs {
/** Shell command forwarded to `sandbox.exec`. */
command: string;
/** Working directory inside the container; passed to `sandbox.exec` as the `cwd` option and may be omitted. */
cwd?: string;
/** Timeout in milliseconds; the handler substitutes 30000 when this is null or undefined. */
timeout?: number;
}
/** Arguments read by the sandboxed `process.start` tool's `execute` handler. */
interface ProcessStartArgs {
/** Shell command to run in the background; the handler wraps it in a `nohup bash -c '...' &` invocation. */
command: string;
/** Working directory inside the container; passed to `sandbox.exec` as the `cwd` option and may be omitted. */
cwd?: string;
}
/**
 * Create a sandboxed version of shell.exec that delegates to `sandbox.exec`.
 * Same Tool interface — drop-in replacement for the host shell.exec.
 *
 * The returned tool's `execute` resolves with a ToolResult in every case:
 * malformed arguments and errors thrown by `sandbox.exec` are both reported
 * as `{ success: false, output: '', error }` instead of a rejected promise.
 *
 * @param sandbox - Sandbox whose `exec(command, { cwd, timeout })` runs the command.
 * @returns A Tool named `shell.exec`.
 */
export function createSandboxedShellTool(sandbox: DockerSandbox): Tool {
  // Used when the caller omits `timeout` (matches the value advertised in the schema).
  const defaultTimeoutMs = 30_000;

  const fail = (error: string): ToolResult => ({ success: false, output: '', error });

  return {
    name: 'shell.exec',
    description: 'Execute a shell command inside a sandboxed container and return stdout/stderr.',
    inputSchema: {
      type: 'object',
      properties: {
        command: { type: 'string', description: 'The shell command to execute' },
        cwd: { type: 'string', description: 'Working directory inside the container (optional)' },
        timeout: { type: 'number', description: 'Timeout in milliseconds (default 30000)' },
      },
      required: ['command'],
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      // rawArgs arrives unvalidated; check it before touching any property so
      // a null/undefined payload becomes a failed result rather than a throw.
      if (typeof rawArgs !== 'object' || rawArgs === null) {
        return fail('shell.exec expects an arguments object');
      }
      const args = rawArgs as Partial<ShellExecArgs>;

      if (typeof args.command !== 'string') {
        return fail('shell.exec requires a "command" string');
      }

      // null/undefined fall back to the default; anything else must be a
      // usable duration before it is handed to the sandbox.
      const timeout = args.timeout ?? defaultTimeoutMs;
      if (typeof timeout !== 'number' || !Number.isFinite(timeout) || timeout < 0) {
        return fail('shell.exec "timeout" must be a non-negative number of milliseconds');
      }

      try {
        const result = await sandbox.exec(args.command, {
          cwd: args.cwd,
          timeout,
        });
        // stderr is appended on its own labelled line only when non-empty.
        const output = result.stdout + (result.stderr ? `\nstderr: ${result.stderr}` : '');
        return { success: true, output };
      } catch (error) {
        return fail(error instanceof Error ? error.message : String(error));
      }
    },
  };
}
/**
 * Create a sandboxed version of process.start that runs in the container.
 * Uses `nohup ... &` via `sandbox.exec` since we can't spawn detached inside containers.
 *
 * The command is single-quoted for `bash -c`, its stdout/stderr are redirected
 * to `/tmp/proc.log`, and the shell's `$!` is echoed back so the background
 * PID can be reported. Every invocation writes to the same log path.
 *
 * `execute` resolves with a ToolResult in every case: malformed arguments and
 * errors thrown by `sandbox.exec` are reported as
 * `{ success: false, output: '', error }`.
 *
 * @param sandbox - Sandbox whose `exec(command, { cwd })` runs the wrapped command.
 * @returns A Tool named `process.start`.
 */
export function createSandboxedProcessStartTool(sandbox: DockerSandbox): Tool {
  const fail = (error: string): ToolResult => ({ success: false, output: '', error });

  return {
    name: 'process.start',
    description: 'Start a command in the background inside a sandboxed container.',
    inputSchema: {
      type: 'object',
      properties: {
        command: { type: 'string', description: 'The shell command to run in the background' },
        cwd: { type: 'string', description: 'Working directory inside the container (optional)' },
      },
      required: ['command'],
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      // rawArgs arrives unvalidated; reject unusable payloads with a clear
      // message instead of letting `.replace` fail on a non-string.
      if (typeof rawArgs !== 'object' || rawArgs === null) {
        return fail('process.start expects an arguments object');
      }
      const args = rawArgs as Partial<ProcessStartArgs>;
      const command = args.command;

      // A blank command would still echo a PID and be reported as started,
      // even though nothing useful runs.
      if (typeof command !== 'string' || command.trim() === '') {
        return fail('process.start requires a non-empty "command" string');
      }

      try {
        // Close the quote, emit an escaped quote, reopen: ' -> '\''
        const quoted = command.replace(/'/g, "'\\''");
        const wrappedCmd = `nohup bash -c '${quoted}' > /tmp/proc.log 2>&1 & echo $!`;
        const result = await sandbox.exec(wrappedCmd, { cwd: args.cwd });
        const pid = result.stdout.trim();
        return {
          success: true,
          output: `Started sandboxed background process (PID ${pid})\nCommand: ${command}`,
        };
      } catch (error) {
        // Keep the thrown value visible even when it is not an Error instance.
        return fail(
          error instanceof Error
            ? error.message
            : `Failed to start sandboxed process: ${String(error)}`,
        );
      }
    },
  };
}