feat: add sandboxed tool wrappers for shell.exec and process.start
This commit is contained in:
@@ -0,0 +1,93 @@
|
|||||||
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||||
|
import { createSandboxedShellTool, createSandboxedProcessStartTool } from './tools.js';
|
||||||
|
import type { DockerSandbox } from './docker.js';
|
||||||
|
|
||||||
|
/**
 * Build a stand-in DockerSandbox for the specs in this file.
 *
 * `exec` is a vitest mock that resolves to `{ stdout: 'output', stderr: '' }`;
 * `create` and `destroy` are bare mocks. The literal only supplies the members
 * listed here, hence the double cast to the real sandbox type.
 */
function mockSandbox(): DockerSandbox {
  const exec = vi.fn().mockResolvedValue({ stdout: 'output', stderr: '' });

  const fake = {
    exec,
    create: vi.fn(),
    destroy: vi.fn(),
    containerId: 'test-container',
    containerName: 'flynn-test',
    config: {},
  };

  return fake as unknown as DockerSandbox;
}
|
||||||
|
|
||||||
|
// Specs for the sandboxed shell.exec wrapper, driven through a mocked sandbox.
describe('createSandboxedShellTool', () => {
  let sandbox: DockerSandbox;

  /** View sandbox.exec as a vitest mock so a spec can re-stub its outcome. */
  const execMock = (): ReturnType<typeof vi.fn> => sandbox.exec as ReturnType<typeof vi.fn>;

  // A fresh mock per spec keeps recorded calls from leaking between cases.
  beforeEach(() => {
    sandbox = mockSandbox();
  });

  it('has the same name as shell.exec', () => {
    const { name } = createSandboxedShellTool(sandbox);

    expect(name).toBe('shell.exec');
  });

  it('delegates to sandbox.exec', async () => {
    const shell = createSandboxedShellTool(sandbox);
    const { success, output } = await shell.execute({ command: 'echo hello' });

    // With no timeout supplied the wrapper is expected to pass 30000 ms.
    expect(sandbox.exec).toHaveBeenCalledWith('echo hello', { cwd: undefined, timeout: 30_000 });
    expect(success).toBe(true);
    expect(output).toBe('output');
  });

  it('passes cwd to sandbox.exec', async () => {
    const shell = createSandboxedShellTool(sandbox);
    const request = { command: 'ls', cwd: '/workspace/project' };

    await shell.execute(request);

    expect(sandbox.exec).toHaveBeenCalledWith('ls', { cwd: '/workspace/project', timeout: 30_000 });
  });

  it('passes timeout to sandbox.exec', async () => {
    const shell = createSandboxedShellTool(sandbox);
    const request = { command: 'sleep 10', timeout: 5000 };

    await shell.execute(request);

    expect(sandbox.exec).toHaveBeenCalledWith('sleep 10', { cwd: undefined, timeout: 5000 });
  });

  it('returns error on sandbox.exec failure', async () => {
    execMock().mockRejectedValue(new Error('container dead'));
    const shell = createSandboxedShellTool(sandbox);

    const { success, error } = await shell.execute({ command: 'fail' });

    expect(success).toBe(false);
    expect(error).toBe('container dead');
  });

  it('includes stderr in output', async () => {
    execMock().mockResolvedValue({ stdout: 'out', stderr: 'warn' });
    const shell = createSandboxedShellTool(sandbox);

    const { output } = await shell.execute({ command: 'cmd' });

    expect(output).toContain('out');
    expect(output).toContain('stderr: warn');
  });
});
|
||||||
|
|
||||||
|
// Specs for the sandboxed process.start wrapper, driven through a mocked sandbox.
describe('createSandboxedProcessStartTool', () => {
  let sandbox: DockerSandbox;

  // A fresh mock per spec keeps recorded calls from leaking between cases.
  beforeEach(() => {
    sandbox = mockSandbox();
  });

  it('has the same name as process.start', () => {
    const { name } = createSandboxedProcessStartTool(sandbox);

    expect(name).toBe('process.start');
  });

  it('runs detached command via sandbox', async () => {
    const starter = createSandboxedProcessStartTool(sandbox);

    const { success, output } = await starter.execute({ command: 'npm run dev' });

    // The wrapper decorates the command, so only require that it is embedded.
    const commandMatcher = expect.stringContaining('npm run dev');
    const optionsMatcher = expect.any(Object);
    expect(sandbox.exec).toHaveBeenCalledWith(commandMatcher, optionsMatcher);
    expect(success).toBe(true);
    expect(output).toContain('Started sandboxed background process');
  });
});
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
import type { Tool, ToolResult } from '../tools/types.js';
|
||||||
|
import type { DockerSandbox } from './docker.js';
|
||||||
|
|
||||||
|
/** Argument payload read by the sandboxed `shell.exec` tool. */
interface ShellExecArgs {
  /** The shell command to execute. */
  command: string;

  /** Working directory inside the container; may be omitted. */
  cwd?: string;

  /** Timeout in milliseconds; the tool falls back to 30000 when omitted. */
  timeout?: number;
}
|
||||||
|
|
||||||
|
/** Argument payload read by the sandboxed `process.start` tool. */
interface ProcessStartArgs {
  /** The shell command to run in the background. */
  command: string;

  /** Working directory inside the container; may be omitted. */
  cwd?: string;
}
|
||||||
|
|
||||||
|
/**
 * Create a sandboxed version of shell.exec that delegates to docker exec.
 * Same Tool interface — drop-in replacement for the host shell.exec.
 *
 * The returned tool reports every problem as a failed ToolResult
 * (`success: false`, empty `output`, message in `error`): both malformed
 * arguments and errors thrown by `sandbox.exec`. Arguments arrive as
 * `unknown`, so they are validated before the sandbox is touched.
 *
 * @param sandbox - Sandbox whose `exec(command, { cwd, timeout })` runs the command.
 * @returns A Tool named `shell.exec`.
 */
export function createSandboxedShellTool(sandbox: DockerSandbox): Tool {
  const DEFAULT_TIMEOUT_MS = 30_000;

  // Single place that shapes a failed result so all error paths agree.
  const failure = (error: string): ToolResult => ({ success: false, output: '', error });

  return {
    name: 'shell.exec',
    description: 'Execute a shell command inside a sandboxed container and return stdout/stderr.',
    inputSchema: {
      type: 'object',
      properties: {
        command: { type: 'string', description: 'The shell command to execute' },
        cwd: { type: 'string', description: 'Working directory inside the container (optional)' },
        timeout: { type: 'number', description: 'Timeout in milliseconds (default 30000)' },
      },
      required: ['command'],
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      // Guard first: dereferencing a null/undefined payload used to make this
      // promise reject instead of resolving to a failed result.
      if (typeof rawArgs !== 'object' || rawArgs === null) {
        return failure('Invalid arguments: expected an object with a "command" string');
      }

      const { command, cwd, timeout } = rawArgs as Partial<Record<keyof ShellExecArgs, unknown>>;

      if (typeof command !== 'string') {
        return failure('Invalid arguments: "command" must be a string');
      }
      // null is treated like "omitted" for the optional fields.
      if (cwd != null && typeof cwd !== 'string') {
        return failure('Invalid arguments: "cwd" must be a string when provided');
      }
      if (timeout != null && (typeof timeout !== 'number' || Number.isNaN(timeout))) {
        return failure('Invalid arguments: "timeout" must be a number of milliseconds when provided');
      }

      try {
        const result = await sandbox.exec(command, {
          cwd: cwd ?? undefined,
          timeout: timeout ?? DEFAULT_TIMEOUT_MS,
        });

        // stderr, when non-empty, is appended on its own labelled line.
        const output = result.stdout + (result.stderr ? `\nstderr: ${result.stderr}` : '');
        return { success: true, output };
      } catch (error) {
        return failure(error instanceof Error ? error.message : String(error));
      }
    },
  };
}
|
||||||
|
|
||||||
|
/**
 * Create a sandboxed version of process.start that runs in the container.
 * Uses `nohup ... &` via docker exec since we can't spawn detached inside containers.
 *
 * The command is single-quoted for `bash -c` (embedded `'` become `'\''`),
 * its stdout/stderr are redirected to `/tmp/proc.log`, and `echo $!` prints the
 * background job's PID, which is echoed back in the result text.
 *
 * Arguments arrive as `unknown`, so they are validated before use; malformed
 * arguments and sandbox errors are both reported as a failed ToolResult.
 *
 * @param sandbox - Sandbox whose `exec(command, { cwd })` runs the wrapped command.
 * @returns A Tool named `process.start`.
 */
export function createSandboxedProcessStartTool(sandbox: DockerSandbox): Tool {
  // Single place that shapes a failed result so all error paths agree.
  const failure = (error: string): ToolResult => ({ success: false, output: '', error });

  return {
    name: 'process.start',
    description: 'Start a command in the background inside a sandboxed container.',
    inputSchema: {
      type: 'object',
      properties: {
        command: { type: 'string', description: 'The shell command to run in the background' },
        cwd: { type: 'string', description: 'Working directory inside the container (optional)' },
      },
      required: ['command'],
    },
    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      // Validate up front so callers get a meaningful message rather than an
      // internal TypeError such as "Cannot read properties of undefined".
      if (typeof rawArgs !== 'object' || rawArgs === null) {
        return failure('Invalid arguments: expected an object with a "command" string');
      }

      const { command, cwd } = rawArgs as Partial<Record<keyof ProcessStartArgs, unknown>>;

      if (typeof command !== 'string') {
        return failure('Invalid arguments: "command" must be a string');
      }
      // null is treated like "omitted" for the optional field.
      if (cwd != null && typeof cwd !== 'string') {
        return failure('Invalid arguments: "cwd" must be a string when provided');
      }

      try {
        // Close the quote, emit an escaped quote, reopen: ' -> '\''
        const quoted = command.replace(/'/g, "'\\''");
        const wrappedCmd = `nohup bash -c '${quoted}' > /tmp/proc.log 2>&1 & echo $!`;
        const result = await sandbox.exec(wrappedCmd, { cwd: cwd ?? undefined });

        const pid = result.stdout.trim();
        return {
          success: true,
          output: `Started sandboxed background process (PID ${pid})\nCommand: ${command}`,
        };
      } catch (error) {
        if (error instanceof Error) {
          return failure(error.message);
        }
        // Surface thrown strings too; anything else keeps the generic message.
        const thrownText = typeof error === 'string' && error !== '' ? error : undefined;
        return failure(thrownText ?? 'Failed to start sandboxed process');
      }
    },
  };
}
|
||||||
Reference in New Issue
Block a user