feat(security): enforce elevated mode and sandbox execution

This commit is contained in:
William Valentin
2026-02-15 17:02:05 -08:00
parent b574d170d1
commit ab89378fce
4 changed files with 152 additions and 8 deletions
+64
View File
@@ -262,4 +262,68 @@ describe('ToolExecutor', () => {
expect(result.success).toBe(false);
expect(result.error).toContain('refusing to pass');
});
it('denies host high-risk tools for sandboxed skills unless elevation is active', async () => {
  // Arrange: register a `shell.exec` tool that always succeeds, and build an
  // executor around a hook engine with empty confirm/log/silent rule lists.
  const toolRegistry = new ToolRegistry();
  toolRegistry.register({
    name: 'shell.exec',
    description: 'shell',
    inputSchema: { type: 'object', properties: {} },
    async execute() {
      return { success: true, output: 'ok' };
    },
  });
  const hookEngine = new HookEngine({ confirm: [], log: [], silent: [] });
  const toolExecutor = new ToolExecutor(toolRegistry, hookEngine);

  // Case 1: the skill declares `execution_environment: 'sandbox'` but the call
  // asks for the host and carries no elevation fields — expect a refusal whose
  // error text mentions `execution_environment=host`.
  const withoutElevation = await toolExecutor.execute(
    'shell.exec',
    { command: 'echo hi' },
    {
      skillName: 'test-skill',
      skillPermissions: { execution_environment: 'sandbox' },
      executionEnvironment: 'host',
      autonomyLevel: 'autonomous',
    },
  );
  expect(withoutElevation.success).toBe(false);
  expect(withoutElevation.error).toContain('execution_environment=host');

  // Case 2: same request, now with an elevation id and an expiry one minute in
  // the future. The call is deliberately NOT awaited yet, because the test
  // expects it to park on a confirmation that must be approved first.
  const elevationExpiryMs = Date.now() + 60_000;
  const elevatedRun = toolExecutor.execute(
    'shell.exec',
    { command: 'echo hi' },
    {
      skillName: 'test-skill',
      skillPermissions: { execution_environment: 'sandbox' },
      executionEnvironment: 'host',
      elevatedHostUntilMs: elevationExpiryMs,
      elevatedHostId: 'e1',
      autonomyLevel: 'autonomous',
    },
  );

  // NOTE(review): the pending list is read with no `await` in between, so this
  // relies on execute() registering the confirmation before it first yields —
  // confirm against the executor implementation if this ever turns flaky.
  const awaitingApproval = hookEngine.getPendingConfirmations();
  expect(awaitingApproval).toHaveLength(1);
  const confirmation = awaitingApproval[0];
  hookEngine.resolveConfirmation(confirmation.id, { approved: true });

  // Once approved, the elevated host call is expected to succeed.
  const withElevation = await elevatedRun;
  expect(withElevation.success).toBe(true);
});
it('executes shell.exec in sandbox when executionEnvironment is sandbox', async () => {
  // Arrange: the registered host implementation throws, so the test can only
  // pass if the executor never invokes it for this call.
  const toolRegistry = new ToolRegistry();
  toolRegistry.register({
    name: 'shell.exec',
    description: 'host shell',
    inputSchema: { type: 'object', properties: {} },
    async execute() {
      throw new Error('host should not run');
    },
  });
  const hookEngine = new HookEngine({ confirm: [], log: [], silent: [] });
  const toolExecutor = new ToolExecutor(toolRegistry, hookEngine);

  // Minimal stand-ins: a sandbox whose exec() yields fixed stdout, and a
  // manager that always hands back that sandbox. Cast to `any` because only
  // the members used here are provided.
  const sandboxStub = {
    async exec() {
      return { stdout: 'sandbox-out', stderr: '' };
    },
  } as any;
  const sandboxManagerStub = {
    async getOrCreate() {
      return sandboxStub;
    },
  } as any;
  toolExecutor.setSandboxManager(sandboxManagerStub);

  // Act: request sandbox execution for session `s1`.
  const sandboxResult = await toolExecutor.execute(
    'shell.exec',
    { command: 'echo hi' },
    {
      executionEnvironment: 'sandbox',
      sessionId: 's1',
      autonomyLevel: 'autonomous',
    },
  );

  // Assert: the call succeeded and its output carries the stub sandbox's stdout.
  expect(sandboxResult.success).toBe(true);
  expect(sandboxResult.output).toContain('sandbox-out');
});
});