// Unit tests for ToolExecutor (vitest).
import { describe, it, expect } from 'vitest';
import { ToolExecutor } from './executor.js';
import { ToolRegistry } from './registry.js';
import { HookEngine } from '../hooks/engine.js';
import type { Tool } from './types.js';

/**
 * Test fixture: a tool that succeeds and returns its `text` argument unchanged
 * as the output.
 */
const echoTool: Tool = {
  name: 'test.echo',
  description: 'Echoes input',
  inputSchema: { type: 'object', properties: { text: { type: 'string' } }, required: ['text'] },
  // The argument is narrowed with a cast; the schema above declares `text` as a required string.
  execute: async (args) => ({ success: true, output: (args as { text: string }).text }),
};

/**
 * Test fixture: a tool whose `execute` only resolves after a 5000 ms timer,
 * then reports success with the output 'done'.
 */
const slowTool: Tool = {
  name: 'test.slow',
  description: 'Takes forever',
  inputSchema: { type: 'object', properties: {} },
  execute: async () => {
    // Hold the call open for five seconds before producing a result.
    await new Promise(r => setTimeout(r, 5000));
    return { success: true, output: 'done' };
  },
};

/**
 * Test fixture: a tool whose `execute` always rejects with `Error('kaboom')`.
 */
const failTool: Tool = {
  name: 'test.fail',
  description: 'Throws',
  inputSchema: { type: 'object', properties: {} },
  execute: async () => {
    throw new Error('kaboom');
  },
};

/**
 * Test fixture: a tool that succeeds with an output of 100,000 'x' characters.
 */
const bigOutputTool: Tool = {
  name: 'test.big',
  description: 'Returns huge output',
  inputSchema: { type: 'object', properties: {} },
  execute: async () => ({ success: true, output: 'x'.repeat(100_000) }),
};

/**
 * Test fixture: a no-op tool registered under the name 'file.write'. It writes
 * nothing and always succeeds with the output 'ok'.
 */
const fileWriteLikeTool: Tool = {
  name: 'file.write',
  description: 'Test file write tool',
  inputSchema: { type: 'object', properties: {} },
  execute: async () => ({ success: true, output: 'ok' }),
};

/**
 * Tests for ToolExecutor. Each test builds its own ToolRegistry, HookEngine and
 * ToolExecutor, so no state is shared between cases.
 *
 * Tests that involve a confirmation start `execute()` without awaiting it, read
 * the pending confirmations from the HookEngine, resolve the single pending
 * entry, and only then await the result.
 */
describe('ToolExecutor', () => {
  it('executes a tool and returns result', async () => {
    const registry = new ToolRegistry();
    registry.register(echoTool);
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const result = await executor.execute('test.echo', { text: 'hello' });
    expect(result.success).toBe(true);
    expect(result.output).toBe('hello');
  });

  it('returns error for unknown tool', async () => {
    // Nothing is registered, so any tool name is unknown.
    const registry = new ToolRegistry();
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const result = await executor.execute('nonexistent', {});
    expect(result.success).toBe(false);
    expect(result.error).toContain('not found');
  });

  it('catches tool execution errors', async () => {
    const registry = new ToolRegistry();
    registry.register(failTool);
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // The tool throws; the executor is expected to report it instead of rejecting.
    const result = await executor.execute('test.fail', {});
    expect(result.success).toBe(false);
    expect(result.error).toContain('kaboom');
  });

  it('enforces timeout', async () => {
    const registry = new ToolRegistry();
    registry.register(slowTool);
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    // 100 ms limit against a tool that needs 5000 ms.
    const executor = new ToolExecutor(registry, hooks, { defaultTimeoutMs: 100 });

    const result = await executor.execute('test.slow', {});
    expect(result.success).toBe(false);
    expect(result.error).toContain('timed out');
  });

  it('truncates large output', async () => {
    const registry = new ToolRegistry();
    registry.register(bigOutputTool);
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks, { maxOutputBytes: 1000 });

    const result = await executor.execute('test.big', {});
    expect(result.success).toBe(true);
    // The bound is looser than maxOutputBytes — presumably headroom for the
    // '[truncated]' marker appended to the output.
    expect(result.output.length).toBeLessThanOrEqual(1100);
    expect(result.output).toContain('[truncated]');
  });

  it('blocks on confirm hook and resolves when approved', async () => {
    const registry = new ToolRegistry();
    registry.register(echoTool);
    const hooks = new HookEngine({ confirm: ['test.*'], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // Not awaited yet: the call is expected to stay pending until confirmed.
    const resultPromise = executor.execute('test.echo', { text: 'hi' });

    const pending = hooks.getPendingConfirmations();
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: true });

    const result = await resultPromise;
    expect(result.success).toBe(true);
    expect(result.output).toBe('hi');
  });

  it('blocks on confirm hook and returns denied', async () => {
    const registry = new ToolRegistry();
    registry.register(echoTool);
    const hooks = new HookEngine({ confirm: ['test.*'], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const resultPromise = executor.execute('test.echo', { text: 'hi' });

    const pending = hooks.getPendingConfirmations();
    // Fail with a clear message if no confirmation was queued, rather than
    // with a TypeError on `pending[0].id` below.
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: false, reason: 'nope' });

    const result = await resultPromise;
    expect(result.success).toBe(false);
    expect(result.error).toContain('denied');
  });

  it('conservative autonomy requires confirm for dangerous tools', async () => {
    const registry = new ToolRegistry();
    registry.register(fileWriteLikeTool);
    // No confirm patterns are configured; the confirmation must come from the
    // autonomy level alone.
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const resultPromise = executor.execute('file.write', {}, { autonomyLevel: 'conservative' });
    const pending = hooks.getPendingConfirmations();
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: true });

    const result = await resultPromise;
    expect(result.success).toBe(true);
  });

  it('autonomous mode defers to explicit confirm hooks', async () => {
    const registry = new ToolRegistry();
    registry.register(echoTool);
    const hooks = new HookEngine({ confirm: ['test.*'], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // Even in autonomous mode, the explicit 'test.*' confirm pattern must apply.
    const resultPromise = executor.execute('test.echo', { text: 'hi' }, { autonomyLevel: 'autonomous' });
    const pending = hooks.getPendingConfirmations();
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: true });

    const result = await resultPromise;
    expect(result.success).toBe(true);
  });

  it('enforces skill filesystem write allowlist', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'file.write',
      description: 'write',
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'ok' }),
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // Path inside the skill's write allowlist ('/tmp/**').
    const allowed = await executor.execute(
      'file.write',
      { path: '/tmp/flynn-skill-ok.txt', content: 'hello' },
      {
        skillName: 'test-skill',
        skillPermissions: {
          execution_environment: 'host',
          fs: { write: ['/tmp/**'] },
        },
        executionEnvironment: 'host',
        autonomyLevel: 'autonomous',
      },
    );
    expect(allowed.success).toBe(true);

    // Same permissions, but the path is outside the allowlist.
    const denied = await executor.execute(
      'file.write',
      { path: '/etc/passwd', content: 'nope' },
      {
        skillName: 'test-skill',
        skillPermissions: {
          execution_environment: 'host',
          fs: { write: ['/tmp/**'] },
        },
        executionEnvironment: 'host',
        autonomyLevel: 'autonomous',
      },
    );
    expect(denied.success).toBe(false);
    expect(denied.error).toContain('path not allowed');
  });

  it('enforces tool secret scopes for skill contexts', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'gmail.list',
      description: 'gmail',
      requiredSecretScopes: ['gmail'],
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'ok' }),
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // The skill grants no secrets, while the tool requires the 'gmail' scope.
    const result = await executor.execute('gmail.list', {}, {
      skillName: 'no-secrets-skill',
      skillPermissions: { secrets: [] },
      executionEnvironment: 'host',
    });
    expect(result.success).toBe(false);
    expect(result.error).toContain('missing secret scopes');
  });

  it('blocks high-risk tool calls with injection markers when untrusted content is present', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'shell.exec',
      description: 'shell',
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'ok' }),
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    const result = await executor.execute('shell.exec', { command: 'rm -rf /' }, {
      untrustedContent: true,
      executionEnvironment: 'host',
    });
    expect(result.success).toBe(false);
    expect(result.error).toContain('blocked');
  });

  it('blocks passing secret-like args to network tools when untrusted content is present', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'web.fetch',
      description: 'fetch',
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'ok' }),
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // The `authorization` argument carries a bearer token.
    const result = await executor.execute('web.fetch', { url: 'https://example.com', authorization: 'Bearer abcdef' }, {
      untrustedContent: true,
      executionEnvironment: 'host',
    });
    expect(result.success).toBe(false);
    expect(result.error).toContain('refusing to pass');
  });

  it('denies host high-risk tools for sandboxed skills unless elevation is active', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'shell.exec',
      description: 'shell',
      inputSchema: { type: 'object', properties: {} },
      execute: async () => ({ success: true, output: 'ok' }),
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // Sandbox-only skill asking for host execution, with no elevation.
    const denied = await executor.execute('shell.exec', { command: 'echo hi' }, {
      skillName: 'test-skill',
      skillPermissions: { execution_environment: 'sandbox' },
      executionEnvironment: 'host',
      autonomyLevel: 'autonomous',
    });
    expect(denied.success).toBe(false);
    expect(denied.error).toContain('execution_environment=host');

    // Same request with an elevation window that ends 60 s from now.
    const allowedPromise = executor.execute('shell.exec', { command: 'echo hi' }, {
      skillName: 'test-skill',
      skillPermissions: { execution_environment: 'sandbox' },
      executionEnvironment: 'host',
      elevatedHostUntilMs: Date.now() + 60_000,
      elevatedHostId: 'e1',
      autonomyLevel: 'autonomous',
    });
    // The elevated call is still expected to queue one confirmation.
    const pending = hooks.getPendingConfirmations();
    expect(pending).toHaveLength(1);
    hooks.resolveConfirmation(pending[0].id, { approved: true });

    const allowed = await allowedPromise;
    expect(allowed.success).toBe(true);
  });

  it('executes shell.exec in sandbox when executionEnvironment is sandbox', async () => {
    const registry = new ToolRegistry();
    registry.register({
      name: 'shell.exec',
      description: 'host shell',
      inputSchema: { type: 'object', properties: {} },
      // If the host implementation runs, the test fails via this error.
      execute: async () => { throw new Error('host should not run'); },
    });
    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
    const executor = new ToolExecutor(registry, hooks);

    // Stubs providing only `exec` and `getOrCreate`; cast to `any` because they
    // do not implement the full sandbox types (not visible in this file).
    const fakeSandbox = {
      exec: async () => ({ stdout: 'sandbox-out', stderr: '' }),
    } as any;
    const fakeManager = {
      getOrCreate: async () => fakeSandbox,
    } as any;
    executor.setSandboxManager(fakeManager);

    const result = await executor.execute('shell.exec', { command: 'echo hi' }, {
      executionEnvironment: 'sandbox',
      sessionId: 's1',
      autonomyLevel: 'autonomous',
    });
    expect(result.success).toBe(true);
    expect(result.output).toContain('sandbox-out');
  });
});