feat: add tool allow/deny profiles with per-agent and per-provider filtering

Implements configurable tool filtering with four built-in profiles
(minimal, messaging, coding, full), global and per-agent/per-provider
allow/deny lists with glob pattern support, and defense-in-depth
enforcement at both tool listing and execution time.

New: src/tools/policy.ts (ToolPolicy engine), src/tools/policy.test.ts (37 tests)
Modified: config schema, tool registry, tool executor, NativeAgent,
AgentOrchestrator, daemon wiring, gateway tool handler, test mocks
This commit is contained in:
William Valentin
2026-02-06 15:30:34 -08:00
parent 8238d3e981
commit ee0af0cc06
13 changed files with 794 additions and 8 deletions
+442
View File
@@ -0,0 +1,442 @@
import { describe, it, expect } from 'vitest';
import { ToolPolicy, PROFILE_TOOLS, matchesAnyPattern } from './policy.js';
import type { ToolsConfig } from '../config/schema.js';
import type { Tool } from './types.js';
// ── Helpers ─────────────────────────────────────────────────────────
/**
 * Every tool name that would be in a fully loaded Flynn instance.
 *
 * Entries are grouped by namespace prefix; the trailing `mcp:`-prefixed
 * names give the suite something for the `mcp:*` glob tests to match.
 */
const ALL_TOOL_NAMES: string[] = [
  // shell
  'shell.exec',
  // file.*
  'file.read', 'file.write', 'file.edit', 'file.list',
  // web.*
  'web.fetch', 'web.search',
  // memory.*
  'memory.read', 'memory.write', 'memory.search',
  // process.*
  'process.start', 'process.status', 'process.output', 'process.kill', 'process.list',
  // mcp:*
  'mcp:filesystem:read_file', 'mcp:filesystem:write_file',
];
/**
 * Builds a stub tool for the policy tests.
 *
 * @param name - Name to give the tool; also embedded in its description.
 * @returns A tool with an empty object input schema whose `execute`
 *   resolves to a successful result with empty output.
 */
function makeTool(name: string): Tool {
  const description = `Mock ${name}`;
  const inputSchema = { type: 'object', properties: {} };
  return {
    name,
    description,
    inputSchema,
    // The policy tests only look at `name`; execution is a no-op success.
    execute: async () => ({ success: true, output: '' }),
  };
}
/** One stub tool per entry of ALL_TOOL_NAMES, in the same order. */
const ALL_TOOLS = ALL_TOOL_NAMES.map((toolName) => makeTool(toolName));
/**
 * Produces a tools config for a test case.
 *
 * @param overrides - Fields that replace the baseline values; omitted
 *   fields keep the baseline.
 * @returns A fresh config: `full` profile, empty allow/deny lists and no
 *   agent or provider entries, with `overrides` applied on top.
 */
function defaultConfig(overrides: Partial<ToolsConfig> = {}): ToolsConfig {
  const baseline: ToolsConfig = {
    profile: 'full',
    allow: [],
    deny: [],
    agents: {},
    providers: {},
  };
  // Later spread wins, so any key present in `overrides` replaces the baseline.
  return { ...baseline, ...overrides };
}
// ── matchesAnyPattern ───────────────────────────────────────────────
// Checks for the glob matcher: literal names, trailing-`*` prefixes, and a bare `*`.
describe('matchesAnyPattern', () => {
  it('matches exact names', () => {
    const hit = matchesAnyPattern('shell.exec', ['shell.exec']);
    const miss = matchesAnyPattern('shell.exec', ['file.read']);
    expect(hit).toBe(true);
    expect(miss).toBe(false);
  });

  it('matches wildcard patterns', () => {
    const fileGlob = ['file.*'];
    expect(matchesAnyPattern('file.read', fileGlob)).toBe(true);
    expect(matchesAnyPattern('file.write', fileGlob)).toBe(true);
    // A name from a different namespace must not match the file glob.
    expect(matchesAnyPattern('shell.exec', fileGlob)).toBe(false);
  });

  it('matches mcp wildcard patterns', () => {
    const mcpTool = 'mcp:filesystem:read_file';
    expect(matchesAnyPattern(mcpTool, ['mcp:*'])).toBe(true);
    expect(matchesAnyPattern(mcpTool, ['mcp:filesystem:*'])).toBe(true);
    expect(matchesAnyPattern('shell.exec', ['mcp:*'])).toBe(false);
  });

  it('matches catch-all wildcard', () => {
    const matched = matchesAnyPattern('anything', ['*']);
    expect(matched).toBe(true);
  });
});
// ── Profile definitions ─────────────────────────────────────────────
// Checks the built-in profile sets and the superset chain minimal -> messaging -> coding.
describe('PROFILE_TOOLS', () => {
  it('minimal contains only read-only tools', () => {
    const { minimal } = PROFILE_TOOLS;
    expect(minimal.has('file.read')).toBe(true);
    expect(minimal.has('file.list')).toBe(true);
    expect(minimal.has('web.fetch')).toBe(true);
    // Execution and write tools are expected to be absent.
    expect(minimal.has('shell.exec')).toBe(false);
    expect(minimal.has('file.write')).toBe(false);
  });

  it('messaging is a superset of minimal', () => {
    const { minimal, messaging } = PROFILE_TOOLS;
    minimal.forEach((tool) => {
      expect(messaging.has(tool)).toBe(true);
    });
    expect(messaging.has('memory.read')).toBe(true);
    expect(messaging.has('web.search')).toBe(true);
  });

  it('coding is a superset of messaging', () => {
    const { messaging, coding } = PROFILE_TOOLS;
    messaging.forEach((tool) => {
      expect(coding.has(tool)).toBe(true);
    });
    expect(coding.has('shell.exec')).toBe(true);
    expect(coding.has('file.write')).toBe(true);
    expect(coding.has('process.start')).toBe(true);
  });

  it('full is empty (special: matches everything)', () => {
    const { full } = PROFILE_TOOLS;
    expect(full.size).toBe(0);
  });
});
// ── ToolPolicy ──────────────────────────────────────────────────────
describe('ToolPolicy', () => {
// With no overrides the helper config selects the 'full' profile.
describe('default config (full profile)', () => {
  it('allows all tools when profile is full', () => {
    const visible = new ToolPolicy(defaultConfig()).filterTools(ALL_TOOLS);
    expect(visible).toHaveLength(ALL_TOOLS.length);
  });

  it('allows all tool names when profile is full', () => {
    const fullPolicy = new ToolPolicy(defaultConfig());
    const allowedNames = fullPolicy.resolveAllowedNames(ALL_TOOL_NAMES);
    expect(allowedNames.size).toBe(ALL_TOOL_NAMES.length);
  });
});
// Each case swaps only the global profile and checks which tool names survive filtering.
describe('profile filtering', () => {
  it('minimal profile only allows read-only tools', () => {
    const minimalPolicy = new ToolPolicy(defaultConfig({ profile: 'minimal' }));
    const kept = minimalPolicy.filterTools(ALL_TOOLS).map((tool) => tool.name);

    for (const expected of ['file.read', 'file.list', 'web.fetch']) {
      expect(kept).toContain(expected);
    }
    for (const excluded of ['shell.exec', 'file.write', 'memory.read', 'mcp:filesystem:read_file']) {
      expect(kept).not.toContain(excluded);
    }
  });

  it('messaging profile includes memory and web search', () => {
    const messagingPolicy = new ToolPolicy(defaultConfig({ profile: 'messaging' }));
    const kept = messagingPolicy.filterTools(ALL_TOOLS).map((tool) => tool.name);

    for (const expected of ['memory.read', 'memory.write', 'web.search']) {
      expect(kept).toContain(expected);
    }
    for (const excluded of ['shell.exec', 'file.write']) {
      expect(kept).not.toContain(excluded);
    }
  });

  it('coding profile includes file writes and shell', () => {
    const codingPolicy = new ToolPolicy(defaultConfig({ profile: 'coding' }));
    const kept = codingPolicy.filterTools(ALL_TOOLS).map((tool) => tool.name);

    for (const expected of ['shell.exec', 'file.write', 'file.edit', 'process.start']) {
      expect(kept).toContain(expected);
    }
    // MCP tools are not part of the coding profile by default.
    expect(kept).not.toContain('mcp:filesystem:read_file');
  });
});
// The global allow list extends the minimal profile, by exact name or by glob.
describe('global allow list', () => {
  it('adds specific tools beyond profile', () => {
    const kept = new ToolPolicy(
      defaultConfig({ profile: 'minimal', allow: ['shell.exec'] }),
    )
      .filterTools(ALL_TOOLS)
      .map((tool) => tool.name);

    expect(kept).toContain('file.read');
    expect(kept).toContain('shell.exec');
    // Only the listed tool is added; other non-profile tools stay out.
    expect(kept).not.toContain('file.write');
  });

  it('adds tools matching glob patterns', () => {
    const kept = new ToolPolicy(
      defaultConfig({ profile: 'minimal', allow: ['mcp:*'] }),
    )
      .filterTools(ALL_TOOLS)
      .map((tool) => tool.name);

    for (const mcpName of ['mcp:filesystem:read_file', 'mcp:filesystem:write_file']) {
      expect(kept).toContain(mcpName);
    }
    expect(kept).not.toContain('shell.exec');
  });
});
// The global deny list removes tools by exact name or glob, and beats a matching allow entry.
describe('global deny list', () => {
  it('removes tools from full profile', () => {
    const denyShell = new ToolPolicy(defaultConfig({ deny: ['shell.exec'] }));
    const kept = denyShell.filterTools(ALL_TOOLS).map((tool) => tool.name);

    expect(kept).not.toContain('shell.exec');
    expect(kept).toContain('file.read');
  });

  it('removes tools matching glob patterns', () => {
    const denyMcp = new ToolPolicy(defaultConfig({ deny: ['mcp:*'] }));
    const kept = denyMcp.filterTools(ALL_TOOLS).map((tool) => tool.name);

    for (const mcpName of ['mcp:filesystem:read_file', 'mcp:filesystem:write_file']) {
      expect(kept).not.toContain(mcpName);
    }
    expect(kept).toContain('shell.exec');
  });

  it('deny wins over allow', () => {
    // The same tool appears in both lists; the deny entry must prevail.
    const conflicted = new ToolPolicy(
      defaultConfig({
        profile: 'minimal',
        allow: ['shell.exec'],
        deny: ['shell.exec'],
      }),
    );
    const kept = conflicted.filterTools(ALL_TOOLS).map((tool) => tool.name);

    expect(kept).not.toContain('shell.exec');
  });
});
// Per-agent entries apply only when the filter context names that agent.
describe('agent overrides', () => {
  it('restricts tools for a specific agent tier', () => {
    const tiered = new ToolPolicy(
      defaultConfig({
        agents: { fast: { profile: 'minimal', allow: [], deny: [] } },
      }),
    );

    // No agent in the context: the global full profile is used.
    const unscoped = tiered.filterTools(ALL_TOOLS);
    expect(unscoped).toHaveLength(ALL_TOOLS.length);

    // Agent 'fast' in the context: its minimal profile is used.
    const fastKept = tiered
      .filterTools(ALL_TOOLS, { agent: 'fast' })
      .map((tool) => tool.name);
    expect(fastKept).toContain('file.read');
    expect(fastKept).not.toContain('shell.exec');
  });

  it('agent deny removes tools from agent set', () => {
    const tiered = new ToolPolicy(
      defaultConfig({
        agents: { fast: { profile: 'minimal', allow: [], deny: ['web.fetch'] } },
      }),
    );
    const kept = tiered
      .filterTools(ALL_TOOLS, { agent: 'fast' })
      .map((tool) => tool.name);

    expect(kept).toContain('file.read');
    expect(kept).not.toContain('web.fetch');
  });

  it('agent allow adds tools beyond agent profile', () => {
    const tiered = new ToolPolicy(
      defaultConfig({
        agents: { complex: { profile: 'coding', allow: ['mcp:*'], deny: [] } },
      }),
    );
    const kept = tiered
      .filterTools(ALL_TOOLS, { agent: 'complex' })
      .map((tool) => tool.name);

    expect(kept).toContain('shell.exec');
    expect(kept).toContain('mcp:filesystem:read_file');
  });

  it('agent override intersects with global — cannot add what global denies', () => {
    const tiered = new ToolPolicy(
      defaultConfig({
        deny: ['shell.exec'],
        agents: { complex: { profile: 'coding', allow: ['shell.exec'], deny: [] } },
      }),
    );
    const kept = tiered
      .filterTools(ALL_TOOLS, { agent: 'complex' })
      .map((tool) => tool.name);

    // The global deny on shell.exec outranks the agent-level allow.
    expect(kept).not.toContain('shell.exec');
  });

  it('unknown agent tier has no effect', () => {
    const untouched = new ToolPolicy(defaultConfig()).filterTools(ALL_TOOLS, {
      agent: 'nonexistent',
    });
    expect(untouched).toHaveLength(ALL_TOOLS.length);
  });
});
// Per-provider entries apply when the filter context names that provider.
describe('provider overrides', () => {
  it('restricts tools for a specific provider', () => {
    const scoped = new ToolPolicy(
      defaultConfig({
        providers: { ollama: { profile: 'minimal', allow: [], deny: [] } },
      }),
    );
    const kept = scoped
      .filterTools(ALL_TOOLS, { provider: 'ollama' })
      .map((tool) => tool.name);

    expect(kept).toContain('file.read');
    expect(kept).not.toContain('shell.exec');
  });

  it('provider deny takes effect', () => {
    const scoped = new ToolPolicy(
      defaultConfig({
        providers: { ollama: { profile: 'messaging', allow: [], deny: ['web.search'] } },
      }),
    );
    const kept = scoped
      .filterTools(ALL_TOOLS, { provider: 'ollama' })
      .map((tool) => tool.name);

    // memory.read comes from the messaging profile; web.search is removed by the provider deny.
    expect(kept).toContain('memory.read');
    expect(kept).not.toContain('web.search');
  });
});
// When the context names both an agent and a provider, both restrictions must hold.
describe('combined agent + provider', () => {
  it('intersects agent and provider restrictions', () => {
    const layered = new ToolPolicy(
      defaultConfig({
        agents: { fast: { profile: 'messaging', allow: [], deny: [] } },
        providers: { ollama: { profile: 'coding', allow: [], deny: [] } },
      }),
    );

    // Agent 'fast' gets messaging tools and provider 'ollama' gets coding tools.
    // Messaging is a subset of coding, so the intersection is the messaging set.
    const kept = layered
      .filterTools(ALL_TOOLS, { agent: 'fast', provider: 'ollama' })
      .map((tool) => tool.name);

    expect(kept).toContain('file.read');
    expect(kept).toContain('memory.read');
    // shell.exec is in coding but not in messaging, so it drops out.
    expect(kept).not.toContain('shell.exec');
  });
});
// Single-name checks against the full registered name list.
describe('isAllowed', () => {
  it('returns true for allowed tools', () => {
    const verdict = new ToolPolicy(defaultConfig()).isAllowed('shell.exec', ALL_TOOL_NAMES);
    expect(verdict).toBe(true);
  });

  it('returns false for denied tools', () => {
    const denyShell = new ToolPolicy(defaultConfig({ deny: ['shell.exec'] }));
    const verdict = denyShell.isAllowed('shell.exec', ALL_TOOL_NAMES);
    expect(verdict).toBe(false);
  });

  it('respects context', () => {
    const tiered = new ToolPolicy(
      defaultConfig({
        agents: { fast: { profile: 'minimal', allow: [], deny: [] } },
      }),
    );
    // Under agent 'fast' (minimal profile) shell.exec is rejected and file.read accepted.
    expect(tiered.isAllowed('shell.exec', ALL_TOOL_NAMES, { agent: 'fast' })).toBe(false);
    expect(tiered.isAllowed('file.read', ALL_TOOL_NAMES, { agent: 'fast' })).toBe(true);
  });
});
// Which profile name is reported for a given context: global, agent, or provider.
describe('getEffectiveProfile', () => {
  it('returns global profile by default', () => {
    const effective = new ToolPolicy(defaultConfig({ profile: 'coding' })).getEffectiveProfile();
    expect(effective).toBe('coding');
  });

  it('returns agent profile override', () => {
    const tiered = new ToolPolicy(
      defaultConfig({
        profile: 'full',
        agents: { fast: { profile: 'minimal', allow: [], deny: [] } },
      }),
    );
    const effective = tiered.getEffectiveProfile({ agent: 'fast' });
    expect(effective).toBe('minimal');
  });

  it('returns provider profile override', () => {
    const scoped = new ToolPolicy(
      defaultConfig({
        providers: { ollama: { profile: 'messaging', allow: [], deny: [] } },
      }),
    );
    const effective = scoped.getEffectiveProfile({ provider: 'ollama' });
    expect(effective).toBe('messaging');
  });

  it('agent override takes precedence over provider', () => {
    const layered = new ToolPolicy(
      defaultConfig({
        agents: { fast: { profile: 'minimal', allow: [], deny: [] } },
        providers: { ollama: { profile: 'messaging', allow: [], deny: [] } },
      }),
    );
    // Both overrides match the context; the agent's profile is the one reported.
    const effective = layered.getEffectiveProfile({ agent: 'fast', provider: 'ollama' });
    expect(effective).toBe('minimal');
  });
});
// A config with every field at its default must not hide any tool.
describe('backward compatibility', () => {
  it('no tools config means full profile (all tools allowed)', () => {
    // Written out by hand to simulate the default Zod output when the yaml
    // has no tools: section.
    const visible = new ToolPolicy({
      profile: 'full',
      allow: [],
      deny: [],
      agents: {},
      providers: {},
    }).filterTools(ALL_TOOLS);

    expect(visible).toHaveLength(ALL_TOOLS.length);
  });
});
// Boundary inputs: nothing registered, a partially registered profile, and a multi-match deny glob.
describe('edge cases', () => {
  it('handles empty tool list', () => {
    const visible = new ToolPolicy(defaultConfig()).filterTools([]);
    expect(visible).toHaveLength(0);
  });

  it('handles profile with unregistered tools', () => {
    // Only two of the coding profile's tools are actually registered here.
    const registered = ['file.read', 'web.fetch'].map((toolName) => makeTool(toolName));
    const codingPolicy = new ToolPolicy(defaultConfig({ profile: 'coding' }));
    const kept = codingPolicy.filterTools(registered).map((tool) => tool.name);

    expect(kept).toContain('file.read');
    expect(kept).toContain('web.fetch');
    expect(kept).toHaveLength(2);
  });

  it('deny pattern removes multiple tools', () => {
    const denyProcess = new ToolPolicy(defaultConfig({ deny: ['process.*'] }));
    const kept = denyProcess.filterTools(ALL_TOOLS).map((tool) => tool.name);

    const processTools = [
      'process.start',
      'process.status',
      'process.output',
      'process.kill',
      'process.list',
    ];
    for (const removed of processTools) {
      expect(kept).not.toContain(removed);
    }
    // Tools outside the denied namespace are unaffected.
    expect(kept).toContain('shell.exec');
  });
});
});