// flynn/src/tools/policy.test.ts

import { describe, it, expect } from 'vitest';
import { ToolPolicy, PROFILE_TOOLS, matchesAnyPattern } from './policy.js';
import type { ToolsConfig } from '../config/schema.js';
import type { Tool } from './types.js';

// ── Helpers ─────────────────────────────────────────────────────────

/**
 * Every tool name this suite registers, standing in for a fully loaded Flynn
 * instance. Entries are grouped by namespace prefix; order is significant only
 * in that ALL_TOOLS is derived from it.
 */
const ALL_TOOL_NAMES = [
  // shell.*
  'shell.exec',
  // file.*
  'file.read', 'file.write', 'file.edit', 'file.list',
  // web.*
  'web.fetch', 'web.search',
  // memory.*
  'memory.read', 'memory.write', 'memory.search',
  // minio.*
  'minio.share', 'minio.ingest', 'minio.sync',
  // process.*
  'process.start', 'process.status', 'process.output', 'process.kill', 'process.list',
  // mcp:* (colon-separated names)
  'mcp:filesystem:read_file', 'mcp:filesystem:write_file',
];

/**
 * Builds a stub tool for policy tests.
 *
 * @param name - Tool name; also embedded in the description as `Mock <name>`.
 * @returns A tool with an empty object input schema whose `execute` always
 *   resolves to `{ success: true, output: '' }`.
 */
function makeTool(name: string): Tool {
  const description = `Mock ${name}`;
  const stub: Tool = {
    name,
    description,
    inputSchema: { type: 'object', properties: {} },
    async execute() {
      return { success: true, output: '' };
    },
  };
  return stub;
}

// One stub tool per entry of ALL_TOOL_NAMES, in the same order.
const ALL_TOOLS = ALL_TOOL_NAMES.map((toolName) => makeTool(toolName));

/**
 * Produces the baseline tools config for a test: `full` profile, empty
 * allow/deny lists, and no agent or provider overrides.
 *
 * @param overrides - Fields that replace the matching baseline fields
 *   (shallow; a supplied field replaces the baseline value wholesale).
 * @returns A fresh config object; nothing is shared between calls.
 */
function defaultConfig(overrides: Partial<ToolsConfig> = {}): ToolsConfig {
  // Created on every call so tests never share array/object instances.
  const baseline: ToolsConfig = {
    profile: 'full',
    allow: [],
    deny: [],
    agents: {},
    providers: {},
  };
  return { ...baseline, ...overrides };
}

// ── matchesAnyPattern ───────────────────────────────────────────────

describe('matchesAnyPattern', () => {
  /** One expectation: tool name, pattern list, and whether a match is expected. */
  type MatchCase = readonly [string, string[], boolean];

  /** Runs matchesAnyPattern on each case, in order, and asserts the outcome. */
  const expectMatches = (cases: readonly MatchCase[]): void => {
    for (const [toolName, patterns, shouldMatch] of cases) {
      expect(matchesAnyPattern(toolName, patterns)).toBe(shouldMatch);
    }
  };

  it('matches exact names', () => {
    expectMatches([
      ['shell.exec', ['shell.exec'], true],
      ['shell.exec', ['file.read'], false],
    ]);
  });

  it('matches wildcard patterns', () => {
    expectMatches([
      ['file.read', ['file.*'], true],
      ['file.write', ['file.*'], true],
      ['shell.exec', ['file.*'], false],
    ]);
  });

  it('matches mcp wildcard patterns', () => {
    expectMatches([
      ['mcp:filesystem:read_file', ['mcp:*'], true],
      ['mcp:filesystem:read_file', ['mcp:filesystem:*'], true],
      ['shell.exec', ['mcp:*'], false],
    ]);
  });

  it('matches catch-all wildcard', () => {
    expectMatches([['anything', ['*'], true]]);
  });
});

// ── Profile definitions ─────────────────────────────────────────────

describe('PROFILE_TOOLS', () => {
  it('minimal contains only read-only tools', () => {
    const { minimal } = PROFILE_TOOLS;

    // Read-only tools that must be present.
    for (const toolName of ['file.read', 'file.list', 'web.fetch']) {
      expect(minimal.has(toolName)).toBe(true);
    }
    // Mutating / executing tools that must be absent.
    for (const toolName of ['shell.exec', 'file.write']) {
      expect(minimal.has(toolName)).toBe(false);
    }
  });

  it('messaging is a superset of minimal', () => {
    const { minimal, messaging } = PROFILE_TOOLS;

    Array.from(minimal).forEach((toolName) => {
      expect(messaging.has(toolName)).toBe(true);
    });
    // Spot-check tools asserted to be part of messaging.
    for (const toolName of ['memory.read', 'web.search']) {
      expect(messaging.has(toolName)).toBe(true);
    }
  });

  it('coding is a superset of messaging', () => {
    const { messaging, coding } = PROFILE_TOOLS;

    Array.from(messaging).forEach((toolName) => {
      expect(coding.has(toolName)).toBe(true);
    });
    // Spot-check tools asserted to be part of coding.
    for (const toolName of ['shell.exec', 'file.write', 'process.start']) {
      expect(coding.has(toolName)).toBe(true);
    }
  });

  it('full is empty (special: matches everything)', () => {
    const { full } = PROFILE_TOOLS;
    expect(full.size).toBe(0);
  });
});

// ── ToolPolicy ──────────────────────────────────────────────────────

describe('ToolPolicy', () => {
  describe('default config (full profile)', () => {
    // The stock config is expected to let every registered tool through.
    it('allows all tools when profile is full', () => {
      const fullPolicy = new ToolPolicy(defaultConfig());
      expect(fullPolicy.filterTools(ALL_TOOLS)).toHaveLength(ALL_TOOLS.length);
    });

    it('allows all tool names when profile is full', () => {
      const fullPolicy = new ToolPolicy(defaultConfig());
      const allowedNames = fullPolicy.resolveAllowedNames(ALL_TOOL_NAMES);
      expect(allowedNames.size).toBe(ALL_TOOL_NAMES.length);
    });
  });

  describe('profile filtering', () => {
    it('minimal profile only allows read-only tools', () => {
      const minimalPolicy = new ToolPolicy(defaultConfig({ profile: 'minimal' }));
      const allowed = minimalPolicy.filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).toContain('file.read');
      expect(allowed).toContain('file.list');
      expect(allowed).toContain('web.fetch');
      expect(allowed).not.toContain('shell.exec');
      expect(allowed).not.toContain('file.write');
      expect(allowed).not.toContain('memory.read');
      expect(allowed).not.toContain('mcp:filesystem:read_file');
    });

    it('messaging profile includes memory and web search', () => {
      const messagingPolicy = new ToolPolicy(defaultConfig({ profile: 'messaging' }));
      const allowed = messagingPolicy.filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).toContain('memory.read');
      expect(allowed).toContain('memory.write');
      expect(allowed).toContain('web.search');
      expect(allowed).not.toContain('shell.exec');
      expect(allowed).not.toContain('file.write');
    });

    it('coding profile includes file writes and shell', () => {
      const codingPolicy = new ToolPolicy(defaultConfig({ profile: 'coding' }));
      const allowed = codingPolicy.filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).toContain('shell.exec');
      expect(allowed).toContain('file.write');
      expect(allowed).toContain('file.edit');
      expect(allowed).toContain('process.start');
      // The coding profile is expected to leave MCP tools out unless allowed explicitly.
      expect(allowed).not.toContain('mcp:filesystem:read_file');
    });
  });

  describe('global allow list', () => {
    it('adds specific tools beyond profile', () => {
      const config = defaultConfig({ profile: 'minimal', allow: ['shell.exec'] });
      const allowed = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).toContain('file.read');
      expect(allowed).toContain('shell.exec');
      expect(allowed).not.toContain('file.write');
    });

    it('adds tools matching glob patterns', () => {
      const config = defaultConfig({ profile: 'minimal', allow: ['mcp:*'] });
      const allowed = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).toContain('mcp:filesystem:read_file');
      expect(allowed).toContain('mcp:filesystem:write_file');
      expect(allowed).not.toContain('shell.exec');
    });
  });

  describe('global deny list', () => {
    it('removes tools from full profile', () => {
      const config = defaultConfig({ deny: ['shell.exec'] });
      const remaining = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(remaining).not.toContain('shell.exec');
      expect(remaining).toContain('file.read');
    });

    it('removes tools matching glob patterns', () => {
      const config = defaultConfig({ deny: ['mcp:*'] });
      const remaining = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(remaining).not.toContain('mcp:filesystem:read_file');
      expect(remaining).not.toContain('mcp:filesystem:write_file');
      expect(remaining).toContain('shell.exec');
    });

    it('deny wins over allow', () => {
      // The same tool appears in both lists; the deny entry must prevail.
      const config = defaultConfig({
        profile: 'minimal',
        allow: ['shell.exec'],
        deny: ['shell.exec'],
      });
      const remaining = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(remaining).not.toContain('shell.exec');
    });
  });

  describe('agent overrides', () => {
    it('restricts tools for a specific agent tier', () => {
      const config = defaultConfig({
        agents: {
          fast: { profile: 'minimal', allow: [], deny: [] },
        },
      });
      const tieredPolicy = new ToolPolicy(config);

      // No context: the global (full) profile governs.
      expect(tieredPolicy.filterTools(ALL_TOOLS)).toHaveLength(ALL_TOOLS.length);

      // Context names the `fast` tier: its minimal profile governs.
      const fastAllowed = tieredPolicy
        .filterTools(ALL_TOOLS, { agent: 'fast' })
        .map(({ name }) => name);
      expect(fastAllowed).toContain('file.read');
      expect(fastAllowed).not.toContain('shell.exec');
    });

    it('agent deny removes tools from agent set', () => {
      const config = defaultConfig({
        agents: {
          fast: { profile: 'minimal', allow: [], deny: ['web.fetch'] },
        },
      });
      const fastAllowed = new ToolPolicy(config)
        .filterTools(ALL_TOOLS, { agent: 'fast' })
        .map(({ name }) => name);

      expect(fastAllowed).toContain('file.read');
      expect(fastAllowed).not.toContain('web.fetch');
    });

    it('agent allow adds tools beyond agent profile', () => {
      const config = defaultConfig({
        agents: {
          complex: { profile: 'coding', allow: ['mcp:*'], deny: [] },
        },
      });
      const complexAllowed = new ToolPolicy(config)
        .filterTools(ALL_TOOLS, { agent: 'complex' })
        .map(({ name }) => name);

      expect(complexAllowed).toContain('shell.exec');
      expect(complexAllowed).toContain('mcp:filesystem:read_file');
    });

    it('agent override intersects with global — cannot add what global denies', () => {
      const config = defaultConfig({
        deny: ['shell.exec'],
        agents: {
          complex: { profile: 'coding', allow: ['shell.exec'], deny: [] },
        },
      });
      const complexAllowed = new ToolPolicy(config)
        .filterTools(ALL_TOOLS, { agent: 'complex' })
        .map(({ name }) => name);

      // The agent-level allow must not resurrect a globally denied tool.
      expect(complexAllowed).not.toContain('shell.exec');
    });

    it('unknown agent tier has no effect', () => {
      const plainPolicy = new ToolPolicy(defaultConfig());
      const filtered = plainPolicy.filterTools(ALL_TOOLS, { agent: 'nonexistent' });
      expect(filtered).toHaveLength(ALL_TOOLS.length);
    });
  });

  describe('provider overrides', () => {
    it('restricts tools for a specific provider', () => {
      const config = defaultConfig({
        providers: {
          ollama: { profile: 'minimal', allow: [], deny: [] },
        },
      });
      const ollamaAllowed = new ToolPolicy(config)
        .filterTools(ALL_TOOLS, { provider: 'ollama' })
        .map(({ name }) => name);

      expect(ollamaAllowed).toContain('file.read');
      expect(ollamaAllowed).not.toContain('shell.exec');
    });

    it('provider deny takes effect', () => {
      const config = defaultConfig({
        providers: {
          ollama: { profile: 'messaging', allow: [], deny: ['web.search'] },
        },
      });
      const ollamaAllowed = new ToolPolicy(config)
        .filterTools(ALL_TOOLS, { provider: 'ollama' })
        .map(({ name }) => name);

      expect(ollamaAllowed).toContain('memory.read');
      expect(ollamaAllowed).not.toContain('web.search');
    });
  });

  describe('combined agent + provider', () => {
    it('intersects agent and provider restrictions', () => {
      const config = defaultConfig({
        agents: {
          fast: { profile: 'messaging', allow: [], deny: [] },
        },
        providers: {
          ollama: { profile: 'coding', allow: [], deny: [] },
        },
      });

      // The agent tier grants the messaging set and the provider grants the
      // coding set; only tools present in both should remain, i.e. the
      // messaging set (a subset of coding).
      const context = { agent: 'fast', provider: 'ollama' };
      const allowed = new ToolPolicy(config)
        .filterTools(ALL_TOOLS, context)
        .map(({ name }) => name);

      expect(allowed).toContain('file.read');
      expect(allowed).toContain('memory.read');
      expect(allowed).not.toContain('shell.exec'); // coding-only, absent from messaging
    });
  });

  describe('isAllowed', () => {
    it('returns true for allowed tools', () => {
      const openPolicy = new ToolPolicy(defaultConfig());
      const verdict = openPolicy.isAllowed('shell.exec', ALL_TOOL_NAMES);
      expect(verdict).toBe(true);
    });

    it('returns false for denied tools', () => {
      const denyingPolicy = new ToolPolicy(defaultConfig({ deny: ['shell.exec'] }));
      const verdict = denyingPolicy.isAllowed('shell.exec', ALL_TOOL_NAMES);
      expect(verdict).toBe(false);
    });

    it('respects context', () => {
      const config = defaultConfig({
        agents: { fast: { profile: 'minimal', allow: [], deny: [] } },
      });
      const tieredPolicy = new ToolPolicy(config);

      // Both lookups run under the `fast` tier's minimal profile.
      expect(tieredPolicy.isAllowed('shell.exec', ALL_TOOL_NAMES, { agent: 'fast' })).toBe(false);
      expect(tieredPolicy.isAllowed('file.read', ALL_TOOL_NAMES, { agent: 'fast' })).toBe(true);
    });
  });

  describe('getEffectiveProfile', () => {
    it('returns global profile by default', () => {
      const globalOnly = new ToolPolicy(defaultConfig({ profile: 'coding' }));
      const effective = globalOnly.getEffectiveProfile();
      expect(effective).toBe('coding');
    });

    it('returns agent profile override', () => {
      const config = defaultConfig({
        profile: 'full',
        agents: { fast: { profile: 'minimal', allow: [], deny: [] } },
      });
      const effective = new ToolPolicy(config).getEffectiveProfile({ agent: 'fast' });
      expect(effective).toBe('minimal');
    });

    it('returns provider profile override', () => {
      const config = defaultConfig({
        providers: { ollama: { profile: 'messaging', allow: [], deny: [] } },
      });
      const effective = new ToolPolicy(config).getEffectiveProfile({ provider: 'ollama' });
      expect(effective).toBe('messaging');
    });

    it('agent override takes precedence over provider', () => {
      // Both overrides apply to the context; the agent's profile is the one reported.
      const config = defaultConfig({
        agents: { fast: { profile: 'minimal', allow: [], deny: [] } },
        providers: { ollama: { profile: 'messaging', allow: [], deny: [] } },
      });
      const effective = new ToolPolicy(config).getEffectiveProfile({
        agent: 'fast',
        provider: 'ollama',
      });
      expect(effective).toBe('minimal');
    });
  });

  describe('backward compatibility', () => {
    it('no tools config means full profile (all tools allowed)', () => {
      // Spelled out literally (not via defaultConfig) to mirror the default Zod
      // output when the yaml has no `tools:` section.
      const schemaDefaults: ToolsConfig = {
        profile: 'full',
        allow: [],
        deny: [],
        agents: {},
        providers: {},
      };
      const legacyPolicy = new ToolPolicy(schemaDefaults);

      expect(legacyPolicy.filterTools(ALL_TOOLS)).toHaveLength(ALL_TOOLS.length);
    });
  });

  describe('tool groups', () => {
    it('expands group:fs in allow list', () => {
      const config = defaultConfig({ profile: 'minimal', allow: ['group:fs'] });
      const allowed = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).toContain('file.read');
      expect(allowed).toContain('file.write');
      expect(allowed).toContain('file.edit');
      expect(allowed).toContain('file.list');
      expect(allowed).not.toContain('shell.exec');
    });

    it('expands group:runtime in deny list', () => {
      const config = defaultConfig({ deny: ['group:runtime'] });
      const allowed = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).not.toContain('shell.exec');
      expect(allowed).not.toContain('process.start');
      expect(allowed).not.toContain('process.status');
      expect(allowed).not.toContain('process.output');
      expect(allowed).not.toContain('process.kill');
      expect(allowed).not.toContain('process.list');
      expect(allowed).toContain('file.read');
    });

    it('expands groups in agent overrides', () => {
      const config = defaultConfig({
        agents: {
          fast: { profile: 'minimal', allow: ['group:memory'], deny: [] },
        },
      });
      const allowed = new ToolPolicy(config)
        .filterTools(ALL_TOOLS, { agent: 'fast' })
        .map(({ name }) => name);

      expect(allowed).toContain('memory.read');
      expect(allowed).toContain('memory.write');
      expect(allowed).toContain('memory.search');
      expect(allowed).toContain('file.read'); // supplied by the minimal profile
      expect(allowed).not.toContain('shell.exec');
    });

    it('expands groups in provider deny', () => {
      // This override deliberately carries no `profile` key.
      const config = defaultConfig({
        providers: {
          ollama: { allow: [], deny: ['group:web'] },
        },
      });
      const allowed = new ToolPolicy(config)
        .filterTools(ALL_TOOLS, { provider: 'ollama' })
        .map(({ name }) => name);

      expect(allowed).not.toContain('web.fetch');
      expect(allowed).not.toContain('web.search');
      expect(allowed).toContain('file.read');
      expect(allowed).toContain('shell.exec');
    });

    it('mixes groups with individual names', () => {
      const config = defaultConfig({
        profile: 'minimal',
        allow: ['group:memory', 'shell.exec'],
      });
      const allowed = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).toContain('memory.read');
      expect(allowed).toContain('shell.exec');
      expect(allowed).toContain('file.read'); // supplied by the minimal profile
    });

    it('expands group:minio', () => {
      const config = defaultConfig({ profile: 'minimal', allow: ['group:minio'] });
      const allowed = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).toContain('minio.share');
      expect(allowed).toContain('minio.ingest');
      expect(allowed).toContain('minio.sync');
      expect(allowed).toContain('file.read');
      expect(allowed).not.toContain('shell.exec');
    });

    it('unknown group name passes through as literal', () => {
      const config = defaultConfig({ profile: 'minimal', allow: ['group:nonexistent'] });
      // No registered tool is named 'group:nonexistent', so the result should
      // be limited to what the minimal profile grants.
      const allowed = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).toContain('file.read');
      expect(allowed).not.toContain('shell.exec');
    });
  });

  describe('skill capability restrictions', () => {
    it('intersects tool policy with skill tool_groups', () => {
      const fullPolicy = new ToolPolicy(defaultConfig({ profile: 'full' }));
      const skillContext = {
        skillName: 'web-only-skill',
        skillPermissions: { tool_groups: ['group:web'] },
      };
      const permitted = fullPolicy.resolveAllowedNames(ALL_TOOL_NAMES, skillContext);

      expect(permitted.has('web.fetch')).toBe(true);
      expect(permitted.has('web.search')).toBe(true);
      expect(permitted.has('shell.exec')).toBe(false);
      expect(permitted.has('file.write')).toBe(false);
    });

    it('uses explicit permissions.tools when present (overrides tool_groups)', () => {
      const fullPolicy = new ToolPolicy(defaultConfig({ profile: 'full' }));
      // Both fields are set; only the explicit `tools` list should count.
      const skillContext = {
        skillName: 'explicit-tool-skill',
        skillPermissions: {
          tool_groups: ['group:fs'],
          tools: ['web.fetch'],
        },
      };
      const permitted = fullPolicy.resolveAllowedNames(ALL_TOOL_NAMES, skillContext);

      expect(permitted.has('web.fetch')).toBe(true);
      expect(permitted.has('file.read')).toBe(false);
      expect(permitted.has('file.write')).toBe(false);
    });
  });

  describe('edge cases', () => {
    it('handles empty tool list', () => {
      const anyPolicy = new ToolPolicy(defaultConfig());
      expect(anyPolicy.filterTools([])).toHaveLength(0);
    });

    it('handles profile with unregistered tools', () => {
      // Only two of the tools named by the coding profile are registered here.
      const registered = ['file.read', 'web.fetch'].map((toolName) => makeTool(toolName));
      const codingPolicy = new ToolPolicy(defaultConfig({ profile: 'coding' }));
      const allowed = codingPolicy.filterTools(registered).map(({ name }) => name);

      expect(allowed).toContain('file.read');
      expect(allowed).toContain('web.fetch');
      expect(allowed).toHaveLength(2);
    });

    it('deny pattern removes multiple tools', () => {
      const config = defaultConfig({ deny: ['process.*'] });
      const allowed = new ToolPolicy(config).filterTools(ALL_TOOLS).map(({ name }) => name);

      expect(allowed).not.toContain('process.start');
      expect(allowed).not.toContain('process.status');
      expect(allowed).not.toContain('process.output');
      expect(allowed).not.toContain('process.kill');
      expect(allowed).not.toContain('process.list');
      expect(allowed).toContain('shell.exec');
    });
  });
});