// flynn/src/tools/executor.ts

import type { ToolResult } from './types.js';
import type { ToolRegistry } from './registry.js';
import type { HookEngine } from '../hooks/engine.js';
import type { ToolPolicyContext } from './policy.js';
import { resolveAutonomy } from '../hooks/autonomy.js';
import { auditLogger } from '../audit/index.js';
import { randomUUID } from 'crypto';
import { matchesAnyPattern, patternToRegex } from './policy.js';
import { redactForAudit, containsSecretLikeKeys } from '../audit/redact.js';
import type { SandboxManager } from '../sandbox/index.js';
import { createSandboxedProcessStartTool, createSandboxedShellTool } from '../sandbox/index.js';

/** Optional tuning knobs accepted by the `ToolExecutor` constructor. */
export interface ToolExecutorConfig {
  /**
   * Timeout applied to each tool execution, in milliseconds.
   * The executor falls back to 30_000 when this is omitted.
   */
  defaultTimeoutMs?: number;
  /**
   * Cap on a tool result's `output` before it is truncated.
   * The executor falls back to 51_200 when this is omitted. The cap is compared
   * against the string's `length` (UTF-16 code units), not its encoded byte size.
   */
  maxOutputBytes?: number;
}

/**
 * Runs registry tools behind a chain of guards and records audit events.
 *
 * For every call, `execute` performs, in order: tool lookup, secret-scope
 * enforcement, capability (filesystem / network / environment) enforcement,
 * a prompt-injection guard, the registry's tool policy, and the hook/autonomy
 * confirmation step. Only then is the tool run under a timeout, its output
 * truncated, and the outcome audited.
 */
export class ToolExecutor {
  /** Exact tool names treated as high-risk; every `browser.*` tool is high-risk as well. */
  private static readonly HIGH_RISK_TOOL_NAMES: readonly string[] = [
    'file.write',
    'file.edit',
    'file.patch',
    'shell.exec',
    'process.start',
    'process.kill',
  ];

  /** Lower-case substrings that block a high-risk call while untrusted content is present. */
  private static readonly INJECTION_MARKERS: readonly string[] = [
    'ignore previous',
    'ignore all previous',
    'system prompt',
    'exfiltrate',
    'send to',
    'upload',
    'curl ',
    'wget ',
    'powershell',
    'rm -rf',
    'chmod ',
    'ssh ',
    'scp ',
    'BEGIN PRIVATE KEY'.toLowerCase(),
  ];

  private registry: ToolRegistry;
  private hooks: HookEngine;
  private defaultTimeoutMs: number;
  private maxOutputBytes: number;
  private sandboxManager?: SandboxManager;

  /**
   * @param registry Source of tools and of the optional tool policy.
   * @param hooks Supplies the base action per tool and the confirmation prompt.
   * @param config Optional overrides; timeout defaults to 30_000 ms and the output cap to 51_200.
   */
  constructor(registry: ToolRegistry, hooks: HookEngine, config?: ToolExecutorConfig) {
    this.registry = registry;
    this.hooks = hooks;
    this.defaultTimeoutMs = config?.defaultTimeoutMs ?? 30_000;
    this.maxOutputBytes = config?.maxOutputBytes ?? 51_200;
  }

  /** Sets (or, when called with `undefined`, removes) the sandbox manager used for sandboxed execution. */
  setSandboxManager(manager?: SandboxManager): void {
    this.sandboxManager = manager;
  }

  /** True when `context.elevatedHostUntilMs` is a finite timestamp that is still in the future. */
  private isElevationActive(context?: ToolPolicyContext): boolean {
    const untilMs = context?.elevatedHostUntilMs;
    return typeof untilMs === 'number' && Number.isFinite(untilMs) && untilMs > Date.now();
  }

  /**
   * Picks the environment a call is treated as running in: the context's
   * `executionEnvironment` (default `'host'`), except that an active elevation
   * forces `'host'` for high-risk tools.
   */
  private resolveEffectiveExecutionEnvironment(toolName: string, context?: ToolPolicyContext): 'host' | 'sandbox' {
    const base = context?.executionEnvironment ?? 'host';
    if (this.isHighRiskTool(toolName) && this.isElevationActive(context)) {
      return 'host';
    }
    return base;
  }

  /**
   * Runs `work` and rejects with `new Error(timeoutMessage)` if it has not
   * settled within `defaultTimeoutMs`. The timer is always cleared once the
   * race settles so completed calls do not leave a pending timer behind.
   * The work itself is not cancelled when the timeout wins.
   */
  private async runWithTimeout<T>(work: () => Promise<T>, timeoutMessage: string): Promise<T> {
    // Start the work first so a synchronous failure cannot leave a timer armed.
    const pending = work();
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error(timeoutMessage)), this.defaultTimeoutMs);
    });
    try {
      return await Promise.race([pending, timeout]);
    } finally {
      if (timer !== undefined) {
        clearTimeout(timer);
      }
    }
  }

  /**
   * Executes a tool by its API name after all guards have passed.
   *
   * @param toolName API name used for the registry lookup, policy, hooks and audit records.
   * @param args Raw tool arguments; forwarded to the tool unchanged and redacted for audit.
   * @param context Optional policy context (session, skill, autonomy, environment, ...).
   * @returns The tool's result, or a `success: false` result describing a denial,
   *   a timeout, or an error thrown by the tool.
   */
  async execute(toolName: string, args: unknown, context?: ToolPolicyContext): Promise<ToolResult> {
    const executionId = randomUUID();
    const executionEnvironment = this.resolveEffectiveExecutionEnvironment(toolName, context);
    const skillName = context?.skillName;

    const tool = this.registry.getByApiName(toolName);
    if (!tool) {
      auditLogger?.toolDenied({
        tool_name: toolName,
        reason: 'Tool not found',
        denial_type: 'not_found',
        execution_id: executionId,
        execution_environment: executionEnvironment,
        skill_name: skillName,
        session_id: context?.sessionId,
      });
      return { success: false, output: '', error: `Tool '${toolName}' not found` };
    }

    // NOTE(review): the guards below mix `tool.name` and the API `toolName`.
    // If the registry maps API names to different canonical names, checks keyed on
    // `toolName` (high-risk detection, hooks, sandbox dispatch) may not line up — confirm.
    const argsRedaction = redactForAudit(args);

    // Secret scope enforcement
    const requiredScopes = tool.requiredSecretScopes ?? [];
    const allowedScopes = this.resolveAllowedSecretScopes(context);
    if (requiredScopes.length > 0 && !this.hasAllScopes(allowedScopes, requiredScopes)) {
      auditLogger?.toolDenied({
        tool_name: tool.name,
        reason: `Tool requires secret scope(s): ${requiredScopes.join(', ')}`,
        denial_type: 'policy',
        execution_id: executionId,
        execution_environment: executionEnvironment,
        skill_name: skillName,
        redactions_applied: argsRedaction.redactions,
        session_id: context?.sessionId,
      });
      return {
        success: false,
        output: '',
        error: `Tool '${tool.name}' denied: missing secret scopes (${requiredScopes.join(', ')})`,
      };
    }

    // Capability enforcement: filesystem + network constraints
    const capabilityViolation = this.checkCapabilityConstraints(tool.name, args, context, executionEnvironment);
    if (capabilityViolation) {
      auditLogger?.toolDenied({
        tool_name: tool.name,
        reason: capabilityViolation,
        denial_type: 'policy',
        execution_id: executionId,
        execution_environment: executionEnvironment,
        skill_name: skillName,
        redactions_applied: argsRedaction.redactions,
        session_id: context?.sessionId,
      });
      return { success: false, output: '', error: `Tool '${tool.name}' denied: ${capabilityViolation}` };
    }

    // Prompt-injection guard: block obviously unsafe tool calls when untrusted content is present
    const guard = this.evaluatePromptInjectionGuard(tool.name, args, context);
    if (guard) {
      auditLogger?.toolDenied({
        tool_name: tool.name,
        reason: guard,
        denial_type: 'policy',
        execution_id: executionId,
        execution_environment: executionEnvironment,
        skill_name: skillName,
        redactions_applied: argsRedaction.redactions,
        session_id: context?.sessionId,
      });
      return { success: false, output: '', error: `Tool '${tool.name}' blocked: ${guard}` };
    }

    // Policy check (defense in depth — tools should also be filtered at listing time)
    const policy = this.registry.getPolicy();
    if (policy) {
      const allNames = this.registry.list().map(t => t.name);
      if (!policy.isAllowed(toolName, allNames, context)) {
        auditLogger?.toolDenied({
          tool_name: toolName,
          reason: 'Tool not allowed by policy',
          denial_type: 'policy',
          execution_id: executionId,
          execution_environment: executionEnvironment,
          skill_name: skillName,
          redactions_applied: argsRedaction.redactions,
          session_id: context?.sessionId,
        });
        return {
          success: false,
          output: '',
          error: `Tool '${toolName}' is not allowed by tool policy`,
        };
      }
    }

    // Check hooks with autonomy resolution
    const baseAction = this.hooks.getAction(toolName);
    const autonomyLevel = context?.autonomyLevel ?? 'standard';
    const autonomyDecision = resolveAutonomy(toolName, baseAction, autonomyLevel);
    let finalAction = autonomyDecision.action;

    // Elevated mode must always require explicit confirmation for host high-risk tool calls.
    if (executionEnvironment === 'host' && this.isHighRiskTool(toolName) && this.isElevationActive(context)) {
      finalAction = 'confirm';
    }

    // Log autonomy override if applicable (recorded through the denial channel
    // with its own `autonomy_override` type; the call is not stopped here).
    if (autonomyDecision.overridden) {
      auditLogger?.toolDenied({
        tool_name: toolName,
        reason: `Autonomy override: ${autonomyDecision.reason}`,
        denial_type: 'autonomy_override',
        execution_id: executionId,
        execution_environment: executionEnvironment,
        skill_name: skillName,
        redactions_applied: argsRedaction.redactions,
        session_id: context?.sessionId,
      });
    }

    if (finalAction === 'confirm') {
      const hookResult = await this.hooks.requestConfirmation(
        toolName,
        args as Record<string, unknown>,
      );

      auditLogger?.toolApproval({
        tool_name: toolName,
        approved: hookResult.approved,
        reason: hookResult.reason,
        execution_id: executionId,
        execution_environment: executionEnvironment,
        skill_name: skillName,
        redactions_applied: argsRedaction.redactions,
        session_id: context?.sessionId,
      });

      if (!hookResult.approved) {
        const denyReason = hookResult.reason ?? 'no reason';
        const detailedReason = autonomyDecision.overridden
          ? `${denyReason} (autonomy: ${autonomyDecision.reason})`
          : denyReason;
        auditLogger?.toolDenied({
          tool_name: toolName,
          reason: detailedReason,
          denial_type: 'hook',
          execution_id: executionId,
          execution_environment: executionEnvironment,
          skill_name: skillName,
          redactions_applied: argsRedaction.redactions,
          session_id: context?.sessionId,
        });
        return {
          success: false,
          output: '',
          error: `Tool '${toolName}' denied by user: ${detailedReason}`,
        };
      }
    }

    // Execute with timeout
    const startTime = Date.now();

    auditLogger?.toolStart({
      tool_name: toolName,
      tool_args: argsRedaction.value,
      execution_id: executionId,
      execution_environment: executionEnvironment,
      skill_name: skillName,
      redactions_applied: argsRedaction.redactions,
      session_id: context?.sessionId,
      channel: context?.channel,
      sender: context?.sender,
      agent_tier: context?.tier,
    });

    try {
      const rawResult = await this.runWithTimeout<ToolResult>(
        async () => {
          // Only shell.exec and process.start have sandboxed variants; everything
          // else runs through the registry tool's own execute().
          // NOTE(review): with environment 'sandbox' but no sandbox manager set, these
          // two tools also fall through to the registry tool — confirm that is intended.
          if (executionEnvironment === 'sandbox' && this.sandboxManager) {
            const sandboxSessionId = context?.sessionId ?? `${context?.channel ?? 'unknown'}:${context?.sender ?? 'unknown'}`;
            const sandbox = await this.sandboxManager.getOrCreate(sandboxSessionId);
            if (toolName === 'shell.exec') {
              return createSandboxedShellTool(sandbox).execute(args);
            }
            if (toolName === 'process.start') {
              return createSandboxedProcessStartTool(sandbox).execute(args);
            }
          }
          return tool.execute(args);
        },
        `Tool '${toolName}' timed out after ${this.defaultTimeoutMs}ms`,
      );

      const duration = Date.now() - startTime;

      // Truncate output if too large. A copy is built so the object returned by the
      // tool is never mutated. The cap is measured in string length, not bytes.
      const result: ToolResult = rawResult.output.length > this.maxOutputBytes
        ? { ...rawResult, output: rawResult.output.slice(0, this.maxOutputBytes) + '\n[truncated]' }
        : rawResult;

      const resultRedaction = redactForAudit(result);
      auditLogger?.toolSuccess({
        tool_name: toolName,
        result: resultRedaction.value as { success: boolean; output: string; error?: string },
        duration_ms: duration,
        execution_id: executionId,
        execution_environment: executionEnvironment,
        skill_name: skillName,
        redactions_applied: argsRedaction.redactions + resultRedaction.redactions,
        session_id: context?.sessionId,
      });

      return result;
    } catch (error) {
      const duration = Date.now() - startTime;
      const errorMessage = error instanceof Error ? error.message : String(error);

      // The redacted message is used for both the audit record and the returned error.
      const errorRedaction = redactForAudit(errorMessage);

      auditLogger?.toolError({
        tool_name: toolName,
        error: String(errorRedaction.value),
        duration_ms: duration,
        session_id: context?.sessionId,
        execution_id: executionId,
        execution_environment: executionEnvironment,
        skill_name: skillName,
        redactions_applied: argsRedaction.redactions + errorRedaction.redactions,
      });

      return {
        success: false,
        output: '',
        error: String(errorRedaction.value),
      };
    }
  }

  /**
   * Resolves which secret scopes the caller may use. Precedence: explicit
   * `allowedSecretScopes`, then the skill's `permissions.secrets`, then an empty
   * list for a skill without either, and finally `['*']` when no skill is involved.
   */
  private resolveAllowedSecretScopes(context?: ToolPolicyContext): string[] {
    if (context?.allowedSecretScopes) {
      return context.allowedSecretScopes;
    }
    if (context?.skillPermissions?.secrets) {
      return context.skillPermissions.secrets;
    }
    if (context?.skillName) {
      return [];
    }
    return ['*'];
  }

  /** True when `allowed` contains the wildcard `'*'` or every entry of `required`. */
  private hasAllScopes(allowed: string[], required: string[]): boolean {
    if (allowed.includes('*')) {
      return true;
    }
    return required.every((scope) => allowed.includes(scope));
  }

  /** True for any `browser.*` tool and for the names in `HIGH_RISK_TOOL_NAMES`. */
  private isHighRiskTool(toolName: string): boolean {
    if (toolName.startsWith('browser.')) {
      return true;
    }
    return ToolExecutor.HIGH_RISK_TOOL_NAMES.includes(toolName);
  }

  /**
   * Checks a call against the skill's permissions manifest.
   *
   * @returns A human-readable violation, or `null` when the call is permitted.
   */
  private checkCapabilityConstraints(toolName: string, args: unknown, context: ToolPolicyContext | undefined, effectiveEnv: 'host' | 'sandbox'): string | null {
    const perms = context?.skillPermissions;
    if (!perms) {
      // A skill without a manifest may still use low-risk tools; calls outside any skill are unrestricted here.
      if (context?.skillName && this.isHighRiskTool(toolName)) {
        return 'skill has no permissions manifest; high-risk tool denied by default';
      }
      return null;
    }

    // Sandbox enforcement for high-risk tools unless explicitly allowed.
    if (this.isHighRiskTool(toolName)) {
      const requested = perms.execution_environment ?? 'sandbox';
      if (context?.skillName && effectiveEnv === 'host' && requested !== 'host' && !this.isElevationActive(context)) {
        return 'high-risk tool execution on host is not allowed for this skill (requires execution_environment=host)';
      }
    }

    // FS path enforcement
    const fs = perms.fs;
    if (fs && toolName.startsWith('file.')) {
      // Anything other than file.read / file.list is treated as a write.
      const mode: 'read' | 'write' = (toolName === 'file.read' || toolName === 'file.list') ? 'read' : 'write';
      const allowlist = mode === 'read' ? (fs.read ?? []) : (fs.write ?? []);
      if (allowlist.length === 0) {
        return `filesystem ${mode} access not permitted by skill permissions`;
      }

      // A call from which no path can be extracted passes this check.
      const paths = this.extractFilePaths(toolName, args);
      for (const p of paths) {
        if (!this.pathAllowed(p, allowlist)) {
          return `path not allowed by skill permissions (${mode}): ${p}`;
        }
      }
    }

    // Network host enforcement (best-effort): only web.fetch with a string `url`
    // is checked, and only when the manifest lists at least one net rule.
    if (perms.net && perms.net.length > 0 && toolName === 'web.fetch') {
      const url = (args as { url?: unknown } | null)?.url;
      if (typeof url === 'string') {
        try {
          const parsed = new URL(url);
          const host = parsed.hostname;
          // URL.port is empty for a scheme's default port, so derive it for http(s).
          const port = parsed.port
            ? Number.parseInt(parsed.port, 10)
            : parsed.protocol === 'https:'
              ? 443
              : parsed.protocol === 'http:'
                ? 80
                : undefined;

          const allowed = perms.net.some((rule) => {
            if (!matchesAnyPattern(host, [rule.host])) {
              return false;
            }
            // A rule without ports allows every port on a matching host.
            if (!rule.ports || rule.ports.length === 0) {
              return true;
            }
            if (!port || !Number.isFinite(port)) {
              return false;
            }
            return rule.ports.includes(port);
          });

          if (!allowed) {
            return `network access denied by skill permissions: ${host}${port ? `:${port}` : ''}`;
          }
        } catch {
          return 'invalid url for web.fetch';
        }
      }
    }

    return null;
  }

  /**
   * Collects the string paths a file tool call targets: each `patches[].path`
   * for `file.patch`, otherwise the top-level `path`. Non-string or missing
   * values are ignored.
   */
  private extractFilePaths(toolName: string, args: unknown): string[] {
    const out: string[] = [];
    const record = (args ?? null) as Record<string, unknown> | null;
    if (!record || typeof record !== 'object') {
      return out;
    }

    if (toolName === 'file.patch') {
      const patches = record.patches;
      if (Array.isArray(patches)) {
        for (const patch of patches) {
          if (patch && typeof patch === 'object') {
            const p = (patch as Record<string, unknown>).path;
            if (typeof p === 'string') {
              out.push(p);
            }
          }
        }
      }
      return out;
    }

    const p = record.path;
    if (typeof p === 'string') {
      out.push(p);
    }
    return out;
  }

  /**
   * True when at least one allowlist pattern matches the path exactly as given.
   * NOTE(review): the path is not normalized here, so `..` segments are matched
   * verbatim — confirm traversal is handled by the patterns or by the file tools.
   */
  private pathAllowed(pathValue: string, allowlist: string[]): boolean {
    return allowlist.some((pattern) => patternToRegex(pattern).test(pathValue));
  }

  /**
   * Heuristic guard that only applies while `context.untrustedContent` is set.
   *
   * @returns A reason to block the call, or `null` to let it proceed.
   */
  private evaluatePromptInjectionGuard(toolName: string, args: unknown, context?: ToolPolicyContext): string | null {
    if (!context?.untrustedContent) {
      return null;
    }

    // When untrusted content is present, forbid passing secrets directly via tool args.
    if ((toolName === 'web.fetch' || toolName === 'web.search') && containsSecretLikeKeys(args)) {
      return 'refusing to pass secret-like fields to a network tool while untrusted content is present';
    }

    // Marker scanning only ever blocks high-risk tools, so skip serialization otherwise.
    if (!this.isHighRiskTool(toolName)) {
      return null;
    }

    // JSON.stringify throws on circular structures and BigInt values and yields
    // undefined for functions/symbols; neither case may escape as an exception.
    let serialized: string | undefined;
    try {
      serialized = JSON.stringify(args ?? {});
    } catch {
      serialized = undefined;
    }
    if (typeof serialized !== 'string') {
      // Fail closed: arguments that cannot be inspected are not forwarded to a high-risk tool.
      return 'high-risk tool arguments could not be serialized for prompt-injection inspection';
    }

    const lower = serialized.toLowerCase();
    if (ToolExecutor.INJECTION_MARKERS.some((m) => lower.includes(m))) {
      return 'blocked high-risk tool call due to prompt-injection markers in arguments';
    }

    return null;
  }
}