// flynn/src/tools/executor.ts

import type { ToolResult } from './types.js';
import type { ToolRegistry } from './registry.js';
import type { HookEngine } from '../hooks/engine.js';
import type { ImmutableDenyRule, SensitiveMode, ToolPolicyContext } from './policy.js';
import { resolveAutonomy } from '../hooks/autonomy.js';
import { auditLogger } from '../audit/index.js';
import { randomUUID } from 'crypto';
import { matchesAnyPattern, patternToRegex } from './policy.js';
import { redactForAudit, containsSecretLikeKeys } from '../audit/redact.js';
import type { SandboxManager } from '../sandbox/index.js';
import { createSandboxedProcessStartTool, createSandboxedShellTool } from '../sandbox/index.js';
/**
 * Optional construction-time settings for ToolExecutor.
 * Every field falls back to a built-in default in the constructor when omitted.
 */
export interface ToolExecutorConfig {
/** Milliseconds a tool call may run before it is aborted and reported as timed out (default 30_000). */
defaultTimeoutMs?: number;
/** Size limit applied to a tool's `output` before it is cut and suffixed with `[truncated]` (default 51_200). */
maxOutputBytes?: number;
/** Sensitive-tool mode used when the call context does not supply its own (default 'deny_without_elevation'). */
sensitiveMode?: SensitiveMode;
/** Deny rules used when the call context does not supply its own `immutableDenylist` (default: empty list). */
immutableDenylist?: ImmutableDenyRule[];
}
/**
 * Payload handed to the execution observer once a tool call has finished,
 * whether it returned a result or ended in an error.
 */
export interface ToolExecutionObserverEvent {
/** Tool name exactly as it was passed to `ToolExecutor.execute`. */
toolName: string;
/** Session id taken from the call context, when one was provided. */
sessionId?: string;
/** The tool result's `success` flag; always `false` when the call threw, timed out or was cancelled. */
success: boolean;
/** Completion time as whole seconds since the Unix epoch (`Math.floor(Date.now() / 1000)`). */
timestampSeconds: number;
}
/** Per-call options accepted by `ToolExecutor.execute`. */
export interface ToolExecuteOptions {
/** Caller-owned cancellation signal; it is combined with the executor's timeout signal and forwarded to the tool. */
signal?: AbortSignal;
}
/**
 * Runs registered tools behind a fixed sequence of gates and records every
 * decision in the audit log.
 *
 * Gate order inside {@link ToolExecutor.execute}: tool lookup, immutable
 * denylist, secret scopes, skill capability constraints (sandbox / filesystem /
 * network), prompt-injection guard, sensitive-tool elevation, registry policy,
 * and finally hook/autonomy confirmation. Only when all gates pass is the tool
 * invoked, raced against a timeout and the caller's abort signal.
 */
export class ToolExecutor {
  /** Tools treated as high-risk, in addition to every `browser.*` tool. */
  private static readonly HIGH_RISK_TOOLS: ReadonlySet<string> = new Set([
    'file.write',
    'file.edit',
    'file.patch',
    'shell.exec',
    'process.start',
    'process.kill',
  ]);

  /** Tools treated as sensitive, in addition to every `browser.*` tool. */
  private static readonly SENSITIVE_TOOLS: ReadonlySet<string> = new Set([
    'shell.exec',
    'process.start',
    'process.kill',
    'message.send',
    'cron.create',
    'cron.delete',
  ]);

  /** Lower-case substrings that block a high-risk call while untrusted content is present. */
  private static readonly INJECTION_MARKERS: readonly string[] = [
    'ignore previous',
    'ignore all previous',
    'system prompt',
    'exfiltrate',
    'send to',
    'upload',
    'curl ',
    'wget ',
    'powershell',
    'rm -rf',
    'chmod ',
    'ssh ',
    'scp ',
    'begin private key',
  ];

  /** Marker appended to output that was cut down to `maxOutputBytes`. */
  private static readonly TRUNCATION_SUFFIX = '\n[truncated]';

  private readonly registry: ToolRegistry;
  private readonly hooks: HookEngine;
  private readonly defaultTimeoutMs: number;
  private readonly maxOutputBytes: number;
  private readonly sensitiveMode: SensitiveMode;
  private readonly immutableDenylist: ImmutableDenyRule[];
  private sandboxManager?: SandboxManager;
  private executionObserver?: (event: ToolExecutionObserverEvent) => void;

  /**
   * @param registry Source of tool definitions and of the optional tool policy.
   * @param hooks Supplies the per-tool base action and the confirmation prompt.
   * @param config Optional overrides; defaults are a 30_000 ms timeout, a
   *   51_200 byte output limit, `'deny_without_elevation'` and an empty denylist.
   */
  constructor(registry: ToolRegistry, hooks: HookEngine, config?: ToolExecutorConfig) {
    this.registry = registry;
    this.hooks = hooks;
    this.defaultTimeoutMs = config?.defaultTimeoutMs ?? 30_000;
    this.maxOutputBytes = config?.maxOutputBytes ?? 51_200;
    this.sensitiveMode = config?.sensitiveMode ?? 'deny_without_elevation';
    this.immutableDenylist = config?.immutableDenylist ?? [];
  }

  /** Installs (or, with `undefined`, removes) the manager used for sandboxed shell/process calls. */
  setSandboxManager(manager?: SandboxManager): void {
    this.sandboxManager = manager;
  }

  /** Installs (or, with `undefined`, removes) the callback notified after each executed tool call. */
  setExecutionObserver(observer?: (event: ToolExecutionObserverEvent) => void): void {
    this.executionObserver = observer;
  }

  /** True while `context.elevatedHostUntilMs` is a finite timestamp that lies in the future. */
  private isElevationActive(context?: ToolPolicyContext): boolean {
    const untilMs = context?.elevatedHostUntilMs;
    return typeof untilMs === 'number' && Number.isFinite(untilMs) && untilMs > Date.now();
  }

  /**
   * Picks the environment a call runs in: the context's `executionEnvironment`
   * (default `'host'`), except that a high-risk tool under active elevation is
   * always placed on the host.
   */
  private resolveEffectiveExecutionEnvironment(toolName: string, context?: ToolPolicyContext): 'host' | 'sandbox' {
    if (this.isHighRiskTool(toolName) && this.isElevationActive(context)) {
      return 'host';
    }
    return context?.executionEnvironment ?? 'host';
  }

  /**
   * Runs one tool call through all policy gates and, if permitted, executes it.
   *
   * @param toolName Name used to look the tool up via `registry.getByApiName`.
   * @param args Raw tool arguments; redacted before being written to the audit log.
   * @param context Policy context (session, skill permissions, elevation, ...).
   * @param options Optional caller abort signal.
   * @returns The tool's result (output truncated to `maxOutputBytes`), or a
   *   `success: false` result describing the denial, timeout or tool error.
   * @throws Rethrows the abort error when the caller's signal is aborted and the
   *   failure is an `AbortError`. Errors thrown by the gates themselves (which
   *   run outside the try/catch) also propagate.
   */
  async execute(
    toolName: string,
    args: unknown,
    context?: ToolPolicyContext,
    options?: ToolExecuteOptions,
  ): Promise<ToolResult> {
    const executionId = randomUUID();
    const executionEnvironment = this.resolveEffectiveExecutionEnvironment(toolName, context);
    const skillName = context?.skillName;
    const sessionId = context?.sessionId;

    // Fields shared by every audit record emitted for this call.
    const auditBase = {
      execution_id: executionId,
      execution_environment: executionEnvironment,
      skill_name: skillName,
      session_id: sessionId,
    };

    const tool = this.registry.getByApiName(toolName);
    if (!tool) {
      auditLogger?.toolDenied({
        tool_name: toolName,
        reason: 'Tool not found',
        denial_type: 'not_found',
        ...auditBase,
      });
      return { success: false, output: '', error: `Tool '${toolName}' not found` };
    }

    const argsRedaction = redactForAudit(args);
    const recordDenial = (
      auditedName: string,
      reason: string,
      denialType: 'policy' | 'autonomy_override' | 'hook',
    ): void => {
      auditLogger?.toolDenied({
        tool_name: auditedName,
        reason,
        denial_type: denialType,
        ...auditBase,
        redactions_applied: argsRedaction.redactions,
      });
    };

    // NOTE(review): the gates below key off `tool.name`, while the environment
    // resolution above, the policy/hook lookups and the sandbox routing further
    // down key off the caller-supplied `toolName`. If getByApiName can resolve
    // an alias, those checks may disagree — confirm.

    // Immutable denylist
    const immutableDenyReason = this.evaluateImmutableDenylist(tool.name, args, context);
    if (immutableDenyReason) {
      recordDenial(tool.name, immutableDenyReason, 'policy');
      return { success: false, output: '', error: `Tool '${tool.name}' denied: ${immutableDenyReason}` };
    }

    // Secret scope enforcement
    const requiredScopes = tool.requiredSecretScopes ?? [];
    const allowedScopes = this.resolveAllowedSecretScopes(context);
    if (requiredScopes.length > 0 && !this.hasAllScopes(allowedScopes, requiredScopes)) {
      recordDenial(tool.name, `Tool requires secret scope(s): ${requiredScopes.join(', ')}`, 'policy');
      return {
        success: false,
        output: '',
        error: `Tool '${tool.name}' denied: missing secret scopes (${requiredScopes.join(', ')})`,
      };
    }

    // Capability enforcement: filesystem + network constraints
    const capabilityViolation = this.checkCapabilityConstraints(tool.name, args, context, executionEnvironment);
    if (capabilityViolation) {
      recordDenial(tool.name, capabilityViolation, 'policy');
      return { success: false, output: '', error: `Tool '${tool.name}' denied: ${capabilityViolation}` };
    }

    // Prompt-injection guard: block obviously unsafe tool calls when untrusted content is present
    const guard = this.evaluatePromptInjectionGuard(tool.name, args, context);
    if (guard) {
      recordDenial(tool.name, guard, 'policy');
      return { success: false, output: '', error: `Tool '${tool.name}' blocked: ${guard}` };
    }

    // Sensitive tools on the host need an active elevation window
    if (this.shouldDenyWithoutElevation(tool.name, executionEnvironment, context)) {
      const mode = context?.sensitiveMode ?? this.sensitiveMode;
      const reason = `sensitive tool requires /elevate before host execution (mode=${mode})`;
      recordDenial(tool.name, reason, 'policy');
      return { success: false, output: '', error: `Tool '${tool.name}' denied: ${reason}` };
    }

    // Policy check (defense in depth — tools should also be filtered at listing time)
    const policy = this.registry.getPolicy();
    if (policy) {
      const allNames = this.registry.list().map((t) => t.name);
      if (!policy.isAllowed(toolName, allNames, context)) {
        recordDenial(toolName, 'Tool not allowed by policy', 'policy');
        return {
          success: false,
          output: '',
          error: `Tool '${toolName}' is not allowed by tool policy`,
        };
      }
    }

    // Check hooks with autonomy resolution
    const baseAction = this.hooks.getAction(toolName);
    const autonomyLevel = context?.autonomyLevel ?? 'standard';
    const autonomyDecision = resolveAutonomy(toolName, baseAction, autonomyLevel);
    let finalAction = autonomyDecision.action;

    // Elevated mode must always require explicit confirmation for host high-risk tool calls.
    if (executionEnvironment === 'host' && this.isHighRiskTool(toolName) && this.isElevationActive(context)) {
      finalAction = 'confirm';
    }

    // Log autonomy override if applicable (recorded as a denial-type audit event even if the call proceeds)
    if (autonomyDecision.overridden) {
      recordDenial(toolName, `Autonomy override: ${autonomyDecision.reason}`, 'autonomy_override');
    }

    // NOTE(review): 'confirm' is the only action enforced here; if resolveAutonomy
    // can return a blocking action, nothing in this method stops the call — confirm.
    if (finalAction === 'confirm') {
      const hookResult = await this.hooks.requestConfirmation(
        toolName,
        args as Record<string, unknown>,
        {
          sessionId,
          channel: context?.channel,
          sender: context?.sender,
        },
      );
      auditLogger?.toolApproval({
        tool_name: toolName,
        approved: hookResult.approved,
        reason: hookResult.reason,
        ...auditBase,
        redactions_applied: argsRedaction.redactions,
      });
      if (!hookResult.approved) {
        const denyReason = hookResult.reason ?? 'no reason';
        const detailedReason = autonomyDecision.overridden
          ? `${denyReason} (autonomy: ${autonomyDecision.reason})`
          : denyReason;
        recordDenial(toolName, detailedReason, 'hook');
        return {
          success: false,
          output: '',
          error: `Tool '${toolName}' denied by user: ${detailedReason}`,
        };
      }
    }

    // Execute with timeout
    const startTime = Date.now();
    auditLogger?.toolStart({
      tool_name: toolName,
      tool_args: argsRedaction.value,
      ...auditBase,
      redactions_applied: argsRedaction.redactions,
      channel: context?.channel,
      sender: context?.sender,
      agent_tier: context?.tier,
    });

    let timeoutHandle: NodeJS.Timeout | undefined;
    const timeoutAbortController = new AbortController();
    const externalSignal = options?.signal;
    // The tool sees one signal that fires on either caller cancellation or timeout.
    const combinedSignal = externalSignal
      ? AbortSignal.any([externalSignal, timeoutAbortController.signal])
      : timeoutAbortController.signal;
    let externalAbortCleanup: (() => void) | undefined;

    try {
      // A call that was cancelled before it began must not start the tool or
      // create a sandbox; fail through the normal error path instead.
      if (externalSignal?.aborted) {
        throw this.createAbortError();
      }

      const externalAbortPromise = externalSignal
        ? new Promise<ToolResult>((_, reject) => {
            const onAbort = () => reject(this.createAbortError());
            externalSignal.addEventListener('abort', onAbort, { once: true });
            externalAbortCleanup = () => externalSignal.removeEventListener('abort', onAbort);
          })
        : null;

      const result = await Promise.race([
        (async () => {
          // Only shell.exec and process.start have sandboxed variants; every other
          // tool falls through to tool.execute even when the environment is 'sandbox'.
          if (executionEnvironment === 'sandbox' && this.sandboxManager) {
            const sandboxSessionId =
              context?.sessionId ?? `${context?.channel ?? 'unknown'}:${context?.sender ?? 'unknown'}`;
            const sandbox = await this.sandboxManager.getOrCreate(sandboxSessionId);
            if (toolName === 'shell.exec') {
              return createSandboxedShellTool(sandbox).execute(args, { signal: combinedSignal });
            }
            if (toolName === 'process.start') {
              return createSandboxedProcessStartTool(sandbox).execute(args, { signal: combinedSignal });
            }
          }
          return tool.execute(args, { signal: combinedSignal });
        })(),
        new Promise<ToolResult>((_, reject) => {
          timeoutHandle = setTimeout(() => {
            // Signal the tool to stop, then fail the race.
            timeoutAbortController.abort();
            reject(new Error(`Tool '${toolName}' timed out after ${this.defaultTimeoutMs}ms`));
          }, this.defaultTimeoutMs);
        }),
        ...(externalAbortPromise ? [externalAbortPromise] : []),
      ]);

      const duration = Date.now() - startTime;

      // Truncate output if too large (assign only when something was actually cut)
      const boundedOutput = this.truncateOutput(result.output);
      if (boundedOutput !== result.output) {
        result.output = boundedOutput;
      }

      const resultRedaction = redactForAudit(result);
      auditLogger?.toolSuccess({
        tool_name: toolName,
        result: resultRedaction.value as { success: boolean; output: string; error?: string },
        duration_ms: duration,
        ...auditBase,
        redactions_applied: argsRedaction.redactions + resultRedaction.redactions,
      });
      this.notifyExecutionObserver({
        toolName,
        sessionId,
        success: result.success,
        timestampSeconds: Math.floor(Date.now() / 1000),
      });
      return result;
    } catch (error) {
      const duration = Date.now() - startTime;
      const errorMessage = error instanceof Error ? error.message : String(error);
      const errorRedaction = redactForAudit(errorMessage);
      auditLogger?.toolError({
        tool_name: toolName,
        error: String(errorRedaction.value),
        duration_ms: duration,
        ...auditBase,
        redactions_applied: argsRedaction.redactions + errorRedaction.redactions,
      });
      this.notifyExecutionObserver({
        toolName,
        sessionId,
        success: false,
        timestampSeconds: Math.floor(Date.now() / 1000),
      });
      // Caller-initiated cancellation is surfaced as an exception, not a result.
      if (externalSignal?.aborted && this.isAbortError(error)) {
        throw error;
      }
      return {
        success: false,
        output: '',
        error: String(errorRedaction.value),
      };
    } finally {
      if (timeoutHandle) {
        clearTimeout(timeoutHandle);
      }
      externalAbortCleanup?.();
    }
  }

  /** Builds the error used to report cancellation through the caller's signal. */
  private createAbortError(): Error {
    const error = new Error('Operation cancelled by user.');
    error.name = 'AbortError';
    return error;
  }

  /** True for `Error` instances whose `name` is `'AbortError'`. */
  private isAbortError(error: unknown): boolean {
    return error instanceof Error && error.name === 'AbortError';
  }

  /**
   * Limits `output` to `maxOutputBytes` UTF-8 bytes, appending `[truncated]`
   * when anything was removed. The cut is moved back to a character boundary so
   * a multi-byte character is never split. For ASCII-only output the result is
   * the same as cutting by character count.
   */
  private truncateOutput(output: string): string {
    const limit = this.maxOutputBytes;
    // One UTF-16 code unit encodes to at most 3 UTF-8 bytes, so short strings need no encoding pass.
    if (output.length * 3 <= limit) {
      return output;
    }
    const bytes = new TextEncoder().encode(output);
    if (bytes.length <= limit) {
      return output;
    }
    let end = Math.max(0, Math.floor(limit));
    // UTF-8 continuation bytes look like 0b10xxxxxx; back up until the first dropped byte starts a character.
    while (end > 0 && ((bytes[end] ?? 0) & 0xc0) === 0x80) {
      end -= 1;
    }
    return new TextDecoder().decode(bytes.subarray(0, end)) + ToolExecutor.TRUNCATION_SUFFIX;
  }

  /** Calls the execution observer, if any; observer failures are logged and never propagate. */
  private notifyExecutionObserver(event: ToolExecutionObserverEvent): void {
    if (!this.executionObserver) {
      return;
    }
    try {
      this.executionObserver(event);
    } catch (error) {
      console.warn(
        'ToolExecutor: execution observer failed:',
        error instanceof Error ? error.message : String(error),
      );
    }
  }

  /**
   * Determines which secret scopes the caller may use. Precedence: explicit
   * `allowedSecretScopes`, then the skill's `secrets` permission, then nothing
   * for a skill without either, and finally the wildcard `'*'` for non-skill calls.
   */
  private resolveAllowedSecretScopes(context?: ToolPolicyContext): string[] {
    if (context?.allowedSecretScopes) {
      return context.allowedSecretScopes;
    }
    if (context?.skillPermissions?.secrets) {
      return context.skillPermissions.secrets;
    }
    return context?.skillName ? [] : ['*'];
  }

  /** True when `allowed` contains the wildcard `'*'` or every scope in `required`. */
  private hasAllScopes(allowed: string[], required: string[]): boolean {
    return allowed.includes('*') || required.every((scope) => allowed.includes(scope));
  }

  /** True for any `browser.*` tool and for the tools in `HIGH_RISK_TOOLS`. */
  private isHighRiskTool(toolName: string): boolean {
    return toolName.startsWith('browser.') || ToolExecutor.HIGH_RISK_TOOLS.has(toolName);
  }

  /** True for any `browser.*` tool and for the tools in `SENSITIVE_TOOLS`. */
  private isSensitiveTool(toolName: string): boolean {
    return toolName.startsWith('browser.') || ToolExecutor.SENSITIVE_TOOLS.has(toolName);
  }

  /**
   * True when a sensitive tool is about to run on the host in
   * `'deny_without_elevation'` mode without an active elevation window.
   */
  private shouldDenyWithoutElevation(toolName: string, executionEnvironment: 'host' | 'sandbox', context?: ToolPolicyContext): boolean {
    const mode = context?.sensitiveMode ?? this.sensitiveMode;
    return (
      mode === 'deny_without_elevation' &&
      executionEnvironment === 'host' &&
      this.isSensitiveTool(toolName) &&
      !this.isElevationActive(context)
    );
  }

  /**
   * Checks the call against the immutable denylist (the context's list if it
   * has one, otherwise the executor's). A rule matches when its tool pattern
   * matches and, if it has an `argsPattern`, that text occurs case-insensitively
   * in the JSON-serialized arguments.
   *
   * @returns The denial reason of the first matching rule, or `null`.
   */
  private evaluateImmutableDenylist(toolName: string, args: unknown, context?: ToolPolicyContext): string | null {
    const rules = context?.immutableDenylist ?? this.immutableDenylist;
    if (!rules || rules.length === 0) {
      return null;
    }
    const serializedArgs = JSON.stringify(args ?? {}).toLowerCase();
    for (const rule of rules) {
      if (!matchesAnyPattern(toolName, [rule.tool])) {
        continue;
      }
      if (rule.argsPattern && !serializedArgs.includes(rule.argsPattern.toLowerCase())) {
        continue;
      }
      return rule.reason ?? `blocked by immutable denylist rule (${rule.tool}${rule.argsPattern ? ` / ${rule.argsPattern}` : ''})`;
    }
    return null;
  }

  /**
   * Enforces the skill's permission manifest for this call.
   *
   * - A skill without a manifest may not use high-risk tools at all.
   * - High-risk tools may only run on the host when the manifest requests
   *   `execution_environment: 'host'` or elevation is active.
   * - `file.*` tools must target paths on the manifest's read/write allowlist.
   * - `web.fetch` must target a host (and port, if the rule lists ports)
   *   allowed by the manifest's `net` rules.
   *
   * @returns A human-readable violation, or `null` when the call is permitted.
   */
  private checkCapabilityConstraints(toolName: string, args: unknown, context: ToolPolicyContext | undefined, effectiveEnv: 'host' | 'sandbox'): string | null {
    const perms = context?.skillPermissions;
    if (!perms) {
      if (context?.skillName && this.isHighRiskTool(toolName)) {
        return 'skill has no permissions manifest; high-risk tool denied by default';
      }
      return null;
    }

    // Sandbox enforcement for high-risk tools unless explicitly allowed.
    if (this.isHighRiskTool(toolName)) {
      const requested = perms.execution_environment ?? 'sandbox';
      if (context?.skillName && effectiveEnv === 'host' && requested !== 'host' && !this.isElevationActive(context)) {
        return 'high-risk tool execution on host is not allowed for this skill (requires execution_environment=host)';
      }
    }

    // FS path enforcement
    const fs = perms.fs;
    if (fs && toolName.startsWith('file.')) {
      // Everything except file.read / file.list is treated as a write.
      const mode: 'read' | 'write' = (toolName === 'file.read' || toolName === 'file.list') ? 'read' : 'write';
      const allowlist = mode === 'read' ? (fs.read ?? []) : (fs.write ?? []);
      if (allowlist.length === 0) {
        return `filesystem ${mode} access not permitted by skill permissions`;
      }
      for (const p of this.extractFilePaths(toolName, args)) {
        if (!this.pathAllowed(p, allowlist)) {
          return `path not allowed by skill permissions (${mode}): ${p}`;
        }
      }
    }

    // Network host enforcement (best-effort)
    if (perms.net && perms.net.length > 0 && toolName === 'web.fetch') {
      const url = (args as { url?: unknown } | null)?.url;
      if (typeof url === 'string') {
        try {
          const parsed = new URL(url);
          const host = parsed.hostname;
          // URL.port is empty for a scheme's default port, so fill in 443/80 explicitly.
          const port = parsed.port
            ? Number.parseInt(parsed.port, 10)
            : parsed.protocol === 'https:'
              ? 443
              : parsed.protocol === 'http:'
                ? 80
                : undefined;
          const allowed = perms.net.some((rule) => {
            if (!matchesAnyPattern(host, [rule.host])) {
              return false;
            }
            if (!rule.ports || rule.ports.length === 0) {
              return true;
            }
            if (!port || !Number.isFinite(port)) {
              return false;
            }
            return rule.ports.includes(port);
          });
          if (!allowed) {
            return `network access denied by skill permissions: ${host}${port ? `:${port}` : ''}`;
          }
        } catch {
          return 'invalid url for web.fetch';
        }
      }
    }
    return null;
  }

  /**
   * Collects the string paths a `file.*` call targets: each `patches[i].path`
   * for `file.patch`, otherwise the top-level `path`. Non-object arguments and
   * non-string paths yield no entries.
   */
  private extractFilePaths(toolName: string, args: unknown): string[] {
    const record = (args ?? null) as Record<string, unknown> | null;
    if (!record || typeof record !== 'object') {
      return [];
    }
    if (toolName === 'file.patch') {
      const patches = record.patches;
      if (!Array.isArray(patches)) {
        return [];
      }
      const out: string[] = [];
      for (const patch of patches) {
        if (patch && typeof patch === 'object') {
          const p = (patch as Record<string, unknown>).path;
          if (typeof p === 'string') {
            out.push(p);
          }
        }
      }
      return out;
    }
    return typeof record.path === 'string' ? [record.path] : [];
  }

  /**
   * True when `pathValue` is covered by the allowlist.
   *
   * A path containing `..` must match both as written and after its `..`
   * segments are collapsed, so `/workspace/../etc/passwd` cannot satisfy a
   * `/workspace/**` pattern. The check is purely lexical; symlinks are not resolved.
   */
  private pathAllowed(pathValue: string, allowlist: string[]): boolean {
    const matches = (candidate: string): boolean =>
      allowlist.some((pattern) => patternToRegex(pattern).test(candidate));
    if (!matches(pathValue)) {
      return false;
    }
    const collapsed = this.collapseParentSegments(pathValue);
    return collapsed === pathValue || matches(collapsed);
  }

  /**
   * Lexically resolves `..` segments (`a/b/../c` -> `a/c`). Paths without a
   * `..` segment are returned unchanged. Both `/` and `\` are treated as
   * separators; the result is joined with `/` unless the input only uses `\`.
   */
  private collapseParentSegments(pathValue: string): string {
    const segments = pathValue.split(/[\\/]/);
    if (!segments.includes('..')) {
      return pathValue;
    }
    const separator = !pathValue.includes('/') && pathValue.includes('\\') ? '\\' : '/';
    const lastIndex = segments.length - 1;
    const resolved: string[] = [];
    segments.forEach((segment, index) => {
      if (segment === '..') {
        const previous = resolved[resolved.length - 1];
        const canPop = previous !== undefined && previous !== '' && previous !== '.' && previous !== '..';
        if (canPop) {
          resolved.pop();
        } else {
          // Nothing left to climb out of (root or leading '..'): keep the segment.
          resolved.push(segment);
        }
        return;
      }
      // Interior empty and '.' segments are no-ops; dropping them stops 'a//..' or
      // 'a/./..' from shielding 'a' from the following '..'.
      const isNoOp = segment === '' || segment === '.';
      if (isNoOp && index > 0 && index < lastIndex) {
        return;
      }
      resolved.push(segment);
    });
    return resolved.join(separator);
  }

  /**
   * Heuristic guard that only applies while `context.untrustedContent` is set.
   * Blocks `web.fetch` / `web.search` calls whose arguments carry secret-like
   * keys, and high-risk tool calls whose serialized arguments contain one of
   * the `INJECTION_MARKERS`.
   *
   * @returns The block reason, or `null` when the call may proceed.
   */
  private evaluatePromptInjectionGuard(toolName: string, args: unknown, context?: ToolPolicyContext): string | null {
    if (!context?.untrustedContent) {
      return null;
    }
    // When untrusted content is present, forbid passing secrets directly via tool args.
    if ((toolName === 'web.fetch' || toolName === 'web.search') && containsSecretLikeKeys(args)) {
      return 'refusing to pass secret-like fields to a network tool while untrusted content is present';
    }
    // The marker scan only ever blocks high-risk tools, so skip serialization for the rest.
    if (!this.isHighRiskTool(toolName)) {
      return null;
    }
    const lower = JSON.stringify(args ?? {}).toLowerCase();
    if (ToolExecutor.INJECTION_MARKERS.some((marker) => lower.includes(marker))) {
      return 'blocked high-risk tool call due to prompt-injection markers in arguments';
    }
    return null;
  }
}