# Review commentary: lines starting with '#' sit outside the hunks and are not part of any file.
#
# Summary: adds a time-boxed "elevation" that lets high-risk tools run on the host for skills
# that otherwise require the sandbox, and lets ToolExecutor route shell.exec / process.start
# through a sandbox obtained from a SandboxManager.
#
# NOTE(review): this patch text was recovered from a whitespace-collapsed copy. Line breaks,
# blank context lines and leading indentation inside the hunks were reconstructed (blank lines
# from the hunk counts, indentation by guess) -- verify context lines against the tree.
# NOTE(review): `Promise {` and `new Promise(` in context lines look like they lost a type
# argument during extraction -- confirm against the tree before applying.
#
# src/backends/native/agent.ts
#   Before each tool call, reads elevation.until_ms / elevation.id / elevation.reason from the
#   session config. When until_ms and id are both set and until_ms parses to a finite time:
#     - in the future: the values are forwarded on the per-call policy context as
#       elevatedHostUntilMs / elevatedHostId / elevatedHostReason;
#     - otherwise: the three keys are deleted and securityElevationExpired is audited.
#   NOTE(review): if until_ms is set but elevation.id is missing, or until_ms does not parse,
#   neither branch runs and the stale keys stay in the session -- confirm that is intended.
#   NOTE(review): Number.parseInt accepts trailing garbage ("123abc" parses as 123).
diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts
index 7ce8d6f..cc882cd 100644
--- a/src/backends/native/agent.ts
+++ b/src/backends/native/agent.ts
@@ -5,6 +5,7 @@ import type { ToolRegistry } from '../../tools/registry.js';
 import type { ToolExecutor } from '../../tools/executor.js';
 import type { ToolResult } from '../../tools/types.js';
 import type { ToolPolicyContext } from '../../tools/policy.js';
+import { auditLogger } from '../../audit/index.js';
 import type { Attachment } from '../../channels/types.js';
 import type { OutboundAttachmentCollector } from './attachments.js';
 import { buildUserMessage, getMessageText } from '../../models/media.js';
@@ -266,8 +267,48 @@ export class NativeAgent {
         const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
         this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });
 
+        let elevationUntilMs: number | undefined;
+        let elevationReason: string | undefined;
+        let elevationId: string | undefined;
+
+        if (this.session) {
+          const untilRaw = this.session.getConfig('elevation.until_ms');
+          const idRaw = this.session.getConfig('elevation.id');
+          const reasonRaw = this.session.getConfig('elevation.reason');
+          if (untilRaw && idRaw) {
+            const untilMs = Number.parseInt(untilRaw, 10);
+            if (Number.isFinite(untilMs)) {
+              const now = Date.now();
+              if (untilMs > now) {
+                elevationUntilMs = untilMs;
+                elevationId = idRaw;
+                elevationReason = reasonRaw ?? undefined;
+              } else {
+                // Auto-expire elevation.
+                this.session.deleteConfig('elevation.until_ms');
+                this.session.deleteConfig('elevation.reason');
+                this.session.deleteConfig('elevation.id');
+                auditLogger?.securityElevationExpired({
+                  session_id: this.session.id,
+                  channel: this._toolPolicyContext?.channel ?? 'unknown',
+                  sender: this._toolPolicyContext?.sender ?? 'unknown',
+                  elevation_id: idRaw,
+                  until_ms: untilMs,
+                  reason: reasonRaw ?? undefined,
+                });
+              }
+            }
+          }
+        }
+
         const perCallContext: ToolPolicyContext | undefined = this._toolPolicyContext
-          ? { ...this._toolPolicyContext, untrustedContent: untrustedContentSeen }
+          ? {
+              ...this._toolPolicyContext,
+              untrustedContent: untrustedContentSeen,
+              elevatedHostUntilMs: elevationUntilMs,
+              elevatedHostReason: elevationReason,
+              elevatedHostId: elevationId,
+            }
           : undefined;
 
         const result = await this.toolExecutor!.execute(internalName, tc.args, perCallContext);
# src/daemon/index.ts
#   Hands the sandboxManager returned by initAgents to toolExecutor.setSandboxManager.
diff --git a/src/daemon/index.ts b/src/daemon/index.ts
index 4a57c9b..d5b2abf 100644
--- a/src/daemon/index.ts
+++ b/src/daemon/index.ts
@@ -107,6 +107,9 @@ export async function startDaemon(config: Config): Promise {
   const { skillRegistry, skillInstaller } = initSkills(config, lifecycle);
   const { agentConfigRegistry, agentRouter, sandboxManager } = await initAgents({ config, lifecycle });
 
+  // Ensure ToolExecutor can enforce sandbox execution at runtime.
+  toolExecutor.setSandboxManager(sandboxManager);
+
   const modelRouter = createModelRouter(config);
   const commandRegistry = new CommandRegistry();
   registerBuiltinCommands(commandRegistry);
# src/tools/executor.test.ts
#   Case 1: shell.exec requested on host for a skill whose execution_environment is 'sandbox'
#   is denied without elevation; with elevatedHostUntilMs in the future it succeeds once the
#   pending confirmation is approved.
#   Case 2: with executionEnvironment 'sandbox' and a fake manager, the output comes from the
#   fake sandbox's exec and the registered host tool (which throws) is not run.
#   NOTE(review): case 1 reads hooks.getPendingConfirmations() right after calling execute()
#   without awaiting; this presumably relies on the confirmation being registered before
#   execute() first suspends -- confirm, otherwise the assertion is timing-dependent.
diff --git a/src/tools/executor.test.ts b/src/tools/executor.test.ts
index b31b611..d1c6ba1 100644
--- a/src/tools/executor.test.ts
+++ b/src/tools/executor.test.ts
@@ -262,4 +262,68 @@ describe('ToolExecutor', () => {
     expect(result.success).toBe(false);
     expect(result.error).toContain('refusing to pass');
   });
+
+  it('denies host high-risk tools for sandboxed skills unless elevation is active', async () => {
+    const registry = new ToolRegistry();
+    registry.register({
+      name: 'shell.exec',
+      description: 'shell',
+      inputSchema: { type: 'object', properties: {} },
+      execute: async () => ({ success: true, output: 'ok' }),
+    });
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const denied = await executor.execute('shell.exec', { command: 'echo hi' }, {
+      skillName: 'test-skill',
+      skillPermissions: { execution_environment: 'sandbox' },
+      executionEnvironment: 'host',
+      autonomyLevel: 'autonomous',
+    });
+    expect(denied.success).toBe(false);
+    expect(denied.error).toContain('execution_environment=host');
+
+    const allowedPromise = executor.execute('shell.exec', { command: 'echo hi' }, {
+      skillName: 'test-skill',
+      skillPermissions: { execution_environment: 'sandbox' },
+      executionEnvironment: 'host',
+      elevatedHostUntilMs: Date.now() + 60_000,
+      elevatedHostId: 'e1',
+      autonomyLevel: 'autonomous',
+    });
+    const pending = hooks.getPendingConfirmations();
+    expect(pending).toHaveLength(1);
+    hooks.resolveConfirmation(pending[0].id, { approved: true });
+
+    const allowed = await allowedPromise;
+    expect(allowed.success).toBe(true);
+  });
+
+  it('executes shell.exec in sandbox when executionEnvironment is sandbox', async () => {
+    const registry = new ToolRegistry();
+    registry.register({
+      name: 'shell.exec',
+      description: 'host shell',
+      inputSchema: { type: 'object', properties: {} },
+      execute: async () => { throw new Error('host should not run'); },
+    });
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    const fakeSandbox = {
+      exec: async () => ({ stdout: 'sandbox-out', stderr: '' }),
+    } as any;
+    const fakeManager = {
+      getOrCreate: async () => fakeSandbox,
+    } as any;
+    executor.setSandboxManager(fakeManager);
+
+    const result = await executor.execute('shell.exec', { command: 'echo hi' }, {
+      executionEnvironment: 'sandbox',
+      sessionId: 's1',
+      autonomyLevel: 'autonomous',
+    });
+    expect(result.success).toBe(true);
+    expect(result.output).toContain('sandbox-out');
+  });
 });
# src/tools/executor.ts
#   setSandboxManager: stores an optional SandboxManager on the executor.
#   isElevationActive: true only when context.elevatedHostUntilMs is a finite number later
#     than Date.now().
#   resolveEffectiveExecutionEnvironment: context.executionEnvironment, defaulting to 'host';
#     forced to 'host' for a high-risk tool while elevation is active.
#   execute: uses that effective environment; forces finalAction to 'confirm' for elevated
#     host calls of high-risk tools; when the environment is 'sandbox' and a manager is set,
#     shell.exec and process.start run through createSandboxedShellTool /
#     createSandboxedProcessStartTool on a sandbox keyed by context.sessionId (falling back to
#     "<channel>:<sender>"); every other case calls tool.execute.
#   checkCapabilityConstraints: now receives the effective environment and skips the
#     host-denial for high-risk tools while elevation is active.
#
#   Review change in this revision: the forced-confirm check and the sandbox dispatch compare
#   tool.name (the entry resolved via registry.getByApiName, which the capability check in the
#   same function already uses) instead of the caller-supplied toolName. If toolName is an API
#   alias, the capability check would treat the call as high-risk while the confirm override
#   and sandbox routing silently would not match. Behavior is identical when the names coincide.
#   The index line below is left as received and no longer reflects the edited post-image.
#
#   NOTE(review): when the environment is 'sandbox' but no SandboxManager was set, shell.exec
#   and process.start fall through to tool.execute on the host -- consider failing closed.
#   NOTE(review): resolveEffectiveExecutionEnvironment still receives the raw toolName because
#   it runs before the registry lookup; with an alias it simply does not elevate.
diff --git a/src/tools/executor.ts b/src/tools/executor.ts
index 88918ac..cb670ec 100644
--- a/src/tools/executor.ts
+++ b/src/tools/executor.ts
@@ -7,6 +7,8 @@ import { auditLogger } from '../audit/index.js';
 import { randomUUID } from 'crypto';
 import { matchesAnyPattern, patternToRegex } from './policy.js';
 import { redactForAudit, containsSecretLikeKeys } from '../audit/redact.js';
+import type { SandboxManager } from '../sandbox/index.js';
+import { createSandboxedProcessStartTool, createSandboxedShellTool } from '../sandbox/index.js';
 
 export interface ToolExecutorConfig {
   defaultTimeoutMs?: number;
@@ -18,6 +20,7 @@ export class ToolExecutor {
   private hooks: HookEngine;
   private defaultTimeoutMs: number;
   private maxOutputBytes: number;
+  private sandboxManager?: SandboxManager;
 
   constructor(registry: ToolRegistry, hooks: HookEngine, config?: ToolExecutorConfig) {
     this.registry = registry;
@@ -26,9 +29,26 @@ export class ToolExecutor {
     this.maxOutputBytes = config?.maxOutputBytes ?? 51_200;
   }
 
+  setSandboxManager(manager?: SandboxManager): void {
+    this.sandboxManager = manager;
+  }
+
+  private isElevationActive(context?: ToolPolicyContext): boolean {
+    const untilMs = context?.elevatedHostUntilMs;
+    return typeof untilMs === 'number' && Number.isFinite(untilMs) && untilMs > Date.now();
+  }
+
+  private resolveEffectiveExecutionEnvironment(toolName: string, context?: ToolPolicyContext): 'host' | 'sandbox' {
+    const base = context?.executionEnvironment ?? 'host';
+    if (this.isHighRiskTool(toolName) && this.isElevationActive(context)) {
+      return 'host';
+    }
+    return base;
+  }
+
   async execute(toolName: string, args: unknown, context?: ToolPolicyContext): Promise {
     const executionId = randomUUID();
-    const executionEnvironment = context?.executionEnvironment;
+    const executionEnvironment = this.resolveEffectiveExecutionEnvironment(toolName, context);
     const skillName = context?.skillName;
 
     const tool = this.registry.getByApiName(toolName);
@@ -69,7 +89,7 @@ export class ToolExecutor {
     }
 
     // Capability enforcement: filesystem + network constraints
-    const capabilityViolation = this.checkCapabilityConstraints(tool.name, args, context);
+    const capabilityViolation = this.checkCapabilityConstraints(tool.name, args, context, executionEnvironment);
     if (capabilityViolation) {
       auditLogger?.toolDenied({
         tool_name: tool.name,
@@ -127,7 +147,12 @@ export class ToolExecutor {
     const baseAction = this.hooks.getAction(toolName);
     const autonomyLevel = context?.autonomyLevel ?? 'standard';
     const autonomyDecision = resolveAutonomy(toolName, baseAction, autonomyLevel);
-    const finalAction = autonomyDecision.action;
+    let finalAction = autonomyDecision.action;
+
+    // Elevated mode must always require explicit confirmation for host high-risk tool calls.
+    if (executionEnvironment === 'host' && this.isHighRiskTool(tool.name) && this.isElevationActive(context)) {
+      finalAction = 'confirm';
+    }
 
     // Log autonomy override if applicable
     if (autonomyDecision.overridden) {
@@ -201,7 +226,19 @@ export class ToolExecutor {
 
     try {
       const result = await Promise.race([
-        tool.execute(args),
+        (async () => {
+          if (executionEnvironment === 'sandbox' && this.sandboxManager) {
+            const sandboxSessionId = context?.sessionId ?? `${context?.channel ?? 'unknown'}:${context?.sender ?? 'unknown'}`;
+            const sandbox = await this.sandboxManager.getOrCreate(sandboxSessionId);
+            if (tool.name === 'shell.exec') {
+              return createSandboxedShellTool(sandbox).execute(args);
+            }
+            if (tool.name === 'process.start') {
+              return createSandboxedProcessStartTool(sandbox).execute(args);
+            }
+          }
+          return tool.execute(args);
+        })(),
         new Promise((_, reject) =>
           setTimeout(() => reject(new Error(`Tool '${toolName}' timed out after ${this.defaultTimeoutMs}ms`)), this.defaultTimeoutMs),
         ),
@@ -286,7 +323,7 @@ export class ToolExecutor {
     ].includes(toolName);
   }
 
-  private checkCapabilityConstraints(toolName: string, args: unknown, context?: ToolPolicyContext): string | null {
+  private checkCapabilityConstraints(toolName: string, args: unknown, context: ToolPolicyContext | undefined, effectiveEnv: 'host' | 'sandbox'): string | null {
     const perms = context?.skillPermissions;
     if (!perms) {
       if (context?.skillName && this.isHighRiskTool(toolName)) {
@@ -297,9 +334,8 @@ export class ToolExecutor {
 
     // Sandbox enforcement for high-risk tools unless explicitly allowed.
     if (this.isHighRiskTool(toolName)) {
-      const env = context?.executionEnvironment ?? 'host';
       const requested = perms.execution_environment ?? 'sandbox';
-      if (context?.skillName && env === 'host' && requested !== 'host') {
+      if (context?.skillName && effectiveEnv === 'host' && requested !== 'host' && !this.isElevationActive(context)) {
         return 'high-risk tool execution on host is not allowed for this skill (requires execution_environment=host)';
       }
     }