feat(security): enforce elevated mode and sandbox execution

2026-02-15 17:02:05 -08:00
parent b574d170d1
commit ab89378fce
4 changed files with 152 additions and 8 deletions
@@ -5,6 +5,7 @@ import type { ToolRegistry } from '../../tools/registry.js';
 import type { ToolExecutor } from '../../tools/executor.js';
 import type { ToolResult } from '../../tools/types.js';
 import type { ToolPolicyContext } from '../../tools/policy.js';
+import { auditLogger } from '../../audit/index.js';
 import type { Attachment } from '../../channels/types.js';
 import type { OutboundAttachmentCollector } from './attachments.js';
 import { buildUserMessage, getMessageText } from '../../models/media.js';
@@ -266,8 +267,48 @@ export class NativeAgent {
          const internalName = this.toolRegistry!.getByApiName(tc.name)?.name ?? tc.name;
          this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });

+          let elevationUntilMs: number | undefined;
+          let elevationReason: string | undefined;
+          let elevationId: string | undefined;
+
+          if (this.session) {
+            const untilRaw = this.session.getConfig('elevation.until_ms');
+            const idRaw = this.session.getConfig('elevation.id');
+            const reasonRaw = this.session.getConfig('elevation.reason');
+            if (untilRaw && idRaw) {
+              const untilMs = Number.parseInt(untilRaw, 10);
+              if (Number.isFinite(untilMs)) {
+                const now = Date.now();
+                if (untilMs > now) {
+                  elevationUntilMs = untilMs;
+                  elevationId = idRaw;
+                  elevationReason = reasonRaw ?? undefined;
+                } else {
+                  // Auto-expire elevation.
+                  this.session.deleteConfig('elevation.until_ms');
+                  this.session.deleteConfig('elevation.reason');
+                  this.session.deleteConfig('elevation.id');
+                  auditLogger?.securityElevationExpired({
+                    session_id: this.session.id,
+                    channel: this._toolPolicyContext?.channel ?? 'unknown',
+                    sender: this._toolPolicyContext?.sender ?? 'unknown',
+                    elevation_id: idRaw,
+                    until_ms: untilMs,
+                    reason: reasonRaw ?? undefined,
+                  });
+                }
+              }
+            }
+          }
+
          const perCallContext: ToolPolicyContext | undefined = this._toolPolicyContext
-            ? { ...this._toolPolicyContext, untrustedContent: untrustedContentSeen }
+            ? {
+              ...this._toolPolicyContext,
+              untrustedContent: untrustedContentSeen,
+              elevatedHostUntilMs: elevationUntilMs,
+              elevatedHostReason: elevationReason,
+              elevatedHostId: elevationId,
+            }
            : undefined;

          const result = await this.toolExecutor!.execute(internalName, tc.args, perCallContext);
@@ -107,6 +107,9 @@ export async function startDaemon(config: Config): Promise<DaemonContext> {
  const { skillRegistry, skillInstaller } = initSkills(config, lifecycle);
  const { agentConfigRegistry, agentRouter, sandboxManager } = await initAgents({ config, lifecycle });

+  // Ensure ToolExecutor can enforce sandbox execution at runtime.
+  toolExecutor.setSandboxManager(sandboxManager);
+
  const modelRouter = createModelRouter(config);
  const commandRegistry = new CommandRegistry();
  registerBuiltinCommands(commandRegistry);
@@ -262,4 +262,68 @@ describe('ToolExecutor', () => {
    expect(result.success).toBe(false);
    expect(result.error).toContain('refusing to pass');
  });
+
+  // Covers both halves of the host-execution rule for a skill that asks for the sandbox:
+  // without elevation the call is denied, with an unexpired elevation it goes through
+  // after an explicit confirmation.
+  it('denies host high-risk tools for sandboxed skills unless elevation is active', async () => {
+    // Stub tool that always succeeds, so any failure below comes from the executor's policy checks.
+    const registry = new ToolRegistry();
+    registry.register({
+      name: 'shell.exec',
+      description: 'shell',
+      inputSchema: { type: 'object', properties: {} },
+      execute: async () => ({ success: true, output: 'ok' }),
+    });
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
+    const executor = new ToolExecutor(registry, hooks);
+
+    // Case 1: the skill requests the sandbox, the call runs on the host, and no elevation fields are set.
+    const denied = await executor.execute('shell.exec', { command: 'echo hi' }, {
+      skillName: 'test-skill',
+      skillPermissions: { execution_environment: 'sandbox' },
+      executionEnvironment: 'host',
+      autonomyLevel: 'autonomous',
+    });
+    expect(denied.success).toBe(false);
+    expect(denied.error).toContain('execution_environment=host');
+
+    // Case 2: same call, but with an elevation that expires 60 seconds from now.
+    // The promise is deliberately not awaited yet: the call is expected to wait on a confirmation.
+    const allowedPromise = executor.execute('shell.exec', { command: 'echo hi' }, {
+      skillName: 'test-skill',
+      skillPermissions: { execution_environment: 'sandbox' },
+      executionEnvironment: 'host',
+      elevatedHostUntilMs: Date.now() + 60_000,
+      elevatedHostId: 'e1',
+      autonomyLevel: 'autonomous',
+    });
+    // NOTE(review): this assumes the confirmation is registered synchronously, before the
+    // first await inside execute(); otherwise the pending list could still be empty here. Confirm.
+    const pending = hooks.getPendingConfirmations();
+    expect(pending).toHaveLength(1);
+    hooks.resolveConfirmation(pending[0].id, { approved: true });
+
+    // Once approved, the elevated call completes successfully.
+    const allowed = await allowedPromise;
+    expect(allowed.success).toBe(true);
+  });
+
+  // When the context asks for the sandbox and a sandbox manager is installed, shell.exec must be
+  // served by the sandbox and the registered host implementation must never be invoked.
+  it('executes shell.exec in sandbox when executionEnvironment is sandbox', async () => {
+    // Host-side tool that fails loudly if the executor ever falls back to it.
+    const hostOnlyTool = {
+      name: 'shell.exec',
+      description: 'host shell',
+      inputSchema: { type: 'object', properties: {} },
+      execute: async () => { throw new Error('host should not run'); },
+    };
+    const toolRegistry = new ToolRegistry();
+    toolRegistry.register(hostOnlyTool);
+
+    const hookEngine = new HookEngine({ confirm: [], log: [], silent: [] });
+    const sut = new ToolExecutor(toolRegistry, hookEngine);
+
+    // Minimal stand-ins: the manager always hands back one sandbox with canned output.
+    const sandboxStub = {
+      exec: async () => ({ stdout: 'sandbox-out', stderr: '' }),
+    } as any;
+    const managerStub = {
+      getOrCreate: async () => sandboxStub,
+    } as any;
+    sut.setSandboxManager(managerStub);
+
+    const callContext = {
+      executionEnvironment: 'sandbox',
+      sessionId: 's1',
+      autonomyLevel: 'autonomous',
+    };
+    const outcome = await sut.execute('shell.exec', { command: 'echo hi' }, callContext);
+
+    expect(outcome.success).toBe(true);
+    expect(outcome.output).toContain('sandbox-out');
+  });
 });
@@ -7,6 +7,8 @@ import { auditLogger } from '../audit/index.js';
 import { randomUUID } from 'crypto';
 import { matchesAnyPattern, patternToRegex } from './policy.js';
 import { redactForAudit, containsSecretLikeKeys } from '../audit/redact.js';
+import type { SandboxManager } from '../sandbox/index.js';
+import { createSandboxedProcessStartTool, createSandboxedShellTool } from '../sandbox/index.js';

 export interface ToolExecutorConfig {
  defaultTimeoutMs?: number;
@@ -18,6 +20,7 @@ export class ToolExecutor {
  private hooks: HookEngine;
  private defaultTimeoutMs: number;
  private maxOutputBytes: number;
+  private sandboxManager?: SandboxManager;

  constructor(registry: ToolRegistry, hooks: HookEngine, config?: ToolExecutorConfig) {
    this.registry = registry;
@@ -26,9 +29,26 @@ export class ToolExecutor {
    this.maxOutputBytes = config?.maxOutputBytes ?? 51_200;
  }

+  /**
+   * Stores the sandbox manager on this executor, replacing any previously stored one.
+   *
+   * @param manager - Manager to keep; omitting it (or passing `undefined`) clears the stored reference.
+   */
+  setSandboxManager(manager?: SandboxManager): void {
+    this.sandboxManager = manager;
+  }
+
+  /**
+   * Reports whether the given context carries a host elevation that has not expired yet.
+   *
+   * @param context - Per-call policy context; may be absent.
+   * @returns `true` only when `elevatedHostUntilMs` is a finite number strictly greater than `Date.now()`.
+   */
+  private isElevationActive(context?: ToolPolicyContext): boolean {
+    const expiresAtMs = context?.elevatedHostUntilMs;
+    // A missing, non-numeric, NaN or infinite deadline is treated as "no elevation".
+    if (typeof expiresAtMs !== 'number' || !Number.isFinite(expiresAtMs)) {
+      return false;
+    }
+    return expiresAtMs > Date.now();
+  }
+
+  /**
+   * Determines which environment a tool call is treated as running in.
+   *
+   * @param toolName - Name of the tool being invoked, as passed to `isHighRiskTool`.
+   * @param context - Per-call policy context; may be absent.
+   * @returns `'host'` for a high-risk tool while an elevation is active; otherwise the
+   *   context's `executionEnvironment`, falling back to `'host'` when none is set.
+   */
+  private resolveEffectiveExecutionEnvironment(toolName: string, context?: ToolPolicyContext): 'host' | 'sandbox' {
+    // An active elevation pins high-risk tools to the host regardless of the requested environment.
+    const elevatedHighRiskCall = this.isHighRiskTool(toolName) && this.isElevationActive(context);
+    return elevatedHighRiskCall ? 'host' : (context?.executionEnvironment ?? 'host');
+  }
+
  async execute(toolName: string, args: unknown, context?: ToolPolicyContext): Promise<ToolResult> {
    const executionId = randomUUID();
-    const executionEnvironment = context?.executionEnvironment;
+    const executionEnvironment = this.resolveEffectiveExecutionEnvironment(toolName, context);
    const skillName = context?.skillName;

    const tool = this.registry.getByApiName(toolName);
@@ -69,7 +89,7 @@ export class ToolExecutor {
    }

    // Capability enforcement: filesystem + network constraints
-    const capabilityViolation = this.checkCapabilityConstraints(tool.name, args, context);
+    const capabilityViolation = this.checkCapabilityConstraints(tool.name, args, context, executionEnvironment);
    if (capabilityViolation) {
      auditLogger?.toolDenied({
        tool_name: tool.name,
@@ -127,7 +147,12 @@ export class ToolExecutor {
    const baseAction = this.hooks.getAction(toolName);
    const autonomyLevel = context?.autonomyLevel ?? 'standard';
    const autonomyDecision = resolveAutonomy(toolName, baseAction, autonomyLevel);
-    const finalAction = autonomyDecision.action;
+    let finalAction = autonomyDecision.action;
+
+    // Elevated mode must always require explicit confirmation for host high-risk tool calls.
+    if (executionEnvironment === 'host' && this.isHighRiskTool(toolName) && this.isElevationActive(context)) {
+      finalAction = 'confirm';
+    }

    // Log autonomy override if applicable
    if (autonomyDecision.overridden) {
@@ -201,7 +226,19 @@ export class ToolExecutor {

    try {
      const result = await Promise.race([
-        tool.execute(args),
+        (async () => {
+          if (executionEnvironment === 'sandbox' && this.sandboxManager) {
+            const sandboxSessionId = context?.sessionId ?? `${context?.channel ?? 'unknown'}:${context?.sender ?? 'unknown'}`;
+            const sandbox = await this.sandboxManager.getOrCreate(sandboxSessionId);
+            if (toolName === 'shell.exec') {
+              return createSandboxedShellTool(sandbox).execute(args);
+            }
+            if (toolName === 'process.start') {
+              return createSandboxedProcessStartTool(sandbox).execute(args);
+            }
+          }
+          return tool.execute(args);
+        })(),
        new Promise<ToolResult>((_, reject) =>
          setTimeout(() => reject(new Error(`Tool '${toolName}' timed out after ${this.defaultTimeoutMs}ms`)), this.defaultTimeoutMs),
        ),
@@ -286,7 +323,7 @@ export class ToolExecutor {
    ].includes(toolName);
  }

-  private checkCapabilityConstraints(toolName: string, args: unknown, context?: ToolPolicyContext): string | null {
+  private checkCapabilityConstraints(toolName: string, args: unknown, context: ToolPolicyContext | undefined, effectiveEnv: 'host' | 'sandbox'): string | null {
    const perms = context?.skillPermissions;
    if (!perms) {
      if (context?.skillName && this.isHighRiskTool(toolName)) {
@@ -297,9 +334,8 @@ export class ToolExecutor {

    // Sandbox enforcement for high-risk tools unless explicitly allowed.
    if (this.isHighRiskTool(toolName)) {
-      const env = context?.executionEnvironment ?? 'host';
      const requested = perms.execution_environment ?? 'sandbox';
-      if (context?.skillName && env === 'host' && requested !== 'host') {
+      if (context?.skillName && effectiveEnv === 'host' && requested !== 'host' && !this.isElevationActive(context)) {
        return 'high-risk tool execution on host is not allowed for this skill (requires execution_environment=host)';
      }
    }