diff --git a/README.md b/README.md index 70cb01b..d9014e7 100644 --- a/README.md +++ b/README.md @@ -593,6 +593,21 @@ hooks: - notify ``` +For unrestricted deployments, pair hooks with agent-level sensitive gating: + +```yaml +agents: + # deny_without_elevation | confirm_without_elevation + sensitive_mode: deny_without_elevation + immutable_denylist: + - tool: shell.exec + args_pattern: "git push origin main" + reason: "direct main pushes are blocked" + - tool: shell.exec + args_pattern: "git reset --hard" + reason: "destructive hard reset is blocked" +``` + ## Browser Automation Tools Flynn ships these browser tools: diff --git a/docs/plans/state.json b/docs/plans/state.json index 13962b8..ceefe79 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -5077,6 +5077,25 @@ ], "test_status": "pnpm test:run src/session/manager.test.ts src/gateway/handlers/handlers.test.ts + pnpm typecheck passing" }, + "sensitive-tool-gating-with-elevation": { + "status": "completed", + "date": "2026-02-18", + "updated": "2026-02-18", + "summary": "Implemented fail-closed sensitive tool gating for unrestricted deployments by adding `agents.sensitive_mode` and `agents.immutable_denylist`, enforcing immutable deny rules in `ToolExecutor`, requiring `/elevate` before host execution of sensitive tools in deny mode, and preserving explicit per-call confirmation during elevation. 
Added schema and executor regression coverage and documented operator configuration.", + "files_modified": [ + "src/config/schema.ts", + "src/config/schema.test.ts", + "src/tools/policy.ts", + "src/tools/executor.ts", + "src/tools/executor.test.ts", + "src/daemon/tools.ts", + "src/daemon/routing.ts", + "src/gateway/session-bridge.ts", + "README.md", + "docs/plans/state.json" + ], + "test_status": "pnpm test:run src/tools/executor.test.ts src/config/schema.test.ts + pnpm typecheck passing" + }, "native-agent-model-timeout-hardening": { "status": "completed", "date": "2026-02-18", @@ -5091,7 +5110,7 @@ } }, "overall_progress": { - "total_test_count": 1889, + "total_test_count": 1895, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", diff --git a/src/config/schema.test.ts b/src/config/schema.test.ts index bd14fe8..ca53a93 100644 --- a/src/config/schema.test.ts +++ b/src/config/schema.test.ts @@ -1395,6 +1395,13 @@ describe('configSchema — agents truthfulness/autonomy', () => { const result = configSchema.parse(minimalConfig); expect(result.agents.truthfulness_mode).toBe('standard'); expect(result.agents.autonomy_level).toBe('standard'); + expect(result.agents.sensitive_mode).toBe('deny_without_elevation'); + expect(result.agents.immutable_denylist).toEqual( + expect.arrayContaining([ + expect.objectContaining({ tool: 'shell.exec', args_pattern: 'git push origin main' }), + expect.objectContaining({ tool: 'shell.exec', args_pattern: 'git reset --hard' }), + ]), + ); }); it('accepts explicit truthfulness and autonomy modes', () => { @@ -1403,11 +1410,19 @@ describe('configSchema — agents truthfulness/autonomy', () => { agents: { truthfulness_mode: 'strict', autonomy_level: 'conservative', + sensitive_mode: 'confirm_without_elevation', + immutable_denylist: [ + { tool: 'shell.exec', args_pattern: 'rm -rf /', reason: 'too destructive' }, + ], }, }); expect(result.agents.truthfulness_mode).toBe('strict'); 
/**
 * How sensitive tools behave on the host when no elevation is active:
 * `deny_without_elevation` or `confirm_without_elevation`.
 */
const sensitiveModeSchema = z.enum(['deny_without_elevation', 'confirm_without_elevation']);

/** Shared validator: any string with at least one character. */
const denyRuleText = z.string().min(1);

/**
 * One entry of `agents.immutable_denylist`.
 *
 * - `tool`: required, non-empty tool name/pattern.
 * - `args_pattern`: optional, non-empty text matched against the call's args.
 * - `reason`: optional, non-empty message reported when the rule denies a call.
 */
const immutableDenyRuleSchema = z.object({
  tool: denyRuleText,
  args_pattern: denyRuleText.optional(),
  reason: denyRuleText.optional(),
});
/** Sensitive-tool gating mode, inferred from `sensitiveModeSchema`. */
export type SensitiveMode = z.infer<typeof sensitiveModeSchema>;
/** A single immutable denylist rule as written in config (snake_case keys), inferred from `immutableDenyRuleSchema`. */
export type ImmutableDenyRule = z.infer<typeof immutableDenyRuleSchema>;
'standard', + sensitiveMode: deps.config.agents.sensitive_mode, + immutableDenylist: deps.config.agents.immutable_denylist.map((rule) => ({ + tool: rule.tool, + argsPattern: rule.args_pattern, + reason: rule.reason, + })), skillName: activeSkillName, skillPermissions: activeSkill?.manifest.permissions, allowedSecretScopes: activeSkill?.manifest.permissions?.secrets, diff --git a/src/daemon/tools.ts b/src/daemon/tools.ts index 224c43a..fe14c2f 100644 --- a/src/daemon/tools.ts +++ b/src/daemon/tools.ts @@ -91,7 +91,14 @@ export function initTools(deps: ToolsDeps): ToolsResult { console.log('Browser tools disabled (set browser.enabled=true to register browser.* tools)'); } - const toolExecutor = new ToolExecutor(toolRegistry, hookEngine); + const toolExecutor = new ToolExecutor(toolRegistry, hookEngine, { + sensitiveMode: config.agents.sensitive_mode, + immutableDenylist: config.agents.immutable_denylist.map((rule) => ({ + tool: rule.tool, + argsPattern: rule.args_pattern, + reason: rule.reason, + })), + }); // Initialize tool policy from config const toolPolicy = new ToolPolicy(config.tools); diff --git a/src/gateway/session-bridge.ts b/src/gateway/session-bridge.ts index fc67c88..1e73acb 100644 --- a/src/gateway/session-bridge.ts +++ b/src/gateway/session-bridge.ts @@ -293,6 +293,12 @@ export class SessionBridge { agent: primaryTier, provider: config?.models.default.provider, autonomyLevel: config?.agents.autonomy_level ?? 'standard', + sensitiveMode: config?.agents.sensitive_mode ?? 'deny_without_elevation', + immutableDenylist: (config?.agents.immutable_denylist ?? 
/**
 * Builds a registry holding a stub `shell.exec` tool that always succeeds,
 * an empty-rule HookEngine, and a ToolExecutor wired with the given config.
 */
function setupShellExec(executorConfig: ConstructorParameters<typeof ToolExecutor>[2]) {
  const registry = new ToolRegistry();
  registry.register({
    name: 'shell.exec',
    description: 'shell',
    inputSchema: { type: 'object', properties: {} },
    execute: async () => ({ success: true, output: 'ok' }),
  });
  const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
  const executor = new ToolExecutor(registry, hooks, executorConfig);
  return { executor, hooks };
}

it('denies sensitive host tools without elevation in deny_without_elevation mode', async () => {
  const { executor } = setupShellExec({ sensitiveMode: 'deny_without_elevation' });

  // No elevation fields in the context: the call must be rejected outright.
  const outcome = await executor.execute('shell.exec', { command: 'echo hi' }, {
    executionEnvironment: 'host',
    autonomyLevel: 'autonomous',
  });

  expect(outcome.success).toBe(false);
  expect(outcome.error).toContain('requires /elevate');
});

it('allows sensitive host tools after elevation and requires confirmation', async () => {
  const { executor, hooks } = setupShellExec({ sensitiveMode: 'deny_without_elevation' });

  // Deliberately not awaited yet: the call should park on a confirmation request.
  const inFlight = executor.execute('shell.exec', { command: 'echo hi' }, {
    executionEnvironment: 'host',
    autonomyLevel: 'autonomous',
    elevatedHostUntilMs: Date.now() + 60_000,
    elevatedHostId: 'elev-1',
  });

  const awaitingApproval = hooks.getPendingConfirmations();
  expect(awaitingApproval).toHaveLength(1);
  hooks.resolveConfirmation(awaitingApproval[0].id, { approved: true });

  const outcome = await inFlight;
  expect(outcome.success).toBe(true);
});

it('enforces immutable denylist even during elevation', async () => {
  const { executor } = setupShellExec({
    sensitiveMode: 'deny_without_elevation',
    immutableDenylist: [
      { tool: 'shell.exec', argsPattern: 'git reset --hard', reason: 'blocked by policy' },
    ],
  });

  // Elevation is active, yet the denylisted command must still be refused.
  const outcome = await executor.execute('shell.exec', { command: 'git reset --hard HEAD~1' }, {
    executionEnvironment: 'host',
    elevatedHostUntilMs: Date.now() + 60_000,
    elevatedHostId: 'elev-2',
    autonomyLevel: 'autonomous',
  });

  expect(outcome.success).toBe(false);
  expect(outcome.error).toContain('blocked by policy');
});
a/src/tools/executor.ts b/src/tools/executor.ts index 5fd8b96..eb61221 100644 --- a/src/tools/executor.ts +++ b/src/tools/executor.ts @@ -1,7 +1,7 @@ import type { ToolResult } from './types.js'; import type { ToolRegistry } from './registry.js'; import type { HookEngine } from '../hooks/engine.js'; -import type { ToolPolicyContext } from './policy.js'; +import type { ImmutableDenyRule, SensitiveMode, ToolPolicyContext } from './policy.js'; import { resolveAutonomy } from '../hooks/autonomy.js'; import { auditLogger } from '../audit/index.js'; import { randomUUID } from 'crypto'; @@ -13,6 +13,8 @@ import { createSandboxedProcessStartTool, createSandboxedShellTool } from '../sa export interface ToolExecutorConfig { defaultTimeoutMs?: number; maxOutputBytes?: number; + sensitiveMode?: SensitiveMode; + immutableDenylist?: ImmutableDenyRule[]; } export interface ToolExecutionObserverEvent { @@ -27,6 +29,8 @@ export class ToolExecutor { private hooks: HookEngine; private defaultTimeoutMs: number; private maxOutputBytes: number; + private sensitiveMode: SensitiveMode; + private immutableDenylist: ImmutableDenyRule[]; private sandboxManager?: SandboxManager; private executionObserver?: (event: ToolExecutionObserverEvent) => void; @@ -35,6 +39,8 @@ export class ToolExecutor { this.hooks = hooks; this.defaultTimeoutMs = config?.defaultTimeoutMs ?? 30_000; this.maxOutputBytes = config?.maxOutputBytes ?? 51_200; + this.sensitiveMode = config?.sensitiveMode ?? 'deny_without_elevation'; + this.immutableDenylist = config?.immutableDenylist ?? 
[]; } setSandboxManager(manager?: SandboxManager): void { @@ -79,6 +85,21 @@ export class ToolExecutor { const argsRedaction = redactForAudit(args); + const immutableDenyReason = this.evaluateImmutableDenylist(tool.name, args, context); + if (immutableDenyReason) { + auditLogger?.toolDenied({ + tool_name: tool.name, + reason: immutableDenyReason, + denial_type: 'policy', + execution_id: executionId, + execution_environment: executionEnvironment, + skill_name: skillName, + redactions_applied: argsRedaction.redactions, + session_id: context?.sessionId, + }); + return { success: false, output: '', error: `Tool '${tool.name}' denied: ${immutableDenyReason}` }; + } + // Secret scope enforcement const requiredScopes = tool.requiredSecretScopes ?? []; const allowedScopes = this.resolveAllowedSecretScopes(context); @@ -132,6 +153,22 @@ export class ToolExecutor { return { success: false, output: '', error: `Tool '${tool.name}' blocked: ${guard}` }; } + if (this.shouldDenyWithoutElevation(tool.name, executionEnvironment, context)) { + const mode = context?.sensitiveMode ?? 
/**
 * Reports whether a tool counts as "sensitive" for elevation gating.
 *
 * Sensitive tools are: `shell.exec`, `process.start`, `process.kill`,
 * every `browser.*` tool, `message.send`, `cron.create` and `cron.delete`.
 */
private isSensitiveTool(toolName: string): boolean {
  if (toolName === 'shell.exec' || toolName === 'process.start' || toolName === 'process.kill') {
    return true;
  }
  if (toolName.startsWith('browser.')) {
    return true;
  }
  return ['message.send', 'cron.create', 'cron.delete'].includes(toolName);
}

/**
 * Decides whether a call must be rejected because it targets a sensitive
 * tool on the host while no elevation is active.
 *
 * @param toolName Name of the tool being executed.
 * @param executionEnvironment Effective environment for this call.
 * @param context Per-call policy context; its `sensitiveMode`, when set,
 *   takes precedence over the executor-level mode.
 * @returns `true` only when the mode is `deny_without_elevation`, the call
 *   runs on the host, the tool is sensitive, and `isElevationActive(context)`
 *   (defined elsewhere in this class) reports no active elevation.
 */
private shouldDenyWithoutElevation(toolName: string, executionEnvironment: 'host' | 'sandbox', context?: ToolPolicyContext): boolean {
  const mode = context?.sensitiveMode ?? this.sensitiveMode;
  if (mode !== 'deny_without_elevation') {
    return false;
  }
  if (executionEnvironment !== 'host') {
    return false;
  }
  if (!this.isSensitiveTool(toolName)) {
    return false;
  }
  return !this.isElevationActive(context);
}

/**
 * Checks a call against the immutable denylist.
 *
 * The executor-level rules are a floor that a per-call context can extend
 * but never replace: both lists are always evaluated. (Previously a context
 * list, even an empty one, shadowed the configured rules entirely.)
 *
 * A rule matches when its `tool` pattern matches `toolName` and, if it has an
 * `argsPattern`, that pattern occurs case-insensitively as a substring of the
 * JSON-serialized args. This is a best-effort text match, not a command
 * parser: e.g. differently spaced commands are not normalized.
 *
 * @param toolName Name of the tool being executed.
 * @param args Raw tool arguments.
 * @param context Per-call policy context that may contribute extra rules.
 * @returns The denial reason of the first matching rule, or `null` when no
 *   rule applies.
 */
private evaluateImmutableDenylist(toolName: string, args: unknown, context?: ToolPolicyContext): string | null {
  // Executor rules first so the operator-configured reason wins on duplicates.
  const rules = [...this.immutableDenylist, ...(context?.immutableDenylist ?? [])];
  if (rules.length === 0) {
    return null;
  }

  // JSON.stringify throws on circular structures / BigInt and yields
  // undefined for functions and symbols; `null` means "args not inspectable".
  let serializedArgs: string | null;
  try {
    const raw: string | undefined = JSON.stringify(args ?? {});
    serializedArgs = raw === undefined ? null : raw.toLowerCase();
  } catch {
    serializedArgs = null;
  }

  for (const rule of rules) {
    if (!matchesAnyPattern(toolName, [rule.tool])) {
      continue;
    }
    // Fail closed: when args cannot be inspected, an args-scoped rule for
    // this tool is treated as matching rather than silently skipped.
    if (rule.argsPattern && serializedArgs !== null && !serializedArgs.includes(rule.argsPattern.toLowerCase())) {
      continue;
    }
    return rule.reason ?? `blocked by immutable denylist rule (${rule.tool}${rule.argsPattern ? ` / ${rule.argsPattern}` : ''})`;
  }

  return null;
}