feat(policy): enforce truthfulness and autonomy guardrails

Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials. Wire policy through prompt assembly, tool execution context, and daemon/gateway agent paths; update tests and planning state for Phase 3 PR #2 completion.
2026-02-12 16:06:45 -08:00
parent 125af4e832
commit 90ce622080
18 changed files with 1172 additions and 104 deletions
@@ -35,6 +35,13 @@ const bigOutputTool: Tool = {
  execute: async () => ({ success: true, output: 'x'.repeat(100_000) }),
 };

+const fileWriteLikeTool: Tool = { // stub registered under the name 'file.write'; always succeeds and touches no files
+  name: 'file.write',
+  description: 'Test file write tool',
+  inputSchema: { type: 'object', properties: {} }, // object input with no declared properties
+  execute: async () => ({ success: true, output: 'ok' }),
+};
+
 describe('ToolExecutor', () => {
  it('executes a tool and returns result', async () => {
    const registry = new ToolRegistry();
@@ -123,4 +130,34 @@ describe('ToolExecutor', () => {
    expect(result.success).toBe(false);
    expect(result.error).toContain('denied');
  });
+
+  it('conservative autonomy requires confirm for dangerous tools', async () => {
+    const registry = new ToolRegistry();
+    registry.register(fileWriteLikeTool);
+    const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); // empty pattern lists; presumably the confirmation below stems from the 'conservative' level, not a hook pattern
+    const executor = new ToolExecutor(registry, hooks);
+
+    const resultPromise = executor.execute('file.write', {}, { autonomyLevel: 'conservative' }); // not awaited here; awaited after the confirmation is answered
+    const pending = hooks.getPendingConfirmations(); // read synchronously, so execute() must have queued the request before yielding
+    expect(pending).toHaveLength(1);
+    hooks.resolveConfirmation(pending[0].id, { approved: true }); // approve the queued request
+
+    const result = await resultPromise;
+    expect(result.success).toBe(true);
+  });
+
+  it('autonomous mode defers to explicit confirm hooks', async () => {
+    const registry = new ToolRegistry();
+    registry.register(echoTool); // echoTool is defined outside this hunk; presumably named 'test.echo' — confirm
+    const hooks = new HookEngine({ confirm: ['test.*'], log: [], silent: [] }); // the only configured pattern is a confirm pattern, 'test.*'
+    const executor = new ToolExecutor(registry, hooks);
+
+    const resultPromise = executor.execute('test.echo', { text: 'hi' }, { autonomyLevel: 'autonomous' }); // not awaited here; awaited after the confirmation is answered
+    const pending = hooks.getPendingConfirmations(); // read synchronously, so execute() must have queued the request before yielding
+    expect(pending).toHaveLength(1); // a confirmation is still expected at the 'autonomous' level
+    hooks.resolveConfirmation(pending[0].id, { approved: true }); // approve the queued request
+
+    const result = await resultPromise;
+    expect(result.success).toBe(true);
+  });
 });
@@ -2,6 +2,7 @@ import type { ToolResult } from './types.js';
 import type { ToolRegistry } from './registry.js';
 import type { HookEngine } from '../hooks/engine.js';
 import type { ToolPolicyContext } from './policy.js';
+import { resolveAutonomy } from '../hooks/autonomy.js';
 import { auditLogger } from '../audit/index.js';

 export interface ToolExecutorConfig {
@@ -53,31 +54,49 @@ export class ToolExecutor {
      }
    }

-    // Check hooks
-    const action = this.hooks.getAction(toolName);
-    if (action === 'confirm') {
+    // Resolve the effective action: start from the hook engine's action for this tool, then pass it through resolveAutonomy with the caller's autonomy level
+    const baseAction = this.hooks.getAction(toolName);
+    const autonomyLevel = context?.autonomyLevel ?? 'standard'; // callers that pass no level get 'standard'
+    const autonomyDecision = resolveAutonomy(toolName, baseAction, autonomyLevel);
+    const finalAction = autonomyDecision.action; // this, not baseAction, drives the confirmation check that follows
+
+    // Audit the override when resolveAutonomy reports one. NOTE(review): this goes through toolDenied() although nothing has been denied yet; presumably consumers key on denial_type — confirm
+    if (autonomyDecision.overridden) {
+      auditLogger?.toolDenied({
+        tool_name: toolName,
+        reason: `Autonomy override: ${autonomyDecision.reason}`,
+        denial_type: 'autonomy_override',
+        session_id: context?.sessionId,
+      });
+    }
+
+    if (finalAction === 'confirm') {
      const hookResult = await this.hooks.requestConfirmation(
        toolName,
        args as Record<string, unknown>,
      );
      if (!hookResult.approved) {
+        const denyReason = hookResult.reason ?? 'no reason';
+        const detailedReason = autonomyDecision.overridden
+          ? `${denyReason} (autonomy: ${autonomyDecision.reason})`
+          : denyReason;
        auditLogger?.toolDenied({
          tool_name: toolName,
-          reason: hookResult.reason ?? 'no reason',
+          reason: detailedReason,
          denial_type: 'hook',
          session_id: context?.sessionId,
        });
        return {
          success: false,
          output: '',
-          error: `Tool '${toolName}' denied by user: ${hookResult.reason ?? 'no reason'}`,
+          error: `Tool '${toolName}' denied by user: ${detailedReason}`,
        };
      }
    }

    // Execute with timeout
    const startTime = Date.now();
-    
+
    auditLogger?.toolStart({
      tool_name: toolName,
      tool_args: args,
@@ -113,7 +132,7 @@ export class ToolExecutor {
    } catch (error) {
      const duration = Date.now() - startTime;
      const errorMessage = error instanceof Error ? error.message : String(error);
-      
+
      auditLogger?.toolError({
        tool_name: toolName,
        error: errorMessage,
@@ -44,6 +44,7 @@ describe('Tool integration (end-to-end)', () => {
      systemPrompt: 'You have tools.',
      toolRegistry: registry,
      toolExecutor: executor,
+      toolPolicyContext: { autonomyLevel: 'autonomous' },
    });

    const result = await agent.process('run echo integration_test');
@@ -89,9 +90,10 @@ describe('Tool integration (end-to-end)', () => {

    const agent = new NativeAgent({
      modelClient: mockClient,
-      systemPrompt: 'You have file tools.',
+      systemPrompt: 'You have tools.',
      toolRegistry: registry,
      toolExecutor: executor,
+      toolPolicyContext: { autonomyLevel: 'autonomous' },
    });

    try {
@@ -141,6 +143,7 @@ describe('Tool integration (end-to-end)', () => {
      systemPrompt: 'You have tools.',
      toolRegistry: registry,
      toolExecutor: executor,
+      toolPolicyContext: { autonomyLevel: 'autonomous' },
    });

    const result = await agent.process('verify tool results');
@@ -1,4 +1,4 @@
-import type { ToolsConfig, ToolProfile } from '../config/schema.js';
+import type { AutonomyLevel, ToolsConfig, ToolProfile } from '../config/schema.js';
 import type { Tool } from './types.js';

 // ── Profile definitions ─────────────────────────────────────────────
@@ -140,6 +140,8 @@ export interface ToolPolicyContext {
  sender?: string;
  /** Model tier for audit logging. */
  tier?: string;
+  /** Autonomy level for tool execution (affects confirmation requirements). */
+  autonomyLevel?: AutonomyLevel;
 }

 // ── ToolPolicy engine ───────────────────────────────────────────────