feat(policy): enforce truthfulness and autonomy guardrails

Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials.

Wire policy through prompt assembly, tool execution context, and daemon/gateway agent paths; update tests and planning state for Phase 3 PR #2 completion.
This commit is contained in:
William Valentin
2026-02-12 16:06:45 -08:00
parent 125af4e832
commit 90ce622080
18 changed files with 1172 additions and 104 deletions
+149
View File
@@ -0,0 +1,149 @@
import { describe, it, expect } from 'vitest';
import { resolveAutonomy, type AutonomyLevel } from './autonomy.js';
import type { HookAction } from './types.js';
/**
 * Unit tests for `resolveAutonomy`.
 *
 * Each autonomy level is exercised against dangerous and non-dangerous tools
 * to pin the resulting action, the `overridden` flag, and the reason text.
 */
describe('autonomy', () => {
  // Tool names the policy is expected to classify as dangerous.
  const dangerousTools = [
    'file.write',
    'file.edit',
    'file.patch',
    'shell.exec',
    'process.start',
    'process.kill',
  ];

  describe('resolveAutonomy', () => {
    describe('conservative mode', () => {
      const level: AutonomyLevel = 'conservative';

      it('overrides silent to confirm for dangerous tools', () => {
        const decision = resolveAutonomy('file.write', 'silent', level);
        expect(decision.action).toBe('confirm');
        expect(decision.overridden).toBe(true);
        expect(decision.reason).toContain('conservative mode');
        expect(decision.level).toBe('conservative');
      });

      it('preserves confirm for dangerous tools', () => {
        const decision = resolveAutonomy('shell.exec', 'confirm', level);
        expect(decision.action).toBe('confirm');
        expect(decision.overridden).toBe(false);
        expect(decision.reason).toContain('already requires confirmation');
      });

      it('allows safe read tools with base action', () => {
        const decision = resolveAutonomy('file.read', 'silent', level);
        expect(decision.action).toBe('silent');
        expect(decision.overridden).toBe(false);
        expect(decision.reason).toContain('safe');
      });

      it('handles all dangerous tools', () => {
        for (const tool of dangerousTools) {
          const decision = resolveAutonomy(tool, 'silent', level);
          expect(decision.action).toBe('confirm');
          expect(decision.overridden).toBe(true);
        }
      });
    });

    describe('standard mode', () => {
      const level: AutonomyLevel = 'standard';

      it('overrides silent to confirm for dangerous tools without explicit hook', () => {
        const decision = resolveAutonomy('file.write', 'silent', level);
        expect(decision.action).toBe('confirm');
        expect(decision.overridden).toBe(true);
        expect(decision.reason).toContain('standard mode');
        expect(decision.reason).toContain('dangerous tool without explicit hook');
      });

      it('preserves confirm for dangerous tools', () => {
        const decision = resolveAutonomy('shell.exec', 'confirm', level);
        expect(decision.action).toBe('confirm');
        expect(decision.overridden).toBe(false);
      });

      // A 'log' base action is NOT kept for dangerous tools in standard mode:
      // anything other than 'confirm' is escalated.
      it('escalates log to confirm for dangerous tools', () => {
        const decision = resolveAutonomy('file.edit', 'log', level);
        expect(decision.action).toBe('confirm');
        expect(decision.overridden).toBe(true);
      });

      it('allows safe tools with base action', () => {
        const decision = resolveAutonomy('file.read', 'silent', level);
        expect(decision.action).toBe('silent');
        expect(decision.overridden).toBe(false);
      });

      it('allows non-dangerous tools with any base action', () => {
        const decision = resolveAutonomy('custom.tool', 'silent', level);
        expect(decision.action).toBe('silent');
        expect(decision.overridden).toBe(false);
      });
    });

    describe('autonomous mode', () => {
      const level: AutonomyLevel = 'autonomous';

      it('defers to base action by default', () => {
        const decision = resolveAutonomy('file.write', 'silent', level);
        expect(decision.action).toBe('silent');
        expect(decision.overridden).toBe(false);
        expect(decision.reason).toContain('Base action');
      });

      it('preserves log action', () => {
        const decision = resolveAutonomy('shell.exec', 'log', level);
        expect(decision.action).toBe('log');
        expect(decision.overridden).toBe(false);
      });

      it('preserves explicit confirm hooks', () => {
        const decision = resolveAutonomy('dangerous.operation', 'confirm', level);
        expect(decision.action).toBe('confirm');
        expect(decision.overridden).toBe(false);
        expect(decision.reason).toContain("Base action 'confirm'");
      });

      it('does not force confirmation for dangerous tools', () => {
        for (const tool of dangerousTools) {
          const decision = resolveAutonomy(tool, 'silent', level);
          expect(decision.action).toBe('silent');
        }
      });
    });

    describe('action matrix coverage', () => {
      it('handles all combinations of levels and base actions', () => {
        const levels: AutonomyLevel[] = ['conservative', 'standard', 'autonomous'];
        const actions: HookAction[] = ['silent', 'log', 'confirm'];
        const tools = ['file.write', 'file.read', 'custom.tool'];

        for (const level of levels) {
          for (const action of actions) {
            for (const tool of tools) {
              const decision = resolveAutonomy(tool, action, level);
              // The resulting action must be one of the known hook actions,
              // not merely defined.
              expect(actions).toContain(decision.action);
              expect(decision.level).toBe(level);
              expect(decision.reason).toBeTruthy();
              expect(typeof decision.overridden).toBe('boolean');
            }
          }
        }
      });
    });

    describe('metadata', () => {
      it('always includes reason and level', () => {
        const decision = resolveAutonomy('file.write', 'silent', 'conservative');
        expect(decision.reason).toBeTruthy();
        expect(decision.reason.length).toBeGreaterThan(0);
        expect(decision.level).toBe('conservative');
      });

      it('marks overridden correctly', () => {
        // Conservative mode overrides dangerous tools only
        expect(resolveAutonomy('file.write', 'silent', 'conservative').overridden).toBe(true);
        expect(resolveAutonomy('file.read', 'silent', 'conservative').overridden).toBe(false);
        // Autonomous mode does not override
        expect(resolveAutonomy('shell.exec', 'log', 'autonomous').overridden).toBe(false);
        expect(resolveAutonomy('shell.exec', 'silent', 'autonomous').overridden).toBe(false);
      });
    });
  });
});
+115
View File
@@ -0,0 +1,115 @@
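/**
 * Autonomy-aware tool execution policy.
 *
 * Decides whether a tool call keeps the action chosen by the hook engine or
 * is escalated to require confirmation, based on the configured autonomy
 * level and whether the tool is classified as dangerous. This module only
 * escalates to confirmation; it never produces a denial on its own.
 */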
import type { AutonomyLevel } from '../config/schema.js';
import type { HookAction } from './types.js';
export type { AutonomyLevel } from '../config/schema.js';
/**
 * Outcome of applying the autonomy policy to a single tool invocation.
 */
export interface AutonomyDecision {
  /** Autonomy level the decision was evaluated under. */
  level: AutonomyLevel;
  /** Hook action that should actually be carried out. */
  action: HookAction;
  /** True when the autonomy policy replaced the base action. */
  overridden: boolean;
  /** Human-readable justification for the resulting action. */
  reason: string;
}
/**
 * Tool names classified as dangerous for autonomy purposes.
 *
 * Any tool name not listed here is treated as non-dangerous.
 */
const DANGEROUS_TOOLS = new Set([
  'file.write',
  'file.edit',
  'file.patch',
  'shell.exec',
  'process.start',
  'process.kill',
]);

/**
 * Resolve the appropriate hook action for a tool given the autonomy level.
 *
 * - `conservative` and `standard`: a dangerous tool whose base action is not
 *   already `'confirm'` is escalated to `'confirm'`; everything else keeps its
 *   base action. The two levels currently yield the same action and differ
 *   only in the reason text. `baseAction` carries no information about
 *   whether it came from an explicit hook, so even an explicit `'silent'` is
 *   escalated for dangerous tools.
 * - `autonomous`: the base action is always kept.
 * - Any other value (only reachable if the level bypassed type checking, e.g.
 *   unvalidated config): fails closed, i.e. dangerous tools are escalated to
 *   `'confirm'` rather than silently inheriting a permissive base action.
 *
 * @param toolName - The tool being executed.
 * @param baseAction - The base action from the HookEngine (if any).
 * @param level - The autonomy level to apply.
 * @returns An AutonomyDecision with the final action and metadata.
 */
export function resolveAutonomy(
  toolName: string,
  baseAction: HookAction,
  level: AutonomyLevel,
): AutonomyDecision {
  const isDangerous = DANGEROUS_TOOLS.has(toolName);
  // Escalation is only an override when it actually changes the action.
  const mustEscalate = isDangerous && baseAction !== 'confirm';

  // Keep the hook engine's action untouched.
  const keepBase = (reason: string): AutonomyDecision => ({
    action: baseAction,
    overridden: false,
    reason,
    level,
  });
  // Replace the hook engine's action with a confirmation requirement.
  const escalate = (reason: string): AutonomyDecision => ({
    action: 'confirm',
    overridden: true,
    reason,
    level,
  });

  switch (level) {
    case 'conservative':
      if (mustEscalate) {
        return escalate(`Tool '${toolName}' requires confirmation in conservative mode`);
      }
      return isDangerous
        ? keepBase(`Tool '${toolName}' already requires confirmation`)
        : keepBase(`Tool '${toolName}' is safe in conservative mode`);

    case 'standard':
      return mustEscalate
        ? escalate(
            `Tool '${toolName}' requires confirmation in standard mode (dangerous tool without explicit hook)`,
          )
        : keepBase(`Base action '${baseAction}' applied in standard mode`);

    case 'autonomous':
      return keepBase(`Base action '${baseAction}' applied in autonomous mode`);

    default:
      // Unrecognised level: fail closed so a misconfiguration cannot silently
      // grant autonomous behaviour to dangerous tools.
      return mustEscalate
        ? escalate(
            `Unknown autonomy level; requiring confirmation for dangerous tool '${toolName}'`,
          )
        : keepBase('Unknown autonomy level; using base action');
  }
}
+2
View File
@@ -1,2 +1,4 @@
export { HookEngine } from './engine.js';
export { resolveAutonomy } from './autonomy.js';
export type { HookAction, HookResult, PendingConfirmation, HookConfig } from './types.js';
export type { AutonomyLevel, AutonomyDecision } from './autonomy.js';