feat(policy): enforce truthfulness and autonomy guardrails
Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials. Wire policy through prompt assembly, tool execution context, and daemon/gateway agent paths; update tests and planning state for Phase 3 PR #2 completion.
This commit is contained in:
@@ -0,0 +1,115 @@
|
||||
/**
 * Autonomy-aware tool execution policy.
 *
 * Determines whether a tool action should proceed automatically, require
 * confirmation, or be denied based on the configured autonomy level and
 * tool characteristics.
 */
|
||||
|
||||
import type { AutonomyLevel } from '../config/schema.js';
|
||||
import type { HookAction } from './types.js';
|
||||
|
||||
export type { AutonomyLevel } from '../config/schema.js';
|
||||
|
||||
/**
 * Outcome of applying the autonomy policy to a single tool call, together
 * with the metadata needed to explain how that outcome was reached.
 */
export interface AutonomyDecision {
  /** Hook action that should actually be applied to the tool call. */
  action: HookAction;

  /** `true` when the autonomy policy replaced the incoming base action. */
  overridden: boolean;

  /** Human-readable explanation for the chosen (or overridden) action. */
  reason: string;

  /** Autonomy level under which this decision was made. */
  level: AutonomyLevel;
}
|
||||
|
||||
/**
 * Names of the tools that the autonomy policy classifies as dangerous.
 *
 * `resolveAutonomy` looks tool names up in this set to decide whether a call
 * must be escalated to confirmation. Any tool name not listed here is treated
 * as safe by that lookup.
 */
const DANGEROUS_TOOLS = new Set<string>([
  // File tools.
  'file.write', 'file.edit', 'file.patch',
  // Shell tool.
  'shell.exec',
  // Process tools.
  'process.start', 'process.kill',
]);
|
||||
|
||||
/**
 * Resolve the hook action for a tool call under the given autonomy level.
 *
 * Behaviour per level:
 * - `conservative`: a tool listed in `DANGEROUS_TOOLS` always resolves to
 *   `'confirm'`; the decision is marked as overridden only when the base
 *   action was not already `'confirm'`. Other tools keep their base action.
 * - `standard`: a dangerous tool whose base action is not `'confirm'` is
 *   escalated to `'confirm'`; everything else keeps its base action.
 * - `autonomous`: the base action is always returned unchanged.
 * - any other value: handled fail-closed. Dangerous tools are escalated to
 *   `'confirm'` and other tools keep their base action, so an unexpected
 *   level reaching this function at runtime cannot silently lift the
 *   confirmation requirement.
 *
 * @param toolName - The tool being executed.
 * @param baseAction - The base action from the HookEngine (if any).
 * @param level - The autonomy level to apply.
 * @returns An AutonomyDecision with the final action and metadata.
 */
export function resolveAutonomy(
  toolName: string,
  baseAction: HookAction,
  level: AutonomyLevel,
): AutonomyDecision {
  const isDangerous = DANGEROUS_TOOLS.has(toolName);
  // NOTE(review): every base action other than 'confirm' is escalated for
  // dangerous tools. If HookAction contains an action stricter than 'confirm',
  // this would replace it with 'confirm' — verify against the HookAction type.
  const needsEscalation = isDangerous && baseAction !== 'confirm';

  // Conservative mode: confirm all dangerous tools, even if the base action is silent.
  if (level === 'conservative') {
    if (needsEscalation) {
      return {
        action: 'confirm',
        overridden: true,
        reason: `Tool '${toolName}' requires confirmation in conservative mode`,
        level,
      };
    }
    if (isDangerous) {
      return {
        action: 'confirm',
        overridden: false,
        reason: `Tool '${toolName}' already requires confirmation`,
        level,
      };
    }
    // Tools outside DANGEROUS_TOOLS keep whatever the hook engine decided.
    return {
      action: baseAction,
      overridden: false,
      reason: `Tool '${toolName}' is safe in conservative mode`,
      level,
    };
  }

  // Standard mode: dangerous tools are escalated unless already set to confirm.
  if (level === 'standard') {
    if (needsEscalation) {
      return {
        action: 'confirm',
        overridden: true,
        reason: `Tool '${toolName}' requires confirmation in standard mode (dangerous tool without explicit hook)`,
        level,
      };
    }
    return {
      action: baseAction,
      overridden: false,
      reason: `Base action '${baseAction}' applied in standard mode`,
      level,
    };
  }

  // Autonomous mode: defer entirely to the explicit hook policy.
  if (level === 'autonomous') {
    return {
      action: baseAction,
      overridden: false,
      reason: `Base action '${baseAction}' applied in autonomous mode`,
      level,
    };
  }

  // Fallback for a level the type system says cannot occur. Types are erased
  // at runtime, so an unvalidated value can still arrive here; fail closed
  // for dangerous tools instead of letting them through on the base action.
  if (needsEscalation) {
    return {
      action: 'confirm',
      overridden: true,
      reason: `Unknown autonomy level; tool '${toolName}' requires confirmation as a precaution`,
      level,
    };
  }
  return {
    action: baseAction,
    overridden: false,
    reason: 'Unknown autonomy level; using base action',
    level,
  };
}
|
||||
Reference in New Issue
Block a user