feat(policy): enforce truthfulness and autonomy guardrails

Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials.

Wire policy through prompt assembly, tool execution context, and daemon/gateway agent paths; update tests and planning state for Phase 3 PR #2 completion.
This commit is contained in:
William Valentin
2026-02-12 16:06:45 -08:00
parent 125af4e832
commit 90ce622080
18 changed files with 1172 additions and 104 deletions
+188
View File
@@ -215,3 +215,191 @@ describe('configSchema automation', () => {
expect(result.automation.cron[0].timezone).toBe('America/New_York');
});
});
// Covers the `intents` section of configSchema: default values, a fully
// specified rule surviving the parse unchanged, and rejection of a rule whose
// target type is outside the allowed enum.
describe('configSchema — intents', () => {
  // Base input shared by every test in this suite; the `intents` key is
  // deliberately absent so the defaults test exercises the schema defaults.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults intents to disabled with no rules', () => {
    const result = configSchema.parse(minimalConfig);
    expect(result.intents.enabled).toBe(false);
    // The threshold default was previously unchecked.
    expect(result.intents.match_threshold).toBe(0.7);
    expect(result.intents.rules).toEqual([]);
  });

  it('accepts intent rule config', () => {
    const result = configSchema.parse({
      ...minimalConfig,
      intents: {
        enabled: true,
        match_threshold: 0.6,
        rules: [
          {
            name: 'deploy-rule',
            patterns: ['deploy *'],
            target: { type: 'agent', name: 'coder' },
            priority: 5,
            enabled: true,
          },
        ],
      },
    });
    expect(result.intents.enabled).toBe(true);
    // Explicit values must win over the schema defaults (0.7 / 0 / true).
    expect(result.intents.match_threshold).toBe(0.6);
    expect(result.intents.rules).toHaveLength(1);
    expect(result.intents.rules[0].name).toBe('deploy-rule');
    expect(result.intents.rules[0].patterns).toEqual(['deploy *']);
    expect(result.intents.rules[0].target.type).toBe('agent');
    expect(result.intents.rules[0].target.name).toBe('coder');
    expect(result.intents.rules[0].priority).toBe(5);
    expect(result.intents.rules[0].enabled).toBe(true);
  });

  it('rejects intent rules with an unknown target type', () => {
    // Only 'agent' and 'skill' are valid target types.
    expect(() => configSchema.parse({
      ...minimalConfig,
      intents: {
        rules: [
          {
            name: 'bad-target',
            patterns: ['deploy *'],
            target: { type: 'tool', name: 'coder' },
          },
        ],
      },
    })).toThrow();
  });
});
// Checks the values `routing_policy` takes when the section is omitted.
describe('configSchema — routing_policy', () => {
  // Input without a `routing_policy` key, so every field comes from defaults.
  const baseConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults routing_policy values', () => {
    const parsed = configSchema.parse(baseConfig);
    const policy = parsed.routing_policy;

    expect(policy.enabled).toBe(false);
    expect(policy.fast_path_threshold).toBe(0.85);
    expect(policy.llm_threshold).toBe(0.5);
    expect(policy.default_path).toBe('llm');
  });
});
// Checks every defaulted field of the `history_index` section when the
// section is omitted from the input.
describe('configSchema — history_index', () => {
  // Input without a `history_index` key, so every field comes from defaults.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults history indexing config', () => {
    const result = configSchema.parse(minimalConfig);
    expect(result.history_index.enabled).toBe(false);
    expect(result.history_index.max_keywords).toBe(8);
    expect(result.history_index.search_limit).toBe(10);
    // These two defaults were previously unchecked.
    expect(result.history_index.min_score).toBe(0.15);
    expect(result.history_index.routing_boost).toBe(0.05);
  });
});
// Covers `memory.injection_strategy` and `memory.max_injection_tokens`:
// their defaults and an explicit override.
describe('configSchema — memory injection strategy', () => {
  // Shared input; tests spread it and add a `memory` section when needed.
  const baseConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults memory injection settings', () => {
    const { memory } = configSchema.parse(baseConfig);

    expect(memory.injection_strategy).toBe('all');
    expect(memory.max_injection_tokens).toBe(2000);
  });

  it('accepts adaptive memory injection settings', () => {
    const input = {
      ...baseConfig,
      memory: {
        injection_strategy: 'adaptive',
        max_injection_tokens: 1200,
      },
    };
    const { memory } = configSchema.parse(input);

    expect(memory.injection_strategy).toBe('adaptive');
    expect(memory.max_injection_tokens).toBe(1200);
  });
});
// Covers `compaction.importance_threshold`: its default and an override.
describe('configSchema — compaction importance threshold', () => {
  // Shared input; tests spread it and add a `compaction` section when needed.
  const baseConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults compaction importance threshold to disabled behavior', () => {
    const { compaction } = configSchema.parse(baseConfig);

    expect(compaction.importance_threshold).toBe(1);
  });

  it('accepts a custom importance threshold', () => {
    const input = {
      ...baseConfig,
      compaction: {
        importance_threshold: 0.5,
      },
    };
    const { compaction } = configSchema.parse(input);

    expect(compaction.importance_threshold).toBe(0.5);
  });
});
// Covers `prompt.context_level`: default, a valid override, and rejection of
// a value outside the enum.
describe('configSchema — prompt context level', () => {
  // Shared input; tests spread it and add a `prompt` section when needed.
  const baseConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults prompt.context_level to normal', () => {
    const { prompt } = configSchema.parse(baseConfig);

    expect(prompt.context_level).toBe('normal');
  });

  it('accepts valid context levels', () => {
    const input = {
      ...baseConfig,
      prompt: {
        context_level: 'debug',
      },
    };
    const { prompt } = configSchema.parse(input);

    expect(prompt.context_level).toBe('debug');
  });

  it('rejects invalid context levels', () => {
    // Parsing is deferred into a callback so `toThrow` can observe the error.
    const parseInvalid = () => configSchema.parse({
      ...baseConfig,
      prompt: {
        context_level: 'verbose',
      },
    });

    expect(parseInvalid).toThrow();
  });
});
// Covers `agents.truthfulness_mode` and `agents.autonomy_level`: defaults,
// explicit valid values, and rejection of values outside each enum.
describe('configSchema — agents truthfulness/autonomy', () => {
  // Shared input; tests spread it and add an `agents` section when needed.
  const baseConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults to standard truthfulness and autonomy', () => {
    const { agents } = configSchema.parse(baseConfig);

    expect(agents.truthfulness_mode).toBe('standard');
    expect(agents.autonomy_level).toBe('standard');
  });

  it('accepts explicit truthfulness and autonomy modes', () => {
    const input = {
      ...baseConfig,
      agents: {
        truthfulness_mode: 'strict',
        autonomy_level: 'conservative',
      },
    };
    const { agents } = configSchema.parse(input);

    expect(agents.truthfulness_mode).toBe('strict');
    expect(agents.autonomy_level).toBe('conservative');
  });

  it('rejects invalid truthfulness_mode', () => {
    // Parsing is deferred into a callback so `toThrow` can observe the error.
    const parseInvalid = () => configSchema.parse({
      ...baseConfig,
      agents: {
        truthfulness_mode: 'always',
      },
    });

    expect(parseInvalid).toThrow();
  });

  it('rejects invalid autonomy_level', () => {
    const parseInvalid = () => configSchema.parse({
      ...baseConfig,
      agents: {
        autonomy_level: 'manual',
      },
    });

    expect(parseInvalid).toThrow();
  });
});
+59
View File
@@ -212,6 +212,9 @@ const automationSchema = z.object({
heartbeat: heartbeatSchema,
}).default({});
/** Allowed values for `agents.truthfulness_mode`; any other string fails validation. */
const truthfulnessModeSchema = z.enum(['strict', 'standard', 'relaxed']);
/** Allowed values for `agents.autonomy_level`; any other string fails validation. */
const autonomyLevelSchema = z.enum(['conservative', 'standard', 'autonomous']);
const agentsSchema = z.object({
primary_tier: z.enum(['fast', 'default', 'complex', 'local']).default('default'),
delegation: z.object({
@@ -231,6 +234,10 @@ const agentsSchema = z.object({
max_delegation_depth: z.number().min(1).max(10).default(3),
/** Maximum tool-loop iterations before the agent stops. */
max_iterations: z.number().min(1).max(50).default(10),
/** Truthfulness enforcement level: strict | standard | relaxed. */
truthfulness_mode: truthfulnessModeSchema.default('standard'),
/** Autonomy level for tool execution: conservative | standard | autonomous. */
autonomy_level: autonomyLevelSchema.default('standard'),
}).default({});
/** Enum of embedding provider identifiers accepted by the config. */
const embeddingProviderSchema = z.enum(['openai', 'gemini', 'ollama', 'llamacpp', 'voyage']);
@@ -252,6 +259,8 @@ const memorySchema = z.object({
enabled: z.boolean().default(true),
dir: z.string().optional(), // Default: ~/.local/share/flynn/memory
auto_extract: z.boolean().default(true),
injection_strategy: z.enum(['all', 'recent', 'adaptive']).default('all'),
max_injection_tokens: z.number().min(100).max(10000).default(2000),
max_context_tokens: z.number().min(100).max(10000).default(2000),
embedding: embeddingSchema,
}).default({});
@@ -261,6 +270,7 @@ const compactionSchema = z.object({
threshold_pct: z.number().min(10).max(100).default(80),
keep_turns: z.number().min(1).max(50).default(4),
summary_max_tokens: z.number().min(128).max(4096).default(1024),
importance_threshold: z.number().min(0).max(1).default(1),
}).default({});
const discordSchema = z.object({
@@ -375,6 +385,34 @@ const routingSchema = z.object({
senders: z.record(z.string(), z.string()).default({}),
}).default({});
/** Kinds of target an intent rule may point at: an agent or a skill. */
const intentTargetTypeSchema = z.enum(['agent', 'skill']);
/**
 * A single intent rule: a named, non-empty list of patterns mapped to a target.
 * `name`, `patterns` and `target` are required; `priority` and `enabled` fall
 * back to defaults when omitted.
 */
const intentRuleSchema = z.object({
// Rule name; must be a non-empty string.
name: z.string().min(1),
// At least one pattern is required, and each pattern must be non-empty.
patterns: z.array(z.string().min(1)).min(1),
// Target the rule resolves to: a type from intentTargetTypeSchema plus a non-empty name.
target: z.object({
type: intentTargetTypeSchema,
name: z.string().min(1),
}),
// Defaults to 0. NOTE(review): ordering semantics (higher vs. lower wins) are not visible here — confirm against the matcher.
priority: z.number().default(0),
// Rules are enabled unless explicitly turned off.
enabled: z.boolean().default(true),
});
/**
 * Top-level `intents` config section. The whole section defaults to `{}`, so
 * an omitted section parses to disabled, threshold 0.7, and no rules.
 */
const intentsSchema = z.object({
// Off by default.
enabled: z.boolean().default(false),
// Must lie in [0, 1]; defaults to 0.7.
match_threshold: z.number().min(0).max(1).default(0.7),
// List of intent rules; empty when not provided.
rules: z.array(intentRuleSchema).default([]),
}).default({});
/**
 * Top-level `routing_policy` config section. The whole section defaults to
 * `{}`, so an omitted section parses to the per-field defaults below.
 *
 * NOTE(review): the two thresholds are validated independently; nothing here
 * enforces llm_threshold <= fast_path_threshold — confirm whether the consumer
 * relies on that ordering.
 */
const routingPolicySchema = z.object({
// Off by default.
enabled: z.boolean().default(false),
// Must lie in [0, 1]; defaults to 0.85.
fast_path_threshold: z.number().min(0).max(1).default(0.85),
// Must lie in [0, 1]; defaults to 0.5.
llm_threshold: z.number().min(0).max(1).default(0.5),
// Either 'fast' or 'llm'; defaults to 'llm'.
default_path: z.enum(['fast', 'llm']).default('llm'),
}).default({});
/** Allowed values for `prompt.context_level`; any other string fails validation. */
const contextLevelSchema = z.enum(['minimal', 'normal', 'detailed', 'debug']);
const promptSchema = z.object({
/** Additional directories to search for prompt template files. */
search_dirs: z.array(z.string()).default([]),
@@ -383,12 +421,22 @@ const promptSchema = z.object({
name: z.string(),
content: z.string(),
})).default([]),
/** Prompt context depth control: minimal | normal | detailed | debug. */
context_level: contextLevelSchema.default('normal'),
}).default({});
/** Top-level `sessions` config section; defaults to `{}` when omitted. */
const sessionsSchema = z.object({
// Any string is accepted; defaults to '30d'. NOTE(review): presumably a duration parsed elsewhere — the format is not validated here.
ttl: z.string().default('30d'),
}).default({});
/**
 * Top-level `history_index` config section. The whole section defaults to
 * `{}`, so an omitted section parses to the per-field defaults below.
 *
 * NOTE(review): max_keywords and search_limit accept fractional numbers (no
 * integer constraint) — confirm whether the consumer expects whole counts.
 */
const historyIndexSchema = z.object({
// Off by default.
enabled: z.boolean().default(false),
// Must lie in [1, 20]; defaults to 8.
max_keywords: z.number().min(1).max(20).default(8),
// Must lie in [1, 100]; defaults to 10.
search_limit: z.number().min(1).max(100).default(10),
// Must lie in [0, 1]; defaults to 0.15.
min_score: z.number().min(0).max(1).default(0.15),
// Must lie in [0, 0.2]; defaults to 0.05.
routing_boost: z.number().min(0).max(0.2).default(0.05),
}).default({});
/** Log level enum, including 'silent'; defaults to 'info' when omitted. */
const logLevelSchema = z.enum(['debug', 'info', 'warn', 'error', 'silent']).default('info');
/** Audit level enum (no 'silent' option); defaults to 'debug' when omitted. */
const auditLevelSchema = z.enum(['debug', 'info', 'warn', 'error']).default('debug');
@@ -432,6 +480,9 @@ export const configSchema = z.object({
sandbox: sandboxSchema,
agent_configs: agentConfigsSchema,
routing: routingSchema,
intents: intentsSchema,
routing_policy: routingPolicySchema,
history_index: historyIndexSchema,
sessions: sessionsSchema,
pairing: pairingSchema,
});
@@ -453,6 +504,7 @@ export type DiscordConfig = z.infer<typeof discordSchema>;
// Static types derived from the corresponding zod schemas via `z.infer`.
export type SlackConfig = z.infer<typeof slackSchema>;
export type WhatsAppConfig = z.infer<typeof whatsappSchema>;
export type RetryPolicyConfig = z.infer<typeof retrySchema>;
/** Union of the string literals accepted for `prompt.context_level`. */
export type ContextLevel = z.infer<typeof contextLevelSchema>;
export type PromptConfig = z.infer<typeof promptSchema>;
export type ToolProfile = z.infer<typeof toolProfileEnum>;
export type ToolOverrideConfig = z.infer<typeof toolOverrideSchema>;
@@ -460,6 +512,11 @@ export type ToolsConfig = z.infer<typeof toolsSchema>;
// Static types derived from the corresponding zod schemas via `z.infer`.
export type SandboxConfig = z.infer<typeof sandboxSchema>;
export type AgentConfigEntry = z.infer<typeof agentConfigEntrySchema>;
export type RoutingConfig = z.infer<typeof routingSchema>;
/** Union of intent target kinds: 'agent' | 'skill'. */
export type IntentTargetType = z.infer<typeof intentTargetTypeSchema>;
/** Parsed shape of one intent rule, with `priority` and `enabled` defaults applied. */
export type IntentRuleConfig = z.infer<typeof intentRuleSchema>;
/** Parsed shape of the `intents` config section. */
export type IntentsConfig = z.infer<typeof intentsSchema>;
/** Parsed shape of the `routing_policy` config section. */
export type RoutingPolicyConfig = z.infer<typeof routingPolicySchema>;
/** Parsed shape of the `history_index` config section. */
export type HistoryIndexConfig = z.infer<typeof historyIndexSchema>;
export type ServerConfig = z.infer<typeof serverSchema>;
export type SessionsConfig = z.infer<typeof sessionsSchema>;
export type ThinkingConfig = z.infer<typeof thinkingSchema>;
@@ -475,3 +532,5 @@ export type PairingCodeConfig = z.infer<typeof pairingSchema>;
// Static types derived from the corresponding zod schemas via `z.infer`.
export type LogLevel = z.infer<typeof logLevelSchema>;
export type AuditConfig = z.infer<typeof auditSchema>;
export type AuditLevel = z.infer<typeof auditLevelSchema>;
/** Union of the string literals accepted for `agents.truthfulness_mode`. */
export type TruthfulnessMode = z.infer<typeof truthfulnessModeSchema>;
/** Union of the string literals accepted for `agents.autonomy_level`. */
export type AutonomyLevel = z.infer<typeof autonomyLevelSchema>;