feat(policy): enforce truthfulness and autonomy guardrails
Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials. Wire policy through prompt assembly, tool execution context, and daemon/gateway agent paths; update tests and planning state for Phase 3 PR #2 completion.
This commit is contained in:
@@ -215,3 +215,191 @@ describe('configSchema automation', () => {
|
||||
expect(result.automation.cron[0].timezone).toBe('America/New_York');
|
||||
});
|
||||
});
|
||||
|
||||
// Exercises the top-level `intents` section of configSchema:
// section defaults, per-rule defaults, explicit rule config, and rule validation.
describe('configSchema — intents', () => {
  // Base config shared by every test below; tests spread it and add `intents`.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults intents to disabled with no rules', () => {
    const result = configSchema.parse(minimalConfig);
    expect(result.intents.enabled).toBe(false);
    // The schema declares 0.7 as the default match threshold.
    expect(result.intents.match_threshold).toBe(0.7);
    expect(result.intents.rules).toEqual([]);
  });

  it('accepts intent rule config', () => {
    const result = configSchema.parse({
      ...minimalConfig,
      intents: {
        enabled: true,
        match_threshold: 0.6,
        rules: [
          {
            name: 'deploy-rule',
            patterns: ['deploy *'],
            target: { type: 'agent', name: 'coder' },
            priority: 5,
            enabled: true,
          },
        ],
      },
    });

    expect(result.intents.enabled).toBe(true);
    expect(result.intents.match_threshold).toBe(0.6);
    expect(result.intents.rules).toHaveLength(1);
    expect(result.intents.rules[0].name).toBe('deploy-rule');
    expect(result.intents.rules[0].patterns).toEqual(['deploy *']);
    expect(result.intents.rules[0].target.type).toBe('agent');
    expect(result.intents.rules[0].target.name).toBe('coder');
    expect(result.intents.rules[0].priority).toBe(5);
    expect(result.intents.rules[0].enabled).toBe(true);
  });

  it('applies rule-level defaults for priority and enabled', () => {
    const result = configSchema.parse({
      ...minimalConfig,
      intents: {
        rules: [
          {
            name: 'notes-rule',
            patterns: ['note *'],
            target: { type: 'skill', name: 'notes' },
          },
        ],
      },
    });

    expect(result.intents.rules[0].priority).toBe(0);
    expect(result.intents.rules[0].enabled).toBe(true);
  });

  it('rejects a rule without patterns', () => {
    // `patterns` must contain at least one non-empty string.
    expect(() => configSchema.parse({
      ...minimalConfig,
      intents: {
        rules: [
          {
            name: 'empty-rule',
            patterns: [],
            target: { type: 'agent', name: 'coder' },
          },
        ],
      },
    })).toThrow();
  });
});
|
||||
|
||||
// Verifies the values `routing_policy` takes when the config omits that section.
describe('configSchema — routing_policy', () => {
  // Base config without any `routing_policy` key.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults routing_policy values', () => {
    const { routing_policy: policy } = configSchema.parse(minimalConfig);

    expect(policy.enabled).toBe(false);
    expect(policy.fast_path_threshold).toBe(0.85);
    expect(policy.llm_threshold).toBe(0.5);
    expect(policy.default_path).toBe('llm');
  });
});
|
||||
|
||||
// Verifies every default of the `history_index` section of configSchema.
describe('configSchema — history_index', () => {
  // Base config without any `history_index` key.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults history indexing config', () => {
    const result = configSchema.parse(minimalConfig);
    expect(result.history_index.enabled).toBe(false);
    expect(result.history_index.max_keywords).toBe(8);
    expect(result.history_index.search_limit).toBe(10);
    // The scoring knobs have schema defaults too; pin them so a change is noticed.
    expect(result.history_index.min_score).toBe(0.15);
    expect(result.history_index.routing_boost).toBe(0.05);
  });
});
|
||||
|
||||
// Covers `memory.injection_strategy` and `memory.max_injection_tokens`:
// the defaults and an explicit adaptive configuration.
describe('configSchema — memory injection strategy', () => {
  // Base config without any `memory` key.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults memory injection settings', () => {
    const { memory } = configSchema.parse(minimalConfig);

    expect(memory.injection_strategy).toBe('all');
    expect(memory.max_injection_tokens).toBe(2000);
  });

  it('accepts adaptive memory injection settings', () => {
    const adaptiveConfig = {
      ...minimalConfig,
      memory: {
        injection_strategy: 'adaptive',
        max_injection_tokens: 1200,
      },
    };
    const { memory } = configSchema.parse(adaptiveConfig);

    expect(memory.injection_strategy).toBe('adaptive');
    expect(memory.max_injection_tokens).toBe(1200);
  });
});
|
||||
|
||||
// Covers `compaction.importance_threshold`: default, custom value, and range validation.
describe('configSchema — compaction importance threshold', () => {
  // Base config without any `compaction` key.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults compaction importance threshold to disabled behavior', () => {
    const result = configSchema.parse(minimalConfig);
    expect(result.compaction.importance_threshold).toBe(1);
  });

  it('accepts a custom importance threshold', () => {
    const result = configSchema.parse({
      ...minimalConfig,
      compaction: {
        importance_threshold: 0.5,
      },
    });
    expect(result.compaction.importance_threshold).toBe(0.5);
  });

  it('rejects importance thresholds outside the 0..1 range', () => {
    // The schema bounds the value with min(0) and max(1).
    for (const outOfRange of [-0.1, 1.5]) {
      expect(() => configSchema.parse({
        ...minimalConfig,
        compaction: {
          importance_threshold: outOfRange,
        },
      })).toThrow();
    }
  });
});
|
||||
|
||||
// Covers `prompt.context_level`: default, every accepted enum member, and rejection.
describe('configSchema — prompt context level', () => {
  // Base config without any `prompt` key.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  it('defaults prompt.context_level to normal', () => {
    const result = configSchema.parse(minimalConfig);
    expect(result.prompt.context_level).toBe('normal');
  });

  it('accepts valid context levels', () => {
    // Check every member of the enum, so dropping one from the schema fails here.
    for (const level of ['minimal', 'normal', 'detailed', 'debug']) {
      const result = configSchema.parse({
        ...minimalConfig,
        prompt: {
          context_level: level,
        },
      });

      expect(result.prompt.context_level).toBe(level);
    }
  });

  it('rejects invalid context levels', () => {
    expect(() => configSchema.parse({
      ...minimalConfig,
      prompt: {
        context_level: 'verbose',
      },
    })).toThrow();
  });
});
|
||||
|
||||
// Covers `agents.truthfulness_mode` and `agents.autonomy_level`:
// defaults, explicit values, and rejection of values outside the enums.
describe('configSchema — agents truthfulness/autonomy', () => {
  // Base config without any `agents` key.
  const minimalConfig = {
    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
    models: { default: { provider: 'anthropic', model: 'claude-3' } },
  };

  // Parses the base config with the given `agents` section merged in.
  const parseWithAgents = (agents) => configSchema.parse({ ...minimalConfig, agents });

  it('defaults to standard truthfulness and autonomy', () => {
    const { agents } = configSchema.parse(minimalConfig);

    expect(agents.truthfulness_mode).toBe('standard');
    expect(agents.autonomy_level).toBe('standard');
  });

  it('accepts explicit truthfulness and autonomy modes', () => {
    const { agents } = parseWithAgents({
      truthfulness_mode: 'strict',
      autonomy_level: 'conservative',
    });

    expect(agents.truthfulness_mode).toBe('strict');
    expect(agents.autonomy_level).toBe('conservative');
  });

  it('rejects invalid truthfulness_mode', () => {
    const parseInvalid = () => parseWithAgents({ truthfulness_mode: 'always' });

    expect(parseInvalid).toThrow();
  });

  it('rejects invalid autonomy_level', () => {
    const parseInvalid = () => parseWithAgents({ autonomy_level: 'manual' });

    expect(parseInvalid).toThrow();
  });
});
|
||||
|
||||
@@ -212,6 +212,9 @@ const automationSchema = z.object({
|
||||
heartbeat: heartbeatSchema,
|
||||
}).default({});
|
||||
|
||||
/** Allowed values for `agents.truthfulness_mode` (the agents schema defaults it to 'standard'). */
const truthfulnessModeSchema = z.enum(['strict', 'standard', 'relaxed']);
|
||||
/** Allowed values for `agents.autonomy_level` (the agents schema defaults it to 'standard'). */
const autonomyLevelSchema = z.enum(['conservative', 'standard', 'autonomous']);
|
||||
|
||||
const agentsSchema = z.object({
|
||||
primary_tier: z.enum(['fast', 'default', 'complex', 'local']).default('default'),
|
||||
delegation: z.object({
|
||||
@@ -231,6 +234,10 @@ const agentsSchema = z.object({
|
||||
max_delegation_depth: z.number().min(1).max(10).default(3),
|
||||
/** Maximum tool-loop iterations before the agent stops. */
|
||||
max_iterations: z.number().min(1).max(50).default(10),
|
||||
/** Truthfulness enforcement level: strict | standard | relaxed. */
|
||||
truthfulness_mode: truthfulnessModeSchema.default('standard'),
|
||||
/** Autonomy level for tool execution: conservative | standard | autonomous. */
|
||||
autonomy_level: autonomyLevelSchema.default('standard'),
|
||||
}).default({});
|
||||
|
||||
const embeddingProviderSchema = z.enum(['openai', 'gemini', 'ollama', 'llamacpp', 'voyage']);
|
||||
@@ -252,6 +259,8 @@ const memorySchema = z.object({
|
||||
enabled: z.boolean().default(true),
|
||||
dir: z.string().optional(), // Default: ~/.local/share/flynn/memory
|
||||
auto_extract: z.boolean().default(true),
|
||||
injection_strategy: z.enum(['all', 'recent', 'adaptive']).default('all'),
|
||||
max_injection_tokens: z.number().min(100).max(10000).default(2000),
|
||||
max_context_tokens: z.number().min(100).max(10000).default(2000),
|
||||
embedding: embeddingSchema,
|
||||
}).default({});
|
||||
@@ -261,6 +270,7 @@ const compactionSchema = z.object({
|
||||
threshold_pct: z.number().min(10).max(100).default(80),
|
||||
keep_turns: z.number().min(1).max(50).default(4),
|
||||
summary_max_tokens: z.number().min(128).max(4096).default(1024),
|
||||
importance_threshold: z.number().min(0).max(1).default(1),
|
||||
}).default({});
|
||||
|
||||
const discordSchema = z.object({
|
||||
@@ -375,6 +385,34 @@ const routingSchema = z.object({
|
||||
senders: z.record(z.string(), z.string()).default({}),
|
||||
}).default({});
|
||||
|
||||
/** Kinds of target an intent rule may point at; used as `target.type` in the intent rule schema. */
const intentTargetTypeSchema = z.enum(['agent', 'skill']);
|
||||
|
||||
/**
 * A single intent rule: a named list of patterns mapped to a target.
 *
 * `name`, every pattern, and `target.name` must be non-empty strings, and at
 * least one pattern is required. `priority` and `enabled` are optional.
 */
const intentRuleSchema = z.object({
  name: z.string().min(1),
  patterns: z
    .array(z.string().min(1))
    .min(1),
  target: z.object({
    type: intentTargetTypeSchema,
    name: z.string().min(1),
  }),
  // Falls back to 0 when the rule does not set a priority.
  priority: z
    .number()
    .default(0),
  // Rules are enabled unless explicitly switched off.
  enabled: z
    .boolean()
    .default(true),
});
|
||||
|
||||
/**
 * Top-level `intents` config section.
 *
 * The whole section is optional: when omitted it parses to disabled, a match
 * threshold of 0.7, and an empty rule list.
 */
const intentsSchema = z
  .object({
    enabled: z.boolean().default(false),
    // Must lie within [0, 1].
    match_threshold: z
      .number()
      .min(0)
      .max(1)
      .default(0.7),
    rules: z.array(intentRuleSchema).default([]),
  })
  .default({});
|
||||
|
||||
/**
 * Top-level `routing_policy` config section.
 *
 * The whole section is optional. Both thresholds must lie within [0, 1];
 * `default_path` is either 'fast' or 'llm'.
 */
const routingPolicySchema = z
  .object({
    enabled: z.boolean().default(false),
    fast_path_threshold: z
      .number()
      .gte(0)
      .lte(1)
      .default(0.85),
    llm_threshold: z
      .number()
      .gte(0)
      .lte(1)
      .default(0.5),
    default_path: z
      .enum(['fast', 'llm'])
      .default('llm'),
  })
  .default({});
|
||||
|
||||
/** Allowed values for `prompt.context_level`. */
const contextLevelSchema = z.enum(['minimal', 'normal', 'detailed', 'debug']);
|
||||
|
||||
const promptSchema = z.object({
|
||||
/** Additional directories to search for prompt template files. */
|
||||
search_dirs: z.array(z.string()).default([]),
|
||||
@@ -383,12 +421,22 @@ const promptSchema = z.object({
|
||||
name: z.string(),
|
||||
content: z.string(),
|
||||
})).default([]),
|
||||
/** Prompt context depth control: minimal | normal | detailed | debug. */
|
||||
context_level: contextLevelSchema.default('normal'),
|
||||
}).default({});
|
||||
|
||||
/**
 * Top-level `sessions` config section.
 *
 * The section is optional; `ttl` is a string that defaults to '30d'.
 */
const sessionsSchema = z
  .object({
    ttl: z
      .string()
      .default('30d'),
  })
  .default({});
|
||||
|
||||
/**
 * Top-level `history_index` config section.
 *
 * The section is optional and disabled by default. Each numeric field is
 * range-checked and has its own default.
 */
const historyIndexSchema = z
  .object({
    enabled: z.boolean().default(false),
    // 1..20, default 8.
    max_keywords: z
      .number()
      .min(1)
      .max(20)
      .default(8),
    // 1..100, default 10.
    search_limit: z
      .number()
      .min(1)
      .max(100)
      .default(10),
    // 0..1, default 0.15.
    min_score: z
      .number()
      .min(0)
      .max(1)
      .default(0.15),
    // 0..0.2, default 0.05.
    routing_boost: z
      .number()
      .min(0)
      .max(0.2)
      .default(0.05),
  })
  .default({});
|
||||
|
||||
/** Log level enum; includes 'silent' and defaults to 'info'. */
const logLevelSchema = z.enum(['debug', 'info', 'warn', 'error', 'silent']).default('info');
|
||||
|
||||
/** Audit level enum; unlike the log level it has no 'silent' member and defaults to 'debug'. */
const auditLevelSchema = z.enum(['debug', 'info', 'warn', 'error']).default('debug');
|
||||
@@ -432,6 +480,9 @@ export const configSchema = z.object({
|
||||
sandbox: sandboxSchema,
|
||||
agent_configs: agentConfigsSchema,
|
||||
routing: routingSchema,
|
||||
intents: intentsSchema,
|
||||
routing_policy: routingPolicySchema,
|
||||
history_index: historyIndexSchema,
|
||||
sessions: sessionsSchema,
|
||||
pairing: pairingSchema,
|
||||
});
|
||||
@@ -453,6 +504,7 @@ export type DiscordConfig = z.infer<typeof discordSchema>;
|
||||
export type SlackConfig = z.infer<typeof slackSchema>;
|
||||
export type WhatsAppConfig = z.infer<typeof whatsappSchema>;
|
||||
export type RetryPolicyConfig = z.infer<typeof retrySchema>;
|
||||
/** Union of the string literals accepted by `contextLevelSchema`. */
export type ContextLevel = z.infer<typeof contextLevelSchema>;
|
||||
export type PromptConfig = z.infer<typeof promptSchema>;
|
||||
export type ToolProfile = z.infer<typeof toolProfileEnum>;
|
||||
export type ToolOverrideConfig = z.infer<typeof toolOverrideSchema>;
|
||||
@@ -460,6 +512,11 @@ export type ToolsConfig = z.infer<typeof toolsSchema>;
|
||||
export type SandboxConfig = z.infer<typeof sandboxSchema>;
|
||||
export type AgentConfigEntry = z.infer<typeof agentConfigEntrySchema>;
|
||||
export type RoutingConfig = z.infer<typeof routingSchema>;
|
||||
/** Union of the string literals accepted by `intentTargetTypeSchema`. */
export type IntentTargetType = z.infer<typeof intentTargetTypeSchema>;
|
||||
/** Parsed shape of a single intent rule, inferred from `intentRuleSchema`. */
export type IntentRuleConfig = z.infer<typeof intentRuleSchema>;
|
||||
/** Parsed shape of the `intents` config section, inferred from `intentsSchema`. */
export type IntentsConfig = z.infer<typeof intentsSchema>;
|
||||
/** Parsed shape of the `routing_policy` config section, inferred from `routingPolicySchema`. */
export type RoutingPolicyConfig = z.infer<typeof routingPolicySchema>;
|
||||
/** Parsed shape of the `history_index` config section, inferred from `historyIndexSchema`. */
export type HistoryIndexConfig = z.infer<typeof historyIndexSchema>;
|
||||
export type ServerConfig = z.infer<typeof serverSchema>;
|
||||
export type SessionsConfig = z.infer<typeof sessionsSchema>;
|
||||
export type ThinkingConfig = z.infer<typeof thinkingSchema>;
|
||||
@@ -475,3 +532,5 @@ export type PairingCodeConfig = z.infer<typeof pairingSchema>;
|
||||
export type LogLevel = z.infer<typeof logLevelSchema>;
|
||||
export type AuditConfig = z.infer<typeof auditSchema>;
|
||||
export type AuditLevel = z.infer<typeof auditLevelSchema>;
|
||||
/** Union of the string literals accepted by `truthfulnessModeSchema`. */
export type TruthfulnessMode = z.infer<typeof truthfulnessModeSchema>;
|
||||
/** Union of the string literals accepted by `autonomyLevelSchema`. */
export type AutonomyLevel = z.infer<typeof autonomyLevelSchema>;
|
||||
|
||||
Reference in New Issue
Block a user