feat(policy): enforce truthfulness and autonomy guardrails

Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials. Wire policy through prompt assembly, tool execution context, and daemon/gateway agent paths; update tests and planning state for Phase 3 PR #2 completion.
2026-02-12 16:06:45 -08:00
parent 125af4e832
commit 90ce622080
18 changed files with 1172 additions and 104 deletions
@@ -215,3 +215,191 @@ describe('configSchema automation', () => {
    expect(result.automation.cron[0].timezone).toBe('America/New_York');
  });
 });
+
+describe('configSchema — intents', () => { // Covers parsing of the top-level `intents` config section.
+  const minimalConfig = { // Only telegram and models are supplied; every other section is left to schema defaults.
+    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
+    models: { default: { provider: 'anthropic', model: 'claude-3' } },
+  };
+
+  it('defaults intents to disabled with no rules', () => { // No `intents` key in the input at all.
+    const result = configSchema.parse(minimalConfig);
+    expect(result.intents.enabled).toBe(false);
+    expect(result.intents.rules).toEqual([]);
+  });
+
+  it('accepts intent rule config', () => { // A single fully specified rule must survive parsing.
+    const result = configSchema.parse({
+      ...minimalConfig,
+      intents: {
+        enabled: true,
+        match_threshold: 0.6, // Supplied but not asserted on below.
+        rules: [
+          {
+            name: 'deploy-rule',
+            patterns: ['deploy *'],
+            target: { type: 'agent', name: 'coder' },
+            priority: 5,
+            enabled: true,
+          },
+        ],
+      },
+    });
+
+    expect(result.intents.enabled).toBe(true);
+    expect(result.intents.rules[0].target.type).toBe('agent'); // Only the rule's target is checked, not name/patterns/priority.
+    expect(result.intents.rules[0].target.name).toBe('coder');
+  });
+});
+
+describe('configSchema — routing_policy', () => { // Checks the values `routing_policy` takes when the section is omitted.
+  const minimalConfig = { // Only telegram and models are supplied; every other section is left to schema defaults.
+    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
+    models: { default: { provider: 'anthropic', model: 'claude-3' } },
+  };
+
+  it('defaults routing_policy values', () => { // Pins all four defaulted fields of the section.
+    const result = configSchema.parse(minimalConfig);
+    expect(result.routing_policy.enabled).toBe(false);
+    expect(result.routing_policy.fast_path_threshold).toBe(0.85);
+    expect(result.routing_policy.llm_threshold).toBe(0.5);
+    expect(result.routing_policy.default_path).toBe('llm');
+  });
+});
+
+describe('configSchema — history_index', () => { // Checks the values `history_index` takes when the section is omitted.
+  const minimalConfig = { // Only telegram and models are supplied; every other section is left to schema defaults.
+    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
+    models: { default: { provider: 'anthropic', model: 'claude-3' } },
+  };
+
+  it('defaults history indexing config', () => { // Asserts enabled, max_keywords and search_limit only.
+    const result = configSchema.parse(minimalConfig);
+    expect(result.history_index.enabled).toBe(false);
+    expect(result.history_index.max_keywords).toBe(8);
+    expect(result.history_index.search_limit).toBe(10);
+  });
+});
+
+describe('configSchema — memory injection strategy', () => { // Covers memory.injection_strategy and memory.max_injection_tokens.
+  const minimalConfig = { // Only telegram and models are supplied; every other section is left to schema defaults.
+    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
+    models: { default: { provider: 'anthropic', model: 'claude-3' } },
+  };
+
+  it('defaults memory injection settings', () => { // No `memory` key in the input.
+    const result = configSchema.parse(minimalConfig);
+    expect(result.memory.injection_strategy).toBe('all');
+    expect(result.memory.max_injection_tokens).toBe(2000);
+  });
+
+  it('accepts adaptive memory injection settings', () => { // Explicit values must pass through parsing unchanged.
+    const result = configSchema.parse({
+      ...minimalConfig,
+      memory: {
+        injection_strategy: 'adaptive',
+        max_injection_tokens: 1200,
+      },
+    });
+    expect(result.memory.injection_strategy).toBe('adaptive');
+    expect(result.memory.max_injection_tokens).toBe(1200);
+  });
+});
+
+describe('configSchema — compaction importance threshold', () => { // Covers compaction.importance_threshold.
+  const minimalConfig = { // Only telegram and models are supplied; every other section is left to schema defaults.
+    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
+    models: { default: { provider: 'anthropic', model: 'claude-3' } },
+  };
+
+  it('defaults compaction importance threshold to disabled behavior', () => { // No `compaction` key in the input.
+    const result = configSchema.parse(minimalConfig);
+    expect(result.compaction.importance_threshold).toBe(1); // The test title treats 1 as "disabled"; that meaning is not visible here — confirm against the consumer.
+  });
+
+  it('accepts a custom importance threshold', () => { // An explicit value must pass through parsing unchanged.
+    const result = configSchema.parse({
+      ...minimalConfig,
+      compaction: {
+        importance_threshold: 0.5,
+      },
+    });
+    expect(result.compaction.importance_threshold).toBe(0.5);
+  });
+});
+
+describe('configSchema — prompt context level', () => { // Covers prompt.context_level: default, one accepted value, one rejected value.
+  const minimalConfig = { // Only telegram and models are supplied; every other section is left to schema defaults.
+    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
+    models: { default: { provider: 'anthropic', model: 'claude-3' } },
+  };
+
+  it('defaults prompt.context_level to normal', () => { // No `prompt` key in the input.
+    const result = configSchema.parse(minimalConfig);
+    expect(result.prompt.context_level).toBe('normal');
+  });
+
+  it('accepts valid context levels', () => { // Despite the plural title, only 'debug' is exercised.
+    const result = configSchema.parse({
+      ...minimalConfig,
+      prompt: {
+        context_level: 'debug',
+      },
+    });
+
+    expect(result.prompt.context_level).toBe('debug');
+  });
+
+  it('rejects invalid context levels', () => { // parse() is expected to throw rather than coerce.
+    expect(() => configSchema.parse({
+      ...minimalConfig,
+      prompt: {
+        context_level: 'verbose', // Not one of minimal | normal | detailed | debug.
+      },
+    })).toThrow();
+  });
+});
+
+describe('configSchema — agents truthfulness/autonomy', () => { // Covers agents.truthfulness_mode and agents.autonomy_level.
+  const minimalConfig = { // Only telegram and models are supplied; every other section is left to schema defaults.
+    telegram: { bot_token: 'test', allowed_chat_ids: [1] },
+    models: { default: { provider: 'anthropic', model: 'claude-3' } },
+  };
+
+  it('defaults to standard truthfulness and autonomy', () => { // No `agents` key in the input.
+    const result = configSchema.parse(minimalConfig);
+    expect(result.agents.truthfulness_mode).toBe('standard');
+    expect(result.agents.autonomy_level).toBe('standard');
+  });
+
+  it('accepts explicit truthfulness and autonomy modes', () => { // Non-default values must pass through parsing unchanged.
+    const result = configSchema.parse({
+      ...minimalConfig,
+      agents: {
+        truthfulness_mode: 'strict',
+        autonomy_level: 'conservative',
+      },
+    });
+
+    expect(result.agents.truthfulness_mode).toBe('strict');
+    expect(result.agents.autonomy_level).toBe('conservative');
+  });
+
+  it('rejects invalid truthfulness_mode', () => { // parse() is expected to throw rather than fall back to the default.
+    expect(() => configSchema.parse({
+      ...minimalConfig,
+      agents: {
+        truthfulness_mode: 'always', // Not one of strict | standard | relaxed.
+      },
+    })).toThrow();
+  });
+
+  it('rejects invalid autonomy_level', () => { // parse() is expected to throw rather than fall back to the default.
+    expect(() => configSchema.parse({
+      ...minimalConfig,
+      agents: {
+        autonomy_level: 'manual', // Not one of conservative | standard | autonomous.
+      },
+    })).toThrow();
+  });
+});
@@ -212,6 +212,9 @@ const automationSchema = z.object({
  heartbeat: heartbeatSchema,
 }).default({});

+const truthfulnessModeSchema = z.enum(['strict', 'standard', 'relaxed']); // Allowed values for agents.truthfulness_mode; the default is applied at the field, not here.
+const autonomyLevelSchema = z.enum(['conservative', 'standard', 'autonomous']); // Allowed values for agents.autonomy_level; the default is applied at the field, not here.
+
 const agentsSchema = z.object({
  primary_tier: z.enum(['fast', 'default', 'complex', 'local']).default('default'),
  delegation: z.object({
@@ -231,6 +234,10 @@ const agentsSchema = z.object({
  max_delegation_depth: z.number().min(1).max(10).default(3),
  /** Maximum tool-loop iterations before the agent stops. */
  max_iterations: z.number().min(1).max(50).default(10),
+  /** Truthfulness enforcement level: strict | standard | relaxed. */
+  truthfulness_mode: truthfulnessModeSchema.default('standard'),
+  /** Autonomy level for tool execution: conservative | standard | autonomous. */
+  autonomy_level: autonomyLevelSchema.default('standard'),
 }).default({});

 const embeddingProviderSchema = z.enum(['openai', 'gemini', 'ollama', 'llamacpp', 'voyage']);
@@ -252,6 +259,8 @@ const memorySchema = z.object({
  enabled: z.boolean().default(true),
  dir: z.string().optional(), // Default: ~/.local/share/flynn/memory
  auto_extract: z.boolean().default(true),
+  injection_strategy: z.enum(['all', 'recent', 'adaptive']).default('all'),
+  max_injection_tokens: z.number().min(100).max(10000).default(2000),
  max_context_tokens: z.number().min(100).max(10000).default(2000),
  embedding: embeddingSchema,
 }).default({});
@@ -261,6 +270,7 @@ const compactionSchema = z.object({
  threshold_pct: z.number().min(10).max(100).default(80),
  keep_turns: z.number().min(1).max(50).default(4),
  summary_max_tokens: z.number().min(128).max(4096).default(1024),
+  importance_threshold: z.number().min(0).max(1).default(1),
 }).default({});

 const discordSchema = z.object({
@@ -375,6 +385,34 @@ const routingSchema = z.object({
  senders: z.record(z.string(), z.string()).default({}),
 }).default({});

+const intentTargetTypeSchema = z.enum(['agent', 'skill']); // Kinds of target an intent rule may point at.
+
+const intentRuleSchema = z.object({ // Shape of one entry in intents.rules.
+  name: z.string().min(1), // Required, non-empty.
+  patterns: z.array(z.string().min(1)).min(1), // At least one pattern, each a non-empty string.
+  target: z.object({ // Required; has no default.
+    type: intentTargetTypeSchema,
+    name: z.string().min(1),
+  }),
+  priority: z.number().default(0), // Unbounded number; ordering semantics are decided by the consumer, not shown here.
+  enabled: z.boolean().default(true), // Rules are on unless explicitly disabled.
+});
+
+const intentsSchema = z.object({ // Top-level `intents` section of the config.
+  enabled: z.boolean().default(false), // Off unless explicitly enabled.
+  match_threshold: z.number().min(0).max(1).default(0.7), // Constrained to the range 0..1.
+  rules: z.array(intentRuleSchema).default([]), // No rules when unset.
+}).default({}); // An omitted section parses as {}, so every field takes its default.
+
+const routingPolicySchema = z.object({ // Top-level `routing_policy` section of the config.
+  enabled: z.boolean().default(false), // Off unless explicitly enabled.
+  fast_path_threshold: z.number().min(0).max(1).default(0.85), // Constrained to 0..1; no ordering relative to llm_threshold is enforced here.
+  llm_threshold: z.number().min(0).max(1).default(0.5), // Constrained to 0..1.
+  default_path: z.enum(['fast', 'llm']).default('llm'), // One of the two named paths.
+}).default({}); // An omitted section parses as {}, so every field takes its default.
+
+const contextLevelSchema = z.enum(['minimal', 'normal', 'detailed', 'debug']); // Allowed values for prompt.context_level.
+
 const promptSchema = z.object({
  /** Additional directories to search for prompt template files. */
  search_dirs: z.array(z.string()).default([]),
@@ -383,12 +421,22 @@ const promptSchema = z.object({
    name: z.string(),
    content: z.string(),
  })).default([]),
+  /** Prompt context depth control: minimal | normal | detailed | debug. */
+  context_level: contextLevelSchema.default('normal'),
 }).default({});

 const sessionsSchema = z.object({ // Top-level `sessions` section of the config.
  ttl: z.string().default('30d'), // Any string is accepted here; the format is not validated by this schema.
 }).default({});

+const historyIndexSchema = z.object({ // Top-level `history_index` section of the config.
+  enabled: z.boolean().default(false), // Off unless explicitly enabled.
+  max_keywords: z.number().min(1).max(20).default(8), // Range 1..20; non-integers are not rejected by this schema.
+  search_limit: z.number().min(1).max(100).default(10), // Range 1..100; non-integers are not rejected by this schema.
+  min_score: z.number().min(0).max(1).default(0.15), // Constrained to 0..1.
+  routing_boost: z.number().min(0).max(0.2).default(0.05), // Capped at 0.2.
+}).default({}); // An omitted section parses as {}, so every field takes its default.
+
 const logLevelSchema = z.enum(['debug', 'info', 'warn', 'error', 'silent']).default('info'); // Log level; 'info' when unset.

 const auditLevelSchema = z.enum(['debug', 'info', 'warn', 'error']).default('debug'); // Audit level; has no 'silent' option and is 'debug' when unset.
@@ -432,6 +480,9 @@ export const configSchema = z.object({
  sandbox: sandboxSchema,
  agent_configs: agentConfigsSchema,
  routing: routingSchema,
+  intents: intentsSchema,
+  routing_policy: routingPolicySchema,
+  history_index: historyIndexSchema,
  sessions: sessionsSchema,
  pairing: pairingSchema,
 });
@@ -453,6 +504,7 @@ export type DiscordConfig = z.infer<typeof discordSchema>;
 export type SlackConfig = z.infer<typeof slackSchema>;
 export type WhatsAppConfig = z.infer<typeof whatsappSchema>;
 export type RetryPolicyConfig = z.infer<typeof retrySchema>;
+export type ContextLevel = z.infer<typeof contextLevelSchema>; // 'minimal' | 'normal' | 'detailed' | 'debug'
 export type PromptConfig = z.infer<typeof promptSchema>;
 export type ToolProfile = z.infer<typeof toolProfileEnum>;
 export type ToolOverrideConfig = z.infer<typeof toolOverrideSchema>;
@@ -460,6 +512,11 @@ export type ToolsConfig = z.infer<typeof toolsSchema>;
 export type SandboxConfig = z.infer<typeof sandboxSchema>;
 export type AgentConfigEntry = z.infer<typeof agentConfigEntrySchema>;
 export type RoutingConfig = z.infer<typeof routingSchema>;
+export type IntentTargetType = z.infer<typeof intentTargetTypeSchema>; // 'agent' | 'skill'
+export type IntentRuleConfig = z.infer<typeof intentRuleSchema>; // One parsed entry of intents.rules.
+export type IntentsConfig = z.infer<typeof intentsSchema>; // Parsed `intents` section.
+export type RoutingPolicyConfig = z.infer<typeof routingPolicySchema>; // Parsed `routing_policy` section.
+export type HistoryIndexConfig = z.infer<typeof historyIndexSchema>; // Parsed `history_index` section.
 export type ServerConfig = z.infer<typeof serverSchema>;
 export type SessionsConfig = z.infer<typeof sessionsSchema>;
 export type ThinkingConfig = z.infer<typeof thinkingSchema>;
@@ -475,3 +532,5 @@ export type PairingCodeConfig = z.infer<typeof pairingSchema>;
 export type LogLevel = z.infer<typeof logLevelSchema>;
 export type AuditConfig = z.infer<typeof auditSchema>;
 export type AuditLevel = z.infer<typeof auditLevelSchema>;
+export type TruthfulnessMode = z.infer<typeof truthfulnessModeSchema>; // 'strict' | 'standard' | 'relaxed'
+export type AutonomyLevel = z.infer<typeof autonomyLevelSchema>; // 'conservative' | 'standard' | 'autonomous'