feat(policy): enforce truthfulness and autonomy guardrails

Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials. Wire policy through prompt assembly, tool execution context, and daemon/gateway agent paths; update tests and planning state for Phase 3 PR #2 completion.
2026-02-12 16:06:45 -08:00
parent 125af4e832
commit 90ce622080
18 changed files with 1172 additions and 104 deletions
@@ -14,6 +14,9 @@ import { ModelRouter, type ModelTier } from '../models/index.js';
 import { ToolRegistry, ToolExecutor } from '../tools/index.js';
 import { SessionManager } from '../session/index.js';
 import { AgentConfigRegistry, AgentRouter } from '../agents/index.js';
+import type { CommandRegistry } from '../commands/index.js';
+import type { ComponentRegistry } from '../intents/index.js';
+import type { RoutingPolicy } from '../routing/index.js';

 /**
 * Create the unified message handler for the channel registry.
@@ -33,6 +36,9 @@ export function createMessageRouter(deps: {
  agentConfigRegistry?: AgentConfigRegistry;
  agentRouter?: AgentRouter;
  sandboxManager?: SandboxManager;
+  commandRegistry?: CommandRegistry;
+  intentRegistry?: ComponentRegistry;
+  routingPolicy?: RoutingPolicy;
 }): {
  handler: (msg: InboundMessage, reply: (response: OutboundMessage) => Promise<void>) => Promise<void>;
  agents: Map<string, { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector }>;
@@ -40,9 +46,9 @@ export function createMessageRouter(deps: {
  // Cache agents by session ID + agent config name to avoid recreating on every message
  const agents = new Map<string, { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector }>();

-  function getOrCreateAgent(channel: string, senderId: string, metadata?: Record<string, unknown>): { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector } {
+  function getOrCreateAgent(channel: string, senderId: string, metadata?: Record<string, unknown>, agentOverride?: string): { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector } {
    // Resolve agent config name: an explicit agentOverride wins; otherwise use routing (sender → channel → default fallback)
-    const agentConfigName = deps.agentRouter?.resolve(channel, senderId);
+    const agentConfigName = agentOverride ?? deps.agentRouter?.resolve(channel, senderId);
    const agentConfig = agentConfigName ? deps.agentConfigRegistry?.get(agentConfigName) : undefined;

    // Cron job tier wins over agent config tier
@@ -152,13 +158,17 @@ export function createMessageRouter(deps: {
          thresholdPct: deps.config.compaction.threshold_pct,
          keepTurns: deps.config.compaction.keep_turns,
          summaryMaxTokens: deps.config.compaction.summary_max_tokens,
+          importanceThreshold: deps.config.compaction.importance_threshold,
        } : undefined,
        modelName: deps.config.models.default.model,
        contextWindow: deps.config.models.default.context_window,
        memoryStore: deps.memoryStore,
+        memoryInjectionStrategy: deps.config.memory?.injection_strategy,
+        memoryMaxInjectionTokens: deps.config.memory?.max_injection_tokens,
        toolPolicyContext: {
          agent: effectiveTier,
          provider: effectiveProvider,
+          autonomyLevel: deps.config.agents.autonomy_level ?? 'standard',
        },
        attachmentCollector: collector,
      });
@@ -169,94 +179,118 @@ export function createMessageRouter(deps: {
  }

  const handler = async (msg: InboundMessage, reply: (response: OutboundMessage) => Promise<void>): Promise<void> => {
-    const { orchestrator: agent, collector } = getOrCreateAgent(msg.channel, msg.senderId, msg.metadata);
+    let intentAgentOverride: string | undefined;
+    if (deps.config.intents?.enabled && deps.intentRegistry) {
+      const intentMatch = deps.intentRegistry.match(msg.text);

-    // Handle special commands
-    if (msg.metadata?.isCommand) {
-      if (msg.metadata.command === 'reset') {
-        agent.reset();
-        // Clear per-session config overrides
-        const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
-        session.deleteConfig('modelTier');
-        return;
-      }
-      if (msg.metadata.command === 'model') {
-        const modelArg = msg.metadata.commandArgs as string | undefined;
-        const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
-
-        if (!modelArg) {
-          // Show current model tier
-          const currentTier = agent.getModelTier();
-          const sessionOverride = session.getConfig('modelTier');
-          const available = deps.modelRouter.getAvailableTiers();
-          const labels = deps.modelRouter.getAllLabels();
-          const lines = [`Active tier: ${currentTier}${sessionOverride ? ' (session override)' : ''}`];
-          for (const tier of available) {
-            const label = labels[tier] ?? 'unknown';
-            const marker = tier === currentTier ? ' ←' : '';
-            lines.push(`  ${tier}: ${label}${marker}`);
-          }
-          await reply({ text: lines.join('\n'), replyTo: msg.id });
-          return;
-        }
-
-        // Validate tier
-        const validTiers = deps.modelRouter.getAvailableTiers();
-        if (!validTiers.includes(modelArg as ModelTier)) {
-          await reply({ text: `Model tier not available: ${modelArg}`, replyTo: msg.id });
-          return;
-        }
-
-        // Persist to session config
-        session.setConfig('modelTier', modelArg);
-
-        // Update the orchestrator's agent tier
-        agent.setModelTier(modelArg as ModelTier);
-
-        const label = deps.modelRouter.getLabel(modelArg as ModelTier);
-        await reply({ text: `Switched to model: ${modelArg} (${label})`, replyTo: msg.id });
-        return;
-      }
-      if (msg.metadata.command === 'compact') {
-        const result = await agent.compact();
-        if (result && result.compactedCount > 0) {
-          await reply({
-            text: `Compacted ${result.compactedCount} messages: ${result.tokensBefore} → ${result.tokensAfter} tokens`,
-            replyTo: msg.id,
+      if (intentMatch?.rule.target.type === 'agent') {
+        let confidence = intentMatch.score;
+        if (deps.config.history_index?.enabled) {
+          const historySessionId = `${msg.channel}:${msg.senderId}`;
+          const historyHits = deps.sessionManager.searchHistory(msg.text, {
+            sessionId: historySessionId,
+            limit: 1,
          });
-        } else {
-          await reply({
-            text: 'Nothing to compact.',
-            replyTo: msg.id,
-          });
-        }
-        return;
-      }
-      if (msg.metadata.command === 'usage') {
-        const usage = agent.getUsage();
-        const lines = [
-          '**Token Usage**',
-          '',
-          `Primary: ${usage.primary.inputTokens.toLocaleString()} in / ${usage.primary.outputTokens.toLocaleString()} out (${usage.primary.calls} calls)`,
-        ];
-
-        const delegationEntries = Object.entries(usage.delegation);
-        if (delegationEntries.length > 0) {
-          lines.push('');
-          lines.push('Delegation:');
-          for (const [tier, stats] of delegationEntries) {
-            lines.push(`  ${tier}: ${stats.inputTokens.toLocaleString()} in / ${stats.outputTokens.toLocaleString()} out (${stats.calls} calls)`);
+          if (historyHits.length > 0 && historyHits[0].score >= (deps.config.history_index.min_score ?? 0.15)) {
+            confidence = Math.min(1, confidence + (deps.config.history_index.routing_boost ?? 0.05));
          }
        }

-        lines.push('');
-        lines.push(`**Total:** ${usage.total.inputTokens.toLocaleString()} in / ${usage.total.outputTokens.toLocaleString()} out (${usage.total.calls} calls)`);
+        const decision = deps.routingPolicy
+          ? deps.routingPolicy.decide({ confidence })
+          : { path: 'fast' as const, reason: 'high_confidence' as const };

-        if (usage.total.estimatedCost > 0) {
-          lines.push(`**Estimated cost:** $${usage.total.estimatedCost.toFixed(4)}`);
+        console.log(`[routing] intent=${intentMatch.rule.name} confidence=${confidence.toFixed(3)} path=${decision.path} reason=${decision.reason}`);
+
+        if (decision.path === 'fast') {
+          intentAgentOverride = intentMatch.rule.target.name;
        }
+      }
+    }

-        await reply({ text: lines.join('\n'), replyTo: msg.id });
+    const { orchestrator: agent, collector } = getOrCreateAgent(msg.channel, msg.senderId, msg.metadata, intentAgentOverride);
+
+    const commandInput = msg.metadata?.isCommand && typeof msg.metadata.command === 'string'
+      ? `/${msg.metadata.command}${msg.metadata.commandArgs ? ` ${msg.metadata.commandArgs}` : ''}`
+      : msg.text;
+
+    if (deps.commandRegistry && deps.commandRegistry.isCommand(commandInput)) {
+      const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
+      const commandResult = await deps.commandRegistry.execute(commandInput, {
+        channel: msg.channel,
+        senderId: msg.senderId,
+        sessionId: session.id,
+        rawInput: commandInput,
+        services: {
+          getStatus: () => `Flynn is running. Active model tier: ${agent.getModelTier()}`,
+          getUsage: () => {
+            const usage = agent.getUsage();
+            const lines = [
+              '**Token Usage**',
+              '',
+              `Primary: ${usage.primary.inputTokens.toLocaleString()} in / ${usage.primary.outputTokens.toLocaleString()} out (${usage.primary.calls} calls)`,
+            ];
+
+            const delegationEntries = Object.entries(usage.delegation);
+            if (delegationEntries.length > 0) {
+              lines.push('');
+              lines.push('Delegation:');
+              for (const [tier, stats] of delegationEntries) {
+                lines.push(`  ${tier}: ${stats.inputTokens.toLocaleString()} in / ${stats.outputTokens.toLocaleString()} out (${stats.calls} calls)`);
+              }
+            }
+
+            lines.push('');
+            lines.push(`**Total:** ${usage.total.inputTokens.toLocaleString()} in / ${usage.total.outputTokens.toLocaleString()} out (${usage.total.calls} calls)`);
+
+            if (usage.total.estimatedCost > 0) {
+              lines.push(`**Estimated cost:** $${usage.total.estimatedCost.toFixed(4)}`);
+            }
+
+            return lines.join('\n');
+          },
+          getModel: () => {
+            const currentTier = agent.getModelTier();
+            const sessionOverride = session.getConfig('modelTier');
+            const available = deps.modelRouter.getAvailableTiers();
+            const labels = deps.modelRouter.getAllLabels();
+            const lines = [`Active tier: ${currentTier}${sessionOverride ? ' (session override)' : ''}`];
+            for (const tier of available) {
+              const label = labels[tier] ?? 'unknown';
+              const marker = tier === currentTier ? ' ←' : '';
+              lines.push(`  ${tier}: ${label}${marker}`);
+            }
+            return lines.join('\n');
+          },
+          setModel: (tier) => {
+            const validTiers = deps.modelRouter.getAvailableTiers();
+            if (!validTiers.includes(tier as ModelTier)) {
+              return `Model tier not available: ${tier}`;
+            }
+            session.setConfig('modelTier', tier);
+            agent.setModelTier(tier as ModelTier);
+            const label = deps.modelRouter.getLabel(tier as ModelTier);
+            return `Switched to model: ${tier} (${label})`;
+          },
+          compact: async () => {
+            const result = await agent.compact();
+            if (result && result.compactedCount > 0) {
+              return `Compacted ${result.compactedCount} messages: ${result.tokensBefore} → ${result.tokensAfter} tokens`;
+            }
+            return 'Nothing to compact.';
+          },
+          reset: () => {
+            agent.reset();
+            session.deleteConfig('modelTier');
+            return '';
+          },
+        },
+      });
+
+      if (commandResult.handled) {
+        if (commandResult.text.trim()) {
+          await reply({ text: commandResult.text, replyTo: msg.id });
+        }
        return;
      }
    }
@@ -310,7 +344,7 @@ export function createMessageRouter(deps: {
          }
        } else {
          // No transcription endpoint configured — inform the user gracefully
-          messageText = `[Voice message received but audio transcription is not configured. Please configure the audio section in config.yaml to enable voice message support.]`;
+          messageText = '[Voice message received but audio transcription is not configured. Please configure the audio section in config.yaml to enable voice message support.]';
        }
        // Remove audio attachments so buildUserMessage doesn't create audio content parts
        attachments = (msg.attachments ?? []).filter((a: Attachment) => !isSupportedAudio(a));