feat(policy): enforce truthfulness and autonomy guardrails

Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials.

Wire the policy through prompt assembly, the tool execution context, and the daemon/gateway agent paths; update tests and planning state to mark Phase 3 PR #2 as complete.
This commit is contained in:
William Valentin
2026-02-12 16:06:45 -08:00
parent 125af4e832
commit 90ce622080
18 changed files with 1172 additions and 104 deletions
+117 -83
View File
@@ -14,6 +14,9 @@ import { ModelRouter, type ModelTier } from '../models/index.js';
import { ToolRegistry, ToolExecutor } from '../tools/index.js';
import { SessionManager } from '../session/index.js';
import { AgentConfigRegistry, AgentRouter } from '../agents/index.js';
import type { CommandRegistry } from '../commands/index.js';
import type { ComponentRegistry } from '../intents/index.js';
import type { RoutingPolicy } from '../routing/index.js';
/**
* Create the unified message handler for the channel registry.
@@ -33,6 +36,9 @@ export function createMessageRouter(deps: {
agentConfigRegistry?: AgentConfigRegistry;
agentRouter?: AgentRouter;
sandboxManager?: SandboxManager;
commandRegistry?: CommandRegistry;
intentRegistry?: ComponentRegistry;
routingPolicy?: RoutingPolicy;
}): {
handler: (msg: InboundMessage, reply: (response: OutboundMessage) => Promise<void>) => Promise<void>;
agents: Map<string, { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector }>;
@@ -40,9 +46,9 @@ export function createMessageRouter(deps: {
// Cache agents by session ID + agent config name to avoid recreating on every message
const agents = new Map<string, { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector }>();
function getOrCreateAgent(channel: string, senderId: string, metadata?: Record<string, unknown>): { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector } {
function getOrCreateAgent(channel: string, senderId: string, metadata?: Record<string, unknown>, agentOverride?: string): { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector } {
// Resolve agent config name via routing (sender → channel → default fallback)
const agentConfigName = deps.agentRouter?.resolve(channel, senderId);
const agentConfigName = agentOverride ?? deps.agentRouter?.resolve(channel, senderId);
const agentConfig = agentConfigName ? deps.agentConfigRegistry?.get(agentConfigName) : undefined;
// Cron job tier wins over agent config tier
@@ -152,13 +158,17 @@ export function createMessageRouter(deps: {
thresholdPct: deps.config.compaction.threshold_pct,
keepTurns: deps.config.compaction.keep_turns,
summaryMaxTokens: deps.config.compaction.summary_max_tokens,
importanceThreshold: deps.config.compaction.importance_threshold,
} : undefined,
modelName: deps.config.models.default.model,
contextWindow: deps.config.models.default.context_window,
memoryStore: deps.memoryStore,
memoryInjectionStrategy: deps.config.memory?.injection_strategy,
memoryMaxInjectionTokens: deps.config.memory?.max_injection_tokens,
toolPolicyContext: {
agent: effectiveTier,
provider: effectiveProvider,
autonomyLevel: deps.config.agents.autonomy_level ?? 'standard',
},
attachmentCollector: collector,
});
@@ -169,94 +179,118 @@ export function createMessageRouter(deps: {
}
const handler = async (msg: InboundMessage, reply: (response: OutboundMessage) => Promise<void>): Promise<void> => {
const { orchestrator: agent, collector } = getOrCreateAgent(msg.channel, msg.senderId, msg.metadata);
let intentAgentOverride: string | undefined;
if (deps.config.intents?.enabled && deps.intentRegistry) {
const intentMatch = deps.intentRegistry.match(msg.text);
// Handle special commands
if (msg.metadata?.isCommand) {
if (msg.metadata.command === 'reset') {
agent.reset();
// Clear per-session config overrides
const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
session.deleteConfig('modelTier');
return;
}
if (msg.metadata.command === 'model') {
const modelArg = msg.metadata.commandArgs as string | undefined;
const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
if (!modelArg) {
// Show current model tier
const currentTier = agent.getModelTier();
const sessionOverride = session.getConfig('modelTier');
const available = deps.modelRouter.getAvailableTiers();
const labels = deps.modelRouter.getAllLabels();
const lines = [`Active tier: ${currentTier}${sessionOverride ? ' (session override)' : ''}`];
for (const tier of available) {
const label = labels[tier] ?? 'unknown';
const marker = tier === currentTier ? ' ←' : '';
lines.push(` ${tier}: ${label}${marker}`);
}
await reply({ text: lines.join('\n'), replyTo: msg.id });
return;
}
// Validate tier
const validTiers = deps.modelRouter.getAvailableTiers();
if (!validTiers.includes(modelArg as ModelTier)) {
await reply({ text: `Model tier not available: ${modelArg}`, replyTo: msg.id });
return;
}
// Persist to session config
session.setConfig('modelTier', modelArg);
// Update the orchestrator's agent tier
agent.setModelTier(modelArg as ModelTier);
const label = deps.modelRouter.getLabel(modelArg as ModelTier);
await reply({ text: `Switched to model: ${modelArg} (${label})`, replyTo: msg.id });
return;
}
if (msg.metadata.command === 'compact') {
const result = await agent.compact();
if (result && result.compactedCount > 0) {
await reply({
text: `Compacted ${result.compactedCount} messages: ${result.tokensBefore}${result.tokensAfter} tokens`,
replyTo: msg.id,
if (intentMatch?.rule.target.type === 'agent') {
let confidence = intentMatch.score;
if (deps.config.history_index?.enabled) {
const historySessionId = `${msg.channel}:${msg.senderId}`;
const historyHits = deps.sessionManager.searchHistory(msg.text, {
sessionId: historySessionId,
limit: 1,
});
} else {
await reply({
text: 'Nothing to compact.',
replyTo: msg.id,
});
}
return;
}
if (msg.metadata.command === 'usage') {
const usage = agent.getUsage();
const lines = [
'**Token Usage**',
'',
`Primary: ${usage.primary.inputTokens.toLocaleString()} in / ${usage.primary.outputTokens.toLocaleString()} out (${usage.primary.calls} calls)`,
];
const delegationEntries = Object.entries(usage.delegation);
if (delegationEntries.length > 0) {
lines.push('');
lines.push('Delegation:');
for (const [tier, stats] of delegationEntries) {
lines.push(` ${tier}: ${stats.inputTokens.toLocaleString()} in / ${stats.outputTokens.toLocaleString()} out (${stats.calls} calls)`);
if (historyHits.length > 0 && historyHits[0].score >= (deps.config.history_index.min_score ?? 0.15)) {
confidence = Math.min(1, confidence + (deps.config.history_index.routing_boost ?? 0.05));
}
}
lines.push('');
lines.push(`**Total:** ${usage.total.inputTokens.toLocaleString()} in / ${usage.total.outputTokens.toLocaleString()} out (${usage.total.calls} calls)`);
const decision = deps.routingPolicy
? deps.routingPolicy.decide({ confidence })
: { path: 'fast' as const, reason: 'high_confidence' as const };
if (usage.total.estimatedCost > 0) {
lines.push(`**Estimated cost:** $${usage.total.estimatedCost.toFixed(4)}`);
console.log(`[routing] intent=${intentMatch.rule.name} confidence=${confidence.toFixed(3)} path=${decision.path} reason=${decision.reason}`);
if (decision.path === 'fast') {
intentAgentOverride = intentMatch.rule.target.name;
}
}
}
await reply({ text: lines.join('\n'), replyTo: msg.id });
const { orchestrator: agent, collector } = getOrCreateAgent(msg.channel, msg.senderId, msg.metadata, intentAgentOverride);
const commandInput = msg.metadata?.isCommand && typeof msg.metadata.command === 'string'
? `/${msg.metadata.command}${msg.metadata.commandArgs ? ` ${msg.metadata.commandArgs}` : ''}`
: msg.text;
if (deps.commandRegistry && deps.commandRegistry.isCommand(commandInput)) {
const session = deps.sessionManager.getSession(msg.channel, msg.senderId);
const commandResult = await deps.commandRegistry.execute(commandInput, {
channel: msg.channel,
senderId: msg.senderId,
sessionId: session.id,
rawInput: commandInput,
services: {
getStatus: () => `Flynn is running. Active model tier: ${agent.getModelTier()}`,
getUsage: () => {
const usage = agent.getUsage();
const lines = [
'**Token Usage**',
'',
`Primary: ${usage.primary.inputTokens.toLocaleString()} in / ${usage.primary.outputTokens.toLocaleString()} out (${usage.primary.calls} calls)`,
];
const delegationEntries = Object.entries(usage.delegation);
if (delegationEntries.length > 0) {
lines.push('');
lines.push('Delegation:');
for (const [tier, stats] of delegationEntries) {
lines.push(` ${tier}: ${stats.inputTokens.toLocaleString()} in / ${stats.outputTokens.toLocaleString()} out (${stats.calls} calls)`);
}
}
lines.push('');
lines.push(`**Total:** ${usage.total.inputTokens.toLocaleString()} in / ${usage.total.outputTokens.toLocaleString()} out (${usage.total.calls} calls)`);
if (usage.total.estimatedCost > 0) {
lines.push(`**Estimated cost:** $${usage.total.estimatedCost.toFixed(4)}`);
}
return lines.join('\n');
},
getModel: () => {
const currentTier = agent.getModelTier();
const sessionOverride = session.getConfig('modelTier');
const available = deps.modelRouter.getAvailableTiers();
const labels = deps.modelRouter.getAllLabels();
const lines = [`Active tier: ${currentTier}${sessionOverride ? ' (session override)' : ''}`];
for (const tier of available) {
const label = labels[tier] ?? 'unknown';
const marker = tier === currentTier ? ' ←' : '';
lines.push(` ${tier}: ${label}${marker}`);
}
return lines.join('\n');
},
setModel: (tier) => {
const validTiers = deps.modelRouter.getAvailableTiers();
if (!validTiers.includes(tier as ModelTier)) {
return `Model tier not available: ${tier}`;
}
session.setConfig('modelTier', tier);
agent.setModelTier(tier as ModelTier);
const label = deps.modelRouter.getLabel(tier as ModelTier);
return `Switched to model: ${tier} (${label})`;
},
compact: async () => {
const result = await agent.compact();
if (result && result.compactedCount > 0) {
return `Compacted ${result.compactedCount} messages: ${result.tokensBefore}${result.tokensAfter} tokens`;
}
return 'Nothing to compact.';
},
reset: () => {
agent.reset();
session.deleteConfig('modelTier');
return '';
},
},
});
if (commandResult.handled) {
if (commandResult.text.trim()) {
await reply({ text: commandResult.text, replyTo: msg.id });
}
return;
}
}
@@ -310,7 +344,7 @@ export function createMessageRouter(deps: {
}
} else {
// No transcription endpoint configured — inform the user gracefully
messageText = `[Voice message received but audio transcription is not configured. Please configure the audio section in config.yaml to enable voice message support.]`;
messageText = '[Voice message received but audio transcription is not configured. Please configure the audio section in config.yaml to enable voice message support.]';
}
// Remove audio attachments so buildUserMessage doesn't create audio content parts
attachments = (msg.attachments ?? []).filter((a: Attachment) => !isSupportedAudio(a));
+11
View File
@@ -14,6 +14,9 @@ import { assembleSystemPrompt } from '../prompt/index.js';
import { resolve } from 'path';
import { homedir } from 'os';
import type { MemoryStore } from '../memory/store.js';
import type { CommandRegistry } from '../commands/index.js';
import type { ComponentRegistry } from '../intents/index.js';
import type { RoutingPolicy } from '../routing/index.js';
// ── Skills ──────────────────────────────────────────────────────
@@ -75,6 +78,8 @@ export function loadSystemPrompt(config: Config, skillRegistry: SkillRegistry):
const result = assembleSystemPrompt({
searchDirs,
extraSections: config.prompt.extra_sections,
contextLevel: config.prompt.context_level,
truthfulnessMode: config.agents.truthfulness_mode,
});
if (result.loadedFiles.length > 0) {
@@ -123,6 +128,9 @@ export interface GatewayDeps {
lifecycle: Lifecycle;
getChannelAgents: () => Map<string, { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector }> | null;
memoryStore?: MemoryStore;
commandRegistry?: CommandRegistry;
intentRegistry?: ComponentRegistry;
routingPolicy?: RoutingPolicy;
}
export function createGateway(deps: GatewayDeps): GatewayServer {
@@ -142,6 +150,9 @@ export function createGateway(deps: GatewayDeps): GatewayServer {
},
authHttp: config.server.auth_http,
lock: config.server.lock,
commandRegistry: deps.commandRegistry,
intentRegistry: deps.intentRegistry,
routingPolicy: deps.routingPolicy,
uiDir: resolve(import.meta.dirname, '../gateway/ui'),
config,
channelRegistry,