From 90ce6220800991f1bad6c04c954927cc193c273e Mon Sep 17 00:00:00 2001 From: William Valentin Date: Thu, 12 Feb 2026 16:06:45 -0800 Subject: [PATCH] feat(policy): enforce truthfulness and autonomy guardrails Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials. Wire policy through prompt assembly, tool execution context, and daemon/gateway agent paths; update tests and planning state for Phase 3 PR #2 completion. --- docs/plans/state.json | 166 +++++++++++++++++++- src/audit/types.ts | 4 +- src/backends/native/guardrails.test.ts | 47 ++++++ src/backends/native/guardrails.ts | 61 ++++++++ src/config/schema.test.ts | 188 +++++++++++++++++++++++ src/config/schema.ts | 59 ++++++++ src/daemon/routing.ts | 200 +++++++++++++++---------- src/daemon/services.ts | 11 ++ src/gateway/session-bridge.ts | 9 +- src/hooks/autonomy.test.ts | 149 ++++++++++++++++++ src/hooks/autonomy.ts | 115 ++++++++++++++ src/hooks/index.ts | 2 + src/prompt/template.test.ts | 140 +++++++++++++++++ src/prompt/template.ts | 46 +++++- src/tools/executor.test.ts | 37 +++++ src/tools/executor.ts | 33 +++- src/tools/integration.test.ts | 5 +- src/tools/policy.ts | 4 +- 18 files changed, 1172 insertions(+), 104 deletions(-) create mode 100644 src/backends/native/guardrails.test.ts create mode 100644 src/backends/native/guardrails.ts create mode 100644 src/hooks/autonomy.test.ts create mode 100644 src/hooks/autonomy.ts diff --git a/docs/plans/state.json b/docs/plans/state.json index d1cdb9b..37221be 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -1,6 +1,6 @@ { "version": "1.0", - "updated_at": "2026-02-11", + "updated_at": "2026-02-12", "description": "Tracks the status of all Flynn plans and implementation phases", "plans": { @@ -1040,7 +1040,162 @@ } } }, - "skills_infrastructure": { + "remaining-phases-phase1-pr1-context-levels": { + "file": "phase1-pr1-context-level-checklist.md", + "status": "completed", + "date": "2026-02-12", + "summary": "Added prompt context levels (minimal/normal/detailed/debug) with config schema support, prompt assembly behavior changes, daemon wiring, and coverage for parsing plus level-specific prompt output.", + "files_modified": [ + "src/config/schema.ts", + "src/config/schema.test.ts", + "src/prompt/template.ts", + "src/prompt/template.test.ts", + "src/daemon/services.ts", + "config/default.yaml" + ], + "test_status": "typecheck + targeted template tests + full test suite + build passing; lint currently fails due pre-existing unrelated repo issues" + }, + "remaining-phases-phase1-pr2-command-registry": { + "file": "phase1-pr2-command-registry-checklist.md", + "status": "completed", + "date": "2026-02-12", + "summary": "Added a deterministic fast-path CommandRegistry with built-in slash commands and integrated it into channel routing and gateway agent handling so known commands execute before orchestrator message processing.", + "files_created": [ + "src/commands/types.ts", + "src/commands/registry.ts", + "src/commands/builtin/index.ts", + "src/commands/index.ts", + "src/commands/registry.test.ts", + "src/gateway/handlers/agent.test.ts" + ], + "files_modified": [ + "src/daemon/index.ts", + "src/daemon/routing.ts", + "src/daemon/routing.test.ts", + "src/daemon/services.ts", + "src/gateway/server.ts", + "src/gateway/handlers/agent.ts" + ], + "test_status": "typecheck + targeted command/routing/gateway tests + full test suite + build passing; lint currently fails due pre-existing unrelated repo issues" + }, + "remaining-phases-phase1-pr3-memory-structure": { + "file": "phase1-pr3-memory-structure-checklist.md", + "status": "completed", + "date": "2026-02-12", + "summary": "Added structured memory categories (facts/preferences/decisions/projects) with backward-compatible MemoryStore category APIs, optional category-aware search filters, prompt-context category inclusion, and updated memory tool namespace docs.", + "files_created": [ + "src/memory/categories.ts", + "src/memory/categories.test.ts" + ], + "files_modified": [ + "src/memory/store.ts", + "src/memory/store.test.ts", + "src/memory/index.ts", + "src/tools/builtin/memory-read.ts", + "src/tools/builtin/memory-write.ts", + "src/tools/builtin/memory-search.ts" + ], + "test_status": "typecheck + targeted categories/store tests + full test suite + build passing; lint currently fails due pre-existing unrelated repo issues" + }, + "remaining-phases-phase2-pr1-component-registry": { + "file": "phase2-pr1-component-registry-checklist.md", + "status": "completed", + "date": "2026-02-12", + "summary": "Added intent/component registry routing with configurable match rules and deterministic resolution, integrated opt-in intent-based agent override in daemon routing, and exposed gateway inspection endpoints (`intents.list`, `intents.match`).", + "files_created": [ + "src/intents/registry.ts", + "src/intents/index.ts", + "src/intents/registry.test.ts", + "src/gateway/handlers/intents.ts" + ], + "files_modified": [ + "src/config/schema.ts", + "src/config/schema.test.ts", + "src/daemon/index.ts", + "src/daemon/routing.ts", + "src/daemon/routing.test.ts", + "src/daemon/services.ts", + "src/gateway/server.ts", + "src/gateway/handlers/index.ts", + "src/gateway/handlers/handlers.test.ts" + ], + "test_status": "typecheck + targeted intents/routing/handlers tests + full test suite + build passing; lint currently fails due pre-existing unrelated repo issues" + }, + "remaining-phases-phase2-pr2-confidence-routing": { + "file": "phase2-pr2-confidence-routing-checklist.md", + "status": "completed", + "date": "2026-02-12", + "summary": "Added confidence-based routing policy with configurable thresholds to choose fast-path intent routing versus standard LLM orchestration, and exposed routing decision inspection via gateway.", + "files_created": [ + "src/routing/policy.ts", + "src/routing/index.ts", + "src/routing/policy.test.ts", + "src/gateway/handlers/routing.ts" + ], + "files_modified": [ + "src/config/schema.ts", + "src/config/schema.test.ts", + "src/intents/registry.test.ts", + "src/daemon/index.ts", + "src/daemon/routing.ts", + "src/daemon/routing.test.ts", + "src/daemon/services.ts", + "src/gateway/server.ts", + "src/gateway/handlers/index.ts", + "src/gateway/handlers/handlers.test.ts" + ], + "test_status": "typecheck + targeted policy/intents/routing tests + full test suite + build passing; lint currently fails due pre-existing unrelated repo issues" + }, + "remaining-phases-phase3-pr1-adaptive-memory-compaction": { + "file": "phase3-pr1-adaptive-memory-compaction-checklist.md", + "status": "completed", + "date": "2026-02-12", + "summary": "Added configurable memory injection strategies (all/recent/adaptive) with adaptive relevance scoring and budget clipping, plus weighted compaction that can preserve high-importance turns while keeping default behavior when thresholds remain at defaults.", + "files_created": [ + "src/memory/adaptive.ts", + "src/memory/adaptive.test.ts", + "src/context/weighting.ts", + "src/context/weighting.test.ts" + ], + "files_modified": [ + "src/config/schema.ts", + "src/config/schema.test.ts", + "src/memory/store.ts", + "src/memory/index.ts", + "src/backends/native/orchestrator.ts", + "src/backends/native/orchestrator.test.ts", + "src/context/compaction.ts", + "src/context/compaction.test.ts", + "src/daemon/routing.ts", + "src/gateway/session-bridge.ts" + ], + "test_status": "typecheck + targeted adaptive/weighting/compaction/orchestrator tests + full test suite + build passing; lint currently fails due pre-existing unrelated repo issues" + }, + "remaining-phases-phase3-pr2-policy-autonomy-hardening": { + "file": "phase3-pr2-policy-autonomy-hardening-checklist.md", + "status": "completed", + "date": "2026-02-12", + "summary": "Added runtime-enforceable truthfulness guardrails (strict/standard/relaxed modes injected into system prompts) and autonomy-aware tool execution controls (conservative/standard/autonomous levels with per-tool danger classification and override logic). Both configs default to 'standard' for safe behavior.", + "files_created": [ + "src/backends/native/guardrails.ts", + "src/backends/native/guardrails.test.ts", + "src/hooks/autonomy.ts", + "src/hooks/autonomy.test.ts" + ], + "files_modified": [ + "src/config/schema.ts", + "src/tools/policy.ts", + "src/tools/executor.ts", + "src/audit/types.ts", + "src/prompt/template.ts", + "src/daemon/services.ts", + "src/daemon/routing.ts", + "src/gateway/session-bridge.ts", + "src/tools/integration.test.ts" + ], + "test_status": "typecheck + targeted guardrails/autonomy/executor/engine/schema/template tests + full suite passing (1490/1490); lint passing baseline (394 warnings, 0 errors); build passing" + }, + "skills_infrastructure": { "file": "2026-02-11-skills-infrastructure-plan.md", "status": "planned", "date": "2026-02-11", @@ -1089,7 +1244,7 @@ }, "overall_progress": { - "total_test_count": 1369, + "total_test_count": 1490, "all_tests_passing": true, "p0_completion": "3/3 (100%)", "p1_completion": "4/4 (100%)", @@ -1108,7 +1263,8 @@ "operator_dx_milestone": "Phase 3 (Live Ops Dashboard): 1/2 plans complete — metrics backend done, dashboard UI next", "gmail_auth_cli": "flynn gmail-auth command implemented with OAuth2 flow, doctor check, config routed to Telegram", "native_audio_support": "completed — smart routing for native audio (Gemini/OpenAI/GitHub) vs Whisper transcription fallback", - "next_up": "End-to-end test that Flynn follows through on tool calls via GitHub Copilot fallback. Remaining gaps: Tier 4 channels (Signal, Matrix, Teams, Google Chat), Tier 5 deferred/niche items" + "remaining_phases_completion": "Phase 1: 3/3 (100%) — context levels, command registry, memory structure. Phase 2: 2/2 (100%) — component registry, confidence routing. Phase 3: 2/2 (100%) — adaptive memory/compaction, truthfulness/autonomy hardening", + "next_up": "Skills infrastructure implementation (Phase 1: command dispatch)" }, "soul_md_and_cron_create": { "date": "2026-02-11", @@ -1199,4 +1355,4 @@ ], "test_status": "4/4 passing" } -} \ No newline at end of file +} diff --git a/src/audit/types.ts b/src/audit/types.ts index 72b43fd..446820a 100644 --- a/src/audit/types.ts +++ b/src/audit/types.ts @@ -1,6 +1,6 @@ export type AuditLevel = 'debug' | 'info' | 'warn' | 'error'; -export type AuditEventType = +export type AuditEventType = // Tool execution | 'tool.start' | 'tool.success' | 'tool.error' | 'tool.denied' // Session lifecycle @@ -72,7 +72,7 @@ export interface ToolDeniedEvent { tool_name: string; reason: string; session_id?: string; - denial_type: 'policy' | 'hook' | 'not_found'; + denial_type: 'policy' | 'hook' | 'not_found' | 'autonomy_override'; } export interface SessionCreateEvent { diff --git a/src/backends/native/guardrails.test.ts b/src/backends/native/guardrails.test.ts new file mode 100644 index 0000000..e2232ad --- /dev/null +++ b/src/backends/native/guardrails.test.ts @@ -0,0 +1,47 @@ +import { describe, it, expect } from 'vitest'; +import { getTruthfulnessGuidance, type TruthfulnessMode } from './guardrails.js'; + +describe('guardrails', () => { + describe('getTruthfulnessGuidance', () => { + it('returns strict guidance for strict mode', () => { + const guidance = getTruthfulnessGuidance('strict'); + expect(guidance).toContain('STRICT MODE'); + expect(guidance).toContain('Always tell the truth'); + expect(guidance).toContain('No lies. No invention. No fabrication.'); + expect(guidance).toContain('Tool output and user data'); + }); + + it('returns standard guidance for standard mode', () => { + const guidance = getTruthfulnessGuidance('standard'); + expect(guidance).toContain('Truthfulness Policy'); + expect(guidance).not.toContain('STRICT MODE'); + expect(guidance).toContain('actual tool output'); + expect(guidance).toContain('report the failure accurately'); + }); + + it('returns relaxed guidance for relaxed mode', () => { + const guidance = getTruthfulnessGuidance('relaxed'); + expect(guidance).toContain('Truthfulness Policy'); + expect(guidance).toContain('Be accurate'); + expect(guidance.length).toBeLessThan(getTruthfulnessGuidance('standard').length); + }); + + it('all modes return non-empty strings', () => { + const modes: TruthfulnessMode[] = ['strict', 'standard', 'relaxed']; + for (const mode of modes) { + const guidance = getTruthfulnessGuidance(mode); + expect(guidance).toBeTruthy(); + expect(guidance.trim().length).toBeGreaterThan(0); + } + }); + + it('strict mode has the longest guidance', () => { + const strict = getTruthfulnessGuidance('strict'); + const standard = getTruthfulnessGuidance('standard'); + const relaxed = getTruthfulnessGuidance('relaxed'); + + expect(strict.length).toBeGreaterThan(standard.length); + expect(standard.length).toBeGreaterThan(relaxed.length); + }); + }); +}); diff --git a/src/backends/native/guardrails.ts b/src/backends/native/guardrails.ts new file mode 100644 index 0000000..c3650a5 --- /dev/null +++ b/src/backends/native/guardrails.ts @@ -0,0 +1,61 @@ +/** + * Guardrails for enforcing truthfulness policies in agent behavior. + * + * Provides textual guidance that can be injected into system prompts + * to enforce different levels of truthfulness constraints. + */ + +import type { TruthfulnessMode } from '../../config/schema.js'; + +export type { TruthfulnessMode } from '../../config/schema.js'; + +const STRICT_GUIDANCE = `## Truthfulness Policy (STRICT MODE) + +**Always tell the truth. No lies. No invention. No fabrication. No guessing presented as fact.** + +This is the single most important rule. It applies to everything — not just tool output, but all communication. + +**General truthfulness:** +- Never state something as fact unless you know it to be true. +- If you don't know something, say "I don't know." +- If you're uncertain, say so explicitly — never present a guess as a fact. +- Never invent information to appear helpful. Being honest about limitations IS being helpful. +- Do not embellish, exaggerate, or speculate without clearly labeling it as speculation. + +**Tool output and user data:** +- Only present information that was actually returned by a tool, script, or API call. +- If a tool fails or returns an error, **report the failure honestly** — do not fill in plausible content. +- If a tool returns no results, say so — do not invent results that "might" exist. +- When summarizing tool output, every claim must trace back to actual output. No embellishment. + +**Prefer "I don't know" or "the tool failed" over any fabricated content.** Always. This applies to all data: emails, calendar events, files, Kubernetes state, metrics, logs, and any other information accessed via tools or from memory.`; + +const STANDARD_GUIDANCE = `## Truthfulness Policy + +Always base your responses on actual tool output or verified information. Do not fabricate data or present guesses as facts. + +- If a tool fails, report the failure accurately. +- If you don't have information, say so clearly. +- When summarizing tool output, stay faithful to what was actually returned. + +Being honest about limitations is more valuable than inventing plausible-sounding content.`; + +const RELAXED_GUIDANCE = `## Truthfulness Policy + +Be accurate with tool output and avoid fabricating data when precision matters.`; + +/** + * Get the truthfulness guidance text for a given mode. + * + * Returns a markdown section suitable for injection into a system prompt. + */ +export function getTruthfulnessGuidance(mode: TruthfulnessMode): string { + switch (mode) { + case 'strict': + return STRICT_GUIDANCE; + case 'standard': + return STANDARD_GUIDANCE; + case 'relaxed': + return RELAXED_GUIDANCE; + } +} diff --git a/src/config/schema.test.ts b/src/config/schema.test.ts index fa7b703..690398a 100644 --- a/src/config/schema.test.ts +++ b/src/config/schema.test.ts @@ -215,3 +215,191 @@ describe('configSchema automation', () => { expect(result.automation.cron[0].timezone).toBe('America/New_York'); }); }); + +describe('configSchema — intents', () => { + const minimalConfig = { + telegram: { bot_token: 'test', allowed_chat_ids: [1] }, + models: { default: { provider: 'anthropic', model: 'claude-3' } }, + }; + + it('defaults intents to disabled with no rules', () => { + const result = configSchema.parse(minimalConfig); + expect(result.intents.enabled).toBe(false); + expect(result.intents.rules).toEqual([]); + }); + + it('accepts intent rule config', () => { + const result = configSchema.parse({ + ...minimalConfig, + intents: { + enabled: true, + match_threshold: 0.6, + rules: [ + { + name: 'deploy-rule', + patterns: ['deploy *'], + target: { type: 'agent', name: 'coder' }, + priority: 5, + enabled: true, + }, + ], + }, + }); + + expect(result.intents.enabled).toBe(true); + expect(result.intents.rules[0].target.type).toBe('agent'); + expect(result.intents.rules[0].target.name).toBe('coder'); + }); +}); + +describe('configSchema — routing_policy', () => { + const minimalConfig = { + telegram: { bot_token: 'test', allowed_chat_ids: [1] }, + models: { default: { provider: 'anthropic', model: 'claude-3' } }, + }; + + it('defaults routing_policy values', () => { + const result = configSchema.parse(minimalConfig); + expect(result.routing_policy.enabled).toBe(false); + expect(result.routing_policy.fast_path_threshold).toBe(0.85); + expect(result.routing_policy.llm_threshold).toBe(0.5); + expect(result.routing_policy.default_path).toBe('llm'); + }); +}); + +describe('configSchema — history_index', () => { + const minimalConfig = { + telegram: { bot_token: 'test', allowed_chat_ids: [1] }, + models: { default: { provider: 'anthropic', model: 'claude-3' } }, + }; + + it('defaults history indexing config', () => { + const result = configSchema.parse(minimalConfig); + expect(result.history_index.enabled).toBe(false); + expect(result.history_index.max_keywords).toBe(8); + expect(result.history_index.search_limit).toBe(10); + }); +}); + +describe('configSchema — memory injection strategy', () => { + const minimalConfig = { + telegram: { bot_token: 'test', allowed_chat_ids: [1] }, + models: { default: { provider: 'anthropic', model: 'claude-3' } }, + }; + + it('defaults memory injection settings', () => { + const result = configSchema.parse(minimalConfig); + expect(result.memory.injection_strategy).toBe('all'); + expect(result.memory.max_injection_tokens).toBe(2000); + }); + + it('accepts adaptive memory injection settings', () => { + const result = configSchema.parse({ + ...minimalConfig, + memory: { + injection_strategy: 'adaptive', + max_injection_tokens: 1200, + }, + }); + expect(result.memory.injection_strategy).toBe('adaptive'); + expect(result.memory.max_injection_tokens).toBe(1200); + }); +}); + +describe('configSchema — compaction importance threshold', () => { + const minimalConfig = { + telegram: { bot_token: 'test', allowed_chat_ids: [1] }, + models: { default: { provider: 'anthropic', model: 'claude-3' } }, + }; + + it('defaults compaction importance threshold to disabled behavior', () => { + const result = configSchema.parse(minimalConfig); + expect(result.compaction.importance_threshold).toBe(1); + }); + + it('accepts a custom importance threshold', () => { + const result = configSchema.parse({ + ...minimalConfig, + compaction: { + importance_threshold: 0.5, + }, + }); + expect(result.compaction.importance_threshold).toBe(0.5); + }); +}); + +describe('configSchema — prompt context level', () => { + const minimalConfig = { + telegram: { bot_token: 'test', allowed_chat_ids: [1] }, + models: { default: { provider: 'anthropic', model: 'claude-3' } }, + }; + + it('defaults prompt.context_level to normal', () => { + const result = configSchema.parse(minimalConfig); + expect(result.prompt.context_level).toBe('normal'); + }); + + it('accepts valid context levels', () => { + const result = configSchema.parse({ + ...minimalConfig, + prompt: { + context_level: 'debug', + }, + }); + + expect(result.prompt.context_level).toBe('debug'); + }); + + it('rejects invalid context levels', () => { + expect(() => configSchema.parse({ + ...minimalConfig, + prompt: { + context_level: 'verbose', + }, + })).toThrow(); + }); +}); + +describe('configSchema — agents truthfulness/autonomy', () => { + const minimalConfig = { + telegram: { bot_token: 'test', allowed_chat_ids: [1] }, + models: { default: { provider: 'anthropic', model: 'claude-3' } }, + }; + + it('defaults to standard truthfulness and autonomy', () => { + const result = configSchema.parse(minimalConfig); + expect(result.agents.truthfulness_mode).toBe('standard'); + expect(result.agents.autonomy_level).toBe('standard'); + }); + + it('accepts explicit truthfulness and autonomy modes', () => { + const result = configSchema.parse({ + ...minimalConfig, + agents: { + truthfulness_mode: 'strict', + autonomy_level: 'conservative', + }, + }); + + expect(result.agents.truthfulness_mode).toBe('strict'); + expect(result.agents.autonomy_level).toBe('conservative'); + }); + + it('rejects invalid truthfulness_mode', () => { + expect(() => configSchema.parse({ + ...minimalConfig, + agents: { + truthfulness_mode: 'always', + }, + })).toThrow(); + }); + + it('rejects invalid autonomy_level', () => { + expect(() => configSchema.parse({ + ...minimalConfig, + agents: { + autonomy_level: 'manual', + }, + })).toThrow(); + }); +}); diff --git a/src/config/schema.ts b/src/config/schema.ts index 0b57612..d1211a1 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -212,6 +212,9 @@ const automationSchema = z.object({ heartbeat: heartbeatSchema, }).default({}); +const truthfulnessModeSchema = z.enum(['strict', 'standard', 'relaxed']); +const autonomyLevelSchema = z.enum(['conservative', 'standard', 'autonomous']); + const agentsSchema = z.object({ primary_tier: z.enum(['fast', 'default', 'complex', 'local']).default('default'), delegation: z.object({ @@ -231,6 +234,10 @@ const agentsSchema = z.object({ max_delegation_depth: z.number().min(1).max(10).default(3), /** Maximum tool-loop iterations before the agent stops. */ max_iterations: z.number().min(1).max(50).default(10), + /** Truthfulness enforcement level: strict | standard | relaxed. */ + truthfulness_mode: truthfulnessModeSchema.default('standard'), + /** Autonomy level for tool execution: conservative | standard | autonomous. */ + autonomy_level: autonomyLevelSchema.default('standard'), }).default({}); const embeddingProviderSchema = z.enum(['openai', 'gemini', 'ollama', 'llamacpp', 'voyage']); @@ -252,6 +259,8 @@ const memorySchema = z.object({ enabled: z.boolean().default(true), dir: z.string().optional(), // Default: ~/.local/share/flynn/memory auto_extract: z.boolean().default(true), + injection_strategy: z.enum(['all', 'recent', 'adaptive']).default('all'), + max_injection_tokens: z.number().min(100).max(10000).default(2000), max_context_tokens: z.number().min(100).max(10000).default(2000), embedding: embeddingSchema, }).default({}); @@ -261,6 +270,7 @@ const compactionSchema = z.object({ threshold_pct: z.number().min(10).max(100).default(80), keep_turns: z.number().min(1).max(50).default(4), summary_max_tokens: z.number().min(128).max(4096).default(1024), + importance_threshold: z.number().min(0).max(1).default(1), }).default({}); const discordSchema = z.object({ @@ -375,6 +385,34 @@ const routingSchema = z.object({ senders: z.record(z.string(), z.string()).default({}), }).default({}); +const intentTargetTypeSchema = z.enum(['agent', 'skill']); + +const intentRuleSchema = z.object({ + name: z.string().min(1), + patterns: z.array(z.string().min(1)).min(1), + target: z.object({ + type: intentTargetTypeSchema, + name: z.string().min(1), + }), + priority: z.number().default(0), + enabled: z.boolean().default(true), +}); + +const intentsSchema = z.object({ + enabled: z.boolean().default(false), + match_threshold: z.number().min(0).max(1).default(0.7), + rules: z.array(intentRuleSchema).default([]), +}).default({}); + +const routingPolicySchema = z.object({ + enabled: z.boolean().default(false), + fast_path_threshold: z.number().min(0).max(1).default(0.85), + llm_threshold: z.number().min(0).max(1).default(0.5), + default_path: z.enum(['fast', 'llm']).default('llm'), +}).default({}); + +const contextLevelSchema = z.enum(['minimal', 'normal', 'detailed', 'debug']); + const promptSchema = z.object({ /** Additional directories to search for prompt template files. */ search_dirs: z.array(z.string()).default([]), @@ -383,12 +421,22 @@ const promptSchema = z.object({ name: z.string(), content: z.string(), })).default([]), + /** Prompt context depth control: minimal | normal | detailed | debug. */ + context_level: contextLevelSchema.default('normal'), }).default({}); const sessionsSchema = z.object({ ttl: z.string().default('30d'), }).default({}); +const historyIndexSchema = z.object({ + enabled: z.boolean().default(false), + max_keywords: z.number().min(1).max(20).default(8), + search_limit: z.number().min(1).max(100).default(10), + min_score: z.number().min(0).max(1).default(0.15), + routing_boost: z.number().min(0).max(0.2).default(0.05), +}).default({}); + const logLevelSchema = z.enum(['debug', 'info', 'warn', 'error', 'silent']).default('info'); const auditLevelSchema = z.enum(['debug', 'info', 'warn', 'error']).default('debug'); @@ -432,6 +480,9 @@ export const configSchema = z.object({ sandbox: sandboxSchema, agent_configs: agentConfigsSchema, routing: routingSchema, + intents: intentsSchema, + routing_policy: routingPolicySchema, + history_index: historyIndexSchema, sessions: sessionsSchema, pairing: pairingSchema, }); @@ -453,6 +504,7 @@ export type DiscordConfig = z.infer; export type SlackConfig = z.infer; export type WhatsAppConfig = z.infer; export type RetryPolicyConfig = z.infer; +export type ContextLevel = z.infer; export type PromptConfig = z.infer; export type ToolProfile = z.infer; export type ToolOverrideConfig = z.infer; @@ -460,6 +512,11 @@ export type ToolsConfig = z.infer; export type SandboxConfig = z.infer; export type AgentConfigEntry = z.infer; export type RoutingConfig = z.infer; +export type IntentTargetType = z.infer; +export type IntentRuleConfig = z.infer; +export type IntentsConfig = z.infer; +export type RoutingPolicyConfig = z.infer; +export type HistoryIndexConfig = z.infer; export type ServerConfig = z.infer; export type SessionsConfig = z.infer; export type ThinkingConfig = z.infer; @@ -475,3 +532,5 @@ export type PairingCodeConfig = z.infer; export type LogLevel = z.infer; export type AuditConfig = z.infer; export type AuditLevel = z.infer; +export type TruthfulnessMode = z.infer; +export type AutonomyLevel = z.infer; diff --git a/src/daemon/routing.ts b/src/daemon/routing.ts index eb256cd..a36151b 100644 --- a/src/daemon/routing.ts +++ b/src/daemon/routing.ts @@ -14,6 +14,9 @@ import { ModelRouter, type ModelTier } from '../models/index.js'; import { ToolRegistry, ToolExecutor } from '../tools/index.js'; import { SessionManager } from '../session/index.js'; import { AgentConfigRegistry, AgentRouter } from '../agents/index.js'; +import type { CommandRegistry } from '../commands/index.js'; +import type { ComponentRegistry } from '../intents/index.js'; +import type { RoutingPolicy } from '../routing/index.js'; /** * Create the unified message handler for the channel registry. @@ -33,6 +36,9 @@ export function createMessageRouter(deps: { agentConfigRegistry?: AgentConfigRegistry; agentRouter?: AgentRouter; sandboxManager?: SandboxManager; + commandRegistry?: CommandRegistry; + intentRegistry?: ComponentRegistry; + routingPolicy?: RoutingPolicy; }): { handler: (msg: InboundMessage, reply: (response: OutboundMessage) => Promise) => Promise; agents: Map; @@ -40,9 +46,9 @@ export function createMessageRouter(deps: { // Cache agents by session ID + agent config name to avoid recreating on every message const agents = new Map(); - function getOrCreateAgent(channel: string, senderId: string, metadata?: Record): { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector } { + function getOrCreateAgent(channel: string, senderId: string, metadata?: Record, agentOverride?: string): { orchestrator: AgentOrchestrator; collector: OutboundAttachmentCollector } { // Resolve agent config name via routing (sender → channel → default fallback) - const agentConfigName = deps.agentRouter?.resolve(channel, senderId); + const agentConfigName = agentOverride ?? deps.agentRouter?.resolve(channel, senderId); const agentConfig = agentConfigName ? deps.agentConfigRegistry?.get(agentConfigName) : undefined; // Cron job tier wins over agent config tier @@ -152,13 +158,17 @@ export function createMessageRouter(deps: { thresholdPct: deps.config.compaction.threshold_pct, keepTurns: deps.config.compaction.keep_turns, summaryMaxTokens: deps.config.compaction.summary_max_tokens, + importanceThreshold: deps.config.compaction.importance_threshold, } : undefined, modelName: deps.config.models.default.model, contextWindow: deps.config.models.default.context_window, memoryStore: deps.memoryStore, + memoryInjectionStrategy: deps.config.memory?.injection_strategy, + memoryMaxInjectionTokens: deps.config.memory?.max_injection_tokens, toolPolicyContext: { agent: effectiveTier, provider: effectiveProvider, + autonomyLevel: deps.config.agents.autonomy_level ?? 'standard', }, attachmentCollector: collector, }); @@ -169,94 +179,118 @@ export function createMessageRouter(deps: { } const handler = async (msg: InboundMessage, reply: (response: OutboundMessage) => Promise): Promise => { - const { orchestrator: agent, collector } = getOrCreateAgent(msg.channel, msg.senderId, msg.metadata); + let intentAgentOverride: string | undefined; + if (deps.config.intents?.enabled && deps.intentRegistry) { + const intentMatch = deps.intentRegistry.match(msg.text); - // Handle special commands - if (msg.metadata?.isCommand) { - if (msg.metadata.command === 'reset') { - agent.reset(); - // Clear per-session config overrides - const session = deps.sessionManager.getSession(msg.channel, msg.senderId); - session.deleteConfig('modelTier'); - return; - } - if (msg.metadata.command === 'model') { - const modelArg = msg.metadata.commandArgs as string | undefined; - const session = deps.sessionManager.getSession(msg.channel, msg.senderId); - - if (!modelArg) { - // Show current model tier - const currentTier = agent.getModelTier(); - const sessionOverride = session.getConfig('modelTier'); - const available = deps.modelRouter.getAvailableTiers(); - const labels = deps.modelRouter.getAllLabels(); - const lines = [`Active tier: ${currentTier}${sessionOverride ? ' (session override)' : ''}`]; - for (const tier of available) { - const label = labels[tier] ?? 'unknown'; - const marker = tier === currentTier ? ' ←' : ''; - lines.push(` ${tier}: ${label}${marker}`); - } - await reply({ text: lines.join('\n'), replyTo: msg.id }); - return; - } - - // Validate tier - const validTiers = deps.modelRouter.getAvailableTiers(); - if (!validTiers.includes(modelArg as ModelTier)) { - await reply({ text: `Model tier not available: ${modelArg}`, replyTo: msg.id }); - return; - } - - // Persist to session config - session.setConfig('modelTier', modelArg); - - // Update the orchestrator's agent tier - agent.setModelTier(modelArg as ModelTier); - - const label = deps.modelRouter.getLabel(modelArg as ModelTier); - await reply({ text: `Switched to model: ${modelArg} (${label})`, replyTo: msg.id }); - return; - } - if (msg.metadata.command === 'compact') { - const result = await agent.compact(); - if (result && result.compactedCount > 0) { - await reply({ - text: `Compacted ${result.compactedCount} messages: ${result.tokensBefore} → ${result.tokensAfter} tokens`, - replyTo: msg.id, + if (intentMatch?.rule.target.type === 'agent') { + let confidence = intentMatch.score; + if (deps.config.history_index?.enabled) { + const historySessionId = `${msg.channel}:${msg.senderId}`; + const historyHits = deps.sessionManager.searchHistory(msg.text, { + sessionId: historySessionId, + limit: 1, }); - } else { - await reply({ - text: 'Nothing to compact.', - replyTo: msg.id, - }); - } - return; - } - if (msg.metadata.command === 'usage') { - const usage = agent.getUsage(); - const lines = [ - '**Token Usage**', - '', - `Primary: ${usage.primary.inputTokens.toLocaleString()} in / ${usage.primary.outputTokens.toLocaleString()} out (${usage.primary.calls} calls)`, - ]; - - const delegationEntries = Object.entries(usage.delegation); - if (delegationEntries.length > 0) { - lines.push(''); - lines.push('Delegation:'); - for (const [tier, stats] of delegationEntries) { - lines.push(` ${tier}: ${stats.inputTokens.toLocaleString()} in / ${stats.outputTokens.toLocaleString()} out (${stats.calls} calls)`); + if (historyHits.length > 0 && historyHits[0].score >= (deps.config.history_index.min_score ?? 0.15)) { + confidence = Math.min(1, confidence + (deps.config.history_index.routing_boost ?? 0.05)); } } - lines.push(''); - lines.push(`**Total:** ${usage.total.inputTokens.toLocaleString()} in / ${usage.total.outputTokens.toLocaleString()} out (${usage.total.calls} calls)`); + const decision = deps.routingPolicy + ? deps.routingPolicy.decide({ confidence }) + : { path: 'fast' as const, reason: 'high_confidence' as const }; - if (usage.total.estimatedCost > 0) { - lines.push(`**Estimated cost:** $${usage.total.estimatedCost.toFixed(4)}`); + console.log(`[routing] intent=${intentMatch.rule.name} confidence=${confidence.toFixed(3)} path=${decision.path} reason=${decision.reason}`); + + if (decision.path === 'fast') { + intentAgentOverride = intentMatch.rule.target.name; } + } + } - await reply({ text: lines.join('\n'), replyTo: msg.id }); + const { orchestrator: agent, collector } = getOrCreateAgent(msg.channel, msg.senderId, msg.metadata, intentAgentOverride); + + const commandInput = msg.metadata?.isCommand && typeof msg.metadata.command === 'string' + ? `/${msg.metadata.command}${msg.metadata.commandArgs ? ` ${msg.metadata.commandArgs}` : ''}` + : msg.text; + + if (deps.commandRegistry && deps.commandRegistry.isCommand(commandInput)) { + const session = deps.sessionManager.getSession(msg.channel, msg.senderId); + const commandResult = await deps.commandRegistry.execute(commandInput, { + channel: msg.channel, + senderId: msg.senderId, + sessionId: session.id, + rawInput: commandInput, + services: { + getStatus: () => `Flynn is running. Active model tier: ${agent.getModelTier()}`, + getUsage: () => { + const usage = agent.getUsage(); + const lines = [ + '**Token Usage**', + '', + `Primary: ${usage.primary.inputTokens.toLocaleString()} in / ${usage.primary.outputTokens.toLocaleString()} out (${usage.primary.calls} calls)`, + ]; + + const delegationEntries = Object.entries(usage.delegation); + if (delegationEntries.length > 0) { + lines.push(''); + lines.push('Delegation:'); + for (const [tier, stats] of delegationEntries) { + lines.push(` ${tier}: ${stats.inputTokens.toLocaleString()} in / ${stats.outputTokens.toLocaleString()} out (${stats.calls} calls)`); + } + } + + lines.push(''); + lines.push(`**Total:** ${usage.total.inputTokens.toLocaleString()} in / ${usage.total.outputTokens.toLocaleString()} out (${usage.total.calls} calls)`); + + if (usage.total.estimatedCost > 0) { + lines.push(`**Estimated cost:** $${usage.total.estimatedCost.toFixed(4)}`); + } + + return lines.join('\n'); + }, + getModel: () => { + const currentTier = agent.getModelTier(); + const sessionOverride = session.getConfig('modelTier'); + const available = deps.modelRouter.getAvailableTiers(); + const labels = deps.modelRouter.getAllLabels(); + const lines = [`Active tier: ${currentTier}${sessionOverride ? ' (session override)' : ''}`]; + for (const tier of available) { + const label = labels[tier] ?? 'unknown'; + const marker = tier === currentTier ? ' ←' : ''; + lines.push(` ${tier}: ${label}${marker}`); + } + return lines.join('\n'); + }, + setModel: (tier) => { + const validTiers = deps.modelRouter.getAvailableTiers(); + if (!validTiers.includes(tier as ModelTier)) { + return `Model tier not available: ${tier}`; + } + session.setConfig('modelTier', tier); + agent.setModelTier(tier as ModelTier); + const label = deps.modelRouter.getLabel(tier as ModelTier); + return `Switched to model: ${tier} (${label})`; + }, + compact: async () => { + const result = await agent.compact(); + if (result && result.compactedCount > 0) { + return `Compacted ${result.compactedCount} messages: ${result.tokensBefore} → ${result.tokensAfter} tokens`; + } + return 'Nothing to compact.'; + }, + reset: () => { + agent.reset(); + session.deleteConfig('modelTier'); + return ''; + }, + }, + }); + + if (commandResult.handled) { + if (commandResult.text.trim()) { + await reply({ text: commandResult.text, replyTo: msg.id }); + } return; } } @@ -310,7 +344,7 @@ export function createMessageRouter(deps: { } } else { // No transcription endpoint configured — inform the user gracefully - messageText = `[Voice message received but audio transcription is not configured. Please configure the audio section in config.yaml to enable voice message support.]`; + messageText = '[Voice message received but audio transcription is not configured. Please configure the audio section in config.yaml to enable voice message support.]'; } // Remove audio attachments so buildUserMessage doesn't create audio content parts attachments = (msg.attachments ?? []).filter((a: Attachment) => !isSupportedAudio(a)); diff --git a/src/daemon/services.ts b/src/daemon/services.ts index aece07e..b8f631e 100644 --- a/src/daemon/services.ts +++ b/src/daemon/services.ts @@ -14,6 +14,9 @@ import { assembleSystemPrompt } from '../prompt/index.js'; import { resolve } from 'path'; import { homedir } from 'os'; import type { MemoryStore } from '../memory/store.js'; +import type { CommandRegistry } from '../commands/index.js'; +import type { ComponentRegistry } from '../intents/index.js'; +import type { RoutingPolicy } from '../routing/index.js'; // ── Skills ────────────────────────────────────────────────────── @@ -75,6 +78,8 @@ export function loadSystemPrompt(config: Config, skillRegistry: SkillRegistry): const result = assembleSystemPrompt({ searchDirs, extraSections: config.prompt.extra_sections, + contextLevel: config.prompt.context_level, + truthfulnessMode: config.agents.truthfulness_mode, }); if (result.loadedFiles.length > 0) { @@ -123,6 +128,9 @@ export interface GatewayDeps { lifecycle: Lifecycle; getChannelAgents: () => Map | null; memoryStore?: MemoryStore; + commandRegistry?: CommandRegistry; + intentRegistry?: ComponentRegistry; + routingPolicy?: RoutingPolicy; } export function createGateway(deps: GatewayDeps): GatewayServer { @@ -142,6 +150,9 @@ export function createGateway(deps: GatewayDeps): GatewayServer { }, authHttp: config.server.auth_http, lock: config.server.lock, + commandRegistry: deps.commandRegistry, + intentRegistry: deps.intentRegistry, + routingPolicy: deps.routingPolicy, uiDir: resolve(import.meta.dirname, '../gateway/ui'), config, channelRegistry, diff --git a/src/gateway/session-bridge.ts b/src/gateway/session-bridge.ts index 3836fd2..ed07907 100644 --- a/src/gateway/session-bridge.ts +++ b/src/gateway/session-bridge.ts @@ -1,6 +1,5 @@ import { randomUUID } from 'crypto'; import type { SessionManager } from '../session/manager.js'; -import type { Session } from '../session/manager.js'; import type { ModelClient } from '../models/types.js'; import type { ModelRouter, ModelTier } from '../models/router.js'; import type { ToolRegistry } from '../tools/registry.js'; @@ -198,10 +197,18 @@ export class SessionBridge { thresholdPct: config.compaction.threshold_pct, keepTurns: config.compaction.keep_turns, summaryMaxTokens: config.compaction.summary_max_tokens, + importanceThreshold: config.compaction.importance_threshold, } : undefined, modelName: config?.models.default.model, contextWindow: config?.models.default.context_window, memoryStore: this.config.memoryStore, + memoryInjectionStrategy: config?.memory?.injection_strategy, + memoryMaxInjectionTokens: config?.memory?.max_injection_tokens, + toolPolicyContext: { + agent: primaryTier, + provider: config?.models.default.provider, + autonomyLevel: config?.agents.autonomy_level ?? 'standard', + }, }); this.agents.set(sessionId, agent); diff --git a/src/hooks/autonomy.test.ts b/src/hooks/autonomy.test.ts new file mode 100644 index 0000000..7344e8a --- /dev/null +++ b/src/hooks/autonomy.test.ts @@ -0,0 +1,149 @@ +import { describe, it, expect } from 'vitest'; +import { resolveAutonomy, type AutonomyLevel } from './autonomy.js'; +import type { HookAction } from './types.js'; + +describe('autonomy', () => { + describe('resolveAutonomy', () => { + describe('conservative mode', () => { + const level: AutonomyLevel = 'conservative'; + + it('overrides silent to confirm for dangerous tools', () => { + const decision = resolveAutonomy('file.write', 'silent', level); + expect(decision.action).toBe('confirm'); + expect(decision.overridden).toBe(true); + expect(decision.reason).toContain('conservative mode'); + expect(decision.level).toBe('conservative'); + }); + + it('preserves confirm for dangerous tools', () => { + const decision = resolveAutonomy('shell.exec', 'confirm', level); + expect(decision.action).toBe('confirm'); + expect(decision.overridden).toBe(false); + expect(decision.reason).toContain('already requires confirmation'); + }); + + it('allows safe read tools with base action', () => { + const decision = resolveAutonomy('file.read', 'silent', level); + expect(decision.action).toBe('silent'); + expect(decision.overridden).toBe(false); + expect(decision.reason).toContain('safe'); + }); + + it('handles all dangerous tools', () => { + const dangerous = ['file.write', 'file.edit', 'file.patch', 'shell.exec', 'process.start', 'process.kill']; + for (const tool of dangerous) { + const decision = resolveAutonomy(tool, 'silent', level); + expect(decision.action).toBe('confirm'); + expect(decision.overridden).toBe(true); + } + }); + }); + + describe('standard mode', () => { + const level: AutonomyLevel = 'standard'; + + it('overrides silent to confirm for dangerous tools without explicit hook', () => { + const decision = resolveAutonomy('file.write', 'silent', level); + expect(decision.action).toBe('confirm'); + expect(decision.overridden).toBe(true); + expect(decision.reason).toContain('standard mode'); + expect(decision.reason).toContain('dangerous tool without explicit hook'); + }); + + it('preserves confirm for dangerous tools', () => { + const decision = resolveAutonomy('shell.exec', 'confirm', level); + expect(decision.action).toBe('confirm'); + expect(decision.overridden).toBe(false); + }); + + it('preserves log for dangerous tools', () => { + const decision = resolveAutonomy('file.edit', 'log', level); + expect(decision.action).toBe('confirm'); + expect(decision.overridden).toBe(true); + }); + + it('allows safe tools with base action', () => { + const decision = resolveAutonomy('file.read', 'silent', level); + expect(decision.action).toBe('silent'); + expect(decision.overridden).toBe(false); + }); + + it('allows non-dangerous tools with any base action', () => { + const decision = resolveAutonomy('custom.tool', 'silent', level); + expect(decision.action).toBe('silent'); + expect(decision.overridden).toBe(false); + }); + }); + + describe('autonomous mode', () => { + const level: AutonomyLevel = 'autonomous'; + + it('defers to base action by default', () => { + const decision = resolveAutonomy('file.write', 'silent', level); + expect(decision.action).toBe('silent'); + expect(decision.overridden).toBe(false); + expect(decision.reason).toContain('Base action'); + }); + + it('preserves log action', () => { + const decision = resolveAutonomy('shell.exec', 'log', level); + expect(decision.action).toBe('log'); + expect(decision.overridden).toBe(false); + }); + + it('preserves explicit confirm hooks', () => { + const decision = resolveAutonomy('dangerous.operation', 'confirm', level); + expect(decision.action).toBe('confirm'); + expect(decision.overridden).toBe(false); + expect(decision.reason).toContain("Base action 'confirm'"); + }); + + it('does not force confirmation for dangerous tools', () => { + const dangerous = ['file.write', 'file.edit', 'file.patch', 'shell.exec', 'process.start', 'process.kill']; + for (const tool of dangerous) { + const decision = resolveAutonomy(tool, 'silent', level); + expect(decision.action).toBe('silent'); + } + }); + }); + + describe('action matrix coverage', () => { + it('handles all combinations of levels and base actions', () => { + const levels: AutonomyLevel[] = ['conservative', 'standard', 'autonomous']; + const actions: HookAction[] = ['silent', 'log', 'confirm']; + const tools = ['file.write', 'file.read', 'custom.tool']; + + for (const level of levels) { + for (const action of actions) { + for (const tool of tools) { + const decision = resolveAutonomy(tool, action, level); + expect(decision.action).toBeDefined(); + expect(decision.level).toBe(level); + expect(decision.reason).toBeTruthy(); + expect(typeof decision.overridden).toBe('boolean'); + } + } + } + }); + }); + + describe('metadata', () => { + it('always includes reason and level', () => { + const decision = resolveAutonomy('file.write', 'silent', 'conservative'); + expect(decision.reason).toBeTruthy(); + expect(decision.reason.length).toBeGreaterThan(0); + expect(decision.level).toBe('conservative'); + }); + + it('marks overridden correctly', () => { + // Conservative mode overrides + expect(resolveAutonomy('file.write', 'silent', 'conservative').overridden).toBe(true); + expect(resolveAutonomy('file.read', 'silent', 'conservative').overridden).toBe(false); + + // Autonomous mode does not override + expect(resolveAutonomy('shell.exec', 'log', 'autonomous').overridden).toBe(false); + expect(resolveAutonomy('shell.exec', 'silent', 'autonomous').overridden).toBe(false); + }); + }); + }); +}); diff --git a/src/hooks/autonomy.ts b/src/hooks/autonomy.ts new file mode 100644 index 0000000..125a4cf --- /dev/null +++ b/src/hooks/autonomy.ts @@ -0,0 +1,115 @@ +/** + * Autonomy-aware tool execution policy. + * + * Determines whether a tool action should proceed automatically, require + * confirmation, or be denied based on the configured autonomy level and + * tool characteristics. + */ + +import type { AutonomyLevel } from '../config/schema.js'; +import type { HookAction } from './types.js'; + +export type { AutonomyLevel } from '../config/schema.js'; + +/** + * Metadata about an autonomy decision. + */ +export interface AutonomyDecision { + /** The final action to take. */ + action: HookAction; + /** Whether the action was overridden by autonomy policy. */ + overridden: boolean; + /** Explanation of why the action was chosen or overridden. */ + reason: string; + /** The autonomy level that was applied. */ + level: AutonomyLevel; +} + +/** + * Tool categories for autonomy classification. + */ +const DANGEROUS_TOOLS = new Set([ + 'file.write', + 'file.edit', + 'file.patch', + 'shell.exec', + 'process.start', + 'process.kill', +]); + +/** + * Resolve the appropriate hook action for a tool given the autonomy level. + * + * @param toolName - The tool being executed. + * @param baseAction - The base action from the HookEngine (if any). + * @param level - The autonomy level to apply. + * @returns An AutonomyDecision with the final action and metadata. + */ +export function resolveAutonomy( + toolName: string, + baseAction: HookAction, + level: AutonomyLevel, +): AutonomyDecision { + // Conservative mode: confirm all dangerous tools, even if base action is silent + if (level === 'conservative') { + if (DANGEROUS_TOOLS.has(toolName)) { + if (baseAction !== 'confirm') { + return { + action: 'confirm', + overridden: true, + reason: `Tool '${toolName}' requires confirmation in conservative mode`, + level, + }; + } + return { + action: 'confirm', + overridden: false, + reason: `Tool '${toolName}' already requires confirmation`, + level, + }; + } + // Safe tools can use base action + return { + action: baseAction, + overridden: false, + reason: `Tool '${toolName}' is safe in conservative mode`, + level, + }; + } + + // Standard mode: dangerous tools still require confirmation if not explicitly silenced + if (level === 'standard') { + if (DANGEROUS_TOOLS.has(toolName) && baseAction !== 'confirm') { + return { + action: 'confirm', + overridden: true, + reason: `Tool '${toolName}' requires confirmation in standard mode (dangerous tool without explicit hook)`, + level, + }; + } + return { + action: baseAction, + overridden: false, + reason: `Base action '${baseAction}' applied in standard mode`, + level, + }; + } + + // Autonomous mode: defer to explicit hook policy + if (level === 'autonomous') { + return { + action: baseAction, + overridden: false, + reason: `Base action '${baseAction}' applied in autonomous mode`, + level, + }; + } + + // Fallback (should not happen with correct AutonomyLevel type) + return { + action: baseAction, + overridden: false, + reason: 'Unknown autonomy level; using base action', + level, + }; +} diff --git a/src/hooks/index.ts b/src/hooks/index.ts index e06e38b..bb872dc 100644 --- a/src/hooks/index.ts +++ b/src/hooks/index.ts @@ -1,2 +1,4 @@ export { HookEngine } from './engine.js'; +export { resolveAutonomy } from './autonomy.js'; export type { HookAction, HookResult, PendingConfirmation, HookConfig } from './types.js'; +export type { AutonomyLevel, AutonomyDecision } from './autonomy.js'; diff --git a/src/prompt/template.test.ts b/src/prompt/template.test.ts index 644e6b7..a50c14b 100644 --- a/src/prompt/template.test.ts +++ b/src/prompt/template.test.ts @@ -184,4 +184,144 @@ describe('assembleSystemPrompt', () => { expect(result.prompt).toContain('Current date:'); expect(result.prompt).toContain('Current time:'); }); + + it('uses normal as default context level', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Soul.'); + writeFileSync(join(dir, 'AGENTS.md'), 'Agent rules.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + extraSections: [{ name: 'Custom', content: 'Keep this.' }], + }); + + expect(result.prompt).toContain('Soul.'); + expect(result.prompt).toContain('# Agent Instructions\n\nAgent rules.'); + expect(result.prompt).toContain('# Custom\n\nKeep this.'); + }); + + it('minimal loads SOUL plus runtime only', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Soul only.'); + writeFileSync(join(dir, 'AGENTS.md'), 'Do not include.'); + writeFileSync(join(dir, 'IDENTITY.md'), 'Do not include.'); + writeFileSync(join(dir, 'USER.md'), 'Do not include.'); + writeFileSync(join(dir, 'TOOLS.md'), 'Do not include.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + extraSections: [{ name: 'Custom', content: 'Do not include.' }], + contextLevel: 'minimal', + }); + + expect(result.prompt).toContain('Soul only.'); + expect(result.prompt).toContain('# Runtime Context'); + expect(result.prompt).not.toContain('# Agent Instructions'); + expect(result.prompt).not.toContain('# Identity Customization'); + expect(result.prompt).not.toContain('# User Context'); + expect(result.prompt).not.toContain('# Tool Instructions'); + expect(result.prompt).not.toContain('# Custom'); + }); + + it('normal keeps current template behavior', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Soul.'); + writeFileSync(join(dir, 'AGENTS.md'), 'Agents.'); + writeFileSync(join(dir, 'IDENTITY.md'), 'Identity.'); + writeFileSync(join(dir, 'USER.md'), 'User.'); + writeFileSync(join(dir, 'TOOLS.md'), 'Tools.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + contextLevel: 'normal', + }); + + expect(result.prompt).toContain('Soul.'); + expect(result.prompt).toContain('# Agent Instructions\n\nAgents.'); + expect(result.prompt).toContain('# Identity Customization\n\nIdentity.'); + expect(result.prompt).toContain('# User Context\n\nUser.'); + expect(result.prompt).toContain('# Tool Instructions\n\nTools.'); + }); + + it('detailed includes extra sections', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Soul.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + extraSections: [{ name: 'Detailed Notes', content: 'Include me.' }], + contextLevel: 'detailed', + }); + + expect(result.prompt).toContain('Soul.'); + expect(result.prompt).toContain('# Detailed Notes\n\nInclude me.'); + }); + + it('debug appends prompt debug section with loaded files', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Soul.'); + writeFileSync(join(dir, 'AGENTS.md'), 'Agents.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + contextLevel: 'debug', + }); + + expect(result.prompt).toContain('# Prompt Debug'); + expect(result.prompt).toContain('Context level: debug'); + expect(result.prompt).toContain('Loaded files:'); + expect(result.prompt).toContain('SOUL.md'); + expect(result.prompt).toContain('AGENTS.md'); + expect(result.prompt).toContain('Directory resolution notes:'); + }); + + it('minimal skips extra sections', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Soul.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + extraSections: [{ name: 'Skipped', content: 'Should not appear.' }], + contextLevel: 'minimal', + }); + + expect(result.prompt).not.toContain('# Skipped'); + }); + + it('injects truthfulness policy for standard mode', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Soul.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + truthfulnessMode: 'standard', + }); + + expect(result.prompt).toContain('## Truthfulness Policy'); + }); + + it('injects strict truthfulness policy in strict mode', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Soul.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + truthfulnessMode: 'strict', + }); + + expect(result.prompt).toContain('STRICT MODE'); + }); + + it('does not inject truthfulness policy in relaxed mode', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Soul.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + truthfulnessMode: 'relaxed', + }); + + expect(result.prompt).not.toContain('## Truthfulness Policy'); + expect(result.prompt).toContain('# Runtime Context'); + }); }); diff --git a/src/prompt/template.ts b/src/prompt/template.ts index ef39af6..9ef0ab9 100644 --- a/src/prompt/template.ts +++ b/src/prompt/template.ts @@ -1,5 +1,7 @@ import { readFileSync, existsSync } from 'fs'; import { resolve } from 'path'; +import type { ContextLevel, TruthfulnessMode } from '../config/schema.js'; +import { getTruthfulnessGuidance } from '../backends/native/guardrails.js'; /** Ordered list of prompt template files to look for. */ const PROMPT_FILES = [ @@ -15,6 +17,10 @@ export interface PromptTemplateConfig { searchDirs: string[]; /** Additional sections to inject (e.g., from config). */ extraSections?: Array<{ name: string; content: string }>; + /** Prompt context depth. Defaults to normal. */ + contextLevel?: ContextLevel; + /** Truthfulness enforcement mode. Defaults to standard. */ + truthfulnessMode?: TruthfulnessMode; } export interface PromptTemplateResult { @@ -32,10 +38,21 @@ export interface PromptTemplateResult { * Sections are assembled in the order defined in PROMPT_FILES. */ export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTemplateResult { + const level = config.contextLevel ?? 'normal'; + const truthfulnessMode = config.truthfulnessMode ?? 'standard'; + const includeAllTemplates = level !== 'minimal'; + const includeExtraSections = level !== 'minimal'; + const includeDebugSection = level === 'debug'; + const includeTruthfulness = truthfulnessMode === 'strict' || truthfulnessMode === 'standard'; + const sections: string[] = []; const loadedFiles: string[] = []; for (const { name, section } of PROMPT_FILES) { + if (!includeAllTemplates && name !== 'SOUL.md') { + continue; + } + for (const dir of config.searchDirs) { const filePath = resolve(dir, name); if (existsSync(filePath)) { @@ -55,7 +72,7 @@ export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTempla } // Add extra sections - if (config.extraSections) { + if (includeExtraSections && config.extraSections) { for (const { name, content } of config.extraSections) { if (content.trim()) { sections.push(`# ${name}\n\n${content.trim()}`); @@ -63,6 +80,11 @@ export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTempla } } + // Inject truthfulness guidance (for strict and standard modes) + if (includeTruthfulness) { + sections.push(getTruthfulnessGuidance(truthfulnessMode)); + } + // Inject current date/time as runtime context const now = new Date(); const dateStr = now.toLocaleDateString('en-US', { @@ -80,10 +102,26 @@ export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTempla const runtimeContext = `# Runtime Context\n\nCurrent date: ${dateStr}\nCurrent time: ${timeStr}`; sections.push(runtimeContext); - // Fallback if only the runtime context was loaded (no actual prompt files) - if (sections.length === 1) { + if (includeDebugSection) { + const loadedFilesList = loadedFiles.length > 0 + ? loadedFiles.map((filePath) => `- ${filePath}`).join('\n') + : '- none'; + const searchDirsList = config.searchDirs.length > 0 + ? config.searchDirs.map((dir) => `- ${dir}`).join('\n') + : '- none'; + + sections.push( + `# Prompt Debug\n\nContext level: ${level}\n\nLoaded files:\n${loadedFilesList}\n\nDirectory resolution notes:\n${searchDirsList}\n- First match wins per template file.`, + ); + } + + // Fallback when no prompt template files were found. + if (loadedFiles.length === 0) { + const truthfulnessSection = includeTruthfulness + ? `${getTruthfulnessGuidance(truthfulnessMode)}\n\n` + : ''; return { - prompt: `You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.\n\n${runtimeContext}`, + prompt: `You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.\n\n${truthfulnessSection}${runtimeContext}`, loadedFiles: [], }; } diff --git a/src/tools/executor.test.ts b/src/tools/executor.test.ts index 7018496..90b69ac 100644 --- a/src/tools/executor.test.ts +++ b/src/tools/executor.test.ts @@ -35,6 +35,13 @@ const bigOutputTool: Tool = { execute: async () => ({ success: true, output: 'x'.repeat(100_000) }), }; +const fileWriteLikeTool: Tool = { + name: 'file.write', + description: 'Test file write tool', + inputSchema: { type: 'object', properties: {} }, + execute: async () => ({ success: true, output: 'ok' }), +}; + describe('ToolExecutor', () => { it('executes a tool and returns result', async () => { const registry = new ToolRegistry(); @@ -123,4 +130,34 @@ describe('ToolExecutor', () => { expect(result.success).toBe(false); expect(result.error).toContain('denied'); }); + + it('conservative autonomy requires confirm for dangerous tools', async () => { + const registry = new ToolRegistry(); + registry.register(fileWriteLikeTool); + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const resultPromise = executor.execute('file.write', {}, { autonomyLevel: 'conservative' }); + const pending = hooks.getPendingConfirmations(); + expect(pending).toHaveLength(1); + hooks.resolveConfirmation(pending[0].id, { approved: true }); + + const result = await resultPromise; + expect(result.success).toBe(true); + }); + + it('autonomous mode defers to explicit confirm hooks', async () => { + const registry = new ToolRegistry(); + registry.register(echoTool); + const hooks = new HookEngine({ confirm: ['test.*'], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const resultPromise = executor.execute('test.echo', { text: 'hi' }, { autonomyLevel: 'autonomous' }); + const pending = hooks.getPendingConfirmations(); + expect(pending).toHaveLength(1); + hooks.resolveConfirmation(pending[0].id, { approved: true }); + + const result = await resultPromise; + expect(result.success).toBe(true); + }); }); diff --git a/src/tools/executor.ts b/src/tools/executor.ts index e8f3136..0ec57a9 100644 --- a/src/tools/executor.ts +++ b/src/tools/executor.ts @@ -2,6 +2,7 @@ import type { ToolResult } from './types.js'; import type { ToolRegistry } from './registry.js'; import type { HookEngine } from '../hooks/engine.js'; import type { ToolPolicyContext } from './policy.js'; +import { resolveAutonomy } from '../hooks/autonomy.js'; import { auditLogger } from '../audit/index.js'; export interface ToolExecutorConfig { @@ -53,31 +54,49 @@ export class ToolExecutor { } } - // Check hooks - const action = this.hooks.getAction(toolName); - if (action === 'confirm') { + // Check hooks with autonomy resolution + const baseAction = this.hooks.getAction(toolName); + const autonomyLevel = context?.autonomyLevel ?? 'standard'; + const autonomyDecision = resolveAutonomy(toolName, baseAction, autonomyLevel); + const finalAction = autonomyDecision.action; + + // Log autonomy override if applicable + if (autonomyDecision.overridden) { + auditLogger?.toolDenied({ + tool_name: toolName, + reason: `Autonomy override: ${autonomyDecision.reason}`, + denial_type: 'autonomy_override', + session_id: context?.sessionId, + }); + } + + if (finalAction === 'confirm') { const hookResult = await this.hooks.requestConfirmation( toolName, args as Record, ); if (!hookResult.approved) { + const denyReason = hookResult.reason ?? 'no reason'; + const detailedReason = autonomyDecision.overridden + ? `${denyReason} (autonomy: ${autonomyDecision.reason})` + : denyReason; auditLogger?.toolDenied({ tool_name: toolName, - reason: hookResult.reason ?? 'no reason', + reason: detailedReason, denial_type: 'hook', session_id: context?.sessionId, }); return { success: false, output: '', - error: `Tool '${toolName}' denied by user: ${hookResult.reason ?? 'no reason'}`, + error: `Tool '${toolName}' denied by user: ${detailedReason}`, }; } } // Execute with timeout const startTime = Date.now(); - + auditLogger?.toolStart({ tool_name: toolName, tool_args: args, @@ -113,7 +132,7 @@ export class ToolExecutor { } catch (error) { const duration = Date.now() - startTime; const errorMessage = error instanceof Error ? error.message : String(error); - + auditLogger?.toolError({ tool_name: toolName, error: errorMessage, diff --git a/src/tools/integration.test.ts b/src/tools/integration.test.ts index 0102df7..867b18d 100644 --- a/src/tools/integration.test.ts +++ b/src/tools/integration.test.ts @@ -44,6 +44,7 @@ describe('Tool integration (end-to-end)', () => { systemPrompt: 'You have tools.', toolRegistry: registry, toolExecutor: executor, + toolPolicyContext: { autonomyLevel: 'autonomous' }, }); const result = await agent.process('run echo integration_test'); @@ -89,9 +90,10 @@ describe('Tool integration (end-to-end)', () => { const agent = new NativeAgent({ modelClient: mockClient, - systemPrompt: 'You have file tools.', + systemPrompt: 'You have tools.', toolRegistry: registry, toolExecutor: executor, + toolPolicyContext: { autonomyLevel: 'autonomous' }, }); try { @@ -141,6 +143,7 @@ describe('Tool integration (end-to-end)', () => { systemPrompt: 'You have tools.', toolRegistry: registry, toolExecutor: executor, + toolPolicyContext: { autonomyLevel: 'autonomous' }, }); const result = await agent.process('verify tool results'); diff --git a/src/tools/policy.ts b/src/tools/policy.ts index b22ff94..71dd7e0 100644 --- a/src/tools/policy.ts +++ b/src/tools/policy.ts @@ -1,4 +1,4 @@ -import type { ToolsConfig, ToolProfile } from '../config/schema.js'; +import type { AutonomyLevel, ToolsConfig, ToolProfile } from '../config/schema.js'; import type { Tool } from './types.js'; // ── Profile definitions ───────────────────────────────────────────── @@ -140,6 +140,8 @@ export interface ToolPolicyContext { sender?: string; /** Model tier for audit logging. */ tier?: string; + /** Autonomy level for tool execution (affects confirmation requirements). */ + autonomyLevel?: AutonomyLevel; } // ── ToolPolicy engine ───────────────────────────────────────────────