feat(policy): enforce truthfulness and autonomy guardrails

Add runtime truthfulness modes and autonomy-level tool gating with audit metadata for overrides/denials.

Wire policy through prompt assembly, tool execution context, and daemon/gateway agent paths; update tests and planning state for Phase 3 PR #2 completion.
This commit is contained in:
William Valentin
2026-02-12 16:06:45 -08:00
parent 125af4e832
commit 90ce622080
18 changed files with 1172 additions and 104 deletions
+140
View File
@@ -184,4 +184,144 @@ describe('assembleSystemPrompt', () => {
expect(result.prompt).toContain('Current date:');
expect(result.prompt).toContain('Current time:');
});
// With no contextLevel supplied, template files and extra sections are both rendered.
it('uses normal as default context level', () => {
  const workspace = makeTempDir();
  const fixtures: Array<[string, string]> = [
    ['SOUL.md', 'Soul.'],
    ['AGENTS.md', 'Agent rules.'],
  ];
  for (const [fileName, body] of fixtures) {
    writeFileSync(join(workspace, fileName), body);
  }

  const { prompt } = assembleSystemPrompt({
    searchDirs: [workspace],
    extraSections: [{ name: 'Custom', content: 'Keep this.' }],
  });

  const expectedFragments = [
    'Soul.',
    '# Agent Instructions\n\nAgent rules.',
    '# Custom\n\nKeep this.',
  ];
  for (const fragment of expectedFragments) {
    expect(prompt).toContain(fragment);
  }
});
// At the minimal level only SOUL.md and the runtime context should reach the prompt.
it('minimal loads SOUL plus runtime only', () => {
  const workspace = makeTempDir();
  writeFileSync(join(workspace, 'SOUL.md'), 'Soul only.');
  // Every other template exists on disk so that its absence from the prompt is meaningful.
  for (const fileName of ['AGENTS.md', 'IDENTITY.md', 'USER.md', 'TOOLS.md']) {
    writeFileSync(join(workspace, fileName), 'Do not include.');
  }

  const { prompt } = assembleSystemPrompt({
    searchDirs: [workspace],
    extraSections: [{ name: 'Custom', content: 'Do not include.' }],
    contextLevel: 'minimal',
  });

  expect(prompt).toContain('Soul only.');
  expect(prompt).toContain('# Runtime Context');

  const excludedHeadings = [
    '# Agent Instructions',
    '# Identity Customization',
    '# User Context',
    '# Tool Instructions',
    '# Custom',
  ];
  for (const heading of excludedHeadings) {
    expect(prompt).not.toContain(heading);
  }
});
// An explicit 'normal' level renders every template file under its section heading.
it('normal keeps current template behavior', () => {
  const workspace = makeTempDir();
  // Each row: template file name, its on-disk content, and the fragment expected in the prompt.
  const cases = [
    { file: 'SOUL.md', body: 'Soul.', rendered: 'Soul.' },
    { file: 'AGENTS.md', body: 'Agents.', rendered: '# Agent Instructions\n\nAgents.' },
    { file: 'IDENTITY.md', body: 'Identity.', rendered: '# Identity Customization\n\nIdentity.' },
    { file: 'USER.md', body: 'User.', rendered: '# User Context\n\nUser.' },
    { file: 'TOOLS.md', body: 'Tools.', rendered: '# Tool Instructions\n\nTools.' },
  ];
  cases.forEach(({ file, body }) => {
    writeFileSync(join(workspace, file), body);
  });

  const { prompt } = assembleSystemPrompt({
    searchDirs: [workspace],
    contextLevel: 'normal',
  });

  cases.forEach(({ rendered }) => {
    expect(prompt).toContain(rendered);
  });
});
// The detailed level must still render caller-supplied extra sections.
it('detailed includes extra sections', () => {
  const workspace = makeTempDir();
  const soulPath = join(workspace, 'SOUL.md');
  writeFileSync(soulPath, 'Soul.');

  const extraSections = [{ name: 'Detailed Notes', content: 'Include me.' }];
  const { prompt } = assembleSystemPrompt({
    searchDirs: [workspace],
    extraSections,
    contextLevel: 'detailed',
  });

  expect(prompt).toContain('Soul.');
  expect(prompt).toContain('# Detailed Notes\n\nInclude me.');
});
// The debug level adds a "Prompt Debug" section naming the level, loaded files and search dirs.
it('debug appends prompt debug section with loaded files', () => {
  const workspace = makeTempDir();
  const templates: Record<string, string> = {
    'SOUL.md': 'Soul.',
    'AGENTS.md': 'Agents.',
  };
  for (const [fileName, body] of Object.entries(templates)) {
    writeFileSync(join(workspace, fileName), body);
  }

  const { prompt } = assembleSystemPrompt({
    searchDirs: [workspace],
    contextLevel: 'debug',
  });

  const expectedFragments = [
    '# Prompt Debug',
    'Context level: debug',
    'Loaded files:',
    'SOUL.md',
    'AGENTS.md',
    'Directory resolution notes:',
  ];
  expectedFragments.forEach((fragment) => {
    expect(prompt).toContain(fragment);
  });
});
// Extra sections passed by the caller are dropped at the minimal level.
it('minimal skips extra sections', () => {
  const workspace = makeTempDir();
  writeFileSync(join(workspace, 'SOUL.md'), 'Soul.');

  const skippedSection = { name: 'Skipped', content: 'Should not appear.' };
  const { prompt } = assembleSystemPrompt({
    searchDirs: [workspace],
    extraSections: [skippedSection],
    contextLevel: 'minimal',
  });

  expect(prompt).not.toContain('# Skipped');
});
// Standard mode is expected to add the truthfulness policy heading to the prompt.
it('injects truthfulness policy for standard mode', () => {
  const workspace = makeTempDir();
  const soulPath = join(workspace, 'SOUL.md');
  writeFileSync(soulPath, 'Soul.');

  const { prompt } = assembleSystemPrompt({
    searchDirs: [workspace],
    truthfulnessMode: 'standard',
  });

  expect(prompt).toContain('## Truthfulness Policy');
});
// Strict mode is expected to surface the STRICT MODE marker in the prompt.
it('injects strict truthfulness policy in strict mode', () => {
  const workspace = makeTempDir();
  writeFileSync(join(workspace, 'SOUL.md'), 'Soul.');

  const assembled = assembleSystemPrompt({
    searchDirs: [workspace],
    truthfulnessMode: 'strict',
  });
  const promptText = assembled.prompt;

  expect(promptText).toContain('STRICT MODE');
});
// Relaxed mode omits the truthfulness policy heading but still emits the runtime context.
it('does not inject truthfulness policy in relaxed mode', () => {
  const workspace = makeTempDir();
  writeFileSync(join(workspace, 'SOUL.md'), 'Soul.');

  const { prompt } = assembleSystemPrompt({
    searchDirs: [workspace],
    truthfulnessMode: 'relaxed',
  });

  const absent = '## Truthfulness Policy';
  const present = '# Runtime Context';
  expect(prompt).not.toContain(absent);
  expect(prompt).toContain(present);
});
});
+42 -4
View File
@@ -1,5 +1,7 @@
import { readFileSync, existsSync } from 'fs';
import { resolve } from 'path';
import type { ContextLevel, TruthfulnessMode } from '../config/schema.js';
import { getTruthfulnessGuidance } from '../backends/native/guardrails.js';
/** Ordered list of prompt template files to look for. */
const PROMPT_FILES = [
@@ -15,6 +17,10 @@ export interface PromptTemplateConfig {
searchDirs: string[];
/** Additional sections to inject (e.g., from config). */
extraSections?: Array<{ name: string; content: string }>;
/** Prompt context depth. Defaults to normal. */
contextLevel?: ContextLevel;
/** Truthfulness enforcement mode. Defaults to standard. */
truthfulnessMode?: TruthfulnessMode;
}
export interface PromptTemplateResult {
@@ -32,10 +38,21 @@ export interface PromptTemplateResult {
* Sections are assembled in the order defined in PROMPT_FILES.
*/
export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTemplateResult {
const level = config.contextLevel ?? 'normal';
const truthfulnessMode = config.truthfulnessMode ?? 'standard';
const includeAllTemplates = level !== 'minimal';
const includeExtraSections = level !== 'minimal';
const includeDebugSection = level === 'debug';
const includeTruthfulness = truthfulnessMode === 'strict' || truthfulnessMode === 'standard';
const sections: string[] = [];
const loadedFiles: string[] = [];
for (const { name, section } of PROMPT_FILES) {
if (!includeAllTemplates && name !== 'SOUL.md') {
continue;
}
for (const dir of config.searchDirs) {
const filePath = resolve(dir, name);
if (existsSync(filePath)) {
@@ -55,7 +72,7 @@ export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTempla
}
// Add extra sections
if (config.extraSections) {
if (includeExtraSections && config.extraSections) {
for (const { name, content } of config.extraSections) {
if (content.trim()) {
sections.push(`# ${name}\n\n${content.trim()}`);
@@ -63,6 +80,11 @@ export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTempla
}
}
// Inject truthfulness guidance (for strict and standard modes)
if (includeTruthfulness) {
sections.push(getTruthfulnessGuidance(truthfulnessMode));
}
// Inject current date/time as runtime context
const now = new Date();
const dateStr = now.toLocaleDateString('en-US', {
@@ -80,10 +102,26 @@ export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTempla
const runtimeContext = `# Runtime Context\n\nCurrent date: ${dateStr}\nCurrent time: ${timeStr}`;
sections.push(runtimeContext);
// Fallback if only the runtime context was loaded (no actual prompt files)
if (sections.length === 1) {
if (includeDebugSection) {
const loadedFilesList = loadedFiles.length > 0
? loadedFiles.map((filePath) => `- ${filePath}`).join('\n')
: '- none';
const searchDirsList = config.searchDirs.length > 0
? config.searchDirs.map((dir) => `- ${dir}`).join('\n')
: '- none';
sections.push(
`# Prompt Debug\n\nContext level: ${level}\n\nLoaded files:\n${loadedFilesList}\n\nDirectory resolution notes:\n${searchDirsList}\n- First match wins per template file.`,
);
}
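// Fallback when no prompt template files were found. The fallback prompt is built only from
// the default intro, the truthfulness guidance (if enabled) and the runtime context; any extra
// sections or debug section pushed onto `sections` above are not included in it.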
if (loadedFiles.length === 0) {
const truthfulnessSection = includeTruthfulness
? `${getTruthfulnessGuidance(truthfulnessMode)}\n\n`
: '';
return {
prompt: `You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.\n\n${runtimeContext}`,
prompt: `You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.\n\n${truthfulnessSection}${runtimeContext}`,
loadedFiles: [],
};
}