feat(policy): enforce truthfulness and autonomy guardrails
Add runtime truthfulness modes (strict, standard, relaxed) and autonomy-level tool gating, recording audit metadata for overrides and denials. Wire the policy through prompt assembly, the tool execution context, and the daemon/gateway agent paths. Update the tests and planning state to mark Phase 3 PR #2 as complete.
This commit is contained in:
@@ -184,4 +184,144 @@ describe('assembleSystemPrompt', () => {
|
||||
expect(result.prompt).toContain('Current date:');
|
||||
expect(result.prompt).toContain('Current time:');
|
||||
});
|
||||
|
||||
it('uses normal as default context level', () => {
|
||||
const dir = makeTempDir();
|
||||
writeFileSync(join(dir, 'SOUL.md'), 'Soul.');
|
||||
writeFileSync(join(dir, 'AGENTS.md'), 'Agent rules.');
|
||||
|
||||
const result = assembleSystemPrompt({
|
||||
searchDirs: [dir],
|
||||
extraSections: [{ name: 'Custom', content: 'Keep this.' }],
|
||||
});
|
||||
|
||||
expect(result.prompt).toContain('Soul.');
|
||||
expect(result.prompt).toContain('# Agent Instructions\n\nAgent rules.');
|
||||
expect(result.prompt).toContain('# Custom\n\nKeep this.');
|
||||
});
|
||||
|
||||
it('minimal loads SOUL plus runtime only', () => {
|
||||
const dir = makeTempDir();
|
||||
writeFileSync(join(dir, 'SOUL.md'), 'Soul only.');
|
||||
writeFileSync(join(dir, 'AGENTS.md'), 'Do not include.');
|
||||
writeFileSync(join(dir, 'IDENTITY.md'), 'Do not include.');
|
||||
writeFileSync(join(dir, 'USER.md'), 'Do not include.');
|
||||
writeFileSync(join(dir, 'TOOLS.md'), 'Do not include.');
|
||||
|
||||
const result = assembleSystemPrompt({
|
||||
searchDirs: [dir],
|
||||
extraSections: [{ name: 'Custom', content: 'Do not include.' }],
|
||||
contextLevel: 'minimal',
|
||||
});
|
||||
|
||||
expect(result.prompt).toContain('Soul only.');
|
||||
expect(result.prompt).toContain('# Runtime Context');
|
||||
expect(result.prompt).not.toContain('# Agent Instructions');
|
||||
expect(result.prompt).not.toContain('# Identity Customization');
|
||||
expect(result.prompt).not.toContain('# User Context');
|
||||
expect(result.prompt).not.toContain('# Tool Instructions');
|
||||
expect(result.prompt).not.toContain('# Custom');
|
||||
});
|
||||
|
||||
it('normal keeps current template behavior', () => {
|
||||
const dir = makeTempDir();
|
||||
writeFileSync(join(dir, 'SOUL.md'), 'Soul.');
|
||||
writeFileSync(join(dir, 'AGENTS.md'), 'Agents.');
|
||||
writeFileSync(join(dir, 'IDENTITY.md'), 'Identity.');
|
||||
writeFileSync(join(dir, 'USER.md'), 'User.');
|
||||
writeFileSync(join(dir, 'TOOLS.md'), 'Tools.');
|
||||
|
||||
const result = assembleSystemPrompt({
|
||||
searchDirs: [dir],
|
||||
contextLevel: 'normal',
|
||||
});
|
||||
|
||||
expect(result.prompt).toContain('Soul.');
|
||||
expect(result.prompt).toContain('# Agent Instructions\n\nAgents.');
|
||||
expect(result.prompt).toContain('# Identity Customization\n\nIdentity.');
|
||||
expect(result.prompt).toContain('# User Context\n\nUser.');
|
||||
expect(result.prompt).toContain('# Tool Instructions\n\nTools.');
|
||||
});
|
||||
|
||||
it('detailed includes extra sections', () => {
|
||||
const dir = makeTempDir();
|
||||
writeFileSync(join(dir, 'SOUL.md'), 'Soul.');
|
||||
|
||||
const result = assembleSystemPrompt({
|
||||
searchDirs: [dir],
|
||||
extraSections: [{ name: 'Detailed Notes', content: 'Include me.' }],
|
||||
contextLevel: 'detailed',
|
||||
});
|
||||
|
||||
expect(result.prompt).toContain('Soul.');
|
||||
expect(result.prompt).toContain('# Detailed Notes\n\nInclude me.');
|
||||
});
|
||||
|
||||
it('debug appends prompt debug section with loaded files', () => {
|
||||
const dir = makeTempDir();
|
||||
writeFileSync(join(dir, 'SOUL.md'), 'Soul.');
|
||||
writeFileSync(join(dir, 'AGENTS.md'), 'Agents.');
|
||||
|
||||
const result = assembleSystemPrompt({
|
||||
searchDirs: [dir],
|
||||
contextLevel: 'debug',
|
||||
});
|
||||
|
||||
expect(result.prompt).toContain('# Prompt Debug');
|
||||
expect(result.prompt).toContain('Context level: debug');
|
||||
expect(result.prompt).toContain('Loaded files:');
|
||||
expect(result.prompt).toContain('SOUL.md');
|
||||
expect(result.prompt).toContain('AGENTS.md');
|
||||
expect(result.prompt).toContain('Directory resolution notes:');
|
||||
});
|
||||
|
||||
it('minimal skips extra sections', () => {
|
||||
const dir = makeTempDir();
|
||||
writeFileSync(join(dir, 'SOUL.md'), 'Soul.');
|
||||
|
||||
const result = assembleSystemPrompt({
|
||||
searchDirs: [dir],
|
||||
extraSections: [{ name: 'Skipped', content: 'Should not appear.' }],
|
||||
contextLevel: 'minimal',
|
||||
});
|
||||
|
||||
expect(result.prompt).not.toContain('# Skipped');
|
||||
});
|
||||
|
||||
it('injects truthfulness policy for standard mode', () => {
|
||||
const dir = makeTempDir();
|
||||
writeFileSync(join(dir, 'SOUL.md'), 'Soul.');
|
||||
|
||||
const result = assembleSystemPrompt({
|
||||
searchDirs: [dir],
|
||||
truthfulnessMode: 'standard',
|
||||
});
|
||||
|
||||
expect(result.prompt).toContain('## Truthfulness Policy');
|
||||
});
|
||||
|
||||
it('injects strict truthfulness policy in strict mode', () => {
|
||||
const dir = makeTempDir();
|
||||
writeFileSync(join(dir, 'SOUL.md'), 'Soul.');
|
||||
|
||||
const result = assembleSystemPrompt({
|
||||
searchDirs: [dir],
|
||||
truthfulnessMode: 'strict',
|
||||
});
|
||||
|
||||
expect(result.prompt).toContain('STRICT MODE');
|
||||
});
|
||||
|
||||
it('does not inject truthfulness policy in relaxed mode', () => {
|
||||
const dir = makeTempDir();
|
||||
writeFileSync(join(dir, 'SOUL.md'), 'Soul.');
|
||||
|
||||
const result = assembleSystemPrompt({
|
||||
searchDirs: [dir],
|
||||
truthfulnessMode: 'relaxed',
|
||||
});
|
||||
|
||||
expect(result.prompt).not.toContain('## Truthfulness Policy');
|
||||
expect(result.prompt).toContain('# Runtime Context');
|
||||
});
|
||||
});
|
||||
|
||||
+42
-4
@@ -1,5 +1,7 @@
|
||||
import { readFileSync, existsSync } from 'fs';
|
||||
import { resolve } from 'path';
|
||||
import type { ContextLevel, TruthfulnessMode } from '../config/schema.js';
|
||||
import { getTruthfulnessGuidance } from '../backends/native/guardrails.js';
|
||||
|
||||
/** Ordered list of prompt template files to look for. */
|
||||
const PROMPT_FILES = [
|
||||
@@ -15,6 +17,10 @@ export interface PromptTemplateConfig {
|
||||
searchDirs: string[];
|
||||
/** Additional sections to inject (e.g., from config). */
|
||||
extraSections?: Array<{ name: string; content: string }>;
|
||||
/** Prompt context depth. Defaults to normal. */
|
||||
contextLevel?: ContextLevel;
|
||||
/** Truthfulness enforcement mode. Defaults to standard. */
|
||||
truthfulnessMode?: TruthfulnessMode;
|
||||
}
|
||||
|
||||
export interface PromptTemplateResult {
|
||||
@@ -32,10 +38,21 @@ export interface PromptTemplateResult {
|
||||
* Sections are assembled in the order defined in PROMPT_FILES.
|
||||
*/
|
||||
export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTemplateResult {
|
||||
const level = config.contextLevel ?? 'normal';
|
||||
const truthfulnessMode = config.truthfulnessMode ?? 'standard';
|
||||
const includeAllTemplates = level !== 'minimal';
|
||||
const includeExtraSections = level !== 'minimal';
|
||||
const includeDebugSection = level === 'debug';
|
||||
const includeTruthfulness = truthfulnessMode === 'strict' || truthfulnessMode === 'standard';
|
||||
|
||||
const sections: string[] = [];
|
||||
const loadedFiles: string[] = [];
|
||||
|
||||
for (const { name, section } of PROMPT_FILES) {
|
||||
if (!includeAllTemplates && name !== 'SOUL.md') {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const dir of config.searchDirs) {
|
||||
const filePath = resolve(dir, name);
|
||||
if (existsSync(filePath)) {
|
||||
@@ -55,7 +72,7 @@ export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTempla
|
||||
}
|
||||
|
||||
// Add extra sections
|
||||
if (config.extraSections) {
|
||||
if (includeExtraSections && config.extraSections) {
|
||||
for (const { name, content } of config.extraSections) {
|
||||
if (content.trim()) {
|
||||
sections.push(`# ${name}\n\n${content.trim()}`);
|
||||
@@ -63,6 +80,11 @@ export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTempla
|
||||
}
|
||||
}
|
||||
|
||||
// Inject truthfulness guidance (for strict and standard modes)
|
||||
if (includeTruthfulness) {
|
||||
sections.push(getTruthfulnessGuidance(truthfulnessMode));
|
||||
}
|
||||
|
||||
// Inject current date/time as runtime context
|
||||
const now = new Date();
|
||||
const dateStr = now.toLocaleDateString('en-US', {
|
||||
@@ -80,10 +102,26 @@ export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTempla
|
||||
const runtimeContext = `# Runtime Context\n\nCurrent date: ${dateStr}\nCurrent time: ${timeStr}`;
|
||||
sections.push(runtimeContext);
|
||||
|
||||
// Fallback if only the runtime context was loaded (no actual prompt files)
|
||||
if (sections.length === 1) {
|
||||
if (includeDebugSection) {
|
||||
const loadedFilesList = loadedFiles.length > 0
|
||||
? loadedFiles.map((filePath) => `- ${filePath}`).join('\n')
|
||||
: '- none';
|
||||
const searchDirsList = config.searchDirs.length > 0
|
||||
? config.searchDirs.map((dir) => `- ${dir}`).join('\n')
|
||||
: '- none';
|
||||
|
||||
sections.push(
|
||||
`# Prompt Debug\n\nContext level: ${level}\n\nLoaded files:\n${loadedFilesList}\n\nDirectory resolution notes:\n${searchDirsList}\n- First match wins per template file.`,
|
||||
);
|
||||
}
|
||||
|
||||
// Fallback when no prompt template files were found.
|
||||
if (loadedFiles.length === 0) {
|
||||
const truthfulnessSection = includeTruthfulness
|
||||
? `${getTruthfulnessGuidance(truthfulnessMode)}\n\n`
|
||||
: '';
|
||||
return {
|
||||
prompt: `You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.\n\n${runtimeContext}`,
|
||||
prompt: `You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.\n\n${truthfulnessSection}${runtimeContext}`,
|
||||
loadedFiles: [],
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user