From 4316dbd3bed1b7690657e0a3964290c6b0910018 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Fri, 6 Feb 2026 15:12:35 -0800 Subject: [PATCH] =?UTF-8?q?feat:=20add=20P2=20features=20=E2=80=94=20retry?= =?UTF-8?q?=20policy,=20prompt=20templating,=20usage=20tracking,=20tech=20?= =?UTF-8?q?debt=20cleanup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract shared splitMessage() into channels/utils.ts (dedup 4 adapters) - Add Slack user name resolution with caching (users.info API) - Add withRetry() with exponential backoff + jitter, isRetryable() filter - Wire retry config into ModelRouter.chat() (non-streaming only) - Add assembleSystemPrompt() multi-file template system (SOUL/AGENTS/IDENTITY/USER/TOOLS.md) - Add usage tracking accumulators in NativeAgent + AgentOrchestrator - Add estimateCost() with per-model pricing table - Add /usage TUI command with full usage report formatting - Add retrySchema and promptSchema to config schema Tests: 569 passing, typecheck clean --- src/backends/index.ts | 1 + src/backends/native/agent.ts | 22 +++- src/backends/native/index.ts | 1 + src/backends/native/orchestrator.ts | 53 ++++++++- src/channels/discord/adapter.ts | 31 +---- src/channels/index.ts | 1 + src/channels/slack/adapter.ts | 58 ++++----- src/channels/telegram/adapter.ts | 31 +---- src/channels/utils.test.ts | 86 ++++++++++++++ src/channels/utils.ts | 33 ++++++ src/channels/whatsapp/adapter.ts | 31 +---- src/config/schema.ts | 22 ++++ src/daemon/index.ts | 74 +++++++++--- src/frontends/tui/commands.test.ts | 5 + src/frontends/tui/commands.ts | 9 ++ src/models/costs.test.ts | 57 +++++++++ src/models/costs.ts | 21 ++++ src/models/index.ts | 2 + src/models/retry.test.ts | 169 ++++++++++++++++++++++++++ src/models/retry.ts | 71 +++++++++++ src/models/router.ts | 10 +- src/prompt/index.ts | 1 + src/prompt/template.test.ts | 178 ++++++++++++++++++++++++++++ src/prompt/template.ts | 78 ++++++++++++ 24 files changed, 902 
insertions(+), 143 deletions(-) create mode 100644 src/channels/utils.test.ts create mode 100644 src/channels/utils.ts create mode 100644 src/models/costs.test.ts create mode 100644 src/models/costs.ts create mode 100644 src/models/retry.test.ts create mode 100644 src/models/retry.ts create mode 100644 src/prompt/index.ts create mode 100644 src/prompt/template.test.ts create mode 100644 src/prompt/template.ts diff --git a/src/backends/index.ts b/src/backends/index.ts index cf41f7e..39f35fb 100644 --- a/src/backends/index.ts +++ b/src/backends/index.ts @@ -5,6 +5,7 @@ export { type SubAgentRequest, type SubAgentResult, type DelegationConfig, + type UsageReport, } from './native/index.js'; export { COMPACTION_SYSTEM_PROMPT, diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts index 70c30dc..952a33e 100644 --- a/src/backends/native/agent.ts +++ b/src/backends/native/agent.ts @@ -1,4 +1,4 @@ -import type { ModelClient, Message, ChatRequest, ChatResponse, ModelToolCall } from '../../models/types.js'; +import type { ModelClient, Message, ChatRequest, ChatResponse, ModelToolCall, TokenUsage } from '../../models/types.js'; import type { ModelRouter, ModelTier } from '../../models/router.js'; import type { Session } from '../../session/index.js'; import type { ToolRegistry } from '../../tools/registry.js'; @@ -39,6 +39,8 @@ export class NativeAgent { private toolExecutor?: ToolExecutor; private maxIterations: number; private onToolUse?: (event: ToolUseEvent) => void; + private _totalUsage: TokenUsage = { inputTokens: 0, outputTokens: 0 }; + private _callCount: number = 0; constructor(config: NativeAgentConfig) { this.modelClient = config.modelClient; @@ -79,6 +81,10 @@ export class NativeAgent { const response = await this.chatWithRouter(request); + this._totalUsage.inputTokens += response.usage.inputTokens; + this._totalUsage.outputTokens += response.usage.outputTokens; + this._callCount++; + if (response.fallback) { console.warn(`[Flynn] 
${response.fallbackReason}`); } @@ -110,6 +116,10 @@ export class NativeAgent { const response = await this.chatWithRouter(request); + this._totalUsage.inputTokens += response.usage.inputTokens; + this._totalUsage.outputTokens += response.usage.outputTokens; + this._callCount++; + if (response.fallback) { console.warn(`[Flynn] ${response.fallbackReason}`); } @@ -185,6 +195,16 @@ export class NativeAgent { } else { this.inMemoryHistory = []; } + this.resetUsage(); + } + + getUsage(): { inputTokens: number; outputTokens: number; calls: number } { + return { ...this._totalUsage, calls: this._callCount }; + } + + resetUsage(): void { + this._totalUsage = { inputTokens: 0, outputTokens: 0 }; + this._callCount = 0; } getHistory(): Message[] { diff --git a/src/backends/native/index.ts b/src/backends/native/index.ts index a6ec3c4..76ffd69 100644 --- a/src/backends/native/index.ts +++ b/src/backends/native/index.ts @@ -5,6 +5,7 @@ export { type SubAgentRequest, type SubAgentResult, type DelegationConfig, + type UsageReport, } from './orchestrator.js'; export { COMPACTION_SYSTEM_PROMPT, diff --git a/src/backends/native/orchestrator.ts b/src/backends/native/orchestrator.ts index cffb797..abf82ea 100644 --- a/src/backends/native/orchestrator.ts +++ b/src/backends/native/orchestrator.ts @@ -8,6 +8,7 @@ import { NativeAgent } from './agent.js'; import type { ToolUseEvent } from './agent.js'; import { shouldCompact } from '../../context/tokens.js'; import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js'; +import { estimateCost } from '../../models/costs.js'; // ── Public types ────────────────────────────────────────────────────── @@ -44,6 +45,25 @@ interface TierUsageStats { calls: number; } +/** Full usage stats for an orchestrator session. */ +export interface UsageReport { + /** Primary agent (user-facing) usage. 
*/ + primary: { + inputTokens: number; + outputTokens: number; + calls: number; + }; + /** Delegation (sub-agent) usage, broken down by tier. */ + delegation: Record; + /** Combined totals. */ + total: { + inputTokens: number; + outputTokens: number; + calls: number; + estimatedCost: number; + }; +} + /** Full configuration for the AgentOrchestrator. */ export interface OrchestratorConfig { modelRouter: ModelRouter; @@ -228,9 +248,10 @@ export class AgentOrchestrator { return result; } - /** Reset the primary agent's conversation history. */ + /** Reset the primary agent's conversation history and usage stats. */ reset(): void { this._agent.reset(); + this._usageByTier.clear(); } /** Get the primary agent's conversation history. */ @@ -267,6 +288,36 @@ export class AgentOrchestrator { return result; } + /** + * Returns comprehensive usage stats combining primary agent and delegation usage. + * Includes estimated cost based on the primary model's pricing. + */ + getUsage(): UsageReport { + const primary = this._agent.getUsage(); + const delegation = this.getDelegationUsage(); + + let totalInput = primary.inputTokens; + let totalOutput = primary.outputTokens; + let totalCalls = primary.calls; + + for (const stats of Object.values(delegation)) { + totalInput += stats.inputTokens; + totalOutput += stats.outputTokens; + totalCalls += stats.calls; + } + + return { + primary, + delegation, + total: { + inputTokens: totalInput, + outputTokens: totalOutput, + calls: totalCalls, + estimatedCost: estimateCost(totalInput, totalOutput, this._modelName), + }, + }; + } + /** * Look up which model tier is configured for a given delegation task. * Convenience method so callers don't need to access the config directly. 
diff --git a/src/channels/discord/adapter.ts b/src/channels/discord/adapter.ts index aa3881d..72e5484 100644 --- a/src/channels/discord/adapter.ts +++ b/src/channels/discord/adapter.ts @@ -15,6 +15,7 @@ import type { ChannelAdapter, ChannelStatus, } from '../types.js'; +import { splitMessage } from '../utils.js'; /** Configuration for the Discord channel adapter. */ export interface DiscordAdapterConfig { @@ -27,36 +28,6 @@ export interface DiscordAdapterConfig { requireMention?: boolean; } -/** - * Split a long message into chunks that respect Discord's 2000 char limit. - * Prefers splitting at newlines, then spaces, then hard-cuts. - */ -function splitMessage(text: string, maxLength: number): string[] { - const chunks: string[] = []; - let remaining = text; - - while (remaining.length > 0) { - if (remaining.length <= maxLength) { - chunks.push(remaining); - break; - } - - // Try to split at a newline within the allowed window - let splitIndex = remaining.lastIndexOf('\n', maxLength); - if (splitIndex === -1 || splitIndex < maxLength / 2) { - splitIndex = remaining.lastIndexOf(' ', maxLength); - } - if (splitIndex === -1 || splitIndex < maxLength / 2) { - splitIndex = maxLength; - } - - chunks.push(remaining.slice(0, splitIndex)); - remaining = remaining.slice(splitIndex).trimStart(); - } - - return chunks; -} - /** * Discord channel adapter backed by discord.js. 
* diff --git a/src/channels/index.ts b/src/channels/index.ts index c9ed125..0608b21 100644 --- a/src/channels/index.ts +++ b/src/channels/index.ts @@ -7,6 +7,7 @@ export type { MessageHandler, } from './types.js'; export { ChannelRegistry } from './registry.js'; +export { splitMessage } from './utils.js'; export { TelegramAdapter, type TelegramAdapterConfig } from './telegram/index.js'; export { WebChatAdapter, type WebChatAdapterConfig } from './webchat/index.js'; export { DiscordAdapter, type DiscordAdapterConfig } from './discord/index.js'; diff --git a/src/channels/slack/adapter.ts b/src/channels/slack/adapter.ts index b4a71f3..ffcad58 100644 --- a/src/channels/slack/adapter.ts +++ b/src/channels/slack/adapter.ts @@ -13,6 +13,7 @@ import type { ChannelAdapter, ChannelStatus, } from '../types.js'; +import { splitMessage } from '../utils.js'; /** Configuration for the Slack channel adapter. */ export interface SlackAdapterConfig { @@ -34,36 +35,6 @@ interface SlackMessageEvent { subtype?: string; } -/** - * Split a long message into chunks that respect Slack's readability limit. - * Prefers splitting at newlines, then spaces, then hard-cuts. - */ -function splitMessage(text: string, maxLength: number): string[] { - const chunks: string[] = []; - let remaining = text; - - while (remaining.length > 0) { - if (remaining.length <= maxLength) { - chunks.push(remaining); - break; - } - - // Try to split at a newline within the allowed window - let splitIndex = remaining.lastIndexOf('\n', maxLength); - if (splitIndex === -1 || splitIndex < maxLength / 2) { - splitIndex = remaining.lastIndexOf(' ', maxLength); - } - if (splitIndex === -1 || splitIndex < maxLength / 2) { - splitIndex = maxLength; - } - - chunks.push(remaining.slice(0, splitIndex)); - remaining = remaining.slice(splitIndex).trimStart(); - } - - return chunks; -} - /** * Slack channel adapter backed by @slack/bolt. 
* @@ -77,6 +48,7 @@ export class SlackAdapter implements ChannelAdapter { private app: App | null = null; private messageHandler?: (msg: InboundMessage) => void; private config: SlackAdapterConfig; + private userNameCache: Map = new Map(); get status(): ChannelStatus { return this._status; @@ -105,7 +77,7 @@ export class SlackAdapter implements ChannelAdapter { // Register message event handler this.app.message(async ({ message }) => { - this.handleMessage(message as unknown as SlackMessageEvent); + await this.handleMessage(message as unknown as SlackMessageEvent); }); await this.app.start(); @@ -161,8 +133,23 @@ export class SlackAdapter implements ChannelAdapter { } } + /** Resolve a Slack user ID to a display name, with caching. */ + private async resolveUserName(userId: string): Promise { + const cached = this.userNameCache.get(userId); + if (cached) return cached; + + try { + const result = await this.app!.client.users.info({ user: userId }); + const name = result.user?.real_name || result.user?.name || userId; + this.userNameCache.set(userId, name); + return name; + } catch { + return userId; + } + } + /** Internal: process an inbound Slack message event. */ - private handleMessage(message: SlackMessageEvent): void { + private async handleMessage(message: SlackMessageEvent): Promise { if (!this.messageHandler) return; // Ignore bot messages @@ -187,9 +174,10 @@ export class SlackAdapter implements ChannelAdapter { // Strip bot mentions: <@U\w+> pattern let text = (message.text ?? '').replace(/<@U\w+>/g, '').trim(); - // TODO: message.user is a Slack user ID (e.g. U0123ABC), not a display name. - // To resolve display names, use this.app.client.users.info() with caching. - const senderName = message.user; + // Resolve display name from Slack user ID + const senderName = message.user + ? 
await this.resolveUserName(message.user) + : undefined; // Detect reset command if (text === '!reset' || text === 'reset') { diff --git a/src/channels/telegram/adapter.ts b/src/channels/telegram/adapter.ts index 824110c..46716ec 100644 --- a/src/channels/telegram/adapter.ts +++ b/src/channels/telegram/adapter.ts @@ -9,6 +9,7 @@ import type { } from '../types.js'; import { isAllowedChat } from '../../frontends/telegram/handlers.js'; import { parseConfirmationCallback } from '../../frontends/telegram/confirmations.js'; +import { splitMessage } from '../utils.js'; /** Configuration for the Telegram channel adapter. */ export interface TelegramAdapterConfig { @@ -17,36 +18,6 @@ export interface TelegramAdapterConfig { hookEngine?: HookEngine; } -/** - * Split a long message into chunks that respect Telegram's 4096 char limit. - * Prefers splitting at newlines, then spaces, then hard-cuts. - */ -function splitMessage(text: string, maxLength: number): string[] { - const chunks: string[] = []; - let remaining = text; - - while (remaining.length > 0) { - if (remaining.length <= maxLength) { - chunks.push(remaining); - break; - } - - // Try to split at a newline within the allowed window - let splitIndex = remaining.lastIndexOf('\n', maxLength); - if (splitIndex === -1 || splitIndex < maxLength / 2) { - splitIndex = remaining.lastIndexOf(' ', maxLength); - } - if (splitIndex === -1 || splitIndex < maxLength / 2) { - splitIndex = maxLength; - } - - chunks.push(remaining.slice(0, splitIndex)); - remaining = remaining.slice(splitIndex).trimStart(); - } - - return chunks; -} - /** * Telegram channel adapter backed by grammy. 
* diff --git a/src/channels/utils.test.ts b/src/channels/utils.test.ts new file mode 100644 index 0000000..db8baba --- /dev/null +++ b/src/channels/utils.test.ts @@ -0,0 +1,86 @@ +import { describe, it, expect } from 'vitest'; +import { splitMessage } from './utils.js'; + +describe('splitMessage', () => { + it('returns single chunk for empty string', () => { + const result = splitMessage('', 100); + // empty string never enters the while loop → returns empty array + expect(result).toEqual([]); + }); + + it('returns single chunk when text is under maxLength', () => { + const result = splitMessage('hello world', 100); + expect(result).toEqual(['hello world']); + }); + + it('returns single chunk when text equals maxLength', () => { + const text = 'a'.repeat(50); + const result = splitMessage(text, 50); + expect(result).toEqual([text]); + }); + + it('splits at newline when possible', () => { + const text = 'line one\nline two\nline three'; + // maxLength 18 → "line one\nline two\n" is 18 chars, lastIndexOf('\n', 18) = 17 + const result = splitMessage(text, 18); + expect(result).toEqual(['line one\nline two', 'line three']); + }); + + it('splits at space when no newline available', () => { + const text = 'word1 word2 word3 word4'; + // maxLength 12 → "word1 word2 " lastIndexOf(' ', 12) = 11 + const result = splitMessage(text, 12); + expect(result[0]).toBe('word1 word2'); + expect(result.length).toBeGreaterThanOrEqual(2); + }); + + it('hard-cuts when no whitespace available', () => { + const text = 'abcdefghijklmnop'; + const result = splitMessage(text, 5); + expect(result[0]).toBe('abcde'); + expect(result[1]).toBe('fghij'); + expect(result[2]).toBe('klmno'); + expect(result[3]).toBe('p'); + }); + + it('produces multiple chunks for long text', () => { + const text = 'chunk one\nchunk two\nchunk three\nchunk four'; + const result = splitMessage(text, 20); + expect(result.length).toBeGreaterThan(1); + // Every chunk respects the limit + for (const chunk of result) { + 
expect(chunk.length).toBeLessThanOrEqual(20); + } + }); + + it('preserves all content (joined chunks equal original minus trimmed whitespace)', () => { + const text = 'The quick brown fox jumps over the lazy dog. ' + + 'Pack my box with five dozen liquor jugs. ' + + 'How vexingly quick daft zebras jump.'; + const result = splitMessage(text, 30); + + // Reassemble: since trimStart() removes leading whitespace between chunks, + // we verify all words are preserved + const originalWords = text.split(/\s+/); + const resultWords = result.join(' ').split(/\s+/); + expect(resultWords).toEqual(originalWords); + }); + + it('prefers newline split over space split', () => { + // Place newline at a good position and space later + const text = 'first part\nsecond part of the message'; + // maxLength 15: lastIndexOf('\n', 15) = 10, which is >= 15/2 = 7.5 → splits at newline + const result = splitMessage(text, 15); + expect(result[0]).toBe('first part'); + }); + + it('falls back to space when newline is too early', () => { + // Newline at position 2, which is < maxLength/2 for maxLength=14 + const text = 'ab\ncdefghij klmnopqrst'; + // lastIndexOf('\n', 14) = 2, but 2 < 14/2=7 → falls back to space + // lastIndexOf(' ', 14) = 11, which is >= 7 → splits at space + const result = splitMessage(text, 14); + expect(result[0]).toBe('ab\ncdefghij'); + expect(result[1]).toBe('klmnopqrst'); + }); +}); diff --git a/src/channels/utils.ts b/src/channels/utils.ts new file mode 100644 index 0000000..e26703d --- /dev/null +++ b/src/channels/utils.ts @@ -0,0 +1,33 @@ +/** + * Shared utilities for channel adapters. + */ + +/** + * Split a long message into chunks that respect a platform's character limit. + * Prefers splitting at newlines, then spaces, then hard-cuts. 
+ */ +export function splitMessage(text: string, maxLength: number): string[] { + const chunks: string[] = []; + let remaining = text; + + while (remaining.length > 0) { + if (remaining.length <= maxLength) { + chunks.push(remaining); + break; + } + + // Try to split at a newline within the allowed window + let splitIndex = remaining.lastIndexOf('\n', maxLength); + if (splitIndex === -1 || splitIndex < maxLength / 2) { + splitIndex = remaining.lastIndexOf(' ', maxLength); + } + if (splitIndex === -1 || splitIndex < maxLength / 2) { + splitIndex = maxLength; + } + + chunks.push(remaining.slice(0, splitIndex)); + remaining = remaining.slice(splitIndex).trimStart(); + } + + return chunks; +} diff --git a/src/channels/whatsapp/adapter.ts b/src/channels/whatsapp/adapter.ts index 3c4114d..e2d9068 100644 --- a/src/channels/whatsapp/adapter.ts +++ b/src/channels/whatsapp/adapter.ts @@ -14,6 +14,7 @@ import type { ChannelAdapter, ChannelStatus, } from '../types.js'; +import { splitMessage } from '../utils.js'; /** Configuration for the WhatsApp channel adapter. */ export interface WhatsAppAdapterConfig { @@ -34,36 +35,6 @@ interface WhatsAppMessage { _data?: { notifyName?: string }; } -/** - * Split a long message into chunks that respect WhatsApp's readability limit. - * Prefers splitting at newlines, then spaces, then hard-cuts. 
- */ -function splitMessage(text: string, maxLength: number): string[] { - const chunks: string[] = []; - let remaining = text; - - while (remaining.length > 0) { - if (remaining.length <= maxLength) { - chunks.push(remaining); - break; - } - - // Try to split at a newline within the allowed window - let splitIndex = remaining.lastIndexOf('\n', maxLength); - if (splitIndex === -1 || splitIndex < maxLength / 2) { - splitIndex = remaining.lastIndexOf(' ', maxLength); - } - if (splitIndex === -1 || splitIndex < maxLength / 2) { - splitIndex = maxLength; - } - - chunks.push(remaining.slice(0, splitIndex)); - remaining = remaining.slice(splitIndex).trimStart(); - } - - return chunks; -} - /** * WhatsApp channel adapter backed by whatsapp-web.js. * diff --git a/src/config/schema.ts b/src/config/schema.ts index 728c786..88505b2 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -146,6 +146,14 @@ const processSchema = z.object({ buffer_size: z.number().min(1024).max(1048576).default(65536), }).default({}); +const retrySchema = z.object({ + enabled: z.boolean().default(true), + max_retries: z.number().min(0).max(10).default(3), + initial_delay_ms: z.number().min(100).max(60000).default(1000), + backoff_multiplier: z.number().min(1).max(5).default(2), + max_delay_ms: z.number().min(1000).max(120000).default(30000), +}).default({}); + const webSearchSchema = z.object({ provider: z.enum(['brave', 'searxng']).default('brave'), api_key: z.string().optional(), @@ -153,6 +161,16 @@ const webSearchSchema = z.object({ max_results: z.number().min(1).max(20).default(5), }).default({}); +const promptSchema = z.object({ + /** Additional directories to search for prompt template files. */ + search_dirs: z.array(z.string()).default([]), + /** Extra named sections to include in the system prompt. 
*/ + extra_sections: z.array(z.object({ + name: z.string(), + content: z.string(), + })).default([]), +}).default({}); + export const configSchema = z.object({ telegram: telegramSchema, discord: discordSchema, @@ -169,7 +187,9 @@ export const configSchema = z.object({ compaction: compactionSchema, memory: memorySchema, process: processSchema, + retry: retrySchema, web_search: webSearchSchema, + prompt: promptSchema, }); export type Config = z.infer; @@ -184,3 +204,5 @@ export type ProcessConfig = z.infer; export type DiscordConfig = z.infer; export type SlackConfig = z.infer; export type WhatsAppConfig = z.infer; +export type RetryPolicyConfig = z.infer; +export type PromptConfig = z.infer; diff --git a/src/daemon/index.ts b/src/daemon/index.ts index bb45c7d..f7a2fb6 100644 --- a/src/daemon/index.ts +++ b/src/daemon/index.ts @@ -1,7 +1,7 @@ import { Lifecycle } from './lifecycle.js'; import type { Config, ModelConfig } from '../config/index.js'; -import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, ModelRouter } from '../models/index.js'; -import type { ModelClient } from '../models/index.js'; +import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, ModelRouter, DEFAULT_RETRY_CONFIG } from '../models/index.js'; +import type { ModelClient, RetryConfig } from '../models/index.js'; import { AgentOrchestrator, type DelegationConfig } from '../backends/index.js'; import { SessionStore, SessionManager } from '../session/index.js'; import { HookEngine } from '../hooks/index.js'; @@ -14,9 +14,10 @@ import { CronScheduler } from '../automation/index.js'; import type { InboundMessage, OutboundMessage } from '../channels/index.js'; import { McpManager } from '../mcp/index.js'; import { SkillRegistry, SkillInstaller, loadAllSkills } from '../skills/index.js'; +import { assembleSystemPrompt } from '../prompt/index.js'; import { resolve } from 'path'; import { homedir } from 'os'; -import { mkdirSync, readFileSync, existsSync } from 'fs'; +import { 
mkdirSync } from 'fs'; export interface DaemonContext { config: Config; @@ -34,21 +35,23 @@ export interface DaemonContext { skillInstaller: SkillInstaller; } -function loadSystemPrompt(): string { - // Try to load SOUL.md from working directory first, then from project root - const paths = [ - resolve(process.cwd(), 'SOUL.md'), - resolve(import.meta.dirname, '../../SOUL.md'), +function loadSystemPrompt(config: Config): string { + const searchDirs = [ + process.cwd(), + resolve(import.meta.dirname, '../..'), + ...(config.prompt.search_dirs ?? []), ]; - for (const soulPath of paths) { - if (existsSync(soulPath)) { - return readFileSync(soulPath, 'utf-8'); - } + const result = assembleSystemPrompt({ + searchDirs, + extraSections: config.prompt.extra_sections, + }); + + if (result.loadedFiles.length > 0) { + console.log(`Loaded prompt templates: ${result.loadedFiles.map(f => f.split('/').pop()).join(', ')}`); } - // Fallback if SOUL.md not found - return 'You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.'; + return result.prompt; } /** @@ -125,12 +128,26 @@ function createModelRouter(config: Config): ModelRouter { console.log(`Model router: default=${models.default.provider}/${models.default.model}, ` + `fallback=[${models.fallback_chain.join(', ')}]`); + // Build retry config if enabled + const retryConfig: RetryConfig | undefined = config.retry.enabled ? 
{ + maxRetries: config.retry.max_retries, + initialDelayMs: config.retry.initial_delay_ms, + backoffMultiplier: config.retry.backoff_multiplier, + maxDelayMs: config.retry.max_delay_ms, + nonRetryablePatterns: DEFAULT_RETRY_CONFIG.nonRetryablePatterns, + } : undefined; + + if (retryConfig) { + console.log(`Retry policy: max_retries=${retryConfig.maxRetries}, initial_delay=${retryConfig.initialDelayMs}ms`); + } + return new ModelRouter({ default: defaultClient, fast: fastClient, complex: complexClient, local: localClient, fallbackChain, + retryConfig, }); } @@ -210,6 +227,33 @@ function createMessageRouter(deps: { } return; } + if (msg.metadata.command === 'usage') { + const usage = agent.getUsage(); + const lines = [ + '**Token Usage**', + '', + `Primary: ${usage.primary.inputTokens.toLocaleString()} in / ${usage.primary.outputTokens.toLocaleString()} out (${usage.primary.calls} calls)`, + ]; + + const delegationEntries = Object.entries(usage.delegation); + if (delegationEntries.length > 0) { + lines.push(''); + lines.push('Delegation:'); + for (const [tier, stats] of delegationEntries) { + lines.push(` ${tier}: ${stats.inputTokens.toLocaleString()} in / ${stats.outputTokens.toLocaleString()} out (${stats.calls} calls)`); + } + } + + lines.push(''); + lines.push(`**Total:** ${usage.total.inputTokens.toLocaleString()} in / ${usage.total.outputTokens.toLocaleString()} out (${usage.total.calls} calls)`); + + if (usage.total.estimatedCost > 0) { + lines.push(`**Estimated cost:** $${usage.total.estimatedCost.toFixed(4)}`); + } + + await reply({ text: lines.join('\n'), replyTo: msg.id }); + return; + } } try { @@ -331,7 +375,7 @@ export async function startDaemon(config: Config): Promise { const modelRouter = createModelRouter(config); // Load system prompt and append skill instructions - let systemPrompt = loadSystemPrompt(); + let systemPrompt = loadSystemPrompt(config); const skillAdditions = skillRegistry.getSystemPromptAdditions(); if (skillAdditions) { systemPrompt 
= `${systemPrompt}\n\n# Available Skills\n\n${skillAdditions}`; diff --git a/src/frontends/tui/commands.test.ts b/src/frontends/tui/commands.test.ts index 730e6ed..63b04ea 100644 --- a/src/frontends/tui/commands.test.ts +++ b/src/frontends/tui/commands.test.ts @@ -30,6 +30,10 @@ describe('parseCommand', () => { expect(parseCommand('/compact')).toEqual({ type: 'compact' }); }); + it('parses /usage command', () => { + expect(parseCommand('/usage')).toEqual({ type: 'usage' }); + }); + it('parses /model command without argument', () => { expect(parseCommand('/model')).toEqual({ type: 'model' }); }); @@ -69,6 +73,7 @@ describe('getHelpText', () => { expect(help).toContain('/model'); expect(help).toContain('/reset'); expect(help).toContain('/compact'); + expect(help).toContain('/usage'); expect(help).toContain('/quit'); }); }); diff --git a/src/frontends/tui/commands.ts b/src/frontends/tui/commands.ts index b7054e7..8e46d3e 100644 --- a/src/frontends/tui/commands.ts +++ b/src/frontends/tui/commands.ts @@ -5,6 +5,7 @@ export type Command = | { type: 'status' } | { type: 'fullscreen' } | { type: 'compact' } + | { type: 'usage' } | { type: 'model'; name?: string } | { type: 'backend'; provider?: string } | { type: 'transfer'; target: string } @@ -44,6 +45,11 @@ export function parseCommand(input: string): Command | null { return { type: 'compact' }; } + // Usage + if (trimmed === '/usage') { + return { type: 'usage' }; + } + // Model (with optional argument) if (trimmed === '/model') { return { type: 'model' }; @@ -80,6 +86,7 @@ Commands: /backend [provider] Show or switch local backend (ollama, llamacpp) /reset, /clear, /new Clear conversation history /compact Compact conversation history + /usage Show token usage and estimated cost /status Show session info and token usage /fullscreen, /fs Switch to fullscreen mode /transfer Transfer session to another frontend @@ -98,6 +105,7 @@ export const SLASH_COMMANDS = [ '/clear', '/new', '/compact', + '/usage', '/status', 
'/fullscreen', '/fs', @@ -115,6 +123,7 @@ export const COMMAND_TOOLTIPS: Record = { '/clear': 'Clear conversation history', '/new': 'Start a new conversation', '/compact': 'Compact conversation history to save context space', + '/usage': 'Show token usage and estimated cost', '/status': 'Show session info and token usage', '/fullscreen': 'Switch to fullscreen mode', '/fs': 'Switch to fullscreen mode', diff --git a/src/models/costs.test.ts b/src/models/costs.test.ts new file mode 100644 index 0000000..203c5b1 --- /dev/null +++ b/src/models/costs.test.ts @@ -0,0 +1,57 @@ +import { describe, it, expect } from 'vitest'; +import { estimateCost, MODEL_COSTS_PER_MILLION } from './costs.js'; + +describe('estimateCost', () => { + it('returns 0 for local/unknown models', () => { + expect(estimateCost(1000, 1000)).toBe(0); + expect(estimateCost(1000, 1000, 'some-local-model')).toBe(0); + }); + + it('uses default costs when model name is undefined', () => { + const cost = estimateCost(1_000_000, 1_000_000); + expect(cost).toBe(0); + }); + + it('calculates correctly for known Anthropic models', () => { + // claude-sonnet-4: $3/M input, $15/M output + const cost = estimateCost(1_000_000, 1_000_000, 'claude-sonnet-4-20250514'); + expect(cost).toBe(3 + 15); + }); + + it('calculates correctly for claude-opus', () => { + // claude-opus-4: $15/M input, $75/M output + const cost = estimateCost(1_000_000, 500_000, 'claude-opus-4-20250514'); + expect(cost).toBe(15 + 37.5); + }); + + it('calculates correctly for OpenAI models', () => { + // gpt-4o: $2.50/M input, $10/M output + const cost = estimateCost(2_000_000, 1_000_000, 'gpt-4o'); + expect(cost).toBe(5 + 10); + }); + + it('handles small token counts', () => { + // 1000 tokens of claude-sonnet input: 1000 * 3 / 1_000_000 = 0.003 + const cost = estimateCost(1000, 0, 'claude-sonnet-4-20250514'); + expect(cost).toBeCloseTo(0.003); + }); + + it('handles zero tokens', () => { + const cost = estimateCost(0, 0, 'claude-sonnet-4-20250514'); 
/**
 * Approximate cost, in US dollars per million tokens, for known models.
 *
 * The `default` entry is the zero-cost rate that {@link estimateCost} applies
 * to local models and to any model name that has no entry of its own.
 */
export const MODEL_COSTS_PER_MILLION: Record<string, { input: number; output: number }> = {
  // Anthropic
  'claude-sonnet-4-20250514': { input: 3, output: 15 },
  'claude-3-5-haiku-20241022': { input: 0.80, output: 4 },
  'claude-opus-4-20250514': { input: 15, output: 75 },
  // OpenAI
  'gpt-4o': { input: 2.50, output: 10 },
  'gpt-4o-mini': { input: 0.15, output: 0.60 },
  // Local / unknown models
  'default': { input: 0, output: 0 },
};

/**
 * Estimate the dollar cost for a given number of input/output tokens.
 *
 * @param inputTokens - Number of prompt tokens consumed.
 * @param outputTokens - Number of completion tokens produced.
 * @param modelName - Exact key into {@link MODEL_COSTS_PER_MILLION}. When it is
 *   omitted or has no entry, the `default` (zero-cost) rate is used.
 * @returns The estimated cost in dollars.
 */
export function estimateCost(inputTokens: number, outputTokens: number, modelName?: string): number {
  // Only honour the table's *own* keys. A bare `table[name]` lookup also
  // resolves names inherited from Object.prototype ("toString",
  // "constructor", "__proto__", ...), which would produce NaN instead of
  // falling back to the default rate.
  const hasOwnEntry =
    modelName !== undefined &&
    Object.prototype.hasOwnProperty.call(MODEL_COSTS_PER_MILLION, modelName);

  const costs =
    (hasOwnEntry && modelName !== undefined ? MODEL_COSTS_PER_MILLION[modelName] : undefined) ??
    MODEL_COSTS_PER_MILLION['default'] ??
    { input: 0, output: 0 };

  return (inputTokens * costs.input + outputTokens * costs.output) / 1_000_000;
}
invalid_request errors', () => { + const error = new Error('invalid_request: missing parameter'); + expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false); + }); + + it('returns false for context_length_exceeded errors', () => { + const error = new Error('context_length_exceeded: max 128k tokens'); + expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false); + }); + + it('returns false for content_policy errors', () => { + const error = new Error('content_policy violation detected'); + expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false); + }); + + it('is case-insensitive when matching patterns', () => { + const error = new Error('AUTHENTICATION error'); + expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false); + }); + + it('uses custom patterns when provided', () => { + const error = new Error('quota exceeded'); + expect(isRetryable(error, ['quota'])).toBe(false); + }); +}); + +describe('withRetry', () => { + // Use minimal real delays to avoid fake-timer race conditions + const fastConfig: RetryConfig = { + maxRetries: 3, + initialDelayMs: 1, + backoffMultiplier: 1, + maxDelayMs: 5, + nonRetryablePatterns: DEFAULT_RETRY_CONFIG.nonRetryablePatterns, + }; + + it('succeeds on first attempt without delay', async () => { + const fn = vi.fn().mockResolvedValue('success'); + + const result = await withRetry(fn, fastConfig); + + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('retries on transient failure then succeeds', async () => { + const fn = vi.fn() + .mockRejectedValueOnce(new Error('timeout')) + .mockRejectedValueOnce(new Error('timeout')) + .mockResolvedValueOnce('recovered'); + + const result = await withRetry(fn, fastConfig, 'test-op'); + + expect(result).toBe('recovered'); + expect(fn).toHaveBeenCalledTimes(3); + }); + + it('throws after maxRetries exhausted', async () => { + const fn = vi.fn().mockRejectedValue(new 
Error('persistent failure')); + + await expect(withRetry(fn, fastConfig, 'test-op')).rejects.toThrow('persistent failure'); + // 1 initial + 3 retries = 4 total + expect(fn).toHaveBeenCalledTimes(4); + }); + + it('does not retry non-retryable errors', async () => { + const fn = vi.fn().mockRejectedValue(new Error('invalid_api_key')); + + await expect(withRetry(fn, fastConfig)).rejects.toThrow('invalid_api_key'); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('does not retry authentication errors', async () => { + const fn = vi.fn().mockRejectedValue(new Error('Request unauthorized')); + + await expect(withRetry(fn, fastConfig)).rejects.toThrow('Request unauthorized'); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('converts non-Error throws to Error objects', async () => { + const fn = vi.fn().mockRejectedValue('string error'); + + await expect(withRetry(fn, { ...fastConfig, maxRetries: 0 })).rejects.toThrow('string error'); + }); + + it('respects maxDelayMs cap', async () => { + const cappedConfig: RetryConfig = { + maxRetries: 2, + initialDelayMs: 1, + backoffMultiplier: 10, + maxDelayMs: 2, + nonRetryablePatterns: [], + }; + + let callCount = 0; + const fn = vi.fn().mockImplementation(() => { + callCount++; + if (callCount < 3) return Promise.reject(new Error('fail')); + return Promise.resolve('ok'); + }); + + // If maxDelayMs weren't respected, a 10x multiplier could cause very long waits. + // With maxDelayMs=2ms, this completes quickly. 
/** Tunables for {@link withRetry}. */
export interface RetryConfig {
  /** Maximum number of retry attempts (default: 3). Does not count the initial attempt. */
  maxRetries: number;
  /** Initial delay in milliseconds before first retry (default: 1000). */
  initialDelayMs: number;
  /** Multiplier applied to delay after each retry (default: 2). */
  backoffMultiplier: number;
  /** Maximum delay in milliseconds (default: 30000). */
  maxDelayMs: number;
  /** Errors matching these patterns should NOT be retried (e.g. auth errors, invalid requests). */
  nonRetryablePatterns: string[];
}

/** Retry settings used by {@link withRetry} when the caller supplies none. */
export const DEFAULT_RETRY_CONFIG: RetryConfig = {
  maxRetries: 3,
  initialDelayMs: 1000,
  backoffMultiplier: 2,
  maxDelayMs: 30000,
  nonRetryablePatterns: [
    'invalid_api_key',
    'authentication',
    'unauthorized',
    'invalid_request',
    'context_length_exceeded',
    'content_policy',
  ],
};

/**
 * Decide whether an error is worth retrying.
 *
 * @param error - The failure to classify.
 * @param nonRetryablePatterns - Substrings that mark an error as permanent.
 * @returns `false` when the error message contains any of the patterns
 *   (compared case-insensitively), `true` otherwise.
 */
export function isRetryable(error: Error, nonRetryablePatterns: string[]): boolean {
  const message = error.message.toLowerCase();
  return nonRetryablePatterns.every(pattern => !message.includes(pattern.toLowerCase()));
}

/**
 * Run `fn`, retrying failed attempts with exponential backoff and jitter.
 *
 * `fn` is always invoked at least once. After a failure it is invoked again,
 * up to `config.maxRetries` more times, unless the error matches one of
 * `config.nonRetryablePatterns`. Before each retry the function logs a warning
 * and sleeps for 50-100% of
 * `min(initialDelayMs * backoffMultiplier^attempt, maxDelayMs)`.
 *
 * @param fn - The operation to run; called with no arguments on every attempt.
 * @param config - Retry policy. Defaults to {@link DEFAULT_RETRY_CONFIG}.
 * @param label - Name used in the warning log line (defaults to `operation`).
 * @returns The value resolved by the first successful attempt.
 * @throws The last failure, as an `Error`. Non-`Error` rejections are wrapped
 *   via `new Error(String(value))`.
 */
export async function withRetry<T>(
  fn: () => Promise<T>,
  config: RetryConfig = DEFAULT_RETRY_CONFIG,
  label?: string,
): Promise<T> {
  // `maxRetries` excludes the initial attempt, so a negative or NaN budget
  // means "no retries" rather than "never call fn at all".
  const retryBudget = config.maxRetries > 0 ? Math.floor(config.maxRetries) : 0;

  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error: unknown) {
      const failure = error instanceof Error ? error : new Error(String(error));

      // Give up when the budget is spent or the error is known to be permanent.
      if (attempt >= retryBudget || !isRetryable(failure, config.nonRetryablePatterns)) {
        throw failure;
      }

      // Exponential backoff, capped at maxDelayMs, then jittered to 50-100%
      // of the capped value.
      const uncapped = config.initialDelayMs * Math.pow(config.backoffMultiplier, attempt);
      const capped = Math.min(uncapped, config.maxDelayMs);
      const waitMs = capped * (0.5 + Math.random() * 0.5);

      console.warn(
        `[retry] ${label ?? 'operation'} attempt ${attempt + 1}/${config.maxRetries} failed: ${failure.message}. Retrying in ${Math.round(waitMs)}ms...`,
      );

      await new Promise<void>(resolve => setTimeout(resolve, waitMs));
    }
  }
}
error : new Error(String(error))); diff --git a/src/prompt/index.ts b/src/prompt/index.ts new file mode 100644 index 0000000..1801564 --- /dev/null +++ b/src/prompt/index.ts @@ -0,0 +1 @@ +export { assembleSystemPrompt, type PromptTemplateConfig, type PromptTemplateResult } from './template.js'; diff --git a/src/prompt/template.test.ts b/src/prompt/template.test.ts new file mode 100644 index 0000000..9b35abc --- /dev/null +++ b/src/prompt/template.test.ts @@ -0,0 +1,178 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import { mkdtempSync, writeFileSync, rmSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; +import { assembleSystemPrompt } from './template.js'; + +describe('assembleSystemPrompt', () => { + const tempDirs: string[] = []; + + function makeTempDir(): string { + const dir = mkdtempSync(join(tmpdir(), 'flynn-prompt-test-')); + tempDirs.push(dir); + return dir; + } + + afterEach(() => { + for (const dir of tempDirs) { + rmSync(dir, { recursive: true, force: true }); + } + tempDirs.length = 0; + }); + + it('returns fallback when no files found', () => { + const dir = makeTempDir(); + const result = assembleSystemPrompt({ searchDirs: [dir] }); + + expect(result.prompt).toBe( + 'You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. 
Use markdown when it improves readability.', + ); + expect(result.loadedFiles).toEqual([]); + }); + + it('loads SOUL.md without section header', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'You are Flynn.'); + + const result = assembleSystemPrompt({ searchDirs: [dir] }); + + expect(result.prompt).toBe('You are Flynn.'); + expect(result.loadedFiles).toHaveLength(1); + expect(result.loadedFiles[0]).toContain('SOUL.md'); + }); + + it('loads AGENTS.md with "# Agent Instructions" section header', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'AGENTS.md'), 'Follow these rules.'); + + const result = assembleSystemPrompt({ searchDirs: [dir] }); + + expect(result.prompt).toBe('# Agent Instructions\n\nFollow these rules.'); + expect(result.loadedFiles).toHaveLength(1); + expect(result.loadedFiles[0]).toContain('AGENTS.md'); + }); + + it('loads multiple files in correct order', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'I am Flynn.'); + writeFileSync(join(dir, 'AGENTS.md'), 'Be helpful.'); + writeFileSync(join(dir, 'USER.md'), 'User likes cats.'); + + const result = assembleSystemPrompt({ searchDirs: [dir] }); + + expect(result.loadedFiles).toHaveLength(3); + // Verify correct ordering: SOUL → AGENTS → USER + expect(result.prompt).toBe( + 'I am Flynn.\n\n# Agent Instructions\n\nBe helpful.\n\n# User Context\n\nUser likes cats.', + ); + }); + + it('first search dir takes precedence over later dirs', () => { + const dir1 = makeTempDir(); + const dir2 = makeTempDir(); + writeFileSync(join(dir1, 'SOUL.md'), 'Primary identity.'); + writeFileSync(join(dir2, 'SOUL.md'), 'Secondary identity.'); + + const result = assembleSystemPrompt({ searchDirs: [dir1, dir2] }); + + expect(result.prompt).toBe('Primary identity.'); + expect(result.loadedFiles).toHaveLength(1); + expect(result.loadedFiles[0]).toContain(dir1); + }); + + it('falls through to later dir if file missing in first dir', () => { + const dir1 = 
makeTempDir(); + const dir2 = makeTempDir(); + writeFileSync(join(dir2, 'SOUL.md'), 'Fallback identity.'); + + const result = assembleSystemPrompt({ searchDirs: [dir1, dir2] }); + + expect(result.prompt).toBe('Fallback identity.'); + expect(result.loadedFiles[0]).toContain(dir2); + }); + + it('extra sections are appended', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Base identity.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + extraSections: [ + { name: 'Custom Rules', content: 'Always be polite.' }, + ], + }); + + expect(result.prompt).toBe( + 'Base identity.\n\n# Custom Rules\n\nAlways be polite.', + ); + }); + + it('empty files are skipped', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), ''); + writeFileSync(join(dir, 'AGENTS.md'), ' '); + + const result = assembleSystemPrompt({ searchDirs: [dir] }); + + expect(result.prompt).toBe( + 'You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.', + ); + expect(result.loadedFiles).toEqual([]); + }); + + it('empty extra sections are skipped', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Base identity.'); + + const result = assembleSystemPrompt({ + searchDirs: [dir], + extraSections: [ + { name: 'Empty', content: ' ' }, + { name: 'Populated', content: 'Has content.' 
}, + ], + }); + + expect(result.prompt).toBe( + 'Base identity.\n\n# Populated\n\nHas content.', + ); + }); + + it('attempts all PROMPT_FILES', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), 'Soul.'); + writeFileSync(join(dir, 'AGENTS.md'), 'Agents.'); + writeFileSync(join(dir, 'IDENTITY.md'), 'Identity.'); + writeFileSync(join(dir, 'USER.md'), 'User.'); + writeFileSync(join(dir, 'TOOLS.md'), 'Tools.'); + + const result = assembleSystemPrompt({ searchDirs: [dir] }); + + expect(result.loadedFiles).toHaveLength(5); + expect(result.prompt).toContain('Soul.'); + expect(result.prompt).toContain('# Agent Instructions\n\nAgents.'); + expect(result.prompt).toContain('# Identity Customization\n\nIdentity.'); + expect(result.prompt).toContain('# User Context\n\nUser.'); + expect(result.prompt).toContain('# Tool Instructions\n\nTools.'); + }); + + it('trims whitespace from loaded file content', () => { + const dir = makeTempDir(); + writeFileSync(join(dir, 'SOUL.md'), '\n I am Flynn. \n\n'); + + const result = assembleSystemPrompt({ searchDirs: [dir] }); + + expect(result.prompt).toBe('I am Flynn.'); + }); + + it('mixes files from different search directories', () => { + const dir1 = makeTempDir(); + const dir2 = makeTempDir(); + writeFileSync(join(dir1, 'SOUL.md'), 'Primary soul.'); + writeFileSync(join(dir2, 'AGENTS.md'), 'Agent rules.'); + + const result = assembleSystemPrompt({ searchDirs: [dir1, dir2] }); + + expect(result.loadedFiles).toHaveLength(2); + expect(result.prompt).toBe('Primary soul.\n\n# Agent Instructions\n\nAgent rules.'); + }); +}); diff --git a/src/prompt/template.ts b/src/prompt/template.ts new file mode 100644 index 0000000..9905fe4 --- /dev/null +++ b/src/prompt/template.ts @@ -0,0 +1,78 @@ +import { readFileSync, existsSync } from 'fs'; +import { resolve } from 'path'; + +/** Ordered list of prompt template files to look for. 
/**
 * Ordered list of prompt template files to look for.
 * The array order is the order in which sections appear in the assembled prompt.
 */
const PROMPT_FILES = [
  { name: 'SOUL.md', section: 'Identity', required: false },
  { name: 'AGENTS.md', section: 'Agent Instructions', required: false },
  { name: 'IDENTITY.md', section: 'Identity Customization', required: false },
  { name: 'USER.md', section: 'User Context', required: false },
  { name: 'TOOLS.md', section: 'Tool Instructions', required: false },
] as const;

/** Prompt returned when no template file and no extra section contributes any text. */
const FALLBACK_PROMPT =
  'You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.';

export interface PromptTemplateConfig {
  /** Directories to search for template files, in priority order. */
  searchDirs: string[];
  /** Additional sections to inject (e.g., from config). */
  extraSections?: Array<{ name: string; content: string }>;
}

export interface PromptTemplateResult {
  /** The assembled system prompt. */
  prompt: string;
  /** Which files were loaded. */
  loadedFiles: string[];
}

/** True when a read failed because the path is a directory or no longer exists. */
function isAbsentTemplateError(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) {
    return false;
  }
  const code = (error as { code?: unknown }).code;
  return code === 'EISDIR' || code === 'ENOENT';
}

/**
 * Assemble a system prompt from multiple template files.
 *
 * For each entry of PROMPT_FILES, `searchDirs` is scanned in order and the
 * first directory that holds a readable file of that name wins, even if the
 * file turns out to be blank (blank files contribute nothing). `SOUL.md` is
 * emitted verbatim; every other file is emitted under a `# <section>` heading.
 * Non-blank `extraSections` are appended afterwards under `# <name>` headings.
 * All content is trimmed and sections are joined with a blank line.
 *
 * A path that exists but is a directory, or that disappears between the
 * existence check and the read, is treated as "not found in this directory"
 * and the search continues with the next directory.
 *
 * @param config - Search directories and optional extra sections.
 * @returns The assembled prompt and the resolved paths of the files that
 *   contributed to it. When nothing contributed, a built-in default prompt
 *   and an empty file list.
 * @throws Any other filesystem error raised while reading a template file.
 */
export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTemplateResult {
  const sections: string[] = [];
  const loadedFiles: string[] = [];

  for (const { name, section } of PROMPT_FILES) {
    for (const dir of config.searchDirs) {
      const filePath = resolve(dir, name);
      if (!existsSync(filePath)) {
        continue;
      }

      let raw: string;
      try {
        raw = readFileSync(filePath, 'utf-8');
      } catch (error: unknown) {
        // existsSync() is also true for directories, and the file can vanish
        // before we read it; neither case is a usable template.
        if (isAbsentTemplateError(error)) {
          continue;
        }
        throw error;
      }

      const content = raw.trim();
      if (content) {
        // SOUL.md is special: it is the base identity, so it gets no section header.
        sections.push(name === 'SOUL.md' ? content : `# ${section}\n\n${content}`);
        loadedFiles.push(filePath);
      }
      break; // First match wins for this file
    }
  }

  // Caller-supplied sections follow the file-based ones; blank ones are dropped.
  for (const { name, content } of config.extraSections ?? []) {
    const trimmed = content.trim();
    if (trimmed) {
      sections.push(`# ${name}\n\n${trimmed}`);
    }
  }

  if (sections.length === 0) {
    return { prompt: FALLBACK_PROMPT, loadedFiles: [] };
  }

  return { prompt: sections.join('\n\n'), loadedFiles };
}