From 4316dbd3bed1b7690657e0a3964290c6b0910018 Mon Sep 17 00:00:00 2001
From: William Valentin <william.valentin.info@gmail.com>
Date: Fri, 6 Feb 2026 15:12:35 -0800
Subject: [PATCH] =?UTF-8?q?feat:=20add=20P2=20features=20=E2=80=94=20retry?=
 =?UTF-8?q?=20policy,=20prompt=20templating,=20usage=20tracking,=20tech=20?=
 =?UTF-8?q?debt=20cleanup?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Extract shared splitMessage() into channels/utils.ts (dedup 4 adapters)
- Add Slack user name resolution with caching (users.info API)
- Add withRetry() with exponential backoff + jitter, isRetryable() filter
- Wire retry config into ModelRouter.chat() (non-streaming only)
- Add assembleSystemPrompt() multi-file template system (SOUL/AGENTS/IDENTITY/USER/TOOLS.md)
- Add usage tracking accumulators in NativeAgent + AgentOrchestrator
- Add estimateCost() with per-model pricing table
- Add /usage TUI command with full usage report formatting
- Add retrySchema and promptSchema to config schema

Tests: 569 passing, typecheck clean
---
 src/backends/index.ts               |   1 +
 src/backends/native/agent.ts        |  22 +++-
 src/backends/native/index.ts        |   1 +
 src/backends/native/orchestrator.ts |  53 ++++++++-
 src/channels/discord/adapter.ts     |  31 +----
 src/channels/index.ts               |   1 +
 src/channels/slack/adapter.ts       |  58 ++++-----
 src/channels/telegram/adapter.ts    |  31 +----
 src/channels/utils.test.ts          |  86 ++++++++++++++
 src/channels/utils.ts               |  33 ++++++
 src/channels/whatsapp/adapter.ts    |  31 +----
 src/config/schema.ts                |  22 ++++
 src/daemon/index.ts                 |  74 +++++++++---
 src/frontends/tui/commands.test.ts  |   5 +
 src/frontends/tui/commands.ts       |   9 ++
 src/models/costs.test.ts            |  57 +++++++++
 src/models/costs.ts                 |  21 ++++
 src/models/index.ts                 |   2 +
 src/models/retry.test.ts            | 169 ++++++++++++++++++++++++++
 src/models/retry.ts                 |  71 +++++++++++
 src/models/router.ts                |  10 +-
 src/prompt/index.ts                 |   1 +
 src/prompt/template.test.ts         | 178 ++++++++++++++++++++++++++++
 src/prompt/template.ts              |  78 ++++++++++++
 24 files changed, 902 insertions(+), 143 deletions(-)
 create mode 100644 src/channels/utils.test.ts
 create mode 100644 src/channels/utils.ts
 create mode 100644 src/models/costs.test.ts
 create mode 100644 src/models/costs.ts
 create mode 100644 src/models/retry.test.ts
 create mode 100644 src/models/retry.ts
 create mode 100644 src/prompt/index.ts
 create mode 100644 src/prompt/template.test.ts
 create mode 100644 src/prompt/template.ts

diff --git a/src/backends/index.ts b/src/backends/index.ts
index cf41f7e..39f35fb 100644
--- a/src/backends/index.ts
+++ b/src/backends/index.ts
@@ -5,6 +5,7 @@ export {
   type SubAgentRequest,
   type SubAgentResult,
   type DelegationConfig,
+  type UsageReport,
 } from './native/index.js';
 export {
   COMPACTION_SYSTEM_PROMPT,
diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts
index 70c30dc..952a33e 100644
--- a/src/backends/native/agent.ts
+++ b/src/backends/native/agent.ts
@@ -1,4 +1,4 @@
-import type { ModelClient, Message, ChatRequest, ChatResponse, ModelToolCall } from '../../models/types.js';
+import type { ModelClient, Message, ChatRequest, ChatResponse, ModelToolCall, TokenUsage } from '../../models/types.js';
 import type { ModelRouter, ModelTier } from '../../models/router.js';
 import type { Session } from '../../session/index.js';
 import type { ToolRegistry } from '../../tools/registry.js';
@@ -39,6 +39,8 @@ export class NativeAgent {
   private toolExecutor?: ToolExecutor;
   private maxIterations: number;
   private onToolUse?: (event: ToolUseEvent) => void;
+  private _totalUsage: TokenUsage = { inputTokens: 0, outputTokens: 0 };
+  private _callCount: number = 0;
 
   constructor(config: NativeAgentConfig) {
     this.modelClient = config.modelClient;
@@ -79,6 +81,10 @@ export class NativeAgent {
 
     const response = await this.chatWithRouter(request);
 
+    this._totalUsage.inputTokens += response.usage.inputTokens;
+    this._totalUsage.outputTokens += response.usage.outputTokens;
+    this._callCount++;
+
     if (response.fallback) {
       console.warn(`[Flynn] ${response.fallbackReason}`);
     }
@@ -110,6 +116,10 @@ export class NativeAgent {
 
       const response = await this.chatWithRouter(request);
 
+      this._totalUsage.inputTokens += response.usage.inputTokens;
+      this._totalUsage.outputTokens += response.usage.outputTokens;
+      this._callCount++;
+
       if (response.fallback) {
         console.warn(`[Flynn] ${response.fallbackReason}`);
       }
@@ -185,6 +195,16 @@ export class NativeAgent {
     } else {
       this.inMemoryHistory = [];
     }
+    this.resetUsage();
+  }
+
+  getUsage(): { inputTokens: number; outputTokens: number; calls: number } {
+    return { ...this._totalUsage, calls: this._callCount };
+  }
+
+  resetUsage(): void {
+    this._totalUsage = { inputTokens: 0, outputTokens: 0 };
+    this._callCount = 0;
   }
 
   getHistory(): Message[] {
diff --git a/src/backends/native/index.ts b/src/backends/native/index.ts
index a6ec3c4..76ffd69 100644
--- a/src/backends/native/index.ts
+++ b/src/backends/native/index.ts
@@ -5,6 +5,7 @@ export {
   type SubAgentRequest,
   type SubAgentResult,
   type DelegationConfig,
+  type UsageReport,
 } from './orchestrator.js';
 export {
   COMPACTION_SYSTEM_PROMPT,
diff --git a/src/backends/native/orchestrator.ts b/src/backends/native/orchestrator.ts
index cffb797..abf82ea 100644
--- a/src/backends/native/orchestrator.ts
+++ b/src/backends/native/orchestrator.ts
@@ -8,6 +8,7 @@ import { NativeAgent } from './agent.js';
 import type { ToolUseEvent } from './agent.js';
 import { shouldCompact } from '../../context/tokens.js';
 import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
+import { estimateCost } from '../../models/costs.js';
 
 // ── Public types ──────────────────────────────────────────────────────
 
@@ -44,6 +45,25 @@ interface TierUsageStats {
   calls: number;
 }
 
+/** Full usage stats for an orchestrator session. */
+export interface UsageReport {
+  /** Primary agent (user-facing) usage. */
+  primary: {
+    inputTokens: number;
+    outputTokens: number;
+    calls: number;
+  };
+  /** Delegation (sub-agent) usage, broken down by tier. */
+  delegation: Record<string, { inputTokens: number; outputTokens: number; calls: number }>;
+  /** Combined totals. */
+  total: {
+    inputTokens: number;
+    outputTokens: number;
+    calls: number;
+    estimatedCost: number;
+  };
+}
+
 /** Full configuration for the AgentOrchestrator. */
 export interface OrchestratorConfig {
   modelRouter: ModelRouter;
@@ -228,9 +248,10 @@ export class AgentOrchestrator {
     return result;
   }
 
-  /** Reset the primary agent's conversation history. */
+  /** Reset the primary agent's conversation history and usage stats. */
   reset(): void {
     this._agent.reset();
+    this._usageByTier.clear();
   }
 
   /** Get the primary agent's conversation history. */
@@ -267,6 +288,36 @@ export class AgentOrchestrator {
     return result;
   }
 
+  /**
+   * Returns comprehensive usage stats combining primary agent and delegation usage.
+   * Includes estimated cost based on the primary model's pricing.
+   */
+  getUsage(): UsageReport {
+    const primary = this._agent.getUsage();
+    const delegation = this.getDelegationUsage();
+
+    let totalInput = primary.inputTokens;
+    let totalOutput = primary.outputTokens;
+    let totalCalls = primary.calls;
+
+    for (const stats of Object.values(delegation)) {
+      totalInput += stats.inputTokens;
+      totalOutput += stats.outputTokens;
+      totalCalls += stats.calls;
+    }
+
+    return {
+      primary,
+      delegation,
+      total: {
+        inputTokens: totalInput,
+        outputTokens: totalOutput,
+        calls: totalCalls,
+        estimatedCost: estimateCost(totalInput, totalOutput, this._modelName),
+      },
+    };
+  }
+
   /**
    * Look up which model tier is configured for a given delegation task.
    * Convenience method so callers don't need to access the config directly.
diff --git a/src/channels/discord/adapter.ts b/src/channels/discord/adapter.ts
index aa3881d..72e5484 100644
--- a/src/channels/discord/adapter.ts
+++ b/src/channels/discord/adapter.ts
@@ -15,6 +15,7 @@ import type {
   ChannelAdapter,
   ChannelStatus,
 } from '../types.js';
+import { splitMessage } from '../utils.js';
 
 /** Configuration for the Discord channel adapter. */
 export interface DiscordAdapterConfig {
@@ -27,36 +28,6 @@ export interface DiscordAdapterConfig {
   requireMention?: boolean;
 }
 
-/**
- * Split a long message into chunks that respect Discord's 2000 char limit.
- * Prefers splitting at newlines, then spaces, then hard-cuts.
- */
-function splitMessage(text: string, maxLength: number): string[] {
-  const chunks: string[] = [];
-  let remaining = text;
-
-  while (remaining.length > 0) {
-    if (remaining.length <= maxLength) {
-      chunks.push(remaining);
-      break;
-    }
-
-    // Try to split at a newline within the allowed window
-    let splitIndex = remaining.lastIndexOf('\n', maxLength);
-    if (splitIndex === -1 || splitIndex < maxLength / 2) {
-      splitIndex = remaining.lastIndexOf(' ', maxLength);
-    }
-    if (splitIndex === -1 || splitIndex < maxLength / 2) {
-      splitIndex = maxLength;
-    }
-
-    chunks.push(remaining.slice(0, splitIndex));
-    remaining = remaining.slice(splitIndex).trimStart();
-  }
-
-  return chunks;
-}
-
 /**
  * Discord channel adapter backed by discord.js.
  *
diff --git a/src/channels/index.ts b/src/channels/index.ts
index c9ed125..0608b21 100644
--- a/src/channels/index.ts
+++ b/src/channels/index.ts
@@ -7,6 +7,7 @@ export type {
   MessageHandler,
 } from './types.js';
 export { ChannelRegistry } from './registry.js';
+export { splitMessage } from './utils.js';
 export { TelegramAdapter, type TelegramAdapterConfig } from './telegram/index.js';
 export { WebChatAdapter, type WebChatAdapterConfig } from './webchat/index.js';
 export { DiscordAdapter, type DiscordAdapterConfig } from './discord/index.js';
diff --git a/src/channels/slack/adapter.ts b/src/channels/slack/adapter.ts
index b4a71f3..ffcad58 100644
--- a/src/channels/slack/adapter.ts
+++ b/src/channels/slack/adapter.ts
@@ -13,6 +13,7 @@ import type {
   ChannelAdapter,
   ChannelStatus,
 } from '../types.js';
+import { splitMessage } from '../utils.js';
 
 /** Configuration for the Slack channel adapter. */
 export interface SlackAdapterConfig {
@@ -34,36 +35,6 @@ interface SlackMessageEvent {
   subtype?: string;
 }
 
-/**
- * Split a long message into chunks that respect Slack's readability limit.
- * Prefers splitting at newlines, then spaces, then hard-cuts.
- */
-function splitMessage(text: string, maxLength: number): string[] {
-  const chunks: string[] = [];
-  let remaining = text;
-
-  while (remaining.length > 0) {
-    if (remaining.length <= maxLength) {
-      chunks.push(remaining);
-      break;
-    }
-
-    // Try to split at a newline within the allowed window
-    let splitIndex = remaining.lastIndexOf('\n', maxLength);
-    if (splitIndex === -1 || splitIndex < maxLength / 2) {
-      splitIndex = remaining.lastIndexOf(' ', maxLength);
-    }
-    if (splitIndex === -1 || splitIndex < maxLength / 2) {
-      splitIndex = maxLength;
-    }
-
-    chunks.push(remaining.slice(0, splitIndex));
-    remaining = remaining.slice(splitIndex).trimStart();
-  }
-
-  return chunks;
-}
-
 /**
  * Slack channel adapter backed by @slack/bolt.
  *
@@ -77,6 +48,7 @@ export class SlackAdapter implements ChannelAdapter {
   private app: App | null = null;
   private messageHandler?: (msg: InboundMessage) => void;
   private config: SlackAdapterConfig;
+  private userNameCache: Map<string, string> = new Map();
 
   get status(): ChannelStatus {
     return this._status;
@@ -105,7 +77,7 @@ export class SlackAdapter implements ChannelAdapter {
 
       // Register message event handler
       this.app.message(async ({ message }) => {
-        this.handleMessage(message as unknown as SlackMessageEvent);
+        await this.handleMessage(message as unknown as SlackMessageEvent);
       });
 
       await this.app.start();
@@ -161,8 +133,23 @@ export class SlackAdapter implements ChannelAdapter {
     }
   }
 
+  /** Resolve a Slack user ID to a display name, with caching. */
+  private async resolveUserName(userId: string): Promise<string> {
+    const cached = this.userNameCache.get(userId);
+    if (cached) return cached;
+
+    try {
+      const result = await this.app!.client.users.info({ user: userId });
+      const name = result.user?.real_name || result.user?.name || userId;
+      this.userNameCache.set(userId, name);
+      return name;
+    } catch {
+      return userId;
+    }
+  }
+
   /** Internal: process an inbound Slack message event. */
-  private handleMessage(message: SlackMessageEvent): void {
+  private async handleMessage(message: SlackMessageEvent): Promise<void> {
     if (!this.messageHandler) return;
 
     // Ignore bot messages
@@ -187,9 +174,10 @@ export class SlackAdapter implements ChannelAdapter {
     // Strip bot mentions: <@U\w+> pattern
     let text = (message.text ?? '').replace(/<@U\w+>/g, '').trim();
 
-    // TODO: message.user is a Slack user ID (e.g. U0123ABC), not a display name.
-    // To resolve display names, use this.app.client.users.info() with caching.
-    const senderName = message.user;
+    // Resolve display name from Slack user ID
+    const senderName = message.user
+      ? await this.resolveUserName(message.user)
+      : undefined;
 
     // Detect reset command
     if (text === '!reset' || text === 'reset') {
diff --git a/src/channels/telegram/adapter.ts b/src/channels/telegram/adapter.ts
index 824110c..46716ec 100644
--- a/src/channels/telegram/adapter.ts
+++ b/src/channels/telegram/adapter.ts
@@ -9,6 +9,7 @@ import type {
 } from '../types.js';
 import { isAllowedChat } from '../../frontends/telegram/handlers.js';
 import { parseConfirmationCallback } from '../../frontends/telegram/confirmations.js';
+import { splitMessage } from '../utils.js';
 
 /** Configuration for the Telegram channel adapter. */
 export interface TelegramAdapterConfig {
@@ -17,36 +18,6 @@ export interface TelegramAdapterConfig {
   hookEngine?: HookEngine;
 }
 
-/**
- * Split a long message into chunks that respect Telegram's 4096 char limit.
- * Prefers splitting at newlines, then spaces, then hard-cuts.
- */
-function splitMessage(text: string, maxLength: number): string[] {
-  const chunks: string[] = [];
-  let remaining = text;
-
-  while (remaining.length > 0) {
-    if (remaining.length <= maxLength) {
-      chunks.push(remaining);
-      break;
-    }
-
-    // Try to split at a newline within the allowed window
-    let splitIndex = remaining.lastIndexOf('\n', maxLength);
-    if (splitIndex === -1 || splitIndex < maxLength / 2) {
-      splitIndex = remaining.lastIndexOf(' ', maxLength);
-    }
-    if (splitIndex === -1 || splitIndex < maxLength / 2) {
-      splitIndex = maxLength;
-    }
-
-    chunks.push(remaining.slice(0, splitIndex));
-    remaining = remaining.slice(splitIndex).trimStart();
-  }
-
-  return chunks;
-}
-
 /**
  * Telegram channel adapter backed by grammy.
  *
diff --git a/src/channels/utils.test.ts b/src/channels/utils.test.ts
new file mode 100644
index 0000000..db8baba
--- /dev/null
+++ b/src/channels/utils.test.ts
@@ -0,0 +1,86 @@
+import { describe, it, expect } from 'vitest';
+import { splitMessage } from './utils.js';
+
+describe('splitMessage', () => {
+  it('returns single chunk for empty string', () => {
+    const result = splitMessage('', 100);
+    // empty string never enters the while loop → returns empty array
+    expect(result).toEqual([]);
+  });
+
+  it('returns single chunk when text is under maxLength', () => {
+    const result = splitMessage('hello world', 100);
+    expect(result).toEqual(['hello world']);
+  });
+
+  it('returns single chunk when text equals maxLength', () => {
+    const text = 'a'.repeat(50);
+    const result = splitMessage(text, 50);
+    expect(result).toEqual([text]);
+  });
+
+  it('splits at newline when possible', () => {
+    const text = 'line one\nline two\nline three';
+    // maxLength 18 → "line one\nline two\n" is 18 chars, lastIndexOf('\n', 18) = 17
+    const result = splitMessage(text, 18);
+    expect(result).toEqual(['line one\nline two', 'line three']);
+  });
+
+  it('splits at space when no newline available', () => {
+    const text = 'word1 word2 word3 word4';
+    // maxLength 12 → "word1 word2 " lastIndexOf(' ', 12) = 11
+    const result = splitMessage(text, 12);
+    expect(result[0]).toBe('word1 word2');
+    expect(result.length).toBeGreaterThanOrEqual(2);
+  });
+
+  it('hard-cuts when no whitespace available', () => {
+    const text = 'abcdefghijklmnop';
+    const result = splitMessage(text, 5);
+    expect(result[0]).toBe('abcde');
+    expect(result[1]).toBe('fghij');
+    expect(result[2]).toBe('klmno');
+    expect(result[3]).toBe('p');
+  });
+
+  it('produces multiple chunks for long text', () => {
+    const text = 'chunk one\nchunk two\nchunk three\nchunk four';
+    const result = splitMessage(text, 20);
+    expect(result.length).toBeGreaterThan(1);
+    // Every chunk respects the limit
+    for (const chunk of result) {
+      expect(chunk.length).toBeLessThanOrEqual(20);
+    }
+  });
+
+  it('preserves all content (joined chunks equal original minus trimmed whitespace)', () => {
+    const text = 'The quick brown fox jumps over the lazy dog. ' +
+      'Pack my box with five dozen liquor jugs. ' +
+      'How vexingly quick daft zebras jump.';
+    const result = splitMessage(text, 30);
+
+    // Reassemble: since trimStart() removes leading whitespace between chunks,
+    // we verify all words are preserved
+    const originalWords = text.split(/\s+/);
+    const resultWords = result.join(' ').split(/\s+/);
+    expect(resultWords).toEqual(originalWords);
+  });
+
+  it('prefers newline split over space split', () => {
+    // Place newline at a good position and space later
+    const text = 'first part\nsecond part of the message';
+    // maxLength 15: lastIndexOf('\n', 15) = 10, which is >= 15/2 = 7.5 → splits at newline
+    const result = splitMessage(text, 15);
+    expect(result[0]).toBe('first part');
+  });
+
+  it('falls back to space when newline is too early', () => {
+    // Newline at position 2, which is < maxLength/2 for maxLength=14
+    const text = 'ab\ncdefghij klmnopqrst';
+    // lastIndexOf('\n', 14) = 2, but 2 < 14/2=7 → falls back to space
+    // lastIndexOf(' ', 14) = 11, which is >= 7 → splits at space
+    const result = splitMessage(text, 14);
+    expect(result[0]).toBe('ab\ncdefghij');
+    expect(result[1]).toBe('klmnopqrst');
+  });
+});
diff --git a/src/channels/utils.ts b/src/channels/utils.ts
new file mode 100644
index 0000000..e26703d
--- /dev/null
+++ b/src/channels/utils.ts
@@ -0,0 +1,33 @@
+/**
+ * Shared utilities for channel adapters.
+ */
+
+/**
+ * Split a long message into chunks that respect a platform's character limit.
+ * Prefers splitting at newlines, then spaces, then hard-cuts.
+ */
+export function splitMessage(text: string, maxLength: number): string[] {
+  const chunks: string[] = [];
+  let remaining = text;
+
+  while (remaining.length > 0) {
+    if (remaining.length <= maxLength) {
+      chunks.push(remaining);
+      break;
+    }
+
+    // Try to split at a newline within the allowed window
+    let splitIndex = remaining.lastIndexOf('\n', maxLength);
+    if (splitIndex === -1 || splitIndex < maxLength / 2) {
+      splitIndex = remaining.lastIndexOf(' ', maxLength);
+    }
+    if (splitIndex === -1 || splitIndex < maxLength / 2) {
+      splitIndex = maxLength;
+    }
+
+    chunks.push(remaining.slice(0, splitIndex));
+    remaining = remaining.slice(splitIndex).trimStart();
+  }
+
+  return chunks;
+}
diff --git a/src/channels/whatsapp/adapter.ts b/src/channels/whatsapp/adapter.ts
index 3c4114d..e2d9068 100644
--- a/src/channels/whatsapp/adapter.ts
+++ b/src/channels/whatsapp/adapter.ts
@@ -14,6 +14,7 @@ import type {
   ChannelAdapter,
   ChannelStatus,
 } from '../types.js';
+import { splitMessage } from '../utils.js';
 
 /** Configuration for the WhatsApp channel adapter. */
 export interface WhatsAppAdapterConfig {
@@ -34,36 +35,6 @@ interface WhatsAppMessage {
   _data?: { notifyName?: string };
 }
 
-/**
- * Split a long message into chunks that respect WhatsApp's readability limit.
- * Prefers splitting at newlines, then spaces, then hard-cuts.
- */
-function splitMessage(text: string, maxLength: number): string[] {
-  const chunks: string[] = [];
-  let remaining = text;
-
-  while (remaining.length > 0) {
-    if (remaining.length <= maxLength) {
-      chunks.push(remaining);
-      break;
-    }
-
-    // Try to split at a newline within the allowed window
-    let splitIndex = remaining.lastIndexOf('\n', maxLength);
-    if (splitIndex === -1 || splitIndex < maxLength / 2) {
-      splitIndex = remaining.lastIndexOf(' ', maxLength);
-    }
-    if (splitIndex === -1 || splitIndex < maxLength / 2) {
-      splitIndex = maxLength;
-    }
-
-    chunks.push(remaining.slice(0, splitIndex));
-    remaining = remaining.slice(splitIndex).trimStart();
-  }
-
-  return chunks;
-}
-
 /**
  * WhatsApp channel adapter backed by whatsapp-web.js.
  *
diff --git a/src/config/schema.ts b/src/config/schema.ts
index 728c786..88505b2 100644
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -146,6 +146,14 @@ const processSchema = z.object({
   buffer_size: z.number().min(1024).max(1048576).default(65536),
 }).default({});
 
+const retrySchema = z.object({
+  enabled: z.boolean().default(true),
+  max_retries: z.number().min(0).max(10).default(3),
+  initial_delay_ms: z.number().min(100).max(60000).default(1000),
+  backoff_multiplier: z.number().min(1).max(5).default(2),
+  max_delay_ms: z.number().min(1000).max(120000).default(30000),
+}).default({});
+
 const webSearchSchema = z.object({
   provider: z.enum(['brave', 'searxng']).default('brave'),
   api_key: z.string().optional(),
@@ -153,6 +161,16 @@ const webSearchSchema = z.object({
   max_results: z.number().min(1).max(20).default(5),
 }).default({});
 
+const promptSchema = z.object({
+  /** Additional directories to search for prompt template files. */
+  search_dirs: z.array(z.string()).default([]),
+  /** Extra named sections to include in the system prompt. */
+  extra_sections: z.array(z.object({
+    name: z.string(),
+    content: z.string(),
+  })).default([]),
+}).default({});
+
 export const configSchema = z.object({
   telegram: telegramSchema,
   discord: discordSchema,
@@ -169,7 +187,9 @@ export const configSchema = z.object({
   compaction: compactionSchema,
   memory: memorySchema,
   process: processSchema,
+  retry: retrySchema,
   web_search: webSearchSchema,
+  prompt: promptSchema,
 });
 
 export type Config = z.infer<typeof configSchema>;
@@ -184,3 +204,5 @@ export type ProcessConfig = z.infer<typeof processSchema>;
 export type DiscordConfig = z.infer<typeof discordSchema>;
 export type SlackConfig = z.infer<typeof slackSchema>;
 export type WhatsAppConfig = z.infer<typeof whatsappSchema>;
+export type RetryPolicyConfig = z.infer<typeof retrySchema>;
+export type PromptConfig = z.infer<typeof promptSchema>;
diff --git a/src/daemon/index.ts b/src/daemon/index.ts
index bb45c7d..f7a2fb6 100644
--- a/src/daemon/index.ts
+++ b/src/daemon/index.ts
@@ -1,7 +1,7 @@
 import { Lifecycle } from './lifecycle.js';
 import type { Config, ModelConfig } from '../config/index.js';
-import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, ModelRouter } from '../models/index.js';
-import type { ModelClient } from '../models/index.js';
+import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, ModelRouter, DEFAULT_RETRY_CONFIG } from '../models/index.js';
+import type { ModelClient, RetryConfig } from '../models/index.js';
 import { AgentOrchestrator, type DelegationConfig } from '../backends/index.js';
 import { SessionStore, SessionManager } from '../session/index.js';
 import { HookEngine } from '../hooks/index.js';
@@ -14,9 +14,10 @@ import { CronScheduler } from '../automation/index.js';
 import type { InboundMessage, OutboundMessage } from '../channels/index.js';
 import { McpManager } from '../mcp/index.js';
 import { SkillRegistry, SkillInstaller, loadAllSkills } from '../skills/index.js';
+import { assembleSystemPrompt } from '../prompt/index.js';
 import { resolve } from 'path';
 import { homedir } from 'os';
-import { mkdirSync, readFileSync, existsSync } from 'fs';
+import { mkdirSync } from 'fs';
 
 export interface DaemonContext {
   config: Config;
@@ -34,21 +35,23 @@ export interface DaemonContext {
   skillInstaller: SkillInstaller;
 }
 
-function loadSystemPrompt(): string {
-  // Try to load SOUL.md from working directory first, then from project root
-  const paths = [
-    resolve(process.cwd(), 'SOUL.md'),
-    resolve(import.meta.dirname, '../../SOUL.md'),
+function loadSystemPrompt(config: Config): string {
+  const searchDirs = [
+    process.cwd(),
+    resolve(import.meta.dirname, '../..'),
+    ...(config.prompt.search_dirs ?? []),
   ];
 
-  for (const soulPath of paths) {
-    if (existsSync(soulPath)) {
-      return readFileSync(soulPath, 'utf-8');
-    }
+  const result = assembleSystemPrompt({
+    searchDirs,
+    extraSections: config.prompt.extra_sections,
+  });
+
+  if (result.loadedFiles.length > 0) {
+    console.log(`Loaded prompt templates: ${result.loadedFiles.map(f => f.split('/').pop()).join(', ')}`);
   }
 
-  // Fallback if SOUL.md not found
-  return 'You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.';
+  return result.prompt;
 }
 
 /**
@@ -125,12 +128,26 @@ function createModelRouter(config: Config): ModelRouter {
   console.log(`Model router: default=${models.default.provider}/${models.default.model}, ` +
     `fallback=[${models.fallback_chain.join(', ')}]`);
 
+  // Build retry config if enabled
+  const retryConfig: RetryConfig | undefined = config.retry.enabled ? {
+    maxRetries: config.retry.max_retries,
+    initialDelayMs: config.retry.initial_delay_ms,
+    backoffMultiplier: config.retry.backoff_multiplier,
+    maxDelayMs: config.retry.max_delay_ms,
+    nonRetryablePatterns: DEFAULT_RETRY_CONFIG.nonRetryablePatterns,
+  } : undefined;
+
+  if (retryConfig) {
+    console.log(`Retry policy: max_retries=${retryConfig.maxRetries}, initial_delay=${retryConfig.initialDelayMs}ms`);
+  }
+
   return new ModelRouter({
     default: defaultClient,
     fast: fastClient,
     complex: complexClient,
     local: localClient,
     fallbackChain,
+    retryConfig,
   });
 }
 
@@ -210,6 +227,33 @@ function createMessageRouter(deps: {
         }
         return;
       }
+      if (msg.metadata.command === 'usage') {
+        const usage = agent.getUsage();
+        const lines = [
+          '**Token Usage**',
+          '',
+          `Primary: ${usage.primary.inputTokens.toLocaleString()} in / ${usage.primary.outputTokens.toLocaleString()} out (${usage.primary.calls} calls)`,
+        ];
+
+        const delegationEntries = Object.entries(usage.delegation);
+        if (delegationEntries.length > 0) {
+          lines.push('');
+          lines.push('Delegation:');
+          for (const [tier, stats] of delegationEntries) {
+            lines.push(`  ${tier}: ${stats.inputTokens.toLocaleString()} in / ${stats.outputTokens.toLocaleString()} out (${stats.calls} calls)`);
+          }
+        }
+
+        lines.push('');
+        lines.push(`**Total:** ${usage.total.inputTokens.toLocaleString()} in / ${usage.total.outputTokens.toLocaleString()} out (${usage.total.calls} calls)`);
+
+        if (usage.total.estimatedCost > 0) {
+          lines.push(`**Estimated cost:** $${usage.total.estimatedCost.toFixed(4)}`);
+        }
+
+        await reply({ text: lines.join('\n'), replyTo: msg.id });
+        return;
+      }
     }
 
     try {
@@ -331,7 +375,7 @@ export async function startDaemon(config: Config): Promise<DaemonContext> {
   const modelRouter = createModelRouter(config);
 
   // Load system prompt and append skill instructions
-  let systemPrompt = loadSystemPrompt();
+  let systemPrompt = loadSystemPrompt(config);
   const skillAdditions = skillRegistry.getSystemPromptAdditions();
   if (skillAdditions) {
     systemPrompt = `${systemPrompt}\n\n# Available Skills\n\n${skillAdditions}`;
diff --git a/src/frontends/tui/commands.test.ts b/src/frontends/tui/commands.test.ts
index 730e6ed..63b04ea 100644
--- a/src/frontends/tui/commands.test.ts
+++ b/src/frontends/tui/commands.test.ts
@@ -30,6 +30,10 @@ describe('parseCommand', () => {
     expect(parseCommand('/compact')).toEqual({ type: 'compact' });
   });
 
+  it('parses /usage command', () => {
+    expect(parseCommand('/usage')).toEqual({ type: 'usage' });
+  });
+
   it('parses /model command without argument', () => {
     expect(parseCommand('/model')).toEqual({ type: 'model' });
   });
@@ -69,6 +73,7 @@ describe('getHelpText', () => {
     expect(help).toContain('/model');
     expect(help).toContain('/reset');
     expect(help).toContain('/compact');
+    expect(help).toContain('/usage');
     expect(help).toContain('/quit');
   });
 });
diff --git a/src/frontends/tui/commands.ts b/src/frontends/tui/commands.ts
index b7054e7..8e46d3e 100644
--- a/src/frontends/tui/commands.ts
+++ b/src/frontends/tui/commands.ts
@@ -5,6 +5,7 @@ export type Command =
   | { type: 'status' }
   | { type: 'fullscreen' }
   | { type: 'compact' }
+  | { type: 'usage' }
   | { type: 'model'; name?: string }
   | { type: 'backend'; provider?: string }
   | { type: 'transfer'; target: string }
@@ -44,6 +45,11 @@ export function parseCommand(input: string): Command | null {
     return { type: 'compact' };
   }
 
+  // Usage
+  if (trimmed === '/usage') {
+    return { type: 'usage' };
+  }
+
   // Model (with optional argument)
   if (trimmed === '/model') {
     return { type: 'model' };
@@ -80,6 +86,7 @@ Commands:
   /backend [provider]    Show or switch local backend (ollama, llamacpp)
   /reset, /clear, /new   Clear conversation history
   /compact               Compact conversation history
+  /usage                 Show token usage and estimated cost
   /status                Show session info and token usage
   /fullscreen, /fs       Switch to fullscreen mode
   /transfer <dest>       Transfer session to another frontend
@@ -98,6 +105,7 @@ export const SLASH_COMMANDS = [
   '/clear',
   '/new',
   '/compact',
+  '/usage',
   '/status',
   '/fullscreen',
   '/fs',
@@ -115,6 +123,7 @@ export const COMMAND_TOOLTIPS: Record<string, string> = {
   '/clear': 'Clear conversation history',
   '/new': 'Start a new conversation',
   '/compact': 'Compact conversation history to save context space',
+  '/usage': 'Show token usage and estimated cost',
   '/status': 'Show session info and token usage',
   '/fullscreen': 'Switch to fullscreen mode',
   '/fs': 'Switch to fullscreen mode',
diff --git a/src/models/costs.test.ts b/src/models/costs.test.ts
new file mode 100644
index 0000000..203c5b1
--- /dev/null
+++ b/src/models/costs.test.ts
@@ -0,0 +1,57 @@
+import { describe, it, expect } from 'vitest';
+import { estimateCost, MODEL_COSTS_PER_MILLION } from './costs.js';
+
+describe('estimateCost', () => {
+  it('returns 0 for local/unknown models', () => {
+    expect(estimateCost(1000, 1000)).toBe(0);
+    expect(estimateCost(1000, 1000, 'some-local-model')).toBe(0);
+  });
+
+  it('uses default costs when model name is undefined', () => {
+    const cost = estimateCost(1_000_000, 1_000_000);
+    expect(cost).toBe(0);
+  });
+
+  it('calculates correctly for known Anthropic models', () => {
+    // claude-sonnet-4: $3/M input, $15/M output
+    const cost = estimateCost(1_000_000, 1_000_000, 'claude-sonnet-4-20250514');
+    expect(cost).toBe(3 + 15);
+  });
+
+  it('calculates correctly for claude-opus', () => {
+    // claude-opus-4: $15/M input, $75/M output
+    const cost = estimateCost(1_000_000, 500_000, 'claude-opus-4-20250514');
+    expect(cost).toBe(15 + 37.5);
+  });
+
+  it('calculates correctly for OpenAI models', () => {
+    // gpt-4o: $2.50/M input, $10/M output
+    const cost = estimateCost(2_000_000, 1_000_000, 'gpt-4o');
+    expect(cost).toBe(5 + 10);
+  });
+
+  it('handles small token counts', () => {
+    // 1000 tokens of claude-sonnet input: 1000 * 3 / 1_000_000 = 0.003
+    const cost = estimateCost(1000, 0, 'claude-sonnet-4-20250514');
+    expect(cost).toBeCloseTo(0.003);
+  });
+
+  it('handles zero tokens', () => {
+    const cost = estimateCost(0, 0, 'claude-sonnet-4-20250514');
+    expect(cost).toBe(0);
+  });
+});
+
+describe('MODEL_COSTS_PER_MILLION', () => {
+  it('has a default entry', () => {
+    expect(MODEL_COSTS_PER_MILLION['default']).toEqual({ input: 0, output: 0 });
+  });
+
+  it('has entries for all expected models', () => {
+    expect(MODEL_COSTS_PER_MILLION['claude-sonnet-4-20250514']).toBeDefined();
+    expect(MODEL_COSTS_PER_MILLION['claude-3-5-haiku-20241022']).toBeDefined();
+    expect(MODEL_COSTS_PER_MILLION['claude-opus-4-20250514']).toBeDefined();
+    expect(MODEL_COSTS_PER_MILLION['gpt-4o']).toBeDefined();
+    expect(MODEL_COSTS_PER_MILLION['gpt-4o-mini']).toBeDefined();
+  });
+});
diff --git a/src/models/costs.ts b/src/models/costs.ts
new file mode 100644
index 0000000..ae758ea
--- /dev/null
+++ b/src/models/costs.ts
@@ -0,0 +1,21 @@
+/** Approximate cost per million tokens for known models. */
+export const MODEL_COSTS_PER_MILLION: Record<string, { input: number; output: number }> = {
+  // Anthropic
+  'claude-sonnet-4-20250514': { input: 3, output: 15 },
+  'claude-3-5-haiku-20241022': { input: 0.80, output: 4 },
+  'claude-opus-4-20250514': { input: 15, output: 75 },
+  // OpenAI
+  'gpt-4o': { input: 2.50, output: 10 },
+  'gpt-4o-mini': { input: 0.15, output: 0.60 },
+  // Local / unknown models
+  'default': { input: 0, output: 0 },
+};
+
+/**
+ * Estimate the dollar cost for a given number of input/output tokens.
+ * Falls back to zero cost for unknown or local models.
+ */
+export function estimateCost(inputTokens: number, outputTokens: number, modelName?: string): number {
+  const costs = MODEL_COSTS_PER_MILLION[modelName ?? ''] ?? MODEL_COSTS_PER_MILLION['default'];
+  return (inputTokens * costs.input + outputTokens * costs.output) / 1_000_000;
+}
diff --git a/src/models/index.ts b/src/models/index.ts
index 1f8b5e8..346c0b5 100644
--- a/src/models/index.ts
+++ b/src/models/index.ts
@@ -3,6 +3,8 @@ export { OpenAIClient, type OpenAIClientConfig } from './openai.js';
 export { OllamaClient, type OllamaClientConfig } from './local/index.js';
 export { LlamaCppClient, type LlamaCppClientConfig } from './local/index.js';
 export { ModelRouter, type ModelRouterConfig, type ModelTier } from './router.js';
+export { withRetry, isRetryable, DEFAULT_RETRY_CONFIG, type RetryConfig } from './retry.js';
+export { estimateCost, MODEL_COSTS_PER_MILLION } from './costs.js';
 export type {
   Message,
   ChatRequest,
diff --git a/src/models/retry.test.ts b/src/models/retry.test.ts
new file mode 100644
index 0000000..3005d96
--- /dev/null
+++ b/src/models/retry.test.ts
@@ -0,0 +1,169 @@
+import { describe, it, expect, vi } from 'vitest';
+import { isRetryable, withRetry, DEFAULT_RETRY_CONFIG } from './retry.js';
+import type { RetryConfig } from './retry.js';
+
+describe('isRetryable', () => {
+  it('returns true for generic errors', () => {
+    const error = new Error('Connection timeout');
+    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(true);
+  });
+
+  it('returns false for authentication errors', () => {
+    const error = new Error('Invalid API key: authentication failed');
+    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
+  });
+
+  it('returns false for invalid_api_key errors', () => {
+    const error = new Error('Error: invalid_api_key');
+    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
+  });
+
+  it('returns false for unauthorized errors', () => {
+    const error = new Error('Request unauthorized');
+    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
+  });
+
+  it('returns false for invalid_request errors', () => {
+    const error = new Error('invalid_request: missing parameter');
+    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
+  });
+
+  it('returns false for context_length_exceeded errors', () => {
+    const error = new Error('context_length_exceeded: max 128k tokens');
+    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
+  });
+
+  it('returns false for content_policy errors', () => {
+    const error = new Error('content_policy violation detected');
+    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
+  });
+
+  it('is case-insensitive when matching patterns', () => {
+    const error = new Error('AUTHENTICATION error');
+    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
+  });
+
+  it('uses custom patterns when provided', () => {
+    const error = new Error('quota exceeded');
+    expect(isRetryable(error, ['quota'])).toBe(false);
+  });
+});
+
+describe('withRetry', () => {
+  // Use minimal real delays to avoid fake-timer race conditions
+  const fastConfig: RetryConfig = {
+    maxRetries: 3,
+    initialDelayMs: 1,
+    backoffMultiplier: 1,
+    maxDelayMs: 5,
+    nonRetryablePatterns: DEFAULT_RETRY_CONFIG.nonRetryablePatterns,
+  };
+
+  it('succeeds on first attempt without delay', async () => {
+    const fn = vi.fn().mockResolvedValue('success');
+
+    const result = await withRetry(fn, fastConfig);
+
+    expect(result).toBe('success');
+    expect(fn).toHaveBeenCalledTimes(1);
+  });
+
+  it('retries on transient failure then succeeds', async () => {
+    const fn = vi.fn()
+      .mockRejectedValueOnce(new Error('timeout'))
+      .mockRejectedValueOnce(new Error('timeout'))
+      .mockResolvedValueOnce('recovered');
+
+    const result = await withRetry(fn, fastConfig, 'test-op');
+
+    expect(result).toBe('recovered');
+    expect(fn).toHaveBeenCalledTimes(3);
+  });
+
+  it('throws after maxRetries exhausted', async () => {
+    const fn = vi.fn().mockRejectedValue(new Error('persistent failure'));
+
+    await expect(withRetry(fn, fastConfig, 'test-op')).rejects.toThrow('persistent failure');
+    // 1 initial + 3 retries = 4 total
+    expect(fn).toHaveBeenCalledTimes(4);
+  });
+
+  it('does not retry non-retryable errors', async () => {
+    const fn = vi.fn().mockRejectedValue(new Error('invalid_api_key'));
+
+    await expect(withRetry(fn, fastConfig)).rejects.toThrow('invalid_api_key');
+    expect(fn).toHaveBeenCalledTimes(1);
+  });
+
+  it('does not retry authentication errors', async () => {
+    const fn = vi.fn().mockRejectedValue(new Error('Request unauthorized'));
+
+    await expect(withRetry(fn, fastConfig)).rejects.toThrow('Request unauthorized');
+    expect(fn).toHaveBeenCalledTimes(1);
+  });
+
+  it('converts non-Error throws to Error objects', async () => {
+    const fn = vi.fn().mockRejectedValue('string error');
+
+    await expect(withRetry(fn, { ...fastConfig, maxRetries: 0 })).rejects.toThrow('string error');
+  });
+
+  it('respects maxDelayMs cap', async () => {
+    const cappedConfig: RetryConfig = {
+      maxRetries: 2,
+      initialDelayMs: 1,
+      backoffMultiplier: 10,
+      maxDelayMs: 2,
+      nonRetryablePatterns: [],
+    };
+
+    let callCount = 0;
+    const fn = vi.fn().mockImplementation(() => {
+      callCount++;
+      if (callCount < 3) return Promise.reject(new Error('fail'));
+      return Promise.resolve('ok');
+    });
+
+    // If maxDelayMs weren't respected, a 10x multiplier could cause very long waits.
+    // With maxDelayMs=2ms, this completes quickly.
+    const result = await withRetry(fn, cappedConfig, 'capped-test');
+    expect(result).toBe('ok');
+    expect(fn).toHaveBeenCalledTimes(3);
+  });
+
+  it('uses default config when none provided', async () => {
+    const fn = vi.fn().mockResolvedValue('default-config');
+
+    const result = await withRetry(fn);
+
+    expect(result).toBe('default-config');
+    expect(fn).toHaveBeenCalledTimes(1);
+  });
+
+  it('increases delay exponentially between retries', async () => {
+    const timestamps: number[] = [];
+    const config: RetryConfig = {
+      maxRetries: 2,
+      initialDelayMs: 20,
+      backoffMultiplier: 2,
+      maxDelayMs: 1000,
+      nonRetryablePatterns: [],
+    };
+
+    const fn = vi.fn().mockImplementation(() => {
+      timestamps.push(Date.now());
+      if (timestamps.length < 3) return Promise.reject(new Error('fail'));
+      return Promise.resolve('ok');
+    });
+
+    await withRetry(fn, config, 'backoff-test');
+
+    expect(fn).toHaveBeenCalledTimes(3);
+    // First retry delay: ~20ms (jitter 50-100% of 20 = 10-20ms)
+    // Second retry delay: ~40ms (jitter 50-100% of 40 = 20-40ms)
+    const firstDelay = timestamps[1] - timestamps[0];
+    const secondDelay = timestamps[2] - timestamps[1];
+    // Second delay should be roughly double the first (within jitter range)
+    expect(secondDelay).toBeGreaterThanOrEqual(firstDelay * 0.7);
+  });
+});
diff --git a/src/models/retry.ts b/src/models/retry.ts
new file mode 100644
index 0000000..e295f61
--- /dev/null
+++ b/src/models/retry.ts
@@ -0,0 +1,71 @@
+export interface RetryConfig {
+  /** Maximum number of retry attempts (default: 3). Does not count the initial attempt. */
+  maxRetries: number;
+  /** Initial delay in milliseconds before first retry (default: 1000). */
+  initialDelayMs: number;
+  /** Multiplier applied to delay after each retry (default: 2). */
+  backoffMultiplier: number;
+  /** Maximum delay in milliseconds (default: 30000). */
+  maxDelayMs: number;
+  /** Errors matching these patterns should NOT be retried (e.g. auth errors, invalid requests). */
+  nonRetryablePatterns: string[];
+}
+
+export const DEFAULT_RETRY_CONFIG: RetryConfig = {
+  maxRetries: 3,
+  initialDelayMs: 1000,
+  backoffMultiplier: 2,
+  maxDelayMs: 30000,
+  nonRetryablePatterns: [
+    'invalid_api_key',
+    'authentication',
+    'unauthorized',
+    'invalid_request',
+    'context_length_exceeded',
+    'content_policy',
+  ],
+};
+
+export function isRetryable(error: Error, nonRetryablePatterns: string[]): boolean {
+  const msg = error.message.toLowerCase();
+  return !nonRetryablePatterns.some(pattern => msg.includes(pattern.toLowerCase()));
+}
+
+export async function withRetry<T>(
+  fn: () => Promise<T>,
+  config: RetryConfig = DEFAULT_RETRY_CONFIG,
+  label?: string,
+): Promise<T> {
+  let lastError: Error | undefined;
+
+  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
+    try {
+      return await fn();
+    } catch (error) {
+      lastError = error instanceof Error ? error : new Error(String(error));
+
+      // Don't retry non-retryable errors
+      if (!isRetryable(lastError, config.nonRetryablePatterns)) {
+        throw lastError;
+      }
+
+      // Don't retry if we've exhausted attempts
+      if (attempt >= config.maxRetries) {
+        throw lastError;
+      }
+
+      // Calculate delay with exponential backoff + jitter
+      const baseDelay = config.initialDelayMs * Math.pow(config.backoffMultiplier, attempt);
+      const delay = Math.min(baseDelay, config.maxDelayMs);
+      const jitter = delay * (0.5 + Math.random() * 0.5); // 50-100% of delay
+
+      console.warn(
+        `[retry] ${label ?? 'operation'} attempt ${attempt + 1}/${config.maxRetries} failed: ${lastError.message}. Retrying in ${Math.round(jitter)}ms...`,
+      );
+
+      await new Promise(resolve => setTimeout(resolve, jitter));
+    }
+  }
+
+  throw lastError ?? new Error('Retry failed with no error');
+}
diff --git a/src/models/router.ts b/src/models/router.ts
index 3a090ca..38d4a46 100644
--- a/src/models/router.ts
+++ b/src/models/router.ts
@@ -1,4 +1,6 @@
 import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient } from './types.js';
+import { withRetry } from './retry.js';
+import type { RetryConfig } from './retry.js';
 
 export type ModelTier = 'fast' | 'default' | 'complex' | 'local';
 
@@ -8,6 +10,7 @@ export interface ModelRouterConfig {
   complex?: ModelClient;
   local?: ModelClient;
   fallbackChain: ModelClient[];
+  retryConfig?: RetryConfig;
 }
 
 export class ModelRouter implements ModelClient {
@@ -16,11 +19,13 @@ export class ModelRouter implements ModelClient {
   private fallbackChain: ModelClient[];
   private currentTier: ModelTier = 'default';
   private localProviderName?: string;
+  private retryConfig?: RetryConfig;
 
   constructor(config: ModelRouterConfig) {
     this.clients = new Map();
     this.defaultClient = config.default;
     this.fallbackChain = config.fallbackChain;
+    this.retryConfig = config.retryConfig;
 
     this.clients.set('default', config.default);
     if (config.fast) this.clients.set('fast', config.fast);
@@ -49,8 +54,11 @@ export class ModelRouter implements ModelClient {
     const primaryClient = this.clients.get(useTier) ?? this.defaultClient;
     const errors: Error[] = [];
 
-    // Try primary client
+    // Try primary client (with retry if configured)
     try {
+      if (this.retryConfig) {
+        return await withRetry(() => primaryClient.chat(request), this.retryConfig, 'primary model');
+      }
       return await primaryClient.chat(request);
     } catch (error) {
       errors.push(error instanceof Error ? error : new Error(String(error)));
diff --git a/src/prompt/index.ts b/src/prompt/index.ts
new file mode 100644
index 0000000..1801564
--- /dev/null
+++ b/src/prompt/index.ts
@@ -0,0 +1 @@
+export { assembleSystemPrompt, type PromptTemplateConfig, type PromptTemplateResult } from './template.js';
diff --git a/src/prompt/template.test.ts b/src/prompt/template.test.ts
new file mode 100644
index 0000000..9b35abc
--- /dev/null
+++ b/src/prompt/template.test.ts
@@ -0,0 +1,178 @@
+import { describe, it, expect, afterEach } from 'vitest';
+import { mkdtempSync, writeFileSync, rmSync } from 'fs';
+import { join } from 'path';
+import { tmpdir } from 'os';
+import { assembleSystemPrompt } from './template.js';
+
+describe('assembleSystemPrompt', () => {
+  const tempDirs: string[] = [];
+
+  function makeTempDir(): string {
+    const dir = mkdtempSync(join(tmpdir(), 'flynn-prompt-test-'));
+    tempDirs.push(dir);
+    return dir;
+  }
+
+  afterEach(() => {
+    for (const dir of tempDirs) {
+      rmSync(dir, { recursive: true, force: true });
+    }
+    tempDirs.length = 0;
+  });
+
+  it('returns fallback when no files found', () => {
+    const dir = makeTempDir();
+    const result = assembleSystemPrompt({ searchDirs: [dir] });
+
+    expect(result.prompt).toBe(
+      'You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.',
+    );
+    expect(result.loadedFiles).toEqual([]);
+  });
+
+  it('loads SOUL.md without section header', () => {
+    const dir = makeTempDir();
+    writeFileSync(join(dir, 'SOUL.md'), 'You are Flynn.');
+
+    const result = assembleSystemPrompt({ searchDirs: [dir] });
+
+    expect(result.prompt).toBe('You are Flynn.');
+    expect(result.loadedFiles).toHaveLength(1);
+    expect(result.loadedFiles[0]).toContain('SOUL.md');
+  });
+
+  it('loads AGENTS.md with "# Agent Instructions" section header', () => {
+    const dir = makeTempDir();
+    writeFileSync(join(dir, 'AGENTS.md'), 'Follow these rules.');
+
+    const result = assembleSystemPrompt({ searchDirs: [dir] });
+
+    expect(result.prompt).toBe('# Agent Instructions\n\nFollow these rules.');
+    expect(result.loadedFiles).toHaveLength(1);
+    expect(result.loadedFiles[0]).toContain('AGENTS.md');
+  });
+
+  it('loads multiple files in correct order', () => {
+    const dir = makeTempDir();
+    writeFileSync(join(dir, 'SOUL.md'), 'I am Flynn.');
+    writeFileSync(join(dir, 'AGENTS.md'), 'Be helpful.');
+    writeFileSync(join(dir, 'USER.md'), 'User likes cats.');
+
+    const result = assembleSystemPrompt({ searchDirs: [dir] });
+
+    expect(result.loadedFiles).toHaveLength(3);
+    // Verify correct ordering: SOUL → AGENTS → USER
+    expect(result.prompt).toBe(
+      'I am Flynn.\n\n# Agent Instructions\n\nBe helpful.\n\n# User Context\n\nUser likes cats.',
+    );
+  });
+
+  it('first search dir takes precedence over later dirs', () => {
+    const dir1 = makeTempDir();
+    const dir2 = makeTempDir();
+    writeFileSync(join(dir1, 'SOUL.md'), 'Primary identity.');
+    writeFileSync(join(dir2, 'SOUL.md'), 'Secondary identity.');
+
+    const result = assembleSystemPrompt({ searchDirs: [dir1, dir2] });
+
+    expect(result.prompt).toBe('Primary identity.');
+    expect(result.loadedFiles).toHaveLength(1);
+    expect(result.loadedFiles[0]).toContain(dir1);
+  });
+
+  it('falls through to later dir if file missing in first dir', () => {
+    const dir1 = makeTempDir();
+    const dir2 = makeTempDir();
+    writeFileSync(join(dir2, 'SOUL.md'), 'Fallback identity.');
+
+    const result = assembleSystemPrompt({ searchDirs: [dir1, dir2] });
+
+    expect(result.prompt).toBe('Fallback identity.');
+    expect(result.loadedFiles[0]).toContain(dir2);
+  });
+
+  it('extra sections are appended', () => {
+    const dir = makeTempDir();
+    writeFileSync(join(dir, 'SOUL.md'), 'Base identity.');
+
+    const result = assembleSystemPrompt({
+      searchDirs: [dir],
+      extraSections: [
+        { name: 'Custom Rules', content: 'Always be polite.' },
+      ],
+    });
+
+    expect(result.prompt).toBe(
+      'Base identity.\n\n# Custom Rules\n\nAlways be polite.',
+    );
+  });
+
+  it('empty files are skipped', () => {
+    const dir = makeTempDir();
+    writeFileSync(join(dir, 'SOUL.md'), '');
+    writeFileSync(join(dir, 'AGENTS.md'), '   ');
+
+    const result = assembleSystemPrompt({ searchDirs: [dir] });
+
+    expect(result.prompt).toBe(
+      'You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.',
+    );
+    expect(result.loadedFiles).toEqual([]);
+  });
+
+  it('empty extra sections are skipped', () => {
+    const dir = makeTempDir();
+    writeFileSync(join(dir, 'SOUL.md'), 'Base identity.');
+
+    const result = assembleSystemPrompt({
+      searchDirs: [dir],
+      extraSections: [
+        { name: 'Empty', content: '   ' },
+        { name: 'Populated', content: 'Has content.' },
+      ],
+    });
+
+    expect(result.prompt).toBe(
+      'Base identity.\n\n# Populated\n\nHas content.',
+    );
+  });
+
+  it('attempts all PROMPT_FILES', () => {
+    const dir = makeTempDir();
+    writeFileSync(join(dir, 'SOUL.md'), 'Soul.');
+    writeFileSync(join(dir, 'AGENTS.md'), 'Agents.');
+    writeFileSync(join(dir, 'IDENTITY.md'), 'Identity.');
+    writeFileSync(join(dir, 'USER.md'), 'User.');
+    writeFileSync(join(dir, 'TOOLS.md'), 'Tools.');
+
+    const result = assembleSystemPrompt({ searchDirs: [dir] });
+
+    expect(result.loadedFiles).toHaveLength(5);
+    expect(result.prompt).toContain('Soul.');
+    expect(result.prompt).toContain('# Agent Instructions\n\nAgents.');
+    expect(result.prompt).toContain('# Identity Customization\n\nIdentity.');
+    expect(result.prompt).toContain('# User Context\n\nUser.');
+    expect(result.prompt).toContain('# Tool Instructions\n\nTools.');
+  });
+
+  it('trims whitespace from loaded file content', () => {
+    const dir = makeTempDir();
+    writeFileSync(join(dir, 'SOUL.md'), '\n  I am Flynn.  \n\n');
+
+    const result = assembleSystemPrompt({ searchDirs: [dir] });
+
+    expect(result.prompt).toBe('I am Flynn.');
+  });
+
+  it('mixes files from different search directories', () => {
+    const dir1 = makeTempDir();
+    const dir2 = makeTempDir();
+    writeFileSync(join(dir1, 'SOUL.md'), 'Primary soul.');
+    writeFileSync(join(dir2, 'AGENTS.md'), 'Agent rules.');
+
+    const result = assembleSystemPrompt({ searchDirs: [dir1, dir2] });
+
+    expect(result.loadedFiles).toHaveLength(2);
+    expect(result.prompt).toBe('Primary soul.\n\n# Agent Instructions\n\nAgent rules.');
+  });
+});
diff --git a/src/prompt/template.ts b/src/prompt/template.ts
new file mode 100644
index 0000000..9905fe4
--- /dev/null
+++ b/src/prompt/template.ts
@@ -0,0 +1,78 @@
+import { readFileSync, existsSync } from 'fs';
+import { resolve } from 'path';
+
+/** Ordered list of prompt template files to look for. */
+const PROMPT_FILES = [
+  { name: 'SOUL.md', section: 'Identity', required: false },
+  { name: 'AGENTS.md', section: 'Agent Instructions', required: false },
+  { name: 'IDENTITY.md', section: 'Identity Customization', required: false },
+  { name: 'USER.md', section: 'User Context', required: false },
+  { name: 'TOOLS.md', section: 'Tool Instructions', required: false },
+] as const;
+
+export interface PromptTemplateConfig {
+  /** Directories to search for template files, in priority order. */
+  searchDirs: string[];
+  /** Additional sections to inject (e.g., from config). */
+  extraSections?: Array<{ name: string; content: string }>;
+}
+
+export interface PromptTemplateResult {
+  /** The assembled system prompt. */
+  prompt: string;
+  /** Which files were loaded. */
+  loadedFiles: string[];
+}
+
+/**
+ * Assemble a system prompt from multiple template files.
+ *
+ * Searches `searchDirs` in order for each template file.
+ * First match wins — a file found in an earlier directory takes precedence.
+ * Sections are assembled in the order defined in PROMPT_FILES.
+ */
+export function assembleSystemPrompt(config: PromptTemplateConfig): PromptTemplateResult {
+  const sections: string[] = [];
+  const loadedFiles: string[] = [];
+
+  for (const { name, section } of PROMPT_FILES) {
+    for (const dir of config.searchDirs) {
+      const filePath = resolve(dir, name);
+      if (existsSync(filePath)) {
+        const content = readFileSync(filePath, 'utf-8').trim();
+        if (content) {
+          // SOUL.md is special — it's the base identity, no section header
+          if (name === 'SOUL.md') {
+            sections.push(content);
+          } else {
+            sections.push(`# ${section}\n\n${content}`);
+          }
+          loadedFiles.push(filePath);
+        }
+        break; // First match wins for this file
+      }
+    }
+  }
+
+  // Add extra sections
+  if (config.extraSections) {
+    for (const { name, content } of config.extraSections) {
+      if (content.trim()) {
+        sections.push(`# ${name}\n\n${content.trim()}`);
+      }
+    }
+  }
+
+  // Fallback if nothing was loaded
+  if (sections.length === 0) {
+    return {
+      prompt: 'You are Flynn, a helpful personal AI assistant. Be direct, concise, and helpful. Use markdown when it improves readability.',
+      loadedFiles: [],
+    };
+  }
+
+  return {
+    prompt: sections.join('\n\n'),
+    loadedFiles,
+  };
+}