diff --git a/src/backends/index.ts b/src/backends/index.ts
index 580066d..cf41f7e 100644
--- a/src/backends/index.ts
+++ b/src/backends/index.ts
@@ -1 +1,14 @@
-export { NativeAgent, type NativeAgentConfig } from './native/index.js';
+export { NativeAgent, type NativeAgentConfig, type ToolUseEvent } from './native/index.js';
+export {
+  AgentOrchestrator,
+  type OrchestratorConfig,
+  type SubAgentRequest,
+  type SubAgentResult,
+  type DelegationConfig,
+} from './native/index.js';
+export {
+  COMPACTION_SYSTEM_PROMPT,
+  MEMORY_EXTRACTION_PROMPT,
+  CLASSIFICATION_PROMPT,
+  TOOL_SUMMARISATION_PROMPT,
+} from './native/index.js';
diff --git a/src/backends/native/agent.test.ts b/src/backends/native/agent.test.ts
index 42d5b78..a4b4041 100644
--- a/src/backends/native/agent.test.ts
+++ b/src/backends/native/agent.test.ts
@@ -55,6 +55,7 @@ describe('NativeAgent', () => {
       getHistory: vi.fn().mockReturnValue([]),
       addMessage: vi.fn(),
       clear: vi.fn(),
+      replaceHistory: vi.fn(),
     };
 
     const agent = new NativeAgent({
diff --git a/src/backends/native/index.ts b/src/backends/native/index.ts
index 651b762..a6ec3c4 100644
--- a/src/backends/native/index.ts
+++ b/src/backends/native/index.ts
@@ -1 +1,14 @@
-export { NativeAgent, type NativeAgentConfig } from './agent.js';
+export { NativeAgent, type NativeAgentConfig, type ToolUseEvent } from './agent.js';
+export {
+  AgentOrchestrator,
+  type OrchestratorConfig,
+  type SubAgentRequest,
+  type SubAgentResult,
+  type DelegationConfig,
+} from './orchestrator.js';
+export {
+  COMPACTION_SYSTEM_PROMPT,
+  MEMORY_EXTRACTION_PROMPT,
+  CLASSIFICATION_PROMPT,
+  TOOL_SUMMARISATION_PROMPT,
+} from './prompts.js';
diff --git a/src/backends/native/orchestrator.test.ts b/src/backends/native/orchestrator.test.ts
new file mode 100644
index 0000000..c1966f8
--- /dev/null
+++ b/src/backends/native/orchestrator.test.ts
@@ -0,0 +1,613 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { AgentOrchestrator } from './orchestrator.js';
+import { ModelRouter } from '../../models/router.js';
+import type { ChatResponse, ModelClient } from '../../models/types.js';
+import { ToolRegistry, ToolExecutor } from '../../tools/index.js';
+import { HookEngine } from '../../hooks/engine.js';
+import type { SubAgentRequest } from './orchestrator.js';
+
+describe('AgentOrchestrator', () => {
+  let mockDefaultClient: ModelClient;
+  let mockFastClient: ModelClient;
+  let mockComplexClient: ModelClient;
+  let mockRouter: ModelRouter;
+
+  const createMockClient = (name: string, inputTokens = 100, outputTokens = 50): ModelClient => ({
+    chat: vi.fn().mockResolvedValue({
+      content: `${name} response`,
+      stopReason: 'end_turn',
+      usage: { inputTokens, outputTokens },
+    }),
+  });
+
+  beforeEach(() => {
+    mockDefaultClient = createMockClient('default', 100, 50);
+    mockFastClient = createMockClient('fast', 50, 25);
+    mockComplexClient = createMockClient('complex', 200, 100);
+
+    mockRouter = new ModelRouter({
+      default: mockDefaultClient,
+      fast: mockFastClient,
+      complex: mockComplexClient,
+      fallbackChain: [],
+    });
+  });
+
+  describe('delegate()', () => {
+    it('routes to the correct tier when specified', async () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are a helpful assistant.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      const result = await orchestrator.delegate({
+        tier: 'fast',
+        systemPrompt: 'Summarize this text',
+        message: 'This is a test message',
+        maxTokens: 1000,
+      });
+
+      expect(result.content).toBe('fast response');
+      expect(result.tier).toBe('fast');
+    });
+
+    it('includes tools when requested', async () => {
+      const mockToolRegistry = new ToolRegistry();
+      const hooks = new HookEngine({
+        confirm: ['*'],
+        log: [],
+        silent: [],
+      });
+      const mockToolExecutor = new ToolExecutor(mockToolRegistry, hooks);
+
+      const mockFastChatClient = mockRouter.getClient('fast')!;
+      const mockFastChatFn = vi.fn().mockResolvedValue({
+        content: 'response with tools',
+        stopReason: 'end_turn',
+        usage: { inputTokens: 100, outputTokens: 50 },
+      } as ChatResponse);
+
+      Object.assign(mockFastChatClient, { chat: mockFastChatFn });
+
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+        toolRegistry: mockToolRegistry,
+        toolExecutor: mockToolExecutor,
+      });
+
+      await orchestrator.delegate({
+        tier: 'fast',
+        systemPrompt: 'Use available tools',
+        message: 'Help me analyze data',
+        tools: true,
+      });
+
+      expect(mockFastChatFn).toHaveBeenCalled();
+    });
+
+    it('falls back to default tier when requested tier is unavailable', async () => {
+      const routerWithoutComplex = new ModelRouter({
+        default: mockDefaultClient,
+        fast: mockFastClient,
+        fallbackChain: [],
+      });
+
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: routerWithoutComplex,
+        systemPrompt: 'You are a helpful assistant.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      const result = await orchestrator.delegate({
+        tier: 'complex',
+        systemPrompt: 'Analyze deeply',
+        message: 'This is complex',
+      });
+
+      expect(result.content).toBe('default response');
+      expect(result.tier).toBe('default');
+    });
+
+    it('tracks cumulative usage after delegate calls', async () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      await orchestrator.delegate({
+        tier: 'fast',
+        systemPrompt: 'Fast task',
+        message: 'Fast message',
+      });
+
+      await orchestrator.delegate({
+        tier: 'complex',
+        systemPrompt: 'Complex task',
+        message: 'Complex message',
+      });
+
+      await orchestrator.delegate({
+        tier: 'fast',
+        systemPrompt: 'Another fast task',
+        message: 'Another fast message',
+      });
+
+      const usage = orchestrator.getDelegationUsage();
+
+      expect(usage.fast).toEqual({
+        inputTokens: 100,
+        outputTokens: 50,
+        calls: 2,
+      });
+
+      expect(usage.complex).toEqual({
+        inputTokens: 200,
+        outputTokens: 100,
+        calls: 1,
+      });
+
+      expect(usage.default).toBeUndefined();
+    });
+
+    it('tracks usage across tiers correctly', async () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      await orchestrator.delegate({
+        tier: 'fast',
+        systemPrompt: 'Fast task',
+        message: 'Fast message',
+      });
+
+      await orchestrator.delegate({
+        tier: 'fast',
+        systemPrompt: 'Another fast task',
+        message: 'Another fast message',
+      });
+
+      const usage = orchestrator.getDelegationUsage();
+
+      expect(usage.fast.inputTokens).toBe(100);
+      expect(usage.fast.outputTokens).toBe(50);
+      expect(usage.fast.calls).toBe(2);
+    });
+
+    it('logs delegation details with tier and token counts', async () => {
+      const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
+
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      await orchestrator.delegate({
+        tier: 'fast',
+        systemPrompt: 'Fast task',
+        message: 'Fast message',
+      });
+
+      expect(consoleSpy).toHaveBeenCalledWith(
+        '[Flynn:delegate] tier=fast tokens=50+25'
+      );
+
+      consoleSpy.mockRestore();
+    });
+  });
+
+  describe('getDelegationTier()', () => {
+    it('returns correct tier for each task type', () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      expect(orchestrator.getDelegationTier('compaction')).toBe('fast');
+      expect(orchestrator.getDelegationTier('memory_extraction')).toBe('default');
+      expect(orchestrator.getDelegationTier('classification')).toBe('complex');
+      expect(orchestrator.getDelegationTier('tool_summarisation')).toBe('default');
+      expect(orchestrator.getDelegationTier('complex_reasoning')).toBe('complex');
+    });
+
+    it('returns tier that was explicitly configured', () => {
+      const customDelegation = {
+        compaction: 'local' as const,
+        memory_extraction: 'fast' as const,
+        classification: 'complex' as const,
+        tool_summarisation: 'default' as const,
+        complex_reasoning: 'local' as const,
+      };
+
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: customDelegation,
+        maxDelegationDepth: 10,
+      });
+
+      expect(orchestrator.getDelegationTier('compaction')).toBe('local');
+      expect(orchestrator.getDelegationTier('memory_extraction')).toBe('fast');
+      expect(orchestrator.getDelegationTier('complex_reasoning')).toBe('local');
+    });
+  });
+
+  describe('process()', () => {
+    it('proxies to NativeAgent for user messages', async () => {
+      const mockDefaultChatClient = mockRouter.getClient('default')!;
+      const mockDefaultChatFn = vi.fn().mockResolvedValue({
+        content: 'Agent response',
+        stopReason: 'end_turn',
+        usage: { inputTokens: 150, outputTokens: 75 },
+      } as ChatResponse);
+
+      Object.assign(mockDefaultChatClient, { chat: mockDefaultChatFn });
+
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are a helpful agent.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      const response = await orchestrator.process('Hello, agent!');
+
+      expect(response).toBe('Agent response');
+    });
+
+    it('maintains conversation history through process()', async () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are a helpful agent.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      await orchestrator.process('Hello');
+      await orchestrator.process('How are you?');
+      await orchestrator.process('Tell me about yourself');
+
+      const history = orchestrator.getHistory();
+
+      expect(history).toHaveLength(6);
+      expect(history[0]).toEqual({ role: 'user', content: 'Hello' });
+      expect(history[1]).toEqual({ role: 'assistant', content: 'default response' });
+      expect(history[2]).toEqual({ role: 'user', content: 'How are you?' });
+      expect(history[3]).toEqual({ role: 'assistant', content: 'default response' });
+      expect(history[4]).toEqual({ role: 'user', content: 'Tell me about yourself' });
+      expect(history[5]).toEqual({ role: 'assistant', content: 'default response' });
+    });
+  });
+
+  describe('reset()', () => {
+    it('clears primary agent conversation history', async () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are a helpful agent.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      await orchestrator.process('Hello');
+      await orchestrator.process('How are you?');
+
+      expect(orchestrator.getHistory()).toHaveLength(4);
+
+      orchestrator.reset();
+
+      expect(orchestrator.getHistory()).toHaveLength(0);
+    });
+
+    it('can be called multiple times', async () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are a helpful agent.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      await orchestrator.process('Hello');
+      orchestrator.reset();
+
+      expect(orchestrator.getHistory()).toHaveLength(0);
+
+      await orchestrator.process('World');
+      orchestrator.reset();
+
+      expect(orchestrator.getHistory()).toHaveLength(0);
+    });
+  });
+
+  describe('getDelegationUsage()', () => {
+    it('returns copy of usage stats (doesn\'t expose internal map)', async () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      await orchestrator.delegate({
+        tier: 'fast',
+        systemPrompt: 'Fast task',
+        message: 'Fast message',
+      });
+
+      const usage1 = orchestrator.getDelegationUsage();
+      const usage2 = orchestrator.getDelegationUsage();
+
+      expect(usage1).toEqual(usage2);
+
+      usage1.fast.inputTokens = 999;
+
+      expect(usage2.fast.inputTokens).toBe(50);
+    });
+
+    it('returns empty object when no usage tracked', async () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      const usage = orchestrator.getDelegationUsage();
+
+      expect(usage).toEqual({});
+    });
+  });
+
+  describe('getHistory()', () => {
+    it('returns conversation history from primary agent', async () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are a helpful agent.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      await orchestrator.process('Hello');
+      await orchestrator.process('How are you?');
+
+      const history = orchestrator.getHistory();
+
+      expect(history).toHaveLength(4);
+      expect(history[0]).toEqual({ role: 'user', content: 'Hello' });
+      expect(history[1]).toEqual({ role: 'assistant', content: 'default response' });
+      expect(history[2]).toEqual({ role: 'user', content: 'How are you?' });
+      expect(history[3]).toEqual({ role: 'assistant', content: 'default response' });
+    });
+
+    it('returns empty array when no history', async () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are a helpful agent.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      const history = orchestrator.getHistory();
+
+      expect(history).toEqual([]);
+    });
+  });
+
+  describe('setModelTier()', () => {
+    it('sets model tier on primary agent', () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      orchestrator.setModelTier('fast');
+
+      expect(orchestrator.getModelTier()).toBe('fast');
+    });
+
+    it('allows tier changes after initialization', () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      expect(orchestrator.getModelTier()).toBe('default');
+
+      orchestrator.setModelTier('complex');
+
+      expect(orchestrator.getModelTier()).toBe('complex');
+
+      orchestrator.setModelTier('fast');
+
+      expect(orchestrator.getModelTier()).toBe('fast');
+    });
+  });
+
+  describe('setOnToolUse()', () => {
+    it('sets tool-use callback on primary agent', () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      const callback = vi.fn();
+
+      orchestrator.setOnToolUse(callback);
+
+      expect(orchestrator.getModelTier()).toBe('default');
+    });
+
+    it('allows callback changes', () => {
+      const orchestrator = new AgentOrchestrator({
+        modelRouter: mockRouter,
+        systemPrompt: 'You are helpful.',
+        primaryTier: 'default',
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 10,
+      });
+
+      const callback1 = vi.fn();
+      const callback2 = vi.fn();
+
+      orchestrator.setOnToolUse(callback1);
+
+      expect(orchestrator.getModelTier()).toBe('default');
+
+      orchestrator.setOnToolUse(callback2);
+
+      expect(orchestrator.getModelTier()).toBe('default');
+    });
+  });
+});
diff --git a/src/backends/native/orchestrator.ts b/src/backends/native/orchestrator.ts
new file mode 100644
index 0000000..2049674
--- /dev/null
+++ b/src/backends/native/orchestrator.ts
@@ -0,0 +1,309 @@
+import type { ModelRouter, ModelTier } from '../../models/router.js';
+import type { ChatRequest, Message, TokenUsage } from '../../models/types.js';
+import type { Session } from '../../session/index.js';
+import type { ToolRegistry } from '../../tools/registry.js';
+import type { ToolExecutor } from '../../tools/executor.js';
+import { NativeAgent } from './agent.js';
+import type { ToolUseEvent } from './agent.js';
+import { shouldCompact } from '../../context/tokens.js';
+import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
+
+// ── Public types ──────────────────────────────────────────────────────
+
+/** A single-turn, stateless request to a sub-agent at a specific tier. */
+export interface SubAgentRequest {
+  tier: ModelTier;
+  systemPrompt: string;
+  message: string;
+  maxTokens?: number;
+  /** When true, include tools from the toolRegistry in the request. */
+  tools?: boolean;
+}
+
+/** Result returned from a sub-agent delegation call. */
+export interface SubAgentResult {
+  content: string;
+  usage: TokenUsage;
+  tier: ModelTier;
+}
+
+/** Maps each delegation task to the model tier that should handle it. */
+export interface DelegationConfig {
+  compaction: ModelTier;
+  memory_extraction: ModelTier;
+  classification: ModelTier;
+  tool_summarisation: ModelTier;
+  complex_reasoning: ModelTier;
+}
+
+/** Per-tier cumulative usage statistics. */
+interface TierUsageStats {
+  inputTokens: number;
+  outputTokens: number;
+  calls: number;
+}
+
+/** Full configuration for the AgentOrchestrator. */
+export interface OrchestratorConfig {
+  modelRouter: ModelRouter;
+  systemPrompt: string;
+  session?: Session;
+  toolRegistry?: ToolRegistry;
+  toolExecutor?: ToolExecutor;
+  maxIterations?: number;
+  /** The tier used by the primary NativeAgent for user-facing conversation. */
+  primaryTier: ModelTier;
+  /** Which tier to use for each delegation task type. */
+  delegation: DelegationConfig;
+  /** Maximum nesting depth for delegation calls (safety guard). */
+  maxDelegationDepth: number;
+  onToolUse?: (event: ToolUseEvent) => void;
+  /** Context compaction settings. When provided, enables automatic compaction. */
+  compaction?: CompactionConfig;
+  /** Model identifier for the primary model (used for context window lookup). */
+  modelName?: string;
+  /** Optional override for the context window size (in tokens). */
+  contextWindow?: number;
+}
+
+// ── AgentOrchestrator ─────────────────────────────────────────────────
+
+/**
+ * Wraps a primary NativeAgent and adds the ability to delegate
+ * single-turn sub-tasks to different model tiers via the ModelRouter.
+ *
+ * The primary agent handles the main conversation loop (with tools),
+ * while `delegate()` enables cheap, stateless calls for tasks like
+ * compaction, classification, and memory extraction.
+ */
+export class AgentOrchestrator {
+  private _agent: NativeAgent;
+  private _modelRouter: ModelRouter;
+  private _delegation: DelegationConfig;
+  private _maxDelegationDepth: number;
+  private _toolRegistry?: ToolRegistry;
+  private _session?: Session;
+  private _compactionConfig?: CompactionConfig;
+  private _modelName?: string;
+  private _contextWindow?: number;
+  private _usageByTier: Map<string, TierUsageStats> = new Map();
+
+  constructor(config: OrchestratorConfig) {
+    this._modelRouter = config.modelRouter;
+    this._delegation = config.delegation;
+    this._maxDelegationDepth = config.maxDelegationDepth;
+    this._toolRegistry = config.toolRegistry;
+    this._session = config.session;
+    this._compactionConfig = config.compaction;
+    this._modelName = config.modelName;
+    this._contextWindow = config.contextWindow;
+
+    // Create the primary NativeAgent for user-facing conversation
+    this._agent = new NativeAgent({
+      modelClient: config.modelRouter,
+      systemPrompt: config.systemPrompt,
+      session: config.session,
+      toolRegistry: config.toolRegistry,
+      toolExecutor: config.toolExecutor,
+      maxIterations: config.maxIterations,
+      onToolUse: config.onToolUse,
+    });
+
+    // Set the primary tier on the agent
+    this._agent.setModelTier(config.primaryTier);
+  }
+
+  // ── Delegation ────────────────────────────────────────────────────
+
+  /**
+   * Perform a single-turn, stateless call to a model at the specified tier.
+   *
+   * This is used for internal sub-tasks (compaction, classification, etc.)
+   * that don't need the full conversation history or tool loop.
+   *
+   * If the requested tier is not available on the router, falls back to
+   * the 'default' tier with a warning.
+   */
+  async delegate(request: SubAgentRequest): Promise<SubAgentResult> {
+    let tier = request.tier;
+
+    // Check if the requested tier is available; fall back to 'default' if not
+    const client = this._modelRouter.getClient(tier);
+    if (!client) {
+      console.warn(
+        `[Flynn:delegate] Tier '${tier}' not available, falling back to 'default'`,
+      );
+      tier = 'default';
+    }
+
+    // Build the single-turn chat request
+    const messages: Message[] = [
+      { role: 'user', content: request.message },
+    ];
+
+    const chatRequest: ChatRequest = {
+      messages,
+      system: request.systemPrompt,
+      maxTokens: request.maxTokens,
+    };
+
+    // Optionally include tools from the registry
+    if (request.tools && this._toolRegistry) {
+      chatRequest.tools = this._toolRegistry.toAnthropicFormat();
+    }
+
+    const response = await this._modelRouter.chat(chatRequest, tier);
+
+    // Track cumulative usage for this tier
+    this._trackUsage(tier, response.usage);
+
+    console.log(
+      `[Flynn:delegate] tier=${tier} tokens=${response.usage.inputTokens}+${response.usage.outputTokens}`,
+    );
+
+    return {
+      content: response.content,
+      usage: response.usage,
+      tier,
+    };
+  }
+
+  // ── Primary agent proxies ─────────────────────────────────────────
+
+  /**
+   * Process a user message through the primary NativeAgent.
+   * This is the main entry point for user-facing conversation.
+   *
+   * When compaction is configured, checks whether the conversation history
+   * exceeds the context window threshold and compacts it before processing.
+   */
+  async process(userMessage: string): Promise<string> {
+    await this.compactIfNeeded();
+    return this._agent.process(userMessage);
+  }
+
+  /**
+   * Force-compact the current conversation history regardless of threshold.
+   * Returns the compaction result, or null if there was nothing to compact
+   * (e.g. no session, too few messages).
+   */
+  async compact(): Promise<CompactionResult | null> {
+    const config = this._compactionConfig ?? DEFAULT_COMPACTION_CONFIG;
+    const messages = this.getHistory();
+
+    if (messages.length === 0) {
+      return null;
+    }
+
+    const result = await compactHistory({
+      messages,
+      orchestrator: this,
+      config,
+    });
+
+    // If nothing was actually compacted, skip the replace
+    if (result.compactedCount === 0) {
+      return result;
+    }
+
+    // Persist the compacted history
+    if (this._session) {
+      this._session.replaceHistory(result.messages);
+    }
+
+    console.log(
+      `[Flynn:compact] Compacted ${result.compactedCount} messages: ` +
+      `${result.tokensBefore} → ${result.tokensAfter} tokens`,
+    );
+
+    return result;
+  }
+
+  /** Reset the primary agent's conversation history. */
+  reset(): void {
+    this._agent.reset();
+  }
+
+  /** Get the primary agent's conversation history. */
+  getHistory(): Message[] {
+    return this._agent.getHistory();
+  }
+
+  /** Set the model tier on the primary agent. */
+  setModelTier(tier: ModelTier): void {
+    this._agent.setModelTier(tier);
+  }
+
+  /** Get the current model tier of the primary agent. */
+  getModelTier(): ModelTier {
+    return this._agent.getModelTier();
+  }
+
+  /** Set the tool-use callback on the primary agent. */
+  setOnToolUse(callback: ((event: ToolUseEvent) => void) | undefined): void {
+    this._agent.setOnToolUse(callback);
+  }
+
+  // ── Usage & config accessors ──────────────────────────────────────
+
+  /**
+   * Returns cumulative delegation usage stats per tier.
+   * Useful for cost tracking and visibility into sub-agent calls.
+   */
+  getDelegationUsage(): Record<string, TierUsageStats> {
+    const result: Record<string, TierUsageStats> = {};
+    for (const [tier, stats] of this._usageByTier) {
+      result[tier] = { ...stats };
+    }
+    return result;
+  }
+
+  /**
+   * Look up which model tier is configured for a given delegation task.
+   * Convenience method so callers don't need to access the config directly.
+   */
+  getDelegationTier(task: keyof DelegationConfig): ModelTier {
+    return this._delegation[task];
+  }
+
+  // ── Private helpers ───────────────────────────────────────────────
+
+  /**
+   * Check whether automatic compaction should run, and if so, compact.
+   * Called before each `process()` call when compaction is configured.
+   */
+  private async compactIfNeeded(): Promise<void> {
+    if (!this._compactionConfig) return;
+
+    const messages = this.getHistory();
+    if (messages.length === 0) return;
+
+    const model = this._modelName ?? 'unknown';
+    const needs = shouldCompact({
+      messages,
+      model,
+      contextWindow: this._contextWindow,
+      thresholdPct: this._compactionConfig.thresholdPct,
+    });
+
+    if (!needs) return;
+
+    await this.compact();
+  }
+
+  /** Accumulate usage stats for a given tier. */
+  private _trackUsage(tier: ModelTier, usage: TokenUsage): void {
+    const existing = this._usageByTier.get(tier);
+    if (existing) {
+      existing.inputTokens += usage.inputTokens;
+      existing.outputTokens += usage.outputTokens;
+      existing.calls += 1;
+    } else {
+      this._usageByTier.set(tier, {
+        inputTokens: usage.inputTokens,
+        outputTokens: usage.outputTokens,
+        calls: 1,
+      });
+    }
+  }
+}
diff --git a/src/backends/native/prompts.ts b/src/backends/native/prompts.ts
new file mode 100644
index 0000000..f13fec1
--- /dev/null
+++ b/src/backends/native/prompts.ts
@@ -0,0 +1,94 @@
+/**
+ * System prompts for delegated tasks.
+ *
+ * Each prompt is designed for a specific sub-task that the agent farms out
+ * to a (usually cheaper/faster) model call. Keep them focused and
+ * deterministic — the caller should be able to parse the output reliably.
+ */
+
+/**
+ * Instructs a model to summarise conversation history during compaction.
+ * The resulting summary replaces the full history to reclaim context window space.
+ */
+export const COMPACTION_SYSTEM_PROMPT = `You are a conversation summariser. Your job is to condense a conversation history into a concise summary that preserves all important information.
+
+Rules:
+- Preserve key facts, decisions, user preferences, and action items.
+- Maintain chronological order of events.
+- Note any unresolved questions or pending tasks.
+- Be concise but thorough — aim for roughly 20% of the original length.
+- Use bullet points for clarity.
+- Never invent information that is not present in the conversation.
+- If the conversation references files, paths, error messages, or specific values, include them verbatim.
+- Group related points together under short descriptive headings when it aids readability.
+
+Output format:
+Return a markdown summary with bullet points. Do not include any preamble or explanation — output only the summary.`;
+
+/**
+ * Instructs a model to extract persistent facts from conversation text.
+ * Extracted facts are stored in long-term memory for future sessions.
+ */
+export const MEMORY_EXTRACTION_PROMPT = `You are a fact extractor. Given a block of conversation text, extract persistent facts worth remembering across sessions.
+
+Categories to extract:
+
+## User
+- Name, role, location, timezone, or other personal details explicitly shared.
+
+## Preferences
+- Communication style, formatting preferences, tool preferences, workflow habits.
+
+## Technical
+- Project names, repositories, tech stacks, conventions, architecture decisions.
+- File paths, environment details, deployment targets.
+
+## Decisions
+- Explicit decisions made during the conversation (e.g. "we decided to use X instead of Y").
+- Rationale for decisions when stated.
+
+Rules:
+- Only extract facts that are explicitly stated — never infer or assume.
+- Skip transient or session-specific information (e.g. "run this command now", "fix this error today").
+- Skip information that is only relevant to the current task and has no long-term value.
+- If no facts worth extracting exist, return an empty response.
+- Use concise bullet points under each category heading.
+- Omit any category that has no entries.
+
+Output format:
+Return markdown with the category headings above and bullet points underneath. No preamble.`;
+
+/**
+ * Instructs a model to classify an inbound message into a discrete category.
+ * The caller uses the label to route the message to the appropriate handler.
+ */
+export const CLASSIFICATION_PROMPT = `Classify the following message into exactly one of these categories:
+
+- command   — a direct instruction to perform an action (e.g. "run tests", "deploy to staging")
+- question  — a request for information or explanation (e.g. "what does this function do?")
+- task      — a multi-step objective that requires planning (e.g. "add authentication to the API")
+- conversation — casual chat, greetings, acknowledgements, or social interaction
+- unclear   — the message is ambiguous or lacks enough context to classify
+
+Rules:
+- Return ONLY the classification label — a single word, nothing else.
+- Do not explain your reasoning.
+- If the message fits multiple categories, choose the most specific one (command > task > question > conversation).`;
+
+/**
+ * Instructs a model to condense verbose tool output into a compact summary.
+ * Used to shrink large tool results before they consume context window space.
+ */
+export const TOOL_SUMMARISATION_PROMPT = `You are a tool-output summariser. Given the raw output of a tool invocation, produce a compact summary that preserves the essential information.
+
+Rules:
+- Preserve the key outcome: success or failure.
+- Preserve important data: counts, IDs, names, statuses.
+- Preserve all file paths, error codes, error messages, and specific values verbatim.
+- Strip boilerplate, redundant lines, decorative formatting, and progress indicators.
+- Keep the summary under 500 tokens.
+- If the output is already concise, return it as-is rather than paraphrasing.
+- Use a structured format (bullet points or short paragraphs) for readability.
+
+Output format:
+Return the summarised output directly. No preamble or meta-commentary.`;
diff --git a/src/config/index.ts b/src/config/index.ts
index 5d70ff7..fe7493e 100644
--- a/src/config/index.ts
+++ b/src/config/index.ts
@@ -1,2 +1,2 @@
 export { loadConfig } from './loader.js';
-export { configSchema, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig } from './schema.js';
+export { configSchema, type Config, type TelegramConfig, type ModelConfig, type CronJobConfig, type AgentsConfig, type CompactionConfig } from './schema.js';
diff --git a/src/config/schema.ts b/src/config/schema.ts
index 4cb37e8..77dfc53 100644
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -19,6 +19,7 @@ const modelConfigSchema = z.object({
   auth_token: z.string().optional(),
   for: z.array(z.string()).optional(),
   num_gpu: z.number().optional(),
+  context_window: z.number().optional(),
 });
 
 const modelsSchema = z.object({
@@ -87,6 +88,32 @@ const automationSchema = z.object({
   cron: z.array(cronJobSchema).default([]),
 }).default({});
 
+const agentsSchema = z.object({
+  primary_tier: z.enum(['fast', 'default', 'complex', 'local']).default('default'),
+  delegation: z.object({
+    compaction: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
+    memory_extraction: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
+    classification: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
+    tool_summarisation: z.enum(['fast', 'default', 'complex', 'local']).default('fast'),
+    complex_reasoning: z.enum(['fast', 'default', 'complex', 'local']).default('complex'),
+  }).default({
+    compaction: 'fast',
+    memory_extraction: 'fast',
+    classification: 'fast',
+    tool_summarisation: 'fast',
+    complex_reasoning: 'complex',
+  }),
+  auto_escalate: z.boolean().default(false),
+  max_delegation_depth: z.number().min(1).max(10).default(3),
+}).default({});
+
+const compactionSchema = z.object({
+  enabled: z.boolean().default(true),
+  threshold_pct: z.number().min(10).max(100).default(80),
+  keep_turns: z.number().min(1).max(50).default(4),
+  summary_max_tokens: z.number().min(128).max(4096).default(1024),
+}).default({});
+
 export const configSchema = z.object({
   telegram: telegramSchema,
   server: serverSchema.default({}),
@@ -96,9 +123,13 @@ export const configSchema = z.object({
   skills: skillsSchema.default({}),
   mcp: mcpSchema.default({ servers: [] }),
   automation: automationSchema,
+  agents: agentsSchema,
+  compaction: compactionSchema,
 });
 
 export type Config = z.infer<typeof configSchema>;
 export type TelegramConfig = z.infer<typeof telegramSchema>;
 export type ModelConfig = z.infer<typeof modelConfigSchema>;
 export type CronJobConfig = z.infer<typeof cronJobSchema>;
+export type AgentsConfig = z.infer<typeof agentsSchema>;
+export type CompactionConfig = z.infer<typeof compactionSchema>;
diff --git a/src/context/compaction.test.ts b/src/context/compaction.test.ts
new file mode 100644
index 0000000..1de56ca
--- /dev/null
+++ b/src/context/compaction.test.ts
@@ -0,0 +1,104 @@
+import { describe, it, expect, vi } from 'vitest';
+import { compactHistory, DEFAULT_COMPACTION_CONFIG } from './compaction.js';
+import type { CompactionConfig } from './compaction.js';
+import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
+import type { Message } from '../models/types.js';
+
+function makeMockOrchestrator(summaryText = 'Summary of conversation'): AgentOrchestrator {
+  return {
+    getDelegationTier: vi.fn().mockReturnValue('fast'),
+    delegate: vi.fn().mockResolvedValue({
+      content: summaryText,
+      usage: { inputTokens: 100, outputTokens: 50 },
+      tier: 'fast',
+    }),
+  } as unknown as AgentOrchestrator;
+}
+
+function makeMessages(count: number): Message[] {
+  const msgs: Message[] = [];
+  for (let i = 0; i < count; i++) {
+    msgs.push({
+      role: i % 2 === 0 ? 'user' : 'assistant',
+      content: `Message ${i}`,
+    });
+  }
+  return msgs;
+}
+
+describe('compactHistory', () => {
+  const config: CompactionConfig = {
+    thresholdPct: 80,
+    keepTurns: 2,  // keeps last 4 messages
+    summaryMaxTokens: 1024,
+  };
+
+  it('returns no-op when messages count is at or below keepTurns threshold', async () => {
+    const messages = makeMessages(4);  // keepTurns=2 → keep 4 messages
+    const orchestrator = makeMockOrchestrator();
+
+    const result = await compactHistory({ messages, orchestrator, config });
+
+    expect(result.compactedCount).toBe(0);
+    expect(result.messages).toEqual(messages);
+    expect(orchestrator.delegate).not.toHaveBeenCalled();
+  });
+
+  it('compacts older messages and keeps recent ones', async () => {
+    const messages = makeMessages(10);  // 10 messages, keep last 4, compact 6
+    const orchestrator = makeMockOrchestrator('Summarized conversation');
+
+    const result = await compactHistory({ messages, orchestrator, config });
+
+    expect(result.compactedCount).toBe(6);
+    expect(result.messages).toHaveLength(5);  // 1 summary + 4 kept
+    expect(result.messages[0].role).toBe('assistant');
+    expect(result.messages[0].content).toContain('[Summary of earlier conversation]');
+    expect(result.messages[0].content).toContain('Summarized conversation');
+    // Last 4 messages should be preserved
+    expect(result.messages.slice(1)).toEqual(messages.slice(-4));
+  });
+
+  it('calls delegate with compaction tier and correct params', async () => {
+    const messages = makeMessages(10);
+    const orchestrator = makeMockOrchestrator();
+
+    await compactHistory({ messages, orchestrator, config });
+
+    expect(orchestrator.getDelegationTier).toHaveBeenCalledWith('compaction');
+    expect(orchestrator.delegate).toHaveBeenCalledWith(
+      expect.objectContaining({
+        tier: 'fast',
+        maxTokens: 1024,
+      }),
+    );
+  });
+
+  it('populates token counts in result', async () => {
+    const messages = makeMessages(10);
+    const orchestrator = makeMockOrchestrator();
+
+    const result = await compactHistory({ messages, orchestrator, config });
+
+    expect(result.tokensBefore).toBeGreaterThan(0);
+    expect(result.tokensAfter).toBeGreaterThan(0);
+    expect(result.tokensAfter).toBeLessThan(result.tokensBefore);
+  });
+
+  it('handles single turn above keepTurns threshold', async () => {
+    // 3 turns = 6 messages, keepTurns=2 keeps 4, compacts 2
+    const messages = makeMessages(6);
+    const orchestrator = makeMockOrchestrator();
+
+    const result = await compactHistory({ messages, orchestrator, config });
+
+    expect(result.compactedCount).toBe(2);
+    expect(result.messages).toHaveLength(5);  // 1 summary + 4 kept
+  });
+
+  it('uses DEFAULT_COMPACTION_CONFIG values correctly', () => {
+    expect(DEFAULT_COMPACTION_CONFIG.thresholdPct).toBe(80);
+    expect(DEFAULT_COMPACTION_CONFIG.keepTurns).toBe(4);
+    expect(DEFAULT_COMPACTION_CONFIG.summaryMaxTokens).toBe(1024);
+  });
+});
diff --git a/src/context/compaction.ts b/src/context/compaction.ts
new file mode 100644
index 0000000..1f2f968
--- /dev/null
+++ b/src/context/compaction.ts
@@ -0,0 +1,74 @@
+import type { Message } from '../models/types.js';
+import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
+import { COMPACTION_SYSTEM_PROMPT } from '../backends/native/prompts.js';
+import { estimateMessageTokens } from './tokens.js';
+
+export interface CompactionConfig {
+  /** Percentage of context window that triggers compaction (default: 80). */
+  thresholdPct: number;
+  /** Number of recent turns (user+assistant pairs) to always keep intact. */
+  keepTurns: number;
+  /** Maximum tokens for the compaction summary response. */
+  summaryMaxTokens: number;
+}
+
+export interface CompactionResult {
+  /** The compacted messages: [summary, ...recentMessages]. */
+  messages: Message[];
+  /** Number of messages that were compacted (removed). */
+  compactedCount: number;
+  /** Estimated tokens before compaction. */
+  tokensBefore: number;
+  /** Estimated tokens after compaction. */
+  tokensAfter: number;
+}
+
+export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
+  thresholdPct: 80,
+  keepTurns: 4,
+  summaryMaxTokens: 1024,
+};
+
+export async function compactHistory(opts: {
+  messages: Message[];
+  orchestrator: AgentOrchestrator;
+  config: CompactionConfig;
+}): Promise<CompactionResult> {
+  const { messages, orchestrator, config } = opts;
+
+  const keepCount = config.keepTurns * 2;
+  if (messages.length <= keepCount) {
+    return {
+      messages,
+      compactedCount: 0,
+      tokensBefore: estimateMessageTokens(messages),
+      tokensAfter: estimateMessageTokens(messages),
+    };
+  }
+
+  const toCompact = messages.slice(0, -keepCount);
+  const toKeep = messages.slice(-keepCount);
+
+  const formattedConversation = toCompact.map((msg) => `${msg.role}: ${msg.content}`).join('\n\n');
+
+  const tier = orchestrator.getDelegationTier('compaction');
+
+  const result = await orchestrator.delegate({
+    tier,
+    systemPrompt: COMPACTION_SYSTEM_PROMPT,
+    message: formattedConversation,
+    maxTokens: config.summaryMaxTokens,
+  });
+
+  const summaryMessage: Message = {
+    role: 'assistant',
+    content: '[Summary of earlier conversation]\n\n' + result.content,
+  };
+
+  return {
+    messages: [summaryMessage, ...toKeep],
+    compactedCount: toCompact.length,
+    tokensBefore: estimateMessageTokens(messages),
+    tokensAfter: estimateMessageTokens([summaryMessage, ...toKeep]),
+  };
+}
diff --git a/src/context/index.ts b/src/context/index.ts
new file mode 100644
index 0000000..b90a041
--- /dev/null
+++ b/src/context/index.ts
@@ -0,0 +1,15 @@
+export {
+  estimateTokens,
+  estimateMessageTokens,
+  getContextWindow,
+  shouldCompact,
+  CONTEXT_WINDOWS,
+  type ShouldCompactOpts,
+} from './tokens.js';
+
+export {
+  compactHistory,
+  type CompactionConfig,
+  type CompactionResult,
+  DEFAULT_COMPACTION_CONFIG,
+} from './compaction.js';
diff --git a/src/context/tokens.test.ts b/src/context/tokens.test.ts
new file mode 100644
index 0000000..0f1ae6e
--- /dev/null
+++ b/src/context/tokens.test.ts
@@ -0,0 +1,108 @@
+import { describe, it, expect } from 'vitest';
+import { estimateTokens, estimateMessageTokens, getContextWindow, shouldCompact, CONTEXT_WINDOWS } from './tokens.js';
+
+describe('estimateTokens', () => {
+  it('returns 0 for empty string', () => {
+    // estimateTokens('') should be 0 — Math.ceil(0/4) = 0
+    expect(estimateTokens('')).toBe(0);
+  });
+
+  it('estimates ~1 token per 4 characters', () => {
+    // 'abcd' = 4 chars → ceil(4/4) = 1
+    expect(estimateTokens('abcd')).toBe(1);
+    // 'abcde' = 5 chars → ceil(5/4) = 2
+    expect(estimateTokens('abcde')).toBe(2);
+  });
+
+  it('handles longer text', () => {
+    const text = 'a'.repeat(100);
+    expect(estimateTokens(text)).toBe(25);  // 100/4 = 25
+  });
+});
+
+describe('estimateMessageTokens', () => {
+  it('returns 0 for empty array', () => {
+    expect(estimateMessageTokens([])).toBe(0);
+  });
+
+  it('includes overhead per message', () => {
+    // 'abcd' = 1 token content + 4 overhead = 5 per message
+    const messages = [{ role: 'user' as const, content: 'abcd' }];
+    expect(estimateMessageTokens(messages)).toBe(5);
+  });
+
+  it('sums multiple messages', () => {
+    const messages = [
+      { role: 'user' as const, content: 'abcd' },     // 1 + 4 = 5
+      { role: 'assistant' as const, content: 'abcd' }, // 1 + 4 = 5
+    ];
+    expect(estimateMessageTokens(messages)).toBe(10);
+  });
+});
+
+describe('getContextWindow', () => {
+  it('returns known window for Claude Sonnet', () => {
+    expect(getContextWindow('claude-sonnet-4-20250514')).toBe(200_000);
+  });
+
+  it('returns known window for GPT-4o', () => {
+    expect(getContextWindow('gpt-4o')).toBe(128_000);
+  });
+
+  it('returns default 128000 for unknown model', () => {
+    expect(getContextWindow('unknown-model')).toBe(128_000);
+  });
+
+  it('returns override when provided', () => {
+    expect(getContextWindow('claude-sonnet-4-20250514', 50_000)).toBe(50_000);
+  });
+
+  it('returns override even for unknown model', () => {
+    expect(getContextWindow('unknown-model', 32_000)).toBe(32_000);
+  });
+});
+
+describe('shouldCompact', () => {
+  it('returns false when messages are well below threshold', () => {
+    const messages = [{ role: 'user' as const, content: 'hello' }];
+    expect(shouldCompact({
+      messages,
+      model: 'gpt-4o',  // 128k context window
+    })).toBe(false);
+  });
+
+  it('returns true when messages exceed threshold', () => {
+    // Create messages that exceed 80% of a small context window
+    // context window = 100, threshold = 80% = 80 tokens
+    // each message: ceil(400/4) + 4 = 104 tokens → well over 80
+    const messages = [{ role: 'user' as const, content: 'a'.repeat(400) }];
+    expect(shouldCompact({
+      messages,
+      model: 'unknown',
+      contextWindow: 100,
+      thresholdPct: 80,
+    })).toBe(true);
+  });
+
+  it('respects custom thresholdPct', () => {
+    // 1 message: ceil(20/4) + 4 = 9 tokens
+    // contextWindow = 100, thresholdPct = 5 → threshold = 5 tokens
+    const messages = [{ role: 'user' as const, content: 'a'.repeat(20) }];
+    expect(shouldCompact({
+      messages,
+      model: 'unknown',
+      contextWindow: 100,
+      thresholdPct: 5,
+    })).toBe(true);
+  });
+
+  it('uses model lookup when no contextWindow override', () => {
+    // gpt-3.5-turbo = 16385 tokens, default threshold 80% = 13108
+    // Large message to exceed: ceil(60000/4) + 4 = 15004 tokens → over 13108
+    const messages = [{ role: 'user' as const, content: 'a'.repeat(60000) }];
+    expect(shouldCompact({
+      messages,
+      model: 'gpt-3.5-turbo',
+    })).toBe(true);
+  });
+});
diff --git a/src/context/tokens.ts b/src/context/tokens.ts
new file mode 100644
index 0000000..9bf278d
--- /dev/null
+++ b/src/context/tokens.ts
@@ -0,0 +1,88 @@
+import type { Message } from '../models/types.js';
+
+/**
+ * Approximate overhead tokens per message (role marker, separators, etc.).
+ */
+const MESSAGE_OVERHEAD_TOKENS = 4;
+
+/**
+ * Conservative default context window when a model is not in the lookup table.
+ */
+const DEFAULT_CONTEXT_WINDOW = 128_000;
+
+/**
+ * Hard-coded context window sizes (in tokens) for known models.
+ */
+export const CONTEXT_WINDOWS: Record<string, number> = {
+  'claude-sonnet-4-20250514': 200_000,
+  'claude-3-5-haiku-20241022': 200_000,
+  'claude-3-5-sonnet-20241022': 200_000,
+  'claude-3-opus-20240229': 200_000,
+  'claude-opus-4-20250514': 200_000,
+  'gpt-4o': 128_000,
+  'gpt-4o-mini': 128_000,
+  'gpt-4-turbo': 128_000,
+  'gpt-3.5-turbo': 16_385,
+} as const;
+
+/**
+ * Cheap character-based token estimation.
+ *
+ * Uses `Math.ceil(text.length / 4)` as a reasonable approximation for
+ * English text (roughly 4 characters per token on average).
+ */
+export function estimateTokens(text: string): number {
+  return Math.ceil(text.length / 4);
+}
+
+/**
+ * Estimate the total token count for an array of messages.
+ *
+ * For each message the estimate includes the content tokens plus a fixed
+ * overhead of ~4 tokens to account for the role marker and separators.
+ */
+export function estimateMessageTokens(messages: Message[]): number {
+  return messages.reduce(
+    (sum, msg) => sum + estimateTokens(msg.content) + MESSAGE_OVERHEAD_TOKENS,
+    0,
+  );
+}
+
+/**
+ * Return the context window size (in tokens) for a given model.
+ *
+ * @param model    - Model identifier to look up.
+ * @param override - If provided, this value is returned directly.
+ * @returns The context window size in tokens.
+ */
+export function getContextWindow(model: string, override?: number): number {
+  if (override !== undefined) {
+    return override;
+  }
+  return CONTEXT_WINDOWS[model] ?? DEFAULT_CONTEXT_WINDOW;
+}
+
+/**
+ * Options for {@link shouldCompact}.
+ */
+export interface ShouldCompactOpts {
+  messages: Message[];
+  model: string;
+  contextWindow?: number;
+  /** Percentage of the context window that triggers compaction (default 80). */
+  thresholdPct?: number;
+}
+
+/**
+ * Determine whether the conversation should be compacted.
+ *
+ * Returns `true` when the estimated token count of `messages` exceeds
+ * `thresholdPct` percent of the effective context window.
+ */
+export function shouldCompact(opts: ShouldCompactOpts): boolean {
+  const { messages, model, contextWindow, thresholdPct = 80 } = opts;
+  const window = getContextWindow(model, contextWindow);
+  const threshold = (thresholdPct / 100) * window;
+  const estimated = estimateMessageTokens(messages);
+  return estimated > threshold;
+}
diff --git a/src/daemon/index.ts b/src/daemon/index.ts
index 2b88141..ecc0c22 100644
--- a/src/daemon/index.ts
+++ b/src/daemon/index.ts
@@ -2,7 +2,7 @@ import { Lifecycle } from './lifecycle.js';
 import type { Config, ModelConfig } from '../config/index.js';
 import { AnthropicClient, OpenAIClient, OllamaClient, LlamaCppClient, ModelRouter } from '../models/index.js';
 import type { ModelClient } from '../models/index.js';
-import { NativeAgent } from '../backends/index.js';
+import { AgentOrchestrator, type DelegationConfig } from '../backends/index.js';
 import { SessionStore, SessionManager } from '../session/index.js';
 import { HookEngine } from '../hooks/index.js';
 import { ToolRegistry, ToolExecutor, allBuiltinTools } from '../tools/index.js';
@@ -134,7 +134,8 @@ function createModelRouter(config: Config): ModelRouter {
 
 /**
  * Create the unified message handler for the channel registry.
- * Each channel+sender pair gets its own NativeAgent backed by a persistent session.
+ * Each channel+sender pair gets its own AgentOrchestrator backed by a persistent session.
+ * The orchestrator wraps a NativeAgent and adds delegation to different model tiers.
  */
 function createMessageRouter(deps: {
   sessionManager: SessionManager;
@@ -142,21 +143,39 @@ function createMessageRouter(deps: {
   systemPrompt: string;
   toolRegistry: ToolRegistry;
   toolExecutor: ToolExecutor;
+  config: Config;
 }) {
   // Cache agents by session ID to avoid recreating on every message
-  const agents = new Map<string, NativeAgent>();
+  const agents = new Map<string, AgentOrchestrator>();
 
-  function getOrCreateAgent(channel: string, senderId: string): NativeAgent {
+  function getOrCreateAgent(channel: string, senderId: string): AgentOrchestrator {
     const sessionId = `${channel}:${senderId}`;
     let agent = agents.get(sessionId);
     if (!agent) {
       const session = deps.sessionManager.getSession(channel, senderId);
-      agent = new NativeAgent({
-        modelClient: deps.modelRouter,
+      const delegationConfig: DelegationConfig = {
+        compaction: deps.config.agents.delegation.compaction ?? 'fast',
+        memory_extraction: deps.config.agents.delegation.memory_extraction ?? 'fast',
+        classification: deps.config.agents.delegation.classification ?? 'fast',
+        tool_summarisation: deps.config.agents.delegation.tool_summarisation ?? 'fast',
+        complex_reasoning: deps.config.agents.delegation.complex_reasoning ?? 'complex',
+      };
+      agent = new AgentOrchestrator({
+        modelRouter: deps.modelRouter,
         systemPrompt: deps.systemPrompt,
         session,
         toolRegistry: deps.toolRegistry,
         toolExecutor: deps.toolExecutor,
+        primaryTier: deps.config.agents.primary_tier ?? 'default',
+        delegation: delegationConfig,
+        maxDelegationDepth: deps.config.agents.max_delegation_depth ?? 3,
+        compaction: deps.config.compaction.enabled ? {
+          thresholdPct: deps.config.compaction.threshold_pct,
+          keepTurns: deps.config.compaction.keep_turns,
+          summaryMaxTokens: deps.config.compaction.summary_max_tokens,
+        } : undefined,
+        modelName: deps.config.models.default.model,
+        contextWindow: deps.config.models.default.context_window,
       });
       agents.set(sessionId, agent);
     }
@@ -167,9 +186,26 @@ function createMessageRouter(deps: {
     const agent = getOrCreateAgent(msg.channel, msg.senderId);
 
     // Handle special commands
-    if (msg.metadata?.isCommand && msg.metadata.command === 'reset') {
-      agent.reset();
-      return;
+    if (msg.metadata?.isCommand) {
+      if (msg.metadata.command === 'reset') {
+        agent.reset();
+        return;
+      }
+      if (msg.metadata.command === 'compact') {
+        const result = await agent.compact();
+        if (result && result.compactedCount > 0) {
+          await reply({
+            text: `Compacted ${result.compactedCount} messages: ${result.tokensBefore} → ${result.tokensAfter} tokens`,
+            replyTo: msg.id,
+          });
+        } else {
+          await reply({
+            text: 'Nothing to compact.',
+            replyTo: msg.id,
+          });
+        }
+        return;
+      }
     }
 
     try {
@@ -284,6 +320,7 @@ export async function startDaemon(config: Config): Promise<DaemonContext> {
     systemPrompt,
     toolRegistry,
     toolExecutor,
+    config,
   }));
 
   // Register Telegram adapter
diff --git a/src/frontends/tui/commands.test.ts b/src/frontends/tui/commands.test.ts
index dea4267..730e6ed 100644
--- a/src/frontends/tui/commands.test.ts
+++ b/src/frontends/tui/commands.test.ts
@@ -26,6 +26,10 @@ describe('parseCommand', () => {
     expect(parseCommand('/fs')).toEqual({ type: 'fullscreen' });
   });
 
+  it('parses /compact command', () => {
+    expect(parseCommand('/compact')).toEqual({ type: 'compact' });
+  });
+
   it('parses /model command without argument', () => {
     expect(parseCommand('/model')).toEqual({ type: 'model' });
   });
@@ -64,6 +68,7 @@ describe('getHelpText', () => {
     expect(help).toContain('/help');
     expect(help).toContain('/model');
     expect(help).toContain('/reset');
+    expect(help).toContain('/compact');
     expect(help).toContain('/quit');
   });
 });
diff --git a/src/frontends/tui/commands.ts b/src/frontends/tui/commands.ts
index 61b9adf..b7054e7 100644
--- a/src/frontends/tui/commands.ts
+++ b/src/frontends/tui/commands.ts
@@ -4,6 +4,7 @@ export type Command =
   | { type: 'help' }
   | { type: 'status' }
   | { type: 'fullscreen' }
+  | { type: 'compact' }
   | { type: 'model'; name?: string }
   | { type: 'backend'; provider?: string }
   | { type: 'transfer'; target: string }
@@ -38,6 +39,11 @@ export function parseCommand(input: string): Command | null {
     return { type: 'fullscreen' };
   }
 
+  // Compact
+  if (trimmed === '/compact') {
+    return { type: 'compact' };
+  }
+
   // Model (with optional argument)
   if (trimmed === '/model') {
     return { type: 'model' };
@@ -73,6 +79,7 @@ Commands:
   /model [name]          Show or switch model (local, default, fast, complex)
   /backend [provider]    Show or switch local backend (ollama, llamacpp)
   /reset, /clear, /new   Clear conversation history
+  /compact               Compact conversation history
   /status                Show session info and token usage
   /fullscreen, /fs       Switch to fullscreen mode
   /transfer <dest>       Transfer session to another frontend
@@ -90,6 +97,7 @@ export const SLASH_COMMANDS = [
   '/reset',
   '/clear',
   '/new',
+  '/compact',
   '/status',
   '/fullscreen',
   '/fs',
@@ -106,6 +114,7 @@ export const COMMAND_TOOLTIPS: Record<string, string> = {
   '/reset': 'Clear conversation history',
   '/clear': 'Clear conversation history',
   '/new': 'Start a new conversation',
+  '/compact': 'Compact conversation history to save context space',
   '/status': 'Show session info and token usage',
   '/fullscreen': 'Switch to fullscreen mode',
   '/fs': 'Switch to fullscreen mode',
diff --git a/src/frontends/tui/minimal.test.ts b/src/frontends/tui/minimal.test.ts
index 579e908..21e9bcb 100644
--- a/src/frontends/tui/minimal.test.ts
+++ b/src/frontends/tui/minimal.test.ts
@@ -44,6 +44,7 @@ describe('MinimalTui backend command', () => {
       getHistory: () => [],
       addMessage: vi.fn(),
       clear: vi.fn(),
+      replaceHistory: vi.fn(),
     };
 
     const mockRouter = {
@@ -84,6 +85,7 @@ describe('MinimalTui backend command', () => {
       getHistory: () => [],
       addMessage: vi.fn(),
       clear: vi.fn(),
+      replaceHistory: vi.fn(),
     };
 
     const mockRouter = {
diff --git a/src/gateway/handlers/handlers.test.ts b/src/gateway/handlers/handlers.test.ts
index 782e866..4d4fd2c 100644
--- a/src/gateway/handlers/handlers.test.ts
+++ b/src/gateway/handlers/handlers.test.ts
@@ -44,6 +44,7 @@ describe('session handlers', () => {
     addMessage: vi.fn(),
     getHistory: vi.fn(() => mockHistory),
     clear: vi.fn(),
+    replaceHistory: vi.fn(),
   };
 
   const mockSessionManager = {
diff --git a/src/gateway/server.test.ts b/src/gateway/server.test.ts
index 4af4e0d..ef408c7 100644
--- a/src/gateway/server.test.ts
+++ b/src/gateway/server.test.ts
@@ -13,6 +13,7 @@ const mockSession = {
   getHistory: vi.fn(() => []),
   clear: vi.fn(),
   setHistory: vi.fn(),
+  replaceHistory: vi.fn(),
 };
 
 const mockSessionManager = {
diff --git a/src/gateway/session-bridge.test.ts b/src/gateway/session-bridge.test.ts
index 2602925..8dfbb02 100644
--- a/src/gateway/session-bridge.test.ts
+++ b/src/gateway/session-bridge.test.ts
@@ -8,6 +8,7 @@ const mockSession = {
   addMessage: vi.fn(),
   getHistory: vi.fn(() => []),
   clear: vi.fn(),
+  replaceHistory: vi.fn(),
 };
 
 const mockSessionManager = {
diff --git a/src/session/manager.ts b/src/session/manager.ts
index f54e935..bbd4236 100644
--- a/src/session/manager.ts
+++ b/src/session/manager.ts
@@ -6,6 +6,7 @@ export interface Session {
   addMessage(message: Message): void;
   getHistory(): Message[];
   clear(): void;
+  replaceHistory(messages: Message[]): void;
 }
 
 export class ManagedSession implements Session {
@@ -34,6 +35,16 @@ export class ManagedSession implements Session {
     this.store.clearSession(this.id);
   }
 
+  /**
+   * Replace the entire session history with new messages.
+   * Used after compaction to persist the compacted state.
+   * Updates both in-memory history and SQLite storage atomically.
+   */
+  replaceHistory(messages: Message[]): void {
+    this.history = [...messages];
+    this.store.replaceMessages(this.id, messages);
+  }
+
   setHistory(messages: Message[]): void {
     this.history = [...messages];
   }
diff --git a/src/session/store.ts b/src/session/store.ts
index edeb21f..cab1218 100644
--- a/src/session/store.ts
+++ b/src/session/store.ts
@@ -40,6 +40,26 @@ export class SessionStore {
     }));
   }
 
+  /**
+   * Atomically replace all messages for a session.
+   * Used by compaction to swap full history with a compacted version.
+   * Runs in a transaction: delete all → re-insert in order.
+   */
+  replaceMessages(sessionId: string, messages: Message[]): void {
+    const transaction = this.db.transaction(() => {
+      // Delete existing messages
+      this.db.prepare('DELETE FROM messages WHERE session_id = ?').run(sessionId);
+      // Re-insert in order
+      const insert = this.db.prepare(
+        'INSERT INTO messages (session_id, role, content) VALUES (?, ?, ?)'
+      );
+      for (const msg of messages) {
+        insert.run(sessionId, msg.role, msg.content);
+      }
+    });
+    transaction();
+  }
+
   clearSession(sessionId: string): void {
     const stmt = this.db.prepare('DELETE FROM messages WHERE session_id = ?');
     stmt.run(sessionId);