flynn/src/backends/native/orchestrator.test.ts

import { describe, it, expect, vi, beforeEach } from 'vitest';
import { AgentOrchestrator } from './orchestrator.js';
import { ModelRouter } from '../../models/router.js';
import type { ChatResponse, ModelClient } from '../../models/types.js';
import { ToolRegistry, ToolExecutor } from '../../tools/index.js';
import { HookEngine } from '../../hooks/engine.js';
import { MemoryStore } from '../../memory/store.js';
import { mkdtempSync, rmSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';

describe('AgentOrchestrator', () => {
  // Stub model clients, one per tier; each is reassigned in `beforeEach`.
  let mockDefaultClient: ModelClient;
  let mockFastClient: ModelClient;
  let mockComplexClient: ModelClient;
  // Router built from the three stub clients above; also reassigned in `beforeEach`.
  let mockRouter: ModelRouter;

  /**
   * Builds a stub `ModelClient` whose `chat` mock always resolves to the same canned reply.
   *
   * @param name - Label embedded in the reply text as `"<name> response"`.
   * @param inputTokens - Input token count reported in `usage` (defaults to 100).
   * @param outputTokens - Output token count reported in `usage` (defaults to 50).
   * @returns A client object whose only member is the mocked `chat`.
   */
  const createMockClient = (name: string, inputTokens = 100, outputTokens = 50): ModelClient => {
    const cannedReply = {
      content: `${name} response`,
      stopReason: 'end_turn',
      usage: { inputTokens, outputTokens },
    };
    const chat = vi.fn().mockResolvedValue(cannedReply);
    return { chat };
  };

  // Rebuild the stub clients and the router before every test.
  beforeEach(() => {
    // Each tier reports its own token counts; the usage assertions further down rely on these numbers.
    [mockDefaultClient, mockFastClient, mockComplexClient] = [
      createMockClient('default', 100, 50),
      createMockClient('fast', 50, 25),
      createMockClient('complex', 200, 100),
    ];

    const routerConfig = {
      default: mockDefaultClient,
      fast: mockFastClient,
      complex: mockComplexClient,
      fallbackChain: [],
    };
    mockRouter = new ModelRouter(routerConfig);
  });

  /**
   * Fetches the client registered for `tier` on the shared test router.
   *
   * @param tier - Tier to look up.
   * @returns The client returned by `mockRouter.getClient(tier)`.
   * @throws Error when the router returns a falsy value for that tier.
   */
  const requireClient = (tier: 'default' | 'fast' | 'complex'): ModelClient => {
    const found = mockRouter.getClient(tier);
    if (found) {
      return found;
    }
    throw new Error(`Expected ${tier} model client to exist in test router`);
  };

  describe('delegate()', () => {
    /** Options object accepted by the `AgentOrchestrator` constructor. */
    type OrchestratorOptions = ConstructorParameters<typeof AgentOrchestrator>[0];

    /**
     * Builds an orchestrator on the shared mock router with the delegation
     * mapping used by every test in this suite.
     *
     * @param overrides - Options that replace the defaults (for example a
     *   different router, system prompt, or tool wiring).
     * @returns A new orchestrator; a fresh options object is built on every call.
     */
    const buildOrchestrator = (overrides: Partial<OrchestratorOptions> = {}): AgentOrchestrator =>
      new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are helpful.',
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 10,
        ...overrides,
      });

    it('routes to the correct tier when specified', async () => {
      const orchestrator = buildOrchestrator({ systemPrompt: 'You are a helpful assistant.' });

      const result = await orchestrator.delegate({
        tier: 'fast',
        systemPrompt: 'Summarize this text',
        message: 'This is a test message',
        maxTokens: 1000,
      });

      // The fast stub client answers with "fast response".
      expect(result.content).toBe('fast response');
      expect(result.tier).toBe('fast');
    });

    it('includes tools when requested', async () => {
      const toolRegistry = new ToolRegistry();
      const hooks = new HookEngine({
        confirm: ['*'],
        log: [],
        silent: [],
      });
      const toolExecutor = new ToolExecutor(toolRegistry, hooks);

      // Swap the fast client's `chat` for a dedicated mock so the call can be observed.
      const fastClient = requireClient('fast');
      const fastChatFn = vi.fn().mockResolvedValue({
        content: 'response with tools',
        stopReason: 'end_turn',
        usage: { inputTokens: 100, outputTokens: 50 },
      } as ChatResponse);
      Object.assign(fastClient, { chat: fastChatFn });

      const orchestrator = buildOrchestrator({ toolRegistry, toolExecutor });

      await orchestrator.delegate({
        tier: 'fast',
        systemPrompt: 'Use available tools',
        message: 'Help me analyze data',
        tools: true,
      });

      expect(fastChatFn).toHaveBeenCalled();
    });

    it('falls back to default tier when requested tier is unavailable', async () => {
      // This router deliberately has no `complex` client.
      const routerWithoutComplex = new ModelRouter({
        default: mockDefaultClient,
        fast: mockFastClient,
        fallbackChain: [],
      });

      const orchestrator = buildOrchestrator({
        modelRouter: routerWithoutComplex,
        systemPrompt: 'You are a helpful assistant.',
      });

      const result = await orchestrator.delegate({
        tier: 'complex',
        systemPrompt: 'Analyze deeply',
        message: 'This is complex',
      });

      expect(result.content).toBe('default response');
      expect(result.tier).toBe('default');
    });

    it('tracks cumulative usage after delegate calls', async () => {
      const orchestrator = buildOrchestrator();

      // Issued strictly one after another: fast, complex, fast.
      const requests = [
        { tier: 'fast', systemPrompt: 'Fast task', message: 'Fast message' },
        { tier: 'complex', systemPrompt: 'Complex task', message: 'Complex message' },
        { tier: 'fast', systemPrompt: 'Another fast task', message: 'Another fast message' },
      ] as const;
      for (const request of requests) {
        await orchestrator.delegate(request);
      }

      const usage = orchestrator.getDelegationUsage();

      // Two fast calls at 50/25 tokens each.
      expect(usage.fast).toEqual({
        inputTokens: 100,
        outputTokens: 50,
        calls: 2,
      });

      // One complex call at 200/100 tokens.
      expect(usage.complex).toEqual({
        inputTokens: 200,
        outputTokens: 100,
        calls: 1,
      });

      expect(usage.default).toBeUndefined();
    });

    it('tracks usage across tiers correctly', async () => {
      const orchestrator = buildOrchestrator();

      await orchestrator.delegate({
        tier: 'fast',
        systemPrompt: 'Fast task',
        message: 'Fast message',
      });

      await orchestrator.delegate({
        tier: 'fast',
        systemPrompt: 'Another fast task',
        message: 'Another fast message',
      });

      const usage = orchestrator.getDelegationUsage();

      expect(usage.fast.inputTokens).toBe(100);
      expect(usage.fast.outputTokens).toBe(50);
      expect(usage.fast.calls).toBe(2);
    });

    it('logs delegation details with tier and token counts', async () => {
      const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});

      // Restore in `finally` so a failing assertion cannot leave `console.log`
      // mocked for the tests that run afterwards.
      try {
        const orchestrator = buildOrchestrator();

        await orchestrator.delegate({
          tier: 'fast',
          systemPrompt: 'Fast task',
          message: 'Fast message',
        });

        expect(consoleSpy).toHaveBeenCalledWith(
          '[Flynn:delegate] tier=fast tokens=50+25',
        );
      } finally {
        consoleSpy.mockRestore();
      }
    });
  });

  describe('getDelegationTier()', () => {
    it('returns correct tier for each task type', () => {
      const orchestrator = new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are helpful.',
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 10,
      });

      // Task type -> tier expected from the mapping configured above.
      const expectedTiers = [
        ['compaction', 'fast'],
        ['memory_extraction', 'default'],
        ['classification', 'complex'],
        ['tool_summarisation', 'default'],
        ['complex_reasoning', 'complex'],
      ] as const;

      for (const [task, tier] of expectedTiers) {
        expect(orchestrator.getDelegationTier(task)).toBe(tier);
      }
    });

    it('returns tier that was explicitly configured', () => {
      // Mapping that includes the `local` tier, which the other suites never use.
      const customDelegation = {
        compaction: 'local',
        memory_extraction: 'fast',
        classification: 'complex',
        tool_summarisation: 'default',
        complex_reasoning: 'local',
      } as const;

      const orchestrator = new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are helpful.',
        primaryTier: 'default',
        delegation: customDelegation,
        maxDelegationDepth: 10,
      });

      const expectedTiers = [
        ['compaction', 'local'],
        ['memory_extraction', 'fast'],
        ['complex_reasoning', 'local'],
      ] as const;

      for (const [task, tier] of expectedTiers) {
        expect(orchestrator.getDelegationTier(task)).toBe(tier);
      }
    });
  });

  describe('process()', () => {
    /** Options object accepted by the `AgentOrchestrator` constructor. */
    type OrchestratorOptions = ConstructorParameters<typeof AgentOrchestrator>[0];

    /**
     * Builds an orchestrator on the shared mock router with this suite's
     * common configuration.
     *
     * @param overrides - Options that replace or extend the defaults
     *   (for example memory-store wiring).
     * @returns A new orchestrator; a fresh options object is built on every call.
     */
    const buildOrchestrator = (overrides: Partial<OrchestratorOptions> = {}): AgentOrchestrator =>
      new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are a helpful agent.',
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 10,
        ...overrides,
      });

    /**
     * Replaces the default-tier client's `chat` with a mock that resolves to
     * "Agent response" and reports the given token usage.
     *
     * @param inputTokens - Input token count reported in `usage`.
     * @param outputTokens - Output token count reported in `usage`.
     * @returns The installed mock, so tests can inspect its calls.
     */
    const stubDefaultChat = (inputTokens: number, outputTokens: number) => {
      const defaultClient = requireClient('default');
      const chatFn = vi.fn().mockResolvedValue({
        content: 'Agent response',
        stopReason: 'end_turn',
        usage: { inputTokens, outputTokens },
      } as ChatResponse);
      Object.assign(defaultClient, { chat: chatFn });
      return chatFn;
    };

    it('proxies to NativeAgent for user messages', async () => {
      stubDefaultChat(150, 75);
      const orchestrator = buildOrchestrator();

      const response = await orchestrator.process('Hello, agent!');

      expect(response).toBe('Agent response');
    });

    it('maintains conversation history through process()', async () => {
      const orchestrator = buildOrchestrator();

      const prompts = ['Hello', 'How are you?', 'Tell me about yourself'];
      for (const prompt of prompts) {
        await orchestrator.process(prompt);
      }

      const history = orchestrator.getHistory();

      // Each prompt contributes a user entry followed by the stub's assistant reply.
      expect(history).toHaveLength(6);
      prompts.forEach((prompt, turn) => {
        expect(history[2 * turn]).toEqual({ role: 'user', content: prompt });
        expect(history[2 * turn + 1]).toEqual({ role: 'assistant', content: 'default response' });
      });
    });

    it('uses adaptive memory injection strategy when configured', async () => {
      const tempDir = mkdtempSync(join(tmpdir(), 'flynn-orchestrator-memory-'));

      // Remove the temp directory in `finally` so a failed assertion does not leak it.
      try {
        const memoryStore = new MemoryStore({ dir: tempDir, maxContextTokens: 2000 });
        memoryStore.writeCategory('user', 'preferences', 'User prefers concise output.', 'replace');

        const defaultChatFn = stubDefaultChat(50, 25);

        const orchestrator = buildOrchestrator({
          memoryStore,
          memoryInjectionStrategy: 'adaptive',
          memoryMaxInjectionTokens: 100,
        });

        await orchestrator.process('Keep this concise please');

        expect(defaultChatFn).toHaveBeenCalled();
        // Inspect the system prompt that was sent on the first model call.
        const callArgs = defaultChatFn.mock.calls[0][0];
        expect(callArgs.system).toContain('# Memory Context');
        expect(callArgs.system).toContain('concise');
      } finally {
        rmSync(tempDir, { recursive: true, force: true });
      }
    });

    it('falls back to default memory context when adaptive injection errors', async () => {
      const tempDir = mkdtempSync(join(tmpdir(), 'flynn-orchestrator-memory-fallback-'));

      // Remove the temp directory in `finally` so a failed assertion does not leak it.
      try {
        const memoryStore = new MemoryStore({ dir: tempDir, maxContextTokens: 2000 });
        memoryStore.write('user', 'Fallback memory content', 'replace');
        // Make the first `getPromptSections` call throw.
        const getPromptSectionsSpy = vi.spyOn(memoryStore, 'getPromptSections').mockImplementationOnce(() => {
          throw new Error('boom');
        });

        // Restore the spy even if the run or an assertion below throws.
        try {
          const defaultChatFn = stubDefaultChat(50, 25);

          const orchestrator = buildOrchestrator({
            memoryStore,
            memoryInjectionStrategy: 'adaptive',
            memoryMaxInjectionTokens: 100,
          });

          await orchestrator.process('test message');

          const callArgs = defaultChatFn.mock.calls[0][0];
          expect(callArgs.system).toContain('Fallback memory content');
        } finally {
          getPromptSectionsSpy.mockRestore();
        }
      } finally {
        rmSync(tempDir, { recursive: true, force: true });
      }
    });
  });

  describe('reset()', () => {
    /** Builds an orchestrator with the configuration shared by both tests in this suite. */
    const buildOrchestrator = (): AgentOrchestrator =>
      new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are a helpful agent.',
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 10,
      });

    it('clears primary agent conversation history', async () => {
      const orchestrator = buildOrchestrator();

      for (const prompt of ['Hello', 'How are you?']) {
        await orchestrator.process(prompt);
      }

      // Two exchanges produce four history entries before the reset.
      expect(orchestrator.getHistory()).toHaveLength(4);

      orchestrator.reset();

      expect(orchestrator.getHistory()).toHaveLength(0);
    });

    it('can be called multiple times', async () => {
      const orchestrator = buildOrchestrator();

      // Process one message, reset, and confirm the history is empty — twice in a row.
      for (const prompt of ['Hello', 'World']) {
        await orchestrator.process(prompt);
        orchestrator.reset();

        expect(orchestrator.getHistory()).toHaveLength(0);
      }
    });
  });

  describe('getDelegationUsage()', () => {
    /** Builds an orchestrator with the configuration shared by both tests in this suite. */
    const buildOrchestrator = (): AgentOrchestrator =>
      new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are helpful.',
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 10,
      });

    it('returns copy of usage stats (doesn\'t expose internal map)', async () => {
      const orchestrator = buildOrchestrator();

      await orchestrator.delegate({
        tier: 'fast',
        systemPrompt: 'Fast task',
        message: 'Fast message',
      });

      const firstSnapshot = orchestrator.getDelegationUsage();
      const secondSnapshot = orchestrator.getDelegationUsage();

      expect(firstSnapshot).toEqual(secondSnapshot);

      // Mutating one snapshot must not show up in the other.
      firstSnapshot.fast.inputTokens = 999;

      expect(secondSnapshot.fast.inputTokens).toBe(50);
    });

    it('returns empty object when no usage tracked', async () => {
      const orchestrator = buildOrchestrator();

      expect(orchestrator.getDelegationUsage()).toEqual({});
    });
  });

  describe('getHistory()', () => {
    /** Builds an orchestrator with the configuration shared by both tests in this suite. */
    const buildOrchestrator = (): AgentOrchestrator =>
      new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are a helpful agent.',
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 10,
      });

    it('returns conversation history from primary agent', async () => {
      const orchestrator = buildOrchestrator();

      const prompts = ['Hello', 'How are you?'];
      for (const prompt of prompts) {
        await orchestrator.process(prompt);
      }

      const history = orchestrator.getHistory();

      // Each prompt contributes a user entry followed by the stub's assistant reply.
      expect(history).toHaveLength(4);
      prompts.forEach((prompt, turn) => {
        expect(history[2 * turn]).toEqual({ role: 'user', content: prompt });
        expect(history[2 * turn + 1]).toEqual({ role: 'assistant', content: 'default response' });
      });
    });

    it('returns empty array when no history', async () => {
      const orchestrator = buildOrchestrator();

      expect(orchestrator.getHistory()).toEqual([]);
    });
  });

  // Error-recovery scenario for process(): the model first requests a tool call,
  // the next model call throws a context-overflow error, and every later call
  // succeeds. The test expects process() to still resolve to the final reply.
  describe('process()', () => {
    it('rolls back tool-loop provider errors, hard-trims on context overflow, and retries once', async () => {
      // Counts chat() invocations so the stub can script a different outcome per call.
      let callCount = 0;
      const mockClient: ModelClient = {
        chat: vi.fn().mockImplementation(async () => {
          callCount++;
          // Call 1: ask for the `test.echo` tool to be run.
          if (callCount === 1) {
            return {
              content: '',
              stopReason: 'tool_use',
              usage: { inputTokens: 10, outputTokens: 5 },
              toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
            } as ChatResponse;
          }
          // Call 2: fail with a provider error.
          if (callCount === 2) {
            // Simulate llama.cpp context overflow buried inside an aggregated router error.
            throw new Error(
              'llama-server error (400): {"error":{"type":"exceedcontextsizeerror","nprompttokens":9183,"nctx":4096}}',
            );
          }
          // Call 3 and later: plain successful completion.
          return {
            content: 'ok',
            stopReason: 'end_turn',
            usage: { inputTokens: 10, outputTokens: 5 },
          } as ChatResponse;
        }),
      };

      // Router with only a default client and no fallbacks.
      const router = new ModelRouter({
        default: mockClient,
        fallbackChain: [],
      });

      // Minimal Session stub that supports rollback via replaceHistory().
      // `history` is the backing array; it is inspected directly at the end of the test.
      const history: any[] = [];
      const session = {
        id: 'test',
        addMessage: vi.fn((m: any) => { history.push(m); }),
        // Returns a shallow copy of the backing array.
        getHistory: vi.fn(() => [...history]),
        clear: vi.fn(() => { history.length = 0; }),
        // Swaps the contents in place so `history` keeps pointing at the same array.
        replaceHistory: vi.fn((msgs: any[]) => {
          history.length = 0;
          history.push(...msgs);
        }),
        getConfig: vi.fn(() => undefined),
        setConfig: vi.fn(),
        deleteConfig: vi.fn(),
      } as any;

      // Register the tool the scripted first reply asks for; it returns its `text` argument as a string.
      const registry = new ToolRegistry();
      registry.register({
        name: 'test.echo',
        description: 'echo',
        inputSchema: { type: 'object', properties: { text: { type: 'string' } }, required: ['text'] },
        execute: async (args: any) => ({ success: true, output: String(args.text ?? '') }),
      });

      // All hook lists are empty here (unlike the `confirm: ['*']` setup in the delegate() tool test).
      const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
      const executor = new ToolExecutor(registry, hooks);

      const orchestrator = new AgentOrchestrator({
        modelRouter: router,
        systemPrompt: 'You are helpful.',
        session,
        toolRegistry: registry,
        toolExecutor: executor,
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 3,
      });

      // The scripted failure on call 2 must not surface to the caller.
      const res = await orchestrator.process('hello');
      expect(res).toBe('ok');

      // Ensure we didn't persist the low-level error string in history.
      // Entries whose content is not a string contribute an empty line to the joined text.
      const textHistory = history
        .map(m => (typeof m.content === 'string' ? m.content : ''))
        .join('\n');
      expect(textHistory).not.toContain('Error in tool loop');
    });
  });

  describe('setModelTier()', () => {
    /** Builds an orchestrator whose primary tier starts as `default`. */
    const buildOrchestrator = (): AgentOrchestrator =>
      new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are helpful.',
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 10,
      });

    it('sets model tier on primary agent', () => {
      const orchestrator = buildOrchestrator();

      orchestrator.setModelTier('fast');

      expect(orchestrator.getModelTier()).toBe('fast');
    });

    it('allows tier changes after initialization', () => {
      const orchestrator = buildOrchestrator();

      expect(orchestrator.getModelTier()).toBe('default');

      // Switch tiers twice and confirm the getter follows each change.
      for (const tier of ['complex', 'fast'] as const) {
        orchestrator.setModelTier(tier);

        expect(orchestrator.getModelTier()).toBe(tier);
      }
    });
  });

  // NOTE(review): neither test asserts that a registered callback is ever invoked;
  // they only check that `setOnToolUse` does not throw and that the model tier is
  // still `default` afterwards.
  describe('setOnToolUse()', () => {
    /** Builds an orchestrator whose primary tier starts as `default`. */
    const buildOrchestrator = (): AgentOrchestrator =>
      new AgentOrchestrator({
        modelRouter: mockRouter,
        systemPrompt: 'You are helpful.',
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 10,
      });

    it('sets tool-use callback on primary agent', () => {
      const orchestrator = buildOrchestrator();
      const onToolUse = vi.fn();

      orchestrator.setOnToolUse(onToolUse);

      expect(orchestrator.getModelTier()).toBe('default');
    });

    it('allows callback changes', () => {
      const orchestrator = buildOrchestrator();
      const firstCallback = vi.fn();
      const secondCallback = vi.fn();

      // Register one callback, then replace it; the tier is checked after each step.
      for (const onToolUse of [firstCallback, secondCallback]) {
        orchestrator.setOnToolUse(onToolUse);

        expect(orchestrator.getModelTier()).toBe('default');
      }
    });
  });
});