flynn/src/backends/native/orchestrator.ts

import type { ModelRouter, ModelTier } from '../../models/router.js';
import type { ChatRequest, Message, TokenUsage } from '../../models/types.js';
import type { Session } from '../../session/index.js';
import type { ToolRegistry } from '../../tools/registry.js';
import type { ToolExecutor } from '../../tools/executor.js';
import type { MemoryStore } from '../../memory/store.js';
import type { ToolPolicyContext } from '../../tools/policy.js';
import type { Attachment } from '../../channels/types.js';
import { NativeAgent } from './agent.js';
import type { ToolUseEvent } from './agent.js';
import type { OutboundAttachmentCollector } from './attachments.js';
import { estimateMessageTokens, shouldCompact } from '../../context/tokens.js';
import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
import { estimateCost } from '../../models/costs.js';
import { auditLogger } from '../../audit/index.js';
import { buildAdaptiveMemoryContext, buildRecentMemoryContext } from '../../memory/adaptive.js';
import { buildUserMessage } from '../../models/media.js';

// ── Public types ──────────────────────────────────────────────────────

/** A single-turn, stateless request to a sub-agent at a specific tier. */
export interface SubAgentRequest {
  tier: ModelTier;
  systemPrompt: string;
  message: string;
  maxTokens?: number;
  /** When true, include tools from the toolRegistry in the request. */
  tools?: boolean;
}

/** Result returned from a sub-agent delegation call. */
export interface SubAgentResult {
  content: string;
  usage: TokenUsage;
  tier: ModelTier;
}

/** Maps each delegation task to the model tier that should handle it. */
export interface DelegationConfig {
  compaction: ModelTier;
  memory_extraction: ModelTier;
  classification: ModelTier;
  tool_summarisation: ModelTier;
  complex_reasoning: ModelTier;
}

/** Per-tier cumulative usage statistics. */
interface TierUsageStats {
  inputTokens: number;
  outputTokens: number;
  calls: number;
}

/** Full usage stats for an orchestrator session. */
export interface UsageReport {
  /** Primary agent (user-facing) usage. */
  primary: {
    inputTokens: number;
    outputTokens: number;
    calls: number;
  };
  /** Delegation (sub-agent) usage, broken down by tier. */
  delegation: Record<string, { inputTokens: number; outputTokens: number; calls: number }>;
  /** Combined totals. */
  total: {
    inputTokens: number;
    outputTokens: number;
    calls: number;
    estimatedCost: number;
  };
}

/** Full configuration for the AgentOrchestrator. */
export interface OrchestratorConfig {
  modelRouter: ModelRouter;
  systemPrompt: string;
  session?: Session;
  toolRegistry?: ToolRegistry;
  toolExecutor?: ToolExecutor;
  maxIterations?: number;
  /** The tier used by the primary NativeAgent for user-facing conversation. */
  primaryTier: ModelTier;
  /** Which tier to use for each delegation task type. */
  delegation: DelegationConfig;
  /** Maximum nesting depth for delegation calls (safety guard). */
  maxDelegationDepth: number;
  onToolUse?: (event: ToolUseEvent) => void;
  /** Context compaction settings. When provided, enables automatic compaction. */
  compaction?: CompactionConfig;
  /** Model identifier for the primary model (used for context window lookup). */
  modelName?: string;
  /** Optional override for the context window size (in tokens). */
  contextWindow?: number;
  /** Optional memory store for injecting persistent memory into the system prompt. */
  memoryStore?: MemoryStore;
  /** Enable/disable automatic memory extraction during compaction. */
  memoryAutoExtract?: boolean;
  /** Strategy for memory prompt injection. */
  memoryInjectionStrategy?: 'all' | 'recent' | 'adaptive';
  /** Maximum tokens allowed for injected memory context. */
  memoryMaxInjectionTokens?: number;
  /** Policy context for tool filtering (agent tier, provider). */
  toolPolicyContext?: ToolPolicyContext;
  /** Collector for outbound attachments queued by tools (e.g. media.send). */
  attachmentCollector?: OutboundAttachmentCollector;
}

// ── AgentOrchestrator ─────────────────────────────────────────────────

/**
 * Wraps a primary NativeAgent and adds the ability to delegate
 * single-turn sub-tasks to different model tiers via the ModelRouter.
 *
 * The primary agent handles the main conversation loop (with tools),
 * while `delegate()` enables cheap, stateless calls for tasks like
 * compaction, classification, and memory extraction.
 */
export class AgentOrchestrator {
  private _agent: NativeAgent;
  private _modelRouter: ModelRouter;
  private _delegation: DelegationConfig;
  private _maxDelegationDepth: number;
  private _toolRegistry?: ToolRegistry;
  private _session?: Session;
  private _compactionConfig?: CompactionConfig;
  private _modelName?: string;
  private _contextWindow?: number;
  private _memoryStore?: MemoryStore;
  private _memoryAutoExtract: boolean;
  private _memoryInjectionStrategy: 'all' | 'recent' | 'adaptive';
  private _memoryMaxInjectionTokens: number;
  private _systemPromptBase: string;
  private _usageByTier: Map<string, TierUsageStats> = new Map();

  constructor(config: OrchestratorConfig) {
    this._modelRouter = config.modelRouter;
    this._delegation = config.delegation;
    this._maxDelegationDepth = config.maxDelegationDepth;
    this._toolRegistry = config.toolRegistry;
    this._session = config.session;
    this._compactionConfig = config.compaction;
    this._modelName = config.modelName;
    this._contextWindow = config.contextWindow;
    this._memoryStore = config.memoryStore;
    this._memoryAutoExtract = config.memoryAutoExtract ?? true;
    this._memoryInjectionStrategy = config.memoryInjectionStrategy ?? 'all';
    this._memoryMaxInjectionTokens = config.memoryMaxInjectionTokens ?? 2000;
    this._systemPromptBase = config.systemPrompt;

    // Create the primary NativeAgent for user-facing conversation
    this._agent = new NativeAgent({
      modelClient: config.modelRouter,
      systemPrompt: config.systemPrompt,
      session: config.session,
      toolRegistry: config.toolRegistry,
      toolExecutor: config.toolExecutor,
      maxIterations: config.maxIterations,
      onToolUse: config.onToolUse,
      toolPolicyContext: config.toolPolicyContext,
      attachmentCollector: config.attachmentCollector,
    });

    // Set the primary tier on the agent
    this._agent.setModelTier(config.primaryTier);
  }

  // ── Delegation ────────────────────────────────────────────────────

  /**
   * Perform a single-turn, stateless call to a model at the specified tier.
   *
   * This is used for internal sub-tasks (compaction, classification, etc.)
   * that don't need the full conversation history or tool loop.
   *
   * If the requested tier is not available on the router, falls back to
   * the 'default' tier with a warning.
   */
  async delegate(request: SubAgentRequest): Promise<SubAgentResult> {
    let tier = request.tier;

    // Check if the requested tier is available; fall back to 'default' if not
    const client = this._modelRouter.getClient(tier);
    if (!client) {
      console.warn(
        `[Flynn:delegate] Tier '${tier}' not available, falling back to 'default'`,
      );
      tier = 'default';
    }

    // Build the single-turn chat request
    const messages: Message[] = [
      { role: 'user', content: request.message },
    ];

    const chatRequest: ChatRequest = {
      messages,
      system: request.systemPrompt,
      maxTokens: request.maxTokens,
    };

    // Optionally include tools from the registry (filtered by policy)
    if (request.tools && this._toolRegistry) {
      const policyContext = this._agent.getToolPolicyContext();
      chatRequest.tools = this._toolRegistry.filteredToAnthropicFormat(policyContext);
    }

    const response = await this._modelRouter.chat(chatRequest, tier);

    // Track cumulative usage for this tier
    this._trackUsage(tier, response.usage);

    console.log(
      `[Flynn:delegate] tier=${tier} tokens=${response.usage.inputTokens}+${response.usage.outputTokens}`,
    );

    return {
      content: response.content,
      usage: response.usage,
      tier,
    };
  }

  // ── Primary agent proxies ─────────────────────────────────────────

  /**
   * Process a user message through the primary NativeAgent.
   * This is the main entry point for user-facing conversation.
   *
   * When compaction is configured, checks whether the conversation history
   * exceeds the context window threshold and compacts it before processing.
   */
  async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
    this._injectMemoryContext(userMessage);
    await this.compactIfNeeded();

    // Snapshot history so we can rollback if the underlying tool loop returns an error message.
    // This avoids persisting low-level provider errors to the user-visible conversation state.
    const before = this.getHistory();

    const result = await this._agent.process(userMessage, attachments);

    // NativeAgent currently converts tool-loop exceptions into a user-visible error string.
    // Intercept a few common cases here to self-heal (context overflow) and/or degrade gracefully.
    if (this._isToolLoopErrorMessage(result)) {
      // Roll back the user message + error message inserted by the agent.
      this._restoreHistory(before);

      const underlying = this._stripToolLoopErrorPrefix(result);
      const ctx = this._extractContextWindowFromError(underlying);
      if (ctx) {
        // Attempt: compact + hard-trim to fit the discovered context window, then retry once.
        await this._compactAndTrimToFit(ctx);
        const retry = await this._agent.process(userMessage, attachments);
        if (!this._isToolLoopErrorMessage(retry)) {
          return retry;
        }
        // If we still failed, roll back again so we don't persist the error string.
        this._restoreHistory(before);
      }

      // Persist a short, user-friendly failure message (without provider internals).
      const friendly =
        [
          'I ran into a model/provider error while processing that message.',
          '',
          'Try again. If it keeps happening:',
          '1. Run `/compact` or `/reset` to shrink the conversation context.',
          '2. Switch to a different model tier (e.g. `/model local`).',
        ].join('\n');

      // Re-add the user message so the conversation state matches what the user sent.
      this._appendUserAndAssistant(userMessage, attachments, friendly);
      return friendly;
    }

    return result;
  }

  /**
   * Force-compact the current conversation history regardless of threshold.
   * Returns the compaction result, or null if there was nothing to compact
   * (e.g. no session, too few messages).
   */
  async compact(): Promise<CompactionResult | null> {
    const config = this._compactionConfig ?? DEFAULT_COMPACTION_CONFIG;
    const messages = this.getHistory();

    if (messages.length === 0) {
      return null;
    }

    const result = await compactHistory({
      messages,
      orchestrator: this,
      config,
      memoryStore: this._memoryStore,
      autoExtract: this._memoryAutoExtract,
    });

    // If nothing was actually compacted, skip the replace
    if (result.compactedCount === 0) {
      return result;
    }

    // Persist the compacted history
    if (this._session) {
      this._session.replaceHistory(result.messages);
    }

    console.log(
      `[Flynn:compact] Compacted ${result.compactedCount} messages: ` +
      `${result.tokensBefore} → ${result.tokensAfter} tokens`,
    );

    if (this._session) {
      auditLogger?.sessionCompact({
        session_id: this._session.id,
        messages_before: messages.length,
        messages_after: result.messages.length,
        tokens_before: result.tokensBefore,
        tokens_after: result.tokensAfter,
      });
    }

    return result;
  }

  /** Reset the primary agent's conversation history and usage stats. */
  reset(): void {
    this._agent.reset();
    this._usageByTier.clear();
  }

  /** Get the primary agent's conversation history. */
  getHistory(): Message[] {
    return this._agent.getHistory();
  }

  /** Set the model tier on the primary agent. */
  setModelTier(tier: ModelTier): void {
    this._agent.setModelTier(tier);
  }

  /** Get the current model tier of the primary agent. */
  getModelTier(): ModelTier {
    return this._agent.getModelTier();
  }

  /** Set the tool-use callback on the primary agent. */
  setOnToolUse(callback: ((event: ToolUseEvent) => void) | undefined): void {
    this._agent.setOnToolUse(callback);
  }

  /** Request cancellation for the current primary-agent operation. */
  cancel(): void {
    this._agent.cancel();
  }

  /** Whether the primary agent currently has an in-flight operation. */
  isCancellable(): boolean {
    return this._agent.isCancellable();
  }

  // ── Usage & config accessors ──────────────────────────────────────

  /**
   * Returns cumulative delegation usage stats per tier.
   * Useful for cost tracking and visibility into sub-agent calls.
   */
  getDelegationUsage(): Record<string, TierUsageStats> {
    const result: Record<string, TierUsageStats> = {};
    for (const [tier, stats] of this._usageByTier) {
      result[tier] = { ...stats };
    }
    return result;
  }

  /**
   * Returns comprehensive usage stats combining primary agent and delegation usage.
   * Includes estimated cost based on the primary model's pricing.
   */
  getUsage(): UsageReport {
    const primary = this._agent.getUsage();
    const delegation = this.getDelegationUsage();

    let totalInput = primary.inputTokens;
    let totalOutput = primary.outputTokens;
    let totalCalls = primary.calls;

    for (const stats of Object.values(delegation)) {
      totalInput += stats.inputTokens;
      totalOutput += stats.outputTokens;
      totalCalls += stats.calls;
    }

    return {
      primary,
      delegation,
      total: {
        inputTokens: totalInput,
        outputTokens: totalOutput,
        calls: totalCalls,
        estimatedCost: estimateCost(totalInput, totalOutput, this._modelName),
      },
    };
  }

  /**
   * Look up which model tier is configured for a given delegation task.
   * Convenience method so callers don't need to access the config directly.
   */
  getDelegationTier(task: keyof DelegationConfig): ModelTier {
    return this._delegation[task];
  }

  // ── Private helpers ───────────────────────────────────────────────

  /**
   * Inject persistent memory context into the primary agent's system prompt.
   * Reads from the memory store and appends relevant context to the base
   * system prompt. If no memory store is configured or no memory content
   * exists, restores the original base prompt.
   */
  private _injectMemoryContext(userMessage: string): void {
    if (!this._memoryStore) {
      return;
    }

    let memoryContext = '';
    try {
      if (this._memoryInjectionStrategy === 'recent') {
        memoryContext = buildRecentMemoryContext(this._memoryStore, this._memoryMaxInjectionTokens);
      } else if (this._memoryInjectionStrategy === 'adaptive') {
        memoryContext = buildAdaptiveMemoryContext({
          store: this._memoryStore,
          userMessage,
          recentMessages: this.getHistory(),
          config: {
            maxTokens: this._memoryMaxInjectionTokens,
          },
        });
      } else {
        memoryContext = this._memoryStore.getContextForPrompt();
      }
    } catch (error) {
      console.warn('[Flynn:memory] Adaptive memory injection failed, falling back to default context:', error);
      memoryContext = this._memoryStore.getContextForPrompt();
    }

    memoryContext = this._clipMemoryContext(memoryContext);

    if (!memoryContext) {
      this._agent.setSystemPrompt(this._systemPromptBase);
      return;
    }

    const enrichedPrompt = `${this._systemPromptBase}\n\n# Memory Context\n\nThe following is your persistent memory. Use it to maintain continuity across sessions.\n\n${memoryContext}`;
    this._agent.setSystemPrompt(enrichedPrompt);
  }

  private _clipMemoryContext(context: string): string {
    if (!context) {
      return context;
    }
    const maxChars = this._memoryMaxInjectionTokens * 4;
    if (context.length <= maxChars) {
      return context;
    }
    return context.slice(0, maxChars);
  }

  /**
   * Check whether automatic compaction should run, and if so, compact.
   * Called before each `process()` call when compaction is configured.
   */
  private async compactIfNeeded(): Promise<void> {
    if (!this._compactionConfig) {return;}

    const messages = this.getHistory();
    if (messages.length === 0) {return;}

    const model = this._modelName ?? 'unknown';
    const needs = shouldCompact({
      messages,
      model,
      contextWindow: this._contextWindow,
      thresholdPct: this._compactionConfig.thresholdPct,
    });

    if (!needs) {return;}

    await this.compact();
  }

  private _isToolLoopErrorMessage(text: string): boolean {
    return text.startsWith('Error in tool loop (iteration ');
  }

  private _stripToolLoopErrorPrefix(text: string): string {
    const m = text.match(/^Error in tool loop \(iteration \d+\):\s*(.*)$/s);
    return m ? m[1] : text;
  }

  private _restoreHistory(messages: Message[]): void {
    if (this._session) {
      this._session.replaceHistory(messages);

    }
    // No session available; nothing safe to do here.
  }

  private _appendUserAndAssistant(userMessage: string, attachments: Attachment[] | undefined, assistantText: string): void {
    if (!this._session) {
      return;
    }
    const userMsg = buildUserMessage(userMessage, attachments);
    this._session.addMessage(userMsg);
    this._session.addMessage({ role: 'assistant', content: assistantText });
  }

  private _extractContextWindowFromError(errorText: string): number | undefined {
    // Try a few common patterns and pick the smallest plausible context window.
    // Example llama.cpp error:
    //   exceeds the available context size (4096 tokens) ... "nctx":4096
    const candidates: number[] = [];
    const jsonNctx = errorText.match(/"nctx"\s*:\s*(\d{3,7})/);
    if (jsonNctx) { candidates.push(Number(jsonNctx[1])); }
    const paren = errorText.match(/context size \((\d{3,7}) tokens\)/);
    if (paren) { candidates.push(Number(paren[1])); }
    const maxContext = errorText.match(/maximum context length is (\d{3,7}) tokens/i);
    if (maxContext) { candidates.push(Number(maxContext[1])); }

    const valid = candidates.filter(n => Number.isFinite(n) && n >= 256);
    if (valid.length === 0) {
      return undefined;
    }
    return Math.min(...valid);
  }

  private async _compactAndTrimToFit(contextWindow: number): Promise<void> {
    // Compaction is best-effort; if it fails (e.g., providers down), fall back to a hard trim.
    try {
      await this.compact();
    } catch (error) {
      console.warn('[Flynn:compact] Emergency compaction failed:', error);
    }

    if (!this._session) {
      return;
    }

    const threshold = Math.floor((this._compactionConfig?.thresholdPct ?? 80) / 100 * contextWindow);
    let messages = this.getHistory();
    let estimated = estimateMessageTokens(messages);

    // Drop oldest messages until we're under budget.
    // This is intentionally blunt; it only triggers after a real provider context overflow.
    while (messages.length > 1 && estimated > threshold) {
      messages = messages.slice(1);
      estimated = estimateMessageTokens(messages);
    }

    this._session.replaceHistory(messages);
  }

  /** Accumulate usage stats for a given tier. */
  private _trackUsage(tier: ModelTier, usage: TokenUsage): void {
    const existing = this._usageByTier.get(tier);
    if (existing) {
      existing.inputTokens += usage.inputTokens;
      existing.outputTokens += usage.outputTokens;
      existing.calls += 1;
    } else {
      this._usageByTier.set(tier, {
        inputTokens: usage.inputTokens,
        outputTokens: usage.outputTokens,
        calls: 1,
      });
    }
  }
}