orchestrator: recover from context overflow on fallback

2026-02-13 21:19:02 -08:00
parent 944b2c916a
commit 151b48310e
4 changed files with 236 additions and 18 deletions
@@ -9,11 +9,12 @@ import type { Attachment } from '../../channels/types.js';
 import { NativeAgent } from './agent.js';
 import type { ToolUseEvent } from './agent.js';
 import type { OutboundAttachmentCollector } from './attachments.js';
-import { shouldCompact } from '../../context/tokens.js';
+import { estimateMessageTokens, shouldCompact } from '../../context/tokens.js';
 import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
 import { estimateCost } from '../../models/costs.js';
 import { auditLogger } from '../../audit/index.js';
 import { buildAdaptiveMemoryContext, buildRecentMemoryContext } from '../../memory/adaptive.js';
+import { buildUserMessage } from '../../models/media.js';

 // ── Public types ──────────────────────────────────────────────────────

@@ -227,7 +228,48 @@ export class AgentOrchestrator {
  async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
    this._injectMemoryContext(userMessage);
    await this.compactIfNeeded();
-    return this._agent.process(userMessage, attachments);
+
+    // Processes one user turn, recovering from tool-loop failures:
+    //   1. Run the agent once.
+    //   2. If it returns a tool-loop error string, roll the history back.
+    //   3. If the error reveals a context-window size, compact + trim to fit and retry once.
+    //   4. Otherwise (or if the retry also fails) record and return a short friendly message.
+    //
+    // `rollbackTarget` is the history the session is restored to when a turn fails, so that
+    // low-level provider errors are never persisted to the user-visible conversation state.
+    // NOTE(review): this presumes getHistory() returns a snapshot that later agent calls
+    // do not mutate in place — confirm against the session implementation.
+    let rollbackTarget = this.getHistory();
+
+    const result = await this._agent.process(userMessage, attachments);
+
+    // NativeAgent currently converts tool-loop exceptions into a user-visible error string.
+    // Anything that is not such a string is a normal reply.
+    if (!this._isToolLoopErrorMessage(result)) {
+      return result;
+    }
+
+    // Roll back the user message + error message inserted by the agent.
+    this._restoreHistory(rollbackTarget);
+
+    const underlying = this._stripToolLoopErrorPrefix(result);
+    const ctx = this._extractContextWindowFromError(underlying);
+    if (ctx !== undefined) {
+      // Attempt: compact + hard-trim to fit the discovered context window, then retry once.
+      await this._compactAndTrimToFit(ctx);
+
+      // Re-snapshot AFTER shrinking. If the retry fails too, we roll back to the shrunken
+      // history rather than the original one; restoring the original would throw away the
+      // compaction and leave the session in the same overflowing state for the next message.
+      rollbackTarget = this.getHistory();
+
+      const retry = await this._agent.process(userMessage, attachments);
+      if (!this._isToolLoopErrorMessage(retry)) {
+        return retry;
+      }
+      // If we still failed, roll back again so we don't persist the error string.
+      this._restoreHistory(rollbackTarget);
+    }
+
+    // Persist a short, user-friendly failure message (without provider internals).
+    const friendly =
+      [
+        'I ran into a model/provider error while processing that message.',
+        '',
+        'Try again. If it keeps happening:',
+        '1. Run `/compact` or `/reset` to shrink the conversation context.',
+        '2. Switch to a different model tier (e.g. `/model local`).',
+      ].join('\n');
+
+    // Re-add the user message so the conversation state matches what the user sent.
+    this._appendUserAndAssistant(userMessage, attachments, friendly);
+    return friendly;
  }

  /**
@@ -445,6 +487,77 @@ export class AgentOrchestrator {
    await this.compact();
  }

+  /** Reports whether `text` begins with the fixed tool-loop error prefix. */
+  private _isToolLoopErrorMessage(text: string): boolean {
+    const marker = 'Error in tool loop (iteration ';
+    return text.slice(0, marker.length) === marker;
+  }
+
+  /**
+   * Returns whatever follows the `Error in tool loop (iteration N):` header, with the
+   * whitespace directly after the colon removed. Text that does not carry the header
+   * is returned unchanged.
+   */
+  private _stripToolLoopErrorPrefix(text: string): string {
+    const parsed = /^Error in tool loop \(iteration \d+\):\s*(.*)$/s.exec(text);
+    if (parsed === null) {
+      return text;
+    }
+    return parsed[1];
+  }
+
+  /** Replaces the session's history with `messages`; a no-op when no session is attached. */
+  private _restoreHistory(messages: Message[]): void {
+    const session = this._session;
+    if (!session) {
+      // Without a session there is nothing safe to restore into.
+      return;
+    }
+    session.replaceHistory(messages);
+  }
+
+  /**
+   * Appends one user message (built from `userMessage` and `attachments`) followed by one
+   * assistant message containing `assistantText` to the session. Does nothing when no
+   * session is attached.
+   */
+  private _appendUserAndAssistant(userMessage: string, attachments: Attachment[] | undefined, assistantText: string): void {
+    const session = this._session;
+    if (session) {
+      session.addMessage(buildUserMessage(userMessage, attachments));
+      session.addMessage({ role: 'assistant', content: assistantText });
+    }
+  }
+
+  /**
+   * Extracts a context-window size (in tokens) from a provider error message.
+   *
+   * Several known phrasings are tried; when more than one matches, the smallest plausible
+   * value is returned so that a subsequent trim errs on the safe side.
+   *
+   * Example llama.cpp error:
+   *   exceeds the available context size (4096 tokens) ... "n_ctx":4096
+   *
+   * @param errorText - Error text with the tool-loop prefix already removed.
+   * @returns The detected window size, or `undefined` when no pattern yields a value >= 256.
+   */
+  private _extractContextWindowFromError(errorText: string): number | undefined {
+    const patterns: readonly RegExp[] = [
+      // JSON field. Accepts both `n_ctx` and the underscore-less `nctx`. The `(?!\d)` guard
+      // rejects numbers longer than 7 digits instead of silently truncating them.
+      /"n_?ctx"\s*:\s*(\d{3,7})(?!\d)/,
+      /context size \((\d{3,7}) tokens\)/,
+      /maximum context length is (\d{3,7}) tokens/i,
+    ];
+
+    const candidates: number[] = [];
+    for (const pattern of patterns) {
+      const captured = pattern.exec(errorText)?.[1];
+      if (captured === undefined) {
+        continue;
+      }
+      const size = Number(captured);
+      // Values below 256 are treated as implausible and ignored.
+      if (Number.isFinite(size) && size >= 256) {
+        candidates.push(size);
+      }
+    }
+
+    return candidates.length > 0 ? Math.min(...candidates) : undefined;
+  }
+
+  /**
+   * Shrinks the session history so its estimated token count fits within a share of
+   * `contextWindow`.
+   *
+   * First runs a best-effort `compact()`; a failure there is logged and otherwise ignored.
+   * Then, if a session is attached, drops messages from the front of the history one at a
+   * time until the estimate is at or below the budget or only one message remains, and
+   * writes the result back to the session.
+   *
+   * NOTE(review): dropping from the front may remove a leading summary/system message or
+   * split a tool call from its result — confirm this is acceptable for the Message shapes in use.
+   *
+   * @param contextWindow - Context-window size in tokens, as reported by the provider error.
+   */
+  private async _compactAndTrimToFit(contextWindow: number): Promise<void> {
+    // Best-effort: if compaction fails (e.g., providers down) we still fall through to the hard trim.
+    try {
+      await this.compact();
+    } catch (error) {
+      console.warn('[Flynn:compact] Emergency compaction failed:', error);
+    }
+
+    const session = this._session;
+    if (!session) {
+      return;
+    }
+
+    // Budget = configured threshold percentage (default 80) of the discovered window.
+    const pct = this._compactionConfig?.thresholdPct ?? 80;
+    const budget = Math.floor(pct / 100 * contextWindow);
+
+    // Intentionally blunt: shed the oldest message and re-estimate until we fit.
+    // This path only runs after a real provider context overflow.
+    let kept = this.getHistory();
+    for (
+      let tokens = estimateMessageTokens(kept);
+      kept.length > 1 && tokens > budget;
+      tokens = estimateMessageTokens(kept)
+    ) {
+      kept = kept.slice(1);
+    }
+
+    session.replaceHistory(kept);
+  }
+
  /** Accumulate usage stats for a given tier. */
  private _trackUsage(tier: ModelTier, usage: TokenUsage): void {
    const existing = this._usageByTier.get(tier);