orchestrator: recover from context overflow on fallback

2026-02-13 21:19:02 -08:00
parent 944b2c916a
commit 151b48310e
4 changed files with 236 additions and 18 deletions
@@ -48,6 +48,17 @@
      ],
      "test_status": "pnpm test:run src/daemon/routing.test.ts + pnpm typecheck passing (full pnpm test:run fails in this sandbox due to EPERM listen/spawn)"
    },
    "context-overflow-recovery": {
      "status": "completed",
      "date": "2026-02-14",
      "summary": "Hardened voice-message and fallback behavior by using tier-specific model identity (modelName/contextWindow) for compaction heuristics and by intercepting tool-loop provider errors to rollback history, hard-trim on detected context overflows, and retry once with a clean, user-friendly failure message on repeated errors.",
      "files_modified": [
        "src/daemon/routing.ts",
        "src/backends/native/orchestrator.ts",
        "src/backends/native/orchestrator.test.ts"
      ],
      "test_status": "pnpm test:run src/backends/native/orchestrator.test.ts src/daemon/routing.test.ts + pnpm typecheck passing"
    },
    "p0-p1-implementation-plan": {
      "file": "2026-02-06-p0-p1-implementation-plan.md",
      "status": "completed",
@@ -605,6 +605,94 @@ describe('AgentOrchestrator', () => {
    });
  });
  describe('process()', () => {
    it('rolls back tool-loop provider errors, hard-trims on context overflow, and retries once', async () => {
      let callCount = 0;
      const mockClient: ModelClient = {
        chat: vi.fn().mockImplementation(async () => {
          callCount++;
          if (callCount === 1) {
            return {
              content: '',
              stopReason: 'tool_use',
              usage: { inputTokens: 10, outputTokens: 5 },
              toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
            } as ChatResponse;
          }
          if (callCount === 2) {
            // Simulate llama.cpp context overflow buried inside an aggregated router error.
            throw new Error(
              'llama-server error (400): {"error":{"type":"exceedcontextsizeerror","nprompttokens":9183,"nctx":4096}}',
            );
          }
          return {
            content: 'ok',
            stopReason: 'end_turn',
            usage: { inputTokens: 10, outputTokens: 5 },
          } as ChatResponse;
        }),
      };
      const router = new ModelRouter({
        default: mockClient,
        fallbackChain: [],
      });
      // Minimal Session stub that supports rollback via replaceHistory().
      const history: any[] = [];
      const session = {
        id: 'test',
        addMessage: vi.fn((m: any) => { history.push(m); }),
        getHistory: vi.fn(() => [...history]),
        clear: vi.fn(() => { history.length = 0; }),
        replaceHistory: vi.fn((msgs: any[]) => {
          history.length = 0;
          history.push(...msgs);
        }),
        getConfig: vi.fn(() => undefined),
        setConfig: vi.fn(),
        deleteConfig: vi.fn(),
      } as any;
      const registry = new ToolRegistry();
      registry.register({
        name: 'test.echo',
        description: 'echo',
        inputSchema: { type: 'object', properties: { text: { type: 'string' } }, required: ['text'] },
        execute: async (args: any) => ({ success: true, output: String(args.text ?? '') }),
      });
      const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
      const executor = new ToolExecutor(registry, hooks);
      const orchestrator = new AgentOrchestrator({
        modelRouter: router,
        systemPrompt: 'You are helpful.',
        session,
        toolRegistry: registry,
        toolExecutor: executor,
        primaryTier: 'default',
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 3,
      });
      const res = await orchestrator.process('hello');
      expect(res).toBe('ok');
      // Ensure we didn't persist the low-level error string in history.
      const textHistory = history
        .map(m => (typeof m.content === 'string' ? m.content : ''))
        .join('\n');
      expect(textHistory).not.toContain('Error in tool loop');
    });
  });
  describe('setModelTier()', () => {
    it('sets model tier on primary agent', () => {
      const orchestrator = new AgentOrchestrator({
@@ -9,11 +9,12 @@ import type { Attachment } from '../../channels/types.js';
 import { NativeAgent } from './agent.js';
 import type { ToolUseEvent } from './agent.js';
 import type { OutboundAttachmentCollector } from './attachments.js';
-import { shouldCompact } from '../../context/tokens.js';
+import { estimateMessageTokens, shouldCompact } from '../../context/tokens.js';
 import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
 import { estimateCost } from '../../models/costs.js';
 import { auditLogger } from '../../audit/index.js';
 import { buildAdaptiveMemoryContext, buildRecentMemoryContext } from '../../memory/adaptive.js';
 import { buildUserMessage } from '../../models/media.js';
 // ── Public types ──────────────────────────────────────────────────────
@@ -227,7 +228,48 @@ export class AgentOrchestrator {
  async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
    this._injectMemoryContext(userMessage);
    await this.compactIfNeeded();
-    return this._agent.process(userMessage, attachments);
+
    // Snapshot history so we can rollback if the underlying tool loop returns an error message.
    // This avoids persisting low-level provider errors to the user-visible conversation state.
    const before = this.getHistory();
    const result = await this._agent.process(userMessage, attachments);
    // NativeAgent currently converts tool-loop exceptions into a user-visible error string.
    // Intercept a few common cases here to self-heal (context overflow) and/or degrade gracefully.
    if (this._isToolLoopErrorMessage(result)) {
      // Roll back the user message + error message inserted by the agent.
      this._restoreHistory(before);
      const underlying = this._stripToolLoopErrorPrefix(result);
      const ctx = this._extractContextWindowFromError(underlying);
      if (ctx) {
        // Attempt: compact + hard-trim to fit the discovered context window, then retry once.
        await this._compactAndTrimToFit(ctx);
        const retry = await this._agent.process(userMessage, attachments);
        if (!this._isToolLoopErrorMessage(retry)) {
          return retry;
        }
        // If we still failed, roll back again so we don't persist the error string.
        this._restoreHistory(before);
      }
      // Persist a short, user-friendly failure message (without provider internals).
      const friendly =
        [
          'I ran into a model/provider error while processing that message.',
          '',
          'Try again. If it keeps happening:',
          '1. Run `/compact` or `/reset` to shrink the conversation context.',
          '2. Switch to a different model tier (e.g. `/model local`).',
        ].join('\n');
      // Re-add the user message so the conversation state matches what the user sent.
      this._appendUserAndAssistant(userMessage, attachments, friendly);
      return friendly;
    }
    return result;
  }
  /**
@@ -445,6 +487,77 @@ export class AgentOrchestrator {
    await this.compact();
  }
  private _isToolLoopErrorMessage(text: string): boolean {
    return text.startsWith('Error in tool loop (iteration ');
  }
  private _stripToolLoopErrorPrefix(text: string): string {
    const m = text.match(/^Error in tool loop \(iteration \d+\):\s*(.*)$/s);
    return m ? m[1] : text;
  }
  private _restoreHistory(messages: Message[]): void {
    if (this._session) {
      this._session.replaceHistory(messages);
      return;
    }
    // No session available; nothing safe to do here.
  }
  private _appendUserAndAssistant(userMessage: string, attachments: Attachment[] | undefined, assistantText: string): void {
    if (!this._session) {
      return;
    }
    const userMsg = buildUserMessage(userMessage, attachments);
    this._session.addMessage(userMsg);
    this._session.addMessage({ role: 'assistant', content: assistantText });
  }
  private _extractContextWindowFromError(errorText: string): number | undefined {
    // Try a few common patterns and pick the smallest plausible context window.
    // Example llama.cpp error:
    //   exceeds the available context size (4096 tokens) ... "nctx":4096
    const candidates: number[] = [];
    const jsonNctx = errorText.match(/"nctx"\s*:\s*(\d{3,7})/);
    if (jsonNctx) { candidates.push(Number(jsonNctx[1])); }
    const paren = errorText.match(/context size \((\d{3,7}) tokens\)/);
    if (paren) { candidates.push(Number(paren[1])); }
    const maxContext = errorText.match(/maximum context length is (\d{3,7}) tokens/i);
    if (maxContext) { candidates.push(Number(maxContext[1])); }
    const valid = candidates.filter(n => Number.isFinite(n) && n >= 256);
    if (valid.length === 0) {
      return undefined;
    }
    return Math.min(...valid);
  }
  private async _compactAndTrimToFit(contextWindow: number): Promise<void> {
    // Compaction is best-effort; if it fails (e.g., providers down), fall back to a hard trim.
    try {
      await this.compact();
    } catch (error) {
      console.warn('[Flynn:compact] Emergency compaction failed:', error);
    }
    if (!this._session) {
      return;
    }
    const threshold = Math.floor((this._compactionConfig?.thresholdPct ?? 80) / 100 * contextWindow);
    let messages = this.getHistory();
    let estimated = estimateMessageTokens(messages);
    // Drop oldest messages until we're under budget.
    // This is intentionally blunt; it only triggers after a real provider context overflow.
    while (messages.length > 1 && estimated > threshold) {
      messages = messages.slice(1);
      estimated = estimateMessageTokens(messages);
    }
    this._session.replaceHistory(messages);
  }
  /** Accumulate usage stats for a given tier. */
  private _trackUsage(tier: ModelTier, usage: TokenUsage): void {
    const existing = this._usageByTier.get(tier);
@@ -79,10 +79,6 @@ export function createMessageRouter(deps: {
    const baseSid = agentConfigName
      ? `${channel}:${senderId}:${agentConfigName}`
      : `${channel}:${senderId}`;
    const sessionId = tierFromMetadata ? `${baseSid}:${tierFromMetadata}` : baseSid;
    let entry = agents.get(sessionId);
    if (!entry) {
    const session = deps.sessionManager.getSession(channel, senderId);
    // Read per-session model tier override (persisted in SQLite)
@@ -95,9 +91,19 @@ export function createMessageRouter(deps: {
      ?? deps.config.agents.primary_tier
      ?? 'default';
    // Cache agents by tier too so switching tiers updates context-window heuristics.
    const sessionId = `${baseSid}:${effectiveTier}`;
    let entry = agents.get(sessionId);
    if (!entry) {
      // Use agent config overrides where available, falling back to global config
      const effectiveSystemPrompt = agentConfig?.systemPrompt ?? deps.systemPrompt;
-      const effectiveProvider = deps.config.models.default.provider;
+
      const modelsConfig = deps.config.models as Record<string, { provider?: string; model?: string; context_window?: number } | undefined>;
      const tierConfig = modelsConfig[effectiveTier] ?? deps.config.models.default;
      const effectiveProvider = tierConfig?.provider ?? deps.config.models.default.provider;
      const effectiveModelName = tierConfig?.model ?? deps.config.models.default.model;
      const effectiveContextWindow = tierConfig?.context_window ?? deps.config.models.default.context_window;
      const delegationConfig: DelegationConfig = {
        compaction: deps.config.agents.delegation.compaction ?? 'fast',
@@ -181,8 +187,8 @@ export function createMessageRouter(deps: {
          summaryMaxTokens: deps.config.compaction.summary_max_tokens,
          importanceThreshold: deps.config.compaction.importance_threshold,
        } : undefined,
-        modelName: deps.config.models.default.model,
+        modelName: effectiveModelName,
-        contextWindow: deps.config.models.default.context_window,
+        contextWindow: effectiveContextWindow,
        memoryStore: deps.memoryStore,
        memoryInjectionStrategy: deps.config.memory?.injection_strategy,
        memoryMaxInjectionTokens: deps.config.memory?.max_injection_tokens,