From 151b48310ed9f4fc68aee9ee506a3b0c31b809c0 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Fri, 13 Feb 2026 21:19:02 -0800 Subject: [PATCH] orchestrator: recover from context overflow on fallback --- docs/plans/state.json | 11 +++ src/backends/native/orchestrator.test.ts | 88 +++++++++++++++++ src/backends/native/orchestrator.ts | 117 ++++++++++++++++++++++- src/daemon/routing.ts | 38 ++++---- 4 files changed, 236 insertions(+), 18 deletions(-) diff --git a/docs/plans/state.json b/docs/plans/state.json index 01b6095..ad365c3 100644 --- a/docs/plans/state.json +++ b/docs/plans/state.json @@ -48,6 +48,17 @@ ], "test_status": "pnpm test:run src/daemon/routing.test.ts + pnpm typecheck passing (full pnpm test:run fails in this sandbox due to EPERM listen/spawn)" }, + "context-overflow-recovery": { + "status": "completed", + "date": "2026-02-14", + "summary": "Hardened voice-message and fallback behavior by using tier-specific model identity (modelName/contextWindow) for compaction heuristics and by intercepting tool-loop provider errors to rollback history, hard-trim on detected context overflows, and retry once with a clean, user-friendly failure message on repeated errors.", + "files_modified": [ + "src/daemon/routing.ts", + "src/backends/native/orchestrator.ts", + "src/backends/native/orchestrator.test.ts" + ], + "test_status": "pnpm test:run src/backends/native/orchestrator.test.ts src/daemon/routing.test.ts + pnpm typecheck passing" + }, "p0-p1-implementation-plan": { "file": "2026-02-06-p0-p1-implementation-plan.md", "status": "completed", diff --git a/src/backends/native/orchestrator.test.ts b/src/backends/native/orchestrator.test.ts index 6db3be7..303bc86 100644 --- a/src/backends/native/orchestrator.test.ts +++ b/src/backends/native/orchestrator.test.ts @@ -605,6 +605,94 @@ describe('AgentOrchestrator', () => { }); }); + describe('process()', () => { + it('rolls back tool-loop provider errors, hard-trims on context overflow, and retries once', async () => { + let callCount = 0; + const mockClient: ModelClient = { + chat: vi.fn().mockImplementation(async () => { + callCount++; + if (callCount === 1) { + return { + content: '', + stopReason: 'tool_use', + usage: { inputTokens: 10, outputTokens: 5 }, + toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }], + } as ChatResponse; + } + if (callCount === 2) { + // Simulate llama.cpp context overflow buried inside an aggregated router error. + throw new Error( + 'llama-server error (400): {"error":{"type":"exceedcontextsizeerror","nprompttokens":9183,"nctx":4096}}', + ); + } + return { + content: 'ok', + stopReason: 'end_turn', + usage: { inputTokens: 10, outputTokens: 5 }, + } as ChatResponse; + }), + }; + + const router = new ModelRouter({ + default: mockClient, + fallbackChain: [], + }); + + // Minimal Session stub that supports rollback via replaceHistory(). + const history: any[] = []; + const session = { + id: 'test', + addMessage: vi.fn((m: any) => { history.push(m); }), + getHistory: vi.fn(() => [...history]), + clear: vi.fn(() => { history.length = 0; }), + replaceHistory: vi.fn((msgs: any[]) => { + history.length = 0; + history.push(...msgs); + }), + getConfig: vi.fn(() => undefined), + setConfig: vi.fn(), + deleteConfig: vi.fn(), + } as any; + + const registry = new ToolRegistry(); + registry.register({ + name: 'test.echo', + description: 'echo', + inputSchema: { type: 'object', properties: { text: { type: 'string' } }, required: ['text'] }, + execute: async (args: any) => ({ success: true, output: String(args.text ?? '') }), + }); + + const hooks = new HookEngine({ confirm: [], log: [], silent: [] }); + const executor = new ToolExecutor(registry, hooks); + + const orchestrator = new AgentOrchestrator({ + modelRouter: router, + systemPrompt: 'You are helpful.', + session, + toolRegistry: registry, + toolExecutor: executor, + primaryTier: 'default', + delegation: { + compaction: 'fast', + memory_extraction: 'default', + classification: 'complex', + tool_summarisation: 'default', + complex_reasoning: 'complex', + }, + maxDelegationDepth: 3, + }); + + const res = await orchestrator.process('hello'); + expect(res).toBe('ok'); + + // Ensure we didn't persist the low-level error string in history. + const textHistory = history + .map(m => (typeof m.content === 'string' ? m.content : '')) + .join('\n'); + expect(textHistory).not.toContain('Error in tool loop'); + }); + }); + describe('setModelTier()', () => { it('sets model tier on primary agent', () => { const orchestrator = new AgentOrchestrator({ diff --git a/src/backends/native/orchestrator.ts b/src/backends/native/orchestrator.ts index 98bf396..23be6ad 100644 --- a/src/backends/native/orchestrator.ts +++ b/src/backends/native/orchestrator.ts @@ -9,11 +9,12 @@ import type { Attachment } from '../../channels/types.js'; import { NativeAgent } from './agent.js'; import type { ToolUseEvent } from './agent.js'; import type { OutboundAttachmentCollector } from './attachments.js'; -import { shouldCompact } from '../../context/tokens.js'; +import { estimateMessageTokens, shouldCompact } from '../../context/tokens.js'; import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js'; import { estimateCost } from '../../models/costs.js'; import { auditLogger } from '../../audit/index.js'; import { buildAdaptiveMemoryContext, buildRecentMemoryContext } from '../../memory/adaptive.js'; +import { buildUserMessage } from '../../models/media.js'; // ── Public types ────────────────────────────────────────────────────── @@ -227,7 +228,48 @@ export class AgentOrchestrator { async process(userMessage: string, attachments?: Attachment[]): Promise { this._injectMemoryContext(userMessage); await this.compactIfNeeded(); - return this._agent.process(userMessage, attachments); + + // Snapshot history so we can rollback if the underlying tool loop returns an error message. + // This avoids persisting low-level provider errors to the user-visible conversation state. + const before = this.getHistory(); + + const result = await this._agent.process(userMessage, attachments); + + // NativeAgent currently converts tool-loop exceptions into a user-visible error string. + // Intercept a few common cases here to self-heal (context overflow) and/or degrade gracefully. + if (this._isToolLoopErrorMessage(result)) { + // Roll back the user message + error message inserted by the agent. + this._restoreHistory(before); + + const underlying = this._stripToolLoopErrorPrefix(result); + const ctx = this._extractContextWindowFromError(underlying); + if (ctx) { + // Attempt: compact + hard-trim to fit the discovered context window, then retry once. + await this._compactAndTrimToFit(ctx); + const retry = await this._agent.process(userMessage, attachments); + if (!this._isToolLoopErrorMessage(retry)) { + return retry; + } + // If we still failed, roll back again so we don't persist the error string. + this._restoreHistory(before); + } + + // Persist a short, user-friendly failure message (without provider internals). + const friendly = + [ + 'I ran into a model/provider error while processing that message.', + '', + 'Try again. If it keeps happening:', + '1. Run `/compact` or `/reset` to shrink the conversation context.', + '2. Switch to a different model tier (e.g. `/model local`).', + ].join('\n'); + + // Re-add the user message so the conversation state matches what the user sent. + this._appendUserAndAssistant(userMessage, attachments, friendly); + return friendly; + } + + return result; } /** @@ -445,6 +487,77 @@ export class AgentOrchestrator { await this.compact(); } + private _isToolLoopErrorMessage(text: string): boolean { + return text.startsWith('Error in tool loop (iteration '); + } + + private _stripToolLoopErrorPrefix(text: string): string { + const m = text.match(/^Error in tool loop \(iteration \d+\):\s*(.*)$/s); + return m ? m[1] : text; + } + + private _restoreHistory(messages: Message[]): void { + if (this._session) { + this._session.replaceHistory(messages); + return; + } + // No session available; nothing safe to do here. + } + + private _appendUserAndAssistant(userMessage: string, attachments: Attachment[] | undefined, assistantText: string): void { + if (!this._session) { + return; + } + const userMsg = buildUserMessage(userMessage, attachments); + this._session.addMessage(userMsg); + this._session.addMessage({ role: 'assistant', content: assistantText }); + } + + private _extractContextWindowFromError(errorText: string): number | undefined { + // Try a few common patterns and pick the smallest plausible context window. + // Example llama.cpp error: + // exceeds the available context size (4096 tokens) ... "nctx":4096 + const candidates: number[] = []; + const jsonNctx = errorText.match(/"nctx"\s*:\s*(\d{3,7})/); + if (jsonNctx) { candidates.push(Number(jsonNctx[1])); } + const paren = errorText.match(/context size \((\d{3,7}) tokens\)/); + if (paren) { candidates.push(Number(paren[1])); } + const maxContext = errorText.match(/maximum context length is (\d{3,7}) tokens/i); + if (maxContext) { candidates.push(Number(maxContext[1])); } + + const valid = candidates.filter(n => Number.isFinite(n) && n >= 256); + if (valid.length === 0) { + return undefined; + } + return Math.min(...valid); + } + + private async _compactAndTrimToFit(contextWindow: number): Promise { + // Compaction is best-effort; if it fails (e.g., providers down), fall back to a hard trim. + try { + await this.compact(); + } catch (error) { + console.warn('[Flynn:compact] Emergency compaction failed:', error); + } + + if (!this._session) { + return; + } + + const threshold = Math.floor((this._compactionConfig?.thresholdPct ?? 80) / 100 * contextWindow); + let messages = this.getHistory(); + let estimated = estimateMessageTokens(messages); + + // Drop oldest messages until we're under budget. + // This is intentionally blunt; it only triggers after a real provider context overflow. + while (messages.length > 1 && estimated > threshold) { + messages = messages.slice(1); + estimated = estimateMessageTokens(messages); + } + + this._session.replaceHistory(messages); + } + /** Accumulate usage stats for a given tier. */ private _trackUsage(tier: ModelTier, usage: TokenUsage): void { const existing = this._usageByTier.get(tier); diff --git a/src/daemon/routing.ts b/src/daemon/routing.ts index 4877f86..af5aa78 100644 --- a/src/daemon/routing.ts +++ b/src/daemon/routing.ts @@ -79,25 +79,31 @@ export function createMessageRouter(deps: { const baseSid = agentConfigName ? `${channel}:${senderId}:${agentConfigName}` : `${channel}:${senderId}`; - const sessionId = tierFromMetadata ? `${baseSid}:${tierFromMetadata}` : baseSid; + const session = deps.sessionManager.getSession(channel, senderId); + + // Read per-session model tier override (persisted in SQLite) + const sessionTierOverride = session.getConfig('modelTier') as ModelTier | undefined; + + // Resolution chain: metadata (cron) → session override → agent config → global default + const effectiveTier = tierFromMetadata + ?? sessionTierOverride + ?? agentConfig?.modelTier + ?? deps.config.agents.primary_tier + ?? 'default'; + + // Cache agents by tier too so switching tiers updates context-window heuristics. + const sessionId = `${baseSid}:${effectiveTier}`; let entry = agents.get(sessionId); if (!entry) { - const session = deps.sessionManager.getSession(channel, senderId); - - // Read per-session model tier override (persisted in SQLite) - const sessionTierOverride = session.getConfig('modelTier') as ModelTier | undefined; - - // Resolution chain: metadata (cron) → session override → agent config → global default - const effectiveTier = tierFromMetadata - ?? sessionTierOverride - ?? agentConfig?.modelTier - ?? deps.config.agents.primary_tier - ?? 'default'; - // Use agent config overrides where available, falling back to global config const effectiveSystemPrompt = agentConfig?.systemPrompt ?? deps.systemPrompt; - const effectiveProvider = deps.config.models.default.provider; + + const modelsConfig = deps.config.models as Record; + const tierConfig = modelsConfig[effectiveTier] ?? deps.config.models.default; + const effectiveProvider = tierConfig?.provider ?? deps.config.models.default.provider; + const effectiveModelName = tierConfig?.model ?? deps.config.models.default.model; + const effectiveContextWindow = tierConfig?.context_window ?? deps.config.models.default.context_window; const delegationConfig: DelegationConfig = { compaction: deps.config.agents.delegation.compaction ?? 'fast', @@ -181,8 +187,8 @@ export function createMessageRouter(deps: { summaryMaxTokens: deps.config.compaction.summary_max_tokens, importanceThreshold: deps.config.compaction.importance_threshold, } : undefined, - modelName: deps.config.models.default.model, - contextWindow: deps.config.models.default.context_window, + modelName: effectiveModelName, + contextWindow: effectiveContextWindow, memoryStore: deps.memoryStore, memoryInjectionStrategy: deps.config.memory?.injection_strategy, memoryMaxInjectionTokens: deps.config.memory?.max_injection_tokens,