orchestrator: recover from context overflow on fallback
This commit is contained in:
@@ -48,6 +48,17 @@
|
|||||||
],
|
],
|
||||||
"test_status": "pnpm test:run src/daemon/routing.test.ts + pnpm typecheck passing (full pnpm test:run fails in this sandbox due to EPERM listen/spawn)"
|
"test_status": "pnpm test:run src/daemon/routing.test.ts + pnpm typecheck passing (full pnpm test:run fails in this sandbox due to EPERM listen/spawn)"
|
||||||
},
|
},
|
||||||
|
"context-overflow-recovery": {
|
||||||
|
"status": "completed",
|
||||||
|
"date": "2026-02-14",
|
||||||
|
"summary": "Hardened voice-message and fallback behavior by using tier-specific model identity (modelName/contextWindow) for compaction heuristics and by intercepting tool-loop provider errors to rollback history, hard-trim on detected context overflows, and retry once with a clean, user-friendly failure message on repeated errors.",
|
||||||
|
"files_modified": [
|
||||||
|
"src/daemon/routing.ts",
|
||||||
|
"src/backends/native/orchestrator.ts",
|
||||||
|
"src/backends/native/orchestrator.test.ts"
|
||||||
|
],
|
||||||
|
"test_status": "pnpm test:run src/backends/native/orchestrator.test.ts src/daemon/routing.test.ts + pnpm typecheck passing"
|
||||||
|
},
|
||||||
"p0-p1-implementation-plan": {
|
"p0-p1-implementation-plan": {
|
||||||
"file": "2026-02-06-p0-p1-implementation-plan.md",
|
"file": "2026-02-06-p0-p1-implementation-plan.md",
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
|
|||||||
@@ -605,6 +605,94 @@ describe('AgentOrchestrator', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('process()', () => {
|
||||||
|
it('rolls back tool-loop provider errors, hard-trims on context overflow, and retries once', async () => {
|
||||||
|
let callCount = 0;
|
||||||
|
const mockClient: ModelClient = {
|
||||||
|
chat: vi.fn().mockImplementation(async () => {
|
||||||
|
callCount++;
|
||||||
|
if (callCount === 1) {
|
||||||
|
return {
|
||||||
|
content: '',
|
||||||
|
stopReason: 'tool_use',
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hi' } }],
|
||||||
|
} as ChatResponse;
|
||||||
|
}
|
||||||
|
if (callCount === 2) {
|
||||||
|
// Simulate llama.cpp context overflow buried inside an aggregated router error.
|
||||||
|
throw new Error(
|
||||||
|
'llama-server error (400): {"error":{"type":"exceedcontextsizeerror","nprompttokens":9183,"nctx":4096}}',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
content: 'ok',
|
||||||
|
stopReason: 'end_turn',
|
||||||
|
usage: { inputTokens: 10, outputTokens: 5 },
|
||||||
|
} as ChatResponse;
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
const router = new ModelRouter({
|
||||||
|
default: mockClient,
|
||||||
|
fallbackChain: [],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Minimal Session stub that supports rollback via replaceHistory().
|
||||||
|
const history: any[] = [];
|
||||||
|
const session = {
|
||||||
|
id: 'test',
|
||||||
|
addMessage: vi.fn((m: any) => { history.push(m); }),
|
||||||
|
getHistory: vi.fn(() => [...history]),
|
||||||
|
clear: vi.fn(() => { history.length = 0; }),
|
||||||
|
replaceHistory: vi.fn((msgs: any[]) => {
|
||||||
|
history.length = 0;
|
||||||
|
history.push(...msgs);
|
||||||
|
}),
|
||||||
|
getConfig: vi.fn(() => undefined),
|
||||||
|
setConfig: vi.fn(),
|
||||||
|
deleteConfig: vi.fn(),
|
||||||
|
} as any;
|
||||||
|
|
||||||
|
const registry = new ToolRegistry();
|
||||||
|
registry.register({
|
||||||
|
name: 'test.echo',
|
||||||
|
description: 'echo',
|
||||||
|
inputSchema: { type: 'object', properties: { text: { type: 'string' } }, required: ['text'] },
|
||||||
|
execute: async (args: any) => ({ success: true, output: String(args.text ?? '') }),
|
||||||
|
});
|
||||||
|
|
||||||
|
const hooks = new HookEngine({ confirm: [], log: [], silent: [] });
|
||||||
|
const executor = new ToolExecutor(registry, hooks);
|
||||||
|
|
||||||
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
modelRouter: router,
|
||||||
|
systemPrompt: 'You are helpful.',
|
||||||
|
session,
|
||||||
|
toolRegistry: registry,
|
||||||
|
toolExecutor: executor,
|
||||||
|
primaryTier: 'default',
|
||||||
|
delegation: {
|
||||||
|
compaction: 'fast',
|
||||||
|
memory_extraction: 'default',
|
||||||
|
classification: 'complex',
|
||||||
|
tool_summarisation: 'default',
|
||||||
|
complex_reasoning: 'complex',
|
||||||
|
},
|
||||||
|
maxDelegationDepth: 3,
|
||||||
|
});
|
||||||
|
|
||||||
|
const res = await orchestrator.process('hello');
|
||||||
|
expect(res).toBe('ok');
|
||||||
|
|
||||||
|
// Ensure we didn't persist the low-level error string in history.
|
||||||
|
const textHistory = history
|
||||||
|
.map(m => (typeof m.content === 'string' ? m.content : ''))
|
||||||
|
.join('\n');
|
||||||
|
expect(textHistory).not.toContain('Error in tool loop');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('setModelTier()', () => {
|
describe('setModelTier()', () => {
|
||||||
it('sets model tier on primary agent', () => {
|
it('sets model tier on primary agent', () => {
|
||||||
const orchestrator = new AgentOrchestrator({
|
const orchestrator = new AgentOrchestrator({
|
||||||
|
|||||||
@@ -9,11 +9,12 @@ import type { Attachment } from '../../channels/types.js';
|
|||||||
import { NativeAgent } from './agent.js';
|
import { NativeAgent } from './agent.js';
|
||||||
import type { ToolUseEvent } from './agent.js';
|
import type { ToolUseEvent } from './agent.js';
|
||||||
import type { OutboundAttachmentCollector } from './attachments.js';
|
import type { OutboundAttachmentCollector } from './attachments.js';
|
||||||
import { shouldCompact } from '../../context/tokens.js';
|
import { estimateMessageTokens, shouldCompact } from '../../context/tokens.js';
|
||||||
import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
|
import { compactHistory, type CompactionConfig, type CompactionResult, DEFAULT_COMPACTION_CONFIG } from '../../context/compaction.js';
|
||||||
import { estimateCost } from '../../models/costs.js';
|
import { estimateCost } from '../../models/costs.js';
|
||||||
import { auditLogger } from '../../audit/index.js';
|
import { auditLogger } from '../../audit/index.js';
|
||||||
import { buildAdaptiveMemoryContext, buildRecentMemoryContext } from '../../memory/adaptive.js';
|
import { buildAdaptiveMemoryContext, buildRecentMemoryContext } from '../../memory/adaptive.js';
|
||||||
|
import { buildUserMessage } from '../../models/media.js';
|
||||||
|
|
||||||
// ── Public types ──────────────────────────────────────────────────────
|
// ── Public types ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -227,7 +228,48 @@ export class AgentOrchestrator {
|
|||||||
async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
|
async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
|
||||||
this._injectMemoryContext(userMessage);
|
this._injectMemoryContext(userMessage);
|
||||||
await this.compactIfNeeded();
|
await this.compactIfNeeded();
|
||||||
return this._agent.process(userMessage, attachments);
|
|
||||||
|
// Snapshot history so we can rollback if the underlying tool loop returns an error message.
|
||||||
|
// This avoids persisting low-level provider errors to the user-visible conversation state.
|
||||||
|
const before = this.getHistory();
|
||||||
|
|
||||||
|
const result = await this._agent.process(userMessage, attachments);
|
||||||
|
|
||||||
|
// NativeAgent currently converts tool-loop exceptions into a user-visible error string.
|
||||||
|
// Intercept a few common cases here to self-heal (context overflow) and/or degrade gracefully.
|
||||||
|
if (this._isToolLoopErrorMessage(result)) {
|
||||||
|
// Roll back the user message + error message inserted by the agent.
|
||||||
|
this._restoreHistory(before);
|
||||||
|
|
||||||
|
const underlying = this._stripToolLoopErrorPrefix(result);
|
||||||
|
const ctx = this._extractContextWindowFromError(underlying);
|
||||||
|
if (ctx) {
|
||||||
|
// Attempt: compact + hard-trim to fit the discovered context window, then retry once.
|
||||||
|
await this._compactAndTrimToFit(ctx);
|
||||||
|
const retry = await this._agent.process(userMessage, attachments);
|
||||||
|
if (!this._isToolLoopErrorMessage(retry)) {
|
||||||
|
return retry;
|
||||||
|
}
|
||||||
|
// If we still failed, roll back again so we don't persist the error string.
|
||||||
|
this._restoreHistory(before);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Persist a short, user-friendly failure message (without provider internals).
|
||||||
|
const friendly =
|
||||||
|
[
|
||||||
|
'I ran into a model/provider error while processing that message.',
|
||||||
|
'',
|
||||||
|
'Try again. If it keeps happening:',
|
||||||
|
'1. Run `/compact` or `/reset` to shrink the conversation context.',
|
||||||
|
'2. Switch to a different model tier (e.g. `/model local`).',
|
||||||
|
].join('\n');
|
||||||
|
|
||||||
|
// Re-add the user message so the conversation state matches what the user sent.
|
||||||
|
this._appendUserAndAssistant(userMessage, attachments, friendly);
|
||||||
|
return friendly;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -445,6 +487,77 @@ export class AgentOrchestrator {
|
|||||||
await this.compact();
|
await this.compact();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private _isToolLoopErrorMessage(text: string): boolean {
|
||||||
|
return text.startsWith('Error in tool loop (iteration ');
|
||||||
|
}
|
||||||
|
|
||||||
|
private _stripToolLoopErrorPrefix(text: string): string {
|
||||||
|
const m = text.match(/^Error in tool loop \(iteration \d+\):\s*(.*)$/s);
|
||||||
|
return m ? m[1] : text;
|
||||||
|
}
|
||||||
|
|
||||||
|
private _restoreHistory(messages: Message[]): void {
|
||||||
|
if (this._session) {
|
||||||
|
this._session.replaceHistory(messages);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// No session available; nothing safe to do here.
|
||||||
|
}
|
||||||
|
|
||||||
|
private _appendUserAndAssistant(userMessage: string, attachments: Attachment[] | undefined, assistantText: string): void {
|
||||||
|
if (!this._session) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const userMsg = buildUserMessage(userMessage, attachments);
|
||||||
|
this._session.addMessage(userMsg);
|
||||||
|
this._session.addMessage({ role: 'assistant', content: assistantText });
|
||||||
|
}
|
||||||
|
|
||||||
|
private _extractContextWindowFromError(errorText: string): number | undefined {
|
||||||
|
// Try a few common patterns and pick the smallest plausible context window.
|
||||||
|
// Example llama.cpp error:
|
||||||
|
// exceeds the available context size (4096 tokens) ... "nctx":4096
|
||||||
|
const candidates: number[] = [];
|
||||||
|
const jsonNctx = errorText.match(/"nctx"\s*:\s*(\d{3,7})/);
|
||||||
|
if (jsonNctx) { candidates.push(Number(jsonNctx[1])); }
|
||||||
|
const paren = errorText.match(/context size \((\d{3,7}) tokens\)/);
|
||||||
|
if (paren) { candidates.push(Number(paren[1])); }
|
||||||
|
const maxContext = errorText.match(/maximum context length is (\d{3,7}) tokens/i);
|
||||||
|
if (maxContext) { candidates.push(Number(maxContext[1])); }
|
||||||
|
|
||||||
|
const valid = candidates.filter(n => Number.isFinite(n) && n >= 256);
|
||||||
|
if (valid.length === 0) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
return Math.min(...valid);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async _compactAndTrimToFit(contextWindow: number): Promise<void> {
|
||||||
|
// Compaction is best-effort; if it fails (e.g., providers down), fall back to a hard trim.
|
||||||
|
try {
|
||||||
|
await this.compact();
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('[Flynn:compact] Emergency compaction failed:', error);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!this._session) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const threshold = Math.floor((this._compactionConfig?.thresholdPct ?? 80) / 100 * contextWindow);
|
||||||
|
let messages = this.getHistory();
|
||||||
|
let estimated = estimateMessageTokens(messages);
|
||||||
|
|
||||||
|
// Drop oldest messages until we're under budget.
|
||||||
|
// This is intentionally blunt; it only triggers after a real provider context overflow.
|
||||||
|
while (messages.length > 1 && estimated > threshold) {
|
||||||
|
messages = messages.slice(1);
|
||||||
|
estimated = estimateMessageTokens(messages);
|
||||||
|
}
|
||||||
|
|
||||||
|
this._session.replaceHistory(messages);
|
||||||
|
}
|
||||||
|
|
||||||
/** Accumulate usage stats for a given tier. */
|
/** Accumulate usage stats for a given tier. */
|
||||||
private _trackUsage(tier: ModelTier, usage: TokenUsage): void {
|
private _trackUsage(tier: ModelTier, usage: TokenUsage): void {
|
||||||
const existing = this._usageByTier.get(tier);
|
const existing = this._usageByTier.get(tier);
|
||||||
|
|||||||
+13
-7
@@ -79,10 +79,6 @@ export function createMessageRouter(deps: {
|
|||||||
const baseSid = agentConfigName
|
const baseSid = agentConfigName
|
||||||
? `${channel}:${senderId}:${agentConfigName}`
|
? `${channel}:${senderId}:${agentConfigName}`
|
||||||
: `${channel}:${senderId}`;
|
: `${channel}:${senderId}`;
|
||||||
const sessionId = tierFromMetadata ? `${baseSid}:${tierFromMetadata}` : baseSid;
|
|
||||||
|
|
||||||
let entry = agents.get(sessionId);
|
|
||||||
if (!entry) {
|
|
||||||
const session = deps.sessionManager.getSession(channel, senderId);
|
const session = deps.sessionManager.getSession(channel, senderId);
|
||||||
|
|
||||||
// Read per-session model tier override (persisted in SQLite)
|
// Read per-session model tier override (persisted in SQLite)
|
||||||
@@ -95,9 +91,19 @@ export function createMessageRouter(deps: {
|
|||||||
?? deps.config.agents.primary_tier
|
?? deps.config.agents.primary_tier
|
||||||
?? 'default';
|
?? 'default';
|
||||||
|
|
||||||
|
// Cache agents by tier too so switching tiers updates context-window heuristics.
|
||||||
|
const sessionId = `${baseSid}:${effectiveTier}`;
|
||||||
|
|
||||||
|
let entry = agents.get(sessionId);
|
||||||
|
if (!entry) {
|
||||||
// Use agent config overrides where available, falling back to global config
|
// Use agent config overrides where available, falling back to global config
|
||||||
const effectiveSystemPrompt = agentConfig?.systemPrompt ?? deps.systemPrompt;
|
const effectiveSystemPrompt = agentConfig?.systemPrompt ?? deps.systemPrompt;
|
||||||
const effectiveProvider = deps.config.models.default.provider;
|
|
||||||
|
const modelsConfig = deps.config.models as Record<string, { provider?: string; model?: string; context_window?: number } | undefined>;
|
||||||
|
const tierConfig = modelsConfig[effectiveTier] ?? deps.config.models.default;
|
||||||
|
const effectiveProvider = tierConfig?.provider ?? deps.config.models.default.provider;
|
||||||
|
const effectiveModelName = tierConfig?.model ?? deps.config.models.default.model;
|
||||||
|
const effectiveContextWindow = tierConfig?.context_window ?? deps.config.models.default.context_window;
|
||||||
|
|
||||||
const delegationConfig: DelegationConfig = {
|
const delegationConfig: DelegationConfig = {
|
||||||
compaction: deps.config.agents.delegation.compaction ?? 'fast',
|
compaction: deps.config.agents.delegation.compaction ?? 'fast',
|
||||||
@@ -181,8 +187,8 @@ export function createMessageRouter(deps: {
|
|||||||
summaryMaxTokens: deps.config.compaction.summary_max_tokens,
|
summaryMaxTokens: deps.config.compaction.summary_max_tokens,
|
||||||
importanceThreshold: deps.config.compaction.importance_threshold,
|
importanceThreshold: deps.config.compaction.importance_threshold,
|
||||||
} : undefined,
|
} : undefined,
|
||||||
modelName: deps.config.models.default.model,
|
modelName: effectiveModelName,
|
||||||
contextWindow: deps.config.models.default.context_window,
|
contextWindow: effectiveContextWindow,
|
||||||
memoryStore: deps.memoryStore,
|
memoryStore: deps.memoryStore,
|
||||||
memoryInjectionStrategy: deps.config.memory?.injection_strategy,
|
memoryInjectionStrategy: deps.config.memory?.injection_strategy,
|
||||||
memoryMaxInjectionTokens: deps.config.memory?.max_injection_tokens,
|
memoryMaxInjectionTokens: deps.config.memory?.max_injection_tokens,
|
||||||
|
|||||||
Reference in New Issue
Block a user