feat(agent): implement auto-escalation retries and track remediation state

2026-02-17 09:20:08 -08:00
parent bdded84a9b
commit 2273ffd020
4 changed files with 168 additions and 4 deletions
@@ -771,6 +771,106 @@ describe('AgentOrchestrator', () => {
        .join('\n');
      expect(textHistory).not.toContain('Error in tool loop');
    });
+
+    it('auto-escalates to complex tier when primary tier fails', async () => {
+      // The default tier always rejects; the complex tier answers successfully.
+      const failingDefault: ModelClient = {
+        chat: vi.fn().mockRejectedValue(new Error('default tier down')),
+      };
+      const workingComplex: ModelClient = {
+        chat: vi.fn().mockResolvedValue({
+          content: 'complex recovered response',
+          stopReason: 'end_turn',
+          usage: { inputTokens: 20, outputTokens: 10 },
+        }),
+      };
+      const modelRouter = new ModelRouter({
+        default: failingDefault,
+        complex: workingComplex,
+        fallbackChain: [],
+      });
+
+      // Minimal in-memory session backed by a plain array.
+      const stored: Message[] = [];
+      const overwrite = (next: Message[]): void => {
+        stored.length = 0;
+        stored.push(...next);
+      };
+      const session: Session = {
+        id: 'auto-escalate',
+        addMessage: vi.fn((msg: Message) => { stored.push(msg); }),
+        getHistory: vi.fn(() => stored.slice()),
+        clear: vi.fn(() => { stored.length = 0; }),
+        replaceHistory: vi.fn((msgs: Message[]) => { overwrite(msgs); }),
+        getConfig: vi.fn(() => undefined),
+        setConfig: vi.fn(),
+        deleteConfig: vi.fn(),
+      };
+
+      const agent = new AgentOrchestrator({
+        modelRouter,
+        systemPrompt: 'You are helpful.',
+        session,
+        primaryTier: 'default',
+        autoEscalate: true,
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 3,
+      });
+
+      const reply = await agent.process('please recover');
+
+      // The reply comes from the complex tier, and the orchestrator reports
+      // the default tier again once the retry has finished.
+      expect(reply).toBe('complex recovered response');
+      expect(agent.getModelTier()).toBe('default');
+      expect(failingDefault.chat).toHaveBeenCalled();
+      expect(workingComplex.chat).toHaveBeenCalled();
+    });
+
+    it('returns friendly error when auto-escalate is disabled and primary tier fails', async () => {
+      // Only a default tier is registered, and it always rejects.
+      const failingDefault: ModelClient = {
+        chat: vi.fn().mockRejectedValue(new Error('primary tier failure')),
+      };
+      const modelRouter = new ModelRouter({
+        default: failingDefault,
+        fallbackChain: [],
+      });
+
+      // Minimal in-memory session backed by a plain array.
+      const stored: Message[] = [];
+      const overwrite = (next: Message[]): void => {
+        stored.length = 0;
+        stored.push(...next);
+      };
+      const session: Session = {
+        id: 'no-auto-escalate',
+        addMessage: vi.fn((msg: Message) => { stored.push(msg); }),
+        getHistory: vi.fn(() => stored.slice()),
+        clear: vi.fn(() => { stored.length = 0; }),
+        replaceHistory: vi.fn((msgs: Message[]) => { overwrite(msgs); }),
+        getConfig: vi.fn(() => undefined),
+        setConfig: vi.fn(),
+        deleteConfig: vi.fn(),
+      };
+
+      const agent = new AgentOrchestrator({
+        modelRouter,
+        systemPrompt: 'You are helpful.',
+        session,
+        primaryTier: 'default',
+        autoEscalate: false,
+        delegation: {
+          compaction: 'fast',
+          memory_extraction: 'default',
+          classification: 'complex',
+          tool_summarisation: 'default',
+          complex_reasoning: 'complex',
+        },
+        maxDelegationDepth: 3,
+      });
+
+      const reply = await agent.process('fail');
+
+      // With escalation off, the caller gets the friendly message and the
+      // tier is unchanged.
+      expect(reply).toContain('model/provider error');
+      expect(agent.getModelTier()).toBe('default');
+    });
  });

  describe('setModelTier()', () => {
@@ -123,6 +123,10 @@ export interface OrchestratorConfig {
  memoryInjectionStrategy?: 'all' | 'recent' | 'adaptive';
  /** Maximum tokens allowed for injected memory context. */
  memoryMaxInjectionTokens?: number;
+  /** Automatically retry failed primary runs on a higher tier. */
+  autoEscalate?: boolean;
+  /** Tier to try for auto-escalation retries. Defaults to complex. */
+  autoEscalateTier?: ModelTier;
  /** Policy context for tool filtering (agent tier, provider). */
  toolPolicyContext?: ToolPolicyContext;
  /** Collector for outbound attachments queued by tools (e.g. media.send). */
@@ -153,6 +157,8 @@ export class AgentOrchestrator {
  private _memoryAutoExtract: boolean;
  private _memoryInjectionStrategy: 'all' | 'recent' | 'adaptive';
  private _memoryMaxInjectionTokens: number;
+  private _autoEscalate: boolean;
+  private _autoEscalateTier: ModelTier;
  private _systemPromptBase: string;
  private _usageByTier: Map<string, TierUsageStats> = new Map();
  private _lastContextAlertLevel: ContextAlertLevel | null = null;
@@ -172,6 +178,8 @@ export class AgentOrchestrator {
    this._memoryAutoExtract = config.memoryAutoExtract ?? true;
    this._memoryInjectionStrategy = config.memoryInjectionStrategy ?? 'all';
    this._memoryMaxInjectionTokens = config.memoryMaxInjectionTokens ?? 2000;
+    this._autoEscalate = config.autoEscalate ?? false;
+    this._autoEscalateTier = config.autoEscalateTier ?? 'complex';
    this._systemPromptBase = config.systemPrompt;

    // Create the primary NativeAgent for user-facing conversation
@@ -264,8 +272,28 @@ export class AgentOrchestrator {
    // Snapshot history so we can rollback if the underlying tool loop returns an error message.
    // This avoids persisting low-level provider errors to the user-visible conversation state.
    const before = this.getHistory();
+    const originalTier = this._agent.getModelTier();

-    const result = await this._agent.process(userMessage, attachments);
+    let result: string;
+    try {
+      result = await this._agent.process(userMessage, attachments);
+    } catch {
+      this._restoreHistory(before);
+      const escalated = await this._retryWithEscalation(userMessage, attachments, before, originalTier);
+      if (escalated) {
+        return escalated;
+      }
+      const friendly =
+        [
+          'I ran into a model/provider error while processing that message.',
+          '',
+          'Try again. If it keeps happening:',
+          '1. Run `/compact` or `/reset` to shrink the conversation context.',
+          '2. Switch to a different model tier (e.g. `/model local`).',
+        ].join('\n');
+      this._appendUserAndAssistant(userMessage, attachments, friendly);
+      return friendly;
+    }

    // NativeAgent currently converts tool-loop exceptions into a user-visible error string.
    // Intercept a few common cases here to self-heal (context overflow) and/or degrade gracefully.
@@ -286,6 +314,11 @@ export class AgentOrchestrator {
        this._restoreHistory(before);
      }

+      const escalated = await this._retryWithEscalation(userMessage, attachments, before, originalTier);
+      if (escalated) {
+        return escalated;
+      }
+
      // Persist a short, user-friendly failure message (without provider internals).
      const friendly =
        [
@@ -304,6 +337,38 @@ export class AgentOrchestrator {
    return result;
  }

+  /**
+   * Re-runs a failed user turn once on a different model tier.
+   *
+   * The retry target is `_autoEscalateTier`; when that equals the tier that
+   * just failed, `'complex'` is tried instead. Once the agent has been moved
+   * to the retry tier, it is always switched back to `originalTier` before
+   * this method returns.
+   *
+   * @param userMessage - The user message to re-process.
+   * @param attachments - Attachments that accompanied the message, if any.
+   * @param historyBefore - History snapshot taken before the failed attempt;
+   *   restored when the retry fails as well.
+   * @param originalTier - Tier the agent was on when the first attempt failed.
+   * @returns The retry's response, or `null` when auto-escalation is disabled,
+   *   no distinct tier with a registered client is available, or the retry
+   *   itself fails (by throwing or by returning a tool-loop error message).
+   */
+  private async _retryWithEscalation(
+    userMessage: string,
+    attachments: Attachment[] | undefined,
+    historyBefore: Message[],
+    originalTier: ModelTier,
+  ): Promise<string | null> {
+    if (!this._autoEscalate) {
+      return null;
+    }
+
+    // Escalating onto the tier that just failed is pointless; fall back to 'complex'.
+    let targetTier = this._autoEscalateTier;
+    if (targetTier === originalTier) {
+      targetTier = 'complex';
+    }
+
+    // Give up when there is still no distinct tier, or the router has no client for it.
+    if (targetTier === originalTier || !this._modelRouter.getClient(targetTier)) {
+      return null;
+    }
+
+    this._agent.setModelTier(targetTier);
+    try {
+      let retry: string | null;
+      try {
+        retry = await this._agent.process(userMessage, attachments);
+      } catch {
+        // A throwing retry is handled like an error-string retry: roll back and
+        // report failure, so the caller can emit its friendly message instead
+        // of the provider exception escaping from process().
+        retry = null;
+      }
+
+      if (retry !== null && !this._isToolLoopErrorMessage(retry)) {
+        return retry;
+      }
+
+      // Drop whatever the failed retry wrote to the conversation.
+      this._restoreHistory(historyBefore);
+      return null;
+    } finally {
+      // The escalation is per-turn only; never leave the agent on the retry tier.
+      this._agent.setModelTier(originalTier);
+    }
+  }
+
  /**
   * Force-compact the current conversation history regardless of threshold.
   * Returns the compaction result, or null if there was nothing to compact