feat(agent): implement auto-escalation retries and track remediation state

2026-02-17 09:20:08 -08:00
parent bdded84a9b
commit 2273ffd020
4 changed files with 168 additions and 4 deletions
@@ -771,6 +771,106 @@ describe('AgentOrchestrator', () => {
        .join('\n');
      expect(textHistory).not.toContain('Error in tool loop');
    });
    // Scenario: the primary ('default') client always rejects while the
    // 'complex' client answers. With autoEscalate enabled the orchestrator is
    // expected to hand back the complex-tier reply and still report 'default'
    // as its model tier afterwards.
    it('auto-escalates to complex tier when primary tier fails', async () => {
      const complexClient: ModelClient = {
        chat: vi.fn().mockResolvedValue({
          content: 'complex recovered response',
          stopReason: 'end_turn',
          usage: { inputTokens: 20, outputTokens: 10 },
        }),
      };
      const defaultClient: ModelClient = {
        chat: vi.fn().mockRejectedValue(new Error('default tier down')),
      };

      const modelRouter = new ModelRouter({
        default: defaultClient,
        complex: complexClient,
        fallbackChain: [],
      });

      // In-memory stand-in for a persisted session; `log` is the backing store.
      const log: Message[] = [];
      const session: Session = {
        id: 'auto-escalate',
        addMessage: vi.fn((entry: Message) => {
          log.push(entry);
        }),
        getHistory: vi.fn(() => log.slice()),
        clear: vi.fn(() => {
          log.splice(0, log.length);
        }),
        replaceHistory: vi.fn((next: Message[]) => {
          log.splice(0, log.length, ...next);
        }),
        getConfig: vi.fn(() => undefined),
        setConfig: vi.fn(),
        deleteConfig: vi.fn(),
      };

      const subject = new AgentOrchestrator({
        modelRouter,
        systemPrompt: 'You are helpful.',
        session,
        primaryTier: 'default',
        autoEscalate: true,
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 3,
      });

      const reply = await subject.process('please recover');

      // The caller sees the escalated answer...
      expect(reply).toBe('complex recovered response');
      // ...the orchestrator reports its original tier again...
      expect(subject.getModelTier()).toBe('default');
      // ...and both tiers were actually exercised.
      expect(defaultClient.chat).toHaveBeenCalled();
      expect(complexClient.chat).toHaveBeenCalled();
    });
    // Scenario: only a 'default' client is registered and it always rejects.
    // With autoEscalate switched off the orchestrator is expected to resolve
    // with its friendly error text and keep reporting the 'default' tier.
    it('returns friendly error when auto-escalate is disabled and primary tier fails', async () => {
      const defaultClient: ModelClient = {
        chat: vi.fn().mockRejectedValue(new Error('primary tier failure')),
      };
      const modelRouter = new ModelRouter({
        default: defaultClient,
        fallbackChain: [],
      });

      // In-memory stand-in for a persisted session; `log` is the backing store.
      const log: Message[] = [];
      const session: Session = {
        id: 'no-auto-escalate',
        addMessage: vi.fn((entry: Message) => {
          log.push(entry);
        }),
        getHistory: vi.fn(() => log.slice()),
        clear: vi.fn(() => {
          log.splice(0, log.length);
        }),
        replaceHistory: vi.fn((next: Message[]) => {
          log.splice(0, log.length, ...next);
        }),
        getConfig: vi.fn(() => undefined),
        setConfig: vi.fn(),
        deleteConfig: vi.fn(),
      };

      const subject = new AgentOrchestrator({
        modelRouter,
        systemPrompt: 'You are helpful.',
        session,
        primaryTier: 'default',
        autoEscalate: false,
        delegation: {
          compaction: 'fast',
          memory_extraction: 'default',
          classification: 'complex',
          tool_summarisation: 'default',
          complex_reasoning: 'complex',
        },
        maxDelegationDepth: 3,
      });

      const reply = await subject.process('fail');

      // The failure surfaces as the friendly message, not as a rejection.
      expect(reply).toContain('model/provider error');
      expect(subject.getModelTier()).toBe('default');
    });
  });
  describe('setModelTier()', () => {
@@ -123,6 +123,10 @@ export interface OrchestratorConfig {
  memoryInjectionStrategy?: 'all' | 'recent' | 'adaptive';
  /** Maximum tokens allowed for injected memory context. */
  memoryMaxInjectionTokens?: number;
  /** Automatically retry failed primary runs on a higher tier. */
  autoEscalate?: boolean;
  /** Tier to try for auto-escalation retries. Defaults to complex. */
  autoEscalateTier?: ModelTier;
  /** Policy context for tool filtering (agent tier, provider). */
  toolPolicyContext?: ToolPolicyContext;
  /** Collector for outbound attachments queued by tools (e.g. media.send). */
@@ -153,6 +157,8 @@ export class AgentOrchestrator {
  private _memoryAutoExtract: boolean;
  private _memoryInjectionStrategy: 'all' | 'recent' | 'adaptive';
  private _memoryMaxInjectionTokens: number;
  private _autoEscalate: boolean;
  private _autoEscalateTier: ModelTier;
  private _systemPromptBase: string;
  private _usageByTier: Map<string, TierUsageStats> = new Map();
  private _lastContextAlertLevel: ContextAlertLevel | null = null;
@@ -172,6 +178,8 @@ export class AgentOrchestrator {
    this._memoryAutoExtract = config.memoryAutoExtract ?? true;
    this._memoryInjectionStrategy = config.memoryInjectionStrategy ?? 'all';
    this._memoryMaxInjectionTokens = config.memoryMaxInjectionTokens ?? 2000;
    this._autoEscalate = config.autoEscalate ?? false;
    this._autoEscalateTier = config.autoEscalateTier ?? 'complex';
    this._systemPromptBase = config.systemPrompt;
    // Create the primary NativeAgent for user-facing conversation
@@ -264,8 +272,28 @@ export class AgentOrchestrator {
    // Snapshot history so we can rollback if the underlying tool loop returns an error message.
    // This avoids persisting low-level provider errors to the user-visible conversation state.
    const before = this.getHistory();
    const originalTier = this._agent.getModelTier();
-    const result = await this._agent.process(userMessage, attachments);
+    let result: string;
    try {
      result = await this._agent.process(userMessage, attachments);
    } catch {
      this._restoreHistory(before);
      const escalated = await this._retryWithEscalation(userMessage, attachments, before, originalTier);
      if (escalated) {
        return escalated;
      }
      const friendly =
        [
          'I ran into a model/provider error while processing that message.',
          '',
          'Try again. If it keeps happening:',
          '1. Run `/compact` or `/reset` to shrink the conversation context.',
          '2. Switch to a different model tier (e.g. `/model local`).',
        ].join('\n');
      this._appendUserAndAssistant(userMessage, attachments, friendly);
      return friendly;
    }
    // NativeAgent currently converts tool-loop exceptions into a user-visible error string.
    // Intercept a few common cases here to self-heal (context overflow) and/or degrade gracefully.
@@ -286,6 +314,11 @@ export class AgentOrchestrator {
        this._restoreHistory(before);
      }
      const escalated = await this._retryWithEscalation(userMessage, attachments, before, originalTier);
      if (escalated) {
        return escalated;
      }
      // Persist a short, user-friendly failure message (without provider internals).
      const friendly =
        [
@@ -304,6 +337,38 @@ export class AgentOrchestrator {
    return result;
  }
  /**
   * Re-run a failed turn once on an escalation tier.
   *
   * The retry is skipped (returning `null`) when auto-escalation is disabled,
   * when no tier other than `originalTier` can be chosen, or when the model
   * router has no client for the chosen tier.
   *
   * @param userMessage   The user message to re-process.
   * @param attachments   Attachments that accompanied the message, if any.
   * @param historyBefore History snapshot taken before the failed attempt;
   *                      restored whenever the retry itself fails.
   * @param originalTier  Tier the agent was on when the first attempt failed;
   *                      always re-applied before this method returns.
   * @returns The retry's response, or `null` when no retry was attempted or
   *          the retry failed (threw, or produced a message recognised by
   *          `_isToolLoopErrorMessage`). A failed retry never rejects, so
   *          callers can fall through to their friendly-error path.
   */
  private async _retryWithEscalation(
    userMessage: string,
    attachments: Attachment[] | undefined,
    historyBefore: Message[],
    originalTier: ModelTier,
  ): Promise<string | null> {
    if (!this._autoEscalate) {
      return null;
    }
    // Retrying on the tier that just failed is pointless; fall back to 'complex'.
    let targetTier = this._autoEscalateTier;
    if (targetTier === originalTier) {
      targetTier = 'complex';
    }
    if (targetTier === originalTier || !this._modelRouter.getClient(targetTier)) {
      return null;
    }
    this._agent.setModelTier(targetTier);
    try {
      const retry = await this._agent.process(userMessage, attachments);
      if (!this._isToolLoopErrorMessage(retry)) {
        return retry;
      }
      // The escalated attempt also ended in an error message: roll it back.
      this._restoreHistory(historyBefore);
      return null;
    } catch {
      // The escalated attempt threw. Without this handler the rejection would
      // escape to the caller (which awaits this method with no handler of its
      // own) and bypass the friendly-error path. Treat it like any other
      // failed retry: roll back and report "no result".
      this._restoreHistory(historyBefore);
      return null;
    } finally {
      // Escalation is per-turn only; always put the agent back on its own tier.
      this._agent.setModelTier(originalTier);
    }
  }
  /**
   * Force-compact the current conversation history regardless of threshold.
   * Returns the compaction result, or null if there was nothing to compact
@@ -117,9 +117,6 @@ function validateUnsupportedConfig(config: Config): void {
  if (config.backends.opencode.enabled) {
    throw new Error('backends.opencode is not implemented yet. Set backends.opencode.enabled=false.');
  }
  if (config.agents.auto_escalate) {
    throw new Error('agents.auto_escalate is not implemented yet. Set agents.auto_escalate=false.');
  }
 }
 export async function startDaemon(config: Config, options?: StartDaemonOptions): Promise<DaemonContext> {
@@ -309,6 +309,8 @@ export function createMessageRouter(deps: {
        memoryAutoExtract: deps.config.memory?.auto_extract,
        memoryInjectionStrategy: deps.config.memory?.injection_strategy,
        memoryMaxInjectionTokens: deps.config.memory?.max_injection_tokens,
        autoEscalate: deps.config.agents.auto_escalate,
        autoEscalateTier: 'complex',
        toolPolicyContext,
        attachmentCollector: collector,
      });