import type { ModelClient, Message, ChatRequest, ChatResponse, ModelToolCall, TokenUsage } from '../../models/types.js';
import type { ModelRouter, ModelTier } from '../../models/router.js';
import type { Session } from '../../session/index.js';
import type { ToolRegistry } from '../../tools/registry.js';
import type { ToolExecutor } from '../../tools/executor.js';
import type { ToolResult } from '../../tools/types.js';
import type { ToolPolicyContext } from '../../tools/policy.js';
import type { Attachment } from '../../channels/types.js';
import type { OutboundAttachmentCollector } from './attachments.js';
import { buildUserMessage } from '../../models/media.js';
import { getElevationWindow } from '../../security/elevation.js';
import { auditLogger } from '../../audit/index.js';
import { AsyncLocalStorage } from 'node:async_hooks';

/**
 * Notification passed to the `onToolUse` callback around each tool execution.
 *
 * The agent emits a `'start'` event (carrying `args`) immediately before a
 * tool is executed and an `'end'` event (carrying `result`) immediately after.
 * `tool` is the registry's internal tool name when the API name can be
 * resolved, otherwise the name the model supplied.
 */
export interface ToolUseEvent {
  type: 'start' | 'end';
  tool: string;
  args?: unknown;
  result?: ToolResult;
}

/**
 * Diagnostic summary of the tools visible under the current policy context,
 * as returned by `NativeAgent.getToolInventorySnapshot()`.
 *
 * The string identity fields are taken from the tool policy context and are
 * `'-'` when no value is available.
 */
export interface ToolInventorySnapshot {
  sessionId: string;
  agent: string;
  provider: string;
  skill: string;
  /** Number of tools returned by the registry's policy-filtered list. */
  internalCount: number;
  /** Number of tools returned by the registry's policy-filtered API-format list. */
  exposedCount: number;
  /** Internal tool names that start with `browser.`. */
  internalBrowser: string[];
  /** Exposed (API-format) tool names that start with `browser_`. */
  exposedBrowser: string[];
}

/** Construction options for `NativeAgent`. */
export interface NativeAgentConfig {
  /** Model client, or a router (in which case requests are sent with the agent's current tier). */
  modelClient: ModelClient | ModelRouter;
  /** System prompt sent with every model request. */
  systemPrompt: string;
  /** Optional session used for history and config; without it history is kept in memory. */
  session?: Session;
  /** Tool registry; the tool loop is only used when both registry and executor are provided. */
  toolRegistry?: ToolRegistry;
  /** Tool executor; the tool loop is only used when both registry and executor are provided. */
  toolExecutor?: ToolExecutor;
  /** Maximum number of tool-loop iterations per run (defaults to 200). */
  maxIterations?: number;
  /** Callback invoked before and after each tool execution. */
  onToolUse?: (event: ToolUseEvent) => void;
  /** Policy context for tool filtering (agent tier, provider). */
  toolPolicyContext?: ToolPolicyContext;
  /** Collector for outbound attachments queued by tools (e.g. media.send). */
  attachmentCollector?: OutboundAttachmentCollector;
  /** Hard timeout for each model request in milliseconds. */
  modelTimeoutMs?: number;
}

// Internal message type for the tool loop — supports both text and structured content blocks.
// This is broader than Message to accommodate Anthropic's tool_use/tool_result block format.
interface LoopMessage {
  role: 'user' | 'assistant';
  content: string | unknown[];
}

/** Name/id scraped from a `tool_use` JSON object that the model emitted as plain text. */
interface PseudoToolUse {
  name?: string;
  id?: string;
}

/** A tool call recovered from assistant text, with the `[start, end)` span it occupied. */
interface ExtractedTextToolCall {
  toolCall: ModelToolCall;
  start: number;
  end: number;
}

/** Session config key under which the last audio attachment is persisted as JSON. */
const LAST_AUDIO_ATTACHMENT_CONFIG_KEY = 'lastAudioAttachment';

/** Audio payload in the shape expected by the `audio.transcribe` tool arguments. */
interface AudioToolInput {
  data?: string;
  url?: string;
  mime_type?: string;
}

/** Non-sensitive summary of audio tool arguments, used for audit logging. */
interface AudioToolArgSummary {
  hasData: boolean;
  hasUrl: boolean;
  mimeType?: string;
}

/** Audio input a caller may supply explicitly for the current turn. */
export interface NativeAgentTurnAudioInput {
  data?: string;
  url?: string;
  mime_type?: string;
}

/** Per-run state carried through async calls via `AsyncLocalStorage`. */
interface NativeAgentRunContext {
  turnAudioInput?: AudioToolInput;
}

/**
 * Conversational agent that talks to a model client (or router) and, when a
 * tool registry and executor are configured, drives an iterative tool-use loop.
 *
 * History is stored in the provided session when there is one, otherwise in
 * memory. A run can be cancelled with {@link NativeAgent.cancel}; cancellation
 * resolves the run with the text `'Operation cancelled by user.'`.
 */
export class NativeAgent {
  private static readonly EMPTY_RESPONSE_FALLBACK = 'I could not generate a response for that. Please try again.';
  private static readonly DEFAULT_MODEL_TIMEOUT_MS = 120_000;
  // Largest delay setTimeout honours; Node treats anything larger as a 1 ms delay.
  private static readonly MAX_TIMER_DELAY_MS = 2_147_483_647;

  private modelClient: ModelClient | ModelRouter;
  private systemPrompt: string;
  private session?: Session;
  private inMemoryHistory: Message[] = [];
  private currentTier: ModelTier = 'default';
  private toolRegistry?: ToolRegistry;
  private toolExecutor?: ToolExecutor;
  private maxIterations: number;
  private onToolUse?: (event: ToolUseEvent) => void;
  private _totalUsage: TokenUsage = { inputTokens: 0, outputTokens: 0 };
  private _callCount: number = 0;
  private _toolPolicyContext?: ToolPolicyContext;
  private _attachmentCollector?: OutboundAttachmentCollector;
  private _thinking: boolean = false;
  private _lastToolFingerprint?: string;
  private _cancelRequested = false;
  private _runInProgress = false;
  private _runAbortController?: AbortController;
  private modelTimeoutMs: number;
  private readonly _runContext = new AsyncLocalStorage<NativeAgentRunContext>();

  /**
   * @param config Agent configuration. `maxIterations` defaults to 200.
   *   `modelTimeoutMs` is floored and clamped to `[1, 2147483647]`; a missing
   *   or non-finite value falls back to 120000 ms.
   */
  constructor(config: NativeAgentConfig) {
    this.modelClient = config.modelClient;
    this.systemPrompt = config.systemPrompt;
    this.session = config.session;
    this.toolRegistry = config.toolRegistry;
    this.toolExecutor = config.toolExecutor;
    this.maxIterations = config.maxIterations ?? 200;
    this.onToolUse = config.onToolUse;
    this._toolPolicyContext = config.toolPolicyContext;
    this._attachmentCollector = config.attachmentCollector;
    // Clamp to the timer maximum: a larger delay would silently become 1 ms
    // and make every model request time out immediately.
    this.modelTimeoutMs = Number.isFinite(config.modelTimeoutMs)
      ? Math.min(
          NativeAgent.MAX_TIMER_DELAY_MS,
          Math.max(1, Math.floor(config.modelTimeoutMs as number)),
        )
      : NativeAgent.DEFAULT_MODEL_TIMEOUT_MS;
  }

  /** Current conversation history: the session's history, or a copy of the in-memory list. */
  private get history(): Message[] {
    return this.session?.getHistory() ?? [...this.inMemoryHistory];
  }

  /**
   * Runs one user turn and resolves with the assistant's reply text.
   *
   * A leading `!!think` enables thinking mode for this message only and is
   * stripped from the text. The user message is appended to history, then the
   * single-turn path or the tool loop is used depending on whether both a tool
   * registry and executor are configured.
   *
   * @param userMessage Raw user text.
   * @param attachments Optional inbound attachments for this turn.
   * @param turnAudioInput Optional explicit audio input; when absent, the last
   *   audio attachment (if any) is used for `audio.transcribe` argument hydration.
   * @returns The assistant reply, or `'Operation cancelled by user.'` on cancel.
   * @throws Re-throws any non-abort error raised on the single-turn path.
   */
  async process(
    userMessage: string,
    attachments?: Attachment[],
    turnAudioInput?: NativeAgentTurnAudioInput,
  ): Promise<string> {
    this._cancelRequested = false;
    this._runAbortController = new AbortController();
    const normalizedTurnAudioInput = this.normalizeTurnAudioInput(turnAudioInput)
      ?? this.extractLatestAudioInputFromAttachments(attachments);
    if ('clearAbort' in this.modelClient && typeof this.modelClient.clearAbort === 'function') {
      this.modelClient.clearAbort();
    }
    this._runInProgress = true;

    return await this._runContext.run({ turnAudioInput: normalizedTurnAudioInput }, async () => {
      try {
        // Detect and strip !!think prefix for per-message thinking mode
        if (userMessage.startsWith('!!think ') || userMessage === '!!think') {
          this._thinking = true;
          userMessage = userMessage.replace(/^!!think\s*/, '').trim() || 'Think about this.';
        } else {
          this._thinking = false;
        }

        const userMsg = buildUserMessage(userMessage, attachments);
        if (this.session) {
          this.session.addMessage(userMsg);
        } else {
          this.inMemoryHistory.push(userMsg);
        }

        // If no tools configured, use the simple single-turn path
        if (!this.toolRegistry || !this.toolExecutor) {
          return await this.singleTurn();
        }
        return await this.toolLoop();
      } catch (error) {
        if (this.isAbortError(error)) {
          const cancelledMsg = 'Operation cancelled by user.';
          this.addToHistory({ role: 'assistant', content: cancelledMsg });
          return cancelledMsg;
        }
        throw error;
      } finally {
        this._runInProgress = false;
        this._cancelRequested = false;
        this._runAbortController = undefined;
      }
    });
  }

  /**
   * Sends the full history to the model once and records the reply.
   *
   * Thinking content, when present, is prepended to the returned text but is
   * not stored in history.
   */
  private async singleTurn(): Promise<string> {
    this.throwIfCancelled();
    const request: ChatRequest = {
      messages: this.history,
      system: this.systemPrompt,
      ...(this._thinking ? { thinking: true } : {}),
    };
    const response = await this.chatWithRouter(request);
    this.throwIfCancelled();

    this._totalUsage.inputTokens += response.usage.inputTokens;
    this._totalUsage.outputTokens += response.usage.outputTokens;
    this._callCount++;

    const normalizedContent = this.normalizeAssistantContent(response.content);

    // Prepend thinking content if present
    let finalContent = normalizedContent;
    if (response.thinkingContent) {
      finalContent = `\n${response.thinkingContent}\n\n\n${normalizedContent}`;
    }

    const assistantMsg: Message = { role: 'assistant', content: normalizedContent };
    this.addToHistory(assistantMsg);
    return finalContent;
  }

  /**
   * Iterative model/tool loop.
   *
   * Each iteration sends the accumulated messages to the model, executes any
   * requested tool calls (including ones recovered from plain-text output) and
   * feeds the results back. The loop ends when the model answers without tool
   * calls, when the same tool-call fingerprint repeats three times in a row,
   * when an error or cancellation occurs (returned as text), or when
   * `maxIterations` is reached.
   *
   * Only the final assistant text is written to history; intermediate
   * tool_use/tool_result blocks live in the local loop message list.
   */
  private async toolLoop(): Promise<string> {
    const toolRegistry = this.toolRegistry;
    const toolExecutor = this.toolExecutor;
    if (!toolRegistry || !toolExecutor) {
      throw new Error('Tool loop requires tool registry and executor');
    }

    const tools = toolRegistry.filteredToAnthropicFormat(this._toolPolicyContext);

    // Track whether untrusted content (web/fetched/tool output) has been introduced
    // during this run. Used to harden against prompt injection.
    let untrustedContentSeen = false;

    // Detect tool inventory changes to combat conversational inertia in long sessions.
    // When tools change (e.g. new tools added between restarts), the model's prior messages
    // saying "I can't do that" can override tool definitions. Injecting a system note fixes this.
    const currentFingerprint = tools.map(t => t.name).sort().join(',');
    const hasHistory = this.history.length > 1; // more than just the current user message
    let effectiveSystem = this.systemPrompt;
    if (hasHistory && this._lastToolFingerprint !== currentFingerprint) {
      const toolNames = tools.map(t => t.name).join(', ');
      effectiveSystem += `\n\n[Tool inventory updated — available tools: ${toolNames}. Use these tools directly; do not attempt workarounds for functionality that a tool already provides.]`;
    }
    this._lastToolFingerprint = currentFingerprint;

    // Build the loop messages from existing history.
    // These are the messages sent to the model, including any structured tool blocks.
    const loopMessages: LoopMessage[] = this.history.map(m => ({
      role: m.role,
      content: m.content,
    }));

    // Track consecutive identical tool call fingerprints to detect loops.
    // Local LLMs are especially prone to repeatedly requesting the same tool call.
    let lastFingerprint: string | undefined;
    let consecutiveRepeats = 0;
    const maxConsecutiveRepeats = 3;
    let lastToolResults: string[] = [];

    // Track consecutive calls to the same tool (even with different args).
    // Local models often call the same tool with slight query variations.
    let lastToolName: string | undefined;
    let sameToolStreak = 0;
    const maxSameToolStreak = 4; // nudge after 4 calls to the same tool
    let nudged = false;
    let actionIntentNudged = false;

    for (let iteration = 0; iteration < this.maxIterations; iteration++) {
      try {
        this.throwIfCancelled();

        // Build request — cast loopMessages to Message[] because the underlying
        // model client will pass them through to the API which accepts structured content.
        const request = {
          messages: loopMessages as unknown as Message[],
          system: effectiveSystem,
          tools,
          ...(this._thinking ? { thinking: true } : {}),
        };
        const response = await this.chatWithRouter(request);
        this.throwIfCancelled();

        this._totalUsage.inputTokens += response.usage.inputTokens;
        this._totalUsage.outputTokens += response.usage.outputTokens;
        this._callCount++;

        // Some backends emit tool_use JSON as plain text rather than structured tool metadata.
        // Recover those calls when possible so the loop can continue safely.
        let toolCalls = response.toolCalls ?? [];
        let assistantTextContent = response.content;
        if (toolCalls.length === 0) {
          const extracted = this.extractToolCallsFromText(response.content);
          if (extracted && extracted.toolCalls.length > 0) {
            // Only keep recovered calls that name a tool the registry knows.
            const executableCalls = extracted.toolCalls.filter(tc => Boolean(toolRegistry.getByApiName(tc.name)));
            if (executableCalls.length > 0) {
              toolCalls = executableCalls;
              assistantTextContent = extracted.remainingText;
            }
          }
        }
        if (toolCalls.length === 0) {
          const recovered = this.extractMalformedShellToolCall(response.content);
          if (recovered && toolRegistry.getByApiName(recovered.toolCall.name)) {
            toolCalls = [recovered.toolCall];
            assistantTextContent = recovered.remainingText;
          }
        }

        const wantsToolUse = toolCalls.length > 0;
        if (!wantsToolUse) {
          const pseudoToolUse = this.extractPseudoToolUse(response.content);
          // One-time nudge when the model announces an action but emits no tool call.
          if (this.shouldNudgeForMissingToolCall(response.content, pseudoToolUse) && !actionIntentNudged) {
            actionIntentNudged = true;
            const normalized = this.normalizeAssistantContent(response.content);
            loopMessages.push({ role: 'assistant', content: normalized });
            loopMessages.push({
              role: 'user',
              content: 'You said you would perform an action now, but no tool call was emitted. If a tool is needed, call it now. If blocked, explain the exact blocker.',
            });
            continue;
          }

          const baseContent = pseudoToolUse
            ? this.buildPseudoToolUseWarning(response.content, pseudoToolUse)
            : this.normalizeAssistantContent(response.content);
          let finalContent = baseContent;
          if (response.thinkingContent) {
            finalContent = `\n${response.thinkingContent}\n\n\n${baseContent}`;
          }
          const assistantMsg: Message = { role: 'assistant', content: baseContent };
          this.addToHistory(assistantMsg);
          return finalContent;
        }

        // Check for repeated tool calls — build a fingerprint from tool names + args
        const fingerprint = toolCalls
          .map(tc => `${tc.name}:${JSON.stringify(tc.args)}`)
          .sort()
          .join('|');
        if (fingerprint === lastFingerprint) {
          consecutiveRepeats++;
        } else {
          consecutiveRepeats = 1;
          lastFingerprint = fingerprint;
        }

        // Track consecutive calls to the same tool (by name, ignoring args)
        const toolNames = toolCalls.map(tc => tc.name).sort().join(',');
        if (toolNames === lastToolName) {
          sameToolStreak++;
        } else {
          sameToolStreak = 1;
          lastToolName = toolNames;
          nudged = false;
        }

        // Build the assistant message with tool_use content blocks
        const assistantContent: unknown[] = [];
        if (assistantTextContent) {
          assistantContent.push({ type: 'text', text: assistantTextContent });
        }
        for (const tc of toolCalls) {
          assistantContent.push({
            type: 'tool_use',
            id: tc.id,
            name: tc.name,
            input: tc.args,
          });
        }
        loopMessages.push({ role: 'assistant', content: assistantContent });

        // Execute each tool call and collect results
        const toolResultBlocks: unknown[] = [];
        lastToolResults = [];
        for (const tc of toolCalls) {
          this.throwIfCancelled();
          const internalName = toolRegistry.getByApiName(tc.name)?.name ?? tc.name;
          this.onToolUse?.({ type: 'start', tool: internalName, args: tc.args });

          let elevationUntilMs: number | undefined;
          let elevationReason: string | undefined;
          let elevationId: string | undefined;
          // Capture the session so the closures below are narrowed without `!`.
          const session = this.session;
          if (session) {
            const elevation = getElevationWindow({
              get: (key) => session.getConfig(key),
              set: (key, value) => session.setConfig(key, value),
              delete: (key) => session.deleteConfig(key),
            }, {
              auditContext: {
                sessionId: session.id,
                channel: this._toolPolicyContext?.channel ?? 'unknown',
                sender: this._toolPolicyContext?.sender ?? 'unknown',
              },
            });
            if (elevation.window) {
              elevationUntilMs = elevation.window.untilMs;
              elevationId = elevation.window.id;
              elevationReason = elevation.window.reason;
            }
          }

          const perCallContext: ToolPolicyContext | undefined = this._toolPolicyContext
            ? {
                ...this._toolPolicyContext,
                untrustedContent: untrustedContentSeen,
                elevatedHostUntilMs: elevationUntilMs,
                elevatedHostReason: elevationReason,
                elevatedHostId: elevationId,
              }
            : undefined;

          const toolArgs = this.normalizeToolArgsForExecution(internalName, tc.args);
          const result = await toolExecutor.execute(internalName, toolArgs, perCallContext, {
            signal: this._runAbortController?.signal,
          });
          this.onToolUse?.({ type: 'end', tool: internalName, result });

          // Output of web/browser fetch tools is treated as untrusted for the rest of the run.
          const provenance = (internalName === 'web.fetch' || internalName === 'web.search' || internalName === 'browser.content')
            ? 'fetched_content'
            : 'tool_output';
          if (provenance === 'fetched_content') {
            untrustedContentSeen = true;
          }

          const rawContent = result.success ? result.output : (result.error ?? 'Unknown error');
          const resultContent = `[provenance=${provenance} tool=${internalName} untrusted=${provenance === 'fetched_content' ? 'true' : 'false'}]\n${rawContent}\n[/provenance]`;
          toolResultBlocks.push({
            type: 'tool_result',
            tool_use_id: tc.id,
            content: resultContent,
            is_error: !result.success,
          });
          if (result.success && result.output) {
            lastToolResults.push(result.output);
          }
        }

        // If the same tool has been called too many times, append a nudge
        // telling the model to use what it has. This combats local models
        // that endlessly retry searches with slight query variations.
        let nudgeMessage: string | null = null;
        if (sameToolStreak >= maxSameToolStreak && !nudged) {
          nudged = true;
          nudgeMessage = `You have called this tool ${sameToolStreak} times in a row. You have enough information — do NOT call it again. Summarize what you have found and respond to the user now.`;
        }

        // Add tool results as a user message
        loopMessages.push({ role: 'user', content: toolResultBlocks });
        if (nudgeMessage) {
          loopMessages.push({ role: 'user', content: nudgeMessage });
        }

        // Break out if the model is stuck in a repeated tool call loop
        if (consecutiveRepeats >= maxConsecutiveRepeats) {
          const toolOutput = lastToolResults.length > 0
            ? lastToolResults.join('\n\n')
            : 'No results available.';
          const breakMsg = `Tool loop detected (same tool called ${consecutiveRepeats} times). Returning last results:\n\n${toolOutput}`;
          const assistantMsg: Message = { role: 'assistant', content: breakMsg };
          this.addToHistory(assistantMsg);
          return breakMsg;
        }
      } catch (error) {
        if (this.isAbortError(error)) {
          const cancelledMsg = 'Operation cancelled by user.';
          const assistantMsg: Message = { role: 'assistant', content: cancelledMsg };
          this.addToHistory(assistantMsg);
          return cancelledMsg;
        }
        // Any other failure ends the run and is reported to the user as text.
        const errorMsg = `Error in tool loop (iteration ${iteration + 1}): ${error instanceof Error ? error.message : String(error)}`;
        const assistantMsg: Message = { role: 'assistant', content: errorMsg };
        this.addToHistory(assistantMsg);
        return errorMsg;
      }
    }

    // Max iterations reached
    const warningMsg = `Stopped after reaching max iterations (${this.maxIterations}). The task may be incomplete.`;
    const assistantMsg: Message = { role: 'assistant', content: warningMsg };
    this.addToHistory(assistantMsg);
    return warningMsg;
  }

  /**
   * Heuristic: does the reply announce an imminent action ("I'll run…",
   * "let me check…") without an accompanying tool call?
   *
   * @returns `false` for empty content or when a pseudo tool_use was detected;
   *   otherwise `true` only if both an intent phrase and an action verb match.
   */
  private shouldNudgeForMissingToolCall(content: string, pseudoToolUse: PseudoToolUse | null): boolean {
    if (!content || pseudoToolUse) {
      return false;
    }
    const normalized = content.toLowerCase();
    const intentRegex = /\b(i(?:'m| am)? going to|i(?:'ll| will)|let me|proceeding(?: now)?|i can(?: now)?)\b/;
    if (!intentRegex.test(normalized)) {
      return false;
    }
    const actionRegex = /\b(create|run|execute|call|use|check|fetch|search|read|write|send|retry|proceed|attempt|apply|delete|update|list)\b/;
    return actionRegex.test(normalized);
  }

  /**
   * Sends a chat request through the client (or router, using the current
   * tier), racing it against the model timeout and the abort signal.
   *
   * @throws An error named `TimeoutError` after `modelTimeoutMs`, or an error
   *   named `AbortError` when the run or request signal aborts first.
   */
  private async chatWithRouter(request: ChatRequest): Promise<ChatResponse> {
    const runSignal = this._runAbortController?.signal;
    const requestSignal = request.signal;
    const signal = runSignal && requestSignal
      ? AbortSignal.any([runSignal, requestSignal])
      : (runSignal ?? requestSignal);
    const requestWithSignal = signal ? { ...request, signal } : request;

    const requestPromise = 'getClient' in this.modelClient
      ? (this.modelClient as ModelRouter).chat(requestWithSignal, this.currentTier)
      : this.modelClient.chat(requestWithSignal);

    let timer: NodeJS.Timeout | undefined;
    let abortCleanup: (() => void) | undefined;

    const timeoutPromise = new Promise<never>((_, reject) => {
      timer = setTimeout(() => {
        const error = new Error(`Model request timed out after ${this.modelTimeoutMs}ms`);
        error.name = 'TimeoutError';
        reject(error);
      }, this.modelTimeoutMs);
      // Do not let a pending timeout keep the process alive.
      timer.unref?.();
    });

    const abortPromise = signal
      ? new Promise<never>((_, reject) => {
          if (signal.aborted) {
            const error = new Error('Operation cancelled by user.');
            error.name = 'AbortError';
            reject(error);
            return;
          }
          const onAbort = () => {
            const error = new Error('Operation cancelled by user.');
            error.name = 'AbortError';
            reject(error);
          };
          signal.addEventListener('abort', onAbort, { once: true });
          abortCleanup = () => signal.removeEventListener('abort', onAbort);
        })
      : null;

    try {
      return await Promise.race([requestPromise, timeoutPromise, ...(abortPromise ? [abortPromise] : [])]);
    } finally {
      // Always release the timer and the abort listener, whichever promise won.
      if (timer) {
        clearTimeout(timer);
      }
      abortCleanup?.();
    }
  }

  /** Appends a message to the session history, or to the in-memory history when there is no session. */
  private addToHistory(msg: Message): void {
    if (this.session) {
      this.session.addMessage(msg);
    } else {
      this.inMemoryHistory.push(msg);
    }
  }

  /** Clears history, the remembered tool fingerprint and the usage counters. */
  reset(): void {
    if (this.session) {
      this.session.clear();
    } else {
      this.inMemoryHistory = [];
    }
    this._lastToolFingerprint = undefined;
    this.resetUsage();
  }

  /** Returns accumulated token usage and the number of model calls since the last reset. */
  getUsage(): { inputTokens: number; outputTokens: number; calls: number } {
    return { ...this._totalUsage, calls: this._callCount };
  }

  /** Zeroes the token usage and call counters. */
  resetUsage(): void {
    this._totalUsage = { inputTokens: 0, outputTokens: 0 };
    this._callCount = 0;
  }

  /** Returns a shallow copy of the conversation history. */
  getHistory(): Message[] {
    return [...this.history];
  }

  /** Sets the tier passed to the router on subsequent requests. */
  setModelTier(tier: ModelTier): void {
    this.currentTier = tier;
  }

  /** Returns the tier currently passed to the router. */
  getModelTier(): ModelTier {
    return this.currentTier;
  }

  /** Replaces the system prompt used for subsequent requests. */
  setSystemPrompt(prompt: string): void {
    this.systemPrompt = prompt;
  }

  /** Sets or clears the tool-use callback. */
  setOnToolUse(callback: ((event: ToolUseEvent) => void) | undefined): void {
    this.onToolUse = callback;
  }

  /** Sets or clears the policy context used for tool filtering and execution. */
  setToolPolicyContext(context: ToolPolicyContext | undefined): void {
    this._toolPolicyContext = context;
  }

  /** Returns the current tool policy context, if any. */
  getToolPolicyContext(): ToolPolicyContext | undefined {
    return this._toolPolicyContext;
  }

  /**
   * Summarises the tools visible under the current policy context.
   * Without a tool registry, every string field is `'-'` and all counts are zero.
   */
  getToolInventorySnapshot(): ToolInventorySnapshot {
    if (!this.toolRegistry) {
      return {
        sessionId: '-',
        agent: '-',
        provider: '-',
        skill: '-',
        internalCount: 0,
        exposedCount: 0,
        internalBrowser: [],
        exposedBrowser: [],
      };
    }

    const internal = this.toolRegistry.filteredList(this._toolPolicyContext).map((tool) => tool.name);
    const exposed = this.toolRegistry.filteredToAnthropicFormat(this._toolPolicyContext).map((tool) => tool.name);
    const context = this._toolPolicyContext;
    return {
      sessionId: context?.sessionId ?? '-',
      agent: context?.agent ?? '-',
      provider: context?.provider ?? '-',
      skill: context?.skillName ?? '-',
      internalCount: internal.length,
      exposedCount: exposed.length,
      // Internal names are matched with a dot prefix, exposed names with an underscore prefix.
      internalBrowser: internal.filter((name) => name.startsWith('browser.')),
      exposedBrowser: exposed.filter((name) => name.startsWith('browser_')),
    };
  }

  /** Returns the sorted internal names of the tools allowed by the current policy context. */
  getAvailableToolNames(): string[] {
    if (!this.toolRegistry) {
      return [];
    }
    return this.toolRegistry
      .filteredList(this._toolPolicyContext)
      .map((tool) => tool.name)
      .sort();
  }

  /** Sets or clears the outbound attachment collector. */
  setAttachmentCollector(collector: OutboundAttachmentCollector | undefined): void {
    this._attachmentCollector = collector;
  }

  /** Returns the outbound attachment collector, if any. */
  getAttachmentCollector(): OutboundAttachmentCollector | undefined {
    return this._attachmentCollector;
  }

  /**
   * Requests cancellation of the run in progress. No-op when idle.
   * Aborts the run signal and, if the client exposes `requestAbort`, calls it.
   */
  cancel(): void {
    if (this._runInProgress) {
      this._cancelRequested = true;
      this._runAbortController?.abort();
      if ('requestAbort' in this.modelClient && typeof this.modelClient.requestAbort === 'function') {
        this.modelClient.requestAbort();
      }
    }
  }

  /** Whether a run is currently in progress (and can therefore be cancelled). */
  isCancellable(): boolean {
    return this._runInProgress;
  }

  /** Throws an error named `AbortError` if cancellation was requested or the run signal aborted. */
  private throwIfCancelled(): void {
    if (!this._cancelRequested && !this._runAbortController?.signal.aborted) {
      return;
    }
    const error = new Error('Operation cancelled by user.');
    error.name = 'AbortError';
    throw error;
  }

  /** True for `Error` instances whose `name` is `'AbortError'`. */
  private isAbortError(error: unknown): boolean {
    return error instanceof Error && error.name === 'AbortError';
  }

  /** Rewrites arguments for `audio.transcribe`; all other tools get their arguments unchanged. */
  private normalizeToolArgsForExecution(toolName: string, rawArgs: unknown): unknown {
    if (toolName !== 'audio.transcribe') {
      return rawArgs;
    }
    return this.hydrateAudioTranscribeArgs(rawArgs);
  }

  /**
   * Produces trustworthy `audio.transcribe` arguments from whatever the model supplied.
   *
   * Resolution order:
   * 1. audio attached to the current run (run context),
   * 2. audio in the latest user message,
   * 3. persisted audio, when the latest user message is a voice-transcript fallback,
   * 4. the model's own `data`/`url` if they validate,
   * 5. the most recent audio found in history or persisted config.
   *
   * Every substitution is reported to the audit logger.
   */
  private hydrateAudioTranscribeArgs(rawArgs: unknown): unknown {
    const args: Record<string, unknown> = (rawArgs && typeof rawArgs === 'object')
      ? { ...(rawArgs as Record<string, unknown>) }
      : {};
    const original = this.summarizeAudioToolArgs(args);

    const runTurnAudioInput = this._runContext.getStore()?.turnAudioInput;
    if (runTurnAudioInput) {
      this.applyAudioToolInput(args, runTurnAudioInput);
      this.logAudioArgsRewrite('latest_audio_preferred', 'latest_turn', original, args);
      return args;
    }

    const latestTurnAudio = this.getLatestTurnUserAudioInput();
    if (latestTurnAudio) {
      this.applyAudioToolInput(args, latestTurnAudio);
      this.logAudioArgsRewrite('latest_audio_preferred', 'latest_turn', original, args);
      return args;
    }

    if (this.isCurrentTurnVoiceTranscriptFallback()) {
      const persistedAudio = this.getPersistedAudioInput();
      if (persistedAudio) {
        this.applyAudioToolInput(args, persistedAudio);
        this.logAudioArgsRewrite('voice_turn_fallback', 'persisted', original, args);
        return args;
      }
    }

    // Validate what the model supplied; valid data takes precedence over a URL.
    const normalizedData = this.normalizeAudioTranscribeDataArg(args.data, args.mime_type);
    const normalizedUrl = this.normalizeAudioTranscribeUrlArg(args.url);
    if (normalizedData) {
      args.data = normalizedData;
      delete args.url;
    } else if (normalizedUrl) {
      args.url = normalizedUrl;
      delete args.data;
    } else {
      delete args.data;
      delete args.url;
    }

    const hasData = typeof args.data === 'string' && args.data.length > 0;
    const hasUrl = typeof args.url === 'string' && args.url.length > 0;
    if (hasData || hasUrl) {
      // Borrow a MIME type from the latest known audio when the model omitted one.
      if (hasData && (typeof args.mime_type !== 'string' || args.mime_type.length === 0)) {
        const latestAudioForMime = this.getLatestUserAudioInput();
        if (latestAudioForMime?.mime_type) {
          args.mime_type = latestAudioForMime.mime_type;
        }
      }
      return args;
    }

    const latestAudio = this.getLatestUserAudioInput();
    if (!latestAudio) {
      return args;
    }

    const persistedAudio = this.getPersistedAudioInput();
    const source: 'history' | 'persisted' =
      persistedAudio?.data === latestAudio.data && persistedAudio?.mime_type === latestAudio.mime_type
        ? 'persisted'
        : 'history';
    this.applyAudioToolInput(args, latestAudio);
    this.logAudioArgsRewrite(
      original.hasData || original.hasUrl ? 'invalid_model_args' : 'missing_model_args',
      source,
      original,
      args,
    );
    return args;
  }

  /** Reports which of `data`, `url` and `mime_type` are present as non-empty strings. */
  private summarizeAudioToolArgs(args: Record<string, unknown>): AudioToolArgSummary {
    const hasData = typeof args.data === 'string' && args.data.length > 0;
    const hasUrl = typeof args.url === 'string' && args.url.length > 0;
    const mimeType = typeof args.mime_type === 'string' && args.mime_type.length > 0
      ? args.mime_type
      : undefined;
    return { hasData, hasUrl, mimeType };
  }

  /**
   * Overwrites the audio fields of `args` in place from `audio`.
   * `data` wins over `url`; the field not chosen is removed. `mime_type` is
   * only overwritten when `audio` provides one.
   */
  private applyAudioToolInput(args: Record<string, unknown>, audio: AudioToolInput): void {
    if (audio.data) {
      args.data = audio.data;
      delete args.url;
    } else if (audio.url) {
      args.url = audio.url;
      delete args.data;
    } else {
      delete args.data;
      delete args.url;
    }
    if (audio.mime_type) {
      args.mime_type = audio.mime_type;
    }
  }

  /** Sends an audit record describing an `audio.transcribe` argument rewrite (no payload data is logged). */
  private logAudioArgsRewrite(
    reason: 'latest_audio_preferred' | 'voice_turn_fallback' | 'invalid_model_args' | 'missing_model_args',
    source: 'latest_turn' | 'history' | 'persisted',
    original: AudioToolArgSummary,
    normalizedArgs: Record<string, unknown>,
  ): void {
    const finalMime = typeof normalizedArgs.mime_type === 'string' && normalizedArgs.mime_type.length > 0
      ? normalizedArgs.mime_type
      : undefined;
    auditLogger?.toolArgsRewritten({
      tool_name: 'audio.transcribe',
      session_id: this.session?.id,
      source,
      reason,
      original_has_data: original.hasData,
      original_has_url: original.hasUrl,
      original_mime_type: original.mimeType,
      final_mime_type: finalMime,
    });
  }

  /** True when the most recent user message contains the `[Voice message]:` marker. */
  private isCurrentTurnVoiceTranscriptFallback(): boolean {
    // Read the history once; the getter copies or re-fetches on every access.
    const history = this.history;
    for (let i = history.length - 1; i >= 0; i--) {
      const msg = history[i];
      if (msg.role !== 'user') {
        continue;
      }
      // Only the latest user message is inspected.
      if (typeof msg.content === 'string') {
        return msg.content.includes('[Voice message]:');
      }
      if (!Array.isArray(msg.content)) {
        return false;
      }
      return msg.content.some((part) => (
        part.type === 'text'
        && typeof part.text === 'string'
        && part.text.includes('[Voice message]:')
      ));
    }
    return false;
  }

  /** Returns the first usable audio part of the most recent user message, or `null`. */
  private getLatestTurnUserAudioInput(): AudioToolInput | null {
    // Read the history once; the getter copies or re-fetches on every access.
    const history = this.history;
    for (let i = history.length - 1; i >= 0; i--) {
      const msg = history[i];
      if (msg.role !== 'user') {
        continue;
      }
      if (!Array.isArray(msg.content)) {
        return null;
      }
      for (const part of msg.content) {
        if (part.type !== 'audio') {
          continue;
        }
        const source = part.source;
        if (
          typeof source.data === 'string'
          && source.data.length > 0
          && typeof source.media_type === 'string'
          && source.media_type.length > 0
        ) {
          return { data: source.data, mime_type: source.media_type };
        }
      }
      // Only the latest user message counts as "this turn".
      return null;
    }
    return null;
  }

  /** Drops empty fields from caller-supplied audio input; `undefined` when neither data nor URL remains. */
  private normalizeTurnAudioInput(turnAudioInput: NativeAgentTurnAudioInput | undefined): AudioToolInput | undefined {
    if (!turnAudioInput) {
      return undefined;
    }
    const data = typeof turnAudioInput.data === 'string' && turnAudioInput.data.length > 0
      ? turnAudioInput.data
      : undefined;
    const url = typeof turnAudioInput.url === 'string' && turnAudioInput.url.length > 0
      ? turnAudioInput.url
      : undefined;
    const mimeType = typeof turnAudioInput.mime_type === 'string' && turnAudioInput.mime_type.length > 0
      ? turnAudioInput.mime_type
      : undefined;
    if (!data && !url) {
      return undefined;
    }
    return {
      ...(data ? { data } : {}),
      ...(url ? { url } : {}),
      ...(mimeType ? { mime_type: mimeType } : {}),
    };
  }

  /** Returns the last `audio/*` attachment that carries data or a URL, if any. */
  private extractLatestAudioInputFromAttachments(attachments?: Attachment[]): AudioToolInput | undefined {
    if (!attachments || attachments.length === 0) {
      return undefined;
    }
    for (let i = attachments.length - 1; i >= 0; i--) {
      const attachment = attachments[i];
      if (!attachment.mimeType.startsWith('audio/')) {
        continue;
      }
      const data = typeof attachment.data === 'string' && attachment.data.length > 0
        ? attachment.data
        : undefined;
      const url = typeof attachment.url === 'string' && attachment.url.length > 0
        ? attachment.url
        : undefined;
      if (!data && !url) {
        continue;
      }
      return {
        ...(data ? { data } : {}),
        ...(url ? { url } : {}),
        mime_type: attachment.mimeType,
      };
    }
    return undefined;
  }

  /**
   * Validates model-supplied base64 audio.
   *
   * @returns The whitespace-stripped base64 string when it uses only base64
   *   characters, decodes to at least one byte and matches the signature for
   *   `rawMimeType`; otherwise `undefined`.
   */
  private normalizeAudioTranscribeDataArg(rawData: unknown, rawMimeType: unknown): string | undefined {
    if (typeof rawData !== 'string') {
      return undefined;
    }
    const compact = rawData.replace(/\s+/g, '');
    if (compact.length === 0) {
      return undefined;
    }
    if (!/^[A-Za-z0-9+/=]+$/.test(compact)) {
      return undefined;
    }
    try {
      const decoded = Buffer.from(compact, 'base64');
      if (decoded.length === 0) {
        return undefined;
      }
      const mimeType = typeof rawMimeType === 'string' ? rawMimeType : undefined;
      if (!this.matchesAudioSignature(decoded, mimeType)) {
        return undefined;
      }
      return compact;
    } catch {
      return undefined;
    }
  }

  /**
   * Checks the leading bytes of `buffer` against the container signature for
   * `mimeType`. Unknown or missing MIME types are accepted without a check.
   */
  private matchesAudioSignature(buffer: Buffer, mimeType?: string): boolean {
    const ascii = (offset: number, value: string): boolean => {
      if (buffer.length < offset + value.length) {
        return false;
      }
      return buffer.subarray(offset, offset + value.length).toString('ascii') === value;
    };

    if (!mimeType) {
      return true;
    }

    switch (mimeType) {
      case 'audio/ogg':
        return ascii(0, 'OggS');
      case 'audio/wav':
        return ascii(0, 'RIFF') && ascii(8, 'WAVE');
      case 'audio/webm':
        return buffer.length >= 4
          && buffer[0] === 0x1A
          && buffer[1] === 0x45
          && buffer[2] === 0xDF
          && buffer[3] === 0xA3;
      case 'audio/mpeg':
      case 'audio/mp3':
        // Either an ID3 tag or an MPEG frame sync (11 set bits).
        return ascii(0, 'ID3') || (buffer.length >= 2 && buffer[0] === 0xFF && (buffer[1] & 0xE0) === 0xE0);
      case 'audio/mp4':
      case 'audio/x-m4a':
        return ascii(4, 'ftyp');
      default:
        return true;
    }
  }

  /** Returns the trimmed URL when it is a non-empty http(s) URL; otherwise `undefined`. */
  private normalizeAudioTranscribeUrlArg(rawUrl: unknown): string | undefined {
    if (typeof rawUrl !== 'string') {
      return undefined;
    }
    const trimmed = rawUrl.trim();
    if (trimmed.length === 0) {
      return undefined;
    }
    if (!/^https?:\/\//i.test(trimmed)) {
      return undefined;
    }
    return trimmed;
  }

  /**
   * Returns the most recent usable audio part from any user message in
   * history, falling back to the persisted audio attachment.
   */
  private getLatestUserAudioInput(): AudioToolInput | null {
    // Read the history once; the getter copies or re-fetches on every access.
    const history = this.history;
    for (let i = history.length - 1; i >= 0; i--) {
      const msg = history[i];
      if (msg.role !== 'user' || !Array.isArray(msg.content)) {
        continue;
      }
      for (const part of msg.content) {
        if (part.type !== 'audio') {
          continue;
        }
        const source = part.source;
        if (
          typeof source.data === 'string'
          && source.data.length > 0
          && typeof source.media_type === 'string'
          && source.media_type.length > 0
        ) {
          return { data: source.data, mime_type: source.media_type };
        }
      }
    }
    return this.getPersistedAudioInput();
  }

  /**
   * Reads the last audio attachment from session config.
   *
   * @returns `null` when there is no session or stored value, the JSON is
   *   invalid, or neither `data` nor `url` is a non-empty string.
   */
  private getPersistedAudioInput(): AudioToolInput | null {
    const persisted = this.session?.getConfig(LAST_AUDIO_ATTACHMENT_CONFIG_KEY);
    if (!persisted) {
      return null;
    }
    try {
      const parsed = JSON.parse(persisted) as { data?: unknown; url?: unknown; mimeType?: unknown };
      const data = typeof parsed.data === 'string' && parsed.data.length > 0 ? parsed.data : undefined;
      const url = typeof parsed.url === 'string' && parsed.url.length > 0 ? parsed.url : undefined;
      const mimeType = typeof parsed.mimeType === 'string' && parsed.mimeType.length > 0
        ? parsed.mimeType
        : undefined;
      if (!data && !url) {
        return null;
      }
      return {
        ...(data ? { data } : {}),
        ...(url ? { url } : {}),
        ...(mimeType ? { mime_type: mimeType } : {}),
      };
    } catch {
      return null;
    }
  }

  /** Detects a `"type": "tool_use"` marker in text and scrapes the first `name` and `id` values. */
  private extractPseudoToolUse(content: string): PseudoToolUse | null {
    if (!content) {
      return null;
    }
    if (!/"type"\s*:\s*"tool_use"/.test(content)) {
      return null;
    }
    const nameMatch = content.match(/"name"\s*:\s*"([^"]+)"/);
    const idMatch = content.match(/"id"\s*:\s*"([^"]+)"/);
    return {
      name: nameMatch?.[1],
      id: idMatch?.[1],
    };
  }

  /**
   * Scans text for well-formed JSON objects of type `tool_use`.
   *
   * @returns The recovered calls plus the text with those objects removed
   *   (trimmed), or `null` when none were found.
   */
  private extractToolCallsFromText(content: string): { toolCalls: ModelToolCall[]; remainingText: string } | null {
    if (!content || content.indexOf('{') === -1) {
      return null;
    }

    const extracted: ExtractedTextToolCall[] = [];
    for (let i = 0; i < content.length; i++) {
      if (content[i] !== '{') {
        continue;
      }
      const end = this.findJsonObjectEnd(content, i);
      if (end === -1) {
        continue;
      }
      const candidate = content.slice(i, end + 1);
      const parsedCall = this.parseTextToolUse(candidate, extracted.length + 1);
      if (parsedCall) {
        extracted.push({
          toolCall: parsedCall,
          start: i,
          end: end + 1,
        });
      }
      // Skip past the balanced object whether or not it was a tool call.
      i = end;
    }

    if (extracted.length === 0) {
      return null;
    }

    let remainingText = '';
    let cursor = 0;
    for (const item of extracted) {
      remainingText += content.slice(cursor, item.start);
      cursor = item.end;
    }
    remainingText += content.slice(cursor);
    remainingText = remainingText.trim();

    return {
      toolCalls: extracted.map(e => e.toolCall),
      remainingText,
    };
  }

  /**
   * Parses one JSON candidate as a `tool_use` object.
   *
   * @param ordinal 1-based position used to synthesise an id when none is given.
   * @returns The tool call, or `null` when the JSON is invalid, is not of type
   *   `tool_use`, or lacks a non-blank string `name`.
   */
  private parseTextToolUse(candidate: string, ordinal: number): ModelToolCall | null {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      return null;
    }
    if (!parsed || typeof parsed !== 'object') {
      return null;
    }
    const obj = parsed as Record<string, unknown>;
    if (obj.type !== 'tool_use') {
      return null;
    }
    if (typeof obj.name !== 'string' || obj.name.trim().length === 0) {
      return null;
    }
    const id = typeof obj.id === 'string' && obj.id.trim().length > 0
      ? obj.id
      : `text_tool_call_${ordinal}`;
    return {
      id,
      name: obj.name,
      args: obj.input ?? {},
    };
  }

  /**
   * Recovers a `shell.exec` call from `tool_use` text that is not valid JSON
   * (typically because quotes inside the command were not escaped).
   *
   * @returns The recovered call and the text with the first tool_use object
   *   stripped, or `null` when the text is not a recoverable shell call.
   */
  private extractMalformedShellToolCall(content: string): { toolCall: ModelToolCall; remainingText: string } | null {
    if (!content || !/"type"\s*:\s*"tool_use"/.test(content)) {
      return null;
    }

    const nameMatch = content.match(/"name"\s*:\s*"([^"]+)"/);
    if (!nameMatch?.[1]) {
      return null;
    }
    const name = nameMatch[1];
    // Accept both the API-style (shell_exec) and internal (shell.exec) spelling.
    const normalized = name.replace(/_/g, '.').toLowerCase();
    if (normalized !== 'shell.exec') {
      return null;
    }

    // Recover malformed shell command payloads where inner quotes are not escaped.
    const commandMatch = content.match(/"command"\s*:\s*"([\s\S]*)"\s*}\s*}/);
    if (!commandMatch?.[1]) {
      return null;
    }
    const command = this.sanitizeRecoveredShellCommand(commandMatch[1]);
    if (!command) {
      return null;
    }

    const idMatch = content.match(/"id"\s*:\s*"([^"]+)"/);
    const id = idMatch?.[1]?.trim().length ? idMatch[1] : 'text_tool_call_recovered_shell';

    return {
      toolCall: {
        id,
        name,
        args: { command },
      },
      remainingText: this.stripFirstToolUseObject(content),
    };
  }

  /** Trims a recovered command and drops a stray, unmatched leading quote. */
  private sanitizeRecoveredShellCommand(raw: string): string {
    let command = raw.trim();
    if (command.length === 0) {
      return '';
    }
    // Common malformed pattern: opening quote duplicated into value.
    if (
      (command.startsWith('"') && !command.endsWith('"'))
      || (command.startsWith('\'') && !command.endsWith('\''))
    ) {
      command = command.slice(1).trimStart();
    }
    return command;
  }

  /**
   * Removes the first brace-balanced object that contains a `tool_use` type
   * marker and returns the trimmed remainder; returns the trimmed input
   * unchanged when no such object can be delimited.
   */
  private stripFirstToolUseObject(content: string): string {
    const typeMatch = /"type"\s*:\s*"tool_use"/.exec(content);
    if (!typeMatch || typeMatch.index < 0) {
      return content.trim();
    }
    const objectStart = content.lastIndexOf('{', typeMatch.index);
    if (objectStart < 0) {
      return content.trim();
    }
    const objectEnd = this.findObjectEndByBraceDepth(content, objectStart);
    if (objectEnd < 0) {
      return content.trim();
    }
    return `${content.slice(0, objectStart)}${content.slice(objectEnd + 1)}`.trim();
  }

  /**
   * Finds the index of the `}` that balances the `{` at `start`, counting
   * braces only (string contents are not skipped, so this also works on
   * malformed JSON). Returns -1 when unbalanced.
   */
  private findObjectEndByBraceDepth(content: string, start: number): number {
    let depth = 0;
    for (let i = start; i < content.length; i++) {
      const ch = content[i];
      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) {
          return i;
        }
      }
    }
    return -1;
  }

  /**
   * Finds the index of the `}` that closes the JSON object starting at
   * `start`, ignoring braces inside double-quoted strings and honouring
   * backslash escapes. Returns -1 when the object is not closed.
   */
  private findJsonObjectEnd(content: string, start: number): number {
    let depth = 0;
    let inString = false;
    let escaping = false;

    for (let i = start; i < content.length; i++) {
      const ch = content[i];
      if (inString) {
        if (escaping) {
          escaping = false;
          continue;
        }
        if (ch === '\\') {
          escaping = true;
          continue;
        }
        if (ch === '"') {
          inString = false;
        }
        continue;
      }

      if (ch === '"') {
        inString = true;
        continue;
      }
      if (ch === '{') {
        depth++;
        continue;
      }
      if (ch === '}') {
        depth--;
        if (depth === 0) {
          return i;
        }
      }
    }
    return -1;
  }

  /** Builds the user-facing warning shown when a tool call arrived as plain text and was not executed. */
  private buildPseudoToolUseWarning(rawContent: string, pseudo: PseudoToolUse): string {
    const toolName = pseudo.name ?? 'unknown';
    const toolId = pseudo.id ?? 'unknown';
    return [
      'Tool call was emitted as plain text and was not executed.',
      `Tool: ${toolName} (id: ${toolId})`,
      'This usually means the current model/backend did not return structured tool metadata.',
      'Original assistant output:',
      rawContent,
    ].join('\n');
  }

  /** Returns `content` unchanged unless it is blank, in which case the empty-response fallback is returned. */
  private normalizeAssistantContent(content: string): string {
    if (content.trim().length > 0) {
      return content;
    }
    return NativeAgent.EMPTY_RESPONSE_FALLBACK;
  }
}