Bind audio.transcribe hydration to current message turn
This commit is contained in:
@@ -77,6 +77,12 @@ interface AudioToolArgSummary {
|
||||
mimeType?: string;
|
||||
}
|
||||
|
||||
/**
 * Audio supplied by the caller for the message turn currently being processed.
 *
 * Every field is optional. `NativeAgent.normalizeTurnAudioInput` ignores the
 * whole object unless `data` or `url` is a non-empty string.
 */
export interface NativeAgentTurnAudioInput {
  /** Inline audio payload. NOTE(review): encoding is not visible here (presumably base64) — confirm. */
  data?: string;
  /** Location the audio can be fetched from. */
  url?: string;
  /** MIME type of the audio; snake_case to match the tool-argument field name. */
  mime_type?: string;
}
|
||||
|
||||
export class NativeAgent {
|
||||
private static readonly EMPTY_RESPONSE_FALLBACK =
|
||||
'I could not generate a response for that. Please try again.';
|
||||
@@ -100,6 +106,7 @@ export class NativeAgent {
|
||||
private _runInProgress = false;
|
||||
private _runAbortController?: AbortController;
|
||||
private modelTimeoutMs: number;
|
||||
private _currentTurnAudioInput?: AudioToolInput;
|
||||
|
||||
constructor(config: NativeAgentConfig) {
|
||||
this.modelClient = config.modelClient;
|
||||
@@ -120,9 +127,14 @@ export class NativeAgent {
|
||||
return this.session?.getHistory() ?? [...this.inMemoryHistory];
|
||||
}
|
||||
|
||||
async process(userMessage: string, attachments?: Attachment[]): Promise<string> {
|
||||
async process(
|
||||
userMessage: string,
|
||||
attachments?: Attachment[],
|
||||
turnAudioInput?: NativeAgentTurnAudioInput,
|
||||
): Promise<string> {
|
||||
this._cancelRequested = false;
|
||||
this._runAbortController = new AbortController();
|
||||
this._currentTurnAudioInput = this.normalizeTurnAudioInput(turnAudioInput) ?? this.extractLatestAudioInputFromAttachments(attachments);
|
||||
if ('clearAbort' in this.modelClient && typeof this.modelClient.clearAbort === 'function') {
|
||||
this.modelClient.clearAbort();
|
||||
}
|
||||
@@ -162,6 +174,7 @@ export class NativeAgent {
|
||||
this._runInProgress = false;
|
||||
this._cancelRequested = false;
|
||||
this._runAbortController = undefined;
|
||||
this._currentTurnAudioInput = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -649,6 +662,12 @@ export class NativeAgent {
|
||||
: {};
|
||||
const original = this.summarizeAudioToolArgs(args);
|
||||
|
||||
if (this._currentTurnAudioInput) {
|
||||
this.applyAudioToolInput(args, this._currentTurnAudioInput);
|
||||
this.logAudioArgsRewrite('latest_audio_preferred', 'latest_turn', original, args);
|
||||
return args;
|
||||
}
|
||||
|
||||
const latestTurnAudio = this.getLatestTurnUserAudioInput();
|
||||
if (latestTurnAudio) {
|
||||
this.applyAudioToolInput(args, latestTurnAudio);
|
||||
@@ -794,6 +813,56 @@ export class NativeAgent {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Converts caller-supplied turn audio into the audio tool-input shape.
 *
 * A field is kept only when it is a non-empty string. If neither `data` nor
 * `url` survives that check the result is `undefined`, even when a MIME type
 * was provided, because there is no audio source to transcribe.
 *
 * @param turnAudioInput - Audio passed alongside the current turn, if any.
 * @returns An object containing only the populated fields, or `undefined`
 *   when no usable audio source is present.
 */
private normalizeTurnAudioInput(turnAudioInput: NativeAgentTurnAudioInput | undefined): AudioToolInput | undefined {
  if (!turnAudioInput) {
    return undefined;
  }

  // Treats anything that is not a non-empty string as absent.
  const nonEmptyString = (candidate: unknown): string | undefined =>
    typeof candidate === 'string' && candidate.length > 0 ? candidate : undefined;

  const payload = nonEmptyString(turnAudioInput.data);
  const location = nonEmptyString(turnAudioInput.url);
  const contentType = nonEmptyString(turnAudioInput.mime_type);

  const hasSource = payload !== undefined || location !== undefined;
  if (!hasSource) {
    return undefined;
  }

  // Absent fields are omitted entirely rather than set to undefined.
  return {
    ...(payload === undefined ? {} : { data: payload }),
    ...(location === undefined ? {} : { url: location }),
    ...(contentType === undefined ? {} : { mime_type: contentType }),
  };
}
|
||||
|
||||
/**
 * Finds the last audio attachment in the list that carries a usable source.
 *
 * The list is scanned from the end towards the start. An attachment qualifies
 * when its MIME type starts with `audio/` (compared case-insensitively, since
 * MIME type names are case-insensitive) and it has a non-empty `data` or `url`
 * string. Entries without a string MIME type are skipped instead of throwing.
 *
 * @param attachments - Attachments received with the current turn, if any.
 * @returns The audio tool input built from the matching attachment, with
 *   `mime_type` exactly as supplied, or `undefined` when none qualifies.
 */
private extractLatestAudioInputFromAttachments(attachments?: Attachment[]): AudioToolInput | undefined {
  if (!attachments || attachments.length === 0) {
    return undefined;
  }

  for (let i = attachments.length - 1; i >= 0; i--) {
    const attachment = attachments[i];
    if (!attachment) {
      continue;
    }

    // Attachment payloads can come from outside the type system, so verify
    // the MIME type at runtime before calling string methods on it.
    const mimeType: unknown = attachment.mimeType;
    if (typeof mimeType !== 'string' || !mimeType.toLowerCase().startsWith('audio/')) {
      continue;
    }

    const data = typeof attachment.data === 'string' && attachment.data.length > 0
      ? attachment.data
      : undefined;
    const url = typeof attachment.url === 'string' && attachment.url.length > 0
      ? attachment.url
      : undefined;

    // An audio attachment with neither inline data nor a URL cannot be
    // transcribed; keep looking at earlier attachments.
    if (!data && !url) {
      continue;
    }

    return {
      ...(data ? { data } : {}),
      ...(url ? { url } : {}),
      mime_type: mimeType,
    };
  }

  return undefined;
}
|
||||
|
||||
private normalizeAudioTranscribeDataArg(rawData: unknown, rawMimeType: unknown): string | undefined {
|
||||
if (typeof rawData !== 'string') {
|
||||
return undefined;
|
||||
|
||||
Reference in New Issue
Block a user