Bind audio.transcribe hydration to current message turn

This commit is contained in:
William Valentin
2026-02-22 21:27:09 -08:00
parent 89246e7da0
commit a761813375
5 changed files with 180 additions and 11 deletions
+2 -2
View File
@@ -656,7 +656,7 @@ describe('daemon command fast-path integration', () => {
const keys = Array.from(router.agents.keys());
expect(keys.some(key => key.includes(':research'))).toBe(true);
expect(processSpy).toHaveBeenCalledWith('compare k0s vs k3s for a homelab', undefined);
expect(processSpy).toHaveBeenCalledWith('compare k0s vs k3s for a homelab', undefined, undefined);
});
it('falls back to llm path when confidence is below fast threshold', async () => {
@@ -1938,6 +1938,6 @@ describe('daemon talk mode (voice wake) integration', () => {
timestamp: Date.now(),
} as MessageRouterInput, reply);
expect(processSpy).toHaveBeenCalledOnce();
expect(processSpy).toHaveBeenCalledWith('what time is it?', undefined);
expect(processSpy).toHaveBeenCalledWith('what time is it?', undefined, undefined);
});
});
+23 -2
View File
@@ -215,6 +215,26 @@ function persistLatestAudioAttachment(
}
}
/**
 * Builds the audio tool input for the current turn from the latest usable attachment.
 *
 * The list is scanned from its last entry towards its first; the first entry that
 * carries a non-empty string `data` or a non-empty string `url` wins. Entries that
 * carry neither are skipped.
 *
 * @param audioAttachments - Audio attachments to inspect; the last usable entry is
 *   treated as the latest one. The array itself is not mutated.
 * @returns An object holding `data` and/or `url` (only those that are non-empty
 *   strings) together with `mime_type`, copied from the attachment's `mimeType`
 *   (the key is set even when that value is undefined). Returns `undefined` when no
 *   attachment has usable `data` or `url`.
 */
function extractLatestAudioToolInput(audioAttachments: Attachment[]): { data?: string; url?: string; mime_type?: string } | undefined {
  // Normalises a field to a non-empty string, or undefined for anything else.
  const nonEmptyString = (value: unknown): string | undefined =>
    typeof value === 'string' && value.length > 0 ? value : undefined;

  // Walk a reversed copy so the caller's array keeps its order.
  for (const candidate of audioAttachments.slice().reverse()) {
    const data = nonEmptyString(candidate.data);
    const url = nonEmptyString(candidate.url);
    if (data === undefined && url === undefined) {
      continue;
    }

    const toolInput: { data?: string; url?: string; mime_type?: string } = {};
    if (data !== undefined) {
      toolInput.data = data;
    }
    if (url !== undefined) {
      toolInput.url = url;
    }
    toolInput.mime_type = candidate.mimeType;
    return toolInput;
  }

  return undefined;
}
function isTtsEnabledForChannel(config: Config, channel: string): boolean {
if (!config.tts?.enabled) {
return false;
@@ -1317,6 +1337,7 @@ export function createMessageRouter(deps: {
let messageText = incomingText;
let attachments = msg.attachments;
const audioAttachments = (msg.attachments ?? []).filter((a: Attachment) => isSupportedAudio(a));
const turnAudioToolInput = extractLatestAudioToolInput(audioAttachments);
if (audioAttachments.length > 0) {
persistLatestAudioAttachment(session, audioAttachments);
}
@@ -1424,7 +1445,7 @@ export function createMessageRouter(deps: {
let response: string;
activeRuns.set(sessionIdForRun, agent);
try {
response = await agent.process(messageText, attachments);
response = await agent.process(messageText, attachments, turnAudioToolInput);
} catch (error) {
const currentTier = agent.getModelTier();
const canEscalate = deps.config.agents.auto_escalate && currentTier !== 'complex';
@@ -1434,7 +1455,7 @@ export function createMessageRouter(deps: {
console.warn(`Auto-escalating session ${msg.channel}:${msg.senderId} from ${currentTier} to complex after processing failure.`);
agent.setModelTier('complex');
response = await agent.process(messageText, attachments);
response = await agent.process(messageText, attachments, turnAudioToolInput);
}
const outboundAttachments = collector.drain();
const ttsAttachment = await maybeBuildTtsAttachment(response, msg.channel);