routing: fast-path voice when transcription not configured
This commit is contained in:
@@ -380,3 +380,154 @@ describe('daemon command fast-path integration', () => {
|
||||
expect(keys.some(key => key.includes(':assistant'))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Integration tests for how the message router handles voice (audio) attachments.
 *
 * Two scenarios are covered:
 *  - audio transcription disabled: the router replies directly and never calls the agent;
 *  - audio transcription enabled: the audio is transcribed via `fetch`, the transcript is
 *    merged into the text, and the audio attachment is removed before the agent is called.
 */
describe('daemon audio routing integration', () => {
  afterEach(() => {
    // Undo every vi.spyOn() installed by a test (including the global fetch spy).
    vi.restoreAllMocks();
  });

  /** Creates a session double with the given id whose methods are all vitest mocks. */
  const createSessionStub = (id: string) => ({
    id,
    addMessage: vi.fn(),
    getHistory: vi.fn(() => []),
    clear: vi.fn(),
    replaceHistory: vi.fn(),
    getConfig: vi.fn(() => undefined),
    setConfig: vi.fn(),
    deleteConfig: vi.fn(),
  });

  /**
   * Builds a message router wired to test doubles.
   *
   * @param sessionId id of the stub session returned by the fake session manager
   * @param audio value placed under `config.audio`; the only part of the config
   *   that differs between the tests in this suite
   * @returns the router produced by `createMessageRouter`
   */
  const createRouterWithAudio = (sessionId: string, audio: Record<string, unknown>) => {
    const session = createSessionStub(sessionId);

    const commandRegistry = new CommandRegistry();
    registerBuiltinCommands(commandRegistry);

    return createMessageRouter({
      sessionManager: { getSession: vi.fn(() => session) } as any,
      modelRouter: {
        getAvailableTiers: () => ['default'],
        getAllLabels: () => ({ default: 'default' }),
        getLabel: (tier: string) => tier,
      } as any,
      systemPrompt: 'test prompt',
      toolRegistry: { clone() { return this; }, register: vi.fn() } as any,
      toolExecutor: {} as any,
      config: {
        agents: {
          primary_tier: 'default',
          delegation: {
            compaction: 'default',
            memory_extraction: 'default',
            classification: 'default',
            tool_summarisation: 'default',
            complex_reasoning: 'default',
          },
          max_delegation_depth: 1,
          max_iterations: 3,
        },
        compaction: { enabled: false },
        // Anthropic doesn't support native audio; ensures routing hits the non-audio path.
        models: { default: { provider: 'anthropic', model: 'claude' } },
        audio,
      } as any,
      commandRegistry,
    });
  };

  it('fast-path replies for voice attachments when transcription is not configured and model does not support audio', async () => {
    // Stubbed so that a routing regression fails on the assertion below instead of
    // executing the real orchestrator against the partial doubles used here.
    const processSpy = vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('ok');

    const router = createRouterWithAudio('telegram:user-voice-1', { enabled: false });

    const reply = vi.fn(async () => {});
    const voiceOnlyMessage = {
      id: 'v1',
      channel: 'telegram',
      senderId: 'user-voice-1',
      text: '',
      attachments: [{ mimeType: 'audio/ogg', data: 'ZGF0YQ==', filename: 'voice.ogg' }],
      timestamp: Date.now(),
    } as any;

    await router.handler(voiceOnlyMessage, reply);

    // The agent must be bypassed and exactly one explanatory reply sent.
    expect(processSpy).not.toHaveBeenCalled();
    expect(reply).toHaveBeenCalledTimes(1);
    const msg = (reply.mock.calls[0] as unknown as any[])[0] as { text?: string };
    expect(String(msg.text)).toContain('audio transcription is not configured');
  });

  it('transcribes voice attachments when transcription is configured, then strips audio before calling agent.process', async () => {
    const processSpy = vi.spyOn(AgentOrchestrator.prototype, 'process').mockResolvedValue('ok');

    // Mock transcription endpoint call.
    const fetchSpy = vi.spyOn(globalThis, 'fetch' as any).mockResolvedValue({
      ok: true,
      status: 200,
      statusText: 'OK',
      json: async () => ({ text: 'hello world' }),
    } as any);

    const router = createRouterWithAudio('telegram:user-voice-2', {
      enabled: true,
      provider: { type: 'openai', endpoint: 'https://example.com/v1/audio/transcriptions', api_key: 'sk-test', model: 'whisper-1' },
    });

    const reply = vi.fn(async () => {});
    const captionedVoiceMessage = {
      id: 'v2',
      channel: 'telegram',
      senderId: 'user-voice-2',
      text: 'caption',
      attachments: [
        { mimeType: 'audio/ogg', data: 'ZGF0YQ==', filename: 'voice.ogg' },
        { mimeType: 'image/jpeg', data: 'aW1n', filename: 'img.jpg' },
      ],
      timestamp: Date.now(),
    } as any;

    await router.handler(captionedVoiceMessage, reply);

    expect(fetchSpy).toHaveBeenCalled();
    expect(processSpy).toHaveBeenCalledTimes(1);

    // The agent receives the transcript plus the original caption as text...
    const [calledText, calledAttachments] = processSpy.mock.calls[0] ?? [];
    expect(String(calledText)).toContain('[Voice message]: hello world');
    expect(String(calledText)).toContain('caption');

    // ...and only the non-audio attachment is forwarded.
    const atts = calledAttachments as any[] | undefined;
    expect(atts?.some(a => a.mimeType === 'audio/ogg')).toBe(false);
    expect(atts?.some(a => a.mimeType === 'image/jpeg')).toBe(true);
  });
});
|
||||
|
||||
Reference in New Issue
Block a user