feat(audio): add audio.transcribe tool with Whisper-compatible API support
- Add createAudioTranscribeTool with OpenAI/Groq/Ollama/llama.cpp provider support
- Refactor audio config schema to a nested audio.enabled + audio.provider structure
- Move audio tool registration to initTools() for conditional enablement
- Fix duplication bug in the audio-transcribe.ts URL download handler
- Support base64 data and URL-based audio input with format detection
This commit is contained in:
+1
-8
@@ -5,7 +5,6 @@ import { mkdirSync } from 'fs';
|
||||
|
||||
// ── Config & Types ──
|
||||
import type { Config } from '../config/index.js';
|
||||
import type { AudioTranscriptionConfig } from '../models/media.js';
|
||||
import type { ToolRegistry, ToolExecutor, BrowserManager } from '../tools/index.js';
|
||||
import type { AgentConfigRegistry, AgentRouter } from '../agents/index.js';
|
||||
import type { SandboxManager } from '../sandbox/index.js';
|
||||
@@ -100,12 +99,6 @@ export async function startDaemon(config: Config): Promise<DaemonContext> {
|
||||
const { skillRegistry, skillInstaller } = initSkills(config);
|
||||
const { agentConfigRegistry, agentRouter, sandboxManager } = await initAgents({ config, lifecycle });
|
||||
|
||||
// ── Model & Prompt ──
|
||||
const audioConfig: AudioTranscriptionConfig = {
|
||||
endpoint: config.audio.transcription_endpoint,
|
||||
apiKey: config.audio.transcription_api_key,
|
||||
model: config.audio.transcription_model,
|
||||
};
|
||||
const modelRouter = createModelRouter(config);
|
||||
|
||||
// Restore persisted model tier
|
||||
@@ -133,7 +126,7 @@ export async function startDaemon(config: Config): Promise<DaemonContext> {
|
||||
|
||||
const messageRouter = createMessageRouter({
|
||||
sessionManager, modelRouter, systemPrompt, toolRegistry, toolExecutor,
|
||||
config, memoryStore, agentConfigRegistry, agentRouter, sandboxManager, audioConfig,
|
||||
config, memoryStore, agentConfigRegistry, agentRouter, sandboxManager,
|
||||
});
|
||||
channelRegistry.setMessageHandler(messageRouter.handler);
|
||||
channelAgents = messageRouter.agents;
|
||||
|
||||
+14
-1
@@ -1,7 +1,8 @@
|
||||
import type { Config } from '../config/index.js';
|
||||
import type { Lifecycle } from './lifecycle.js';
|
||||
import type { AudioTranscriptionConfig } from '../models/media.js';
|
||||
import { HookEngine } from '../hooks/index.js';
|
||||
import { ToolRegistry, ToolExecutor, ToolPolicy, allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools } from '../tools/index.js';
|
||||
import { ToolRegistry, ToolExecutor, ToolPolicy, allBuiltinTools, createWebSearchTools, createProcessTools, ProcessManager, BrowserManager, createBrowserTools, createAudioTranscribeTool } from '../tools/index.js';
|
||||
|
||||
export interface ToolsDeps {
|
||||
config: Config;
|
||||
@@ -52,6 +53,18 @@ export function initTools(deps: ToolsDeps): ToolsResult {
|
||||
console.log('Process manager stopped');
|
||||
});
|
||||
|
||||
// Register the audio transcription tool only when audio is enabled and a provider is configured
|
||||
if (config.audio?.enabled && config.audio.provider) {
|
||||
const audioConfig: AudioTranscriptionConfig = {
|
||||
endpoint: config.audio.provider.endpoint,
|
||||
apiKey: config.audio.provider.api_key,
|
||||
model: config.audio.provider.model,
|
||||
};
|
||||
const audioTool = createAudioTranscribeTool(audioConfig);
|
||||
toolRegistry.register(audioTool);
|
||||
console.log(`Audio transcription enabled (type=${config.audio.provider.type}, endpoint=${audioConfig.endpoint})`);
|
||||
}
|
||||
|
||||
// Initialize browser manager and register browser tools (if enabled)
|
||||
let browserManager: BrowserManager | undefined;
|
||||
if (config.browser?.enabled) {
|
||||
|
||||
Reference in New Issue
Block a user