From b9bfee9c5b3e47de5395308e0a346e723446c8c5 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Sat, 7 Feb 2026 09:09:00 -0800 Subject: [PATCH] feat: add outbound attachment support with media.send tool Introduces OutboundAttachment type on OutboundMessage, an OutboundAttachmentCollector (push/drain pattern), and a media.send tool that queues files for outbound delivery. Each channel adapter (Telegram, Discord, Slack, WhatsApp) sends attachments after the text reply. Includes 15 tests for collector and tool. --- src/backends/native/agent.ts | 13 +++ src/backends/native/attachments.test.ts | 83 +++++++++++++++ src/backends/native/attachments.ts | 26 +++++ src/backends/native/index.ts | 1 + src/backends/native/orchestrator.ts | 4 + src/channels/discord/adapter.ts | 33 +++++- src/channels/index.ts | 1 + src/channels/slack/adapter.ts | 54 ++++++++-- src/channels/telegram/adapter.ts | 106 ++++++++++++++++++- src/channels/types.ts | 14 +++ src/channels/whatsapp/adapter.ts | 56 ++++++++-- src/tools/builtin/index.ts | 4 + src/tools/builtin/media-send.test.ts | 129 ++++++++++++++++++++++++ src/tools/builtin/media-send.ts | 71 +++++++++++++ src/tools/index.ts | 2 +- 15 files changed, 576 insertions(+), 21 deletions(-) create mode 100644 src/backends/native/attachments.test.ts create mode 100644 src/backends/native/attachments.ts create mode 100644 src/tools/builtin/media-send.test.ts create mode 100644 src/tools/builtin/media-send.ts diff --git a/src/backends/native/agent.ts b/src/backends/native/agent.ts index f1f3f6f..4c63f56 100644 --- a/src/backends/native/agent.ts +++ b/src/backends/native/agent.ts @@ -6,6 +6,7 @@ import type { ToolExecutor } from '../../tools/executor.js'; import type { ToolResult } from '../../tools/types.js'; import type { ToolPolicyContext } from '../../tools/policy.js'; import type { Attachment } from '../../channels/types.js'; +import type { OutboundAttachmentCollector } from './attachments.js'; import { buildUserMessage, getMessageText } from 
'../../models/media.js'; export interface ToolUseEvent { @@ -25,6 +26,8 @@ export interface NativeAgentConfig { onToolUse?: (event: ToolUseEvent) => void; /** Policy context for tool filtering (agent tier, provider). */ toolPolicyContext?: ToolPolicyContext; + /** Collector for outbound attachments queued by tools (e.g. media.send). */ + attachmentCollector?: OutboundAttachmentCollector; } // Internal message type for the tool loop — supports both text and structured content blocks. @@ -47,6 +50,7 @@ export class NativeAgent { private _totalUsage: TokenUsage = { inputTokens: 0, outputTokens: 0 }; private _callCount: number = 0; private _toolPolicyContext?: ToolPolicyContext; + private _attachmentCollector?: OutboundAttachmentCollector; constructor(config: NativeAgentConfig) { this.modelClient = config.modelClient; @@ -57,6 +61,7 @@ export class NativeAgent { this.maxIterations = config.maxIterations ?? 10; this.onToolUse = config.onToolUse; this._toolPolicyContext = config.toolPolicyContext; + this._attachmentCollector = config.attachmentCollector; } private get history(): Message[] { @@ -241,4 +246,12 @@ export class NativeAgent { getToolPolicyContext(): ToolPolicyContext | undefined { return this._toolPolicyContext; } + + setAttachmentCollector(collector: OutboundAttachmentCollector | undefined): void { + this._attachmentCollector = collector; + } + + getAttachmentCollector(): OutboundAttachmentCollector | undefined { + return this._attachmentCollector; + } } diff --git a/src/backends/native/attachments.test.ts b/src/backends/native/attachments.test.ts new file mode 100644 index 0000000..8cb1c0d --- /dev/null +++ b/src/backends/native/attachments.test.ts @@ -0,0 +1,83 @@ +import { describe, it, expect } from 'vitest'; +import { OutboundAttachmentCollector } from './attachments.js'; + +describe('OutboundAttachmentCollector', () => { + it('starts with zero count', () => { + const collector = new OutboundAttachmentCollector(); + expect(collector.count).toBe(0); + 
}); + + it('push increments count', () => { + const collector = new OutboundAttachmentCollector(); + collector.push({ mimeType: 'image/png', data: 'abc123' }); + expect(collector.count).toBe(1); + + collector.push({ mimeType: 'application/pdf', url: 'https://example.com/doc.pdf' }); + expect(collector.count).toBe(2); + }); + + it('drain returns all queued attachments', () => { + const collector = new OutboundAttachmentCollector(); + collector.push({ mimeType: 'image/png', data: 'abc123', filename: 'photo.png' }); + collector.push({ mimeType: 'audio/ogg', url: 'https://example.com/audio.ogg' }); + + const drained = collector.drain(); + expect(drained).toHaveLength(2); + expect(drained[0]).toEqual({ + mimeType: 'image/png', + data: 'abc123', + filename: 'photo.png', + }); + expect(drained[1]).toEqual({ + mimeType: 'audio/ogg', + url: 'https://example.com/audio.ogg', + }); + }); + + it('drain clears the queue', () => { + const collector = new OutboundAttachmentCollector(); + collector.push({ mimeType: 'image/png', data: 'abc123' }); + expect(collector.count).toBe(1); + + collector.drain(); + expect(collector.count).toBe(0); + }); + + it('drain returns empty array when nothing queued', () => { + const collector = new OutboundAttachmentCollector(); + const drained = collector.drain(); + expect(drained).toEqual([]); + }); + + it('drain returns a copy, not the internal array', () => { + const collector = new OutboundAttachmentCollector(); + collector.push({ mimeType: 'image/png', data: 'abc123' }); + const first = collector.drain(); + const second = collector.drain(); + + // First drain got the item, second drain is empty + expect(first).toHaveLength(1); + expect(second).toHaveLength(0); + + // Mutating the returned array doesn't affect the collector + first.push({ mimeType: 'image/gif', data: 'xyz' }); + expect(collector.count).toBe(0); + }); + + it('can push and drain multiple cycles', () => { + const collector = new OutboundAttachmentCollector(); + + // First cycle + 
/**
 * In-memory queue of outbound attachments.
 *
 * Callers add items with `push` and later remove everything at once with
 * `drain`; `count` reports how many items are currently waiting.
 */
export class OutboundAttachmentCollector {
  private _attachments: OutboundAttachment[] = [];

  /** How many attachments are currently queued. */
  get count(): number {
    return this._attachments.length;
  }

  /** Append one attachment to the end of the queue. */
  push(attachment: OutboundAttachment): void {
    this._attachments.push(attachment);
  }

  /**
   * Empty the queue and return its former contents in insertion order.
   *
   * The returned array is detached from the collector, so mutating it does
   * not re-queue anything.
   */
  drain(): OutboundAttachment[] {
    // splice(0, length) removes every element and returns them in a new array.
    return this._attachments.splice(0, this._attachments.length);
  }
}
/**
 * Build a discord.js AttachmentBuilder from an OutboundAttachment.
 *
 * Base64 `data` takes precedence over `url` when both are present. The
 * upload is named after `filename`, falling back to "attachment".
 *
 * @param attachment - Outbound attachment carrying either `data` or `url`.
 * @returns An AttachmentBuilder wrapping the decoded bytes or the URL.
 * @throws Error when the attachment has neither `data` nor `url`.
 */
private buildDiscordAttachment(attachment: OutboundAttachment): AttachmentBuilder {
  const name = attachment.filename ?? 'attachment';

  if (attachment.data) {
    return new AttachmentBuilder(Buffer.from(attachment.data, 'base64'), { name });
  }

  if (attachment.url) {
    return new AttachmentBuilder(attachment.url, { name });
  }

  // Explicit guard instead of a non-null assertion on `url`: fail here with a
  // clear message rather than handing `undefined` to discord.js.
  throw new Error(`Discord: ${attachment.mimeType} attachment has neither data nor url`);
}
/**
 * Deliver one outbound attachment into a Slack thread.
 *
 * Base64 `data` is decoded and uploaded through `files.uploadV2`; an
 * attachment that only has a `url` is posted as a plain text message
 * containing that URL. Failures are logged and not rethrown.
 *
 * @param channel - Slack channel ID to post into.
 * @param threadTs - Timestamp of the thread to reply in.
 * @param attachment - Attachment to deliver.
 */
private async sendAttachment(
  channel: string,
  threadTs: string,
  attachment: OutboundAttachment,
): Promise<void> {
  const app = this.app;
  if (!app) {
    return;
  }

  try {
    if (attachment.data) {
      const bytes = Buffer.from(attachment.data, 'base64');
      await app.client.files.uploadV2({
        channel_id: channel,
        thread_ts: threadTs,
        file: bytes,
        filename: attachment.filename ?? 'attachment',
      });
      return;
    }

    if (attachment.url) {
      // URL-only attachments are shared as a link rather than re-uploaded.
      await app.client.chat.postMessage({
        channel,
        text: attachment.url,
        thread_ts: threadTs,
      });
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : 'Unknown error';
    console.error(`Slack: failed to send ${attachment.mimeType} attachment:`, reason);
  }
}
await this.resolveUserName(message.user) : undefined; - // Extract image attachments from Slack file uploads - const attachments = await this.extractImageAttachments(message.files); + // Extract media attachments from Slack file uploads + const attachments = await this.extractMediaAttachments(message.files); // Detect reset command if (text === '!reset' || text === 'reset') { diff --git a/src/channels/telegram/adapter.ts b/src/channels/telegram/adapter.ts index dec5739..f0fed07 100644 --- a/src/channels/telegram/adapter.ts +++ b/src/channels/telegram/adapter.ts @@ -1,10 +1,11 @@ -import { Bot } from 'grammy'; +import { Bot, InputFile } from 'grammy'; import type { HookEngine } from '../../hooks/index.js'; import type { Attachment, InboundMessage, OutboundMessage, + OutboundAttachment, ChannelAdapter, ChannelStatus, } from '../types.js'; @@ -263,6 +264,80 @@ export class TelegramAdapter implements ChannelAdapter { }); }); + // ── Voice message handler ── + + this.bot.on('message:voice', async (ctx) => { + if (!this.messageHandler) return; + + const voice = ctx.message.voice; + if (!voice) return; + + await ctx.replyWithChatAction('typing'); + + const fileData = await this.downloadFileToBase64(voice.file_id); + if (!fileData) { + console.error(`Failed to download voice message ${voice.file_id}`); + return; + } + + const caption = ctx.message.caption ?? ''; + const mimeType = voice.mime_type ?? 
/**
 * Send a single outbound attachment via the Telegram API.
 *
 * Base64 `data` is decoded and uploaded as an InputFile; otherwise the `url`
 * string is passed to the API as-is. Attachments whose MIME type starts with
 * "image/" go through `sendPhoto`, everything else through `sendDocument`.
 * Failures are logged and not rethrown.
 *
 * @param chatId - Telegram chat to send to.
 * @param attachment - Attachment carrying either `data` or `url`.
 */
private async sendAttachment(chatId: number, attachment: OutboundAttachment): Promise<void> {
  if (!this.bot) return;

  try {
    let file: InputFile | string;
    if (attachment.data) {
      file = new InputFile(Buffer.from(attachment.data, 'base64'), attachment.filename);
    } else if (attachment.url) {
      file = attachment.url;
    } else {
      // Nothing to send: avoid calling the Bot API with an empty string.
      console.error(
        `Failed to send ${attachment.mimeType} attachment to ${chatId}:`,
        'attachment has neither data nor url',
      );
      return;
    }

    if (attachment.mimeType.startsWith('image/')) {
      await this.bot.api.sendPhoto(chatId, file);
    } else {
      await this.bot.api.sendDocument(chatId, file);
    }
  } catch (error) {
    console.error(
      `Failed to send ${attachment.mimeType} attachment to ${chatId}:`,
      error instanceof Error ? error.message : 'Unknown error',
    );
  }
}
/**
 * Send a single outbound attachment via WhatsApp using MessageMedia.
 *
 * Base64 `data` is wrapped directly in a MessageMedia. A `url` is downloaded
 * with `MessageMedia.fromUrl`, which receives the suggested filename so both
 * branches honour it. Failures are logged and not rethrown.
 *
 * @param peerId - WhatsApp chat ID to send to.
 * @param attachment - Attachment carrying either `data` or `url`.
 */
private async sendAttachment(peerId: string, attachment: OutboundAttachment): Promise<void> {
  if (!this.client) return;

  try {
    if (attachment.data) {
      const media = new MessageMedia(
        attachment.mimeType,
        attachment.data,
        attachment.filename,
      );
      await this.client.sendMessage(peerId, media);
    } else if (attachment.url) {
      // unsafeMime lets fromUrl proceed when the URL path has no recognisable
      // file extension instead of throwing before the download.
      const media = await MessageMedia.fromUrl(attachment.url, {
        unsafeMime: true,
        filename: attachment.filename,
      });
      await this.client.sendMessage(peerId, media);
    }
  } catch (error) {
    console.error(
      `WhatsApp: failed to send ${attachment.mimeType} attachment:`,
      error instanceof Error ? error.message : 'Unknown error',
    );
  }
}
*/ @@ -211,17 +244,24 @@ export class WhatsAppAdapter implements ChannelAdapter { const senderName = message._data?.notifyName; - // Extract image attachments if the message has media + // Extract media attachments if the message has media const attachments: Attachment[] = []; if (message.hasMedia) { try { const media = await (message as any).downloadMedia(); - if (media && typeof media.mimetype === 'string' && media.mimetype.startsWith('image/')) { - attachments.push({ - mimeType: media.mimetype, - data: media.data, - filename: media.filename, - }); + if (media && typeof media.mimetype === 'string') { + const mimeType = media.mimetype; + const isAudio = mimeType.startsWith('audio/'); + const isImage = mimeType.startsWith('image/'); + const isVoice = message.type === 'ptt'; + + if (isAudio || isImage || isVoice) { + attachments.push({ + mimeType: mimeType, + data: media.data, + filename: media.filename, + }); + } } } catch (error) { console.error( diff --git a/src/tools/builtin/index.ts b/src/tools/builtin/index.ts index 82fcc04..d1ad214 100644 --- a/src/tools/builtin/index.ts +++ b/src/tools/builtin/index.ts @@ -4,6 +4,8 @@ export { fileWriteTool } from './file-write.js'; export { fileEditTool } from './file-edit.js'; export { fileListTool } from './file-list.js'; export { webFetchTool } from './web-fetch.js'; +export { createMediaSendTool } from './media-send.js'; +export { createImageAnalyzeTool } from './image-analyze.js'; export { createMemoryReadTool } from './memory-read.js'; export { createMemoryWriteTool } from './memory-write.js'; export { createMemorySearchTool } from './memory-search.js'; @@ -23,6 +25,8 @@ import { fileWriteTool } from './file-write.js'; import { fileEditTool } from './file-edit.js'; import { fileListTool } from './file-list.js'; import { webFetchTool } from './web-fetch.js'; +import { createMediaSendTool } from './media-send.js'; +import { createImageAnalyzeTool } from './image-analyze.js'; import { createMemoryReadTool } from 
'./memory-read.js'; import { createMemoryWriteTool } from './memory-write.js'; import { createMemorySearchTool } from './memory-search.js'; diff --git a/src/tools/builtin/media-send.test.ts b/src/tools/builtin/media-send.test.ts new file mode 100644 index 0000000..6fb1796 --- /dev/null +++ b/src/tools/builtin/media-send.test.ts @@ -0,0 +1,129 @@ +import { describe, it, expect } from 'vitest'; +import { OutboundAttachmentCollector } from '../../backends/native/attachments.js'; +import { createMediaSendTool } from './media-send.js'; + +describe('media.send tool', () => { + it('has correct metadata', () => { + const collector = new OutboundAttachmentCollector(); + const tool = createMediaSendTool(collector); + + expect(tool.name).toBe('media.send'); + expect(tool.inputSchema.required).toEqual(['mime_type']); + }); + + it('queues attachment with base64 data', async () => { + const collector = new OutboundAttachmentCollector(); + const tool = createMediaSendTool(collector); + + const result = await tool.execute({ + data: 'aGVsbG8=', + mime_type: 'image/png', + filename: 'hello.png', + }); + + expect(result.success).toBe(true); + expect(result.output).toContain('image/png'); + expect(result.output).toContain('hello.png'); + expect(collector.count).toBe(1); + + const drained = collector.drain(); + expect(drained[0]).toEqual({ + mimeType: 'image/png', + data: 'aGVsbG8=', + url: undefined, + filename: 'hello.png', + }); + }); + + it('queues attachment with URL', async () => { + const collector = new OutboundAttachmentCollector(); + const tool = createMediaSendTool(collector); + + const result = await tool.execute({ + url: 'https://example.com/photo.jpg', + mime_type: 'image/jpeg', + }); + + expect(result.success).toBe(true); + expect(result.output).toContain('image/jpeg'); + expect(collector.count).toBe(1); + + const drained = collector.drain(); + expect(drained[0]).toEqual({ + mimeType: 'image/jpeg', + data: undefined, + url: 'https://example.com/photo.jpg', + filename: 
undefined, + }); + }); + + it('fails when neither data nor url is provided', async () => { + const collector = new OutboundAttachmentCollector(); + const tool = createMediaSendTool(collector); + + const result = await tool.execute({ mime_type: 'image/png' }); + + expect(result.success).toBe(false); + expect(result.error).toContain('Either data or url must be provided'); + expect(collector.count).toBe(0); + }); + + it('queues multiple attachments', async () => { + const collector = new OutboundAttachmentCollector(); + const tool = createMediaSendTool(collector); + + await tool.execute({ data: 'img1', mime_type: 'image/png' }); + await tool.execute({ url: 'https://example.com/doc.pdf', mime_type: 'application/pdf' }); + await tool.execute({ data: 'img2', mime_type: 'image/jpeg', filename: 'photo.jpg' }); + + expect(collector.count).toBe(3); + + const drained = collector.drain(); + expect(drained).toHaveLength(3); + expect(drained[0].mimeType).toBe('image/png'); + expect(drained[1].mimeType).toBe('application/pdf'); + expect(drained[2].mimeType).toBe('image/jpeg'); + expect(drained[2].filename).toBe('photo.jpg'); + }); + + it('collector drains correctly after tool use', async () => { + const collector = new OutboundAttachmentCollector(); + const tool = createMediaSendTool(collector); + + await tool.execute({ data: 'abc', mime_type: 'image/png' }); + expect(collector.count).toBe(1); + + const first = collector.drain(); + expect(first).toHaveLength(1); + expect(collector.count).toBe(0); + + // Second drain should be empty + const second = collector.drain(); + expect(second).toHaveLength(0); + }); + + it('output includes filename when provided', async () => { + const collector = new OutboundAttachmentCollector(); + const tool = createMediaSendTool(collector); + + const result = await tool.execute({ + data: 'abc', + mime_type: 'application/pdf', + filename: 'report.pdf', + }); + + expect(result.output).toContain('report.pdf'); + }); + + it('output omits filename when not 
/** Arguments accepted by the media.send tool, mirroring its input schema. */
interface MediaSendArgs {
  data?: string;
  url?: string;
  mime_type: string;
  filename?: string;
}

/**
 * Create the media.send tool bound to an OutboundAttachmentCollector.
 *
 * Each successful call pushes one attachment onto `collector`. The tool does
 * not send anything itself.
 *
 * Arguments are validated before anything is queued: they must be an object,
 * `mime_type` must be a non-empty string, `data` / `url` / `filename` must be
 * strings when present (null is treated as absent), and at least one of
 * `data` or `url` must be non-empty. Invalid input yields a failed ToolResult
 * and leaves the collector untouched.
 *
 * @param collector - Queue that receives the validated attachments.
 * @returns The tool definition (name, description, input schema, execute).
 */
export function createMediaSendTool(collector: OutboundAttachmentCollector): Tool {
  const fail = (error: string): ToolResult => ({ success: false, output: '', error });

  return {
    name: 'media.send',
    description:
      'Attach a file or image to send back to the user. The attachment will be included with the next text response.',
    inputSchema: {
      type: 'object',
      properties: {
        data: {
          type: 'string',
          description: 'Base64-encoded file content',
        },
        url: {
          type: 'string',
          description: 'URL to the file (alternative to data)',
        },
        mime_type: {
          type: 'string',
          description: 'MIME type of the file (e.g. image/png, application/pdf)',
        },
        filename: {
          type: 'string',
          description: 'Suggested filename',
        },
      },
      required: ['mime_type'],
    },

    execute: async (rawArgs: unknown): Promise<ToolResult> => {
      // Arguments come from the model, so check the shape before trusting it.
      if (typeof rawArgs !== 'object' || rawArgs === null) {
        return fail('Arguments must be an object');
      }
      const raw = rawArgs as Partial<Record<keyof MediaSendArgs, unknown>>;

      const mimeType = raw.mime_type;
      if (typeof mimeType !== 'string' || mimeType.trim() === '') {
        return fail('mime_type must be a non-empty string');
      }

      const optional: { data?: string; url?: string; filename?: string } = {};
      for (const key of ['data', 'url', 'filename'] as const) {
        const value = raw[key];
        if (value === undefined || value === null) continue;
        if (typeof value !== 'string') {
          return fail(`${key} must be a string`);
        }
        optional[key] = value;
      }

      if (!optional.data && !optional.url) {
        return fail('Either data or url must be provided');
      }

      collector.push({
        mimeType,
        data: optional.data,
        url: optional.url,
        filename: optional.filename,
      });

      const label = optional.filename ? `: ${optional.filename}` : '';
      return {
        success: true,
        output: `Attachment queued (${mimeType}${label})`,
      };
    },
  };
}