feat: add multimodal media pipeline for image support across all providers and channels
Widen Message.content from string to string | MessageContentPart[] to support multimodal content. Add Attachment type to channel layer, media conversion utilities, and image extraction to all channel adapters (Telegram, Discord, Slack, WhatsApp). Update all model clients (Anthropic, OpenAI, Gemini, Bedrock) to convert structured content to provider-specific formats. Fix downstream consumers (tokens, compaction, TUI, local models) to handle the widened type via getMessageText() helper.
This commit is contained in:
@@ -10,6 +10,7 @@ import { Client, GatewayIntentBits, Events } from 'discord.js';
|
||||
import type { Message as DiscordMessage } from 'discord.js';
|
||||
|
||||
import type {
|
||||
Attachment,
|
||||
InboundMessage,
|
||||
OutboundMessage,
|
||||
ChannelAdapter,
|
||||
@@ -50,6 +51,20 @@ export class DiscordAdapter implements ChannelAdapter {
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
/**
 * Infer an image MIME type from the file extension found in a URL.
 *
 * Used as a fallback when no content type is supplied for an attachment.
 * Only the last path segment is inspected, and any query string or fragment
 * is ignored, so a link such as `https://host/a/photo.PNG?sig=abc#x` resolves
 * to `image/png`.
 *
 * @param url - URL (or bare path) of the attachment.
 * @returns The matching MIME type, or `null` when there is no extension or
 *          the extension is not one of the known image types.
 */
private _inferMimeTypeFromUrl(url: string): string | null {
  // Cut the URL at the first `?` or `#`; what follows is never part of the file name.
  const cut = url.search(/[?#]/);
  const path = cut === -1 ? url : url.slice(0, cut);

  // Restrict the search to the final path segment so dots in the host name
  // or in parent directories are never mistaken for an extension.
  const fileName = path.slice(path.lastIndexOf('/') + 1);
  const dot = fileName.lastIndexOf('.');
  if (dot === -1) return null;

  const ext = fileName.slice(dot + 1).toLowerCase();

  // A Map (rather than a plain object) guarantees that inherited keys such as
  // "constructor" or "toString" can never be returned as a MIME type.
  const mimeTypes = new Map<string, string>([
    ['png', 'image/png'],
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['gif', 'image/gif'],
    ['webp', 'image/webp'],
    ['svg', 'image/svg+xml'],
  ]);

  return mimeTypes.get(ext) ?? null;
}
|
||||
|
||||
/** Register the inbound message handler. Called by the registry before connect(). */
|
||||
onMessage(handler: (msg: InboundMessage) => void): void {
|
||||
this.messageHandler = handler;
|
||||
@@ -159,6 +174,22 @@ export class DiscordAdapter implements ChannelAdapter {
|
||||
// Strip bot mention from the message text
|
||||
const text = message.content.replace(/<@!?\d+>/g, '').trim();
|
||||
|
||||
// ── Collect image attachments ──
// Keep only uploads whose MIME type (as reported on the attachment, or
// failing that inferred from its URL) starts with "image/".
const attachments: Attachment[] = [];
const uploads = message.attachments;
if (uploads && uploads.size > 0) {
  for (const upload of uploads.values()) {
    const mimeType = upload.contentType || this._inferMimeTypeFromUrl(upload.url);
    if (!mimeType || !mimeType.startsWith('image/')) continue;

    const { url, name: filename, size } = upload;
    attachments.push({ mimeType, url, filename, size });
  }
}
|
||||
|
||||
// ── Reset command ──
|
||||
if (text === '!reset' || text === 'reset') {
|
||||
this.messageHandler({
|
||||
@@ -180,6 +211,7 @@ export class DiscordAdapter implements ChannelAdapter {
|
||||
senderId: message.channelId,
|
||||
senderName: message.author.username,
|
||||
text,
|
||||
attachments: attachments.length > 0 ? attachments : undefined,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ export type {
|
||||
ChannelStatus,
|
||||
InboundMessage,
|
||||
OutboundMessage,
|
||||
Attachment,
|
||||
ToolStatusEvent,
|
||||
MessageHandler,
|
||||
} from './types.js';
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
import { App } from '@slack/bolt';
|
||||
import type {
|
||||
Attachment,
|
||||
InboundMessage,
|
||||
OutboundMessage,
|
||||
ChannelAdapter,
|
||||
@@ -35,6 +36,14 @@ interface SlackMessageEvent {
|
||||
text?: string;
|
||||
bot_id?: string;
|
||||
subtype?: string;
|
||||
files?: Array<{
|
||||
id?: string;
|
||||
mimetype?: string;
|
||||
name?: string;
|
||||
size?: number;
|
||||
url_private?: string;
|
||||
url_private_download?: string;
|
||||
}>;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -160,6 +169,56 @@ export class SlackAdapter implements ChannelAdapter {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Download the image files attached to a Slack message and return them as
 * base64-encoded Attachments.
 *
 * Files whose declared MIME type is not `image/*`, and files without a
 * download URL, are skipped. Downloads run concurrently; the returned array
 * keeps the order of `files`. A failed download is logged and omitted, so
 * this method never rejects because of a single bad file.
 *
 * @param files - The `files` array of the Slack message event, if present.
 * @returns One Attachment per successfully downloaded image (possibly empty).
 */
private async extractImageAttachments(
  files?: SlackMessageEvent['files'],
): Promise<Attachment[]> {
  if (!files || files.length === 0) return [];

  // The downloads are independent of each other, so start them all at once.
  const results = await Promise.all(files.map((file) => this.downloadSlackImage(file)));
  return results.filter((item): item is Attachment => item !== null);
}

/**
 * Download a single Slack file and convert it to a base64 Attachment.
 *
 * @param file - One entry of the Slack message event's `files` array.
 * @returns The Attachment, or `null` when the file is not an image, has no
 *          download URL, or could not be downloaded (the reason is logged).
 */
private async downloadSlackImage(
  file: NonNullable<SlackMessageEvent['files']>[number],
): Promise<Attachment | null> {
  // Only process image files
  const mimeType = file.mimetype;
  if (!mimeType?.startsWith('image/')) return null;

  const downloadUrl = file.url_private_download || file.url_private;
  if (!downloadUrl) return null;

  const label = file.name ?? file.id ?? 'unknown';

  try {
    const response = await fetch(downloadUrl, {
      headers: { Authorization: `Bearer ${this.config.botToken}` },
    });

    if (!response.ok) {
      console.warn(`Slack: failed to download file ${label}: HTTP ${response.status}`);
      return null;
    }

    // An HTML body can never be valid image data, so refuse to encode it.
    // NOTE(review): presumably this is a sign-in/error page served with a 2xx
    // status when the token is not accepted — confirm against Slack's file docs.
    const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
    if (contentType.startsWith('text/html')) {
      console.warn(
        `Slack: failed to download file ${label}: received ${contentType} instead of image data`,
      );
      return null;
    }

    const bytes = Buffer.from(await response.arrayBuffer());

    return {
      mimeType,
      data: bytes.toString('base64'),
      filename: file.name,
      // Fall back to the number of bytes actually received when the event carries no size.
      size: file.size ?? bytes.byteLength,
    };
  } catch (error) {
    console.warn(
      `Slack: error downloading file ${label}:`,
      error instanceof Error ? error.message : 'Unknown error',
    );
    return null;
  }
}
|
||||
|
||||
/** Internal: process an inbound Slack message event. */
|
||||
private async handleMessage(message: SlackMessageEvent): Promise<void> {
|
||||
if (!this.messageHandler) return;
|
||||
@@ -200,6 +259,9 @@ export class SlackAdapter implements ChannelAdapter {
|
||||
? await this.resolveUserName(message.user)
|
||||
: undefined;
|
||||
|
||||
// Extract image attachments from Slack file uploads
|
||||
const attachments = await this.extractImageAttachments(message.files);
|
||||
|
||||
// Detect reset command
|
||||
if (text === '!reset' || text === 'reset') {
|
||||
this.messageHandler({
|
||||
@@ -210,6 +272,7 @@ export class SlackAdapter implements ChannelAdapter {
|
||||
text: '!reset',
|
||||
timestamp: Date.now(),
|
||||
metadata: { isCommand: true, command: 'reset' },
|
||||
...(attachments.length > 0 && { attachments }),
|
||||
});
|
||||
return;
|
||||
}
|
||||
@@ -222,6 +285,7 @@ export class SlackAdapter implements ChannelAdapter {
|
||||
senderName,
|
||||
text,
|
||||
timestamp: Date.now(),
|
||||
...(attachments.length > 0 && { attachments }),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import { Bot } from 'grammy';
|
||||
|
||||
import type { HookEngine } from '../../hooks/index.js';
|
||||
import type {
|
||||
Attachment,
|
||||
InboundMessage,
|
||||
OutboundMessage,
|
||||
ChannelAdapter,
|
||||
@@ -44,6 +45,26 @@ export class TelegramAdapter implements ChannelAdapter {
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
/**
 * Fetch a Telegram file by its file ID and return its contents base64-encoded.
 *
 * The file path is looked up through the bot API's `getFile`, then the bytes
 * are downloaded from the corresponding `api.telegram.org/file/...` URL.
 *
 * @param fileId - Telegram file identifier.
 * @returns The base64 string, or `null` when the bot is not available, the
 *          file has no path, the HTTP response is not OK, or an error is
 *          thrown (thrown errors are logged).
 */
private async downloadFileToBase64(fileId: string): Promise<string | null> {
  try {
    const meta = await this.bot?.api.getFile(fileId);
    const remotePath = meta?.file_path;
    if (!remotePath) return null;

    const res = await fetch(
      `https://api.telegram.org/file/bot${this.config.botToken}/${remotePath}`,
    );
    if (res.ok) {
      const bytes = await res.arrayBuffer();
      return Buffer.from(bytes).toString('base64');
    }
    return null;
  } catch (err) {
    console.error(`Failed to download file ${fileId}:`, err);
    return null;
  }
}
|
||||
|
||||
/** Register the inbound message handler. Called by the registry before connect(). */
|
||||
onMessage(handler: (msg: InboundMessage) => void): void {
|
||||
this.messageHandler = handler;
|
||||
@@ -164,6 +185,84 @@ export class TelegramAdapter implements ChannelAdapter {
|
||||
});
|
||||
});
|
||||
|
||||
// ── Photo messages ──
// Downloads the last entry of the photo size list (treated as the largest
// rendition) and forwards it, together with the caption, as an inbound
// message carrying a single JPEG attachment.

this.bot.on('message:photo', async (ctx) => {
  if (!this.messageHandler) return;

  const sizes = ctx.message.photo;
  if (!sizes || sizes.length === 0) return;
  const picked = sizes[sizes.length - 1];

  await ctx.replyWithChatAction('typing');

  const base64 = await this.downloadFileToBase64(picked.file_id);
  if (!base64) {
    // Without the image bytes there is nothing to forward.
    console.error(`Failed to download photo ${picked.file_id}`);
    return;
  }

  const image: Attachment = {
    mimeType: 'image/jpeg',
    data: base64,
    filename: `photo_${picked.file_unique_id}.jpg`,
    size: picked.file_size,
  };

  this.messageHandler({
    id: String(ctx.message.message_id),
    channel: 'telegram',
    senderId: String(ctx.chat.id),
    senderName: ctx.from?.first_name,
    // A photo without a caption is forwarded with empty text.
    text: ctx.message.caption ?? '',
    attachments: [image],
    timestamp: Date.now(),
  });
});
|
||||
|
||||
// ── Documents that are images ──
// Documents whose declared MIME type is not image/* are ignored here; image
// documents are downloaded and forwarded like photos, keeping their own
// MIME type and file name.

this.bot.on('message:document', async (ctx) => {
  if (!this.messageHandler) return;

  const { document: doc } = ctx.message;
  if (!doc) return;

  // A missing MIME type is treated as "not an image".
  const mimeType = doc.mime_type ?? '';
  const isImage = mimeType.startsWith('image/');
  if (!isImage) return;

  await ctx.replyWithChatAction('typing');

  const base64 = await this.downloadFileToBase64(doc.file_id);
  if (!base64) {
    console.error(`Failed to download document ${doc.file_id}`);
    return;
  }

  const image: Attachment = {
    mimeType,
    data: base64,
    // Fall back to the unique file ID when the upload carries no file name.
    filename: doc.file_name ?? doc.file_unique_id,
    size: doc.file_size,
  };

  this.messageHandler({
    id: String(ctx.message.message_id),
    channel: 'telegram',
    senderId: String(ctx.chat.id),
    senderName: ctx.from?.first_name,
    text: ctx.message.caption ?? '',
    attachments: [image],
    timestamp: Date.now(),
  });
});
|
||||
|
||||
// ── Start long polling ──
|
||||
|
||||
this.bot.start({
|
||||
|
||||
@@ -6,6 +6,20 @@
|
||||
* the ChannelAdapter interface to provide a uniform messaging API.
|
||||
*/
|
||||
|
||||
/**
 * A piece of media travelling with a channel message, in either direction.
 *
 * The payload is carried inline as base64 (`data`), by reference (`url`),
 * or both; every field except `mimeType` is optional.
 */
export interface Attachment {
  /** MIME type of the payload, e.g. "image/jpeg", "audio/ogg" or "application/pdf". */
  mimeType: string;

  /** Payload as a base64 string — the preferred form for model APIs. */
  data?: string;

  /** Location the payload can be downloaded from, as an alternative to `data`. */
  url?: string;

  /** File name as originally provided, when there is one. */
  filename?: string;

  /** Payload size in bytes, when known. */
  size?: number;
}
|
||||
|
||||
/** Inbound message received from a channel platform. */
|
||||
export interface InboundMessage {
|
||||
/** Platform message ID. */
|
||||
@@ -18,6 +32,8 @@ export interface InboundMessage {
|
||||
senderName?: string;
|
||||
/** Message text. */
|
||||
text: string;
|
||||
/** Media attachments (images, audio, documents). */
|
||||
attachments?: Attachment[];
|
||||
/** ID of message being replied to. */
|
||||
replyTo?: string;
|
||||
/** Unix ms. */
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
import { Client, LocalAuth } from 'whatsapp-web.js';
|
||||
import type {
|
||||
Attachment,
|
||||
InboundMessage,
|
||||
OutboundMessage,
|
||||
ChannelAdapter,
|
||||
@@ -37,6 +38,12 @@ interface WhatsAppMessage {
|
||||
fromMe: boolean;
|
||||
author?: string;
|
||||
_data?: { notifyName?: string };
|
||||
/** Whether this message contains media (image, video, audio, document). */
|
||||
hasMedia?: boolean;
|
||||
/** Message type (e.g. "image", "video", "chat"). */
|
||||
type?: string;
|
||||
/** Download the media attached to this message. */
|
||||
downloadMedia?: () => Promise<{ mimetype: string; data: string; filename?: string } | null>;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -149,7 +156,7 @@ export class WhatsAppAdapter implements ChannelAdapter {
|
||||
}
|
||||
|
||||
/** Internal: process an inbound WhatsApp message. */
|
||||
private handleMessage(message: WhatsAppMessage): void {
|
||||
private async handleMessage(message: WhatsAppMessage): Promise<void> {
|
||||
if (!this.messageHandler) return;
|
||||
|
||||
// Ignore messages from the bot itself
|
||||
@@ -204,6 +211,26 @@ export class WhatsAppAdapter implements ChannelAdapter {
|
||||
|
||||
const senderName = message._data?.notifyName;
|
||||
|
||||
// Extract image attachments if the message has media.
// Only media whose MIME type starts with "image/" is kept; a failed download
// is logged and the message is then processed without attachments.
const attachments: Attachment[] = [];
if (message.hasMedia) {
  try {
    // downloadMedia is declared (as optional) on WhatsAppMessage, so call it
    // through optional chaining instead of casting the message to `any`.
    const media = await message.downloadMedia?.();
    // The typeof check stays as a runtime guard: the value comes from the
    // WhatsApp client library, not from code checked against this interface.
    if (media && typeof media.mimetype === 'string' && media.mimetype.startsWith('image/')) {
      attachments.push({
        mimeType: media.mimetype,
        data: media.data,
        filename: media.filename,
      });
    }
  } catch (error) {
    console.error(
      'Failed to download WhatsApp media:',
      error instanceof Error ? error.message : 'Unknown error',
    );
  }
}
|
||||
|
||||
// Detect reset command
|
||||
if (text === '!reset' || text === 'reset') {
|
||||
this.messageHandler({
|
||||
@@ -214,6 +241,7 @@ export class WhatsAppAdapter implements ChannelAdapter {
|
||||
text: '!reset',
|
||||
timestamp: Date.now(),
|
||||
metadata: { isCommand: true, command: 'reset' },
|
||||
...(attachments.length > 0 ? { attachments } : {}),
|
||||
});
|
||||
return;
|
||||
}
|
||||
@@ -226,6 +254,7 @@ export class WhatsAppAdapter implements ChannelAdapter {
|
||||
senderName,
|
||||
text,
|
||||
timestamp: Date.now(),
|
||||
...(attachments.length > 0 ? { attachments } : {}),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user