feat: add multimodal media pipeline for image support across all providers and channels
Widen Message.content from string to string | MessageContentPart[] to support multimodal content. Add Attachment type to channel layer, media conversion utilities, and image extraction to all channel adapters (Telegram, Discord, Slack, WhatsApp). Update all model clients (Anthropic, OpenAI, Gemini, Bedrock) to convert structured content to provider-specific formats. Fix downstream consumers (tokens, compaction, TUI, local models) to handle the widened type via getMessageText() helper.
This commit is contained in:
+42
-6
@@ -1,6 +1,6 @@
|
||||
import Anthropic from '@anthropic-ai/sdk';
|
||||
import type { Message } from '@anthropic-ai/sdk/resources/messages/messages.js';
|
||||
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient } from './types.js';
|
||||
import type { Message as AnthropicMessage } from '@anthropic-ai/sdk/resources/messages/messages.js';
|
||||
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, Message, MessageContentPart } from './types.js';
|
||||
|
||||
export interface AnthropicClientConfig {
|
||||
apiKey?: string; // Falls back to ANTHROPIC_API_KEY env var
|
||||
@@ -9,6 +9,42 @@ export interface AnthropicClientConfig {
|
||||
maxTokens?: number;
|
||||
}
|
||||
|
||||
/**
 * Convert Flynn message content into the content shape sent to Anthropic.
 *
 * - A plain string is returned unchanged.
 * - `text` parts become `{ type: 'text', text }` blocks.
 * - `image` parts become `{ type: 'image', source }` blocks: a base64 source
 *   when `part.source.type === 'base64'`, a URL source for anything else.
 * - Parts of any other type are passed through untouched.
 *
 * @param content - Either a plain string or an array of structured parts.
 * @returns The original string, or an array of Anthropic-style content blocks.
 * @throws Error when an image part lacks the payload its source requires
 *   (`data` for base64 sources, `url` otherwise).
 */
function toAnthropicContent(content: string | MessageContentPart[]): string | unknown[] {
  if (typeof content === 'string') {
    return content;
  }

  return content.map((part, index) => {
    switch (part.type) {
      case 'text':
        return { type: 'text', text: part.text };

      case 'image': {
        const { source } = part;

        if (source.type === 'base64') {
          // Fail here with a precise message instead of shipping
          // `data: undefined` and getting an opaque rejection from the API.
          if (source.data == null) {
            throw new Error(
              `Image content part at index ${index} has a base64 source but no data`,
            );
          }
          return {
            type: 'image',
            source: {
              type: 'base64',
              media_type: source.media_type,
              data: source.data,
            },
          };
        }

        // Every non-base64 source is treated as URL-based.
        if (source.url == null) {
          throw new Error(`Image content part at index ${index} has no url`);
        }
        return {
          type: 'image',
          source: {
            type: 'url',
            url: source.url,
          },
        };
      }

      default:
        // Unrecognized part types are forwarded as-is.
        return part;
    }
  });
}
|
||||
|
||||
export class AnthropicClient implements ModelClient {
|
||||
private client: Anthropic;
|
||||
private model: string;
|
||||
@@ -30,7 +66,7 @@ export class AnthropicClient implements ModelClient {
|
||||
system: request.system,
|
||||
messages: request.messages.map((m) => ({
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
content: toAnthropicContent(m.content),
|
||||
})),
|
||||
};
|
||||
|
||||
@@ -38,7 +74,7 @@ export class AnthropicClient implements ModelClient {
|
||||
params.tools = request.tools;
|
||||
}
|
||||
|
||||
const response = await this.client.messages.create(params as unknown as Parameters<typeof this.client.messages.create>[0]) as Message;
|
||||
const response = await this.client.messages.create(params as unknown as Parameters<typeof this.client.messages.create>[0]) as AnthropicMessage;
|
||||
|
||||
const textContent = response.content.find((c) => c.type === 'text');
|
||||
const content = textContent?.type === 'text' ? textContent.text : '';
|
||||
@@ -65,8 +101,8 @@ export class AnthropicClient implements ModelClient {
|
||||
system: request.system,
|
||||
messages: request.messages.map((m) => ({
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
})),
|
||||
content: toAnthropicContent(m.content),
|
||||
})) as Parameters<typeof this.client.messages.stream>[0]['messages'],
|
||||
});
|
||||
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user