feat: add multimodal media pipeline for image support across all providers and channels
Widen Message.content from string to string | MessageContentPart[] to support multimodal content. Add Attachment type to channel layer, media conversion utilities, and image extraction to all channel adapters (Telegram, Discord, Slack, WhatsApp). Update all model clients (Anthropic, OpenAI, Gemini, Bedrock) to convert structured content to provider-specific formats. Fix downstream consumers (tokens, compaction, TUI, local models) to handle the widened type via getMessageText() helper.
This commit is contained in:
+30
-2
@@ -1,5 +1,5 @@
|
||||
import OpenAI from 'openai';
|
||||
import type { ChatRequest, ChatResponse, ModelClient } from './types.js';
|
||||
import type { ChatRequest, ChatResponse, ModelClient, MessageContentPart } from './types.js';
|
||||
|
||||
export interface OpenAIClientConfig {
|
||||
apiKey?: string;
|
||||
@@ -8,6 +8,31 @@ export interface OpenAIClientConfig {
|
||||
baseURL?: string;
|
||||
}
|
||||
|
||||
/**
 * Convert Flynn message content to the OpenAI chat-completions format.
 *
 * A plain string is passed through unchanged. Structured content is mapped
 * part by part:
 *  - `text` parts become `{ type: 'text', text }`;
 *  - `image` parts become `{ type: 'image_url', image_url: { url } }`, where
 *    `url` is a `data:` URI built from the base64 payload for base64 sources,
 *    or the source URL otherwise;
 *  - any other part is serialised with `JSON.stringify` into a text part.
 *
 * @param content - Message content, either a string or a list of content parts.
 * @returns The same string, or one OpenAI content part per input part, in order.
 * @throws Error if an image part has no payload (`data` for base64 sources,
 *   `url` otherwise). Without this check the literal text "undefined" would be
 *   embedded in the request.
 */
function toOpenAIContent(content: string | MessageContentPart[]): string | OpenAI.ChatCompletionContentPart[] {
  if (typeof content === 'string') {
    return content;
  }

  return content.map((part): OpenAI.ChatCompletionContentPart => {
    if (part.type === 'text') {
      return { type: 'text', text: part.text };
    }

    if (part.type === 'image') {
      const { source } = part;

      // OpenAI accepts data URIs or regular URLs in the same `url` field.
      if (source.type === 'base64') {
        if (source.data == null) {
          throw new Error('Image part with base64 source is missing its data payload');
        }
        return {
          type: 'image_url',
          image_url: { url: `data:${source.media_type};base64,${source.data}` },
        };
      }

      if (source.url == null) {
        throw new Error('Image part with URL source is missing its url');
      }
      return { type: 'image_url', image_url: { url: source.url } };
    }

    // Fallback — shouldn't happen; keep the data visible rather than dropping it.
    return { type: 'text', text: JSON.stringify(part) };
  });
}
|
||||
|
||||
export class OpenAIClient implements ModelClient {
|
||||
private client: OpenAI;
|
||||
private model: string;
|
||||
@@ -30,7 +55,10 @@ export class OpenAIClient implements ModelClient {
|
||||
}
|
||||
|
||||
for (const msg of request.messages) {
|
||||
messages.push({ role: msg.role, content: msg.content });
|
||||
messages.push({
|
||||
role: msg.role,
|
||||
content: toOpenAIContent(msg.content),
|
||||
} as OpenAI.ChatCompletionMessageParam);
|
||||
}
|
||||
|
||||
// Build params, conditionally including tools
|
||||
|
||||
Reference in New Issue
Block a user