feat: add multimodal media pipeline for image support across all providers and channels

Widen Message.content from `string` to `string | MessageContentPart[]` to
support multimodal content. Add an Attachment type to the channel layer, add
media conversion utilities, and add image extraction to all channel adapters
(Telegram, Discord, Slack, WhatsApp). Update all model clients (Anthropic,
OpenAI, Gemini, Bedrock) to convert structured content to provider-specific
formats. Fix downstream consumers (tokens, compaction, TUI, local models) to
handle the widened type via the getMessageText() helper.
This commit is contained in:
William Valentin
2026-02-06 17:17:21 -08:00
parent cfdd448495
commit a515912537
22 changed files with 788 additions and 37 deletions
+2 -1
View File
@@ -3,6 +3,7 @@ import type { AgentOrchestrator } from '../backends/native/orchestrator.js';
import type { MemoryStore } from '../memory/store.js';
import { COMPACTION_SYSTEM_PROMPT, MEMORY_EXTRACTION_PROMPT } from '../backends/native/prompts.js';
import { estimateMessageTokens } from './tokens.js';
import { getMessageText } from '../models/media.js';
export interface CompactionConfig {
/** Percentage of context window that triggers compaction (default: 80). */
@@ -52,7 +53,7 @@ export async function compactHistory(opts: {
const toCompact = messages.slice(0, -keepCount);
const toKeep = messages.slice(-keepCount);
-const formattedConversation = toCompact.map((msg) => `${msg.role}: ${msg.content}`).join('\n\n');
+const formattedConversation = toCompact.map((msg) => `${msg.role}: ${getMessageText(msg)}`).join('\n\n');
const tier = orchestrator.getDelegationTier('compaction');