feat: add multimodal media pipeline for image support across all providers and channels

Widen `Message.content` from `string` to `string | MessageContentPart[]` to support multimodal content. Add an `Attachment` type to the channel layer, media conversion utilities, and image extraction to all channel adapters (Telegram, Discord, Slack, WhatsApp). Update all model clients (Anthropic, OpenAI, Gemini, Bedrock) to convert structured content to provider-specific formats. Fix downstream consumers (tokens, compaction, TUI, local models) to handle the widened type via the `getMessageText()` helper.
2026-02-06 17:17:21 -08:00
parent cfdd448495
commit a515912537
22 changed files with 788 additions and 37 deletions
@@ -5,6 +5,7 @@ import type { ToolRegistry } from '../../tools/registry.js';
 import type { ToolExecutor } from '../../tools/executor.js';
 import type { MemoryStore } from '../../memory/store.js';
 import type { ToolPolicyContext } from '../../tools/policy.js';
+import type { Attachment } from '../../channels/types.js';
 import { NativeAgent } from './agent.js';
 import type { ToolUseEvent } from './agent.js';
 import { shouldCompact } from '../../context/tokens.js';
@@ -209,10 +210,10 @@ export class AgentOrchestrator {
   * When compaction is configured, checks whether the conversation history
   * exceeds the context window threshold and compacts it before processing.
   */
-  async process(userMessage: string): Promise<string> {
+  async process(userMessage: string, attachments?: Attachment[]): Promise<string> { // attachments is optional, so single-argument calls still type-check
    this._injectMemoryContext();
    await this.compactIfNeeded();
-    return this._agent.process(userMessage);
+    return this._agent.process(userMessage, attachments); // pass attachments through to the agent as received (may be undefined)
  }

  /**