32e1a2724a
- Add AudioSource interface and 'audio' variant to MessageContentPart union - Update buildUserMessage() to create audio content parts from attachments - Add attachmentToAudioSource(), hasAudio(), stripAudioParts() helpers - Gemini: native audio via inlineData (same format as images) - OpenAI/GitHub: native audio via input_audio content parts - Anthropic/Bedrock: graceful fallback to transcript text - Update getMessageTextWithTools() to handle audio blocks for local models
149 lines
4.8 KiB
TypeScript
149 lines
4.8 KiB
TypeScript
import Anthropic from '@anthropic-ai/sdk';
|
|
import type { Message as AnthropicMessage } from '@anthropic-ai/sdk/resources/messages/messages.js';
|
|
import type { ChatRequest, ChatResponse, ChatStreamEvent, ModelClient, Message, MessageContentPart } from './types.js';
|
|
|
|
/**
 * Construction options for {@link AnthropicClient}.
 */
export interface AnthropicClientConfig {
  /** Model identifier sent with every request. */
  model: string;

  /** API key; when omitted, falls back to the ANTHROPIC_API_KEY env var. */
  apiKey?: string;

  /** Alternative credential: use an auth token instead of an API key. */
  authToken?: string;

  /** Default `max_tokens` for requests that do not specify their own. */
  maxTokens?: number;
}
|
|
|
|
/**
 * Translate Flynn message content into the shape Anthropic's Messages API
 * accepts for a message's `content` field.
 *
 * - A plain string is passed through untouched.
 * - `text` parts become Anthropic text blocks.
 * - `image` parts become image blocks with either a base64 or a URL source.
 * - `audio` parts are downgraded to text: Anthropic doesn't support native
 *   audio input, so the transcript (when present) is sent instead.
 * - Any other part is forwarded as-is.
 *
 * @param content Either a raw string or a list of content parts.
 * @returns The string unchanged, or an array of Anthropic-style content blocks.
 */
function toAnthropicContent(content: string | MessageContentPart[]): string | unknown[] {
  if (typeof content === 'string') {
    return content;
  }

  const blocks: unknown[] = [];

  for (const part of content) {
    switch (part.type) {
      case 'text':
        blocks.push({ type: 'text', text: part.text });
        break;

      case 'image': {
        const { source } = part;
        // Anything that is not explicitly base64 is treated as a URL-based image.
        const imageSource =
          source.type === 'base64'
            ? { type: 'base64', media_type: source.media_type, data: source.data! }
            : { type: 'url', url: source.url! };
        blocks.push({ type: 'image', source: imageSource });
        break;
      }

      case 'audio': {
        // Transcript fallback; an empty or missing transcript yields a placeholder.
        const transcript = part.source.transcript;
        const text = transcript
          ? `[Voice message]: ${transcript}`
          : '[Audio message received but no transcript available]';
        blocks.push({ type: 'text', text });
        break;
      }

      default:
        blocks.push(part);
    }
  }

  return blocks;
}
|
|
|
|
/**
 * {@link ModelClient} implementation backed by the Anthropic Messages API.
 *
 * Message content is converted with `toAnthropicContent` before being sent.
 */
export class AnthropicClient implements ModelClient {
  private readonly client: Anthropic;
  private readonly model: string;
  private readonly defaultMaxTokens: number;

  /**
   * @param config Credentials, model name and optional default token limit.
   *   `maxTokens` defaults to 4096 when omitted.
   */
  constructor(config: AnthropicClientConfig) {
    this.client = new Anthropic({
      apiKey: config.apiKey,
      authToken: config.authToken,
    });
    this.model = config.model;
    this.defaultMaxTokens = config.maxTokens ?? 4096;
  }

  /**
   * Send a single, non-streaming chat request.
   *
   * @param request System prompt, messages and optional tools / thinking flag.
   * @returns The first text block's text (or `''` if there is none), the stop
   *   reason (defaulting to `'end_turn'`), token usage, and — only when
   *   present — `toolCalls` and `thinkingContent`.
   */
  async chat(request: ChatRequest): Promise<ChatResponse> {
    const params: Record<string, unknown> = {
      model: this.model,
      max_tokens: request.maxTokens ?? this.defaultMaxTokens,
      system: request.system,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: toAnthropicContent(m.content),
      })),
    };

    if (request.tools && request.tools.length > 0) {
      params.tools = request.tools;
    }

    // Extended thinking mode: raise max_tokens to at least 16384 so it stays
    // well above the fixed 4096-token thinking budget.
    if (request.thinking) {
      params.max_tokens = Math.max(params.max_tokens as number, 16384);
      params.thinking = { type: 'enabled', budget_tokens: 4096 };
    }

    const response = await this.client.messages.create(
      params as unknown as Parameters<typeof this.client.messages.create>[0],
    ) as AnthropicMessage;

    const textContent = response.content.find((c) => c.type === 'text');
    const content = textContent?.type === 'text' ? textContent.text : '';

    // Thinking blocks carry their text in the `thinking` field (not `text`),
    // so read that field; anything that is not a string is ignored.
    const thinkingBlock = response.content.find((c) => c.type === 'thinking');
    const thinkingContent =
      thinkingBlock && 'thinking' in thinkingBlock && typeof thinkingBlock.thinking === 'string'
        ? thinkingBlock.thinking
        : undefined;

    const toolCalls = response.content
      .filter((c): c is { type: 'tool_use'; id: string; name: string; input: unknown } => c.type === 'tool_use')
      .map((c) => ({ id: c.id, name: c.name, args: c.input }));

    return {
      content,
      stopReason: response.stop_reason ?? 'end_turn',
      usage: {
        inputTokens: response.usage.input_tokens,
        outputTokens: response.usage.output_tokens,
      },
      ...(toolCalls.length > 0 ? { toolCalls } : {}),
      ...(thinkingContent ? { thinkingContent } : {}),
    };
  }

  /**
   * Stream a chat response as incremental events.
   *
   * Yields a `content` event for every text delta, then a single `done` event
   * with token usage. Failures while iterating the stream are reported as an
   * `error` event instead of being thrown. Tools and thinking settings on the
   * request are not forwarded by this method.
   *
   * @param request System prompt, messages and optional token limit.
   */
  async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
    const stream = this.client.messages.stream({
      model: this.model,
      max_tokens: request.maxTokens ?? this.defaultMaxTokens,
      system: request.system,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: toAnthropicContent(m.content),
      })) as Parameters<typeof this.client.messages.stream>[0]['messages'],
    });

    try {
      for await (const event of stream) {
        // Only text deltas are surfaced; other event types are skipped.
        if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
          yield { type: 'content', content: event.delta.text };
        }
      }

      const finalMessage = await stream.finalMessage();
      yield {
        type: 'done',
        usage: {
          inputTokens: finalMessage.usage.input_tokens,
          outputTokens: finalMessage.usage.output_tokens,
        },
      };
    } catch (error) {
      yield {
        type: 'error',
        error: error instanceof Error ? error : new Error(String(error)),
      };
    }
  }
}
|