feat(tui,gemini): implement verbose transfer and url image fetch
This commit is contained in:
@@ -108,6 +108,45 @@ describe('GeminiClient', () => {
|
||||
});
|
||||
});
|
||||
|
||||
it('fetches URL-based images and sends them as inlineData', async () => {
  // The mocked response reports image/jpeg while the message part declares
  // image/png; the expectation below checks that the response header value
  // is the one forwarded to Gemini.
  const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValue({
    ok: true,
    headers: new Headers({ 'content-type': 'image/jpeg' }),
    arrayBuffer: async () => Uint8Array.from([1, 2, 3]).buffer,
  } as Response);

  try {
    const client = new GeminiClient({
      apiKey: 'test-key',
      model: 'gemini-2.0-flash',
    });

    await client.chat({
      messages: [{
        role: 'user',
        content: [
          { type: 'text', text: 'Analyze this' },
          { type: 'image', source: { type: 'url', media_type: 'image/png', url: 'https://example.com/image.jpg' } },
        ],
      }],
    });

    expect(fetchSpy).toHaveBeenCalledWith('https://example.com/image.jpg');
    expect(mockGenerateContent).toHaveBeenCalledWith({
      contents: [{
        role: 'user',
        parts: [
          { text: 'Analyze this' },
          {
            inlineData: {
              mimeType: 'image/jpeg',
              data: Buffer.from([1, 2, 3]).toString('base64'),
            },
          },
        ],
      }],
    });
  } finally {
    // Restore the real fetch even when an assertion above throws, so the
    // mock cannot leak into later tests.
    fetchSpy.mockRestore();
  }
});
|
||||
|
||||
it('maps MAX_TOKENS finish reason', async () => {
|
||||
mockGenerateContent.mockResolvedValueOnce(
|
||||
makeResponse([{ text: 'Truncated...' }], 'MAX_TOKENS'),
|
||||
|
||||
+36
-9
@@ -44,7 +44,7 @@ export class GeminiClient implements ModelClient {
|
||||
|
||||
async chat(request: ChatRequest): Promise<ChatResponse> {
|
||||
const model = this.getModel(request);
|
||||
const contents = convertMessages(request.messages);
|
||||
const contents = await convertMessages(request.messages);
|
||||
|
||||
const result = await model.generateContent({ contents });
|
||||
const response = result.response;
|
||||
@@ -100,7 +100,7 @@ export class GeminiClient implements ModelClient {
|
||||
|
||||
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
|
||||
const model = this.getModel(request);
|
||||
const contents = convertMessages(request.messages);
|
||||
const contents = await convertMessages(request.messages);
|
||||
|
||||
try {
|
||||
const result = await model.generateContentStream({ contents });
|
||||
@@ -162,8 +162,8 @@ export class GeminiClient implements ModelClient {
|
||||
}
|
||||
|
||||
/** Convert Flynn's Message[] to Gemini Content[] format, including multimodal parts */
|
||||
function convertMessages(messages: Message[]): Content[] {
|
||||
return messages.map(m => {
|
||||
async function convertMessages(messages: Message[]): Promise<Content[]> {
|
||||
return Promise.all(messages.map(async (m) => {
|
||||
const role = m.role === 'assistant' ? 'model' : 'user';
|
||||
|
||||
if (typeof m.content === 'string') {
|
||||
@@ -171,7 +171,7 @@ function convertMessages(messages: Message[]): Content[] {
|
||||
}
|
||||
|
||||
// Multimodal content — convert each part
|
||||
const parts: Part[] = m.content.map(part => {
|
||||
const parts = await Promise.all(m.content.map(async (part): Promise<Part> => {
|
||||
if (part.type === 'text') {
|
||||
return { text: part.text };
|
||||
}
|
||||
@@ -184,8 +184,12 @@ function convertMessages(messages: Message[]): Content[] {
|
||||
},
|
||||
};
|
||||
}
|
||||
// URL-based images — Gemini doesn't natively support URL refs in inline data,
|
||||
// so we pass as a text description. In production, you'd want to fetch + base64 encode.
|
||||
if (part.source.type === 'url' && part.source.url) {
|
||||
const inlineImage = await fetchImageAsInlineData(part.source.url, part.source.media_type);
|
||||
if (inlineImage) {
|
||||
return inlineImage;
|
||||
}
|
||||
}
|
||||
return { text: `[Image: ${part.source.url ?? 'unavailable'}]` };
|
||||
}
|
||||
// Audio part — Gemini supports native audio via inlineData (same format as images)
|
||||
@@ -198,10 +202,33 @@ function convertMessages(messages: Message[]): Content[] {
|
||||
};
|
||||
}
|
||||
return { text: JSON.stringify(part) };
|
||||
});
|
||||
}));
|
||||
|
||||
return { role, parts };
|
||||
});
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Download an image and wrap it as a Gemini `inlineData` part.
 *
 * This is best-effort: a non-OK response, an empty body, or any error thrown
 * while fetching or reading the body yields `null` instead of an exception,
 * so the caller can fall back to another representation.
 *
 * @param url - Location of the image to download.
 * @param fallbackMimeType - MIME type to use when the response has no usable
 *   Content-Type header (missing, or empty once parameters are stripped).
 * @returns A part carrying the base64-encoded bytes and their MIME type, or
 *   `null` when the image could not be retrieved.
 */
async function fetchImageAsInlineData(url: string, fallbackMimeType: string): Promise<Part | null> {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      return null;
    }

    // Keep only the media type, dropping parameters such as "; charset=...".
    // `||` (not `??`) is intentional: a header like "; charset=utf-8" parses
    // to an empty string, which must fall back as well.
    const headerMimeType = response.headers.get('content-type')?.split(';')[0]?.trim();
    const mimeType = headerMimeType || fallbackMimeType;

    const bytes = Buffer.from(await response.arrayBuffer());
    if (bytes.length === 0) {
      // An empty body cannot be a valid image.
      return null;
    }

    return {
      inlineData: {
        mimeType,
        data: bytes.toString('base64'),
      },
    };
  } catch {
    // Network failures and body-read errors are treated as "image unavailable".
    return null;
  }
}
|
||||
|
||||
/** Convert Flynn's ToolDefinition to Gemini FunctionDeclaration format */
|
||||
|
||||
Reference in New Issue
Block a user