feat(tui,gemini): implement verbose transfer and url image fetch

This commit is contained in:
William Valentin
2026-02-17 10:58:14 -08:00
parent 77ae15b3e2
commit e3b6f9df7c
8 changed files with 254 additions and 30 deletions
+39
View File
@@ -108,6 +108,45 @@ describe('GeminiClient', () => {
});
});
// Verifies that an image referenced by URL is downloaded and forwarded to Gemini
// as base64 `inlineData`, and that the response's Content-Type header
// ('image/jpeg') wins over the media_type declared in the message ('image/png').
it('fetches URL-based images and sends them as inlineData', async () => {
  // Stub the global fetch so no network request is made; only the members
  // listed here are provided on the fake response.
  const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValue({
    ok: true,
    headers: new Headers({ 'content-type': 'image/jpeg' }),
    arrayBuffer: async () => Uint8Array.from([1, 2, 3]).buffer,
  } as Response);

  try {
    const client = new GeminiClient({
      apiKey: 'test-key',
      model: 'gemini-2.0-flash',
    });

    await client.chat({
      messages: [{
        role: 'user',
        content: [
          { type: 'text', text: 'Analyze this' },
          { type: 'image', source: { type: 'url', media_type: 'image/png', url: 'https://example.com/image.jpg' } },
        ],
      }],
    });

    expect(fetchSpy).toHaveBeenCalledWith('https://example.com/image.jpg');
    expect(mockGenerateContent).toHaveBeenCalledWith({
      contents: [{
        role: 'user',
        parts: [
          { text: 'Analyze this' },
          {
            inlineData: {
              mimeType: 'image/jpeg',
              data: Buffer.from([1, 2, 3]).toString('base64'),
            },
          },
        ],
      }],
    });
  } finally {
    // Restore even when an expectation above throws; otherwise the stubbed
    // global fetch would leak into every test that runs afterwards.
    fetchSpy.mockRestore();
  }
});
it('maps MAX_TOKENS finish reason', async () => {
mockGenerateContent.mockResolvedValueOnce(
makeResponse([{ text: 'Truncated...' }], 'MAX_TOKENS'),
+36 -9
View File
@@ -44,7 +44,7 @@ export class GeminiClient implements ModelClient {
async chat(request: ChatRequest): Promise<ChatResponse> {
const model = this.getModel(request);
const contents = convertMessages(request.messages);
const contents = await convertMessages(request.messages);
const result = await model.generateContent({ contents });
const response = result.response;
@@ -100,7 +100,7 @@ export class GeminiClient implements ModelClient {
async *chatStream(request: ChatRequest): AsyncIterable<ChatStreamEvent> {
const model = this.getModel(request);
const contents = convertMessages(request.messages);
const contents = await convertMessages(request.messages);
try {
const result = await model.generateContentStream({ contents });
@@ -162,8 +162,8 @@ export class GeminiClient implements ModelClient {
}
/** Convert Flynn's Message[] to Gemini Content[] format, including multimodal parts */
function convertMessages(messages: Message[]): Content[] {
return messages.map(m => {
async function convertMessages(messages: Message[]): Promise<Content[]> {
return Promise.all(messages.map(async (m) => {
const role = m.role === 'assistant' ? 'model' : 'user';
if (typeof m.content === 'string') {
@@ -171,7 +171,7 @@ function convertMessages(messages: Message[]): Content[] {
}
// Multimodal content — convert each part
const parts: Part[] = m.content.map(part => {
const parts = await Promise.all(m.content.map(async (part): Promise<Part> => {
if (part.type === 'text') {
return { text: part.text };
}
@@ -184,8 +184,12 @@ function convertMessages(messages: Message[]): Content[] {
},
};
}
// URL-based images — Gemini doesn't natively support URL refs in inline data,
// so we pass as a text description. In production, you'd want to fetch + base64 encode.
if (part.source.type === 'url' && part.source.url) {
const inlineImage = await fetchImageAsInlineData(part.source.url, part.source.media_type);
if (inlineImage) {
return inlineImage;
}
}
return { text: `[Image: ${part.source.url ?? 'unavailable'}]` };
}
// Audio part — Gemini supports native audio via inlineData (same format as images)
@@ -198,10 +202,33 @@ function convertMessages(messages: Message[]): Content[] {
};
}
return { text: JSON.stringify(part) };
});
}));
return { role, parts };
});
}));
}
/**
 * Download an image and package it as a Gemini `inlineData` part.
 *
 * This is deliberately best-effort: a network error, a non-OK HTTP status, or
 * an empty body all resolve to `null` instead of throwing, so the caller can
 * substitute a placeholder for the image.
 *
 * NOTE(review): `url` is passed to the global `fetch` unchanged and the whole
 * body is buffered in memory. If message content can be untrusted, consider an
 * allow-list / SSRF guard and a size cap — confirm against the callers.
 *
 * @param url - Location of the image to download.
 * @param fallbackMimeType - MIME type to report when the response carries no
 *   usable `Content-Type` header (missing, blank, or parameters only).
 * @returns A part holding the base64-encoded bytes and their MIME type, or
 *   `null` when the image could not be retrieved.
 */
async function fetchImageAsInlineData(url: string, fallbackMimeType: string): Promise<Part | null> {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      return null;
    }

    // Keep only the media type, dropping parameters such as "; charset=binary".
    // A header that is blank or holds nothing but parameters leaves an empty
    // string here, which must not be forwarded as the MIME type.
    const headerMimeType = response.headers.get('content-type')?.split(';', 1)[0]?.trim();
    // `||` (not `??`) on purpose: an empty string must also trigger the fallback.
    const mimeType = headerMimeType || fallbackMimeType;

    const data = Buffer.from(await response.arrayBuffer()).toString('base64');
    if (data.length === 0) {
      // Empty body — there is nothing to inline.
      return null;
    }

    return {
      inlineData: {
        mimeType,
        data,
      },
    };
  } catch {
    // Best-effort by design: any failure degrades to "no image".
    return null;
  }
}
/** Convert Flynn's ToolDefinition to Gemini FunctionDeclaration format */