295 lines
9.4 KiB
TypeScript
295 lines
9.4 KiB
TypeScript
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
|
import type { ModelClient } from '../../models/types.js';
|
|
import { createImageAnalyzeTool } from './image-analyze.js';
|
|
|
|
describe('image.analyze tool', () => {
|
|
// Test double handed to the tool factory in every case; `chat` is the only member stubbed.
let mockClient: ModelClient & { chat: ReturnType<typeof vi.fn> };

// Build a brand-new double before each case so stubbed replies never carry over.
beforeEach(() => {
  const chat = vi.fn();
  mockClient = { chat };
});

// Clear all mocks once the case has finished.
afterEach(() => {
  vi.clearAllMocks();
});
|
|
|
|
// The factory must expose the expected tool name and declare no required inputs.
it('has correct metadata', () => {
  const { name, inputSchema } = createImageAnalyzeTool(mockClient);

  expect(name).toBe('image.analyze');
  expect(inputSchema.required).toHaveLength(0);
});
|
|
|
|
// Happy path for a remote image: the tool output mirrors the model reply, and the
// single chat request carries the default prompt plus a URL-sourced image part.
it('analyzes image from URL', async () => {
  const modelReply = {
    content: 'This is a beautiful sunset over the ocean.',
    stopReason: 'end_turn',
    usage: { inputTokens: 100, outputTokens: 50 },
  };
  mockClient.chat = vi.fn().mockResolvedValueOnce(modelReply);

  const result = await createImageAnalyzeTool(mockClient).execute({
    url: 'https://example.com/image.jpg',
  });

  expect(result.success).toBe(true);
  expect(result.output).toBe('This is a beautiful sunset over the ocean.');
  expect(mockClient.chat).toHaveBeenCalledTimes(1);

  // Expected request shape, assembled bottom-up for readability.
  const expectedImagePart = {
    type: 'image',
    source: expect.objectContaining({
      type: 'url',
      media_type: 'image/jpeg',
      url: 'https://example.com/image.jpg',
    }),
  };
  const expectedUserMessage = expect.objectContaining({
    role: 'user',
    content: expect.arrayContaining([
      { type: 'text', text: 'Describe this image in detail.' },
      expectedImagePart,
    ]),
  });

  expect(mockClient.chat).toHaveBeenCalledWith(
    expect.objectContaining({
      messages: expect.arrayContaining([expectedUserMessage]),
      system: expect.stringContaining('vision assistant'),
      maxTokens: 1024,
    }),
  );
});
|
|
|
|
// Happy path for inline image bytes: the base64 payload and its media type must be
// forwarded unchanged inside a base64-sourced image part.
it('analyzes image from base64 data', async () => {
  const base64Data = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
  const modelReply = {
    content: 'This is a sample image.',
    stopReason: 'end_turn',
    usage: { inputTokens: 100, outputTokens: 20 },
  };
  mockClient.chat = vi.fn().mockResolvedValueOnce(modelReply);

  const result = await createImageAnalyzeTool(mockClient).execute({
    data: base64Data,
    media_type: 'image/png',
  });

  expect(result.success).toBe(true);
  expect(result.output).toBe('This is a sample image.');
  expect(mockClient.chat).toHaveBeenCalledTimes(1);

  // Expected request shape, assembled bottom-up for readability.
  const expectedImagePart = {
    type: 'image',
    source: expect.objectContaining({
      type: 'base64',
      media_type: 'image/png',
      data: base64Data,
    }),
  };
  const expectedUserMessage = expect.objectContaining({
    role: 'user',
    content: expect.arrayContaining([
      { type: 'text', text: 'Describe this image in detail.' },
      expectedImagePart,
    ]),
  });

  expect(mockClient.chat).toHaveBeenCalledWith(
    expect.objectContaining({
      messages: expect.arrayContaining([expectedUserMessage]),
      system: expect.stringContaining('vision assistant'),
      maxTokens: 1024,
    }),
  );
});
|
|
|
|
// A caller-supplied prompt must replace the default text part while the image part
// is still sent alongside it.
it('uses custom prompt', async () => {
  mockClient.chat = vi.fn().mockResolvedValueOnce({
    content: 'The image shows a cat sitting on a mat.',
    stopReason: 'end_turn',
    usage: { inputTokens: 100, outputTokens: 30 },
  });

  const tool = createImageAnalyzeTool(mockClient);
  const result = await tool.execute({
    url: 'https://example.com/cat.jpg',
    prompt: 'What is in this image?',
  });

  expect(result.success).toBe(true);
  expect(result.output).toBe('The image shows a cat sitting on a mat.');
  expect(mockClient.chat).toHaveBeenCalledWith(
    expect.objectContaining({
      messages: expect.arrayContaining([
        expect.objectContaining({
          content: expect.arrayContaining([
            { type: 'text', text: 'What is in this image?' },
            // Match on the part's type: `expect.any(Object)` would also be satisfied
            // by the text part above and so would not prove an image part was sent.
            expect.objectContaining({ type: 'image' }),
          ]),
        }),
      ]),
    }),
  );
});
|
|
|
|
// With no `prompt` input, the request must carry the built-in default text part
// together with the image part.
it('defaults prompt to "Describe this image in detail."', async () => {
  mockClient.chat = vi.fn().mockResolvedValueOnce({
    content: 'This is the default prompt response.',
    stopReason: 'end_turn',
    usage: { inputTokens: 100, outputTokens: 10 },
  });

  const tool = createImageAnalyzeTool(mockClient);
  await tool.execute({ url: 'https://example.com/image.jpg' });

  expect(mockClient.chat).toHaveBeenCalledWith(
    expect.objectContaining({
      messages: expect.arrayContaining([
        expect.objectContaining({
          content: expect.arrayContaining([
            { type: 'text', text: 'Describe this image in detail.' },
            // Match on the part's type: `expect.any(Object)` would also be satisfied
            // by the text part above and so would not prove an image part was sent.
            expect.objectContaining({ type: 'image' }),
          ]),
        }),
      ]),
    }),
  );
});
|
|
|
|
// An empty input must be rejected before the model is ever contacted.
it('fails when neither url nor data is provided', async () => {
  const { success, error } = await createImageAnalyzeTool(mockClient).execute({});

  expect(success).toBe(false);
  expect(error).toContain('Either "url" or "data" must be provided');
  expect(mockClient.chat).not.toHaveBeenCalled();
});
|
|
|
|
// Supplying both image sources is ambiguous and must be rejected without a model call.
it('fails when both url and data are provided', async () => {
  const conflictingInput = {
    url: 'https://example.com/image.jpg',
    data: 'base64data',
  };

  const { success, error } = await createImageAnalyzeTool(mockClient).execute(conflictingInput);

  expect(success).toBe(false);
  expect(error).toContain('Cannot provide both "url" and "data"');
  expect(mockClient.chat).not.toHaveBeenCalled();
});
|
|
|
|
// Inline data without a declared media type must be rejected without a model call.
it('fails when data is provided without media_type', async () => {
  const inputMissingMediaType = {
    data: 'base64data',
    prompt: 'Test',
  };

  const { success, error } = await createImageAnalyzeTool(mockClient).execute(inputMissingMediaType);

  expect(success).toBe(false);
  expect(error).toContain('media_type is required when providing data');
  expect(mockClient.chat).not.toHaveBeenCalled();
});
|
|
|
|
// A media type outside the accepted set (here: TIFF) must be rejected without a model call.
it('fails with invalid media_type', async () => {
  const unsupportedInput = {
    data: 'base64data',
    media_type: 'image/tiff',
  };

  const { success, error } = await createImageAnalyzeTool(mockClient).execute(unsupportedInput);

  expect(success).toBe(false);
  expect(error).toContain('Invalid media_type');
  expect(mockClient.chat).not.toHaveBeenCalled();
});
|
|
|
|
// Every supported media type must get through validation and trigger exactly one
// model call.
it('passes valid media_types', async () => {
  const validTypes = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];

  for (const mediaType of validTypes) {
    // A fresh stub per iteration keeps the call count below scoped to this media
    // type; no reset is needed afterwards because the stub is replaced here on the
    // next pass.
    mockClient.chat = vi.fn().mockResolvedValueOnce({
      content: 'Success',
      stopReason: 'end_turn',
      usage: { inputTokens: 10, outputTokens: 10 },
    });

    const tool = createImageAnalyzeTool(mockClient);
    const result = await tool.execute({
      data: 'base64data',
      media_type: mediaType,
    });

    // Assertion messages name the media type so a failure inside the loop is
    // attributable without re-running the cases one by one.
    expect(result.success, `media_type "${mediaType}" should be accepted`).toBe(true);
    expect(mockClient.chat, `chat call count for media_type "${mediaType}"`).toHaveBeenCalledTimes(1);
  }
});
|
|
|
|
// A rejected chat call with an Error must surface as a failed result whose error is
// the Error's message and whose output is empty.
it('handles model client errors', async () => {
  const failure = new Error('Model API error');
  mockClient.chat = vi.fn().mockRejectedValueOnce(failure);

  const { success, output, error } = await createImageAnalyzeTool(mockClient).execute({
    url: 'https://example.com/image.jpg',
  });

  expect(success).toBe(false);
  expect(output).toBe('');
  expect(error).toBe('Model API error');
});
|
|
|
|
// A rejection with a plain string (not an Error) must still yield a failed result
// carrying that string as the error text.
it('handles non-Error exceptions', async () => {
  const thrownValue = 'String error';
  mockClient.chat = vi.fn().mockRejectedValueOnce(thrownValue);

  const { success, output, error } = await createImageAnalyzeTool(mockClient).execute({
    url: 'https://example.com/image.jpg',
  });

  expect(success).toBe(false);
  expect(output).toBe('');
  expect(error).toBe('String error');
});
|
|
|
|
// NOTE(review): despite the title, this case supplies only a custom *user* prompt;
// what it pins is the text of the system message sent with that request.
it('uses custom system message prompt', async () => {
  const mockResponse = {
    content: 'Analysis complete.',
    stopReason: 'end_turn',
    usage: { inputTokens: 100, outputTokens: 10 },
  };
  mockClient.chat = vi.fn().mockResolvedValue(mockResponse);

  const tool = createImageAnalyzeTool(mockClient);
  await tool.execute({
    url: 'https://example.com/image.jpg',
    prompt: 'Analyze the colors.',
  });

  // Matcher-based checks instead of casting `mock.calls[0]?.[0]`: if the client is
  // never called, this fails as an assertion rather than as a TypeError on
  // `undefined.system`.
  expect(mockClient.chat).toHaveBeenCalledTimes(1);
  expect(mockClient.chat).toHaveBeenCalledWith(
    expect.objectContaining({ system: expect.stringContaining('vision assistant') }),
  );
  expect(mockClient.chat).toHaveBeenCalledWith(
    expect.objectContaining({ system: expect.stringContaining('Analyze the provided image') }),
  );
});
|
|
|
|
// NOTE(review): no maxTokens input is supplied here; the case pins the token limit
// (1024) that the tool sends to the model for a default call.
it('respects maxTokens parameter', async () => {
  const mockResponse = {
    content: 'Short response',
    stopReason: 'end_turn',
    usage: { inputTokens: 10, outputTokens: 10 },
  };
  mockClient.chat = vi.fn().mockResolvedValueOnce(mockResponse);

  const tool = createImageAnalyzeTool(mockClient);
  await tool.execute({ url: 'https://example.com/image.jpg' });

  // Matcher-based check instead of casting `mock.calls[0]?.[0]`: if the client is
  // never called, this fails as an assertion rather than as a TypeError on
  // `undefined.maxTokens`.
  expect(mockClient.chat).toHaveBeenCalledTimes(1);
  expect(mockClient.chat).toHaveBeenCalledWith(expect.objectContaining({ maxTokens: 1024 }));
});
|
|
|
|
// The tool's output must be exactly the `content` string returned by the model.
it('passes through model response content', async () => {
  const expectedContent = 'Detailed analysis of the image...';
  const modelReply = {
    content: expectedContent,
    stopReason: 'end_turn',
    usage: { inputTokens: 100, outputTokens: 100 },
  };
  mockClient.chat.mockResolvedValueOnce(modelReply);

  const { success, output } = await createImageAnalyzeTool(mockClient).execute({
    url: 'https://example.com/image.jpg',
  });

  expect(success).toBe(true);
  expect(output).toBe(expectedContent);
});
|
|
});
|