fix: gracefully handle Ollama models without tool support
Check model capabilities via /api/show before sending tools. Models without 'tools' capability get requests without tools (they can still answer, just without tool use). Result is cached per client instance. Defense-in-depth: 'does not support' added to retry nonRetryablePatterns to avoid wasting retries on permanent errors.
This commit is contained in:
@@ -2,16 +2,21 @@ import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { OllamaClient } from './ollama.js';
|
||||
|
||||
const mockChat = vi.fn();
|
||||
const mockShow = vi.fn();
|
||||
|
||||
vi.mock('ollama', () => ({
|
||||
Ollama: vi.fn().mockImplementation(() => ({
|
||||
chat: mockChat,
|
||||
show: mockShow,
|
||||
})),
|
||||
}));
|
||||
|
||||
describe('OllamaClient', () => {
|
||||
beforeEach(() => {
|
||||
mockChat.mockReset();
|
||||
mockShow.mockReset();
|
||||
// Default: model supports tools
|
||||
mockShow.mockResolvedValue({ capabilities: ['completion', 'tools'] });
|
||||
});
|
||||
|
||||
it('sends messages and returns response', async () => {
|
||||
@@ -234,4 +239,112 @@ describe('OllamaClient', () => {
|
||||
usage: { inputTokens: 5, outputTokens: 3 },
|
||||
});
|
||||
});
|
||||
|
||||
it('omits tools when model does not support them', async () => {
|
||||
mockShow.mockResolvedValue({ capabilities: ['completion'] });
|
||||
mockChat.mockResolvedValue({
|
||||
message: { content: 'I cannot use tools but I can answer.' },
|
||||
done_reason: 'stop',
|
||||
prompt_eval_count: 10,
|
||||
eval_count: 5,
|
||||
});
|
||||
|
||||
const client = new OllamaClient({ model: 'qwen2.5-14b-instruct' });
|
||||
|
||||
const response = await client.chat({
|
||||
messages: [{ role: 'user', content: 'What time is it?' }],
|
||||
tools: [
|
||||
{
|
||||
name: 'system.info',
|
||||
description: 'Get system info',
|
||||
input_schema: { type: 'object', properties: {}, required: [] },
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(response.content).toBe('I cannot use tools but I can answer.');
|
||||
const callArgs = mockChat.mock.calls[0][0];
|
||||
expect(callArgs.tools).toBeUndefined();
|
||||
});
|
||||
|
||||
it('passes tools when model supports them', async () => {
|
||||
mockShow.mockResolvedValue({ capabilities: ['completion', 'tools'] });
|
||||
mockChat.mockResolvedValue({
|
||||
message: { content: 'Using tools.' },
|
||||
done_reason: 'stop',
|
||||
prompt_eval_count: 10,
|
||||
eval_count: 5,
|
||||
});
|
||||
|
||||
const client = new OllamaClient({ model: 'llama3.2' });
|
||||
|
||||
await client.chat({
|
||||
messages: [{ role: 'user', content: 'List files' }],
|
||||
tools: [
|
||||
{
|
||||
name: 'shell.exec',
|
||||
description: 'Run command',
|
||||
input_schema: { type: 'object', properties: { command: { type: 'string' } }, required: ['command'] },
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const callArgs = mockChat.mock.calls[0][0];
|
||||
expect(callArgs.tools).toBeDefined();
|
||||
expect(callArgs.tools[0].function.name).toBe('shell.exec');
|
||||
});
|
||||
|
||||
it('caches capability check across multiple calls', async () => {
|
||||
mockShow.mockResolvedValue({ capabilities: ['completion', 'tools'] });
|
||||
mockChat.mockResolvedValue({
|
||||
message: { content: 'OK' },
|
||||
done_reason: 'stop',
|
||||
prompt_eval_count: 5,
|
||||
eval_count: 3,
|
||||
});
|
||||
|
||||
const client = new OllamaClient({ model: 'llama3.2' });
|
||||
|
||||
const tools = [
|
||||
{
|
||||
name: 'test.tool',
|
||||
description: 'A test tool',
|
||||
input_schema: { type: 'object' as const, properties: {} },
|
||||
},
|
||||
];
|
||||
|
||||
await client.chat({ messages: [{ role: 'user', content: 'a' }], tools });
|
||||
await client.chat({ messages: [{ role: 'user', content: 'b' }], tools });
|
||||
await client.chat({ messages: [{ role: 'user', content: 'c' }], tools });
|
||||
|
||||
// show() should only be called once despite 3 chat calls
|
||||
expect(mockShow).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('defaults to tools supported when show() fails', async () => {
|
||||
mockShow.mockRejectedValue(new Error('connection refused'));
|
||||
mockChat.mockResolvedValue({
|
||||
message: { content: 'OK' },
|
||||
done_reason: 'stop',
|
||||
prompt_eval_count: 5,
|
||||
eval_count: 3,
|
||||
});
|
||||
|
||||
const client = new OllamaClient({ model: 'old-model' });
|
||||
|
||||
await client.chat({
|
||||
messages: [{ role: 'user', content: 'hello' }],
|
||||
tools: [
|
||||
{
|
||||
name: 'test.tool',
|
||||
description: 'A test tool',
|
||||
input_schema: { type: 'object', properties: {} },
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
// Tools should still be passed (optimistic default)
|
||||
const callArgs = mockChat.mock.calls[0][0];
|
||||
expect(callArgs.tools).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -12,6 +12,8 @@ export class OllamaClient implements ModelClient {
|
||||
private client: Ollama;
|
||||
private model: string;
|
||||
private numGpu: number;
|
||||
/** Cached result of model tool-support check. null = not yet checked. */
|
||||
private _supportsTools: boolean | null = null;
|
||||
|
||||
constructor(config: OllamaClientConfig) {
|
||||
this.client = new Ollama({
|
||||
@@ -21,6 +23,25 @@ export class OllamaClient implements ModelClient {
|
||||
this.numGpu = config.numGpu ?? -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether the current model supports tool calling via Ollama's
|
||||
* /api/show endpoint. Result is cached for the lifetime of this client.
|
||||
* On error (e.g. old Ollama version without capabilities), defaults to
|
||||
* true (optimistic — let the server decide).
|
||||
*/
|
||||
private async checkToolSupport(): Promise<boolean> {
|
||||
if (this._supportsTools !== null) return this._supportsTools;
|
||||
try {
|
||||
const info = await this.client.show({ model: this.model });
|
||||
const caps: string[] = (info as any).capabilities ?? [];
|
||||
this._supportsTools = caps.includes('tools');
|
||||
} catch {
|
||||
// Old Ollama or network issue — assume tools are supported
|
||||
this._supportsTools = true;
|
||||
}
|
||||
return this._supportsTools;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Flynn ToolDefinition[] to Ollama Tool[] format.
|
||||
*/
|
||||
@@ -59,7 +80,7 @@ export class OllamaClient implements ModelClient {
|
||||
},
|
||||
};
|
||||
|
||||
if (request.tools && request.tools.length > 0) {
|
||||
if (request.tools && request.tools.length > 0 && await this.checkToolSupport()) {
|
||||
chatParams.tools = this.convertTools(request.tools);
|
||||
}
|
||||
|
||||
@@ -110,8 +131,9 @@ export class OllamaClient implements ModelClient {
|
||||
}
|
||||
|
||||
try {
|
||||
// Build tools array if provided
|
||||
const tools = request.tools && request.tools.length > 0
|
||||
// Build tools array if provided and model supports them
|
||||
const supportsTools = await this.checkToolSupport();
|
||||
const tools = request.tools && request.tools.length > 0 && supportsTools
|
||||
? this.convertTools(request.tools)
|
||||
: undefined;
|
||||
|
||||
|
||||
@@ -38,6 +38,11 @@ describe('isRetryable', () => {
|
||||
expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false for "does not support" errors', () => {
|
||||
const error = new Error('registry.ollama.ai/library/qwen2.5:latest does not support tools');
|
||||
expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
|
||||
});
|
||||
|
||||
it('is case-insensitive when matching patterns', () => {
|
||||
const error = new Error('AUTHENTICATION error');
|
||||
expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
|
||||
@@ -102,6 +107,15 @@ describe('withRetry', () => {
|
||||
expect(fn).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('does not retry "does not support tools" errors', async () => {
|
||||
const fn = vi.fn().mockRejectedValue(
|
||||
new Error('registry.ollama.ai/library/qwen2.5:latest does not support tools'),
|
||||
);
|
||||
|
||||
await expect(withRetry(fn, fastConfig)).rejects.toThrow('does not support tools');
|
||||
expect(fn).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('converts non-Error throws to Error objects', async () => {
|
||||
const fn = vi.fn().mockRejectedValue('string error');
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ export const DEFAULT_RETRY_CONFIG: RetryConfig = {
|
||||
'invalid_request',
|
||||
'context_length_exceeded',
|
||||
'content_policy',
|
||||
'does not support',
|
||||
],
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user