From 6ed8a4a8bfc7536eb5025128b777d88f9f430384 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Sat, 7 Feb 2026 17:44:47 -0800 Subject: [PATCH] fix: gracefully handle Ollama models without tool support Check model capabilities via /api/show before sending tools. Models without 'tools' capability get requests without tools (they can still answer, just without tool use). Result is cached per client instance. Defense-in-depth: 'does not support' added to retry nonRetryablePatterns to avoid wasting retries on permanent errors. --- src/models/local/ollama.test.ts | 113 ++++++++++++++++++++++++++++++++ src/models/local/ollama.ts | 28 +++++++- src/models/retry.test.ts | 14 ++++ src/models/retry.ts | 1 + 4 files changed, 153 insertions(+), 3 deletions(-) diff --git a/src/models/local/ollama.test.ts b/src/models/local/ollama.test.ts index 7c45e32..c5bcd54 100644 --- a/src/models/local/ollama.test.ts +++ b/src/models/local/ollama.test.ts @@ -2,16 +2,21 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { OllamaClient } from './ollama.js'; const mockChat = vi.fn(); +const mockShow = vi.fn(); vi.mock('ollama', () => ({ Ollama: vi.fn().mockImplementation(() => ({ chat: mockChat, + show: mockShow, })), })); describe('OllamaClient', () => { beforeEach(() => { mockChat.mockReset(); + mockShow.mockReset(); + // Default: model supports tools + mockShow.mockResolvedValue({ capabilities: ['completion', 'tools'] }); }); it('sends messages and returns response', async () => { @@ -234,4 +239,112 @@ describe('OllamaClient', () => { usage: { inputTokens: 5, outputTokens: 3 }, }); }); + + it('omits tools when model does not support them', async () => { + mockShow.mockResolvedValue({ capabilities: ['completion'] }); + mockChat.mockResolvedValue({ + message: { content: 'I cannot use tools but I can answer.' }, + done_reason: 'stop', + prompt_eval_count: 10, + eval_count: 5, + }); + + const client = new OllamaClient({ model: 'qwen2.5-14b-instruct' }); + + const response = await client.chat({ + messages: [{ role: 'user', content: 'What time is it?' }], + tools: [ + { + name: 'system.info', + description: 'Get system info', + input_schema: { type: 'object', properties: {}, required: [] }, + }, + ], + }); + + expect(response.content).toBe('I cannot use tools but I can answer.'); + const callArgs = mockChat.mock.calls[0][0]; + expect(callArgs.tools).toBeUndefined(); + }); + + it('passes tools when model supports them', async () => { + mockShow.mockResolvedValue({ capabilities: ['completion', 'tools'] }); + mockChat.mockResolvedValue({ + message: { content: 'Using tools.' }, + done_reason: 'stop', + prompt_eval_count: 10, + eval_count: 5, + }); + + const client = new OllamaClient({ model: 'llama3.2' }); + + await client.chat({ + messages: [{ role: 'user', content: 'List files' }], + tools: [ + { + name: 'shell.exec', + description: 'Run command', + input_schema: { type: 'object', properties: { command: { type: 'string' } }, required: ['command'] }, + }, + ], + }); + + const callArgs = mockChat.mock.calls[0][0]; + expect(callArgs.tools).toBeDefined(); + expect(callArgs.tools[0].function.name).toBe('shell.exec'); + }); + + it('caches capability check across multiple calls', async () => { + mockShow.mockResolvedValue({ capabilities: ['completion', 'tools'] }); + mockChat.mockResolvedValue({ + message: { content: 'OK' }, + done_reason: 'stop', + prompt_eval_count: 5, + eval_count: 3, + }); + + const client = new OllamaClient({ model: 'llama3.2' }); + + const tools = [ + { + name: 'test.tool', + description: 'A test tool', + input_schema: { type: 'object' as const, properties: {} }, + }, + ]; + + await client.chat({ messages: [{ role: 'user', content: 'a' }], tools }); + await client.chat({ messages: [{ role: 'user', content: 'b' }], tools }); + await client.chat({ messages: [{ role: 'user', content: 'c' }], tools }); + + // show() should only be called once despite 3 chat calls + expect(mockShow).toHaveBeenCalledTimes(1); + }); + + it('defaults to tools supported when show() fails', async () => { + mockShow.mockRejectedValue(new Error('connection refused')); + mockChat.mockResolvedValue({ + message: { content: 'OK' }, + done_reason: 'stop', + prompt_eval_count: 5, + eval_count: 3, + }); + + const client = new OllamaClient({ model: 'old-model' }); + + await client.chat({ + messages: [{ role: 'user', content: 'hello' }], + tools: [ + { + name: 'test.tool', + description: 'A test tool', + input_schema: { type: 'object', properties: {} }, + }, + ], + }); + + // Tools should still be passed (optimistic default) + const callArgs = mockChat.mock.calls[0][0]; + expect(callArgs.tools).toBeDefined(); + }); }); diff --git a/src/models/local/ollama.ts b/src/models/local/ollama.ts index 884bd96..49d5961 100644 --- a/src/models/local/ollama.ts +++ b/src/models/local/ollama.ts @@ -12,6 +12,8 @@ export class OllamaClient implements ModelClient { private client: Ollama; private model: string; private numGpu: number; + /** Cached result of model tool-support check. null = not yet checked. */ + private _supportsTools: boolean | null = null; constructor(config: OllamaClientConfig) { this.client = new Ollama({ @@ -21,6 +23,25 @@ export class OllamaClient implements ModelClient { this.numGpu = config.numGpu ?? -1; } + /** + * Check whether the current model supports tool calling via Ollama's + * /api/show endpoint. Result is cached for the lifetime of this client. + * On error (e.g. old Ollama version without capabilities), defaults to + * true (optimistic — let the server decide). + */ + private async checkToolSupport(): Promise { + if (this._supportsTools !== null) return this._supportsTools; + try { + const info = await this.client.show({ model: this.model }); + const caps: string[] = (info as any).capabilities ?? []; + this._supportsTools = caps.includes('tools'); + } catch { + // Old Ollama or network issue — assume tools are supported + this._supportsTools = true; + } + return this._supportsTools; + } + /** * Convert Flynn ToolDefinition[] to Ollama Tool[] format. */ @@ -59,7 +80,7 @@ export class OllamaClient implements ModelClient { }, }; - if (request.tools && request.tools.length > 0) { + if (request.tools && request.tools.length > 0 && await this.checkToolSupport()) { chatParams.tools = this.convertTools(request.tools); } @@ -110,8 +131,9 @@ export class OllamaClient implements ModelClient { } try { - // Build tools array if provided - const tools = request.tools && request.tools.length > 0 + // Build tools array if provided and model supports them + const supportsTools = await this.checkToolSupport(); + const tools = request.tools && request.tools.length > 0 && supportsTools ? this.convertTools(request.tools) : undefined; diff --git a/src/models/retry.test.ts b/src/models/retry.test.ts index 3005d96..b75cba9 100644 --- a/src/models/retry.test.ts +++ b/src/models/retry.test.ts @@ -38,6 +38,11 @@ describe('isRetryable', () => { expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false); }); + it('returns false for "does not support" errors', () => { + const error = new Error('registry.ollama.ai/library/qwen2.5:latest does not support tools'); + expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false); + }); + it('is case-insensitive when matching patterns', () => { const error = new Error('AUTHENTICATION error'); expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false); @@ -102,6 +107,15 @@ describe('withRetry', () => { expect(fn).toHaveBeenCalledTimes(1); }); + it('does not retry "does not support tools" errors', async () => { + const fn = vi.fn().mockRejectedValue( + new Error('registry.ollama.ai/library/qwen2.5:latest does not support tools'), + ); + + await expect(withRetry(fn, fastConfig)).rejects.toThrow('does not support tools'); + expect(fn).toHaveBeenCalledTimes(1); + }); + it('converts non-Error throws to Error objects', async () => { const fn = vi.fn().mockRejectedValue('string error'); diff --git a/src/models/retry.ts b/src/models/retry.ts index e295f61..772f4a8 100644 --- a/src/models/retry.ts +++ b/src/models/retry.ts @@ -23,6 +23,7 @@ export const DEFAULT_RETRY_CONFIG: RetryConfig = { 'invalid_request', 'context_length_exceeded', 'content_policy', + 'does not support', ], };