fix: gracefully handle Ollama models without tool support

Check model capabilities via /api/show before sending tools. Models without 'tools' capability get requests without tools (they can still answer, just without tool use). Result is cached per client instance. Defense-in-depth: 'does not support' added to retry nonRetryablePatterns to avoid wasting retries on permanent errors.
2026-02-07 17:44:47 -08:00
parent a0f5584220
commit 6ed8a4a8bf
4 changed files with 153 additions and 3 deletions
@@ -2,16 +2,21 @@ import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { OllamaClient } from './ollama.js';

 const mockChat = vi.fn();
+const mockShow = vi.fn();

 vi.mock('ollama', () => ({
  Ollama: vi.fn().mockImplementation(() => ({
    chat: mockChat,
+    show: mockShow,
  })),
 }));

 describe('OllamaClient', () => {
  beforeEach(() => {
    mockChat.mockReset();
+    mockShow.mockReset();
+    // Default: model supports tools
+    mockShow.mockResolvedValue({ capabilities: ['completion', 'tools'] });
  });

  it('sends messages and returns response', async () => {
@@ -234,4 +239,112 @@ describe('OllamaClient', () => {
      usage: { inputTokens: 5, outputTokens: 3 },
    });
  });
+
+  it('omits tools when model does not support them', async () => {
+    mockShow.mockResolvedValue({ capabilities: ['completion'] });
+    mockChat.mockResolvedValue({
+      message: { content: 'I cannot use tools but I can answer.' },
+      done_reason: 'stop',
+      prompt_eval_count: 10,
+      eval_count: 5,
+    });
+
+    const client = new OllamaClient({ model: 'qwen2.5-14b-instruct' });
+
+    const response = await client.chat({
+      messages: [{ role: 'user', content: 'What time is it?' }],
+      tools: [
+        {
+          name: 'system.info',
+          description: 'Get system info',
+          input_schema: { type: 'object', properties: {}, required: [] },
+        },
+      ],
+    });
+
+    expect(response.content).toBe('I cannot use tools but I can answer.');
+    const callArgs = mockChat.mock.calls[0][0];
+    expect(callArgs.tools).toBeUndefined();
+  });
+
+  it('passes tools when model supports them', async () => {
+    mockShow.mockResolvedValue({ capabilities: ['completion', 'tools'] });
+    mockChat.mockResolvedValue({
+      message: { content: 'Using tools.' },
+      done_reason: 'stop',
+      prompt_eval_count: 10,
+      eval_count: 5,
+    });
+
+    const client = new OllamaClient({ model: 'llama3.2' });
+
+    await client.chat({
+      messages: [{ role: 'user', content: 'List files' }],
+      tools: [
+        {
+          name: 'shell.exec',
+          description: 'Run command',
+          input_schema: { type: 'object', properties: { command: { type: 'string' } }, required: ['command'] },
+        },
+      ],
+    });
+
+    const callArgs = mockChat.mock.calls[0][0];
+    expect(callArgs.tools).toBeDefined();
+    expect(callArgs.tools[0].function.name).toBe('shell.exec');
+  });
+
+  it('caches capability check across multiple calls', async () => {
+    mockShow.mockResolvedValue({ capabilities: ['completion', 'tools'] });
+    mockChat.mockResolvedValue({
+      message: { content: 'OK' },
+      done_reason: 'stop',
+      prompt_eval_count: 5,
+      eval_count: 3,
+    });
+
+    const client = new OllamaClient({ model: 'llama3.2' });
+
+    const tools = [
+      {
+        name: 'test.tool',
+        description: 'A test tool',
+        input_schema: { type: 'object' as const, properties: {} },
+      },
+    ];
+
+    await client.chat({ messages: [{ role: 'user', content: 'a' }], tools });
+    await client.chat({ messages: [{ role: 'user', content: 'b' }], tools });
+    await client.chat({ messages: [{ role: 'user', content: 'c' }], tools });
+
+    // show() should only be called once despite 3 chat calls
+    expect(mockShow).toHaveBeenCalledTimes(1);
+  });
+
+  it('defaults to tools supported when show() fails', async () => {
+    mockShow.mockRejectedValue(new Error('connection refused'));
+    mockChat.mockResolvedValue({
+      message: { content: 'OK' },
+      done_reason: 'stop',
+      prompt_eval_count: 5,
+      eval_count: 3,
+    });
+
+    const client = new OllamaClient({ model: 'old-model' });
+
+    await client.chat({
+      messages: [{ role: 'user', content: 'hello' }],
+      tools: [
+        {
+          name: 'test.tool',
+          description: 'A test tool',
+          input_schema: { type: 'object', properties: {} },
+        },
+      ],
+    });
+
+    // Tools should still be passed (optimistic default)
+    const callArgs = mockChat.mock.calls[0][0];
+    expect(callArgs.tools).toBeDefined();
+  });
 });
@@ -12,6 +12,8 @@ export class OllamaClient implements ModelClient {
  private client: Ollama;
  private model: string;
  private numGpu: number;
+  /** Cached result of model tool-support check. null = not yet checked. */
+  private _supportsTools: boolean | null = null;

  constructor(config: OllamaClientConfig) {
    this.client = new Ollama({
@@ -21,6 +23,25 @@ export class OllamaClient implements ModelClient {
    this.numGpu = config.numGpu ?? -1;
  }

+  /**
+   * Check whether the current model supports tool calling via Ollama's
+   * /api/show endpoint. Result is cached for the lifetime of this client.
+   * On error (e.g. old Ollama version without capabilities), defaults to
+   * true (optimistic — let the server decide).
+   */
+  private async checkToolSupport(): Promise<boolean> {
+    if (this._supportsTools !== null) return this._supportsTools;
+    try {
+      const info = await this.client.show({ model: this.model });
+      const caps: string[] = (info as any).capabilities ?? [];
+      this._supportsTools = caps.includes('tools');
+    } catch {
+      // Old Ollama or network issue — assume tools are supported
+      this._supportsTools = true;
+    }
+    return this._supportsTools;
+  }
+
  /**
   * Convert Flynn ToolDefinition[] to Ollama Tool[] format.
   */
@@ -59,7 +80,7 @@ export class OllamaClient implements ModelClient {
      },
    };

-    if (request.tools && request.tools.length > 0) {
+    if (request.tools && request.tools.length > 0 && await this.checkToolSupport()) {
      chatParams.tools = this.convertTools(request.tools);
    }

@@ -110,8 +131,9 @@ export class OllamaClient implements ModelClient {
    }

    try {
-      // Build tools array if provided
-      const tools = request.tools && request.tools.length > 0
+      // Build tools array if provided and model supports them
+      const supportsTools = await this.checkToolSupport();
+      const tools = request.tools && request.tools.length > 0 && supportsTools
        ? this.convertTools(request.tools)
        : undefined;

@@ -38,6 +38,11 @@ describe('isRetryable', () => {
    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
  });

+  it('returns false for "does not support" errors', () => {
+    const error = new Error('registry.ollama.ai/library/qwen2.5:latest does not support tools');
+    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
+  });
+
  it('is case-insensitive when matching patterns', () => {
    const error = new Error('AUTHENTICATION error');
    expect(isRetryable(error, DEFAULT_RETRY_CONFIG.nonRetryablePatterns)).toBe(false);
@@ -102,6 +107,15 @@ describe('withRetry', () => {
    expect(fn).toHaveBeenCalledTimes(1);
  });

+  it('does not retry "does not support tools" errors', async () => {
+    const fn = vi.fn().mockRejectedValue(
+      new Error('registry.ollama.ai/library/qwen2.5:latest does not support tools'),
+    );
+
+    await expect(withRetry(fn, fastConfig)).rejects.toThrow('does not support tools');
+    expect(fn).toHaveBeenCalledTimes(1);
+  });
+
  it('converts non-Error throws to Error objects', async () => {
    const fn = vi.fn().mockRejectedValue('string error');

@@ -23,6 +23,7 @@ export const DEFAULT_RETRY_CONFIG: RetryConfig = {
    'invalid_request',
    'context_length_exceeded',
    'content_policy',
+    'does not support',
  ],
 };