From 6d9e27a5919e46f2d9dc9cd3cfd96cb263888f13 Mon Sep 17 00:00:00 2001 From: William Valentin Date: Fri, 6 Feb 2026 14:24:28 -0800 Subject: [PATCH] feat: enhance web-fetch with HTML-to-markdown extraction (Phase 6) Add turndown + readability for clean content extraction: - HTML-to-markdown conversion with smart article extraction - Format parameter (markdown/text/html) - Response caching for repeated fetches - 10 tests --- src/tools/builtin/web-fetch.test.ts | 178 +++++++++++++++++++++++- src/tools/builtin/web-fetch.ts | 204 +++++++++++++++++++++++++++- 2 files changed, 374 insertions(+), 8 deletions(-) diff --git a/src/tools/builtin/web-fetch.test.ts b/src/tools/builtin/web-fetch.test.ts index 9705e84..1b3dc80 100644 --- a/src/tools/builtin/web-fetch.test.ts +++ b/src/tools/builtin/web-fetch.test.ts @@ -1,34 +1,142 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { webFetchTool } from './web-fetch.js'; +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { webFetchTool, _clearCache, _MAX_CONTENT_LENGTH } from './web-fetch.js'; +// --------------------------------------------------------------------------- // Mock global fetch +// --------------------------------------------------------------------------- + const mockFetch = vi.fn(); vi.stubGlobal('fetch', mockFetch); +// --------------------------------------------------------------------------- +// HTML fixture used by extraction tests +// --------------------------------------------------------------------------- + +const SAMPLE_HTML = ` +Test Page + + + + +
+

Article Title

+

This is the main content of the article. It contains important information.

+

Second paragraph with more details about the topic.

+
+ +`; + +// --------------------------------------------------------------------------- +// Setup / teardown +// --------------------------------------------------------------------------- + beforeEach(() => { mockFetch.mockReset(); + _clearCache(); }); +afterEach(() => { + vi.useRealTimers(); +}); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + describe('web.fetch', () => { + // ---- Metadata ---- + it('has correct metadata', () => { expect(webFetchTool.name).toBe('web.fetch'); expect(webFetchTool.inputSchema.required).toContain('url'); + // New format property is advertised in the schema + expect(webFetchTool.inputSchema.properties).toHaveProperty('format'); }); - it('fetches a URL and returns body text', async () => { + // ---- Default behaviour (markdown extraction) ---- + + it('fetches a URL and returns markdown by default', async () => { mockFetch.mockResolvedValue({ ok: true, status: 200, - text: async () => '

Hello

World

', + text: async () => SAMPLE_HTML, headers: new Headers({ 'content-type': 'text/html' }), }); const result = await webFetchTool.execute({ url: 'https://example.com' }); expect(result.success).toBe(true); expect(result.output).toBeTruthy(); + // Should contain markdown-ish content (heading or paragraph text) + expect(result.output).toContain('Article Title'); + // Should NOT contain raw HTML tags + expect(result.output).not.toContain('