fix(gmail): sanitize HTML entities and tags in tool output

The Gmail API returns snippets containing HTML entities (&amp;, &#39;, etc.)
and tags such as <br>, which leaked into LLM responses as raw HTML. Add a
shared sanitizeHtml() utility in src/utils/html.ts and apply it to the gmail
tool snippets, the HTML body fallback, and the gmail watcher snippets.
This commit is contained in:
William Valentin
2026-02-10 16:30:14 -08:00
parent 4317492e4b
commit 4ce8e81c01
6 changed files with 281 additions and 4 deletions
+62
View File
@@ -191,6 +191,36 @@ describe('gmail.list', () => {
expect(result.success).toBe(true);
expect(result.output).toBe('No messages found.');
});
// The list tool's output must contain decoded text: no raw entities and no <br> tag.
it('sanitizes HTML entities in snippets', async () => {
  setupValidAuth();

  // Mixes numeric (&#39;, &#8230;) and named (&amp;, &apos;) entities with a <br> tag.
  const rawSnippet =
    'William, your score is rising&#39;s &amp; it&#8230; Don&apos;t miss out<br>Check now';
  mockMessagesList.mockResolvedValue({ data: { messages: [{ id: 'msg1' }] } });
  mockMessagesGet.mockResolvedValueOnce(
    mockMessageDetails('msg1', 'experian@test.com', 'Credit Alert', 'Mon, 10 Feb 2026', rawSnippet),
  );

  const [gmailList] = createGmailTools(testConfig);
  const outcome = await gmailList.execute({});

  expect(outcome.success).toBe(true);
  // None of the encoded forms may survive into the tool output...
  for (const leaked of ['&#39;', '&amp;', '&#8230;', '<br>']) {
    expect(outcome.output).not.toContain(leaked);
  }
  // ...and their decoded equivalents must be present instead.
  for (const decoded of ["rising's", '& it', "Don't miss out"]) {
    expect(outcome.output).toContain(decoded);
  }
});
});
describe('gmail.search', () => {
@@ -360,6 +390,38 @@ describe('gmail.read', () => {
expect(result.output).not.toContain('<html>');
});
// A message whose only part is text/html must be read back as plain text
// with its entities decoded.
it('decodes HTML entities in HTML-only body fallback', async () => {
  setupValidAuth();

  const markup =
    '<html><body><p>Hello &amp; welcome</p><br><p>Price: &lt;$100&gt;</p><br><p>It&#39;s great</p></body></html>';
  const headers = [
    { name: 'From', value: 'sender@example.com' },
    { name: 'To', value: 'will@example.com' },
    { name: 'Subject', value: 'HTML Entities' },
    { name: 'Date', value: 'Mon, 10 Feb 2026 12:00:00 -0000' },
  ];
  // No text/plain part is provided, so the HTML part is the only body available.
  const payload = {
    mimeType: 'multipart/alternative',
    headers,
    parts: [{ mimeType: 'text/html', body: { data: toBase64Url(markup) } }],
  };
  mockMessagesGet.mockResolvedValue({ data: { payload } });

  const [, , gmailRead] = createGmailTools(testConfig);
  const outcome = await gmailRead.execute({ id: 'msg-entities' });

  expect(outcome.success).toBe(true);
  // '<$100>' only survives if the escaped angle brackets are not treated as a tag.
  for (const decoded of ['Hello & welcome', 'Price: <$100>', "It's great"]) {
    expect(outcome.output).toContain(decoded);
  }
  for (const leaked of ['&amp;', '&lt;', '&#39;']) {
    expect(outcome.output).not.toContain(leaked);
  }
});
it('returns error when credentials missing', async () => {
mockExistsSync.mockReturnValue(false);
const [, , readTool] = createGmailTools(testConfig);
+4 -3
View File
@@ -4,6 +4,7 @@ import { resolve } from 'path';
import { homedir } from 'os';
import type { GmailConfig } from '../../config/schema.js';
import type { Tool, ToolResult } from '../types.js';
import { sanitizeHtml } from '../../utils/html.js';
/** Expand ~ to home directory. */
function expandPath(p: string): string {
@@ -79,7 +80,7 @@ async function fetchMessageDetails(
from: getHeader('From'),
subject: getHeader('Subject'),
date: getHeader('Date'),
snippet: msg.data.snippet ?? '',
snippet: sanitizeHtml(msg.data.snippet ?? ''),
};
} catch {
return null;
@@ -123,8 +124,8 @@ function extractTextBody(payload: {
}
}
if (htmlFallback) {
// Strip HTML tags for a rough plain-text rendering
return htmlFallback.replace(/<[^>]+>/g, '').replace(/\s+/g, ' ').trim();
// Convert HTML to clean plain text
return sanitizeHtml(htmlFallback);
}
}