fix(gmail): sanitize HTML entities and tags in tool output
Gmail API returns snippets with HTML entities and tags (&amp;, &#39;, <br>, etc.) that leaked into LLM responses as raw HTML. Added a shared sanitizeHtml() utility in src/utils/html.ts and applied it to gmail tool snippets, the HTML body fallback, and gmail watcher snippets.
This commit is contained in:
@@ -191,6 +191,36 @@ describe('gmail.list', () => {
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.output).toBe('No messages found.');
|
||||
});
|
||||
|
||||
it('sanitizes HTML entities in snippets', async () => {
|
||||
setupValidAuth();
|
||||
mockMessagesList.mockResolvedValue({
|
||||
data: {
|
||||
messages: [{ id: 'msg1' }],
|
||||
},
|
||||
});
|
||||
mockMessagesGet.mockResolvedValueOnce(
|
||||
mockMessageDetails(
|
||||
'msg1',
|
||||
'experian@test.com',
|
||||
'Credit Alert',
|
||||
'Mon, 10 Feb 2026',
|
||||
'William, your score is rising&#39;s &amp; it&hellip; Don&#39;t miss out<br>Check now',
|
||||
),
|
||||
);
|
||||
|
||||
const [listTool] = createGmailTools(testConfig);
|
||||
const result = await listTool.execute({});
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.output).not.toContain('&#39;');
|
||||
expect(result.output).not.toContain('&amp;');
|
||||
expect(result.output).not.toContain('&hellip;');
|
||||
expect(result.output).not.toContain('<br>');
|
||||
expect(result.output).toContain("rising's");
|
||||
expect(result.output).toContain('& it');
|
||||
expect(result.output).toContain("Don't miss out");
|
||||
});
|
||||
});
|
||||
|
||||
describe('gmail.search', () => {
|
||||
@@ -360,6 +390,38 @@ describe('gmail.read', () => {
|
||||
expect(result.output).not.toContain('<html>');
|
||||
});
|
||||
|
||||
it('decodes HTML entities in HTML-only body fallback', async () => {
|
||||
setupValidAuth();
|
||||
const htmlBody = '<html><body><p>Hello &amp; welcome</p><br><p>Price: &lt;$100&gt;</p><br><p>It&#39;s great</p></body></html>';
|
||||
mockMessagesGet.mockResolvedValue({
|
||||
data: {
|
||||
payload: {
|
||||
mimeType: 'multipart/alternative',
|
||||
headers: [
|
||||
{ name: 'From', value: 'sender@example.com' },
|
||||
{ name: 'To', value: 'will@example.com' },
|
||||
{ name: 'Subject', value: 'HTML Entities' },
|
||||
{ name: 'Date', value: 'Mon, 10 Feb 2026 12:00:00 -0000' },
|
||||
],
|
||||
parts: [
|
||||
{ mimeType: 'text/html', body: { data: toBase64Url(htmlBody) } },
|
||||
],
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const [, , readTool] = createGmailTools(testConfig);
|
||||
const result = await readTool.execute({ id: 'msg-entities' });
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.output).toContain('Hello & welcome');
|
||||
expect(result.output).toContain('Price: <$100>');
|
||||
expect(result.output).toContain("It's great");
|
||||
expect(result.output).not.toContain('&amp;');
|
||||
expect(result.output).not.toContain('&lt;');
|
||||
expect(result.output).not.toContain('&#39;');
|
||||
});
|
||||
|
||||
it('returns error when credentials missing', async () => {
|
||||
mockExistsSync.mockReturnValue(false);
|
||||
const [, , readTool] = createGmailTools(testConfig);
|
||||
|
||||
@@ -4,6 +4,7 @@ import { resolve } from 'path';
|
||||
import { homedir } from 'os';
|
||||
import type { GmailConfig } from '../../config/schema.js';
|
||||
import type { Tool, ToolResult } from '../types.js';
|
||||
import { sanitizeHtml } from '../../utils/html.js';
|
||||
|
||||
/** Expand ~ to home directory. */
|
||||
function expandPath(p: string): string {
|
||||
@@ -79,7 +80,7 @@ async function fetchMessageDetails(
|
||||
from: getHeader('From'),
|
||||
subject: getHeader('Subject'),
|
||||
date: getHeader('Date'),
|
||||
snippet: msg.data.snippet ?? '',
|
||||
snippet: sanitizeHtml(msg.data.snippet ?? ''),
|
||||
};
|
||||
} catch {
|
||||
return null;
|
||||
@@ -123,8 +124,8 @@ function extractTextBody(payload: {
|
||||
}
|
||||
}
|
||||
if (htmlFallback) {
|
||||
// Strip HTML tags for a rough plain-text rendering
|
||||
return htmlFallback.replace(/<[^>]+>/g, '').replace(/\s+/g, ' ').trim();
|
||||
// Convert HTML to clean plain text
|
||||
return sanitizeHtml(htmlFallback);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user