diff --git a/packages/core/package.json b/packages/core/package.json index 200d40ce2e..b58993b2e5 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -56,6 +56,7 @@ "http-proxy-agent": "catalog:", "https-proxy-agent": "catalog:", "iconv-lite": "catalog:", + "jsdom": "23.0.1", "jsonwebtoken": "catalog:", "lodash": "catalog:", "luxon": "catalog:", diff --git a/packages/core/src/__tests__/__snapshots__/html-sandbox.test.ts.snap b/packages/core/src/__tests__/__snapshots__/html-sandbox.test.ts.snap index e1eed34e3a..2da8201f02 100644 --- a/packages/core/src/__tests__/__snapshots__/html-sandbox.test.ts.snap +++ b/packages/core/src/__tests__/__snapshots__/html-sandbox.test.ts.snap @@ -1,13 +1,13 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP -exports[`sandboxHtmlResponse should handle HTML with special characters 1`] = ` -"" `; -exports[`sandboxHtmlResponse should handle empty HTML 1`] = ` -"" `; @@ -17,3 +17,21 @@ exports[`sandboxHtmlResponse should replace ampersands and double quotes in HTML style="position:fixed; top:0; left:0; width:100vw; height:100vh; border:none; overflow:auto;" allowtransparency="true">" `; + +exports[`sandboxHtmlResponse should sandbox even with no tag 1`] = ` +"" +`; + +exports[`sandboxHtmlResponse should sandbox when outside and tags 1`] = ` +"" +`; + +exports[`sandboxHtmlResponse should sandbox when outside tag 1`] = ` +"" +`; diff --git a/packages/core/src/__tests__/html-sandbox.test.ts b/packages/core/src/__tests__/html-sandbox.test.ts index 237eedfad4..27813399d6 100644 --- a/packages/core/src/__tests__/html-sandbox.test.ts +++ b/packages/core/src/__tests__/html-sandbox.test.ts @@ -24,15 +24,42 @@ describe('sandboxHtmlResponse', () => { expect(sandboxHtmlResponse(html)).toMatchSnapshot(); }); - it('should handle empty HTML', () => { - const html = ''; - expect(sandboxHtmlResponse(html)).toMatchSnapshot(); - }); - it('should handle HTML with special characters', () => { const html = '

Special characters: <>&"\'

'; expect(sandboxHtmlResponse(html)).toMatchSnapshot(); }); + + it.each([ + ['Hello World', 'Hello World'], + ['< not html >', '< not html >'], + ['# Test', '# Test'], + ['', ''], + [123, '123'], + [null, 'null'], + ])('should not sandbox if not html', (data, expected) => { + expect(sandboxHtmlResponse(data)).toBe(expected); + }); + + it('should sandbox even with no tag', () => { + const html = 'Test'; + expect(sandboxHtmlResponse(html)).toMatchSnapshot(); + }); + + it('should sandbox when outside and tags', () => { + const html = + 'Test'; + expect(sandboxHtmlResponse(html)).toMatchSnapshot(); + }); + + it('should sandbox when outside tag', () => { + const html = 'Test'; + expect(sandboxHtmlResponse(html)).toMatchSnapshot(); + }); + + it('should always sandbox if forceSandbox is true', () => { + const text = 'Hello World'; + expect(sandboxHtmlResponse(text, true)).toMatchSnapshot(); + }); }); describe('isHtmlRenderedContentType', () => { @@ -143,7 +170,7 @@ describe('bufferEscapeHtml', () => { describe('createHtmlSandboxTransformStream', () => { const getComparableHtml = (input: Buffer | string) => - sandboxHtmlResponse(input.toString()).replace(/\s+/g, ' '); + sandboxHtmlResponse(input.toString(), true).replace(/\s+/g, ' '); it('should wrap single chunk in iframe with proper escaping', async () => { const input = Buffer.from('Hello & "World"', 'utf8'); diff --git a/packages/core/src/html-sandbox.ts b/packages/core/src/html-sandbox.ts index 52759505d8..9498dd62a1 100644 --- a/packages/core/src/html-sandbox.ts +++ b/packages/core/src/html-sandbox.ts @@ -1,11 +1,31 @@ +import { JSDOM } from 'jsdom'; import type { TransformCallback } from 'stream'; import { Transform } from 'stream'; /** - * Sandboxes the HTML response to prevent possible exploitation. Embeds the - * response in an iframe to make sure the HTML has a different origin. + * Checks if the given string contains HTML. 
*/ -export const sandboxHtmlResponse = (data: T) => { +export const hasHtml = (str: string) => { + try { + const dom = new JSDOM(str); + return ( + dom.window.document.body.children.length > 0 || dom.window.document.head.children.length > 0 + ); + } catch { + return false; + } +}; + +/** + * Sandboxes the HTML response to prevent possible exploitation, if the data has HTML. + * If the data does not have HTML, it will be returned as is. + * Otherwise, it embeds the response in an iframe to make sure the HTML has a different origin. + * + * @param data - The data to sandbox. + * @param forceSandbox - Whether to force sandboxing even if the data does not contain HTML. + * @returns The sandboxed HTML response, or the text as is (JSON-stringified for non-string data) when it contains no HTML and forceSandbox is false. + */ +export const sandboxHtmlResponse = (data: T, forceSandbox = false) => { let text; if (typeof data !== 'string') { text = JSON.stringify(data); @@ -13,6 +33,10 @@ export const sandboxHtmlResponse = (data: T) => { text = data; } + if (!forceSandbox && !hasHtml(text)) { + return text; + } + // Escape & and " as mentioned in the spec: // https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-iframe-element const escapedHtml = text.replaceAll('&', '&').replaceAll('"', '"'); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 986f6f08aa..3523666f33 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1739,6 +1739,9 @@ importers: iconv-lite: specifier: 'catalog:' version: 0.6.3 + jsdom: + specifier: 23.0.1 + version: 23.0.1 jsonwebtoken: specifier: 'catalog:' version: 9.0.2