update HTML escape logic (#2793)

This commit is contained in:
Fred K. Schott 2022-03-15 13:33:55 -07:00 committed by GitHub
parent 2b76ee8d75
commit 6eb494796e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 75 additions and 39 deletions

View file

@ -0,0 +1,5 @@
---
'astro': patch
---
Fix HTML double-escaping issue

View file

@ -85,6 +85,7 @@
"fast-glob": "^3.2.11",
"fast-xml-parser": "^4.0.6",
"html-entities": "^2.3.2",
"html-escaper": "^3.0.3",
"htmlparser2": "^7.2.0",
"kleur": "^4.1.4",
"magic-string": "^0.25.9",
@ -123,6 +124,7 @@
"@types/connect": "^3.4.35",
"@types/debug": "^4.1.7",
"@types/estree": "^0.0.51",
"@types/html-escaper": "^3.0.0",
"@types/mime": "^2.0.3",
"@types/mocha": "^9.1.0",
"@types/parse5": "^6.0.3",

View file

@ -1,25 +1,31 @@
const entities = { '"': 'quot', '&': 'amp', "'": 'apos', '<': 'lt', '>': 'gt' } as const;
import { escape } from 'html-escaper';
// This util is only ever run on expression values that we already know are of type `string`
export const escapeHTML = (str: string) => str.replace(/["'&<>]/g, (char: string) => '&' + entities[char as keyof typeof entities] + ';');
// Leverage the battle-tested `html-escaper` npm package.
export const escapeHTML = escape;
/**
* RawString is a "blessed" version of String
* that is not subject to escaping.
* A "blessed" extension of String that tells Astro that the string
* has already been escaped. This helps prevent double-escaping of HTML.
*/
export class UnescapedString extends String {}
export class HTMLString extends String {}
/**
* unescapeHTML marks a string as raw, unescaped HTML.
* This should only be generated internally, not a public API.
* markHTMLString marks a string as raw or "already escaped" by returning
* a `HTMLString` instance. This is meant for internal use, and should not
* be returned through any public JS API.
*/
export const unescapeHTML = (value: any) => {
// Cast any `string` values to `UnescapedString` to mark them as ignored
// The `as unknown as string` is necessary for TypeScript to treat this as `string`
if (typeof value === 'string') {
return new UnescapedString(value) as unknown as string;
export const markHTMLString = (value: any) => {
// If value is already marked as an HTML string, there is nothing to do.
if (value instanceof HTMLString) {
return value;
}
// Just return values that are `number`, `null`, `undefined` etc
// The compiler will recursively stringify these correctly
// Cast to `HTMLString` to mark the string as valid HTML. Any HTML escaping
// and sanitization should have already happened to the `value` argument.
// NOTE: `unknown as string` is necessary for TypeScript to treat this as `string`
if (typeof value === 'string') {
return new HTMLString(value) as unknown as string;
}
// Return all other values (`number`, `null`, `undefined`) as-is.
// The compiler will recursively stringify these correctly at a later stage.
return value;
};

View file

@ -4,11 +4,14 @@ import type { AstroGlobalPartial, SSRResult, SSRElement } from '../../@types/ast
import shorthash from 'shorthash';
import { extractDirectives, generateHydrateScript } from './hydration.js';
import { serializeListValue } from './util.js';
import { escapeHTML, UnescapedString, unescapeHTML } from './escape.js';
import { escapeHTML, HTMLString, markHTMLString } from './escape.js';
export type { Metadata } from './metadata';
export { createMetadata } from './metadata.js';
export { unescapeHTML } from './escape.js';
export { markHTMLString } from './escape.js';
// TODO(deprecated): This name has been updated in Astro runtime but not yet in the Astro compiler.
export { markHTMLString as unescapeHTML } from './escape.js';
const voidElementNames = /^(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)$/i;
const htmlBooleanAttributes =
@ -26,24 +29,24 @@ const svgEnumAttributes = /^(autoReverse|externalResourcesRequired|focusable|pre
// Or maybe type UserValue = any; ?
async function _render(child: any): Promise<any> {
child = await child;
if (child instanceof UnescapedString) {
if (child instanceof HTMLString) {
return child;
} else if (Array.isArray(child)) {
return unescapeHTML((await Promise.all(child.map((value) => _render(value)))).join(''));
return markHTMLString((await Promise.all(child.map((value) => _render(value)))).join(''));
} else if (typeof child === 'function') {
// Special: If a child is a function, call it automatically.
// This lets you do {() => ...} without the extra boilerplate
// of wrapping it in a function and calling it.
return _render(child());
} else if (typeof child === 'string') {
return escapeHTML(child);
return markHTMLString(escapeHTML(child));
} else if (!child && child !== 0) {
// do nothing, safe to ignore falsey values.
}
// Add a comment explaining why each of these are needed.
// Maybe create clearly named function for what this is doing.
else if (child instanceof AstroComponent || Object.prototype.toString.call(child) === '[object AstroComponent]') {
return unescapeHTML(await renderAstroComponent(child));
return markHTMLString(await renderAstroComponent(child));
} else {
return child;
}
@ -71,7 +74,7 @@ export class AstroComponent {
const html = htmlParts[i];
const expression = expressions[i];
yield unescapeHTML(html);
yield markHTMLString(html);
yield _render(expression);
}
}
@ -134,12 +137,12 @@ export async function renderComponent(result: SSRResult, displayName: string, Co
if (children == null) {
return children;
}
return unescapeHTML(children);
return markHTMLString(children);
}
if (Component && (Component as any).isAstroComponentFactory) {
const output = await renderToString(result, Component as any, _props, slots);
return unescapeHTML(output);
return markHTMLString(output);
}
if (Component === null && !_props['client:only']) {
@ -250,7 +253,7 @@ If you're still stuck, please open an issue on GitHub or join us at https://astr
// as a string and the user is responsible for adding a script tag for the component definition.
if (!html && typeof Component === 'string') {
html = await renderAstroComponent(
await render`<${Component}${spreadAttributes(props)}${unescapeHTML(
await render`<${Component}${spreadAttributes(props)}${markHTMLString(
(children == null || children == '') && voidElementNames.test(Component) ? `/>` : `>${children}</${Component}>`
)}`
);
@ -267,7 +270,7 @@ If you're still stuck, please open an issue on GitHub or join us at https://astr
}
if (!hydration) {
return unescapeHTML(html.replace(/\<\/?astro-fragment\>/g, ''));
return markHTMLString(html.replace(/\<\/?astro-fragment\>/g, ''));
}
// Include componentExport name and componentUrl in hash to dedupe identical islands
@ -280,7 +283,7 @@ If you're still stuck, please open an issue on GitHub or join us at https://astr
// Render a template if no fragment is provided.
const needsAstroTemplate = children && !/<\/?astro-fragment\>/.test(html);
const template = needsAstroTemplate ? `<template data-astro-template>${children}</template>` : '';
return unescapeHTML(`<astro-root uid="${astroId}"${needsAstroTemplate ? ' tmpl' : ''}>${html ?? ''}${template}</astro-root>`);
return markHTMLString(`<astro-root uid="${astroId}"${needsAstroTemplate ? ' tmpl' : ''}>${html ?? ''}${template}</astro-root>`);
}
/** Create the Astro.fetchContent() runtime function. */
@ -349,7 +352,7 @@ export function addAttribute(value: any, key: string, shouldEscape = true) {
if (value === false) {
if (htmlEnumAttributes.test(key) || svgEnumAttributes.test(key)) {
return unescapeHTML(` ${key}="false"`);
return markHTMLString(` ${key}="false"`);
}
return '';
}
@ -365,14 +368,14 @@ Make sure to use the static attribute syntax (\`${key}={value}\`) instead of the
// support "class" from an expression passed into an element (#782)
if (key === 'class:list') {
return unescapeHTML(` ${key.slice(0, -5)}="${toAttributeString(serializeListValue(value))}"`);
return markHTMLString(` ${key.slice(0, -5)}="${toAttributeString(serializeListValue(value))}"`);
}
// Boolean values only need the key
if (value === true && (key.startsWith('data-') || htmlBooleanAttributes.test(key))) {
return unescapeHTML(` ${key}`);
return markHTMLString(` ${key}`);
} else {
return unescapeHTML(` ${key}="${toAttributeString(value, shouldEscape)}"`);
return markHTMLString(` ${key}="${toAttributeString(value, shouldEscape)}"`);
}
}
@ -382,7 +385,7 @@ export function spreadAttributes(values: Record<any, any>, shouldEscape = true)
for (const [key, value] of Object.entries(values)) {
output += addAttribute(value, key, shouldEscape);
}
return unescapeHTML(output);
return markHTMLString(output);
}
// Adds CSS variables to an inline style tag
@ -391,7 +394,7 @@ export function defineStyleVars(selector: string, vars: Record<any, any>) {
for (const [key, value] of Object.entries(vars)) {
output += ` --${key}: ${value};\n`;
}
return unescapeHTML(`${selector} {${output}}`);
return markHTMLString(`${selector} {${output}}`);
}
// Adds variables to an inline script.
@ -400,7 +403,7 @@ export function defineScriptVars(vars: Record<any, any>) {
for (const [key, value] of Object.entries(vars)) {
output += `let ${key} = ${JSON.stringify(value)};\n`;
}
return unescapeHTML(output);
return markHTMLString(output);
}
// Renders an endpoint request to completion, returning the body.
@ -467,7 +470,7 @@ export async function renderHead(result: SSRResult): Promise<string> {
const links = Array.from(result.links)
.filter(uniqueElements)
.map((link) => renderElement('link', link, false));
return unescapeHTML(links.join('\n') + styles.join('\n') + scripts.join('\n') + '\n' + '<!--astro:head:injected-->');
return markHTMLString(links.join('\n') + styles.join('\n') + scripts.join('\n') + '\n' + '<!--astro:head:injected-->');
}
export async function renderAstroComponent(component: InstanceType<typeof AstroComponent>) {
@ -479,7 +482,7 @@ export async function renderAstroComponent(component: InstanceType<typeof AstroC
}
}
return unescapeHTML(await _render(template));
return markHTMLString(await _render(template));
}
function componentIsHTMLElement(Component: unknown) {
@ -495,7 +498,7 @@ export async function renderHTMLElement(result: SSRResult, constructor: typeof H
attrHTML += ` ${attr}="${toAttributeString(await props[attr])}"`;
}
return unescapeHTML(`<${name}${attrHTML}>${await renderSlot(result, slots?.default)}</${name}>`);
return markHTMLString(`<${name}${attrHTML}>${await renderSlot(result, slots?.default)}</${name}>`);
}
function getHTMLElementName(constructor: typeof HTMLElement) {

View file

@ -104,8 +104,15 @@ describe('Expressions', () => {
const html = await fixture.readFile('/escape/index.html');
const $ = cheerio.load(html);
expect($('body').children()).to.have.lengthOf(1);
expect($('body').text()).to.include('&lt;script&gt;console.log(&quot;pwnd&quot;)&lt;/script&gt;');
expect($('body').children()).to.have.lengthOf(2);
expect($('body').html()).to.include('&lt;script&gt;console.log("pwnd")&lt;/script&gt;');
expect($('#trusted')).to.have.lengthOf(1);
});
it('Does not double-escape HTML', async () => {
const html = await fixture.readFile('/escape/index.html');
const $ = cheerio.load(html);
expect($('#single-escape').html()).to.equal('Astro &amp; Vite');
});
});

View file

@ -3,6 +3,7 @@
<title>My site</title>
</head>
<body>
<span id="single-escape">{'Astro & Vite'}</span>
{'<script>console.log("pwnd")</script>'}
<Fragment set:html={'<script id="trusted">console.log("yay!")</script>'} />
</body>

View file

@ -352,6 +352,7 @@ importers:
'@types/connect': ^3.4.35
'@types/debug': ^4.1.7
'@types/estree': ^0.0.51
'@types/html-escaper': ^3.0.0
'@types/mime': ^2.0.3
'@types/mocha': ^9.1.0
'@types/parse5': ^6.0.3
@ -374,6 +375,7 @@ importers:
fast-glob: ^3.2.11
fast-xml-parser: ^4.0.6
html-entities: ^2.3.2
html-escaper: ^3.0.3
htmlparser2: ^7.2.0
kleur: ^4.1.4
magic-string: ^0.25.9
@ -428,6 +430,7 @@ importers:
fast-glob: 3.2.11
fast-xml-parser: 4.0.6
html-entities: 2.3.2
html-escaper: 3.0.3
htmlparser2: 7.2.0
kleur: 4.1.4
magic-string: 0.25.9
@ -465,6 +468,7 @@ importers:
'@types/connect': 3.4.35
'@types/debug': 4.1.7
'@types/estree': 0.0.51
'@types/html-escaper': 3.0.0
'@types/mime': 2.0.3
'@types/mocha': 9.1.0
'@types/parse5': 6.0.3
@ -3490,6 +3494,10 @@ packages:
dependencies:
'@types/unist': 2.0.6
/@types/html-escaper/3.0.0:
resolution: {integrity: sha512-OcJcvP3Yk8mjYwf/IdXZtTE1tb/u0WF0qa29ER07ZHCYUBZXSN29Z1mBS+/96+kNMGTFUAbSz9X+pHmHpZrTCw==}
dev: true
/@types/is-ci/3.0.0:
resolution: {integrity: sha512-Q0Op0hdWbYd1iahB+IFNQcWXFq4O0Q5MwQP7uN0souuQ4rPg1vEYcnIOfr1gY+M+6rc8FGoRaBO1mOOvL29sEQ==}
dependencies:
@ -6283,6 +6291,10 @@ packages:
resolution: {integrity: sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ==}
dev: false
/html-escaper/3.0.3:
resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==}
dev: false
/html-void-elements/2.0.1:
resolution: {integrity: sha512-0quDb7s97CfemeJAnW9wC0hw78MtW7NU3hqtCD75g2vFlDLt36llsYD7uB7SUzojLMP24N5IatXf7ylGXiGG9A==}
dev: false