vector-im-hydrogen-web/src/platform/web/parsehtml.js

import DOMPurify from "../../../../../lib/dompurify/index.js"

/**
 * Thin accessor layer over a parsed DOM subtree.
 *
 * Every method takes the node it operates on as an argument and reads
 * a single DOM property or calls a single DOM method on it, so callers
 * never have to touch the DOM API themselves. Collections are copied
 * into plain arrays before being handed out.
 */
class HTMLParseResult {
    /**
     * @param {Node} bodyNode node whose children are exposed as `rootNodes`.
     */
    constructor(bodyNode) {
        this._bodyNode = bodyNode;
    }

    /**
     * Children of the wrapped body node.
     * @returns {Node[]} a new array on every access.
     */
    get rootNodes() {
        const {childNodes} = this._bodyNode;
        return Array.from(childNodes);
    }

    /**
     * @param {Node} node
     * @returns {Node[]} a new array holding the children of `node`.
     */
    getChildNodes(node) {
        const {childNodes} = node;
        return Array.from(childNodes);
    }

    /**
     * @param {Element} node
     * @returns {string[]} a new array with whatever `node.getAttributeNames()` yields.
     */
    getAttributeNames(node) {
        const names = node.getAttributeNames();
        return Array.from(names);
    }

    /**
     * @param {Element} node
     * @param {string} attr attribute name to look up.
     * @returns {?string} the result of `node.getAttribute(attr)`, unmodified.
     */
    getAttributeValue(node, attr) {
        const value = node.getAttribute(attr);
        return value;
    }

    /**
     * @param {Node} node
     * @returns {boolean} true when `node.nodeType` equals `Node.TEXT_NODE`.
     */
    isTextNode(node) {
        const {nodeType} = node;
        return nodeType === Node.TEXT_NODE;
    }

    /**
     * @param {Node} node
     * @returns {?string} the node's `nodeValue`.
     */
    getNodeText(node) {
        const {nodeValue} = node;
        return nodeValue;
    }

    /**
     * @param {Node} node
     * @returns {boolean} true when `node.nodeType` equals `Node.ELEMENT_NODE`.
     */
    isElementNode(node) {
        const {nodeType} = node;
        return nodeType === Node.ELEMENT_NODE;
    }

    /**
     * @param {Element} node
     * @returns {string} the node's `tagName`, exactly as the DOM reports it.
     */
    getNodeElementName(node) {
        const {tagName} = node;
        return tagName;
    }
}

// Case-insensitive pattern with three alternatives, all anchored at the start
// of the value:
//   1. one of the schemes ftp, ftps, http, https, mailto, tel, callto, cid,
//      xmpp, xxx or mxc, followed by ":";
//   2. any value whose first character is not a letter;
//   3. a leading run of letters, "+", "." or "-" that is followed by the end
//      of the value or by a character that is neither part of that set nor
//      ":" (i.e. the value does not begin with some other "scheme:" prefix).
const allowedUriPattern = /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp|xxx|mxc):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i;

// Options object handed to DOMPurify.sanitize() by parseHTML().
// NOTE(review): presumably DOMPurify applies ALLOWED_URI_REGEXP to URI-valued
// attributes; confirm against the vendored DOMPurify version.
const sanitizeConfig = {
    ALLOWED_URI_REGEXP: allowedUriPattern,
};

/**
 * Sanitizes an HTML string and exposes the resulting DOM through an
 * HTMLParseResult.
 *
 * The input is first passed through DOMPurify.sanitize() together with
 * `sanitizeConfig`; whatever that returns is then parsed as a "text/html"
 * document with DOMParser, and the document's body becomes the root of the
 * returned result.
 *
 * @param {string} html markup to sanitize and parse.
 * @returns {HTMLParseResult} wrapper around the parsed document's body.
 */
export function parseHTML(html) {
    // TODO: this parses the markup a second time after sanitizing. If
    // DOMPurify builds its own tree via DOMParser, it may be possible to
    // reuse that tree instead of re-parsing — not verified yet.
    const cleanMarkup = DOMPurify.sanitize(html, sanitizeConfig);
    const parser = new DOMParser();
    const {body} = parser.parseFromString(cleanMarkup, "text/html");
    return new HTMLParseResult(body);
}
Switch to DOMPurify in platform layer for sanitization. 2021-07-12 14:46:42 -07:00			`import DOMPurify from "../../../../../lib/dompurify/index.js"`

			`class HTMLParseResult {`
Move HTML parsing into platform. 2021-07-02 15:05:50 -07:00			`constructor(bodyNode) {`
			`this._bodyNode = bodyNode;`
			`}`

			`get rootNodes() {`
Wrap DOM iterables in array. 2021-07-07 14:12:24 -07:00			`return Array.from(this._bodyNode.childNodes);`
Move HTML parsing into platform. 2021-07-02 15:05:50 -07:00			`}`

			`getChildNodes(node) {`
Wrap DOM iterables in array. 2021-07-07 14:12:24 -07:00			`return Array.from(node.childNodes);`
Move HTML parsing into platform. 2021-07-02 15:05:50 -07:00			`}`

			`getAttributeNames(node) {`
Wrap DOM iterables in array. 2021-07-07 14:12:24 -07:00			`return Array.from(node.getAttributeNames());`
Move HTML parsing into platform. 2021-07-02 15:05:50 -07:00			`}`

			`getAttributeValue(node, attr) {`
			`return node.getAttribute(attr);`
			`}`

			`isTextNode(node) {`
			`return node.nodeType === Node.TEXT_NODE;`
			`}`

			`getNodeText(node) {`
			`return node.nodeValue;`
			`}`

			`isElementNode(node) {`
			`return node.nodeType === Node.ELEMENT_NODE;`
			`}`

			`getNodeElementName(node) {`
			`return node.tagName;`
			`}`
			`}`
Switch to DOMPurify in platform layer for sanitization. 2021-07-12 14:46:42 -07:00
			`const sanitizeConfig = {`
			`ALLOWED_URI_REGEXP: /^(?:(?:(?:f\|ht)tps?\|mailto\|tel\|callto\|cid\|xmpp\|xxx\|mxc):\|[^a-z]\|[a-z+.\-]+(?:[^a-z+.\-:]\|$))/i,`
			`}`

			`export function parseHTML(html) {`
			`// If DOMPurify uses DOMParser, can't we just get the built tree from it`
			`// instead of re-parsing?`
			`const sanitized = DOMPurify.sanitize(html, sanitizeConfig);`
			`const bodyNode = new DOMParser().parseFromString(sanitized, "text/html").body;`
			`return new HTMLParseResult(bodyNode);`
			`}`