2021-07-12 14:46:42 -07:00
|
|
|
import DOMPurify from "../../../../../lib/dompurify/index.js"
|
|
|
|
|
|
|
|
/**
 * Read-only accessor layer over a parsed HTML tree.
 *
 * Wraps the body node it is constructed with and exposes the handful of
 * node operations (children, attributes, text, node kind) as methods, so
 * callers do not touch the DOM API directly.
 */
class HTMLParseResult {
    /**
     * @param {Node} bodyNode - node whose `childNodes` are the roots of the parsed tree.
     */
    constructor(bodyNode) {
        this._bodyNode = bodyNode;
    }

    /**
     * The top-level nodes of the parsed tree.
     * @returns {Node[]} a new array holding the wrapped body node's child nodes.
     */
    get rootNodes() {
        return Array.from(this._bodyNode.childNodes);
    }

    /**
     * @param {Node} node
     * @returns {Node[]} a new array holding the child nodes of `node`.
     */
    getChildNodes(node) {
        return Array.from(node.childNodes);
    }

    /**
     * @param {Element} node
     * @returns {string[]} a new array with the attribute names reported by `node.getAttributeNames()`.
     */
    getAttributeNames(node) {
        return Array.from(node.getAttributeNames());
    }

    /**
     * @param {Element} node
     * @param {string} attr - attribute name to look up.
     * @returns {?string} whatever `node.getAttribute(attr)` returns.
     */
    getAttributeValue(node, attr) {
        return node.getAttribute(attr);
    }

    /**
     * @param {Node} node
     * @returns {boolean} true when `node.nodeType` equals `Node.TEXT_NODE`.
     */
    isTextNode(node) {
        return node.nodeType === Node.TEXT_NODE;
    }

    /**
     * @param {Node} node
     * @returns {?string} the node's `textContent`.
     */
    getNodeText(node) {
        return node.textContent;
    }

    /**
     * @param {Node} node
     * @returns {boolean} true when `node.nodeType` equals `Node.ELEMENT_NODE`.
     */
    isElementNode(node) {
        return node.nodeType === Node.ELEMENT_NODE;
    }

    /**
     * @param {Element} node
     * @returns {string} the node's `tagName`, exactly as the DOM reports it.
     */
    getNodeElementName(node) {
        return node.tagName;
    }
}
|
2021-07-12 14:46:42 -07:00
|
|
|
|
|
|
|
/**
 * Options passed to `DOMPurify.sanitize` by `parseHTML`.
 */
const sanitizeConfig = {
    // A URI is accepted when it either
    //  - starts with one of the listed schemes
    //    (http, https, ftp, ftps, mailto, tel, callto, cid, xmpp, xxx, mxc), or
    //  - starts with a non-letter character, or
    //  - starts with a run of [a-z+.-] that is NOT followed by ":" (i.e. has no scheme).
    // Anything with another scheme (e.g. "javascript:") does not match.
    ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp|xxx|mxc):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i,
};
|
|
|
|
|
|
|
|
/**
 * Sanitizes an HTML string and parses the result into a tree.
 *
 * The input is first passed through `DOMPurify.sanitize` with `sanitizeConfig`;
 * the sanitized markup is then parsed with `DOMParser` as "text/html" and the
 * resulting document body is wrapped in an `HTMLParseResult`.
 *
 * @param {string} html - HTML markup to sanitize and parse.
 * @returns {HTMLParseResult} accessor over the body of the parsed, sanitized document.
 */
export function parseHTML(html) {
    // TODO: if DOMPurify uses DOMParser internally, can't we just get the
    // built tree from it instead of re-parsing?
    const sanitized = DOMPurify.sanitize(html, sanitizeConfig);
    const bodyNode = new DOMParser().parseFromString(sanitized, "text/html").body;
    return new HTMLParseResult(bodyNode);
}
|