2021-07-03 03:20:07 +02:00
|
|
|
import { MessageBody, HeaderBlock, ListBlock, CodeBlock, FormatPart, NewLinePart, RulePart, TextPart, LinkPart } from "../../../domain/session/room/timeline/MessageBody.js"
|
2021-07-02 09:18:37 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* At the time of writing (Jul 1 2021), Matrix Spec recommends
|
|
|
|
* allowing the following HTML tags:
|
|
|
|
* font, del, h1, h2, h3, h4, h5, h6, blockquote, p, a, ul, ol, sup, sub, li, b, i, u,
|
|
|
|
* strong, em, strike, code, hr, br, div, table, thead, tbody, tr, th, td, caption, pre, span, img
|
|
|
|
*/
|
|
|
|
|
2021-07-02 09:23:59 +02:00
|
|
|
/**
|
|
|
|
* Nodes that don't have any properties to them other than their tag.
|
|
|
|
* While <a> has `href`, and <img> has `src`, these have... themselves.
|
|
|
|
*/
|
2021-07-02 09:18:37 +02:00
|
|
|
const basicNodes = [
    "EM",
    "STRONG",
    "CODE",
    "DEL",
    "P",
    "DIV",
    "SPAN",
];
|
|
|
|
|
2021-07-02 09:23:59 +02:00
|
|
|
/**
 * Create the `parsefn` for a tag that carries no data besides its name.
 *
 * @param {string} tag element name handed to every FormatPart the builder creates
 * @returns {function} builder called as `(result, node, children)`; it ignores
 *  the first two arguments and wraps `children` in a FormatPart for `tag`
 */
function basicWrapper(tag) {
    const build = (_result, _node, children) => {
        return new FormatPart(tag, children);
    };
    return build;
}
|
|
|
|
|
2021-07-02 09:23:59 +02:00
|
|
|
/**
 * Create the `parsefn` for a heading of a fixed level.
 *
 * @param {number} level heading level handed to every HeaderBlock the builder creates
 * @returns {function} builder called as `(result, node, children)`; it ignores
 *  the first two arguments and wraps `children` in a HeaderBlock of `level`
 */
function headerWrapper(level) {
    const build = (_result, _node, children) => {
        return new HeaderBlock(level, children);
    };
    return build;
}
|
|
|
|
|
2021-07-03 00:05:50 +02:00
|
|
|
/**
 * Build a LinkPart from an anchor element.
 *
 * @param {object} result parse result used to read the element's attributes
 * @param {object} node the anchor element
 * @param {Array} children already-parsed children of the anchor
 * @returns {LinkPart} link holding the raw `href` attribute value and `children`
 */
function parseLink(result, node, children) {
    // TODO Not equivalent to `node.href`!
    // Add another HTMLParseResult method?
    return new LinkPart(result.getAttributeValue(node, "href"), children);
}
|
|
|
|
|
2021-07-03 00:05:50 +02:00
|
|
|
/**
 * Build a ListBlock from a list element.
 *
 * Only direct `LI` children contribute items; every other child is skipped.
 * Each item is the parsed content of one `LI`.
 *
 * @param {object} result parse result used to inspect nodes
 * @param {object} node the list element
 * @returns {ListBlock} block whose start is `null` unless the element is an
 *  `OL`, in which case it is the numeric `start` attribute, or 1
 */
function parseList(result, node) {
    let start = null;
    if (result.getNodeElementName(node) === "OL") {
        // Always parse as decimal: without a radix, "0x1F" would be read as hex.
        // Trailing garbage is tolerated ("1A" yields 1). A missing, unparsable
        // or zero value falls back to 1.
        start = Number.parseInt(result.getAttributeValue(node, "start"), 10) || 1;
    }
    const items = [];
    for (const child of result.getChildNodes(node)) {
        if (result.getNodeElementName(child) !== "LI") {
            continue;
        }
        items.push(parseNodes(result, result.getChildNodes(child)));
    }
    return new ListBlock(start, items);
}
|
|
|
|
|
2021-07-03 00:05:50 +02:00
|
|
|
/**
 * Build a CodeBlock from a `<pre>` element.
 *
 * The element is only accepted when its very first child is a `CODE` element;
 * otherwise `null` is returned.
 *
 * The language is taken from the first class token on the `CODE` element that
 * starts with `language-` but not `language-_`, and is the empty string when
 * there is no such token.
 *
 * @param {object} result parse result used to inspect nodes
 * @param {object} node the `<pre>` element
 * @returns {CodeBlock|null} the code block, or `null` if the shape is not `<pre><code>`
 */
function parseCodeBlock(result, node) {
    const LANGUAGE_PREFIX = "language-";
    // Only the first child matters; destructuring stops iterating after it.
    const [codeNode] = result.getChildNodes(node);
    if (!(codeNode && result.getNodeElementName(codeNode) === "CODE")) {
        return null;
    }
    let language = "";
    const classAttribute = result.getAttributeValue(codeNode, "class") || "";
    // Class tokens may be separated by any whitespace (tabs, newlines), not
    // only by single spaces.
    for (const className of classAttribute.split(/\s+/)) {
        if (className.startsWith(LANGUAGE_PREFIX) && !className.startsWith("language-_")) {
            language = className.substring(LANGUAGE_PREFIX.length);
            break;
        }
    }
    return new CodeBlock(language, codeNode.textContent);
}
|
|
|
|
|
2021-07-03 00:05:50 +02:00
|
|
|
/**
 * Handler for image elements.
 *
 * Currently produces nothing: it returns `null` for every input and reads
 * neither argument. Presumably a placeholder until images are supported.
 *
 * @param {object} result parse result (unused)
 * @param {object} node the image element (unused)
 * @returns {null} always `null`
 */
function parseImage(result, node) {
    return null;
}
|
|
|
|
|
|
|
|
/**
 * Build the lookup table from element name to node handler.
 *
 * Every key is an upper-case element name, matching the names this file
 * compares against elsewhere ("OL", "LI", "CODE"). Each value has the shape
 * `{ descend, parsefn }`.
 *
 * @returns {object} map with handlers for A, UL, OL, PRE, BR, HR, IMG, every
 *  entry of `basicNodes`, and H1 through H6
 */
function buildNodeMap() {
    const map = {
        A: { descend: true, parsefn: parseLink },
        UL: { descend: false, parsefn: parseList },
        OL: { descend: false, parsefn: parseList },
        PRE: { descend: false, parsefn: parseCodeBlock },
        BR: { descend: false, parsefn: () => new NewLinePart() },
        HR: { descend: false, parsefn: () => new RulePart() },
        IMG: { descend: false, parsefn: parseImage },
    };
    for (const tag of basicNodes) {
        map[tag] = { descend: true, parsefn: basicWrapper(tag) };
    }
    for (let level = 1; level <= 6; level++) {
        // Upper-case like every other key; a lower-case "h1" key would never
        // be hit by an upper-case element name.
        map[`H${level}`] = { descend: true, parsefn: headerWrapper(level) };
    }
    return map;
}
|
|
|
|
|
2021-07-02 09:23:59 +02:00
|
|
|
/**
|
|
|
|
* Handlers for various nodes.
|
|
|
|
*
|
|
|
|
* Each handler has two properties: `descend` and `parsefn`.
|
|
|
|
* If `descend` is true, the node's children should be
|
|
|
|
* parsed just like any other node, and fed as the third argument
|
|
|
|
* to `parsefn`. If not, the node's children are either to be ignored
|
|
|
|
* (as in <pre>) or processed specially (as in <ul>).
|
|
|
|
*
|
|
|
|
* The `parsefn` combines a node's data and its children into
|
|
|
|
* an internal representation node.
|
|
|
|
*/
|
2021-07-02 09:18:37 +02:00
|
|
|
// Built once when the module is evaluated; parseNode looks handlers up in
// this table by element name.
const nodes = buildNodeMap();
|
|
|
|
|
2021-07-03 00:05:50 +02:00
|
|
|
/**
 * Convert a single HTML node into its internal representation.
 *
 * Text nodes become a TextPart. Element nodes are dispatched through the
 * `nodes` handler table; when the handler asks to descend, the element's
 * children are parsed first and passed to its `parsefn`.
 *
 * @param {object} result parse result used to inspect nodes
 * @param {object} node the node to convert
 * @returns {object|null} the parsed node, or `null` for elements without a
 *  handler and for nodes that are neither text nor element
 */
function parseNode(result, node) {
    if (result.isTextNode(node)) {
        return new TextPart(result.getNodeText(node));
    }
    if (!result.isElementNode(node)) {
        return null;
    }
    const handler = nodes[result.getNodeElementName(node)];
    if (!handler) {
        return null;
    }
    // Read children through the parse result, as parseList and parseCodeBlock
    // do, rather than touching `node.childNodes` directly.
    const children = handler.descend ? parseNodes(result, result.getChildNodes(node)) : null;
    return handler.parsefn(result, node, children);
}
|
|
|
|
|
2021-07-03 00:05:50 +02:00
|
|
|
/**
 * Convert a sequence of HTML nodes, keeping only those that produced a result.
 *
 * @param {object} result parse result passed through to `parseNode`
 * @param {Iterable} nodes the HTML nodes to convert, in order
 * @returns {Array} parsed nodes in input order; invalid or unknown nodes,
 *  for which `parseNode` returns a falsy value, are left out
 */
function parseNodes(result, nodes) {
    return [...nodes]
        .map(htmlNode => parseNode(result, htmlNode))
        // Just ignore invalid / unknown tags.
        .filter(Boolean);
}
|
|
|
|
|
2021-07-03 03:20:07 +02:00
|
|
|
/**
 * Parse an HTML string into a MessageBody.
 *
 * @param {object} platform provides `parseHTML(html)`, whose return value
 *  exposes `rootNodes` and is used as the parse result for `parseNodes`
 * @param {string} html the HTML source
 * @returns {MessageBody} body built from the original `html` and the parsed root nodes
 */
export function parseHTMLBody(platform, html) {
    const parsed = platform.parseHTML(html);
    const { rootNodes } = parsed;
    return new MessageBody(html, parseNodes(parsed, rootNodes));
}
|