mirror of
https://github.com/vector-im/hydrogen-web.git
synced 2024-12-23 19:45:05 +01:00
Refactor regex to improve readability
- Split regex into components - Add informative comments Signed-off-by: RMidhunSuresh <rmidhunsuresh@gmail.com>
This commit is contained in:
parent
31740f4ec6
commit
c6d7cef491
@ -1 +1,29 @@
|
|||||||
/**
 * Global, Unicode-aware matcher for http, https and ftp URLs inside free text.
 *
 * - The scheme is matched case-insensitively by spelling out both cases.
 * - The host part is limited to ASCII letters, digits and `: . [ ] # -`.
 * - A URL with a path (/) or fragment (#) may end in any character that is
 *   neither whitespace nor one of `. , ? !`.
 * - A URL without path or fragment must additionally end in an ASCII character.
 *
 * The "i" flag is deliberately NOT used. Together with "u" it applies Unicode
 * case folding inside character classes: U+017F (long s) and U+212A (Kelvin
 * sign) fold to "s" and "k". That made the ASCII host class accept those two
 * non-ASCII characters, and made the negated class below (which lists the
 * non-ASCII range) reject plain "s", "S", "k" and "K", truncating URLs such
 * as https://www.bbc.co.uk.
 *
 * The regex carries the "g" flag, so `lastIndex` is stateful across
 * `exec`/`test` calls.
 */
export const regex = /(?:[hH][tT][tT][pP][sS]?|[fF][tT][pP]):\/\/[a-zA-Z0-9:.\[\]#-]+(?:[\/#][^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gu;
|
/*
The scheme is matched case-insensitively by spelling out both cases
instead of relying on the "i" flag. Combined with "u", the "i" flag
applies Unicode case folding inside character classes: U+017F (long s)
and U+212A (Kelvin sign) fold to "s" and "k". With "i" set, the ASCII
host class would accept those two non-ASCII characters, and the negated
endASCII class (which lists the non-ASCII range) would reject plain
"s", "S", "k" and "K", truncating URLs such as https://www.bbc.co.uk.
*/
const scheme = "(?:[hH][tT][tT][pP][sS]?|[fF][tT][pP]):\\/\\/";

// A single character allowed in the host part (ASCII only).
const host = "[a-zA-Z0-9:.\\[\\]-]";

/*
A URL containing path (/) or fragment (#) component
is allowed to end with any character which is not
space nor punctuation. The ending character may be
non-ASCII.
*/
const end = "[^\\s.,?!]";
const additional = `[\\/#][^\\s]*${end}`;

/*
Similarly, a URL not containing path or fragment must
also end with a character that is not space nor punctuation.
However the ending character must also be ASCII.
*/
const nonASCII = "\\u{80}-\\u{10ffff}";
const endASCII = `[^\\s${nonASCII}.,?!]`;

/*
URL must not contain non-ascii characters in host but may contain
them in path or fragment components.
https://matrix.org/<smiley> - matched including the smiley
https://matrix.org<smiley> - only https://matrix.org is matched
*/
const urlRegex = `${scheme}${host}+(?:${additional}|${endASCII})`;

/**
 * Global, Unicode-aware matcher for http, https and ftp URLs inside free text.
 * It carries the "g" flag, so `lastIndex` is stateful across `exec`/`test`
 * calls. The "i" flag is intentionally omitted (see the note on `scheme`).
 */
export const regex = new RegExp(urlRegex, "gu");
|
||||||
|
Loading…
Reference in New Issue
Block a user