diff --git a/web/src/linkifiers.ts b/web/src/linkifiers.ts index 26d3e029c6..6a055f0025 100644 --- a/web/src/linkifiers.ts +++ b/web/src/linkifiers.ts @@ -42,7 +42,7 @@ function python_to_js_linkifier( current_group += 1; } // Convert any python in-regex flags to RegExp flags - let js_flags = "g"; + let js_flags = "gu"; const inline_flag_re = /\(\?([Limsux]+)\)/; match = inline_flag_re.exec(pattern); @@ -59,15 +59,11 @@ function python_to_js_linkifier( pattern = pattern.replace(inline_flag_re, ""); } - // Ideally we should have been checking that linkifiers - // begin with certain characters but since there is no - // support for negative lookbehind in javascript, we check - // for this condition in `contains_backend_only_syntax()` - // function. If the condition is satisfied then the message - // is rendered locally, otherwise, we return false there and - // message is rendered on the backend which has proper support - // for negative lookbehind. - pattern = pattern + /(?!\w)/.source; + // This boundary-matching must be kept in sync with prepare_linkifier_pattern in + // zerver.lib.markdown. It does not use look-ahead or look-behind, because re2 + // does not support either. + pattern = + /(^|\s|\u0085|\p{Z}|['"(,:<])/u.source + "(" + pattern + ")" + /($|[^\p{L}\p{N}])/u.source; let final_regex = null; try { final_regex = new RegExp(pattern, js_flags); diff --git a/web/src/markdown.js b/web/src/markdown.js index c89e5de21a..84bee8b57b 100644 --- a/web/src/markdown.js +++ b/web/src/markdown.js @@ -70,31 +70,6 @@ export function translate_emoticons_to_names({src, get_emoticon_translations}) { return translated; } -function contains_problematic_linkifier({content, get_linkifier_map}) { - // If a linkifier doesn't start with some specified characters - // then don't render it locally. It is workaround for the fact that - // javascript regex doesn't support lookbehind. - for (const re of get_linkifier_map().keys()) { - const pattern = /[^\s"'(,:<]/.source + re.source + /(?!\w)/.source; - const regex = new RegExp(pattern); - if (regex.test(content)) { - return true; - } - } - - return false; -} - -function content_contains_backend_only_syntax({content, get_linkifier_map}) { - // Try to guess whether or not a message contains syntax that only the - // backend Markdown processor can correctly handle. - // If it doesn't, we can immediately render it client-side for local echo. - return ( - contains_preview_link(content) || - contains_problematic_linkifier({content, get_linkifier_map}) - ); -} - function parse_with_options({raw_content, helper_config, options}) { // Given the raw markdown content of a message (raw_content) // we return the HTML content (content) and flags. @@ -317,13 +292,27 @@ export function get_topic_links({topic, get_linkifier_map}) { let precedence = 0; for (const [pattern, {url_template, group_number_to_name}] of get_linkifier_map().entries()) { - let match; - while ((match = pattern.exec(topic)) !== null) { - const matched_groups = match.slice(1); + // Strip off the "g" modifier + const non_global_pattern = new RegExp(pattern.source, pattern.flags.replace("g", "")); + let pos = 0; + while (pos < topic.length) { + const match = non_global_pattern.exec(topic.slice(pos)); + if (match === null) { + break; + } + // Advance position to the start of the body of the match, after the index + // offset and any leading whitespace (captured in match[1]) + pos += match.index + match[1].length; + + // match[0] is the entire match, which may have leading or trailing boundary + // characters. match[1] is the leading characters, match[2] is the main body of the + // match, and user-provided groups start in match[3]. Slice those user-provided groups + // off. + const user_groups = match.slice(3, -1); let i = 0; const template_context = {}; - while (i < matched_groups.length) { - const matched_group = matched_groups[i]; + while (i < user_groups.length) { + const matched_group = user_groups[i]; const current_group = i + 1; template_context[group_number_to_name[current_group]] = matched_group; i += 1; @@ -331,7 +320,13 @@ export function get_topic_links({topic, get_linkifier_map}) { const link_url = url_template.expand(template_context); // We store the starting index as well, to sort the order of occurrence of the links // in the topic, similar to the logic implemented in zerver/lib/markdown/__init__.py - links.push({url: link_url, text: match[0], index: match.index, precedence}); + links.push({url: link_url, text: match[2], index: pos, precedence}); + + // Adjust the start point of the match for the next iteration -- we already advanced + // to the start of the user match, so now only advance by match[2], so that we don't + // include the length of any trailing characters. This means that patterns can overlap + // their whitespace. + pos += match[2].length; } precedence += 1; } @@ -448,10 +443,10 @@ function handleEmoji({emoji_name, get_realm_emoji_url, get_emoji_codepoint}) { function handleLinkifier({pattern, matches, get_linkifier_map}) { const {url_template, group_number_to_name} = get_linkifier_map().get(pattern); + const user_groups = matches.slice(1); let current_group = 1; const template_context = {}; - - for (const match of matches) { + for (const match of user_groups) { template_context[group_number_to_name[current_group]] = match; current_group += 1; } @@ -688,10 +683,7 @@ export function add_topic_links(message) { } export function contains_backend_only_syntax(content) { - return content_contains_backend_only_syntax({ - content, - get_linkifier_map: web_app_helpers.get_linkifier_map, - }); + return contains_preview_link(content); } export function parse_non_message(raw_content) { diff --git a/web/tests/linkifiers.test.js b/web/tests/linkifiers.test.js index fcb56d4e8b..c09b3234ca 100644 --- a/web/tests/linkifiers.test.js +++ b/web/tests/linkifiers.test.js @@ -30,7 +30,10 @@ run_test("python_to_js_linkifier", () => { }, ]); let actual_value = get_linkifier_regexes(); - let expected_value = [/\/aa\/g(?!\w)/gim, /\/aa\/g(?!\w)/g]; + let expected_value = [ + /(^|\s|\u0085|\p{Z}|['"(,:<])(\/aa\/g)($|[^\p{L}\p{N}])/gimu, + /(^|\s|\u0085|\p{Z}|['"(,:<])(\/aa\/g)($|[^\p{L}\p{N}])/gu, + ]; assert.deepEqual(actual_value, expected_value); // Test case with multiple replacements. linkifiers.update_linkifier_rules([ @@ -41,7 +44,7 @@ run_test("python_to_js_linkifier", () => { }, ]); actual_value = get_linkifier_regexes(); - expected_value = [/#cf(\d+)([A-Z][\dA-Z]*)(?!\w)/g]; + expected_value = [/(^|\s|\u0085|\p{Z}|['"(,:<])(#cf(\d+)([A-Z][\dA-Z]*))($|[^\p{L}\p{N}])/gu]; assert.deepEqual(actual_value, expected_value); // Test incorrect syntax. blueslip.expect("error", "python_to_js_linkifier failure!"); diff --git a/web/tests/markdown.test.js b/web/tests/markdown.test.js index 50513127ae..23b548de17 100644 --- a/web/tests/markdown.test.js +++ b/web/tests/markdown.test.js @@ -456,9 +456,6 @@ test("marked", () => { expected: '
\n\nUser group mention in quote: Backend
\n
\n', }, - // Test only those linkifiers which don't return True for - // `contains_backend_only_syntax()`. Those which return True - // are tested separately. { input: "This is a linkifier #1234 with text after it", expected: @@ -479,6 +476,14 @@ test("marked", () => { expected: 'Another user group mention in quote: hamletcharacters
\n
This is a linkifier with ZGROUP_123:45 groups
', }, + { + input: "Here is the PR-#123.", + expected: `Here is the PR-#123.
`, + }, + { + input: "Function abc() was introduced in (PR)#123.", + expected: `Function abc() was introduced in (PR)#123.
`, + }, {input: "Test *italic*", expected: "Test italic
"}, { input: "T\n#**Denmark**", @@ -845,16 +850,6 @@ test("message_flags", () => { assert.equal(message.flags.includes("mentioned"), false); }); -test("backend_only_linkifiers", () => { - const backend_only_linkifiers = [ - "Here is the PR-#123.", - "Function abc() was introduced in (PR)#123.", - ]; - for (const content of backend_only_linkifiers) { - assert.equal(markdown.contains_backend_only_syntax(content), true); - } -}); - test("translate_emoticons_to_names", () => { const get_emoticon_translations = emoji.get_emoticon_translations; diff --git a/web/tests/markdown_parse.test.js b/web/tests/markdown_parse.test.js index 1e53500078..6b150443f6 100644 --- a/web/tests/markdown_parse.test.js +++ b/web/tests/markdown_parse.test.js @@ -114,7 +114,7 @@ function get_realm_emoji_url(emoji_name) { return realm_emoji_map.get(emoji_name); } -const regex = /#foo(\d+)(?!\w)/g; +const regex = /(^|\s|\u0085|\p{Z}|['"(,:<])(#foo(\d+))($|[^\p{L}\p{N}])/gu; const linkifier_map = new Map(); linkifier_map.set(regex, { url_template: url_template_lib.parse("http://foo.com/{id}"), @@ -287,30 +287,30 @@ run_test("topic links repeated", () => { run_test("topic links overlapping", () => { linkifiers.initialize([ - {pattern: "[a-z]+(?P