mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	linkifiers: Match JS implementation to server implementation.
Since the server-side implementation no longer uses look-ahead or (more importantly) look-behind, it is possible to exactly implement in Javascript. This removes a common class which would prevent local echo. This requires reworking the topic linking algorithm, to march the server's as well. The tests and behaviour are adjusted in so doing -- previously, the JS implementation would have linked `#foo` with a `foo` regex on the linkifier, but the server implementation would not have.
This commit is contained in:
		
				
					committed by
					
						
						Tim Abbott
					
				
			
			
				
	
			
			
			
						parent
						
							70b20e9d2b
						
					
				
				
					commit
					091e2f177b
				
			@@ -42,7 +42,7 @@ function python_to_js_linkifier(
 | 
			
		||||
        current_group += 1;
 | 
			
		||||
    }
 | 
			
		||||
    // Convert any python in-regex flags to RegExp flags
 | 
			
		||||
    let js_flags = "g";
 | 
			
		||||
    let js_flags = "gu";
 | 
			
		||||
    const inline_flag_re = /\(\?([Limsux]+)\)/;
 | 
			
		||||
    match = inline_flag_re.exec(pattern);
 | 
			
		||||
 | 
			
		||||
@@ -59,15 +59,11 @@ function python_to_js_linkifier(
 | 
			
		||||
 | 
			
		||||
        pattern = pattern.replace(inline_flag_re, "");
 | 
			
		||||
    }
 | 
			
		||||
    // Ideally we should have been checking that linkifiers
 | 
			
		||||
    // begin with certain characters but since there is no
 | 
			
		||||
    // support for negative lookbehind in javascript, we check
 | 
			
		||||
    // for this condition in `contains_backend_only_syntax()`
 | 
			
		||||
    // function. If the condition is satisfied then the message
 | 
			
		||||
    // is rendered locally, otherwise, we return false there and
 | 
			
		||||
    // message is rendered on the backend which has proper support
 | 
			
		||||
    // for negative lookbehind.
 | 
			
		||||
    pattern = pattern + /(?!\w)/.source;
 | 
			
		||||
    // This boundary-matching must be kept in sync with prepare_linkifier_pattern in
 | 
			
		||||
    // zerver.lib.markdown.  It does not use look-ahead or look-behind, because re2
 | 
			
		||||
    // does not support either.
 | 
			
		||||
    pattern =
 | 
			
		||||
        /(^|\s|\u0085|\p{Z}|['"(,:<])/u.source + "(" + pattern + ")" + /($|[^\p{L}\p{N}])/u.source;
 | 
			
		||||
    let final_regex = null;
 | 
			
		||||
    try {
 | 
			
		||||
        final_regex = new RegExp(pattern, js_flags);
 | 
			
		||||
 
 | 
			
		||||
@@ -70,31 +70,6 @@ export function translate_emoticons_to_names({src, get_emoticon_translations}) {
 | 
			
		||||
    return translated;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function contains_problematic_linkifier({content, get_linkifier_map}) {
 | 
			
		||||
    // If a linkifier doesn't start with some specified characters
 | 
			
		||||
    // then don't render it locally. It is workaround for the fact that
 | 
			
		||||
    // javascript regex doesn't support lookbehind.
 | 
			
		||||
    for (const re of get_linkifier_map().keys()) {
 | 
			
		||||
        const pattern = /[^\s"'(,:<]/.source + re.source + /(?!\w)/.source;
 | 
			
		||||
        const regex = new RegExp(pattern);
 | 
			
		||||
        if (regex.test(content)) {
 | 
			
		||||
            return true;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function content_contains_backend_only_syntax({content, get_linkifier_map}) {
 | 
			
		||||
    // Try to guess whether or not a message contains syntax that only the
 | 
			
		||||
    // backend Markdown processor can correctly handle.
 | 
			
		||||
    // If it doesn't, we can immediately render it client-side for local echo.
 | 
			
		||||
    return (
 | 
			
		||||
        contains_preview_link(content) ||
 | 
			
		||||
        contains_problematic_linkifier({content, get_linkifier_map})
 | 
			
		||||
    );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function parse_with_options({raw_content, helper_config, options}) {
 | 
			
		||||
    // Given the raw markdown content of a message (raw_content)
 | 
			
		||||
    // we return the HTML content (content) and flags.
 | 
			
		||||
@@ -317,13 +292,27 @@ export function get_topic_links({topic, get_linkifier_map}) {
 | 
			
		||||
    let precedence = 0;
 | 
			
		||||
 | 
			
		||||
    for (const [pattern, {url_template, group_number_to_name}] of get_linkifier_map().entries()) {
 | 
			
		||||
        let match;
 | 
			
		||||
        while ((match = pattern.exec(topic)) !== null) {
 | 
			
		||||
            const matched_groups = match.slice(1);
 | 
			
		||||
        // Strip off the "g" modifier
 | 
			
		||||
        const non_global_pattern = new RegExp(pattern.source, pattern.flags.replace("g", ""));
 | 
			
		||||
        let pos = 0;
 | 
			
		||||
        while (pos < topic.length) {
 | 
			
		||||
            const match = non_global_pattern.exec(topic.slice(pos));
 | 
			
		||||
            if (match === null) {
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            // Advance position to the start of the body of the match, after the index
 | 
			
		||||
            // offset and any leading whitespace (captured in match[1])
 | 
			
		||||
            pos += match.index + match[1].length;
 | 
			
		||||
 | 
			
		||||
            // match[0] is the entire match, which may have leading or trailing boundary
 | 
			
		||||
            // characters. match[1] is the leading characters, match[2] is the main body of the
 | 
			
		||||
            // match, and user-provided groups start in match[3].  Slice those user-provided groups
 | 
			
		||||
            // off.
 | 
			
		||||
            const user_groups = match.slice(3, -1);
 | 
			
		||||
            let i = 0;
 | 
			
		||||
            const template_context = {};
 | 
			
		||||
            while (i < matched_groups.length) {
 | 
			
		||||
                const matched_group = matched_groups[i];
 | 
			
		||||
            while (i < user_groups.length) {
 | 
			
		||||
                const matched_group = user_groups[i];
 | 
			
		||||
                const current_group = i + 1;
 | 
			
		||||
                template_context[group_number_to_name[current_group]] = matched_group;
 | 
			
		||||
                i += 1;
 | 
			
		||||
@@ -331,7 +320,13 @@ export function get_topic_links({topic, get_linkifier_map}) {
 | 
			
		||||
            const link_url = url_template.expand(template_context);
 | 
			
		||||
            // We store the starting index as well, to sort the order of occurrence of the links
 | 
			
		||||
            // in the topic, similar to the logic implemented in zerver/lib/markdown/__init__.py
 | 
			
		||||
            links.push({url: link_url, text: match[0], index: match.index, precedence});
 | 
			
		||||
            links.push({url: link_url, text: match[2], index: pos, precedence});
 | 
			
		||||
 | 
			
		||||
            // Adjust the start point of the match for the next iteration -- we already advanced
 | 
			
		||||
            // to the start of the user match, so now only advance by match[2], so that we don't
 | 
			
		||||
            // include the length of any trailing characters. This means that patterns can overlap
 | 
			
		||||
            // their whitespace.
 | 
			
		||||
            pos += match[2].length;
 | 
			
		||||
        }
 | 
			
		||||
        precedence += 1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -448,10 +443,10 @@ function handleEmoji({emoji_name, get_realm_emoji_url, get_emoji_codepoint}) {
 | 
			
		||||
function handleLinkifier({pattern, matches, get_linkifier_map}) {
 | 
			
		||||
    const {url_template, group_number_to_name} = get_linkifier_map().get(pattern);
 | 
			
		||||
 | 
			
		||||
    const user_groups = matches.slice(1);
 | 
			
		||||
    let current_group = 1;
 | 
			
		||||
    const template_context = {};
 | 
			
		||||
 | 
			
		||||
    for (const match of matches) {
 | 
			
		||||
    for (const match of user_groups) {
 | 
			
		||||
        template_context[group_number_to_name[current_group]] = match;
 | 
			
		||||
        current_group += 1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -688,10 +683,7 @@ export function add_topic_links(message) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
export function contains_backend_only_syntax(content) {
 | 
			
		||||
    return content_contains_backend_only_syntax({
 | 
			
		||||
        content,
 | 
			
		||||
        get_linkifier_map: web_app_helpers.get_linkifier_map,
 | 
			
		||||
    });
 | 
			
		||||
    return contains_preview_link(content);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
export function parse_non_message(raw_content) {
 | 
			
		||||
 
 | 
			
		||||
@@ -30,7 +30,10 @@ run_test("python_to_js_linkifier", () => {
 | 
			
		||||
        },
 | 
			
		||||
    ]);
 | 
			
		||||
    let actual_value = get_linkifier_regexes();
 | 
			
		||||
    let expected_value = [/\/aa\/g(?!\w)/gim, /\/aa\/g(?!\w)/g];
 | 
			
		||||
    let expected_value = [
 | 
			
		||||
        /(^|\s|\u0085|\p{Z}|['"(,:<])(\/aa\/g)($|[^\p{L}\p{N}])/gimu,
 | 
			
		||||
        /(^|\s|\u0085|\p{Z}|['"(,:<])(\/aa\/g)($|[^\p{L}\p{N}])/gu,
 | 
			
		||||
    ];
 | 
			
		||||
    assert.deepEqual(actual_value, expected_value);
 | 
			
		||||
    // Test case with multiple replacements.
 | 
			
		||||
    linkifiers.update_linkifier_rules([
 | 
			
		||||
@@ -41,7 +44,7 @@ run_test("python_to_js_linkifier", () => {
 | 
			
		||||
        },
 | 
			
		||||
    ]);
 | 
			
		||||
    actual_value = get_linkifier_regexes();
 | 
			
		||||
    expected_value = [/#cf(\d+)([A-Z][\dA-Z]*)(?!\w)/g];
 | 
			
		||||
    expected_value = [/(^|\s|\u0085|\p{Z}|['"(,:<])(#cf(\d+)([A-Z][\dA-Z]*))($|[^\p{L}\p{N}])/gu];
 | 
			
		||||
    assert.deepEqual(actual_value, expected_value);
 | 
			
		||||
    // Test incorrect syntax.
 | 
			
		||||
    blueslip.expect("error", "python_to_js_linkifier failure!");
 | 
			
		||||
 
 | 
			
		||||
@@ -456,9 +456,6 @@ test("marked", () => {
 | 
			
		||||
            expected:
 | 
			
		||||
                '<blockquote>\n<p>User group mention in quote: <span class="user-group-mention silent" data-user-group-id="2">Backend</span></p>\n</blockquote>\n<blockquote>\n<p>Another user group mention in quote: <span class="user-group-mention silent" data-user-group-id="1">hamletcharacters</span></p>\n</blockquote>',
 | 
			
		||||
        },
 | 
			
		||||
        // Test only those linkifiers which don't return True for
 | 
			
		||||
        // `contains_backend_only_syntax()`. Those which return True
 | 
			
		||||
        // are tested separately.
 | 
			
		||||
        {
 | 
			
		||||
            input: "This is a linkifier #1234 with text after it",
 | 
			
		||||
            expected:
 | 
			
		||||
@@ -479,6 +476,14 @@ test("marked", () => {
 | 
			
		||||
            expected:
 | 
			
		||||
                '<p>This is a linkifier with <a href="https://zone_45.zulip.net/ticket/123" title="https://zone_45.zulip.net/ticket/123">ZGROUP_123:45</a> groups</p>',
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            input: "Here is the PR-#123.",
 | 
			
		||||
            expected: `<p>Here is the PR-<a href="https://trac.example.com/ticket/123" title="https://trac.example.com/ticket/123">#123</a>.</p>`,
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            input: "Function abc() was introduced in (PR)#123.",
 | 
			
		||||
            expected: `<p>Function abc() was introduced in (PR)<a href="https://trac.example.com/ticket/123" title="https://trac.example.com/ticket/123">#123</a>.</p>`,
 | 
			
		||||
        },
 | 
			
		||||
        {input: "Test *italic*", expected: "<p>Test <em>italic</em></p>"},
 | 
			
		||||
        {
 | 
			
		||||
            input: "T\n#**Denmark**",
 | 
			
		||||
@@ -845,16 +850,6 @@ test("message_flags", () => {
 | 
			
		||||
    assert.equal(message.flags.includes("mentioned"), false);
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
test("backend_only_linkifiers", () => {
 | 
			
		||||
    const backend_only_linkifiers = [
 | 
			
		||||
        "Here is the PR-#123.",
 | 
			
		||||
        "Function abc() was introduced in (PR)#123.",
 | 
			
		||||
    ];
 | 
			
		||||
    for (const content of backend_only_linkifiers) {
 | 
			
		||||
        assert.equal(markdown.contains_backend_only_syntax(content), true);
 | 
			
		||||
    }
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
test("translate_emoticons_to_names", () => {
 | 
			
		||||
    const get_emoticon_translations = emoji.get_emoticon_translations;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -114,7 +114,7 @@ function get_realm_emoji_url(emoji_name) {
 | 
			
		||||
    return realm_emoji_map.get(emoji_name);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const regex = /#foo(\d+)(?!\w)/g;
 | 
			
		||||
const regex = /(^|\s|\u0085|\p{Z}|['"(,:<])(#foo(\d+))($|[^\p{L}\p{N}])/gu;
 | 
			
		||||
const linkifier_map = new Map();
 | 
			
		||||
linkifier_map.set(regex, {
 | 
			
		||||
    url_template: url_template_lib.parse("http://foo.com/{id}"),
 | 
			
		||||
@@ -287,30 +287,30 @@ run_test("topic links repeated", () => {
 | 
			
		||||
 | 
			
		||||
run_test("topic links overlapping", () => {
 | 
			
		||||
    linkifiers.initialize([
 | 
			
		||||
        {pattern: "[a-z]+(?P<id>1\\d+) #[a-z]+", url_template: "http://a.com/{id}"},
 | 
			
		||||
        {pattern: "[a-z]+(?P<id>1\\d+) :[a-z]+", url_template: "http://a.com/{id}"},
 | 
			
		||||
        {pattern: "[a-z]+(?P<id>1\\d+)", url_template: "http://b.com/{id}"},
 | 
			
		||||
        {pattern: ".+#(?P<id>[a-z]+)", url_template: "http://wildcard.com/{id}"},
 | 
			
		||||
        {pattern: "#(?P<id>[a-z]+)", url_template: "http://c.com/{id}"},
 | 
			
		||||
        {pattern: ".+:(?P<id>[a-z]+)", url_template: "http://wildcard.com/{id}"},
 | 
			
		||||
        {pattern: ":(?P<id>[a-z]+)", url_template: "http://c.com/{id}"},
 | 
			
		||||
    ]);
 | 
			
		||||
    // b.com's pattern should be matched while it overlaps with c.com's.
 | 
			
		||||
    assert_topic_links("#foo100", [
 | 
			
		||||
    assert_topic_links(":foo100", [
 | 
			
		||||
        {
 | 
			
		||||
            text: "foo100",
 | 
			
		||||
            url: "http://b.com/100",
 | 
			
		||||
        },
 | 
			
		||||
    ]);
 | 
			
		||||
    // a.com's pattern should be matched while it overlaps with b.com's, wildcard.com's and c.com's.
 | 
			
		||||
    assert_topic_links("#asd123 #asd", [
 | 
			
		||||
    assert_topic_links(":asd123 :asd", [
 | 
			
		||||
        {
 | 
			
		||||
            text: "asd123 #asd",
 | 
			
		||||
            text: "asd123 :asd",
 | 
			
		||||
            url: "http://a.com/123",
 | 
			
		||||
        },
 | 
			
		||||
    ]);
 | 
			
		||||
    // a.com's pattern do not match, wildcard.com's and b.com's patterns should match
 | 
			
		||||
    // and the links are ordered by the matched index.
 | 
			
		||||
    assert_topic_links("/#asd #foo100", [
 | 
			
		||||
    assert_topic_links("/:asd :foo100", [
 | 
			
		||||
        {
 | 
			
		||||
            text: "/#asd",
 | 
			
		||||
            text: "/:asd",
 | 
			
		||||
            url: "http://wildcard.com/asd",
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
@@ -318,27 +318,27 @@ run_test("topic links overlapping", () => {
 | 
			
		||||
            url: "http://b.com/100",
 | 
			
		||||
        },
 | 
			
		||||
    ]);
 | 
			
		||||
    assert_topic_links("foo.anything/#asd", [
 | 
			
		||||
    assert_topic_links("foo.anything/:asd", [
 | 
			
		||||
        {
 | 
			
		||||
            text: "foo.anything/#asd",
 | 
			
		||||
            text: "foo.anything/:asd",
 | 
			
		||||
            url: "http://wildcard.com/asd",
 | 
			
		||||
        },
 | 
			
		||||
    ]);
 | 
			
		||||
 | 
			
		||||
    // While the raw URL "http://foo.com/foo100" appears before b.com's match "foo100",
 | 
			
		||||
    // we prioritize the linkifier match first.
 | 
			
		||||
    assert_topic_links("http://foo.com/foo100", [
 | 
			
		||||
    assert_topic_links("http://foo.com/:foo100", [
 | 
			
		||||
        {
 | 
			
		||||
            text: "foo100",
 | 
			
		||||
            url: "http://b.com/100",
 | 
			
		||||
        },
 | 
			
		||||
    ]);
 | 
			
		||||
 | 
			
		||||
    // Here the raw URL "https://foo.com/#asd" appears after wildcard.com's match "something https://foo.com/#asd".
 | 
			
		||||
    // Here the raw URL "https://foo.com/:asd" appears after wildcard.com's match "something https://foo.com/:asd".
 | 
			
		||||
    // The latter is prioritized and the raw URL does not get included.
 | 
			
		||||
    assert_topic_links("something https://foo.com/#asd", [
 | 
			
		||||
    assert_topic_links("something https://foo.com/:asd", [
 | 
			
		||||
        {
 | 
			
		||||
            text: "something https://foo.com/#asd",
 | 
			
		||||
            text: "something https://foo.com/:asd",
 | 
			
		||||
            url: "http://wildcard.com/asd",
 | 
			
		||||
        },
 | 
			
		||||
    ]);
 | 
			
		||||
 
 | 
			
		||||
@@ -650,10 +650,10 @@ InlineLexer.prototype.output = function(src) {
 | 
			
		||||
    regexes.forEach(function (regex) {
 | 
			
		||||
      var ret = self.inlineReplacement(regex, src, function(regex, groups, match) {
 | 
			
		||||
        // Insert the created URL
 | 
			
		||||
        href = self.linkifier(regex, groups, match);
 | 
			
		||||
        href = self.linkifier(regex, groups.slice(1, -1), match);
 | 
			
		||||
        if (href !== undefined) {
 | 
			
		||||
          href = escape(href);
 | 
			
		||||
          return self.renderer.link(href, href, match);
 | 
			
		||||
          return groups[0] + self.renderer.link(href, href, groups[1]) + groups.slice(-1)[0]
 | 
			
		||||
        } else {
 | 
			
		||||
          return match;
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -1820,6 +1820,9 @@ def prepare_linkifier_pattern(source: str) -> str:
 | 
			
		||||
    # We use an extended definition of 'whitespace' which is
 | 
			
		||||
    # equivalent to \p{White_Space} -- since \s in re2 only matches
 | 
			
		||||
    # ASCII spaces, and re2 does not support \p{White_Space}.
 | 
			
		||||
    #
 | 
			
		||||
    # This implementation should be kept in sync with the one in
 | 
			
		||||
    # web/src/linkifiers.js
 | 
			
		||||
    regex = rf"""
 | 
			
		||||
        (?P<{BEFORE_CAPTURE_GROUP}>
 | 
			
		||||
            ^  |
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user