From 9c6d8d9d81eae6db0c636dfcad37b169014e82d7 Mon Sep 17 00:00:00 2001 From: akshatdalton Date: Thu, 17 Dec 2020 14:50:34 +0000 Subject: [PATCH] markdown: Add support to shorten GitHub links. We add support to shorten links and test their shortening in well-organized, clean manner that makes it trivial to extend the GitHub approach for GitLab and perhaps other services. We only shorten basic types of GitHub links (issue, PR, commit) that fit a set of simple common patterns; the default behaviour of Autolink is kept for everything else. Logic added in frontend and backend Markdown Processor is identical. This makes easy to extend the logic for other services like GitLab. Fixes #11895. --- static/third/marked/lib/marked.js | 56 ++++++++- zerver/lib/markdown/__init__.py | 56 +++++++++ .../tests/fixtures/markdown_test_cases.json | 115 ++++++++++++++++++ 3 files changed, 225 insertions(+), 2 deletions(-) diff --git a/static/third/marked/lib/marked.js b/static/third/marked/lib/marked.js index eebf3cb429..3a253114c5 100644 --- a/static/third/marked/lib/marked.js +++ b/static/third/marked/lib/marked.js @@ -561,6 +561,58 @@ inline.zulip = merge({}, inline.breaks, { () }); +function shorten_links(href) { + // This value must match what is used in the backend. + const COMMIT_ID_PREFIX_LENGTH = 12; + const url = new URL(href); + if (url.protocol === 'https:' && ['github.com'].includes(url.hostname)) { + // The following part of the code was necessary because unlike Python, str.split + // method in javascript does not return the remaining part of the string. + var parts = url.pathname.split('/').slice(1); + parts = parts.slice(0, 4) + .concat(parts.slice(4).join('/')) + .concat(Array(5).fill('')) + .slice(0, 5); + + const organisation = parts[0] + , repository = parts[1] + , artifact = parts[2] + , value = parts[3] + , remaining_path = parts[4]; + + if (!organisation || !repository || !artifact || !value) { + return href; + } + + const repo_short_text = organisation + '/' + repository; + + if (remaining_path || url.hash) { + return href; + } + + if (url.hostname === 'github.com') { + return shorten_github_links(href, artifact, repo_short_text, + value, COMMIT_ID_PREFIX_LENGTH); + } + } + return href; +} + +/** + * Shorten GitHub Links + */ + +function shorten_github_links(href, artifact, repo_short_text, + value, commit_id_prefix_length) { + if (['pull', 'issues'].includes(artifact)) { + return repo_short_text + '#' + value; + } + if (artifact == 'commit') { + return repo_short_text + '@' + value.slice(0, commit_id_prefix_length); + } + return href; +} + /** * Inline Lexer & Compiler */ @@ -682,8 +734,8 @@ InlineLexer.prototype.output = function(src) { // url (gfm) if (!this.inLink && (cap = this.rules.url.exec(src))) { src = src.substring(cap[0].length); - text = escape(cap[1]); - href = text; + href = escape(cap[1]); + text = shorten_links(href) out += this.renderer.link(href, null, text); continue; } diff --git a/zerver/lib/markdown/__init__.py b/zerver/lib/markdown/__init__.py index 36db962dea..9ab0ca4d0a 100644 --- a/zerver/lib/markdown/__init__.py +++ b/zerver/lib/markdown/__init__.py @@ -1625,9 +1625,65 @@ class CompiledPattern(markdown.inlinepatterns.Pattern): class AutoLink(CompiledPattern): + """AutoLink takes care of linkifying link-format strings directly + present in the message (i.e. without markdown link syntax). In + some hardcoded cases, it will rewrite the label to what the user + probably wanted if they'd taken the time to do so. + """ + + # Ideally, we'd use a dynamic commit prefix length based on the + # size of the repository, like Git itself does, but we're + # shortening Git commit IDs without looking at the corresponding + # repository. It's not essential that the shortenings are + # globally unique as they're just shorthand, but 12 characters is + # the minimum to be unique for projects with about 1M commits. + COMMIT_ID_PREFIX_LENGTH = 12 + + def shorten_links(self, href: str) -> Optional[str]: + parts = urllib.parse.urlparse(href) + scheme, netloc, path, params, query, fragment = parts + if scheme == "https" and netloc in ["github.com"]: + # Split the path to extract our 4 variables. + + # To do it cleanly without multiple if branches based on which of these + # variables are present, we here add a list of ["", "", ""...] + # to the result of path.split, which at worst can be []. We also remove + # the first empty string we'd get from "/foo/bar".split("/"). + + # Example path: "/foo/bar" output: ["foo", "bar", "", "", ""] + # path: "" output: ["", "", "", "", ""] + organisation, repository, artifact, value, remaining_path = ( + path.split("/", 5)[1:] + [""] * 5 + )[:5] + + # Decide what type of links to shorten. + if not organisation or not repository or not artifact or not value: + return None + repo_short_text = "{}/{}".format(organisation, repository) + + if fragment or remaining_path: + # We only intend to shorten links for the basic issue, PR, and commit ones. + return None + + if netloc == "github.com": + return self.shorten_github_links(artifact, repo_short_text, value) + return None + + def shorten_github_links( + self, artifact: str, repo_short_text: str, value: str + ) -> Optional[str]: + if artifact in ["pull", "issues"]: + return "{}#{}".format(repo_short_text, value) + if artifact == "commit": + return "{}@{}".format(repo_short_text, value[0 : self.COMMIT_ID_PREFIX_LENGTH]) + return None + def handleMatch(self, match: Match[str]) -> ElementStringNone: url = match.group("url") db_data = self.md.zulip_db_data + shortened_text = self.shorten_links(url) + if shortened_text is not None: + return url_to_a(db_data, url, shortened_text) return url_to_a(db_data, url) diff --git a/zerver/tests/fixtures/markdown_test_cases.json b/zerver/tests/fixtures/markdown_test_cases.json index 4e3b6af4e2..9c2f8006df 100644 --- a/zerver/tests/fixtures/markdown_test_cases.json +++ b/zerver/tests/fixtures/markdown_test_cases.json @@ -932,6 +932,121 @@ "input": "

*

[

Static types in Python

](https://blog.zulip.com/2016/10/13/static-types-in-python-oh-mypy)*", "expected_output": "

<h1><h1><h2>Static types in Python</h2></h1></h1>

", "marked_expected_output": "

<h1><h1><h2>Static types in Python</h2></h1></h1>\n\n

" + }, + { + "name": "auto_shorten_github_repo_link", + "input": "https://github.com/zulip/zulip-mobile", + "expected_output": "

https://github.com/zulip/zulip-mobile

" + }, + { + "name": "auto_shorten_github_issues_link", + "input": "https://github.com/zulip/zulip-mobile/issues/11895", + "expected_output": "

zulip/zulip-mobile#11895

" + }, + { + "name": "auto_shorten_github_pull_link", + "input": "https://github.com/zulip/zulip-mobile/pull/16665", + "expected_output": "

zulip/zulip-mobile#16665

" + }, + { + "name": "auto_shorten_github_commit_link", + "input": "https://github.com/zulip/zulip-mobile/commit/620e9cbf72ca729534aba41693d7ab2872caa394", + "expected_output": "

zulip/zulip-mobile@620e9cbf72ca

" + }, + { + "name": "auto_shorten_github_commits_link", + "input": "https://github.com/zulip/zulip-mobile/pull/16860/commits/19e96bcea616e6e9b1a3c10f25930ac1f75d4f92", + "expected_output": "

https://github.com/zulip/zulip-mobile/pull/16860/commits/19e96bcea616e6e9b1a3c10f25930ac1f75d4f92

" + }, + { + "name": "auto_shorten_github_pull_initial_comment_link", + "input": "https://github.com/zulip/zulip-mobile/pull/16665#issue-513297835", + "expected_output": "

https://github.com/zulip/zulip-mobile/pull/16665#issue-513297835

" + }, + { + "name": "auto_shorten_github_pull_comment_link", + "input": "https://github.com/zulip/zulip-mobile/pull/16665#issuecomment-719814618", + "expected_output": "

https://github.com/zulip/zulip-mobile/pull/16665#issuecomment-719814618

" + }, + { + "name": "auto_shorten_github_issues_initial_comment_link", + "input": "https://github.com/zulip/zulip-mobile/issues/16579#issue-725908927", + "expected_output": "

https://github.com/zulip/zulip-mobile/issues/16579#issue-725908927

" + }, + { + "name": "auto_shorten_github_issues_comment_link", + "input": "https://github.com/zulip/zulip-mobile/issues/16482#issuecomment-726354516", + "expected_output": "

https://github.com/zulip/zulip-mobile/issues/16482#issuecomment-726354516

" + }, + { + "name": "auto_shorten_github_files_link", + "input": "https://github.com/zulip/zulip-mobile/pull/16860/files", + "expected_output": "

https://github.com/zulip/zulip-mobile/pull/16860/files

" + }, + { + "name": "auto_shorten_github_files_comment_link", + "input": "https://github.com/zulip/zulip-mobile/pull/16860/files#r539133612", + "expected_output": "

https://github.com/zulip/zulip-mobile/pull/16860/files#r539133612

" + }, + { + "name": "auto_shorten_github_repo_issues_link", + "input": "https://github.com/zulip/zulip-mobile/issues", + "expected_output": "

https://github.com/zulip/zulip-mobile/issues

" + }, + { + "name": "auto_shorten_github_repo_pull_link", + "input": "https://github.com/zulip/zulip-mobile/pull", + "expected_output": "

https://github.com/zulip/zulip-mobile/pull

" + }, + { + "name": "auto_shorten_github_repo_commit_link", + "input": "https://github.com/zulip/zulip-mobile/commit", + "expected_output": "

https://github.com/zulip/zulip-mobile/commit

" + }, + { + "name": "auto_shorten_github_repo_clone_link", + "input": "https://github.com/zulip/zulip-mobile.git", + "expected_output": "

https://github.com/zulip/zulip-mobile.git

" + }, + { + "name": "auto_shorten_github_repo_labels_link", + "input": "https://github.com/zulip/zulip-mobile/labels", + "expected_output": "

https://github.com/zulip/zulip-mobile/labels

" + }, + { + "name": "auto_shorten_github_labels_link", + "input": "https://github.com/zulip/zulip-mobile/labels/area%3A%20markdown", + "expected_output": "

https://github.com/zulip/zulip-mobile/labels/area%3A%20markdown

" + }, + { + "name": "auto_shorten_github_tree_link", + "input": "https://github.com/zulip/zulip-mobile/tree/chat.zulip.org", + "expected_output": "

https://github.com/zulip/zulip-mobile/tree/chat.zulip.org

" + }, + { + "name": "auto_shorten_github_marketplace_circleci_link", + "input": "https://github.com/marketplace/circleci", + "expected_output": "

https://github.com/marketplace/circleci

" + }, + { + "name": "auto_shorten_github_foo_bar_baz_link", + "input": "https://github.com/foo/bar/baz", + "expected_output": "

https://github.com/foo/bar/baz

" + }, + { + "name": "auto_shorten_github_foo_bar_baz_zar_link", + "input": "https://github.com/foo/bar/baz/zar", + "expected_output": "

https://github.com/foo/bar/baz/zar

" + }, + { + "name": "auto_shorten_github_marketplace_link", + "input": "https://github.com/marketplace", + "expected_output": "

https://github.com/marketplace

" + }, + { + "name": "auto_shorten_github_link", + "input": "https://github.com", + "expected_output": "

https://github.com

" } ], "linkify_tests": [