markdown: Add support to shorten GitHub links.

We add support to shorten links and test their shortening in
well-organized, clean manner that makes it trivial to extend the
GitHub approach for GitLab and perhaps other services.

We only shorten basic types of GitHub links (issue, PR, commit) that
fit a set of simple common patterns; the default behaviour of Autolink
is kept for everything else.

Logic added in frontend and backend Markdown Processor is identical.
This makes easy to extend the logic for other services like GitLab.

Fixes #11895.
This commit is contained in:
akshatdalton
2020-12-17 14:50:34 +00:00
committed by Tim Abbott
parent abd959cf6a
commit 9c6d8d9d81
3 changed files with 225 additions and 2 deletions

View File

@@ -561,6 +561,58 @@ inline.zulip = merge({}, inline.breaks, {
()
});
function shorten_links(href) {
// This value must match what is used in the backend.
const COMMIT_ID_PREFIX_LENGTH = 12;
const url = new URL(href);
if (url.protocol === 'https:' && ['github.com'].includes(url.hostname)) {
// The following part of the code was necessary because unlike Python, str.split
// method in javascript does not return the remaining part of the string.
var parts = url.pathname.split('/').slice(1);
parts = parts.slice(0, 4)
.concat(parts.slice(4).join('/'))
.concat(Array(5).fill(''))
.slice(0, 5);
const organisation = parts[0]
, repository = parts[1]
, artifact = parts[2]
, value = parts[3]
, remaining_path = parts[4];
if (!organisation || !repository || !artifact || !value) {
return href;
}
const repo_short_text = organisation + '/' + repository;
if (remaining_path || url.hash) {
return href;
}
if (url.hostname === 'github.com') {
return shorten_github_links(href, artifact, repo_short_text,
value, COMMIT_ID_PREFIX_LENGTH);
}
}
return href;
}
/**
* Shorten GitHub Links
*/
function shorten_github_links(href, artifact, repo_short_text,
value, commit_id_prefix_length) {
if (['pull', 'issues'].includes(artifact)) {
return repo_short_text + '#' + value;
}
if (artifact == 'commit') {
return repo_short_text + '@' + value.slice(0, commit_id_prefix_length);
}
return href;
}
/**
* Inline Lexer & Compiler
*/
@@ -682,8 +734,8 @@ InlineLexer.prototype.output = function(src) {
// url (gfm)
if (!this.inLink && (cap = this.rules.url.exec(src))) {
src = src.substring(cap[0].length);
text = escape(cap[1]);
href = text;
href = escape(cap[1]);
text = shorten_links(href)
out += this.renderer.link(href, null, text);
continue;
}

View File

@@ -1625,9 +1625,65 @@ class CompiledPattern(markdown.inlinepatterns.Pattern):
class AutoLink(CompiledPattern):
"""AutoLink takes care of linkifying link-format strings directly
present in the message (i.e. without markdown link syntax). In
some hardcoded cases, it will rewrite the label to what the user
probably wanted if they'd taken the time to do so.
"""
# Ideally, we'd use a dynamic commit prefix length based on the
# size of the repository, like Git itself does, but we're
# shortening Git commit IDs without looking at the corresponding
# repository. It's not essential that the shortenings are
# globally unique as they're just shorthand, but 12 characters is
# the minimum to be unique for projects with about 1M commits.
COMMIT_ID_PREFIX_LENGTH = 12
def shorten_links(self, href: str) -> Optional[str]:
parts = urllib.parse.urlparse(href)
scheme, netloc, path, params, query, fragment = parts
if scheme == "https" and netloc in ["github.com"]:
# Split the path to extract our 4 variables.
# To do it cleanly without multiple if branches based on which of these
# variables are present, we here add a list of ["", "", ""...]
# to the result of path.split, which at worst can be []. We also remove
# the first empty string we'd get from "/foo/bar".split("/").
# Example path: "/foo/bar" output: ["foo", "bar", "", "", ""]
# path: "" output: ["", "", "", "", ""]
organisation, repository, artifact, value, remaining_path = (
path.split("/", 5)[1:] + [""] * 5
)[:5]
# Decide what type of links to shorten.
if not organisation or not repository or not artifact or not value:
return None
repo_short_text = "{}/{}".format(organisation, repository)
if fragment or remaining_path:
# We only intend to shorten links for the basic issue, PR, and commit ones.
return None
if netloc == "github.com":
return self.shorten_github_links(artifact, repo_short_text, value)
return None
def shorten_github_links(
self, artifact: str, repo_short_text: str, value: str
) -> Optional[str]:
if artifact in ["pull", "issues"]:
return "{}#{}".format(repo_short_text, value)
if artifact == "commit":
return "{}@{}".format(repo_short_text, value[0 : self.COMMIT_ID_PREFIX_LENGTH])
return None
def handleMatch(self, match: Match[str]) -> ElementStringNone:
url = match.group("url")
db_data = self.md.zulip_db_data
shortened_text = self.shorten_links(url)
if shortened_text is not None:
return url_to_a(db_data, url, shortened_text)
return url_to_a(db_data, url)

View File

@@ -932,6 +932,121 @@
"input": "<h1>*<h1>[<h2>Static types in Python</h2>](https://blog.zulip.com/2016/10/13/static-types-in-python-oh-mypy)</h1>*</h1>",
"expected_output": "<p>&lt;h1&gt;<em>&lt;h1&gt;<a href=\"https://blog.zulip.com/2016/10/13/static-types-in-python-oh-mypy\">&lt;h2&gt;Static types in Python&lt;/h2&gt;</a>&lt;/h1&gt;</em>&lt;/h1&gt;</p>",
"marked_expected_output": "<p>&lt;h1&gt;<em>&lt;h1&gt;<a href=\"https://blog.zulip.com/2016/10/13/static-types-in-python-oh-mypy\">&lt;h2&gt;Static types in Python&lt;/h2&gt;</a>&lt;/h1&gt;</em>&lt;/h1&gt;\n\n</p>"
},
{
"name": "auto_shorten_github_repo_link",
"input": "https://github.com/zulip/zulip-mobile",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile\">https://github.com/zulip/zulip-mobile</a></p>"
},
{
"name": "auto_shorten_github_issues_link",
"input": "https://github.com/zulip/zulip-mobile/issues/11895",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/issues/11895\">zulip/zulip-mobile#11895</a></p>"
},
{
"name": "auto_shorten_github_pull_link",
"input": "https://github.com/zulip/zulip-mobile/pull/16665",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/pull/16665\">zulip/zulip-mobile#16665</a></p>"
},
{
"name": "auto_shorten_github_commit_link",
"input": "https://github.com/zulip/zulip-mobile/commit/620e9cbf72ca729534aba41693d7ab2872caa394",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/commit/620e9cbf72ca729534aba41693d7ab2872caa394\">zulip/zulip-mobile@620e9cbf72ca</a></p>"
},
{
"name": "auto_shorten_github_commits_link",
"input": "https://github.com/zulip/zulip-mobile/pull/16860/commits/19e96bcea616e6e9b1a3c10f25930ac1f75d4f92",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/pull/16860/commits/19e96bcea616e6e9b1a3c10f25930ac1f75d4f92\">https://github.com/zulip/zulip-mobile/pull/16860/commits/19e96bcea616e6e9b1a3c10f25930ac1f75d4f92</a></p>"
},
{
"name": "auto_shorten_github_pull_initial_comment_link",
"input": "https://github.com/zulip/zulip-mobile/pull/16665#issue-513297835",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/pull/16665#issue-513297835\">https://github.com/zulip/zulip-mobile/pull/16665#issue-513297835</a></p>"
},
{
"name": "auto_shorten_github_pull_comment_link",
"input": "https://github.com/zulip/zulip-mobile/pull/16665#issuecomment-719814618",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/pull/16665#issuecomment-719814618\">https://github.com/zulip/zulip-mobile/pull/16665#issuecomment-719814618</a></p>"
},
{
"name": "auto_shorten_github_issues_initial_comment_link",
"input": "https://github.com/zulip/zulip-mobile/issues/16579#issue-725908927",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/issues/16579#issue-725908927\">https://github.com/zulip/zulip-mobile/issues/16579#issue-725908927</a></p>"
},
{
"name": "auto_shorten_github_issues_comment_link",
"input": "https://github.com/zulip/zulip-mobile/issues/16482#issuecomment-726354516",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/issues/16482#issuecomment-726354516\">https://github.com/zulip/zulip-mobile/issues/16482#issuecomment-726354516</a></p>"
},
{
"name": "auto_shorten_github_files_link",
"input": "https://github.com/zulip/zulip-mobile/pull/16860/files",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/pull/16860/files\">https://github.com/zulip/zulip-mobile/pull/16860/files</a></p>"
},
{
"name": "auto_shorten_github_files_comment_link",
"input": "https://github.com/zulip/zulip-mobile/pull/16860/files#r539133612",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/pull/16860/files#r539133612\">https://github.com/zulip/zulip-mobile/pull/16860/files#r539133612</a></p>"
},
{
"name": "auto_shorten_github_repo_issues_link",
"input": "https://github.com/zulip/zulip-mobile/issues",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/issues\">https://github.com/zulip/zulip-mobile/issues</a></p>"
},
{
"name": "auto_shorten_github_repo_pull_link",
"input": "https://github.com/zulip/zulip-mobile/pull",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/pull\">https://github.com/zulip/zulip-mobile/pull</a></p>"
},
{
"name": "auto_shorten_github_repo_commit_link",
"input": "https://github.com/zulip/zulip-mobile/commit",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/commit\">https://github.com/zulip/zulip-mobile/commit</a></p>"
},
{
"name": "auto_shorten_github_repo_clone_link",
"input": "https://github.com/zulip/zulip-mobile.git",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile.git\">https://github.com/zulip/zulip-mobile.git</a></p>"
},
{
"name": "auto_shorten_github_repo_labels_link",
"input": "https://github.com/zulip/zulip-mobile/labels",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/labels\">https://github.com/zulip/zulip-mobile/labels</a></p>"
},
{
"name": "auto_shorten_github_labels_link",
"input": "https://github.com/zulip/zulip-mobile/labels/area%3A%20markdown",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/labels/area%3A%20markdown\">https://github.com/zulip/zulip-mobile/labels/area%3A%20markdown</a></p>"
},
{
"name": "auto_shorten_github_tree_link",
"input": "https://github.com/zulip/zulip-mobile/tree/chat.zulip.org",
"expected_output": "<p><a href=\"https://github.com/zulip/zulip-mobile/tree/chat.zulip.org\">https://github.com/zulip/zulip-mobile/tree/chat.zulip.org</a></p>"
},
{
"name": "auto_shorten_github_marketplace_circleci_link",
"input": "https://github.com/marketplace/circleci",
"expected_output": "<p><a href=\"https://github.com/marketplace/circleci\">https://github.com/marketplace/circleci</a></p>"
},
{
"name": "auto_shorten_github_foo_bar_baz_link",
"input": "https://github.com/foo/bar/baz",
"expected_output": "<p><a href=\"https://github.com/foo/bar/baz\">https://github.com/foo/bar/baz</a></p>"
},
{
"name": "auto_shorten_github_foo_bar_baz_zar_link",
"input": "https://github.com/foo/bar/baz/zar",
"expected_output": "<p><a href=\"https://github.com/foo/bar/baz/zar\">https://github.com/foo/bar/baz/zar</a></p>"
},
{
"name": "auto_shorten_github_marketplace_link",
"input": "https://github.com/marketplace",
"expected_output": "<p><a href=\"https://github.com/marketplace\">https://github.com/marketplace</a></p>"
},
{
"name": "auto_shorten_github_link",
"input": "https://github.com",
"expected_output": "<p><a href=\"https://github.com\">https://github.com</a></p>"
}
],
"linkify_tests": [