bugdown: Match up to 8 groups of nested parens

(imported from commit e3d61ec644fdb293322cab0a4449d8e1a27a513a)
This commit is contained in:
Leo Franchi
2014-02-06 17:07:24 -05:00
parent 1c332f5d6a
commit 8ad2ca15cd
2 changed files with 54 additions and 5 deletions

View File

@@ -532,6 +532,41 @@
"sorry,http://example.com/",
"<p>sorry,%s</p>",
"http://example.com/"
],
[
"two nested parens https://zulip.com/(we_like_(to_nest_)_nest)_nest",
"<p>two nested parens %s</p>",
"https://zulip.com/(we_like_(to_nest_)_nest)_nest"
],
[
"three nested parens https://zulip.com/(we_like_(to_(and_)nest_)_nest)_nest",
"<p>three nested parens %s</p>",
"https://zulip.com/(we_like_(to_(and_)nest_)_nest)_nest"
],
[
"four nested parens https://zulip.com/(we_like_(to_n(e)st_)_n(es)t)_nest",
"<p>four nested parens %s</p>",
"https://zulip.com/(we_like_(to_n(e)st_)_n(es)t)_nest"
],
[
"six nested parens https://zulip.com/(we_like_((t(d(o(___)o)o))o_nest_)_nest)_nest",
"<p>six nested parens %s</p>",
"https://zulip.com/(we_like_((t(d(o(___)o)o))o_nest_)_nest)_nest"
],
[
"eight is too many https://zulip.com/(we_like_((t(d(o(_(1(2)1)__)o)o))o_nest_)_nest)_nest",
"<p>eight is too many https://zulip.com/(we_like_((t(d(o(_(1(2)1)__)o)o))o_nest_)_nest)_nest</p>",
""
],
[
"nested with autodetection zulip.us/(we_like_(to_n(e)st_)_n(es)t)_nest",
"<p>nested with autodetection %s</p>",
"zulip.us/(we_like_(to_n(e)st_)_n(es)t)_nest"
],
[
"quora http://generate.quora.net/render?width=700&from=-4hours&until=now&height=400&bgcolor=black&lineMode=connected&title=arb%20hint%20status&target=alias(ans.hintland.hand.arb.enhint_rate%2C%22enhint%20rate%22)&target=alias(ans.hintland.hand.arb.unhint_rate%2C%22unhint%20rate%22)&target=alias(ans.hintland.hand.arb.size%2C%22hint%20size%22)&target=alias(scale(ans.vagabond.dingarb_cube_count%2C10000)%2C%22cube%20count%20x%2010K%22)&target=alias(scale(hnumbers.time.ding.gegevens.query.count%2C10)%2C%22ding%20gegevens%20query%20count%20x%2010%22)&fgcolor=white&uniq=0.44046106841415167",
"<p>quora %s</p>",
"http://generate.quora.net/render?width=700&amp;from=-4hours&amp;until=now&amp;height=400&amp;bgcolor=black&amp;lineMode=connected&amp;title=arb%20hint%20status&amp;target=alias(ans.hintland.hand.arb.enhint_rate%2C%22enhint%20rate%22)&amp;target=alias(ans.hintland.hand.arb.unhint_rate%2C%22unhint%20rate%22)&amp;target=alias(ans.hintland.hand.arb.size%2C%22hint%20size%22)&amp;target=alias(scale(ans.vagabond.dingarb_cube_count%2C10000)%2C%22cube%20count%20x%2010K%22)&amp;target=alias(scale(hnumbers.time.ding.gegevens.query.count%2C10)%2C%22ding%20gegevens%20query%20count%20x%2010%22)&amp;fgcolor=white&amp;uniq=0.44046106841415167"
]
]
}

View File

@@ -845,6 +845,23 @@ class Bugdown(markdown.Extension):
# A link starts at a word boundary, and ends at space, punctuation, or end-of-input.
#
# We detect a url either by the `https?://` or by building around the TLD.
# In lieu of having a recursive regex (which Python doesn't support) to match
# arbitrary numbers of nested matching parentheses, we manually build a regexp that
# can match up to seven levels of nesting (paren_group itself plus the six
# substitutions made by the loop below).
# The inner_paren_contents chunk matches the innermost non-parenthesis-holding text,
# and the paren_group matches text with, optionally, a matching set of parens
inner_paren_contents = r"[^\s()\"]*"
paren_group = r"""
[^\s()\"]*? # Containing characters that won't end the URL
(?: \( %s \) # and more characters in matched parens
[^\s()\"]*? # followed by more characters
)* # zero-or-more sets of paired parens
"""
nested_paren_chunk = paren_group
for i in range(6):
nested_paren_chunk = nested_paren_chunk % (paren_group,)
nested_paren_chunk = nested_paren_chunk % (inner_paren_contents,)
tlds = '|'.join(list_of_tlds())
link_regex = r"""
(?<![^\s'"\(,:<]) # Start after whitespace or specified chars
@@ -858,10 +875,7 @@ class Bugdown(markdown.Extension):
)
)
(?:/ # A path, beginning with /
[^\s()\"]*? # Containing characters that won't end the URL
(?: \( [^\s()\"]* \) # and more characters in matched parens
[^\s()\"]*? # followed by more characters
)* # zero-or-more sets of paired parens
%s # zero-to-6 sets of paired parens
)?) # Path is optional
| (?:[\w.-]+\@[\w.-]+\.[\w]+) # Email is separate, since it can't have a path
)
@@ -869,7 +883,7 @@ class Bugdown(markdown.Extension):
[:;\?\),\.\'\"\>]* # Optional punctuation characters
(?:\Z|\s) # followed by whitespace or end of string
)
""" % (tlds,)
""" % (tlds, nested_paren_chunk)
md.inlinePatterns.add('autolink', AutoLink(link_regex), '>link')
md.preprocessors.add('hanging_ulists',