markdown: Fix the shebang-line-eliminating behaviour of CodeHilite.

See the block comment explaining the motivation for this change, but
basically, the shebang feature of Python-Markdown's Codehilite
extension could be really confusing and is not part of the CommonMark
standard.

1. https://python-markdown.github.io/extensions/code_hilite/#shebang-no-path
2. eacff473a2/markdown/extensions/codehilite.py (L164-L180)

Fixes: #18591.
This commit is contained in:
akshatdalton
2021-05-26 11:23:30 +00:00
committed by Tim Abbott
parent 21fbc94e37
commit 6b5812082e
2 changed files with 70 additions and 2 deletions

View File

@@ -81,8 +81,8 @@ from typing import Any, Iterable, List, Mapping, MutableSequence, Optional, Sequ
import lxml.html
from django.utils.html import escape
from markdown import Markdown
from markdown.extensions import Extension
from markdown.extensions.codehilite import CodeHilite, CodeHiliteExtension
from markdown.extensions import Extension, codehilite
from markdown.extensions.codehilite import CodeHiliteExtension, parse_hl_lines
from markdown.preprocessors import Preprocessor
from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound
@@ -354,6 +354,56 @@ class TexHandler(ZulipBaseHandler):
return self.processor.format_tex(text)
class CodeHilite(codehilite.CodeHilite):
    """CodeHilite subclass that never removes the header line of a code block."""

    def _parseHeader(self) -> None:
        """Read language / highlight hints from the first line of ``self.src``.

        Python-Markdown's CodeHilite has a "shebang" feature that parses
        and then hides a leading header line of a code block:

        https://python-markdown.github.io/extensions/code_hilite/#shebang-no-path

        Detecting the language from such a line is reasonable, but
        hiding the line is confusing: a one-line code block that merely
        looks like a header would render as an empty code block.  This
        override therefore still inspects the first line for a language
        and ``hl_lines`` hint, but leaves the line in ``self.src``
        instead of popping it the way upstream does.
        """
        # Only the first line is examined; it stays part of self.src.
        first_line = self.src.partition("\n")[0]

        header_pattern = re.compile(
            r"""
            (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])? # Zero or 1 path
            (?P<lang>[\w#.+-]*) # The language
            \s* # Arbitrary whitespace
            # Optional highlight lines, single- or double-quote-delimited
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
            """,
            re.VERBOSE,
        )

        header = header_pattern.search(first_line)
        if header is not None:
            # The first line looks like a shebang / colon header.
            try:
                language = header.group("lang").lower()
            except IndexError:  # nocoverage
                language = None
            self.lang = language

            if self.options["linenos"] is None:
                if header.group("shebang"):
                    # Line numbering was left overridable and a shebang
                    # is present, so turn line numbers on.
                    self.options["linenos"] = True

            self.options["hl_lines"] = parse_hl_lines(header.group("hl_lines"))

        # Every line is kept; only surrounding newlines are trimmed.
        self.src = self.src.strip("\n")
class FencedBlockPreprocessor(Preprocessor):
def __init__(self, md: Markdown, run_content_validators: bool = False) -> None:
super().__init__(md)

View File

@@ -953,6 +953,24 @@
"name": "telephone_sms_link",
"input": "[call me](tel:+14155551234) [or maybe not](sms:+14155551234)",
"expected_output": "<p><a href=\"tel:+14155551234\">call me</a> <a href=\"sms:+14155551234\">or maybe not</a></p>"
},
{
"name": "codeblock_hilite_shebang_feature_no_path_1",
"input": "```\n#!python\nprint(\"Hello World\")\n```",
"expected_output": "<div class=\"codehilite\"><pre><span></span><code><span class=\"ch\">#!python</span>\n<span class=\"nb\">print</span><span class=\"p\">(</span><span class=\"s2\">&quot;Hello World&quot;</span><span class=\"p\">)</span>\n</code></pre></div>",
"marked_expected_output": "<div class=\"codehilite\"><pre><span></span><code>#!python\nprint(&quot;Hello World&quot;)\n</code></pre></div>"
},
{
"name": "codeblock_hilite_shebang_feature_no_path_2",
"input": "```\n:::python\nprint(\"Hello World\")\n```",
"expected_output": "<div class=\"codehilite\"><pre><span></span><code><span class=\"p\">:::</span><span class=\"n\">python</span>\n<span class=\"nb\">print</span><span class=\"p\">(</span><span class=\"s2\">&quot;Hello World&quot;</span><span class=\"p\">)</span>\n</code></pre></div>",
"marked_expected_output": "<div class=\"codehilite\"><pre><span></span><code>:::python\nprint(&quot;Hello World&quot;)\n</code></pre></div>"
},
{
"name": "codeblock_hilite_shebang_feature_with_path",
"input": "```\n#!/usr/bin/python\nprint(\"Hello World\")\n```",
"expected_output": "<div class=\"codehilite\"><pre><span></span><code><span class=\"ch\">#!/usr/bin/python</span>\n<span class=\"nb\">print</span><span class=\"p\">(</span><span class=\"s2\">&quot;Hello World&quot;</span><span class=\"p\">)</span>\n</code></pre></div>",
"marked_expected_output": "<div class=\"codehilite\"><pre><span></span><code>#!/usr/bin/python\nprint(&quot;Hello World&quot;)\n</code></pre></div>"
}
],
"linkify_tests": [