From 6b5812082ebec7c6171ec772272bb4812e5b47fd Mon Sep 17 00:00:00 2001 From: akshatdalton Date: Wed, 26 May 2021 11:23:30 +0000 Subject: [PATCH] markdown: Fix shebang line eliminating behaviour of Codehilite. See the block comment explaining the motivation for this change, but basically, the shebang feature of Python-Markdown's Codehilite extension could be really confusing and is not part of the CommonMark standard. 1. https://python-markdown.github.io/extensions/code_hilite/#shebang-no-path 2. https://github.com/Python-Markdown/markdown/blob/eacff473a2600902c200af8c88446af6c183203f/markdown/extensions/codehilite.py#L164-L180 Fixes: #18591. --- zerver/lib/markdown/fenced_code.py | 54 ++++++++++++++++++- .../tests/fixtures/markdown_test_cases.json | 18 +++++++ 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/zerver/lib/markdown/fenced_code.py b/zerver/lib/markdown/fenced_code.py index 3afd2beb24..8309d67bef 100644 --- a/zerver/lib/markdown/fenced_code.py +++ b/zerver/lib/markdown/fenced_code.py @@ -81,8 +81,8 @@ from typing import Any, Iterable, List, Mapping, MutableSequence, Optional, Sequ import lxml.html from django.utils.html import escape from markdown import Markdown -from markdown.extensions import Extension -from markdown.extensions.codehilite import CodeHilite, CodeHiliteExtension +from markdown.extensions import Extension, codehilite +from markdown.extensions.codehilite import CodeHiliteExtension, parse_hl_lines from markdown.preprocessors import Preprocessor from pygments.lexers import get_lexer_by_name from pygments.util import ClassNotFound @@ -354,6 +354,56 @@ class TexHandler(ZulipBaseHandler): return self.processor.format_tex(text) +class CodeHilite(codehilite.CodeHilite): + def _parseHeader(self) -> None: + # Python-Markdown has a feature to parse-and-hide shebang + # lines present in code blocks: + # + # https://python-markdown.github.io/extensions/code_hilite/#shebang-no-path + # + # While using shebang lines for language detection 
is + # reasonable, we don't want this feature because it can be + # really confusing when doing anything else in a one-line code + # block that starts with `!` (which would then render as an + # empty code block!). So we disable the feature, by + # overriding this function, which implements it in CodeHilite + # upstream. + + # split text into lines + lines = self.src.split("\n") + # Python-Markdown pops out the first line which we are avoiding here. + # Examine first line + fl = lines[0] + + c = re.compile( + r""" + (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons + (?P<path>(?:/\w+)*[/ ])? # Zero or 1 path + (?P<lang>[\w#.+-]*) # The language + \s* # Arbitrary whitespace + # Optional highlight lines, single- or double-quote-delimited + (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))? + """, + re.VERBOSE, + ) + # Search first line for shebang + m = c.search(fl) + if m: + # We have a match + try: + self.lang = m.group("lang").lower() + except IndexError: # nocoverage + self.lang = None + + if self.options["linenos"] is None and m.group("shebang"): + # Overridable and Shebang exists - use line numbers + self.options["linenos"] = True + + self.options["hl_lines"] = parse_hl_lines(m.group("hl_lines")) + + self.src = "\n".join(lines).strip("\n") + + class FencedBlockPreprocessor(Preprocessor): def __init__(self, md: Markdown, run_content_validators: bool = False) -> None: super().__init__(md) diff --git a/zerver/tests/fixtures/markdown_test_cases.json b/zerver/tests/fixtures/markdown_test_cases.json index 2c5819e3e6..c449e0ed5a 100644 --- a/zerver/tests/fixtures/markdown_test_cases.json +++ b/zerver/tests/fixtures/markdown_test_cases.json @@ -953,6 +953,24 @@ "name": "telephone_sms_link", "input": "[call me](tel:+14155551234) [or maybe not](sms:+14155551234)", "expected_output": "

call me or maybe not

" + }, + { + "name": "codeblock_hilite_shebang_feature_no_path_1", + "input": "```\n#!python\nprint(\"Hello World\")\n```", + "expected_output": "
#!python\nprint("Hello World")\n
", + "marked_expected_output": "
#!python\nprint("Hello World")\n
" + }, + { + "name": "codeblock_hilite_shebang_feature_no_path_2", + "input": "```\n:::python\nprint(\"Hello World\")\n```", + "expected_output": "
:::python\nprint("Hello World")\n
", + "marked_expected_output": "
:::python\nprint("Hello World")\n
" + }, + { + "name": "codeblock_hilite_shebang_feature_with_path", + "input": "```\n#!/usr/bin/python\nprint(\"Hello World\")\n```", + "expected_output": "
#!/usr/bin/python\nprint("Hello World")\n
", + "marked_expected_output": "
#!/usr/bin/python\nprint("Hello World")\n
" } ], "linkify_tests": [