From 033351609d4d674f78d2d2d65cf308c0aa516226 Mon Sep 17 00:00:00 2001 From: Sumanth V Rao Date: Sun, 6 Sep 2020 12:11:37 +0530 Subject: [PATCH] markdown: Add data-codehilite-language attr for fenced code. When converting fenced code markdown, we add the language (if specified) in a data-attribute by tweaking the HTML generated. Doing so allows the frontend to make use of this attr to display a view-in-playground option for codeblocks. We use pygments to get the lexer subclass name and use that instead of directly using the language in the data-attribute. Doing so helps us map different language aliases (like `js` and `javascript`) into a common variable (like `JavaScript`) - and saves the client from dealing with multiple tags corresponding to the same language. The HTML structure for a message like this: ``` js ..content.. ``` would now be:
<div class="codehilite" data-codehilite-language="JavaScript"><pre>..content..</pre></div>
Tests and fixtures amended. --- zerver/lib/markdown/fenced_code.py | 21 +++++++++++++++++++ .../tests/fixtures/markdown_test_cases.json | 6 +++--- zerver/tests/test_markdown.py | 4 +++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/zerver/lib/markdown/fenced_code.py b/zerver/lib/markdown/fenced_code.py index 79edb00917..b1b1c76273 100644 --- a/zerver/lib/markdown/fenced_code.py +++ b/zerver/lib/markdown/fenced_code.py @@ -80,7 +80,10 @@ from typing import Any, Dict, Iterable, List, Mapping, MutableSequence, Optional import markdown from django.utils.html import escape +from lxml import etree from markdown.extensions.codehilite import CodeHilite, CodeHiliteExtension +from pygments.lexers import get_lexer_by_name +from pygments.util import ClassNotFound from zerver.lib.exceptions import MarkdownRenderingException from zerver.lib.tex import render_tex @@ -392,6 +395,24 @@ class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor): else: code = CODE_WRAP.format(langclass, self._escape(text)) + # In order to display a "view-in-playground" option in the frontend, + # we need to know the language used in the codeblock. We tweak the HTML + # CodeHilite generates to add this language as a data-attribute. + if lang: + parsed_code = etree.HTML(code) + div_tag = parsed_code[0][0] + # We get the lexer subclass name instead of directly processing the lang, to avoid + # different tags being generated for each of the lang's alias. Eg: `js` and `javascript` + # would now be mapped to `JavaScript`. In case no lexer with that alias is found, we + # return back the text, wrapped in a data-codehilite tag. + try: + lexer_subclass_name = get_lexer_by_name(lang).name + except ClassNotFound: + lexer_subclass_name = lang + div_tag.attrib['data-codehilite-language'] = lexer_subclass_name + # Lxml implicitly converts tags like <span></span> into <span/>; + # specifying method="c14n" when converting to string prevents that. 
+ code = etree.tostring(div_tag, method="c14n").decode() return code def format_quote(self, text: str) -> str: diff --git a/zerver/tests/fixtures/markdown_test_cases.json b/zerver/tests/fixtures/markdown_test_cases.json index 5a66d952f2..8a8cb5c2e6 100644 --- a/zerver/tests/fixtures/markdown_test_cases.json +++ b/zerver/tests/fixtures/markdown_test_cases.json @@ -3,7 +3,7 @@ { "name": "codeblock_hilite", "input": "Hamlet said:\n~~~~.python \ndef speak(self):\n x = 1\n~~~~", - "expected_output": "

Hamlet said:

\n
def speak(self):\n    x = 1\n
", + "expected_output": "

Hamlet said:

\n
def speak(self):\n    x = 1\n
", "marked_expected_output": "

Hamlet said:

\n
def speak(self):\n    x = 1\n
", "text_content": "Hamlet said:\ndef speak(self):\n x = 1\n" }, @@ -786,13 +786,13 @@ { "name": "tex_fenced_tex", "input": "```tex\n\n\\pi \\textbf{ is not } 3.14\n```", - "expected_output": "
\\pi \\textbf{ is not } 3.14\n
", + "expected_output": "
\\pi \\textbf{ is not } 3.14\n
", "marked_expected_output": "
\\pi \\textbf{ is not } 3.14\n
" }, { "name": "tex_fenced_latex", "input": "```latex\n\n\\pi \\textbf{ is not } 3.14\n```", - "expected_output": "
\\pi \\textbf{ is not } 3.14\n
", + "expected_output": "
\\pi \\textbf{ is not } 3.14\n
", "marked_expected_output": "
\\pi \\textbf{ is not } 3.14\n
" }, { diff --git a/zerver/tests/test_markdown.py b/zerver/tests/test_markdown.py index f0b0282d8a..13eadaa227 100644 --- a/zerver/tests/test_markdown.py +++ b/zerver/tests/test_markdown.py @@ -1376,6 +1376,7 @@ class MarkdownTest(ZulipTestCase): msg_without_language = markdown_convert_wrapper(text.format('')) msg_with_quote = markdown_convert_wrapper(text.format('quote')) msg_with_math = markdown_convert_wrapper(text.format('math')) + msg_with_none = markdown_convert_wrapper(text.format('none')) # Render with default=javascript do_set_realm_property(realm, 'default_code_block_language', 'javascript') @@ -1403,7 +1404,8 @@ class MarkdownTest(ZulipTestCase): self.assertTrue(msg_with_python == msg_with_python_default_js == msg_without_language_default_py) self.assertTrue(msg_with_quote == msg_without_language_default_quote) self.assertTrue(msg_with_math == msg_without_language_default_math) - self.assertTrue(msg_without_language == msg_with_none_default_py == msg_without_language_final) + self.assertTrue(msg_without_language == msg_without_language_final) + self.assertTrue(msg_with_none == msg_with_none_default_py) # Test checking inside nested quotes nested_text = "````quote\n\n{}\n\n{}````".format(text.format('js'), text.format(''))