markdown: Add data-codehilite-language attr for fenced code.

When converting fenced code markdown, we add the language (if specified)
as a data-attribute by tweaking the generated HTML. Doing so allows the
frontend to use this attribute to display a view-in-playground option
for codeblocks.

We use pygments to get the lexer subclass name and use that instead of
directly using the language in the data-attribute. Doing so helps us
map different language aliases (like `js` and `javascript`) to a common
name (like `JavaScript`), and spares the client from dealing with
multiple tags corresponding to the same language.

The html structure for a message like this:

``` js
..content..
```

would now be:

<div class="codehilite" data-codehilite-language="JavaScript">
    <pre>..content..</pre>
</div>

Tests and fixtures amended.
This commit is contained in:
Sumanth V Rao
2020-09-06 12:11:37 +05:30
committed by Tim Abbott
parent e9d0bdea65
commit 033351609d
3 changed files with 27 additions and 4 deletions

View File

@@ -80,7 +80,10 @@ from typing import Any, Dict, Iterable, List, Mapping, MutableSequence, Optional
import markdown import markdown
from django.utils.html import escape from django.utils.html import escape
from lxml import etree
from markdown.extensions.codehilite import CodeHilite, CodeHiliteExtension from markdown.extensions.codehilite import CodeHilite, CodeHiliteExtension
from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound
from zerver.lib.exceptions import MarkdownRenderingException from zerver.lib.exceptions import MarkdownRenderingException
from zerver.lib.tex import render_tex from zerver.lib.tex import render_tex
@@ -392,6 +395,24 @@ class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
else: else:
code = CODE_WRAP.format(langclass, self._escape(text)) code = CODE_WRAP.format(langclass, self._escape(text))
# In order to display a "view-in-playground" option in the frontend,
# we need to know the language used in the codeblock. We tweak the HTML
# CodeHilite generates to add this language as a data-attribute.
if lang:
parsed_code = etree.HTML(code)
div_tag = parsed_code[0][0]
# We get the lexer subclass name instead of directly using the lang, to avoid
# different tags being generated for each of the lang's aliases. E.g. `js` and `javascript`
# would both be mapped to `JavaScript`. In case no lexer with that alias is found, we
# fall back to using the lang text itself as the data-codehilite-language value.
try:
lexer_subclass_name = get_lexer_by_name(lang).name
except ClassNotFound:
lexer_subclass_name = lang
div_tag.attrib['data-codehilite-language'] = lexer_subclass_name
# Lxml implicitly converts tags like <span></span> into <span/>;
# specifying method="c14n" when converting to string prevents that.
code = etree.tostring(div_tag, method="c14n").decode()
return code return code
def format_quote(self, text: str) -> str: def format_quote(self, text: str) -> str:

View File

@@ -3,7 +3,7 @@
{ {
"name": "codeblock_hilite", "name": "codeblock_hilite",
"input": "Hamlet said:\n~~~~.python \ndef speak(self):\n x = 1\n~~~~", "input": "Hamlet said:\n~~~~.python \ndef speak(self):\n x = 1\n~~~~",
"expected_output": "<p>Hamlet said:</p>\n<div class=\"codehilite\"><pre><span></span><code><span class=\"k\">def</span> <span class=\"nf\">speak</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n <span class=\"n\">x</span> <span class=\"o\">=</span> <span class=\"mi\">1</span>\n</code></pre></div>", "expected_output": "<p>Hamlet said:</p>\n<div class=\"codehilite\" data-codehilite-language=\"Python\"><pre><span></span><code><span class=\"k\">def</span> <span class=\"nf\">speak</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n <span class=\"n\">x</span> <span class=\"o\">=</span> <span class=\"mi\">1</span>\n</code></pre></div>",
"marked_expected_output": "<p>Hamlet said:</p>\n<div class=\"codehilite\"><pre><span></span><code>def speak(self):\n x = 1\n</code></pre></div>", "marked_expected_output": "<p>Hamlet said:</p>\n<div class=\"codehilite\"><pre><span></span><code>def speak(self):\n x = 1\n</code></pre></div>",
"text_content": "Hamlet said:\ndef speak(self):\n x = 1\n" "text_content": "Hamlet said:\ndef speak(self):\n x = 1\n"
}, },
@@ -786,13 +786,13 @@
{ {
"name": "tex_fenced_tex", "name": "tex_fenced_tex",
"input": "```tex\n\n\\pi \\textbf{ is not } 3.14\n```", "input": "```tex\n\n\\pi \\textbf{ is not } 3.14\n```",
"expected_output": "<div class=\"codehilite\"><pre><span></span><code><span class=\"k\">\\pi</span> <span class=\"k\">\\textbf</span><span class=\"nb\">{</span> is not <span class=\"nb\">}</span> 3.14\n</code></pre></div>", "expected_output": "<div class=\"codehilite\" data-codehilite-language=\"TeX\"><pre><span></span><code><span class=\"k\">\\pi</span> <span class=\"k\">\\textbf</span><span class=\"nb\">{</span> is not <span class=\"nb\">}</span> 3.14\n</code></pre></div>",
"marked_expected_output": "<div class=\"codehilite\"><pre><span></span><code>\\pi \\textbf{ is not } 3.14\n</code></pre></div>" "marked_expected_output": "<div class=\"codehilite\"><pre><span></span><code>\\pi \\textbf{ is not } 3.14\n</code></pre></div>"
}, },
{ {
"name": "tex_fenced_latex", "name": "tex_fenced_latex",
"input": "```latex\n\n\\pi \\textbf{ is not } 3.14\n```", "input": "```latex\n\n\\pi \\textbf{ is not } 3.14\n```",
"expected_output": "<div class=\"codehilite\"><pre><span></span><code><span class=\"k\">\\pi</span> <span class=\"k\">\\textbf</span><span class=\"nb\">{</span> is not <span class=\"nb\">}</span> 3.14\n</code></pre></div>", "expected_output": "<div class=\"codehilite\" data-codehilite-language=\"TeX\"><pre><span></span><code><span class=\"k\">\\pi</span> <span class=\"k\">\\textbf</span><span class=\"nb\">{</span> is not <span class=\"nb\">}</span> 3.14\n</code></pre></div>",
"marked_expected_output": "<div class=\"codehilite\"><pre><span></span><code>\\pi \\textbf{ is not } 3.14\n</code></pre></div>" "marked_expected_output": "<div class=\"codehilite\"><pre><span></span><code>\\pi \\textbf{ is not } 3.14\n</code></pre></div>"
}, },
{ {

View File

@@ -1376,6 +1376,7 @@ class MarkdownTest(ZulipTestCase):
msg_without_language = markdown_convert_wrapper(text.format('')) msg_without_language = markdown_convert_wrapper(text.format(''))
msg_with_quote = markdown_convert_wrapper(text.format('quote')) msg_with_quote = markdown_convert_wrapper(text.format('quote'))
msg_with_math = markdown_convert_wrapper(text.format('math')) msg_with_math = markdown_convert_wrapper(text.format('math'))
msg_with_none = markdown_convert_wrapper(text.format('none'))
# Render with default=javascript # Render with default=javascript
do_set_realm_property(realm, 'default_code_block_language', 'javascript') do_set_realm_property(realm, 'default_code_block_language', 'javascript')
@@ -1403,7 +1404,8 @@ class MarkdownTest(ZulipTestCase):
self.assertTrue(msg_with_python == msg_with_python_default_js == msg_without_language_default_py) self.assertTrue(msg_with_python == msg_with_python_default_js == msg_without_language_default_py)
self.assertTrue(msg_with_quote == msg_without_language_default_quote) self.assertTrue(msg_with_quote == msg_without_language_default_quote)
self.assertTrue(msg_with_math == msg_without_language_default_math) self.assertTrue(msg_with_math == msg_without_language_default_math)
self.assertTrue(msg_without_language == msg_with_none_default_py == msg_without_language_final) self.assertTrue(msg_without_language == msg_without_language_final)
self.assertTrue(msg_with_none == msg_with_none_default_py)
# Test checking inside nested quotes # Test checking inside nested quotes
nested_text = "````quote\n\n{}\n\n{}````".format(text.format('js'), text.format('')) nested_text = "````quote\n\n{}\n\n{}````".format(text.format('js'), text.format(''))