Files
zulip/zerver/lib/bugdown/fenced_code.py
Rohitt Vashishtha 434094e599 bugdown: Restructure Bugdown to extend Markdown from being an extension.
Since we are building our parser from scratch now:

1. We have control over which proccessor goes at what priority number.
   Thus, we have also shifted the deprecated `.add()` calls to use the
   new `.register()` calls with explicit priorities, but maintaining
   the original order that the old method generated.

2. We do not have to remove the processors added by py-markdown that
   we do not use in Zulip; we explicitly add only the processors we
   do require.

3. We can cluster the building of each type of parser in one place,
   and in the order they need to be so that when we register them,
   there is no need to sort the list. This also makes for a huge
   improvement in the readability of the code, as all the components
   of each type are registered in the same function.

These are significant performance improvements, because we save on
calls to `str.startswith` in `.add()`, all the resources taken to
generate the default to-be-removed processors and the time taken to
sort the list of processors.

Following are the profiling results for the changes made. Here, we
build 10 engines one after the other and note the time taken to build
each of them. 1st pass represents the state after this commit and 2nd
pass represent the state after some regex modifications in the commits
that follow by Steve Howell. All times are in microseconds.

| nth Engine | Old Time | 1st Pass | 2nd Pass |
| ---------- | -------- | -------- | -------- |
|          1 |  92117.0 |  81775.0 |  76710.0 |
|          2 |   1254.0 |    558.0 |    341.0 |
|          3 |   1170.0 |    472.0 |    305.0 |
|          4 |   1155.0 |    519.0 |    301.0 |
|          5 |   1170.0 |    546.0 |    326.0 |
|          6 |   1271.0 |    609.0 |    416.0 |
|          7 |   1125.0 |    459.0 |    299.0 |
|          8 |   1146.0 |    476.0 |    390.0 |
|          9 |   1274.0 |    446.0 |    301.0 |
|         10 |   1135.0 |    451.0 |    297.0 |
2019-01-28 13:12:37 -08:00

333 lines
10 KiB
Python

"""
Fenced Code Extension for Python Markdown
=========================================
This extension adds Fenced Code Blocks to Python-Markdown.
>>> import markdown
>>> text = '''
... A paragraph before a fenced code block:
...
... ~~~
... Fenced code block
... ~~~
... '''
>>> html = markdown.markdown(text, extensions=['fenced_code'])
>>> print html
<p>A paragraph before a fenced code block:</p>
<pre><code>Fenced code block
</code></pre>
Works with safe_mode also (we check this because we are using the HtmlStash):
>>> print markdown.markdown(text, extensions=['fenced_code'], safe_mode='replace')
<p>A paragraph before a fenced code block:</p>
<pre><code>Fenced code block
</code></pre>
Include tilde's in a code block and wrap with blank lines:
>>> text = '''
... ~~~~~~~~
...
... ~~~~
... ~~~~~~~~'''
>>> print markdown.markdown(text, extensions=['fenced_code'])
<pre><code>
~~~~
</code></pre>
Removes trailing whitespace from code blocks that cause horizontal scrolling
>>> import markdown
>>> text = '''
... A paragraph before a fenced code block:
...
... ~~~
... Fenced code block \t\t\t\t\t\t\t
... ~~~
... '''
>>> html = markdown.markdown(text, extensions=['fenced_code'])
>>> print html
<p>A paragraph before a fenced code block:</p>
<pre><code>Fenced code block
</code></pre>
Language tags:
>>> text = '''
... ~~~~{.python}
... # Some python code
... ~~~~'''
>>> print markdown.markdown(text, extensions=['fenced_code'])
<pre><code class="python"># Some python code
</code></pre>
Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).
Project website: <http://packages.python.org/Markdown/extensions/fenced_code_blocks.html>
Contact: markdown@freewisdom.org
License: BSD (see ../docs/LICENSE for details)
Dependencies:
* [Python 2.4+](http://python.org)
* [Markdown 2.0+](http://packages.python.org/Markdown/)
* [Pygments (optional)](http://pygments.org)
"""
import re
import subprocess
import markdown
from django.utils.html import escape
from markdown.extensions.codehilite import CodeHilite, CodeHiliteExtension
from zerver.lib.tex import render_tex
from typing import Any, Dict, Iterable, List, MutableSequence, Optional, Tuple, Union
# Global vars
FENCE_RE = re.compile("""
# ~~~ or ```
(?P<fence>
^(?:~{3,}|`{3,})
)
[ ]* # spaces
(
\\{?\\.?
(?P<lang>
[a-zA-Z0-9_+-./#]*
) # "py" or "javascript"
\\}?
) # language, like ".py" or "{javascript}"
[ ]* # spaces
$
""", re.VERBOSE)
CODE_WRAP = '<pre><code%s>%s\n</code></pre>'
LANG_TAG = ' class="%s"'
class FencedCodeExtension(markdown.Extension):
def extendMarkdown(self, md: markdown.Markdown, md_globals: Dict[str, Any]) -> None:
""" Add FencedBlockPreprocessor to the Markdown instance. """
md.registerExtension(self)
md.preprocessors.register(FencedBlockPreprocessor(md), 'fenced_code_block', 25)
class BaseHandler:
def handle_line(self, line: str) -> None:
raise NotImplementedError()
def done(self) -> None:
raise NotImplementedError()
def generic_handler(processor: Any, output: MutableSequence[str], fence: str, lang: str) -> BaseHandler:
if lang in ('quote', 'quoted'):
return QuoteHandler(processor, output, fence)
elif lang in ('math', 'tex', 'latex'):
return TexHandler(processor, output, fence)
else:
return CodeHandler(processor, output, fence, lang)
def check_for_new_fence(processor: Any, output: MutableSequence[str], line: str) -> None:
m = FENCE_RE.match(line)
if m:
fence = m.group('fence')
lang = m.group('lang')
handler = generic_handler(processor, output, fence, lang)
processor.push(handler)
else:
output.append(line)
class OuterHandler(BaseHandler):
def __init__(self, processor: Any, output: MutableSequence[str]) -> None:
self.output = output
self.processor = processor
def handle_line(self, line: str) -> None:
check_for_new_fence(self.processor, self.output, line)
def done(self) -> None:
self.processor.pop()
class CodeHandler(BaseHandler):
def __init__(self, processor: Any, output: MutableSequence[str], fence: str, lang: str) -> None:
self.processor = processor
self.output = output
self.fence = fence
self.lang = lang
self.lines = [] # type: List[str]
def handle_line(self, line: str) -> None:
if line.rstrip() == self.fence:
self.done()
else:
self.lines.append(line.rstrip())
def done(self) -> None:
text = '\n'.join(self.lines)
text = self.processor.format_code(self.lang, text)
text = self.processor.placeholder(text)
processed_lines = text.split('\n')
self.output.append('')
self.output.extend(processed_lines)
self.output.append('')
self.processor.pop()
class QuoteHandler(BaseHandler):
def __init__(self, processor: Any, output: MutableSequence[str], fence: str) -> None:
self.processor = processor
self.output = output
self.fence = fence
self.lines = [] # type: List[str]
def handle_line(self, line: str) -> None:
if line.rstrip() == self.fence:
self.done()
else:
check_for_new_fence(self.processor, self.lines, line)
def done(self) -> None:
text = '\n'.join(self.lines)
text = self.processor.format_quote(text)
processed_lines = text.split('\n')
self.output.append('')
self.output.extend(processed_lines)
self.output.append('')
self.processor.pop()
class TexHandler(BaseHandler):
def __init__(self, processor: Any, output: MutableSequence[str], fence: str) -> None:
self.processor = processor
self.output = output
self.fence = fence
self.lines = [] # type: List[str]
def handle_line(self, line: str) -> None:
if line.rstrip() == self.fence:
self.done()
else:
self.lines.append(line)
def done(self) -> None:
text = '\n'.join(self.lines)
text = self.processor.format_tex(text)
text = self.processor.placeholder(text)
processed_lines = text.split('\n')
self.output.append('')
self.output.extend(processed_lines)
self.output.append('')
self.processor.pop()
class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
def __init__(self, md: markdown.Markdown) -> None:
markdown.preprocessors.Preprocessor.__init__(self, md)
self.checked_for_codehilite = False
self.codehilite_conf = {} # type: Dict[str, List[Any]]
def push(self, handler: BaseHandler) -> None:
self.handlers.append(handler)
def pop(self) -> None:
self.handlers.pop()
def run(self, lines: Iterable[str]) -> List[str]:
""" Match and store Fenced Code Blocks in the HtmlStash. """
output = [] # type: List[str]
processor = self
self.handlers = [] # type: List[BaseHandler]
handler = OuterHandler(processor, output)
self.push(handler)
for line in lines:
self.handlers[-1].handle_line(line)
while self.handlers:
self.handlers[-1].done()
# This fiddly handling of new lines at the end of our output was done to make
# existing tests pass. Bugdown is just kind of funny when it comes to new lines,
# but we could probably remove this hack.
if len(output) > 2 and output[-2] != '':
output.append('')
return output
def format_code(self, lang: str, text: str) -> str:
if lang:
langclass = LANG_TAG % (lang,)
else:
langclass = ''
# Check for code hilite extension
if not self.checked_for_codehilite:
for ext in self.markdown.registeredExtensions:
if isinstance(ext, CodeHiliteExtension):
self.codehilite_conf = ext.config
break
self.checked_for_codehilite = True
# If config is not empty, then the codehighlite extension
# is enabled, so we call it to highlite the code
if self.codehilite_conf:
highliter = CodeHilite(text,
linenums=self.codehilite_conf['linenums'][0],
guess_lang=self.codehilite_conf['guess_lang'][0],
css_class=self.codehilite_conf['css_class'][0],
style=self.codehilite_conf['pygments_style'][0],
use_pygments=self.codehilite_conf['use_pygments'][0],
lang=(lang or None),
noclasses=self.codehilite_conf['noclasses'][0])
code = highliter.hilite()
else:
code = CODE_WRAP % (langclass, self._escape(text))
return code
def format_quote(self, text: str) -> str:
paragraphs = text.split("\n\n")
quoted_paragraphs = []
for paragraph in paragraphs:
lines = paragraph.split("\n")
quoted_paragraphs.append("\n".join("> " + line for line in lines if line != ''))
return "\n\n".join(quoted_paragraphs)
def format_tex(self, text: str) -> str:
paragraphs = text.split("\n\n")
tex_paragraphs = []
for paragraph in paragraphs:
html = render_tex(paragraph, is_inline=False)
if html is not None:
tex_paragraphs.append(html)
else:
tex_paragraphs.append('<span class="tex-error">' +
escape(paragraph) + '</span>')
return "\n\n".join(tex_paragraphs)
def placeholder(self, code: str) -> str:
return self.markdown.htmlStash.store(code)
def _escape(self, txt: str) -> str:
""" basic html escaping """
txt = txt.replace('&', '&amp;')
txt = txt.replace('<', '&lt;')
txt = txt.replace('>', '&gt;')
txt = txt.replace('"', '&quot;')
return txt
def makeExtension(*args: Any, **kwargs: None) -> FencedCodeExtension:
return FencedCodeExtension(*args, **kwargs)
if __name__ == "__main__":
import doctest
doctest.testmod()