mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	fenced_code: Optimize FENCE_RE to fix cubic worst-case complexity.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
		
				
					committed by
					
						
						Alex Vandiver
					
				
			
			
				
	
			
			
			
						parent
						
							ad858d2c79
						
					
				
				
					commit
					2df2f7eec6
				
			@@ -165,7 +165,7 @@ def fix_spoilers_in_text(content: str, language: str) -> str:
 | 
			
		||||
        m = FENCE_RE.match(line)
 | 
			
		||||
        if m:
 | 
			
		||||
            fence = m.group("fence")
 | 
			
		||||
            lang = m.group("lang")
 | 
			
		||||
            lang: Optional[str] = m.group("lang")
 | 
			
		||||
            if lang == "spoiler":
 | 
			
		||||
                open_fence = fence
 | 
			
		||||
                output.append(line)
 | 
			
		||||
 
 | 
			
		||||
@@ -1747,12 +1747,13 @@ class MarkdownListPreprocessor(markdown.preprocessors.Preprocessor):
 | 
			
		||||
            m = FENCE_RE.match(lines[i])
 | 
			
		||||
            if m:
 | 
			
		||||
                fence_str = m.group("fence")
 | 
			
		||||
                is_code = not m.group("lang") in ("quote", "quoted")
 | 
			
		||||
                lang: Optional[str] = m.group("lang")
 | 
			
		||||
                is_code = lang not in ("quote", "quoted")
 | 
			
		||||
                has_open_fences = not len(open_fences) == 0
 | 
			
		||||
                matches_last_fence = (
 | 
			
		||||
                    fence_str == open_fences[-1].fence_str if has_open_fences else False
 | 
			
		||||
                )
 | 
			
		||||
                closes_last_fence = not m.group("lang") and matches_last_fence
 | 
			
		||||
                closes_last_fence = not lang and matches_last_fence
 | 
			
		||||
 | 
			
		||||
                if closes_last_fence:
 | 
			
		||||
                    open_fences.pop()
 | 
			
		||||
 
 | 
			
		||||
@@ -76,7 +76,7 @@ Dependencies:
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
import re
 | 
			
		||||
from typing import Any, Iterable, List, Mapping, MutableSequence, Optional, Sequence
 | 
			
		||||
from typing import Any, Callable, Dict, Iterable, List, Mapping, MutableSequence, Optional, Sequence
 | 
			
		||||
 | 
			
		||||
import lxml.html
 | 
			
		||||
from django.utils.html import escape
 | 
			
		||||
@@ -100,21 +100,21 @@ FENCE_RE = re.compile(
 | 
			
		||||
 | 
			
		||||
    [ ]* # spaces
 | 
			
		||||
 | 
			
		||||
    (
 | 
			
		||||
    (?:
 | 
			
		||||
        # language, like ".py" or "{javascript}"
 | 
			
		||||
        \{?\.?
 | 
			
		||||
        (?P<lang>
 | 
			
		||||
            [a-zA-Z0-9_+-./#]*
 | 
			
		||||
            [a-zA-Z0-9_+-./#]+
 | 
			
		||||
        ) # "py" or "javascript"
 | 
			
		||||
        \}?
 | 
			
		||||
    ) # language, like ".py" or "{javascript}"
 | 
			
		||||
    [ ]* # spaces
 | 
			
		||||
    (
 | 
			
		||||
        \{?\.?
 | 
			
		||||
 | 
			
		||||
        [ ]* # spaces
 | 
			
		||||
 | 
			
		||||
        # header for features that use fenced block header syntax (like spoilers)
 | 
			
		||||
        (?P<header>
 | 
			
		||||
            [^~`]*
 | 
			
		||||
        )
 | 
			
		||||
            [^ ~`][^~`]*
 | 
			
		||||
        )?
 | 
			
		||||
        \}?
 | 
			
		||||
    ) # header for features that use fenced block header syntax (like spoilers)
 | 
			
		||||
    )?
 | 
			
		||||
    $
 | 
			
		||||
    """,
 | 
			
		||||
    re.VERBOSE,
 | 
			
		||||
@@ -139,7 +139,7 @@ Missing required -X argument in curl command:
 | 
			
		||||
                raise MarkdownRenderingException(error_msg.format(command=line.strip()))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
CODE_VALIDATORS = {
 | 
			
		||||
CODE_VALIDATORS: Dict[Optional[str], Callable[[List[str]], None]] = {
 | 
			
		||||
    "curl": validate_curl_content,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -177,12 +177,13 @@ def generic_handler(
 | 
			
		||||
    processor: Any,
 | 
			
		||||
    output: MutableSequence[str],
 | 
			
		||||
    fence: str,
 | 
			
		||||
    lang: str,
 | 
			
		||||
    header: str,
 | 
			
		||||
    lang: Optional[str],
 | 
			
		||||
    header: Optional[str],
 | 
			
		||||
    run_content_validators: bool = False,
 | 
			
		||||
    default_language: Optional[str] = None,
 | 
			
		||||
) -> BaseHandler:
 | 
			
		||||
    lang = lang.lower()
 | 
			
		||||
    if lang is not None:
 | 
			
		||||
        lang = lang.lower()
 | 
			
		||||
    if lang in ("quote", "quoted"):
 | 
			
		||||
        return QuoteHandler(processor, output, fence, default_language)
 | 
			
		||||
    elif lang == "math":
 | 
			
		||||
@@ -203,8 +204,8 @@ def check_for_new_fence(
 | 
			
		||||
    m = FENCE_RE.match(line)
 | 
			
		||||
    if m:
 | 
			
		||||
        fence = m.group("fence")
 | 
			
		||||
        lang = m.group("lang")
 | 
			
		||||
        header = m.group("header")
 | 
			
		||||
        lang: Optional[str] = m.group("lang")
 | 
			
		||||
        header: Optional[str] = m.group("header")
 | 
			
		||||
        if not lang and default_language:
 | 
			
		||||
            lang = default_language
 | 
			
		||||
        handler = generic_handler(
 | 
			
		||||
@@ -243,7 +244,7 @@ class CodeHandler(BaseHandler):
 | 
			
		||||
        processor: Any,
 | 
			
		||||
        output: MutableSequence[str],
 | 
			
		||||
        fence: str,
 | 
			
		||||
        lang: str,
 | 
			
		||||
        lang: Optional[str],
 | 
			
		||||
        run_content_validators: bool = False,
 | 
			
		||||
    ) -> None:
 | 
			
		||||
        self.processor = processor
 | 
			
		||||
@@ -310,7 +311,11 @@ class QuoteHandler(BaseHandler):
 | 
			
		||||
 | 
			
		||||
class SpoilerHandler(BaseHandler):
 | 
			
		||||
    def __init__(
 | 
			
		||||
        self, processor: Any, output: MutableSequence[str], fence: str, spoiler_header: str
 | 
			
		||||
        self,
 | 
			
		||||
        processor: Any,
 | 
			
		||||
        output: MutableSequence[str],
 | 
			
		||||
        fence: str,
 | 
			
		||||
        spoiler_header: Optional[str],
 | 
			
		||||
    ) -> None:
 | 
			
		||||
        self.processor = processor
 | 
			
		||||
        self.output = output
 | 
			
		||||
@@ -407,7 +412,7 @@ class FencedBlockPreprocessor(Preprocessor):
 | 
			
		||||
            output.append("")
 | 
			
		||||
        return output
 | 
			
		||||
 | 
			
		||||
    def format_code(self, lang: str, text: str) -> str:
 | 
			
		||||
    def format_code(self, lang: Optional[str], text: str) -> str:
 | 
			
		||||
        if lang:
 | 
			
		||||
            langclass = LANG_TAG.format(lang)
 | 
			
		||||
        else:
 | 
			
		||||
@@ -474,14 +479,15 @@ class FencedBlockPreprocessor(Preprocessor):
 | 
			
		||||
            quoted_paragraphs.append("\n".join("> " + line for line in lines))
 | 
			
		||||
        return "\n".join(quoted_paragraphs)
 | 
			
		||||
 | 
			
		||||
    def format_spoiler(self, header: str, text: str) -> str:
 | 
			
		||||
    def format_spoiler(self, header: Optional[str], text: str) -> str:
 | 
			
		||||
        output = []
 | 
			
		||||
        header_div_open_html = '<div class="spoiler-block"><div class="spoiler-header">'
 | 
			
		||||
        end_header_start_content_html = '</div><div class="spoiler-content" aria-hidden="true">'
 | 
			
		||||
        footer_html = "</div></div>"
 | 
			
		||||
 | 
			
		||||
        output.append(self.placeholder(header_div_open_html))
 | 
			
		||||
        output.append(header)
 | 
			
		||||
        if header is not None:
 | 
			
		||||
            output.append(header)
 | 
			
		||||
        output.append(self.placeholder(end_header_start_content_html))
 | 
			
		||||
        output.append(text)
 | 
			
		||||
        output.append(self.placeholder(footer_html))
 | 
			
		||||
 
 | 
			
		||||
@@ -69,8 +69,8 @@ from zerver.models import (
 | 
			
		||||
class SimulatedFencedBlockPreprocessor(FencedBlockPreprocessor):
 | 
			
		||||
    # Simulate code formatting.
 | 
			
		||||
 | 
			
		||||
    def format_code(self, lang: str, code: str) -> str:
 | 
			
		||||
        return lang + ":" + code
 | 
			
		||||
    def format_code(self, lang: Optional[str], code: str) -> str:
 | 
			
		||||
        return (lang or "") + ":" + code
 | 
			
		||||
 | 
			
		||||
    def placeholder(self, s: str) -> str:
 | 
			
		||||
        return "**" + s.strip("\n") + "**"
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user