openapi: Use more carefully-constructed regexes for markdown extension.

This removes a false-positive ReDoS, since the input is always
checked-in code.  It also incidentally refactors to make the regexes
be more explicit about the values they expect, and removes unnecessary
capturing groups.

It removes an optional parenthesized status code for fixtures,
unnecessary since 981e4f8946, as well as
optional key-value language options, unnecessary since
a2be9a0e2d.

Thank you to @erik-krogh and @yoff for bringing this to our attention.
This commit is contained in:
Alex Vandiver
2021-09-29 22:10:12 +00:00
committed by Tim Abbott
parent 206168ed28
commit f1c61fbea1
3 changed files with 38 additions and 96 deletions

View File

@@ -10,7 +10,7 @@ import json
import re import re
import shlex import shlex
from textwrap import dedent from textwrap import dedent
from typing import Any, Dict, List, Mapping, Match, Optional, Pattern, Tuple from typing import Any, Dict, List, Mapping, Match, Optional, Pattern
import markdown import markdown
from django.conf import settings from django.conf import settings
@@ -31,15 +31,32 @@ from zerver.openapi.openapi import (
openapi_spec, openapi_spec,
) )
# Endpoint identifiers have the shape "/path/{placeholder}/more-path:method":
# a path made of lowercase letters, "_", "-", "/" and "{}" placeholders,
# followed by ":" and a lowercase method name.  The "-" is deliberately placed
# last in the character class so that it is a literal hyphen; written as
# "/-{" it forms a range from "/" to "{" that excludes "-" itself while
# admitting digits, uppercase letters and assorted punctuation.
API_ENDPOINT_NAME = r"/[a-z_/{}-]+:[a-z]+"
# Language tag given in parentheses to the generate_code_example macro.
API_LANGUAGE = r"\w+"
# Kind of content the generate_code_example macro asks for.
API_KEY_TYPE = r"fixture|example"
MACRO_REGEXP = re.compile( MACRO_REGEXP = re.compile(
r"\{generate_code_example(\(\s*(.+?)\s*\))*\|\s*(.+?)\s*\|\s*(.+?)\s*(\(\s*(.+)\s*\))?\}" rf"""
{{
generate_code_example
(?: \( \s* ({API_LANGUAGE}) \s* \) )?
\|
\s* ({API_ENDPOINT_NAME}) \s*
\|
\s* ({API_KEY_TYPE}) \s*
}}
""",
re.VERBOSE,
)
PYTHON_EXAMPLE_REGEX = re.compile(r"\# \{code_example\|\s*(start|end)\s*\}")
JS_EXAMPLE_REGEX = re.compile(r"\/\/ \{code_example\|\s*(start|end)\s*\}")
MACRO_REGEXP_DESC = re.compile(rf"{{generate_api_description\(\s*({API_ENDPOINT_NAME})\s*\)}}")
MACRO_REGEXP_TITLE = re.compile(rf"{{generate_api_title\(\s*({API_ENDPOINT_NAME})\s*\)}}")
MACRO_REGEXP_RESPONSE_DESC = re.compile(
rf"{{generate_response_description\(\s*({API_ENDPOINT_NAME})\s*\)}}"
)
MACRO_REGEXP_PARAMETER_DESC = re.compile(
rf"{{generate_parameter_description\(\s*({API_ENDPOINT_NAME})\s*\)}}"
) )
PYTHON_EXAMPLE_REGEX = re.compile(r"\# \{code_example\|\s*(.+?)\s*\}")
JS_EXAMPLE_REGEX = re.compile(r"\/\/ \{code_example\|\s*(.+?)\s*\}")
MACRO_REGEXP_DESC = re.compile(r"\{generate_api_description(\(\s*(.+?)\s*\))}")
MACRO_REGEXP_TITLE = re.compile(r"\{generate_api_title(\(\s*(.+?)\s*\))}")
MACRO_REGEXP_RESPONSE_DESC = re.compile(r"\{generate_response_description(\(\s*(.+?)\s*\))}")
MACRO_REGEXP_PARAMETER_DESC = re.compile(r"\{generate_parameter_description(\(\s*(.+?)\s*\))}")
PYTHON_CLIENT_CONFIG = """ PYTHON_CLIENT_CONFIG = """
#!/usr/bin/env python3 #!/usr/bin/env python3
@@ -87,25 +104,6 @@ DEFAULT_EXAMPLE = {
ADMIN_CONFIG_LANGUAGES = ["python", "javascript"] ADMIN_CONFIG_LANGUAGES = ["python", "javascript"]
def parse_language_and_options(input_str: Optional[str]) -> Tuple[str, Dict[str, Any]]:
if not input_str:
return ("", {})
language_and_options = re.match(
r"(?P<language>\w+)(,\s*(?P<options>[\"\'\w\d\[\],= ]+))?", input_str
)
assert language_and_options is not None
kwargs_pattern = re.compile(r"(?P<key>\w+)\s*=\s*(?P<value>[\'\"\w\d]+|\[[\'\",\w\d ]+\])")
language = language_and_options.group("language")
assert language is not None
if language_and_options.group("options"):
_options = kwargs_pattern.finditer(language_and_options.group("options"))
options = {}
for m in _options:
options[m.group("key")] = json.loads(m.group("value").replace("'", '"'))
return (language, options)
return (language, {})
def extract_code_example( def extract_code_example(
source: List[str], snippet: List[Any], example_regex: Pattern[str] source: List[str], snippet: List[Any], example_regex: Pattern[str]
) -> List[Any]: ) -> List[Any]:
@@ -363,6 +361,7 @@ def generate_curl_example(
def render_curl_example( def render_curl_example(
function: str, function: str,
api_url: str, api_url: str,
admin_config: bool = False,
) -> List[str]: ) -> List[str]:
"""A simple wrapper around generate_curl_example.""" """A simple wrapper around generate_curl_example."""
parts = function.split(":") parts = function.split(":")
@@ -475,7 +474,7 @@ class BasePreprocessor(Preprocessor):
return lines return lines
def generate_text(self, match: Match[str]) -> List[str]: def generate_text(self, match: Match[str]) -> List[str]:
function = match.group(2) function = match.group(1)
text = self.render(function) text = self.render(function)
return text return text
@@ -488,21 +487,22 @@ class APICodeExamplesPreprocessor(BasePreprocessor):
super().__init__(MACRO_REGEXP, md, config) super().__init__(MACRO_REGEXP, md, config)
def generate_text(self, match: Match[str]) -> List[str]: def generate_text(self, match: Match[str]) -> List[str]:
language, options = parse_language_and_options(match.group(2)) language = match.group(1) or ""
function = match.group(3) function = match.group(2)
key = match.group(4) key = match.group(3)
if self.api_url is None: if self.api_url is None:
raise AssertionError("Cannot render curl API examples without API URL set.") raise AssertionError("Cannot render curl API examples without API URL set.")
options["api_url"] = self.api_url
if key == "fixture": if key == "fixture":
text = self.render(function) text = self.render(function)
elif key == "example": elif key == "example":
path, method = function.rsplit(":", 1) path, method = function.rsplit(":", 1)
if language in ADMIN_CONFIG_LANGUAGES and check_requires_administrator(path, method): admin_config = language in ADMIN_CONFIG_LANGUAGES and check_requires_administrator(
text = SUPPORTED_LANGUAGES[language]["render"](function, admin_config=True) path, method
else: )
text = SUPPORTED_LANGUAGES[language]["render"](function, **options) text = SUPPORTED_LANGUAGES[language]["render"](
function, api_url=self.api_url, admin_config=admin_config
)
return text return text
def render(self, function: str) -> List[str]: def render(self, function: str) -> List[str]:

View File

@@ -54,7 +54,7 @@ def test_generated_curl_examples_for_success(client: Client) -> None:
f = open(file_name) f = open(file_name)
for line in f: for line in f:
# A typical example from the Markdown source looks like this: # A typical example from the Markdown source looks like this:
# {generate_code_example(curl, ...} # {generate_code_example(curl)|...|...}
if line.startswith("{generate_code_example(curl"): if line.startswith("{generate_code_example(curl"):
curl_commands_to_test.append(line) curl_commands_to_test.append(line)
else: else:

View File

@@ -14,11 +14,7 @@ from zerver.lib.request import _REQ, arguments_map
from zerver.lib.rest import rest_dispatch from zerver.lib.rest import rest_dispatch
from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.utils import assert_is_not_none from zerver.lib.utils import assert_is_not_none
from zerver.openapi.markdown_extension import ( from zerver.openapi.markdown_extension import generate_curl_example, render_curl_example
generate_curl_example,
parse_language_and_options,
render_curl_example,
)
from zerver.openapi.openapi import ( from zerver.openapi.openapi import (
OPENAPI_SPEC_PATH, OPENAPI_SPEC_PATH,
OpenAPISpec, OpenAPISpec,
@@ -623,60 +619,6 @@ so maybe we shouldn't include it in pending_endpoints.
self.check_for_non_existant_openapi_endpoints() self.check_for_non_existant_openapi_endpoints()
class ModifyExampleGenerationTestCase(ZulipTestCase):
def test_no_mod_argument(self) -> None:
res = parse_language_and_options("python")
self.assertEqual(res, ("python", {}))
def test_single_simple_mod_argument(self) -> None:
res = parse_language_and_options("curl, mod=1")
self.assertEqual(res, ("curl", {"mod": 1}))
res = parse_language_and_options("curl, mod='somevalue'")
self.assertEqual(res, ("curl", {"mod": "somevalue"}))
res = parse_language_and_options('curl, mod="somevalue"')
self.assertEqual(res, ("curl", {"mod": "somevalue"}))
def test_multiple_simple_mod_argument(self) -> None:
res = parse_language_and_options("curl, mod1=1, mod2='a'")
self.assertEqual(res, ("curl", {"mod1": 1, "mod2": "a"}))
res = parse_language_and_options("curl, mod1=\"asdf\", mod2='thing', mod3=3")
self.assertEqual(res, ("curl", {"mod1": "asdf", "mod2": "thing", "mod3": 3}))
def test_single_list_mod_argument(self) -> None:
res = parse_language_and_options("curl, exclude=['param1', 'param2']")
self.assertEqual(res, ("curl", {"exclude": ["param1", "param2"]}))
res = parse_language_and_options('curl, exclude=["param1", "param2"]')
self.assertEqual(res, ("curl", {"exclude": ["param1", "param2"]}))
res = parse_language_and_options("curl, exclude=['param1', \"param2\"]")
self.assertEqual(res, ("curl", {"exclude": ["param1", "param2"]}))
def test_multiple_list_mod_argument(self) -> None:
res = parse_language_and_options("curl, exclude=['param1', \"param2\"], special=['param3']")
self.assertEqual(res, ("curl", {"exclude": ["param1", "param2"], "special": ["param3"]}))
def test_multiple_mixed_mod_arguments(self) -> None:
res = parse_language_and_options(
'curl, exclude=["asdf", \'sdfg\'], other_key=\'asdf\', more_things="asdf", another_list=[1, "2"]'
)
self.assertEqual(
res,
(
"curl",
{
"exclude": ["asdf", "sdfg"],
"other_key": "asdf",
"more_things": "asdf",
"another_list": [1, "2"],
},
),
)
class TestCurlExampleGeneration(ZulipTestCase): class TestCurlExampleGeneration(ZulipTestCase):
spec_mock_without_examples = { spec_mock_without_examples = {