From e2d303c1bb5f538d17dc3d9134bc8858bdece781 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Tue, 28 Sep 2021 23:27:54 +0000 Subject: [PATCH] CVE-2021-41115: Use re2 for user-supplied linkifier patterns. Zulip attempts to validate that the regular expressions that admins enter for linkifiers are well-formatted, and only contain a specific subset of regex grammar. The process of checking these properties (via a regex!) can cause denial-of-service via backtracking. Furthermore, this validation itself does not prevent the creation of linkifiers which themselves cause denial-of-service when they are executed. As the validator accepts literally anything inside of a `(?P...)` block, any quadratic backtracking expression can be hidden therein. Switch user-provided linkifier patterns to be matched in the Markdown processor by the `re2` library, which is guaranteed constant-time. This somewhat limits the possible features of the regular expression (notably, look-head and -behind, and back-references); however, these features had never been advertised as working in the context of linkifiers. A migration removes any existing linkifiers which would not function under re2, after printing them for posterity during the upgrade; they are unlikely to be common, and are impossible to fix automatically. The denial-of-service in the linkifier validator was discovered by @erik-krogh and @yoff, as GHSL-2021-118. --- .../puppeteer_tests/realm-linkifier.ts | 10 ++--- zerver/lib/markdown/__init__.py | 45 ++++++++++++++----- zerver/migrations/0359_re2_linkifiers.py | 39 ++++++++++++++++ zerver/models.py | 25 ++++++----- zerver/tests/test_markdown.py | 9 ++-- zerver/tests/test_realm_linkifiers.py | 18 +++----- 6 files changed, 102 insertions(+), 44 deletions(-) create mode 100644 zerver/migrations/0359_re2_linkifiers.py diff --git a/frontend_tests/puppeteer_tests/realm-linkifier.ts b/frontend_tests/puppeteer_tests/realm-linkifier.ts index 3d19818ac7..f5a0e75a6c 100644 --- a/frontend_tests/puppeteer_tests/realm-linkifier.ts +++ b/frontend_tests/puppeteer_tests/realm-linkifier.ts @@ -42,7 +42,7 @@ async function test_delete_linkifier(page: Page): Promise { async function test_add_invalid_linkifier_pattern(page: Page): Promise { await page.waitForSelector(".admin-linkifier-form", {visible: true}); await common.fill_form(page, "form.admin-linkifier-form", { - pattern: "a$", + pattern: "(foo", url_format_string: "https://trac.example.com/ticket/%(id)s", }); await page.click("form.admin-linkifier-form button.button"); @@ -50,7 +50,7 @@ async function test_add_invalid_linkifier_pattern(page: Page): Promise { await page.waitForSelector("div#admin-linkifier-pattern-status", {visible: true}); assert.strictEqual( await common.get_text_from_selector(page, "div#admin-linkifier-pattern-status"), - "Failed: Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-].", + "Failed: Bad regular expression: missing ): (foo", ); } @@ -83,8 +83,8 @@ async function test_edit_invalid_linkifier(page: Page): Promise { await page.click(".linkifier_row .edit"); await page.waitForFunction(() => document.activeElement === $("#linkifier-edit-form-modal")[0]); await common.fill_form(page, "form.linkifier-edit-form", { - pattern: "####", - url_format_string: "####", + pattern: "#(?Pd????)", + url_format_string: "????", }); await page.click(".submit-linkifier-info-change"); @@ -96,7 +96,7 @@ async function test_edit_invalid_linkifier(page: Page): Promise { ); assert.strictEqual( edit_linkifier_pattern_status, - "Failed: Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-].", + "Failed: Bad regular expression: bad repetition operator: ????", ); const edit_linkifier_format_status_selector = "div#edit-linkifier-format-status"; diff --git a/zerver/lib/markdown/__init__.py b/zerver/lib/markdown/__init__.py index 6e0c61dbd7..450fe3287f 100644 --- a/zerver/lib/markdown/__init__.py +++ b/zerver/lib/markdown/__init__.py @@ -37,6 +37,7 @@ import markdown.inlinepatterns import markdown.postprocessors import markdown.treeprocessors import markdown.util +import re2 import requests from django.conf import settings from django.db.models import Q @@ -1779,7 +1780,9 @@ class MarkdownListPreprocessor(markdown.preprocessors.Preprocessor): # Name for the outer capture group we use to separate whitespace and # other delimiters from the actual content. This value won't be an # option in user-entered capture groups. +BEFORE_CAPTURE_GROUP = "linkifier_before_match" OUTER_CAPTURE_GROUP = "linkifier_actual_match" +AFTER_CAPTURE_GROUP = "linkifier_after_match" def prepare_linkifier_pattern(source: str) -> str: @@ -1787,31 +1790,45 @@ def prepare_linkifier_pattern(source: str) -> str: whitespace, or opening delimiters, won't match if there are word characters directly after, and saves what was matched as OUTER_CAPTURE_GROUP.""" - return fr"""(?{source})(?!\w)""" + return fr"""(?P<{BEFORE_CAPTURE_GROUP}>^|\s|['"\(,:<])(?P<{OUTER_CAPTURE_GROUP}>{source})(?P<{AFTER_CAPTURE_GROUP}>$|[^\pL\pN])""" # Given a regular expression pattern, linkifies groups that match it # using the provided format string to construct the URL. -class LinkifierPattern(markdown.inlinepatterns.Pattern): +class LinkifierPattern(markdown.inlinepatterns.InlineProcessor): """Applied a given linkifier to the input""" def __init__( self, source_pattern: str, format_string: str, - markdown_instance: Optional[markdown.Markdown] = None, + md: markdown.Markdown, ) -> None: - self.pattern = prepare_linkifier_pattern(source_pattern) - self.format_string = format_string - markdown.inlinepatterns.Pattern.__init__(self, self.pattern, markdown_instance) + # Do not write errors to stderr (this still raises exceptions) + options = re2.Options() + options.log_errors = False - def handleMatch(self, m: Match[str]) -> Union[Element, str]: + self.md = md + self.compiled_re = re2.compile(prepare_linkifier_pattern(source_pattern), options=options) + self.format_string = format_string + + def handleMatch( # type: ignore[override] # supertype incompatible with supersupertype + self, m: Match[str], data: str + ) -> Union[Tuple[Element, int, int], Tuple[None, None, None]]: db_data = self.md.zulip_db_data - return url_to_a( + url = url_to_a( db_data, self.format_string % m.groupdict(), markdown.util.AtomicString(m.group(OUTER_CAPTURE_GROUP)), ) + if isinstance(url, str): + return None, None, None + + return ( + url, + m.start(2), + m.end(2), + ) class UserMentionPattern(markdown.inlinepatterns.InlineProcessor): @@ -2336,11 +2353,19 @@ def topic_links(linkifiers_key: int, topic_name: str) -> List[Dict[str, str]]: matches: List[Dict[str, Union[str, int]]] = [] linkifiers = linkifiers_for_realm(linkifiers_key) + options = re2.Options() + options.log_errors = False for linkifier in linkifiers: raw_pattern = linkifier["pattern"] url_format_string = linkifier["url_format"] - pattern = prepare_linkifier_pattern(raw_pattern) - for m in re.finditer(pattern, topic_name): + try: + pattern = re2.compile(prepare_linkifier_pattern(raw_pattern), options=options) + except re2.error: + # An invalid regex shouldn't be possible here, and logging + # here on an invalid regex would spam the logs with every + # message sent; simply move on. + continue + for m in pattern.finditer(topic_name): match_details = m.groupdict() match_text = match_details["linkifier_actual_match"] # We format the linkifier's url string using the matched text. diff --git a/zerver/migrations/0359_re2_linkifiers.py b/zerver/migrations/0359_re2_linkifiers.py new file mode 100644 index 0000000000..7c7449dae5 --- /dev/null +++ b/zerver/migrations/0359_re2_linkifiers.py @@ -0,0 +1,39 @@ +import re2 +from django.db import migrations +from django.db.backends.postgresql.schema import DatabaseSchemaEditor +from django.db.migrations.state import StateApps + + +def delete_re2_invalid(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None: + options = re2.Options() + options.log_errors = False + + RealmFilter = apps.get_model("zerver", "RealmFilter") + found_errors = False + for linkifier in RealmFilter.objects.all(): + try: + re2.compile(linkifier.pattern, options=options) + except re2.error: + if not found_errors: + print() + found_errors = True + print( + f"Deleting linkifier {linkifier.id} in realm {linkifier.realm.string_id} which is not compatible with new re2 engine:" + ) + print(f" {linkifier.pattern} -> {linkifier.url_format_string}") + linkifier.delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ("zerver", "0325_alter_realmplayground_unique_together"), + ] + + operations = [ + migrations.RunPython( + delete_re2_invalid, + reverse_code=migrations.RunPython.noop, + elidable=True, + ) + ] diff --git a/zerver/models.py b/zerver/models.py index b7e70f5ba5..6cbbb7cd2e 100644 --- a/zerver/models.py +++ b/zerver/models.py @@ -19,6 +19,7 @@ from typing import ( ) import django.contrib.auth +import re2 from bitfield import BitField from bitfield.types import BitHandler from django.conf import settings @@ -906,19 +907,19 @@ post_delete.connect(flush_realm_emoji, sender=RealmEmoji) def filter_pattern_validator(value: str) -> None: - regex = re.compile(r"^(?:(?:[\w\-#_= /:]*|[+]|[!])(\(\?P<\w+>.+\)))+$") - error_msg = _("Invalid linkifier pattern. Valid characters are {}.").format( - "[ a-zA-Z_#=/:+!-]", - ) - - if not regex.match(str(value)): - raise ValidationError(error_msg) - try: - re.compile(value) - except re.error: - # Regex is invalid - raise ValidationError(error_msg) + # Do not write errors to stderr (this still raises exceptions) + options = re2.Options() + options.log_errors = False + + re2.compile(value, options=options) + except re2.error as e: + if len(e.args) >= 1: + if isinstance(e.args[0], str): # nocoverage + raise ValidationError(_("Bad regular expression: {}").format(e.args[0])) + if isinstance(e.args[0], bytes): + raise ValidationError(_("Bad regular expression: {}").format(e.args[0].decode())) + raise ValidationError(_("Unknown regular expression error")) # nocoverage def filter_format_validator(value: str) -> None: diff --git a/zerver/tests/test_markdown.py b/zerver/tests/test_markdown.py index 13a6cc1d98..4ef49fb735 100644 --- a/zerver/tests/test_markdown.py +++ b/zerver/tests/test_markdown.py @@ -1401,26 +1401,25 @@ class MarkdownTest(ZulipTestCase): url_format_string = r"https://trac.example.com/ticket/%(id)s" linkifier_1 = RealmFilter( realm=realm, - pattern=r"(?PABC\-[0-9]+)(?![A-Z0-9-])", + pattern=r"(?PABC\-[0-9]+)", url_format_string=url_format_string, ) linkifier_1.save() self.assertEqual( linkifier_1.__str__(), - r"ABC\-[0-9]+)(?![A-Z0-9-])" - " https://trac.example.com/ticket/%(id)s>", + r"ABC\-[0-9]+) https://trac.example.com/ticket/%(id)s>", ) url_format_string = r"https://other-trac.example.com/ticket/%(id)s" linkifier_2 = RealmFilter( realm=realm, - pattern=r"(?P[A-Z][A-Z0-9]*\-[0-9]+)(?![A-Z0-9-])", + pattern=r"(?P[A-Z][A-Z0-9]*\-[0-9]+)", url_format_string=url_format_string, ) linkifier_2.save() self.assertEqual( linkifier_2.__str__(), - r"[A-Z][A-Z0-9]*\-[0-9]+)(?![A-Z0-9-])" + r"[A-Z][A-Z0-9]*\-[0-9]+)" " https://other-trac.example.com/ticket/%(id)s>", ) diff --git a/zerver/tests/test_realm_linkifiers.py b/zerver/tests/test_realm_linkifiers.py index 2b0874f321..be1a85e733 100644 --- a/zerver/tests/test_realm_linkifiers.py +++ b/zerver/tests/test_realm_linkifiers.py @@ -27,17 +27,13 @@ class RealmFilterTest(ZulipTestCase): result = self.client_post("/json/realm/filters", info=data) self.assert_json_error(result, "This field cannot be blank.") - data["pattern"] = "$a" + data["pattern"] = "(foo" result = self.client_post("/json/realm/filters", info=data) - self.assert_json_error( - result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]." - ) + self.assert_json_error(result, "Bad regular expression: missing ): (foo") - data["pattern"] = r"ZUL-(?P\d++)" + data["pattern"] = r"ZUL-(?P\d????)" result = self.client_post("/json/realm/filters", info=data) - self.assert_json_error( - result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]." - ) + self.assert_json_error(result, "Bad regular expression: bad repetition operator: ????") data["pattern"] = r"ZUL-(?P\d+)" data["url_format_string"] = "$fgfg" @@ -154,13 +150,11 @@ class RealmFilterTest(ZulipTestCase): ) data = { - "pattern": r"ZUL-(?P\d++)", + "pattern": r"ZUL-(?P\d????)", "url_format_string": "https://realm.com/my_realm_filter/%(id)s", } result = self.client_patch(f"/json/realm/filters/{linkifier_id}", info=data) - self.assert_json_error( - result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]." - ) + self.assert_json_error(result, "Bad regular expression: bad repetition operator: ????") data["pattern"] = r"ZUL-(?P\d+)" data["url_format_string"] = "$fgfg"