markdown: Format spoilers for emails.

This is a hacky fix to keep spoiler content from leaking into emails. The general idea is to tell people to open Zulip to view the actual message in full. For text emails, we create a mini Markdown parser that strips away the content of any fence tagged 'spoiler'. Our handling of HTML emails is much better in comparison, since there we can use lxml to parse the spoiler blocks.
2025-10-31 20:13:46 +00:00 · 2020-07-15 04:51:28 +05:30
parent ea1c178305
commit 7d5d90b484
2 changed files with 85 additions and 0 deletions
--- a/zerver/lib/email_notifications.py
+++ b/zerver/lib/email_notifications.py
@@ -17,6 +17,7 @@ from lxml.cssselect import CSSSelector
 from confirmation.models import one_click_unsubscribe_link
 from zerver.decorator import statsd_increment
 from zerver.lib.markdown.fenced_code import FENCE_RE
 from zerver.lib.message import bulk_access_messages
 from zerver.lib.queue import queue_json_publish
 from zerver.lib.send_email import FromAddress, send_future_email
@@ -117,6 +118,54 @@ def fix_emojis(content: str, base_url: str, emojiset: str) -> str:
    content = lxml.html.tostring(fragment).decode('utf-8')
    return content
 def fix_spoilers_in_html(content: str, language: str) -> str:
    """Hide spoiler bodies in rendered message HTML destined for an email.

    For every element carrying the ``spoiler-block`` class, the
    ``spoiler-content`` subtree is removed and a ``spoiler-title`` span
    telling the reader to open Zulip is appended to the header paragraph.
    The ``spoiler-header`` wrapper tag itself is dropped (its children are
    kept), so only the ``spoiler-block`` container remains.

    Args:
        content: Rendered HTML of the message.
        language: Language code used to translate the notice text.

    Returns:
        The serialized HTML with spoiler bodies removed.

    Raises:
        IndexError: If a ``spoiler-block`` has no ``spoiler-header`` or no
            ``spoiler-content`` descendant.
    """
    with override_language(language):
        spoiler_title: str = _("Open Zulip to see the spoiler content")
    fragment = lxml.html.fromstring(content)
    for spoiler in fragment.find_class("spoiler-block"):
        header = spoiler.find_class("spoiler-header")[0]
        spoiler_content = spoiler.find_class("spoiler-content")[0]
        header_content = header.find("p")
        if header_content is None:
            # Create a new element to append the spoiler notice to.
            header_content = lxml.html.fromstring("<p></p>")
            header.append(header_content)
        else:
            # Add a space. It's simpler to append a new span element than
            # to insert text after the last node ends, since neither .text
            # nor .tail does the right thing for us.
            header_content.append(lxml.html.fromstring("<span> </span>"))
        # Set the translated text through the element API rather than
        # interpolating it into markup, so that quotes, '<' or '&' in a
        # translation are escaped on serialization instead of being parsed
        # as HTML.
        span_elem = lxml.html.fromstring('<span class="spoiler-title"></span>')
        span_elem.set("title", spoiler_title)
        span_elem.text = f"({spoiler_title})"
        header_content.append(span_elem)
        # Keep the header's children but lose the wrapper; discard the
        # hidden content entirely.
        header.drop_tag()
        spoiler_content.drop_tree()
    content = lxml.html.tostring(fragment).decode("utf-8")
    return content
 def fix_spoilers_in_text(content: str, language: str) -> str:
    """Strip spoiler bodies from the plain-text form of a message.

    The text is scanned line by line. A fence line whose language is
    ``spoiler`` is kept and followed by a translated "open Zulip" notice;
    every subsequent line is dropped until a fence line with the same
    fence string is seen, which is kept and ends the spoiler. Everything
    outside a spoiler, including the fence lines of ordinary code blocks,
    is passed through unchanged.

    Args:
        content: Raw message text.
        language: Language code used to translate the notice text.

    Returns:
        The message text with spoiler bodies replaced by the notice.
    """
    with override_language(language):
        spoiler_title: str = _("Open Zulip to see the spoiler content")
    lines = content.split('\n')
    output = []
    # Fence string of the spoiler currently being hidden, or None.
    open_fence = None
    for line in lines:
        m = FENCE_RE.match(line)
        if m:
            fence = m.group('fence')
            lang = m.group('lang')
            if lang == 'spoiler':
                open_fence = fence
                output.append(line)
                output.append(f"({spoiler_title})")
            elif fence == open_fence:
                # NOTE(review): only the fence string is compared, so a
                # nested fence line using the same fence characters also
                # ends the spoiler here.
                open_fence = None
                output.append(line)
            elif not open_fence:
                # A fence that belongs to a non-spoiler block outside any
                # spoiler must survive; previously it was silently dropped.
                output.append(line)
        elif not open_fence:
            output.append(line)
    return '\n'.join(output)
 def build_message_list(user_profile: UserProfile, messages: List[Message]) -> List[Dict[str, Any]]:
    """
    Builds the message list object for the missed message email template.
@@ -159,11 +208,13 @@ def build_message_list(user_profile: UserProfile, messages: List[Message]) -> Li
        plain = re.sub(
            r"/user_uploads/(\S*)",
            user_profile.realm.uri + r"/user_uploads/\1", plain)
        plain = fix_spoilers_in_text(plain, user_profile.default_language)
        assert message.rendered_content is not None
        html = message.rendered_content
        html = relative_to_full_url(user_profile.realm.uri, html)
        html = fix_emojis(html, user_profile.realm.uri, user_profile.emojiset)
        html = fix_spoilers_in_html(html, user_profile.default_language)
        if sender:
            plain, html = append_sender_to_message(plain, html, sender)
        return {'plain': plain, 'html': html}
--- a/zerver/tests/test_email_notifications.py
+++ b/zerver/tests/test_email_notifications.py
@@ -15,6 +15,7 @@ from zerver.lib.actions import do_change_notification_settings, do_change_user_r
 from zerver.lib.email_notifications import (
    enqueue_welcome_emails,
    fix_emojis,
    fix_spoilers_in_html,
    handle_missedmessage_emails,
    relative_to_full_url,
 )
@@ -941,6 +942,39 @@ class TestMissedMessages(ZulipTestCase):
                          'https://www.google.com/images/srpr/logo4w.png</a></p>'
        self.assertEqual(actual_output, expected_output)
    def test_spoilers_in_html_emails(self) -> None:
        """Check fix_spoilers_in_html on a fixed sample and on the spoiler fixtures."""
        test_data = "<div class=\"spoiler-block\"><div class=\"spoiler-header\">\n\n<p><a>header</a> text</p>\n</div><div class=\"spoiler-content\" aria-hidden=\"true\">\n\n<p>content</p>\n</div></div>\n\n<p>outside spoiler</p>"
        actual_output = fix_spoilers_in_html(test_data, 'en')
        expected_output = "<div><div class=\"spoiler-block\">\n\n<p><a>header</a> text<span> </span><span class=\"spoiler-title\" title=\"Open Zulip to see the spoiler content\">(Open Zulip to see the spoiler content)</span></p>\n</div>\n\n<p>outside spoiler</p></div>"
        self.assertEqual(actual_output, expected_output)

        # test against our markdown_test_cases so these features do not get out of sync.
        fixtures = ujson.loads(self.fixture_data("markdown_test_cases.json"))
        spoiler_tests = [test for test in fixtures['regular_tests'] if 'spoiler' in test['name']]
        for test in spoiler_tests:
            # subTest reports which fixture failed; the unittest assertions
            # (unlike bare asserts) still run under `python -O`.
            with self.subTest(fixture=test['name']):
                output_data = fix_spoilers_in_html(test["expected_output"], 'en')
                self.assertNotIn('spoiler-header', output_data)
                self.assertNotIn('spoiler-content', output_data)
                self.assertIn('spoiler-block', output_data)
                self.assertIn('spoiler-title', output_data)
    def test_spoilers_in_text_emails(self) -> None:
        """Send a mention containing a spoiler and check the resulting email body.

        The shared `_test_cases` helper is asked to verify that the spoiler
        header and the "open Zulip" notice are present and that the hidden
        text is absent.
        """
        msg_id = self.send_stream_message(
            self.example_user('othello'),
            "Denmark",
            "@**King Hamlet**\n\n```spoiler header text\nsecret-text\n```",
        )
        must_appear = ["header text", "Open Zulip to see the spoiler content"]
        must_not_appear = ["secret-text"]
        self._test_cases(
            msg_id,
            must_appear,
            '#Denmark > test',  # email subject
            False,  # send_as_user
            trigger='mentioned',
            verify_body_does_not_include=must_not_appear,
        )
    def test_fix_emoji(self) -> None:
        # An emoji.
        test_data = '<p>See <span aria-label="cloud with lightning and rain" class="emoji emoji-26c8" role="img" title="cloud with lightning and rain">' + \