markdown: Format spoilers for emails.

This is a hacky fix to avoid spoiler content leaking in emails. The
general idea here is to tell people to open Zulip to view the actual
message in full.

We create a mini-markdown parser here that strips away the fence content
that has the 'spoiler' tag for the text emails.

Our handling of HTML emails is much better in comparison, since there we
can use lxml to parse the spoiler blocks.
This commit is contained in:
Rohitt Vashishtha
2020-07-15 04:51:28 +05:30
committed by Tim Abbott
parent ea1c178305
commit 7d5d90b484
2 changed files with 85 additions and 0 deletions

View File

@@ -17,6 +17,7 @@ from lxml.cssselect import CSSSelector
from confirmation.models import one_click_unsubscribe_link from confirmation.models import one_click_unsubscribe_link
from zerver.decorator import statsd_increment from zerver.decorator import statsd_increment
from zerver.lib.markdown.fenced_code import FENCE_RE
from zerver.lib.message import bulk_access_messages from zerver.lib.message import bulk_access_messages
from zerver.lib.queue import queue_json_publish from zerver.lib.queue import queue_json_publish
from zerver.lib.send_email import FromAddress, send_future_email from zerver.lib.send_email import FromAddress, send_future_email
@@ -117,6 +118,54 @@ def fix_emojis(content: str, base_url: str, emojiset: str) -> str:
content = lxml.html.tostring(fragment).decode('utf-8') content = lxml.html.tostring(fragment).decode('utf-8')
return content return content
def fix_spoilers_in_html(content: str, language: str) -> str:
    """Hide spoiler bodies in rendered HTML and point the reader to Zulip.

    For every element with class ``spoiler-block``, a "(Open Zulip to see
    the spoiler content)" note is appended to the spoiler header's ``<p>``,
    the ``spoiler-header`` wrapper tag is dropped (its children are kept),
    and the ``spoiler-content`` subtree is removed.

    Args:
        content: HTML to process.
        language: Language passed to ``override_language`` when translating
            the note.

    Returns:
        The re-serialized HTML as a ``str``.

    Raises:
        IndexError: If a spoiler block has no ``spoiler-header`` or no
            ``spoiler-content`` descendant.
    """
    with override_language(language):
        spoiler_title: str = _("Open Zulip to see the spoiler content")
    fragment = lxml.html.fromstring(content)
    spoilers = fragment.find_class("spoiler-block")
    for spoiler in spoilers:
        header = spoiler.find_class("spoiler-header")[0]
        spoiler_content = spoiler.find_class("spoiler-content")[0]
        header_content = header.find("p")
        if header_content is None:
            # Create a new element to append the spoiler note to.
            header_content = lxml.html.fromstring("<p></p>")
            header.append(header_content)
        else:
            # Add a space. It's simpler to append a new span element than
            # to insert text after the last node ends, since neither .text
            # nor .tail does the right thing for us.
            header_content.append(lxml.html.fromstring("<span> </span>"))
        # Build the note through the element API rather than by parsing an
        # f-string: the translated title may contain quotes or markup
        # characters, and the serializer escapes attribute values and text.
        span_elem = header_content.makeelement("span")
        span_elem.set("class", "spoiler-title")
        span_elem.set("title", spoiler_title)
        span_elem.text = f"({spoiler_title})"
        header_content.append(span_elem)
        header.drop_tag()
        spoiler_content.drop_tree()
    content = lxml.html.tostring(fragment).decode("utf-8")
    return content
def fix_spoilers_in_text(content: str, language: str) -> str:
    """Strip spoiler bodies from markdown text, leaving a pointer to Zulip.

    The text is scanned line by line with ``FENCE_RE``. A fence whose
    language is ``spoiler`` is kept and followed by a
    "(Open Zulip to see the spoiler content)" line; everything up to the
    matching closing fence is dropped, and the closing fence is kept.
    Lines outside any spoiler, including ordinary code-fence delimiters,
    are passed through unchanged.

    Args:
        content: Raw message text, with lines separated by ``\\n``.
        language: Language passed to ``override_language`` when translating
            the replacement line.

    Returns:
        The filtered text, joined back together with ``\\n``.
    """
    with override_language(language):
        spoiler_title: str = _("Open Zulip to see the spoiler content")
    lines = content.split('\n')
    output = []
    # Fence string of the spoiler currently being skipped, if any.
    open_fence = None
    for line in lines:
        m = FENCE_RE.match(line)
        if m and m.group('lang') == 'spoiler':
            # Opening spoiler fence: keep the header line, then replace
            # the body with the translated note.
            open_fence = m.group('fence')
            output.append(line)
            output.append(f"({spoiler_title})")
        elif m and m.group('fence') == open_fence:
            # Closing fence of the spoiler being skipped.
            open_fence = None
            output.append(line)
        elif not open_fence:
            # Outside any spoiler: keep the line. This includes fence
            # lines of non-spoiler code blocks, which were previously
            # dropped because they matched FENCE_RE but neither branch
            # above.
            output.append(line)
    return '\n'.join(output)
def build_message_list(user_profile: UserProfile, messages: List[Message]) -> List[Dict[str, Any]]: def build_message_list(user_profile: UserProfile, messages: List[Message]) -> List[Dict[str, Any]]:
""" """
Builds the message list object for the missed message email template. Builds the message list object for the missed message email template.
@@ -159,11 +208,13 @@ def build_message_list(user_profile: UserProfile, messages: List[Message]) -> Li
plain = re.sub( plain = re.sub(
r"/user_uploads/(\S*)", r"/user_uploads/(\S*)",
user_profile.realm.uri + r"/user_uploads/\1", plain) user_profile.realm.uri + r"/user_uploads/\1", plain)
plain = fix_spoilers_in_text(plain, user_profile.default_language)
assert message.rendered_content is not None assert message.rendered_content is not None
html = message.rendered_content html = message.rendered_content
html = relative_to_full_url(user_profile.realm.uri, html) html = relative_to_full_url(user_profile.realm.uri, html)
html = fix_emojis(html, user_profile.realm.uri, user_profile.emojiset) html = fix_emojis(html, user_profile.realm.uri, user_profile.emojiset)
html = fix_spoilers_in_html(html, user_profile.default_language)
if sender: if sender:
plain, html = append_sender_to_message(plain, html, sender) plain, html = append_sender_to_message(plain, html, sender)
return {'plain': plain, 'html': html} return {'plain': plain, 'html': html}

View File

@@ -15,6 +15,7 @@ from zerver.lib.actions import do_change_notification_settings, do_change_user_r
from zerver.lib.email_notifications import ( from zerver.lib.email_notifications import (
enqueue_welcome_emails, enqueue_welcome_emails,
fix_emojis, fix_emojis,
fix_spoilers_in_html,
handle_missedmessage_emails, handle_missedmessage_emails,
relative_to_full_url, relative_to_full_url,
) )
@@ -941,6 +942,39 @@ class TestMissedMessages(ZulipTestCase):
'https://www.google.com/images/srpr/logo4w.png</a></p>' 'https://www.google.com/images/srpr/logo4w.png</a></p>'
self.assertEqual(actual_output, expected_output) self.assertEqual(actual_output, expected_output)
def test_spoilers_in_html_emails(self) -> None:
    """Check that fix_spoilers_in_html hides spoiler bodies.

    First compares the output for one hand-written spoiler block against
    an exact expected string, then runs every spoiler-named case from the
    ``markdown_test_cases.json`` fixture through the function and checks
    which spoiler class names remain in the output.
    """
    test_data = "<div class=\"spoiler-block\"><div class=\"spoiler-header\">\n\n<p><a>header</a> text</p>\n</div><div class=\"spoiler-content\" aria-hidden=\"true\">\n\n<p>content</p>\n</div></div>\n\n<p>outside spoiler</p>"
    actual_output = fix_spoilers_in_html(test_data, 'en')
    expected_output = "<div><div class=\"spoiler-block\">\n\n<p><a>header</a> text<span> </span><span class=\"spoiler-title\" title=\"Open Zulip to see the spoiler content\">(Open Zulip to see the spoiler content)</span></p>\n</div>\n\n<p>outside spoiler</p></div>"
    self.assertEqual(actual_output, expected_output)

    # test against our markdown_test_cases so these features do not get out of sync.
    fixtures = ujson.loads(self.fixture_data("markdown_test_cases.json"))
    test_fixtures = {
        test['name']: test
        for test in fixtures['regular_tests']
        if 'spoiler' in test['name']
    }
    for test_name, test in test_fixtures.items():
        output_data = fix_spoilers_in_html(test["expected_output"], 'en')
        # unittest assertions are not stripped under ``python -O`` and
        # report the offending fixture on failure, unlike bare ``assert``.
        self.assertNotIn('spoiler-header', output_data, msg=test_name)
        self.assertNotIn('spoiler-content', output_data, msg=test_name)
        self.assertIn('spoiler-block', output_data, msg=test_name)
        self.assertIn('spoiler-title', output_data, msg=test_name)
def test_spoilers_in_text_emails(self) -> None:
    """Send a mention containing a spoiler and check the resulting email.

    The body is expected to include the spoiler header and the
    "Open Zulip" note, and must not include the spoiler's hidden text.
    """
    message_id = self.send_stream_message(
        self.example_user('othello'),
        "Denmark",
        "@**King Hamlet**\n\n```spoiler header text\nsecret-text\n```",
    )
    must_appear = ["header text", "Open Zulip to see the spoiler content"]
    must_not_appear = ["secret-text"]
    self._test_cases(
        message_id,
        must_appear,
        '#Denmark > test',
        False,  # send_as_user
        trigger='mentioned',
        verify_body_does_not_include=must_not_appear,
    )
def test_fix_emoji(self) -> None: def test_fix_emoji(self) -> None:
# An emoji. # An emoji.
test_data = '<p>See <span aria-label="cloud with lightning and rain" class="emoji emoji-26c8" role="img" title="cloud with lightning and rain">' + \ test_data = '<p>See <span aria-label="cloud with lightning and rain" class="emoji emoji-26c8" role="img" title="cloud with lightning and rain">' + \