diff --git a/zerver/lib/email_mirror.py b/zerver/lib/email_mirror.py index f2d4abf296..0a92ae327b 100644 --- a/zerver/lib/email_mirror.py +++ b/zerver/lib/email_mirror.py @@ -133,9 +133,9 @@ def mark_missed_message_address_as_used(address: str) -> None: redis_client.delete(key) raise ZulipEmailForwardError('Missed message address has already been used') -def construct_zulip_body(message: message.Message, realm: Realm, - show_sender: bool=False) -> str: - body = extract_body(message) +def construct_zulip_body(message: message.Message, realm: Realm, show_sender: bool=False, + remove_quotations: bool=True) -> str: + body = extract_body(message, remove_quotations) # Remove null characters, since Zulip will reject body = body.replace("\x00", "") body = filter_footer(body) @@ -227,7 +227,7 @@ def get_message_part_by_type(message: message.Message, content_type: str) -> Opt return None talon_initialized = False -def extract_body(message: message.Message) -> str: +def extract_body(message: message.Message, remove_quotations: bool=True) -> str: import talon global talon_initialized if not talon_initialized: @@ -238,12 +238,18 @@ def extract_body(message: message.Message) -> str: # that. plaintext_content = get_message_part_by_type(message, "text/plain") if plaintext_content: - return talon.quotations.extract_from_plain(plaintext_content) + if remove_quotations: + return talon.quotations.extract_from_plain(plaintext_content) + else: + return plaintext_content # If we only have an HTML version, try to make that look nice. html_content = get_message_part_by_type(message, "text/html") if html_content: - return convert_html_to_markdown(talon.quotations.extract_from_html(html_content)) + if remove_quotations: + return convert_html_to_markdown(talon.quotations.extract_from_html(html_content)) + else: + return convert_html_to_markdown(html_content) if plaintext_content is not None or html_content is not None: raise ZulipEmailForwardUserError("Email has no nonempty body sections; ignoring.") @@ -253,7 +259,7 @@ def extract_body(message: message.Message) -> str: def filter_footer(text: str) -> str: # Try to filter out obvious footers. - possible_footers = [line for line in text.split("\n") if line.strip().startswith("--")] + possible_footers = [line for line in text.split("\n") if line.strip() == "--"] if len(possible_footers) != 1: # Be conservative and don't try to scrub content if there # isn't a trivial footer structure. @@ -326,13 +332,21 @@ def strip_from_subject(subject: str) -> str: stripped = re.sub(reg, "", subject, flags = re.IGNORECASE | re.MULTILINE) return stripped.strip() +def is_forwarded(subject: str) -> bool: + # regex taken from strip_from_subject, we use it to detect various forms + # of FWD at the beginning of the subject. + reg = r"([\[\(] *)?\b(FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$" + return bool(re.match(reg, subject, flags=re.IGNORECASE)) + def process_stream_message(to: str, message: message.Message, debug_info: Dict[str, Any]) -> None: subject_header = str(make_header(decode_header(message.get("Subject", "")))) subject = strip_from_subject(subject_header) or "(no topic)" stream, show_sender = extract_and_validate(to) - body = construct_zulip_body(message, stream.realm, show_sender) + # Don't remove quotations if message is forwarded: + remove_quotations = not is_forwarded(subject_header) + body = construct_zulip_body(message, stream.realm, show_sender, remove_quotations) debug_info["stream"] = stream send_zulip(settings.EMAIL_GATEWAY_BOT, stream, subject, body) logger.info("Successfully processed email to %s (%s)" % ( diff --git a/zerver/tests/test_email_mirror.py b/zerver/tests/test_email_mirror.py index 0681d453de..48e602ce62 100644 --- a/zerver/tests/test_email_mirror.py +++ b/zerver/tests/test_email_mirror.py @@ -30,8 +30,10 @@ from zerver.lib.email_mirror import ( create_missed_message_address, get_missed_message_token_from_address, strip_from_subject, + is_forwarded, ) +from zerver.lib.notifications import convert_html_to_markdown from zerver.lib.send_email import FromAddress from email.mime.text import MIMEText @@ -445,6 +447,17 @@ class TestEmptyGatewaySetting(ZulipTestCase): self.assertEqual(test_address, '') class TestReplyExtraction(ZulipTestCase): + def test_is_forwarded(self) -> None: + self.assertTrue(is_forwarded("FWD: hey")) + self.assertTrue(is_forwarded("fwd: hi")) + self.assertTrue(is_forwarded("[fwd] subject")) + + self.assertTrue(is_forwarded("FWD: RE:")) + self.assertTrue(is_forwarded("Fwd: RE: fwd: re: subject")) + + self.assertFalse(is_forwarded("subject")) + self.assertFalse(is_forwarded("RE: FWD: hi")) + def test_reply_is_extracted_from_plain(self) -> None: # build dummy messages for stream @@ -476,6 +489,13 @@ class TestReplyExtraction(ZulipTestCase): self.assertEqual(message.content, "Reply") + # Don't extract if Subject indicates the email has been forwarded into the mirror: + del incoming_valid_message['Subject'] + incoming_valid_message['Subject'] = 'FWD: TestStreamEmailMessages Subject' + process_message(incoming_valid_message) + message = most_recent_message(user_profile) + self.assertEqual(message.content, text) + def test_reply_is_extracted_from_html(self) -> None: # build dummy messages for stream @@ -520,6 +540,12 @@ class TestReplyExtraction(ZulipTestCase): self.assertEqual(message.content, 'Reply') + # Don't extract if Subject indicates the email has been forwarded into the mirror: + del incoming_valid_message['Subject'] + incoming_valid_message['Subject'] = 'FWD: TestStreamEmailMessages Subject' + process_message(incoming_valid_message) + message = most_recent_message(user_profile) + self.assertEqual(message.content, convert_html_to_markdown(html)) class TestScriptMTA(ZulipTestCase):