email_mirror: Don't remove quotations from forwarded messages.

Addresses point 2 of #10612. We use a regex to detect if a form of FWD indicator is present at the beginning of the subject, which means the message has been forwarded. remove_quotations argument is added to a couple of functions where it's necessary. In filter_footer, the criteria for a line to be a possible beginning of a footer is changed to line.strip() == "--", instead of line.strip().startswith("--"), because the former would remove quotations from plaintext emails. This change makes sense, because RFC 3676 specifies ""-- " as the separator line between the body and the signature of a message": https://tools.ietf.org/html/rfc3676
2025-10-27 18:13:58 +00:00 · 2019-03-09 22:35:45 +01:00
parent 0633f268fb
commit edcb6d57fc
2 changed files with 48 additions and 8 deletions
--- a/zerver/lib/email_mirror.py
+++ b/zerver/lib/email_mirror.py
@@ -133,9 +133,9 @@ def mark_missed_message_address_as_used(address: str) -> None:
        redis_client.delete(key)
        raise ZulipEmailForwardError('Missed message address has already been used')
-def construct_zulip_body(message: message.Message, realm: Realm,
+def construct_zulip_body(message: message.Message, realm: Realm, show_sender: bool=False,
-                         show_sender: bool=False) -> str:
+                         remove_quotations: bool=True) -> str:
-    body = extract_body(message)
+    body = extract_body(message, remove_quotations)
    # Remove null characters, since Zulip will reject
    body = body.replace("\x00", "")
    body = filter_footer(body)
@@ -227,7 +227,7 @@ def get_message_part_by_type(message: message.Message, content_type: str) -> Opt
    return None
 talon_initialized = False
-def extract_body(message: message.Message) -> str:
+def extract_body(message: message.Message, remove_quotations: bool=True) -> str:
    import talon
    global talon_initialized
    if not talon_initialized:
@@ -238,12 +238,18 @@ def extract_body(message: message.Message) -> str:
    # that.
    plaintext_content = get_message_part_by_type(message, "text/plain")
    if plaintext_content:
-        return talon.quotations.extract_from_plain(plaintext_content)
+        if remove_quotations:
            return talon.quotations.extract_from_plain(plaintext_content)
        else:
            return plaintext_content
    # If we only have an HTML version, try to make that look nice.
    html_content = get_message_part_by_type(message, "text/html")
    if html_content:
-        return convert_html_to_markdown(talon.quotations.extract_from_html(html_content))
+        if remove_quotations:
            return convert_html_to_markdown(talon.quotations.extract_from_html(html_content))
        else:
            return convert_html_to_markdown(html_content)
    if plaintext_content is not None or html_content is not None:
        raise ZulipEmailForwardUserError("Email has no nonempty body sections; ignoring.")
@@ -253,7 +259,7 @@ def extract_body(message: message.Message) -> str:
 def filter_footer(text: str) -> str:
    # Try to filter out obvious footers.
-    possible_footers = [line for line in text.split("\n") if line.strip().startswith("--")]
+    possible_footers = [line for line in text.split("\n") if line.strip() == "--"]
    if len(possible_footers) != 1:
        # Be conservative and don't try to scrub content if there
        # isn't a trivial footer structure.
@@ -326,13 +332,21 @@ def strip_from_subject(subject: str) -> str:
    stripped = re.sub(reg, "", subject, flags = re.IGNORECASE | re.MULTILINE)
    return stripped.strip()
 def is_forwarded(subject: str) -> bool:
    # regex taken from strip_from_subject, we use it to detect various forms
    # of FWD at the beginning of the subject.
    reg = r"([\[\(] *)?\b(FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$"
    return bool(re.match(reg, subject, flags=re.IGNORECASE))
 def process_stream_message(to: str, message: message.Message,
                           debug_info: Dict[str, Any]) -> None:
    subject_header = str(make_header(decode_header(message.get("Subject", ""))))
    subject = strip_from_subject(subject_header) or "(no topic)"
    stream, show_sender = extract_and_validate(to)
-    body = construct_zulip_body(message, stream.realm, show_sender)
+    # Don't remove quotations if message is forwarded:
    remove_quotations = not is_forwarded(subject_header)
    body = construct_zulip_body(message, stream.realm, show_sender, remove_quotations)
    debug_info["stream"] = stream
    send_zulip(settings.EMAIL_GATEWAY_BOT, stream, subject, body)
    logger.info("Successfully processed email to %s (%s)" % (
--- a/zerver/tests/test_email_mirror.py
+++ b/zerver/tests/test_email_mirror.py
@@ -30,8 +30,10 @@ from zerver.lib.email_mirror import (
    create_missed_message_address,
    get_missed_message_token_from_address,
    strip_from_subject,
    is_forwarded,
 )
 from zerver.lib.notifications import convert_html_to_markdown
 from zerver.lib.send_email import FromAddress
 from email.mime.text import MIMEText
@@ -445,6 +447,17 @@ class TestEmptyGatewaySetting(ZulipTestCase):
            self.assertEqual(test_address, '')
 class TestReplyExtraction(ZulipTestCase):
    def test_is_forwarded(self) -> None:
        self.assertTrue(is_forwarded("FWD: hey"))
        self.assertTrue(is_forwarded("fwd: hi"))
        self.assertTrue(is_forwarded("[fwd] subject"))
        self.assertTrue(is_forwarded("FWD: RE:"))
        self.assertTrue(is_forwarded("Fwd: RE: fwd: re: subject"))
        self.assertFalse(is_forwarded("subject"))
        self.assertFalse(is_forwarded("RE: FWD: hi"))
    def test_reply_is_extracted_from_plain(self) -> None:
        # build dummy messages for stream
@@ -476,6 +489,13 @@ class TestReplyExtraction(ZulipTestCase):
        self.assertEqual(message.content, "Reply")
        # Don't extract if Subject indicates the email has been forwarded into the mirror:
        del incoming_valid_message['Subject']
        incoming_valid_message['Subject'] = 'FWD: TestStreamEmailMessages Subject'
        process_message(incoming_valid_message)
        message = most_recent_message(user_profile)
        self.assertEqual(message.content, text)
    def test_reply_is_extracted_from_html(self) -> None:
        # build dummy messages for stream
@@ -520,6 +540,12 @@ class TestReplyExtraction(ZulipTestCase):
        self.assertEqual(message.content, 'Reply')
        # Don't extract if Subject indicates the email has been forwarded into the mirror:
        del incoming_valid_message['Subject']
        incoming_valid_message['Subject'] = 'FWD: TestStreamEmailMessages Subject'
        process_message(incoming_valid_message)
        message = most_recent_message(user_profile)
        self.assertEqual(message.content, convert_html_to_markdown(html))
 class TestScriptMTA(ZulipTestCase):