mirror of
https://github.com/zulip/zulip.git
synced 2025-10-27 18:13:58 +00:00
email_mirror: Don't remove quotations from forwarded messages.
Addresses point 2 of #10612. We use a regex to detect if a form of FWD indicator is present at the beginning of the subject, which means the message has been forwarded. remove_quotations argument is added to a couple of functions where it's necessary. In filter_footer, the criteria for a line to be a possible beginning of a footer is changed to line.strip() == "--", instead of line.strip().startswith("--"), because the former would remove quotations from plaintext emails. This change makes sense, because RFC 3676 specifies ""-- " as the separator line between the body and the signature of a message": https://tools.ietf.org/html/rfc3676
This commit is contained in:
committed by
Tim Abbott
parent
0633f268fb
commit
edcb6d57fc
@@ -133,9 +133,9 @@ def mark_missed_message_address_as_used(address: str) -> None:
|
|||||||
redis_client.delete(key)
|
redis_client.delete(key)
|
||||||
raise ZulipEmailForwardError('Missed message address has already been used')
|
raise ZulipEmailForwardError('Missed message address has already been used')
|
||||||
|
|
||||||
def construct_zulip_body(message: message.Message, realm: Realm,
|
def construct_zulip_body(message: message.Message, realm: Realm, show_sender: bool=False,
|
||||||
show_sender: bool=False) -> str:
|
remove_quotations: bool=True) -> str:
|
||||||
body = extract_body(message)
|
body = extract_body(message, remove_quotations)
|
||||||
# Remove null characters, since Zulip will reject
|
# Remove null characters, since Zulip will reject
|
||||||
body = body.replace("\x00", "")
|
body = body.replace("\x00", "")
|
||||||
body = filter_footer(body)
|
body = filter_footer(body)
|
||||||
@@ -227,7 +227,7 @@ def get_message_part_by_type(message: message.Message, content_type: str) -> Opt
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
talon_initialized = False
|
talon_initialized = False
|
||||||
def extract_body(message: message.Message) -> str:
|
def extract_body(message: message.Message, remove_quotations: bool=True) -> str:
|
||||||
import talon
|
import talon
|
||||||
global talon_initialized
|
global talon_initialized
|
||||||
if not talon_initialized:
|
if not talon_initialized:
|
||||||
@@ -238,12 +238,18 @@ def extract_body(message: message.Message) -> str:
|
|||||||
# that.
|
# that.
|
||||||
plaintext_content = get_message_part_by_type(message, "text/plain")
|
plaintext_content = get_message_part_by_type(message, "text/plain")
|
||||||
if plaintext_content:
|
if plaintext_content:
|
||||||
return talon.quotations.extract_from_plain(plaintext_content)
|
if remove_quotations:
|
||||||
|
return talon.quotations.extract_from_plain(plaintext_content)
|
||||||
|
else:
|
||||||
|
return plaintext_content
|
||||||
|
|
||||||
# If we only have an HTML version, try to make that look nice.
|
# If we only have an HTML version, try to make that look nice.
|
||||||
html_content = get_message_part_by_type(message, "text/html")
|
html_content = get_message_part_by_type(message, "text/html")
|
||||||
if html_content:
|
if html_content:
|
||||||
return convert_html_to_markdown(talon.quotations.extract_from_html(html_content))
|
if remove_quotations:
|
||||||
|
return convert_html_to_markdown(talon.quotations.extract_from_html(html_content))
|
||||||
|
else:
|
||||||
|
return convert_html_to_markdown(html_content)
|
||||||
|
|
||||||
if plaintext_content is not None or html_content is not None:
|
if plaintext_content is not None or html_content is not None:
|
||||||
raise ZulipEmailForwardUserError("Email has no nonempty body sections; ignoring.")
|
raise ZulipEmailForwardUserError("Email has no nonempty body sections; ignoring.")
|
||||||
@@ -253,7 +259,7 @@ def extract_body(message: message.Message) -> str:
|
|||||||
|
|
||||||
def filter_footer(text: str) -> str:
|
def filter_footer(text: str) -> str:
|
||||||
# Try to filter out obvious footers.
|
# Try to filter out obvious footers.
|
||||||
possible_footers = [line for line in text.split("\n") if line.strip().startswith("--")]
|
possible_footers = [line for line in text.split("\n") if line.strip() == "--"]
|
||||||
if len(possible_footers) != 1:
|
if len(possible_footers) != 1:
|
||||||
# Be conservative and don't try to scrub content if there
|
# Be conservative and don't try to scrub content if there
|
||||||
# isn't a trivial footer structure.
|
# isn't a trivial footer structure.
|
||||||
@@ -326,13 +332,21 @@ def strip_from_subject(subject: str) -> str:
|
|||||||
stripped = re.sub(reg, "", subject, flags = re.IGNORECASE | re.MULTILINE)
|
stripped = re.sub(reg, "", subject, flags = re.IGNORECASE | re.MULTILINE)
|
||||||
return stripped.strip()
|
return stripped.strip()
|
||||||
|
|
||||||
|
def is_forwarded(subject: str) -> bool:
|
||||||
|
# regex taken from strip_from_subject, we use it to detect various forms
|
||||||
|
# of FWD at the beginning of the subject.
|
||||||
|
reg = r"([\[\(] *)?\b(FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$"
|
||||||
|
return bool(re.match(reg, subject, flags=re.IGNORECASE))
|
||||||
|
|
||||||
def process_stream_message(to: str, message: message.Message,
|
def process_stream_message(to: str, message: message.Message,
|
||||||
debug_info: Dict[str, Any]) -> None:
|
debug_info: Dict[str, Any]) -> None:
|
||||||
subject_header = str(make_header(decode_header(message.get("Subject", ""))))
|
subject_header = str(make_header(decode_header(message.get("Subject", ""))))
|
||||||
subject = strip_from_subject(subject_header) or "(no topic)"
|
subject = strip_from_subject(subject_header) or "(no topic)"
|
||||||
|
|
||||||
stream, show_sender = extract_and_validate(to)
|
stream, show_sender = extract_and_validate(to)
|
||||||
body = construct_zulip_body(message, stream.realm, show_sender)
|
# Don't remove quotations if message is forwarded:
|
||||||
|
remove_quotations = not is_forwarded(subject_header)
|
||||||
|
body = construct_zulip_body(message, stream.realm, show_sender, remove_quotations)
|
||||||
debug_info["stream"] = stream
|
debug_info["stream"] = stream
|
||||||
send_zulip(settings.EMAIL_GATEWAY_BOT, stream, subject, body)
|
send_zulip(settings.EMAIL_GATEWAY_BOT, stream, subject, body)
|
||||||
logger.info("Successfully processed email to %s (%s)" % (
|
logger.info("Successfully processed email to %s (%s)" % (
|
||||||
|
|||||||
@@ -30,8 +30,10 @@ from zerver.lib.email_mirror import (
|
|||||||
create_missed_message_address,
|
create_missed_message_address,
|
||||||
get_missed_message_token_from_address,
|
get_missed_message_token_from_address,
|
||||||
strip_from_subject,
|
strip_from_subject,
|
||||||
|
is_forwarded,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from zerver.lib.notifications import convert_html_to_markdown
|
||||||
from zerver.lib.send_email import FromAddress
|
from zerver.lib.send_email import FromAddress
|
||||||
|
|
||||||
from email.mime.text import MIMEText
|
from email.mime.text import MIMEText
|
||||||
@@ -445,6 +447,17 @@ class TestEmptyGatewaySetting(ZulipTestCase):
|
|||||||
self.assertEqual(test_address, '')
|
self.assertEqual(test_address, '')
|
||||||
|
|
||||||
class TestReplyExtraction(ZulipTestCase):
|
class TestReplyExtraction(ZulipTestCase):
|
||||||
|
def test_is_forwarded(self) -> None:
|
||||||
|
self.assertTrue(is_forwarded("FWD: hey"))
|
||||||
|
self.assertTrue(is_forwarded("fwd: hi"))
|
||||||
|
self.assertTrue(is_forwarded("[fwd] subject"))
|
||||||
|
|
||||||
|
self.assertTrue(is_forwarded("FWD: RE:"))
|
||||||
|
self.assertTrue(is_forwarded("Fwd: RE: fwd: re: subject"))
|
||||||
|
|
||||||
|
self.assertFalse(is_forwarded("subject"))
|
||||||
|
self.assertFalse(is_forwarded("RE: FWD: hi"))
|
||||||
|
|
||||||
def test_reply_is_extracted_from_plain(self) -> None:
|
def test_reply_is_extracted_from_plain(self) -> None:
|
||||||
|
|
||||||
# build dummy messages for stream
|
# build dummy messages for stream
|
||||||
@@ -476,6 +489,13 @@ class TestReplyExtraction(ZulipTestCase):
|
|||||||
|
|
||||||
self.assertEqual(message.content, "Reply")
|
self.assertEqual(message.content, "Reply")
|
||||||
|
|
||||||
|
# Don't extract if Subject indicates the email has been forwarded into the mirror:
|
||||||
|
del incoming_valid_message['Subject']
|
||||||
|
incoming_valid_message['Subject'] = 'FWD: TestStreamEmailMessages Subject'
|
||||||
|
process_message(incoming_valid_message)
|
||||||
|
message = most_recent_message(user_profile)
|
||||||
|
self.assertEqual(message.content, text)
|
||||||
|
|
||||||
def test_reply_is_extracted_from_html(self) -> None:
|
def test_reply_is_extracted_from_html(self) -> None:
|
||||||
|
|
||||||
# build dummy messages for stream
|
# build dummy messages for stream
|
||||||
@@ -520,6 +540,12 @@ class TestReplyExtraction(ZulipTestCase):
|
|||||||
|
|
||||||
self.assertEqual(message.content, 'Reply')
|
self.assertEqual(message.content, 'Reply')
|
||||||
|
|
||||||
|
# Don't extract if Subject indicates the email has been forwarded into the mirror:
|
||||||
|
del incoming_valid_message['Subject']
|
||||||
|
incoming_valid_message['Subject'] = 'FWD: TestStreamEmailMessages Subject'
|
||||||
|
process_message(incoming_valid_message)
|
||||||
|
message = most_recent_message(user_profile)
|
||||||
|
self.assertEqual(message.content, convert_html_to_markdown(html))
|
||||||
|
|
||||||
class TestScriptMTA(ZulipTestCase):
|
class TestScriptMTA(ZulipTestCase):
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user