email_mirror: Fix parsing of unicode in subject headers.

Previously, we had some hand-written logic for parsing the subject
line of the email's headers and turning it into a Python string using
each of the valid encodings for an email.  That logic was buggy, and
sometimes resulted in a bytes object being passed into `send_zulip`,
which would eventually throw an exception.

The fix for this is to use the Python standard library's `make_header`
function (from `email.header`) to handle internationalized email headers.

https://stackoverflow.com/questions/7331351/python-email-header-decoding-utf-8
This commit is contained in:
Tim Abbott
2019-01-07 10:17:21 -08:00
parent 9ddadd39f4
commit 828577c3b2

View File

@@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional, Union, cast
import logging
import re
from email.header import decode_header, Header
from email.header import decode_header, make_header
from email.utils import getaddresses
import email.message as message
@@ -326,17 +326,8 @@ def process_missed_message(to: str, message: message.Message, pre_checked: bool)
send_to_missed_message_address(to, message)
def process_message(message: message.Message, rcpt_to: Optional[str]=None, pre_checked: bool=False) -> None:
subject_header = strip_from_subject(str(message.get("Subject", "")))
if subject_header == "":
subject_header = "(no topic)"
encoded_subject, encoding = decode_header(subject_header)[0]
if encoding is None:
subject = cast(str, encoded_subject) # encoded_subject has type str when encoding is None
else:
try:
subject = encoded_subject.decode(encoding)
except (UnicodeDecodeError, LookupError):
subject = "(unreadable subject)"
subject_header = make_header(decode_header(message.get("Subject", "")))
subject = strip_from_subject(str(subject_header)) or "(no topic)"
debug_info = {}