mirror of
https://github.com/zulip/zulip.git
synced 2025-11-02 21:13:36 +00:00
email_mirror: Fix parsing of unicode in subject headers.
Previously, we had some hand-written logic for parsing the subject line of the email's headers and turning it into a Python string using each of the valid encodings for an email. That logic was buggy, and sometimes resulted in a bytes object being passed into `send_zulip`, which would eventually throw an exception. The fix for this is to use the Python standard library's `make_header` function (applied to the output of `decode_header`) for handling internationalized email headers. https://stackoverflow.com/questions/7331351/python-email-header-decoding-utf-8
This commit is contained in:
@@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional, Union, cast
|
||||
import logging
|
||||
import re
|
||||
|
||||
from email.header import decode_header, Header
|
||||
from email.header import decode_header, make_header
|
||||
from email.utils import getaddresses
|
||||
import email.message as message
|
||||
|
||||
@@ -326,17 +326,8 @@ def process_missed_message(to: str, message: message.Message, pre_checked: bool)
|
||||
send_to_missed_message_address(to, message)
|
||||
|
||||
def process_message(message: message.Message, rcpt_to: Optional[str]=None, pre_checked: bool=False) -> None:
|
||||
subject_header = strip_from_subject(str(message.get("Subject", "")))
|
||||
if subject_header == "":
|
||||
subject_header = "(no topic)"
|
||||
encoded_subject, encoding = decode_header(subject_header)[0]
|
||||
if encoding is None:
|
||||
subject = cast(str, encoded_subject) # encoded_subject has type str when encoding is None
|
||||
else:
|
||||
try:
|
||||
subject = encoded_subject.decode(encoding)
|
||||
except (UnicodeDecodeError, LookupError):
|
||||
subject = "(unreadable subject)"
|
||||
subject_header = make_header(decode_header(message.get("Subject", "")))
|
||||
subject = strip_from_subject(str(subject_header)) or "(no topic)"
|
||||
|
||||
debug_info = {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user