mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 04:52:12 +00:00
mattermost_import: Except error when converting messages HTML.
This adds a try-except block around the html2text call that converts raw
messages from HTML to Markdown.
convert_html_to_text is added mainly for testing convenience. We don't
have any sample of Mattermost's problematic content that could trigger
this sort of error yet, so the test mocks convert_html_to_text to raise
an error instead.
(cherry picked from commit 201a71b575
)
This commit is contained in:
@@ -393,6 +393,11 @@ def process_message_attachments(
|
||||
return content, has_image
|
||||
|
||||
|
||||
def convert_html_to_text(content: str) -> str:
    """Convert HTML ``content`` to text with the external ``html2text`` command.

    ``content`` is fed to the command on standard input, and whatever the
    command writes to standard output is returned as a string.

    Raises:
        subprocess.CalledProcessError: if ``html2text`` exits with a
            non-zero status.
    """
    # html2text is GPL licensed, so run it as a subprocess.
    command = ["html2text", "--unicode-snob"]
    return subprocess.check_output(command, input=content, text=True)
|
||||
|
||||
|
||||
def process_raw_message_batch(
|
||||
realm_id: int,
|
||||
raw_messages: list[dict[str, Any]],
|
||||
@@ -439,8 +444,11 @@ def process_raw_message_batch(
|
||||
mention_user_ids=mention_user_ids,
|
||||
)
|
||||
|
||||
# html2text is GPL licensed, so run it as a subprocess.
|
||||
content = subprocess.check_output(["html2text", "--unicode-snob"], input=content, text=True)
|
||||
try:
|
||||
content = convert_html_to_text(content)
|
||||
except Exception:
|
||||
logging.warning("Error converting HTML to text for message: '%s'; continuing", content)
|
||||
logging.warning(str(raw_message))
|
||||
|
||||
date_sent = raw_message["date_sent"]
|
||||
sender_user_id = raw_message["sender_id"]
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import filecmp
|
||||
import os
|
||||
import subprocess
|
||||
from typing import Any
|
||||
from unittest.mock import call, patch
|
||||
|
||||
@@ -981,3 +982,28 @@ class MatterMostImporter(ZulipTestCase):
|
||||
self.assertIsNotNone(message.rendered_content)
|
||||
|
||||
self.verify_emoji_code_foreign_keys()
|
||||
|
||||
def test_fail_process_raw_message_batch(self) -> None:
    """A failing HTML-to-text conversion is logged as a warning.

    ``convert_html_to_text`` is mocked to raise ``CalledProcessError``; the
    test then checks that the mock was called and that the expected
    warning line was recorded while ``do_convert_data`` ran.
    """
    # TODO: Once we have a sample of message content that can trigger this error
    # we should add that as fixture instead of mocking `convert_html_to_text`.
    fixtures_dir = self.fixture_file_name("", "mattermost_fixtures")
    export_dir = self.make_import_output_dir("mattermost")

    conversion_failure = subprocess.CalledProcessError(
        returncode=1, cmd="html2text", output="mocked failure"
    )
    expected_warning = (
        "WARNING:root:Error converting HTML to text for message: 'Xxxxxxx!'; continuing"
    )

    with (
        patch("builtins.print"),
        patch(
            "zerver.data_import.mattermost.convert_html_to_text",
            side_effect=conversion_failure,
        ) as mock_html2text,
        self.assertLogs(level="WARNING") as warn_log,
    ):
        do_convert_data(
            mattermost_data_dir=fixtures_dir,
            output_dir=export_dir,
            masking_content=True,
        )

    mock_html2text.assert_called()
    self.assertIn(expected_warning, warn_log.output)
|
||||
|
Reference in New Issue
Block a user