From f3254bb558b904e8e26e107013b0b8fc1c6da445 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Sat, 25 Jun 2022 16:14:48 -0700 Subject: [PATCH] mattermost: Run html2text as a subprocess. html2text is GPL licensed. Signed-off-by: Anders Kaseorg --- zerver/data_import/mattermost.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/zerver/data_import/mattermost.py b/zerver/data_import/mattermost.py index f2b8cda37a..ade304072e 100644 --- a/zerver/data_import/mattermost.py +++ b/zerver/data_import/mattermost.py @@ -422,10 +422,6 @@ def process_raw_message_batch( mention_map: Dict[int, Set[int]] = {} zerver_message = [] - import html2text - - h = html2text.HTML2Text() - pm_members = {} for raw_message in raw_messages: @@ -437,7 +433,9 @@ def process_raw_message_batch( content=raw_message["content"], mention_user_ids=mention_user_ids, ) - content = h.handle(content) + + # html2text is GPL licensed, so run it as a subprocess. + content = subprocess.check_output(["html2text"], input=content, text=True) if len(content) > 10000: # nocoverage logging.info("skipping too-long message of length %s", len(content))