email_gateway: Use html2text directly.

In the rare case that Zulip receives an email with only an HTML
format, we originally (code dating to 2013) shelled out to
html2markdown/python-html2text in order to convert the HTML into
markdown.

We long since added html2text as a reasonably managed Python
dependency of Zulip; we should just use it here.
This commit is contained in:
Tim Abbott
2019-07-23 17:49:16 -07:00
parent 2ac944c31f
commit daca742e9f
2 changed files with 4 additions and 17 deletions

View File

@@ -27,10 +27,10 @@ from zerver.models import (
from datetime import timedelta
from email.utils import formataddr
import html2text
from lxml.cssselect import CSSSelector
import lxml.html
import re
import subprocess
from collections import defaultdict
import pytz
from bs4 import BeautifulSoup
@@ -580,22 +580,9 @@ def enqueue_welcome_emails(user: UserProfile, realm_creation: bool=False) -> Non
from_address=from_address, context=context, delay=followup_day2_email_delay(user))
def convert_html_to_markdown(html: str) -> str:
# On Linux, the tool installs as html2markdown, and there's a command called
# html2text that does something totally different. On OSX, the tool installs
# as html2text.
commands = ["html2markdown", "html2text"]
parser = html2text.HTML2Text()
markdown = parser.handle(html).strip()
for command in commands:
try:
# A body width of 0 means do not try to wrap the text for us.
p = subprocess.Popen(
[command, "--body-width=0"], stdout=subprocess.PIPE,
stdin=subprocess.PIPE, stderr=subprocess.STDOUT)
break
except OSError:
continue
markdown = p.communicate(input=html.encode('utf-8'))[0].decode('utf-8').strip()
# We want images to get linked and inline previewed, but html2text will turn
# them into links of the form `![](http://foo.com/image.png)`, which is
# ugly. Run a regex over the resulting description, turning links of the

View File

@@ -114,7 +114,7 @@ Requester Bob <requester-bob@example.com> added a {} note to \
"""
expected_topic = u"#12: Not enough ☃ guinea pigs"
expected_message = """
Requester \u2603 Bob <requester-bob@example.com> created [ticket #12](http://test1234zzz.freshdesk.com/helpdesk/tickets/12):\n\n``` quote\nThere are too many cat pictures on the internet \u2603. We need more guinea pigs. Exhibit 1:\n\n \n\n\n[guinea_pig.png](http://cdn.freshdesk.com/data/helpdesk/attachments/production/12744808/original/guinea_pig.png)\n```\n\n* **Type**: Problem\n* **Priority**: Urgent\n* **Status**: Open
Requester \u2603 Bob <requester-bob@example.com> created [ticket #12](http://test1234zzz.freshdesk.com/helpdesk/tickets/12):\n\n``` quote\nThere are too many cat pictures on the internet \u2603. We need more guinea pigs.\nExhibit 1:\n\n \n\n[guinea_pig.png](http://cdn.freshdesk.com/data/helpdesk/attachments/production/12744808/original/guinea_pig.png)\n```\n\n* **Type**: Problem\n* **Priority**: Urgent\n* **Status**: Open
""".strip()
self.api_stream_message(self.TEST_USER_EMAIL, "inline_images", expected_topic, expected_message,
content_type="application/x-www-form-urlencoded")