mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-03 21:43:21 +00:00 
			
		
		
		
	email_gateway: Use html2text directly.
In the rare case that Zulip receives an email in HTML format only, we originally (in code dating to 2013) shelled out to html2markdown/python-html2text in order to convert the HTML into Markdown. We have long since added html2text as a reasonably managed Python dependency of Zulip, so we should just use it directly here.
This commit is contained in:
		@@ -27,10 +27,10 @@ from zerver.models import (
 | 
			
		||||
 | 
			
		||||
from datetime import timedelta
 | 
			
		||||
from email.utils import formataddr
 | 
			
		||||
import html2text
 | 
			
		||||
from lxml.cssselect import CSSSelector
 | 
			
		||||
import lxml.html
 | 
			
		||||
import re
 | 
			
		||||
import subprocess
 | 
			
		||||
from collections import defaultdict
 | 
			
		||||
import pytz
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
@@ -580,22 +580,9 @@ def enqueue_welcome_emails(user: UserProfile, realm_creation: bool=False) -> Non
 | 
			
		||||
            from_address=from_address, context=context, delay=followup_day2_email_delay(user))
 | 
			
		||||
 | 
			
		||||
def convert_html_to_markdown(html: str) -> str:
 | 
			
		||||
    # On Linux, the tool installs as html2markdown, and there's a command called
 | 
			
		||||
    # html2text that does something totally different. On OSX, the tool installs
 | 
			
		||||
    # as html2text.
 | 
			
		||||
    commands = ["html2markdown", "html2text"]
 | 
			
		||||
    parser = html2text.HTML2Text()
 | 
			
		||||
    markdown = parser.handle(html).strip()
 | 
			
		||||
 | 
			
		||||
    for command in commands:
 | 
			
		||||
        try:
 | 
			
		||||
            # A body width of 0 means do not try to wrap the text for us.
 | 
			
		||||
            p = subprocess.Popen(
 | 
			
		||||
                [command, "--body-width=0"], stdout=subprocess.PIPE,
 | 
			
		||||
                stdin=subprocess.PIPE, stderr=subprocess.STDOUT)
 | 
			
		||||
            break
 | 
			
		||||
        except OSError:
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
    markdown = p.communicate(input=html.encode('utf-8'))[0].decode('utf-8').strip()
 | 
			
		||||
    # We want images to get linked and inline previewed, but html2text will turn
 | 
			
		||||
    # them into links of the form `![](http://foo.com/image.png)`, which is
 | 
			
		||||
    # ugly. Run a regex over the resulting description, turning links of the
 | 
			
		||||
 
 | 
			
		||||
@@ -114,7 +114,7 @@ Requester Bob <requester-bob@example.com> added a {} note to \
 | 
			
		||||
        """
 | 
			
		||||
        expected_topic = u"#12: Not enough ☃ guinea pigs"
 | 
			
		||||
        expected_message = """
 | 
			
		||||
Requester \u2603 Bob <requester-bob@example.com> created [ticket #12](http://test1234zzz.freshdesk.com/helpdesk/tickets/12):\n\n``` quote\nThere are too many cat pictures on the internet \u2603. We need more guinea pigs. Exhibit 1:\n\n  \n\n\n[guinea_pig.png](http://cdn.freshdesk.com/data/helpdesk/attachments/production/12744808/original/guinea_pig.png)\n```\n\n* **Type**: Problem\n* **Priority**: Urgent\n* **Status**: Open
 | 
			
		||||
Requester \u2603 Bob <requester-bob@example.com> created [ticket #12](http://test1234zzz.freshdesk.com/helpdesk/tickets/12):\n\n``` quote\nThere are too many cat pictures on the internet \u2603. We need more guinea pigs.\nExhibit 1:\n\n  \n\n[guinea_pig.png](http://cdn.freshdesk.com/data/helpdesk/attachments/production/12744808/original/guinea_pig.png)\n```\n\n* **Type**: Problem\n* **Priority**: Urgent\n* **Status**: Open
 | 
			
		||||
""".strip()
 | 
			
		||||
        self.api_stream_message(self.TEST_USER_EMAIL, "inline_images", expected_topic, expected_message,
 | 
			
		||||
                                content_type="application/x-www-form-urlencoded")
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user