python: Skip unnecessary decode before BeautifulSoup parsing.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg
2020-10-29 17:21:40 -07:00
committed by Tim Abbott
parent 86e8d81c7f
commit 7c4f68d9cf
3 changed files with 8 additions and 13 deletions

View File

@@ -635,7 +635,7 @@ def to_dict_cache_key_id(message_id: int) -> str:
def to_dict_cache_key(message: 'Message', realm_id: Optional[int]=None) -> str:
return to_dict_cache_key_id(message.id)
def open_graph_description_cache_key(content: Any, request: HttpRequest) -> str:
def open_graph_description_cache_key(content: bytes, request: HttpRequest) -> str:
return 'open_graph_description_path:{}'.format(make_safe_digest(request.META['PATH_INFO']))
def flush_message(sender: Any, **kwargs: Any) -> None:

View File

@@ -1,4 +1,4 @@
from typing import Mapping
from typing import Mapping, Union
from bs4 import BeautifulSoup
from django.http import HttpRequest
@@ -7,7 +7,7 @@ from django.utils.html import escape
from zerver.lib.cache import cache_with_key, open_graph_description_cache_key
def html_to_text(content: str, tags: Mapping[str, str] = {'p': ' | '}) -> str:
def html_to_text(content: Union[str, bytes], tags: Mapping[str, str] = {'p': ' | '}) -> str:
bs = BeautifulSoup(content, features='lxml')
# Skip any admonition (warning) blocks, since they're
# usually something about users needing to be an
@@ -35,5 +35,4 @@ def html_to_text(content: str, tags: Mapping[str, str] = {'p': ' | '}) -> str:
@cache_with_key(open_graph_description_cache_key, timeout=3600*24)
def get_content_description(content: bytes, request: HttpRequest) -> str:
str_content = content.decode("utf-8")
return html_to_text(str_content)
return html_to_text(content)

View File

@@ -54,8 +54,7 @@ class OpenGraphTest(ZulipTestCase):
status_code: int=200) -> None:
response = self.client_get(path)
self.assertEqual(response.status_code, status_code)
decoded = response.content.decode('utf-8')
bs = BeautifulSoup(decoded, features='lxml')
bs = BeautifulSoup(response.content, features='lxml')
open_graph_title = bs.select_one('meta[property="og:title"]').get('content')
self.assertEqual(open_graph_title, title)
@@ -157,8 +156,7 @@ class OpenGraphTest(ZulipTestCase):
response = self.client_get('/login/')
self.assertEqual(response.status_code, 200)
decoded = response.content.decode('utf-8')
bs = BeautifulSoup(decoded, features='lxml')
bs = BeautifulSoup(response.content, features='lxml')
open_graph_image = bs.select_one('meta[property="og:image"]').get('content')
self.assertEqual(open_graph_image, f'{realm.uri}{realm_icon}')
@@ -171,8 +169,7 @@ class OpenGraphTest(ZulipTestCase):
response = self.client_get('/login/')
self.assertEqual(response.status_code, 200)
decoded = response.content.decode('utf-8')
bs = BeautifulSoup(decoded, features='lxml')
bs = BeautifulSoup(response.content, features='lxml')
open_graph_image = bs.select_one('meta[property="og:image"]').get('content')
self.assertEqual(open_graph_image, icon_url)
@@ -180,8 +177,7 @@ class OpenGraphTest(ZulipTestCase):
response = self.client_get('/api/', subdomain='')
self.assertEqual(response.status_code, 200)
decoded = response.content.decode('utf-8')
bs = BeautifulSoup(decoded, features='lxml')
bs = BeautifulSoup(response.content, features='lxml')
open_graph_url = bs.select_one('meta[property="og:url"]').get('content')
self.assertTrue(open_graph_url.endswith('/api/'))