mirror of
https://github.com/zulip/zulip.git
synced 2025-11-04 14:03:30 +00:00
preview: Use a dataclass for the embed data.
This is significantly cleaner than passing around `Dict[str, Any]` all
of the time.
(cherry picked from commit 327ff9ea0f)
This commit is contained in:
@@ -64,6 +64,7 @@ from zerver.lib.timezone import common_timezones
|
|||||||
from zerver.lib.types import LinkifierDict
|
from zerver.lib.types import LinkifierDict
|
||||||
from zerver.lib.url_encoding import encode_stream, hash_util_encode
|
from zerver.lib.url_encoding import encode_stream, hash_util_encode
|
||||||
from zerver.lib.url_preview import preview as link_preview
|
from zerver.lib.url_preview import preview as link_preview
|
||||||
|
from zerver.lib.url_preview.types import UrlEmbedData, UrlOEmbedData
|
||||||
from zerver.models import EmojiInfo, Message, Realm, linkifiers_for_realm
|
from zerver.models import EmojiInfo, Message, Realm, linkifiers_for_realm
|
||||||
|
|
||||||
ReturnT = TypeVar("ReturnT")
|
ReturnT = TypeVar("ReturnT")
|
||||||
@@ -682,54 +683,44 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
|
|||||||
desc_div = SubElement(summary_div, "desc")
|
desc_div = SubElement(summary_div, "desc")
|
||||||
desc_div.set("class", "message_inline_image_desc")
|
desc_div.set("class", "message_inline_image_desc")
|
||||||
|
|
||||||
def add_oembed_data(self, root: Element, link: str, extracted_data: Dict[str, Any]) -> bool:
|
def add_oembed_data(self, root: Element, link: str, extracted_data: UrlOEmbedData) -> None:
|
||||||
oembed_resource_type = extracted_data.get("type", "")
|
if extracted_data.image is None:
|
||||||
title = extracted_data.get("title")
|
# Don't add an embed if an image is not found
|
||||||
|
|
||||||
if oembed_resource_type == "photo":
|
|
||||||
image = extracted_data.get("image")
|
|
||||||
if image:
|
|
||||||
self.add_a(
|
|
||||||
root,
|
|
||||||
image_url=image,
|
|
||||||
link=link,
|
|
||||||
title=title,
|
|
||||||
)
|
|
||||||
return True
|
|
||||||
|
|
||||||
elif oembed_resource_type == "video":
|
|
||||||
html = extracted_data["html"]
|
|
||||||
image = extracted_data["image"]
|
|
||||||
title = extracted_data.get("title")
|
|
||||||
description = extracted_data.get("description")
|
|
||||||
self.add_a(
|
|
||||||
root,
|
|
||||||
image_url=image,
|
|
||||||
link=link,
|
|
||||||
title=title,
|
|
||||||
desc=description,
|
|
||||||
class_attr="embed-video message_inline_image",
|
|
||||||
data_id=html,
|
|
||||||
already_thumbnailed=True,
|
|
||||||
)
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def add_embed(self, root: Element, link: str, extracted_data: Dict[str, Any]) -> None:
|
|
||||||
oembed = extracted_data.get("oembed", False)
|
|
||||||
if oembed and self.add_oembed_data(root, link, extracted_data):
|
|
||||||
return
|
return
|
||||||
|
|
||||||
img_link = extracted_data.get("image")
|
if extracted_data.type == "photo":
|
||||||
if not img_link:
|
self.add_a(
|
||||||
|
root,
|
||||||
|
image_url=extracted_data.image,
|
||||||
|
link=link,
|
||||||
|
title=extracted_data.title,
|
||||||
|
)
|
||||||
|
|
||||||
|
elif extracted_data.type == "video":
|
||||||
|
self.add_a(
|
||||||
|
root,
|
||||||
|
image_url=extracted_data.image,
|
||||||
|
link=link,
|
||||||
|
title=extracted_data.title,
|
||||||
|
desc=extracted_data.description,
|
||||||
|
class_attr="embed-video message_inline_image",
|
||||||
|
data_id=extracted_data.html,
|
||||||
|
already_thumbnailed=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
def add_embed(self, root: Element, link: str, extracted_data: UrlEmbedData) -> None:
|
||||||
|
if isinstance(extracted_data, UrlOEmbedData):
|
||||||
|
self.add_oembed_data(root, link, extracted_data)
|
||||||
|
return
|
||||||
|
|
||||||
|
if extracted_data.image is None:
|
||||||
# Don't add an embed if an image is not found
|
# Don't add an embed if an image is not found
|
||||||
return
|
return
|
||||||
|
|
||||||
container = SubElement(root, "div")
|
container = SubElement(root, "div")
|
||||||
container.set("class", "message_embed")
|
container.set("class", "message_embed")
|
||||||
|
|
||||||
img_link = get_camo_url(img_link)
|
img_link = get_camo_url(extracted_data.image)
|
||||||
img = SubElement(container, "a")
|
img = SubElement(container, "a")
|
||||||
img.set("style", "background-image: url(" + css_escape(img_link) + ")")
|
img.set("style", "background-image: url(" + css_escape(img_link) + ")")
|
||||||
img.set("href", link)
|
img.set("href", link)
|
||||||
@@ -738,19 +729,17 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
|
|||||||
data_container = SubElement(container, "div")
|
data_container = SubElement(container, "div")
|
||||||
data_container.set("class", "data-container")
|
data_container.set("class", "data-container")
|
||||||
|
|
||||||
title = extracted_data.get("title")
|
if extracted_data.title:
|
||||||
if title:
|
|
||||||
title_elm = SubElement(data_container, "div")
|
title_elm = SubElement(data_container, "div")
|
||||||
title_elm.set("class", "message_embed_title")
|
title_elm.set("class", "message_embed_title")
|
||||||
a = SubElement(title_elm, "a")
|
a = SubElement(title_elm, "a")
|
||||||
a.set("href", link)
|
a.set("href", link)
|
||||||
a.set("title", title)
|
a.set("title", extracted_data.title)
|
||||||
a.text = title
|
a.text = extracted_data.title
|
||||||
description = extracted_data.get("description")
|
if extracted_data.description:
|
||||||
if description:
|
|
||||||
description_elm = SubElement(data_container, "div")
|
description_elm = SubElement(data_container, "div")
|
||||||
description_elm.set("class", "message_embed_description")
|
description_elm.set("class", "message_embed_description")
|
||||||
description_elm.text = description
|
description_elm.text = extracted_data.description
|
||||||
|
|
||||||
def get_actual_image_url(self, url: str) -> str:
|
def get_actual_image_url(self, url: str) -> str:
|
||||||
# Add specific per-site cases to convert image-preview URLs to image URLs.
|
# Add specific per-site cases to convert image-preview URLs to image URLs.
|
||||||
@@ -868,10 +857,9 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
|
|||||||
return None
|
return None
|
||||||
return match.group(2)
|
return match.group(2)
|
||||||
|
|
||||||
def youtube_title(self, extracted_data: Dict[str, Any]) -> Optional[str]:
|
def youtube_title(self, extracted_data: UrlEmbedData) -> Optional[str]:
|
||||||
title = extracted_data.get("title")
|
if extracted_data.title is not None:
|
||||||
if title is not None:
|
return f"YouTube - {extracted_data.title}"
|
||||||
return f"YouTube - {title}"
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def youtube_image(self, url: str) -> Optional[str]:
|
def youtube_image(self, url: str) -> Optional[str]:
|
||||||
@@ -897,10 +885,9 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
|
|||||||
return None
|
return None
|
||||||
return match.group(5)
|
return match.group(5)
|
||||||
|
|
||||||
def vimeo_title(self, extracted_data: Dict[str, Any]) -> Optional[str]:
|
def vimeo_title(self, extracted_data: UrlEmbedData) -> Optional[str]:
|
||||||
title = extracted_data.get("title")
|
if extracted_data.title is not None:
|
||||||
if title is not None:
|
return f"Vimeo - {extracted_data.title}"
|
||||||
return f"Vimeo - {title}"
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def twitter_text(
|
def twitter_text(
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
import json
|
import json
|
||||||
from typing import Any, Dict, Optional
|
from typing import Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from pyoembed import PyOembedException, oEmbed
|
from pyoembed import PyOembedException, oEmbed
|
||||||
|
|
||||||
|
from zerver.lib.url_preview.types import UrlEmbedData, UrlOEmbedData
|
||||||
|
|
||||||
def get_oembed_data(
|
|
||||||
url: str, maxwidth: int = 640, maxheight: int = 480
|
def get_oembed_data(url: str, maxwidth: int = 640, maxheight: int = 480) -> Optional[UrlEmbedData]:
|
||||||
) -> Optional[Dict[str, Any]]:
|
|
||||||
try:
|
try:
|
||||||
data = oEmbed(url, maxwidth=maxwidth, maxheight=maxheight)
|
data = oEmbed(url, maxwidth=maxwidth, maxheight=maxheight)
|
||||||
except (PyOembedException, json.decoder.JSONDecodeError, requests.exceptions.ConnectionError):
|
except (PyOembedException, json.decoder.JSONDecodeError, requests.exceptions.ConnectionError):
|
||||||
@@ -16,29 +16,27 @@ def get_oembed_data(
|
|||||||
oembed_resource_type = data.get("type", "")
|
oembed_resource_type = data.get("type", "")
|
||||||
image = data.get("url", data.get("image"))
|
image = data.get("url", data.get("image"))
|
||||||
thumbnail = data.get("thumbnail_url")
|
thumbnail = data.get("thumbnail_url")
|
||||||
html = data.pop("html", "")
|
html = data.get("html", "")
|
||||||
if oembed_resource_type == "photo" and image:
|
if oembed_resource_type == "photo" and image:
|
||||||
return dict(
|
return UrlOEmbedData(
|
||||||
oembed=True,
|
|
||||||
image=image,
|
image=image,
|
||||||
type=oembed_resource_type,
|
type="photo",
|
||||||
title=data.get("title"),
|
title=data.get("title"),
|
||||||
description=data.get("description"),
|
description=data.get("description"),
|
||||||
)
|
)
|
||||||
|
|
||||||
if oembed_resource_type == "video" and html and thumbnail:
|
if oembed_resource_type == "video" and html and thumbnail:
|
||||||
return dict(
|
return UrlOEmbedData(
|
||||||
oembed=True,
|
|
||||||
image=thumbnail,
|
image=thumbnail,
|
||||||
type=oembed_resource_type,
|
type="video",
|
||||||
html=strip_cdata(html),
|
html=strip_cdata(html),
|
||||||
title=data.get("title"),
|
title=data.get("title"),
|
||||||
description=data.get("description"),
|
description=data.get("description"),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Otherwise, start with just the embed type.
|
# Otherwise, use the title/description from pyembed as the basis
|
||||||
return dict(
|
# for our other parsers
|
||||||
type=oembed_resource_type,
|
return UrlEmbedData(
|
||||||
title=data.get("title"),
|
title=data.get("title"),
|
||||||
description=data.get("description"),
|
description=data.get("description"),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
import cgi
|
import cgi
|
||||||
from typing import Any, Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
from zerver.lib.url_preview.types import UrlEmbedData
|
||||||
|
|
||||||
|
|
||||||
class BaseParser:
|
class BaseParser:
|
||||||
@@ -14,5 +16,5 @@ class BaseParser:
|
|||||||
charset = cgi.parse_header(content_type)[1].get("charset")
|
charset = cgi.parse_header(content_type)[1].get("charset")
|
||||||
self._soup = BeautifulSoup(html_source, "lxml", from_encoding=charset)
|
self._soup = BeautifulSoup(html_source, "lxml", from_encoding=charset)
|
||||||
|
|
||||||
def extract_data(self) -> Any:
|
def extract_data(self) -> UrlEmbedData:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|||||||
@@ -1,18 +1,19 @@
|
|||||||
from typing import Dict, Optional
|
from typing import Optional
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from bs4.element import Tag
|
from bs4.element import Tag
|
||||||
|
|
||||||
from zerver.lib.url_preview.parsers.base import BaseParser
|
from zerver.lib.url_preview.parsers.base import BaseParser
|
||||||
|
from zerver.lib.url_preview.types import UrlEmbedData
|
||||||
|
|
||||||
|
|
||||||
class GenericParser(BaseParser):
|
class GenericParser(BaseParser):
|
||||||
def extract_data(self) -> Dict[str, Optional[str]]:
|
def extract_data(self) -> UrlEmbedData:
|
||||||
return {
|
return UrlEmbedData(
|
||||||
"title": self._get_title(),
|
title=self._get_title(),
|
||||||
"description": self._get_description(),
|
description=self._get_description(),
|
||||||
"image": self._get_image(),
|
image=self._get_image(),
|
||||||
}
|
)
|
||||||
|
|
||||||
def _get_title(self) -> Optional[str]:
|
def _get_title(self) -> Optional[str]:
|
||||||
soup = self._soup
|
soup = self._soup
|
||||||
|
|||||||
@@ -1,37 +1,33 @@
|
|||||||
from typing import Dict
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from zerver.lib.url_preview.types import UrlEmbedData
|
||||||
|
|
||||||
from .base import BaseParser
|
from .base import BaseParser
|
||||||
|
|
||||||
|
|
||||||
class OpenGraphParser(BaseParser):
|
class OpenGraphParser(BaseParser):
|
||||||
allowed_og_properties = {
|
def extract_data(self) -> UrlEmbedData:
|
||||||
"og:title",
|
|
||||||
"og:description",
|
|
||||||
"og:image",
|
|
||||||
}
|
|
||||||
|
|
||||||
def extract_data(self) -> Dict[str, str]:
|
|
||||||
meta = self._soup.findAll("meta")
|
meta = self._soup.findAll("meta")
|
||||||
result = {}
|
|
||||||
|
data = UrlEmbedData()
|
||||||
|
|
||||||
for tag in meta:
|
for tag in meta:
|
||||||
if not tag.has_attr("property"):
|
if not tag.has_attr("property"):
|
||||||
continue
|
continue
|
||||||
if tag["property"] not in self.allowed_og_properties:
|
|
||||||
continue
|
|
||||||
|
|
||||||
og_property_name = tag["property"][len("og:") :]
|
|
||||||
if not tag.has_attr("content"):
|
if not tag.has_attr("content"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if og_property_name == "image":
|
if tag["property"] == "og:title":
|
||||||
|
data.title = tag["content"]
|
||||||
|
elif tag["property"] == "og:description":
|
||||||
|
data.description = tag["content"]
|
||||||
|
elif tag["property"] == "og:image":
|
||||||
try:
|
try:
|
||||||
# We use urlparse and not URLValidator because we
|
# We use urlparse and not URLValidator because we
|
||||||
# need to support relative URLs.
|
# need to support relative URLs.
|
||||||
urlparse(tag["content"])
|
urlparse(tag["content"])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
continue
|
continue
|
||||||
|
data.image = tag["content"]
|
||||||
|
|
||||||
result[og_property_name] = tag["content"]
|
return data
|
||||||
|
|
||||||
return result
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Any, Callable, Dict, Match, Optional
|
from typing import Any, Callable, Match, Optional
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
import magic
|
import magic
|
||||||
@@ -13,6 +13,7 @@ from zerver.lib.outgoing_http import OutgoingSession
|
|||||||
from zerver.lib.pysa import mark_sanitized
|
from zerver.lib.pysa import mark_sanitized
|
||||||
from zerver.lib.url_preview.oembed import get_oembed_data
|
from zerver.lib.url_preview.oembed import get_oembed_data
|
||||||
from zerver.lib.url_preview.parsers import GenericParser, OpenGraphParser
|
from zerver.lib.url_preview.parsers import GenericParser, OpenGraphParser
|
||||||
|
from zerver.lib.url_preview.types import UrlEmbedData, UrlOEmbedData
|
||||||
|
|
||||||
# FIXME: Should we use a database cache or a memcached in production? What if
|
# FIXME: Should we use a database cache or a memcached in production? What if
|
||||||
# OpenGraph data is changed for a site?
|
# OpenGraph data is changed for a site?
|
||||||
@@ -87,41 +88,39 @@ def catch_network_errors(func: Callable[..., Any]) -> Callable[..., Any]:
|
|||||||
@cache_with_key(preview_url_cache_key, cache_name=CACHE_NAME, with_statsd_key="urlpreview_data")
|
@cache_with_key(preview_url_cache_key, cache_name=CACHE_NAME, with_statsd_key="urlpreview_data")
|
||||||
def get_link_embed_data(
|
def get_link_embed_data(
|
||||||
url: str, maxwidth: int = 640, maxheight: int = 480
|
url: str, maxwidth: int = 640, maxheight: int = 480
|
||||||
) -> Optional[Dict[str, Any]]:
|
) -> Optional[UrlEmbedData]:
|
||||||
if not is_link(url):
|
if not is_link(url):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if not valid_content_type(url):
|
if not valid_content_type(url):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# We are using two different mechanisms to get the embed data
|
# The oembed data from pyoembed may be complete enough to return
|
||||||
# 1. Use oEmbed data, if found, for photo and video "type" sites
|
# as-is; if so, we use it. Otherwise, we use it as a _base_ for
|
||||||
# 2. Otherwise, use a combination of Open Graph tags and Meta tags
|
# the other, less sophisticated techniques which we apply as
|
||||||
data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}
|
# successive fallbacks.
|
||||||
if data.get("oembed"):
|
data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
|
||||||
|
if data is not None and isinstance(data, UrlOEmbedData):
|
||||||
return data
|
return data
|
||||||
|
|
||||||
response = PreviewSession().get(mark_sanitized(url), stream=True)
|
response = PreviewSession().get(mark_sanitized(url), stream=True)
|
||||||
if response.ok:
|
if not response.ok:
|
||||||
og_data = OpenGraphParser(
|
return None
|
||||||
response.content, response.headers.get("Content-Type")
|
|
||||||
).extract_data()
|
|
||||||
for key in ["title", "description", "image"]:
|
|
||||||
if not data.get(key) and og_data.get(key):
|
|
||||||
data[key] = og_data[key]
|
|
||||||
|
|
||||||
generic_data = (
|
if data is None:
|
||||||
GenericParser(response.content, response.headers.get("Content-Type")).extract_data()
|
data = UrlEmbedData()
|
||||||
or {}
|
|
||||||
)
|
for parser_class in (OpenGraphParser, GenericParser):
|
||||||
for key in ["title", "description", "image"]:
|
parser = parser_class(response.content, response.headers.get("Content-Type"))
|
||||||
if not data.get(key) and generic_data.get(key):
|
data.merge(parser.extract_data())
|
||||||
data[key] = generic_data[key]
|
|
||||||
if "image" in data:
|
if data.image:
|
||||||
data["image"] = urljoin(response.url, data["image"])
|
data.image = urljoin(response.url, data.image)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
@get_cache_with_key(preview_url_cache_key, cache_name=CACHE_NAME)
|
@get_cache_with_key(preview_url_cache_key, cache_name=CACHE_NAME)
|
||||||
def link_embed_data_from_cache(url: str, maxwidth: int = 640, maxheight: int = 480) -> Any:
|
def link_embed_data_from_cache(
|
||||||
return
|
url: str, maxwidth: int = 640, maxheight: int = 480
|
||||||
|
) -> Optional[UrlEmbedData]:
|
||||||
|
return None
|
||||||
|
|||||||
27
zerver/lib/url_preview/types.py
Normal file
27
zerver/lib/url_preview/types.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from typing_extensions import Literal
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class UrlEmbedData:
|
||||||
|
type: Optional[str] = None
|
||||||
|
html: Optional[str] = None
|
||||||
|
title: Optional[str] = None
|
||||||
|
description: Optional[str] = None
|
||||||
|
image: Optional[str] = None
|
||||||
|
|
||||||
|
def merge(self, other: "UrlEmbedData") -> None:
|
||||||
|
if self.title is None and other.title is not None:
|
||||||
|
self.title = other.title
|
||||||
|
if self.description is None and other.description is not None:
|
||||||
|
self.description = other.description
|
||||||
|
if self.image is None and other.image is not None:
|
||||||
|
self.image = other.image
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class UrlOEmbedData(UrlEmbedData):
|
||||||
|
type: Literal["photo", "video"]
|
||||||
|
html: Optional[str] = None
|
||||||
@@ -18,6 +18,7 @@ from zerver.lib.test_helpers import mock_queue_publish
|
|||||||
from zerver.lib.url_preview.oembed import get_oembed_data, strip_cdata
|
from zerver.lib.url_preview.oembed import get_oembed_data, strip_cdata
|
||||||
from zerver.lib.url_preview.parsers import GenericParser, OpenGraphParser
|
from zerver.lib.url_preview.parsers import GenericParser, OpenGraphParser
|
||||||
from zerver.lib.url_preview.preview import get_link_embed_data, link_embed_data_from_cache
|
from zerver.lib.url_preview.preview import get_link_embed_data, link_embed_data_from_cache
|
||||||
|
from zerver.lib.url_preview.types import UrlEmbedData, UrlOEmbedData
|
||||||
from zerver.models import Message, Realm, UserProfile
|
from zerver.models import Message, Realm, UserProfile
|
||||||
from zerver.worker.queue_processors import FetchLinksEmbedData
|
from zerver.worker.queue_processors import FetchLinksEmbedData
|
||||||
|
|
||||||
@@ -77,10 +78,9 @@ class OembedTestCase(ZulipTestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
data = get_oembed_data(url)
|
data = get_oembed_data(url)
|
||||||
self.assertIsInstance(data, dict)
|
assert data is not None
|
||||||
self.assertIn("title", data)
|
self.assertIsInstance(data, UrlEmbedData)
|
||||||
assert data is not None # allow mypy to infer data is indexable
|
self.assertEqual(data.title, response_data["title"])
|
||||||
self.assertEqual(data["title"], response_data["title"])
|
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
def test_photo_provider(self) -> None:
|
def test_photo_provider(self) -> None:
|
||||||
@@ -107,11 +107,9 @@ class OembedTestCase(ZulipTestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
data = get_oembed_data(url)
|
data = get_oembed_data(url)
|
||||||
self.assertIsInstance(data, dict)
|
assert data is not None
|
||||||
self.assertIn("title", data)
|
self.assertIsInstance(data, UrlOEmbedData)
|
||||||
assert data is not None # allow mypy to infer data is indexable
|
self.assertEqual(data.title, response_data["title"])
|
||||||
self.assertEqual(data["title"], response_data["title"])
|
|
||||||
self.assertTrue(data["oembed"])
|
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
def test_video_provider(self) -> None:
|
def test_video_provider(self) -> None:
|
||||||
@@ -136,10 +134,9 @@ class OembedTestCase(ZulipTestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
data = get_oembed_data(url)
|
data = get_oembed_data(url)
|
||||||
self.assertIsInstance(data, dict)
|
assert data is not None
|
||||||
self.assertIn("title", data)
|
self.assertIsInstance(data, UrlOEmbedData)
|
||||||
assert data is not None # allow mypy to infer data is indexable
|
self.assertEqual(data.title, response_data["title"])
|
||||||
self.assertEqual(data["title"], response_data["title"])
|
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
def test_connect_error_request(self) -> None:
|
def test_connect_error_request(self) -> None:
|
||||||
@@ -204,30 +201,8 @@ class OpenGraphParserTestCase(ZulipTestCase):
|
|||||||
|
|
||||||
parser = OpenGraphParser(html, "text/html; charset=UTF-8")
|
parser = OpenGraphParser(html, "text/html; charset=UTF-8")
|
||||||
result = parser.extract_data()
|
result = parser.extract_data()
|
||||||
self.assertIn("title", result)
|
self.assertEqual(result.title, "The Rock")
|
||||||
self.assertEqual(result["title"], "The Rock")
|
self.assertEqual(result.description, "The Rock film")
|
||||||
self.assertEqual(result.get("description"), "The Rock film")
|
|
||||||
|
|
||||||
def test_page_with_evil_og_tags(self) -> None:
|
|
||||||
html = b"""<html>
|
|
||||||
<head>
|
|
||||||
<meta property="og:title" content="The Rock" />
|
|
||||||
<meta property="og:type" content="video.movie" />
|
|
||||||
<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />
|
|
||||||
<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />
|
|
||||||
<meta property="og:description" content="The Rock film" />
|
|
||||||
<meta property="og:html" content="<script>alert(window.location)</script>" />
|
|
||||||
<meta property="og:oembed" content="True" />
|
|
||||||
</head>
|
|
||||||
</html>"""
|
|
||||||
|
|
||||||
parser = OpenGraphParser(html, "text/html; charset=UTF-8")
|
|
||||||
result = parser.extract_data()
|
|
||||||
self.assertIn("title", result)
|
|
||||||
self.assertEqual(result["title"], "The Rock")
|
|
||||||
self.assertEqual(result.get("description"), "The Rock film")
|
|
||||||
self.assertEqual(result.get("oembed"), None)
|
|
||||||
self.assertEqual(result.get("html"), None)
|
|
||||||
|
|
||||||
def test_charset_in_header(self) -> None:
|
def test_charset_in_header(self) -> None:
|
||||||
html = """<html>
|
html = """<html>
|
||||||
@@ -239,7 +214,7 @@ class OpenGraphParserTestCase(ZulipTestCase):
|
|||||||
)
|
)
|
||||||
parser = OpenGraphParser(html, "text/html; charset=Big5")
|
parser = OpenGraphParser(html, "text/html; charset=Big5")
|
||||||
result = parser.extract_data()
|
result = parser.extract_data()
|
||||||
self.assertEqual(result["title"], "中文")
|
self.assertEqual(result.title, "中文")
|
||||||
|
|
||||||
def test_charset_in_meta(self) -> None:
|
def test_charset_in_meta(self) -> None:
|
||||||
html = """<html>
|
html = """<html>
|
||||||
@@ -252,7 +227,7 @@ class OpenGraphParserTestCase(ZulipTestCase):
|
|||||||
)
|
)
|
||||||
parser = OpenGraphParser(html, "text/html")
|
parser = OpenGraphParser(html, "text/html")
|
||||||
result = parser.extract_data()
|
result = parser.extract_data()
|
||||||
self.assertEqual(result["title"], "中文")
|
self.assertEqual(result.title, "中文")
|
||||||
|
|
||||||
|
|
||||||
class GenericParserTestCase(ZulipTestCase):
|
class GenericParserTestCase(ZulipTestCase):
|
||||||
@@ -268,8 +243,8 @@ class GenericParserTestCase(ZulipTestCase):
|
|||||||
"""
|
"""
|
||||||
parser = GenericParser(html, "text/html; charset=UTF-8")
|
parser = GenericParser(html, "text/html; charset=UTF-8")
|
||||||
result = parser.extract_data()
|
result = parser.extract_data()
|
||||||
self.assertEqual(result.get("title"), "Test title")
|
self.assertEqual(result.title, "Test title")
|
||||||
self.assertEqual(result.get("description"), "Description text")
|
self.assertEqual(result.description, "Description text")
|
||||||
|
|
||||||
def test_extract_image(self) -> None:
|
def test_extract_image(self) -> None:
|
||||||
html = b"""
|
html = b"""
|
||||||
@@ -286,9 +261,9 @@ class GenericParserTestCase(ZulipTestCase):
|
|||||||
"""
|
"""
|
||||||
parser = GenericParser(html, "text/html; charset=UTF-8")
|
parser = GenericParser(html, "text/html; charset=UTF-8")
|
||||||
result = parser.extract_data()
|
result = parser.extract_data()
|
||||||
self.assertEqual(result.get("title"), "Main header")
|
self.assertEqual(result.title, "Main header")
|
||||||
self.assertEqual(result.get("description"), "Description text")
|
self.assertEqual(result.description, "Description text")
|
||||||
self.assertEqual(result.get("image"), "http://test.com/test.jpg")
|
self.assertEqual(result.image, "http://test.com/test.jpg")
|
||||||
|
|
||||||
def test_extract_bad_image(self) -> None:
|
def test_extract_bad_image(self) -> None:
|
||||||
html = b"""
|
html = b"""
|
||||||
@@ -305,9 +280,9 @@ class GenericParserTestCase(ZulipTestCase):
|
|||||||
"""
|
"""
|
||||||
parser = GenericParser(html, "text/html; charset=UTF-8")
|
parser = GenericParser(html, "text/html; charset=UTF-8")
|
||||||
result = parser.extract_data()
|
result = parser.extract_data()
|
||||||
self.assertEqual(result.get("title"), "Main header")
|
self.assertEqual(result.title, "Main header")
|
||||||
self.assertEqual(result.get("description"), "Description text")
|
self.assertEqual(result.description, "Description text")
|
||||||
self.assertIsNone(result.get("image"))
|
self.assertIsNone(result.image)
|
||||||
|
|
||||||
def test_extract_description(self) -> None:
|
def test_extract_description(self) -> None:
|
||||||
html = b"""
|
html = b"""
|
||||||
@@ -323,7 +298,7 @@ class GenericParserTestCase(ZulipTestCase):
|
|||||||
"""
|
"""
|
||||||
parser = GenericParser(html, "text/html; charset=UTF-8")
|
parser = GenericParser(html, "text/html; charset=UTF-8")
|
||||||
result = parser.extract_data()
|
result = parser.extract_data()
|
||||||
self.assertEqual(result.get("description"), "Description text")
|
self.assertEqual(result.description, "Description text")
|
||||||
|
|
||||||
html = b"""
|
html = b"""
|
||||||
<html>
|
<html>
|
||||||
@@ -333,12 +308,12 @@ class GenericParserTestCase(ZulipTestCase):
|
|||||||
"""
|
"""
|
||||||
parser = GenericParser(html, "text/html; charset=UTF-8")
|
parser = GenericParser(html, "text/html; charset=UTF-8")
|
||||||
result = parser.extract_data()
|
result = parser.extract_data()
|
||||||
self.assertEqual(result.get("description"), "description 123")
|
self.assertEqual(result.description, "description 123")
|
||||||
|
|
||||||
html = b"<html><body></body></html>"
|
html = b"<html><body></body></html>"
|
||||||
parser = GenericParser(html, "text/html; charset=UTF-8")
|
parser = GenericParser(html, "text/html; charset=UTF-8")
|
||||||
result = parser.extract_data()
|
result = parser.extract_data()
|
||||||
self.assertIsNone(result.get("description"))
|
self.assertIsNone(result.description)
|
||||||
|
|
||||||
|
|
||||||
class PreviewTestCase(ZulipTestCase):
|
class PreviewTestCase(ZulipTestCase):
|
||||||
@@ -730,8 +705,9 @@ class PreviewTestCase(ZulipTestCase):
|
|||||||
in info_logs.output[0]
|
in info_logs.output[0]
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn("title", cached_data)
|
assert cached_data
|
||||||
self.assertNotIn("image", cached_data)
|
self.assertIsNotNone(cached_data.title)
|
||||||
|
self.assertIsNone(cached_data.image)
|
||||||
msg = Message.objects.select_related("sender").get(id=msg_id)
|
msg = Message.objects.select_related("sender").get(id=msg_id)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
('<p><a href="http://test.org/foo.html">' "http://test.org/foo.html</a></p>"),
|
('<p><a href="http://test.org/foo.html">' "http://test.org/foo.html</a></p>"),
|
||||||
@@ -768,8 +744,9 @@ class PreviewTestCase(ZulipTestCase):
|
|||||||
in info_logs.output[0]
|
in info_logs.output[0]
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn("title", cached_data)
|
assert cached_data
|
||||||
self.assertNotIn("image", cached_data)
|
self.assertIsNotNone(cached_data.title)
|
||||||
|
self.assertIsNone(cached_data.image)
|
||||||
msg = Message.objects.select_related("sender").get(id=msg_id)
|
msg = Message.objects.select_related("sender").get(id=msg_id)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
('<p><a href="http://test.org/foo.html">' "http://test.org/foo.html</a></p>"),
|
('<p><a href="http://test.org/foo.html">' "http://test.org/foo.html</a></p>"),
|
||||||
@@ -804,8 +781,9 @@ class PreviewTestCase(ZulipTestCase):
|
|||||||
in info_logs.output[0]
|
in info_logs.output[0]
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn("title", cached_data)
|
assert cached_data
|
||||||
self.assertNotIn("image", cached_data)
|
self.assertIsNotNone(cached_data.title)
|
||||||
|
self.assertIsNone(cached_data.image)
|
||||||
msg = Message.objects.select_related("sender").get(id=msg_id)
|
msg = Message.objects.select_related("sender").get(id=msg_id)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
('<p><a href="http://test.org/foo.html">' "http://test.org/foo.html</a></p>"),
|
('<p><a href="http://test.org/foo.html">' "http://test.org/foo.html</a></p>"),
|
||||||
@@ -836,12 +814,11 @@ class PreviewTestCase(ZulipTestCase):
|
|||||||
in info_logs.output[0]
|
in info_logs.output[0]
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn("title", data)
|
assert data is not None
|
||||||
self.assertIn("image", data)
|
|
||||||
|
|
||||||
msg = Message.objects.select_related("sender").get(id=msg_id)
|
msg = Message.objects.select_related("sender").get(id=msg_id)
|
||||||
self.assertIn(data["title"], msg.rendered_content)
|
self.assertIn(data.title, msg.rendered_content)
|
||||||
self.assertIn(re.sub(r"([^\w-])", r"\\\1", data["image"]), msg.rendered_content)
|
assert data.image is not None
|
||||||
|
self.assertIn(re.sub(r"([^\w-])", r"\\\1", data.image), msg.rendered_content)
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
|
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
|
||||||
@@ -941,12 +918,11 @@ class PreviewTestCase(ZulipTestCase):
|
|||||||
"message_content": url,
|
"message_content": url,
|
||||||
}
|
}
|
||||||
|
|
||||||
mocked_data = {
|
mocked_data = UrlOEmbedData(
|
||||||
"html": f'<iframe src="{url}"></iframe>',
|
html=f'<iframe src="{url}"></iframe>',
|
||||||
"oembed": True,
|
type="video",
|
||||||
"type": "video",
|
image=f"{url}/image.png",
|
||||||
"image": f"{url}/image.png",
|
)
|
||||||
}
|
|
||||||
self.create_mock_response(url)
|
self.create_mock_response(url)
|
||||||
with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
|
with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
|
||||||
with self.assertLogs(level="INFO") as info_logs:
|
with self.assertLogs(level="INFO") as info_logs:
|
||||||
@@ -963,7 +939,7 @@ class PreviewTestCase(ZulipTestCase):
|
|||||||
|
|
||||||
self.assertEqual(data, mocked_data)
|
self.assertEqual(data, mocked_data)
|
||||||
msg.refresh_from_db()
|
msg.refresh_from_db()
|
||||||
self.assertIn('a data-id="{}"'.format(escape(mocked_data["html"])), msg.rendered_content)
|
self.assertIn('a data-id="{}"'.format(escape(mocked_data.html)), msg.rendered_content)
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
|
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
|
||||||
@@ -983,7 +959,9 @@ class PreviewTestCase(ZulipTestCase):
|
|||||||
"message_content": url,
|
"message_content": url,
|
||||||
}
|
}
|
||||||
|
|
||||||
mocked_data = {"title": "Clearer Code at Scale - Static Types at Zulip and Dropbox"}
|
mocked_data = UrlEmbedData(
|
||||||
|
title="Clearer Code at Scale - Static Types at Zulip and Dropbox"
|
||||||
|
)
|
||||||
self.create_mock_response(url)
|
self.create_mock_response(url)
|
||||||
with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
|
with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
|
||||||
with self.assertLogs(level="INFO") as info_logs:
|
with self.assertLogs(level="INFO") as info_logs:
|
||||||
@@ -1019,7 +997,9 @@ class PreviewTestCase(ZulipTestCase):
|
|||||||
"message_content": url,
|
"message_content": url,
|
||||||
}
|
}
|
||||||
|
|
||||||
mocked_data = {"title": "Clearer Code at Scale - Static Types at Zulip and Dropbox"}
|
mocked_data = UrlEmbedData(
|
||||||
|
title="Clearer Code at Scale - Static Types at Zulip and Dropbox"
|
||||||
|
)
|
||||||
self.create_mock_response(url)
|
self.create_mock_response(url)
|
||||||
with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
|
with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
|
||||||
with self.assertLogs(level="INFO") as info_logs:
|
with self.assertLogs(level="INFO") as info_logs:
|
||||||
|
|||||||
Reference in New Issue
Block a user