preview: Use a dataclass for the embed data.

This is significantly cleaner than passing around `Dict[str, Any]` all
of the time.

(cherry picked from commit 327ff9ea0f)
This commit is contained in:
Alex Vandiver
2022-04-14 12:52:41 -07:00
parent 00b3da0a0c
commit 5ff82c82ae
8 changed files with 180 additions and 190 deletions

View File

@@ -1,37 +1,33 @@
from typing import Dict
from urllib.parse import urlparse
from zerver.lib.url_preview.types import UrlEmbedData
from .base import BaseParser
class OpenGraphParser(BaseParser):
    """Collect Open Graph preview metadata from a parsed HTML document.

    The parser scans the ``<meta>`` tags found in ``self._soup`` and
    copies the values of the supported ``og:*`` properties into a
    ``UrlEmbedData`` instance.
    """

    # NOTE(review): ``self._soup`` is presumably populated by
    # ``BaseParser``; confirm against the base class.

    # The Open Graph properties this parser reads; ``<meta>`` tags
    # carrying any other ``property`` value are skipped.
    allowed_og_properties = {
        "og:title",
        "og:description",
        "og:image",
    }

    def extract_data(self) -> UrlEmbedData:
        """Return the Open Graph title, description and image of the page.

        Only ``<meta>`` tags that have both a ``property`` and a
        ``content`` attribute are considered.  When the same property
        appears more than once, the value of the last tag wins.  An
        ``og:image`` whose content cannot be parsed as a URL is ignored.

        Returns:
            A ``UrlEmbedData`` whose ``title``, ``description`` and
            ``image`` attributes are assigned for each matching tag that
            was found; attributes with no matching tag are left as
            ``UrlEmbedData()`` initialised them.
        """
        data = UrlEmbedData()

        for tag in self._soup.findAll("meta"):
            if not tag.has_attr("property") or not tag.has_attr("content"):
                continue

            og_property = tag["property"]
            if og_property not in self.allowed_og_properties:
                continue

            content = tag["content"]
            if og_property == "og:title":
                data.title = content
            elif og_property == "og:description":
                data.description = content
            elif og_property == "og:image":
                try:
                    # We use urlparse and not URLValidator because we
                    # need to support relative URLs.
                    urlparse(content)
                except ValueError:
                    # Malformed image URL: skip this tag, keep scanning.
                    continue
                data.image = content

        return data