Files
zulip/zerver/lib/url_preview/parsers/base.py
Alex Vandiver 5ff82c82ae preview: Use a dataclass for the embed data.
This is significantly cleaner than passing around `Dict[str, Any]` all
of the time.

(cherry picked from commit 327ff9ea0f)
2022-05-03 16:10:25 -07:00

21 lines
727 B
Python

import cgi
from typing import Optional
from zerver.lib.url_preview.types import UrlEmbedData
class BaseParser:
    """Base class for URL-preview HTML parsers.

    The constructor parses the fetched document into ``self._soup``.
    ``extract_data`` is a placeholder that raises ``NotImplementedError``
    and is intended to be overridden by concrete parsers.
    """

    def __init__(self, html_source: bytes, content_type: Optional[str]) -> None:
        """Parse ``html_source`` with BeautifulSoup's "lxml" parser.

        Args:
            html_source: Raw bytes of the document to parse.
            content_type: Value of a ``Content-Type`` header, or ``None``.
                If it carries a ``charset`` parameter, that charset is
                passed to BeautifulSoup as ``from_encoding``; otherwise
                ``from_encoding`` is ``None``.
        """
        # We import BeautifulSoup here, because it's not used by most
        # processes in production, and bs4 is big enough that
        # importing it adds 10s of milliseconds to manage.py startup.
        from bs4 import BeautifulSoup

        charset = self._charset_from_content_type(content_type)
        self._soup = BeautifulSoup(html_source, "lxml", from_encoding=charset)

    @staticmethod
    def _charset_from_content_type(content_type: Optional[str]) -> Optional[str]:
        """Return the lower-cased ``charset`` parameter of a Content-Type value.

        Returns ``None`` when ``content_type`` is ``None`` or has no usable
        ``charset`` parameter.
        """
        if content_type is None:
            return None

        # The cgi module (cgi.parse_header) is deprecated since Python 3.11
        # and removed in 3.13 (PEP 594); the email package is the documented
        # replacement for parsing MIME header parameters.
        from email.message import Message

        header = Message()
        header["Content-Type"] = content_type
        return header.get_content_charset()

    def extract_data(self) -> "UrlEmbedData":
        """Return the embed data extracted from the parsed document.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()