Files
zulip/zerver/lib/url_preview/parsers/generic.py
Igor Tokarev c93f1d4eda Add oembed/Open Graph/Meta tags data retrieval from inline links.
This change adds support for displaying inline open graph previews for
links posted into Zulip.

It is designed to interact correctly with message editing.

This adds the new settings.INLINE_URL_EMBED_PREVIEW setting to control
whether this feature is enabled.

By default, this setting is currently disabled, so that we can burn it
in for a bit before it impacts users more broadly.

Eventually, we may want to make this manageable via a (set of?)
per-realm settings.  E.g. I can imagine a realm wanting to be able to
enable/disable it for certain URLs.
2016-12-07 17:40:18 -08:00

52 lines
1.6 KiB
Python

from __future__ import absolute_import
from typing import Any, Dict
from zerver.lib.url_preview.parsers.base import BaseParser
class GenericParser(BaseParser):
def extract_data(self):
# type: () -> Dict
return {
'title': self._get_title(),
'description': self._get_description(),
'image': self._get_image()}
def _get_title(self):
# type: () -> Any
soup = self._soup
if (soup.title and soup.title.text != ''):
return soup.title.text
if (soup.h1 and soup.h1.text != ''):
return soup.h1.text
return None
def _get_description(self):
# type: () -> Any
soup = self._soup
meta_description = soup.find('meta', attrs={'name': 'description'})
if (meta_description and meta_description['content'] != ''):
return meta_description['content']
first_h1 = soup.find('h1')
if first_h1:
first_p = first_h1.find_next('p')
if (first_p and first_p.string != ''):
return first_p.text
first_p = soup.find('p')
if (first_p and first_p.string != ''):
return first_p.string
return None
def _get_image(self):
# type: () -> Any
"""
Finding a first image after the h1 header.
Presumably it will be the main image.
"""
soup = self._soup
first_h1 = soup.find('h1')
if first_h1:
first_image = first_h1.find_next_sibling('img')
if first_image and first_image['src'] != '':
return first_image['src']
return None