Files
zulip/zerver/lib/url_preview/preview.py
Igor Tokarev c93f1d4eda Add oembed/Open Graph/Meta tags data retrieval from inline links.
This change adds support for displaying inline open graph previews for
links posted into Zulip.

It is designed to interact correctly with message editing.

This adds the new settings.INLINE_URL_EMBED_PREVIEW setting to control
whether this feature is enabled.

By default, this setting is currently disabled, so that we can burn it
in for a bit before it impacts users more broadly.

Eventually, we may want to make this manageable via a (set of?)
per-realm settings.  E.g. I can imagine a realm wanting to be able to
enable/disable it for certain URLs.
2016-12-07 17:40:18 -08:00

71 lines
2.4 KiB
Python

from __future__ import absolute_import
import re
import logging
import traceback
from six import text_type
from typing import Any, Optional
from typing.re import Match
import requests
from django.conf import settings
from zerver.lib.cache import cache_with_key, get_cache_with_key
from zerver.lib.bugdown import testing_mocks
from zerver.lib.url_preview.oembed import get_oembed_data
from zerver.lib.url_preview.parsers import OpenGraphParser, GenericParser
# Name of the cache backend handed to the caching decorators in this module.
CACHE_NAME = "database"

# Based on django.core.validators.URLValidator, with ftp support removed.
# The fragments below are concatenated in order into a single pattern.
_LINK_PATTERN_PARTS = (
    r'^(?:http)s?://',  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|',  # domain...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})',  # ...or ip
    r'(?::\d+)?',  # optional port
    r'(?:/?|[/?]\S+)$',  # optional path / query with no whitespace
)
link_regex = re.compile(''.join(_LINK_PATTERN_PARTS), re.IGNORECASE)
def is_link(url):
    # type: (text_type) -> Optional[Match[text_type]]
    """Match ``url`` against ``link_regex``.

    Returns the regex match object when ``url`` is accepted by
    ``link_regex`` and ``None`` otherwise, so the result can be used
    directly in a boolean test.
    """
    # Coerce with text_type rather than str: on Python 2, str() of a
    # unicode URL containing non-ASCII characters raises
    # UnicodeEncodeError.  On Python 3 text_type is str, so behaviour
    # there is unchanged.
    return link_regex.match(text_type(url))
def cache_key_func(url):
    # type: (text_type) -> text_type
    """Return the cache key for ``url``: the URL itself, unmodified."""
    cache_key = url
    return cache_key
@cache_with_key(cache_key_func, cache_name=CACHE_NAME, with_statsd_key="urlpreview_data")
def get_link_embed_data(url, maxwidth=640, maxheight=480):
    # type: (text_type, Optional[int], Optional[int]) -> Any
    """Collect preview metadata for ``url``.

    Returns ``None`` when ``url`` is rejected by ``is_link`` or when the
    oEmbed lookup raises a ``requests`` exception.  Under
    ``settings.TEST_SUITE`` the value of
    ``testing_mocks.fake_urlembed_data()`` is returned without any network
    access.  Otherwise the result is a dict built from, in order:

    1. oEmbed data (``get_oembed_data``, given ``maxwidth``/``maxheight``),
    2. Open Graph data parsed from the page, which overrides oEmbed keys,
    3. generic meta-tag data, used only to fill in a missing ``title``,
       ``description`` or ``image``.

    If the page itself cannot be fetched (request error, timeout, or a
    non-OK response), whatever was gathered from oEmbed is returned.

    NOTE(review): the decorator is given ``cache_key_func``, which looks
    only at ``url``; presumably results for different ``maxwidth`` /
    ``maxheight`` values share one cache entry -- confirm against
    ``cache_with_key``.
    """
    if not is_link(url):
        return None

    if settings.TEST_SUITE:
        return testing_mocks.fake_urlembed_data()

    error_msg = 'Unable to fetch information from url {0}, traceback: {1}'
    # Upper bound, in seconds, on waiting for the remote page.  Without a
    # timeout, requests can block indefinitely on an unresponsive server.
    page_fetch_timeout = 15

    # Source 1: oEmbed.
    try:
        data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    except requests.exceptions.RequestException:
        logging.error(error_msg.format(url, traceback.format_exc()))
        return None
    data = data or {}

    # Sources 2 and 3 both need the page body.  A failure here is handled
    # like a non-OK response: keep what oEmbed gave us instead of letting
    # the exception escape.
    try:
        response = requests.get(url, timeout=page_fetch_timeout)
    except requests.exceptions.RequestException:
        logging.error(error_msg.format(url, traceback.format_exc()))
        return data

    if not response.ok:
        return data

    # Source 2: Open Graph tags take precedence over oEmbed values.
    og_data = OpenGraphParser(response.text).extract_data()
    if og_data:
        data.update(og_data)

    # Source 3: generic meta tags only fill gaps left by the other sources.
    generic_data = GenericParser(response.text).extract_data() or {}
    for key in ['title', 'description', 'image']:
        if not data.get(key) and generic_data.get(key):
            data[key] = generic_data[key]
    return data
@get_cache_with_key(cache_key_func, cache_name=CACHE_NAME)
def link_embed_data_from_cache(url, maxwidth=640, maxheight=480):
    # type: (text_type, Optional[int], Optional[int]) -> Any
    """Stub wrapped by ``get_cache_with_key``; the body does no work.

    The undecorated function simply yields ``None``.  Any cache lookup is
    performed by the decorator, which receives ``cache_key_func`` and
    ``CACHE_NAME``.

    NOTE(review): presumably the decorator returns the entry stored by
    ``get_link_embed_data`` for the same ``url`` -- confirm in
    ``zerver.lib.cache``.
    """
    return None