mirror of
https://github.com/zulip/zulip.git
synced 2025-11-21 23:19:10 +00:00
Our open graph parser logic sloppily mixed data obtained by parsing open graph properties with trusted data set by our oembed parser. We fix this by consistently using our explicit whitelist of generic properties (image, title, and description) in both places where we interact with open graph properties. The fixes are redundant with each other, but doing both helps make the intent of the code clearer. The issue fixed here was originally reported as an XSS vulnerability in the upcoming Inline URL Previews feature, found by Graham Bleaney and Ibrahim Mohamed using Pysa. The recent Oembed changes close that vulnerability, but this change is still worth doing to make the implementation do what it looks like it does.
48 lines
1.5 KiB
Python
48 lines
1.5 KiB
Python
from typing import Optional, Dict, Any
|
|
from pyoembed import oEmbed, PyOembedException
|
|
|
|
def get_oembed_data(url: str,
                    maxwidth: Optional[int]=640,
                    maxheight: Optional[int]=480) -> Optional[Dict[str, Any]]:
    """Look up oEmbed data for ``url`` and reduce it to a small preview dict.

    Returns None when the oEmbed lookup raises PyOembedException.

    Otherwise the result always contains ``type``, ``title`` and
    ``description``.  In addition:

    * a ``photo`` resource with an image URL also gets ``oembed=True``
      and ``image``;
    * a ``video`` resource with both embed HTML and a thumbnail also gets
      ``oembed=True``, ``image`` (the thumbnail) and ``html``.

    Only these explicitly named fields are copied out of the response.
    """
    try:
        response = oEmbed(url, maxwidth=maxwidth, maxheight=maxheight)
    except PyOembedException:
        return None

    resource_type = response.get('type', '')
    # Fall back to the 'image' key when the response has no 'url'.
    photo_url = response.get('url', response.get('image'))
    thumbnail_url = response.get('thumbnail_url')
    embed_html = response.pop('html', '')

    if resource_type == 'photo' and photo_url:
        preview = {
            'oembed': True,
            'image': photo_url,
            'type': resource_type,
        }
    elif resource_type == 'video' and embed_html and thumbnail_url:
        preview = {
            'oembed': True,
            'image': thumbnail_url,
            'type': resource_type,
            'html': strip_cdata(embed_html),
        }
    else:
        # Otherwise, start with just the embed type.
        preview = {'type': resource_type}

    # Every variant ends with the same two generic fields.
    preview['title'] = response.get('title')
    preview['description'] = response.get('description')
    return preview
|
|
|
|
def strip_cdata(html: str) -> str:
    """Return ``html`` without an enclosing ``<![CDATA[ ... ]]>`` wrapper.

    Works around a bug in SoundCloud's XML generation, which yields
    markup of the form:
        <html><![CDATA[<iframe ...></iframe>]]></html>

    The input is returned unchanged unless it both starts with the CDATA
    opener and ends with the CDATA terminator.
    """
    cdata_open = '<![CDATA['
    cdata_close = ']]>'

    wrapped = html.startswith(cdata_open) and html.endswith(cdata_close)
    if not wrapped:
        return html

    # Drop exactly the opener and the terminator, keeping the payload.
    return html[len(cdata_open):-len(cdata_close)]
|