mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-03 21:43:21 +00:00 
			
		
		
		
	Our open graph parser logic sloppily mixed data obtained by parsing open graph properties with trusted data set by our oembed parser. We fix this by consistently using our explicit whitelist of generic properties (image, title, and description) in both places where we interact with open graph properties. The fixes are redundant with each other, but doing both helps in making the intent of the code clearer. The issue fixed here was originally reported as an XSS vulnerability in the upcoming Inline URL Previews feature, found by Graham Bleaney and Ibrahim Mohamed using Pysa. The recent Oembed changes close that vulnerability, but this change is still worth doing to make the implementation do what it looks like it does.
		
			
				
	
	
		
			48 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			48 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from typing import Optional, Dict, Any
 | 
						|
from pyoembed import oEmbed, PyOembedException
 | 
						|
 | 
						|
def get_oembed_data(url: str,
                    maxwidth: Optional[int]=640,
                    maxheight: Optional[int]=480) -> Optional[Dict[str, Any]]:
    """Look up oEmbed metadata for ``url`` and reduce it to a preview dict.

    Only an explicit, fixed set of fields is copied out of the oEmbed
    response; nothing else from the response is passed through.

    Returns:
        * ``None`` if the ``oEmbed`` call raises ``PyOembedException``.
        * For a ``'photo'`` response that has an image (the ``'url'`` field,
          falling back to ``'image'``): ``oembed``, ``image``, ``type``,
          ``title`` and ``description``.
        * For a ``'video'`` response that has both ``'html'`` and
          ``'thumbnail_url'``: the same keys with the thumbnail as ``image``,
          plus ``html`` (passed through ``strip_cdata``).
        * Otherwise just ``type``, ``title`` and ``description``.
    """
    try:
        response = oEmbed(url, maxwidth=maxwidth, maxheight=maxheight)
    except PyOembedException:
        return None

    resource_type = response.get('type', '')
    picture = response.get('url', response.get('image'))
    preview_image = response.get('thumbnail_url')
    markup = response.pop('html', '')
    title = response.get('title')
    description = response.get('description')

    if resource_type == 'photo' and picture:
        return {
            'oembed': True,
            'image': picture,
            'type': resource_type,
            'title': title,
            'description': description,
        }

    if resource_type == 'video' and markup and preview_image:
        return {
            'oembed': True,
            'image': preview_image,
            'type': resource_type,
            'html': strip_cdata(markup),
            'title': title,
            'description': description,
        }

    # Anything else is not marked as an oembed preview: report only the
    # embed type together with the generic title/description.
    return {
        'type': resource_type,
        'title': title,
        'description': description,
    }
 | 
						|
 | 
						|
def strip_cdata(html: str) -> str:
    """Return ``html`` without an enclosing ``<![CDATA[ ... ]]>`` wrapper.

    Works around a bug in SoundCloud's XML generation, which produces
    ``<html><![CDATA[<iframe ...></iframe>]]></html>``.  The string is
    returned unchanged unless it both starts with the CDATA opener and
    ends with the CDATA terminator.
    """
    opener, closer = '<![CDATA[', ']]>'
    if not (html.startswith(opener) and html.endswith(closer)):
        return html
    return html[len(opener):-len(closer)]
 |