mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 14:03:30 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			64 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			64 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import re
 | 
						|
import logging
 | 
						|
import traceback
 | 
						|
from typing import Any, Optional, Dict
 | 
						|
from typing.re import Match
 | 
						|
import requests
 | 
						|
from zerver.lib.cache import cache_with_key, get_cache_with_key
 | 
						|
from zerver.lib.url_preview.oembed import get_oembed_data
 | 
						|
from zerver.lib.url_preview.parsers import OpenGraphParser, GenericParser
 | 
						|
from django.utils.encoding import smart_text
 | 
						|
 | 
						|
 | 
						|
# Cache name handed to the caching decorators used in this module.
CACHE_NAME = "database"

# Based on django.core.validators.URLValidator, with ftp support removed.
# The pattern is assembled from its logical pieces; matching ignores case.
_LINK_PATTERN_PARTS = (
    r'^(?:http)s?://',  # scheme: http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|',  # dotted domain name...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})',  # ...or dotted-quad IP address
    r'(?::\d+)?',  # optional port
    r'(?:/?|[/?]\S+)$',  # optional path / query with no whitespace
)
link_regex = re.compile("".join(_LINK_PATTERN_PARTS), flags=re.IGNORECASE)
 | 
						|
 | 
						|
 | 
						|
def is_link(url: str) -> Optional[Match[str]]:
    """Match ``url`` against ``link_regex``.

    The URL is passed through ``smart_text`` before matching.

    Returns the regex match object when the whole string looks like an
    http(s) link, or ``None`` otherwise, so callers can use the result
    directly in a truth test.
    """
    # Pattern.match() yields None on failure, hence the Optional return type.
    return link_regex.match(smart_text(url))
 | 
						|
 | 
						|
 | 
						|
def cache_key_func(url: str) -> str:
    """Return the cache key for ``url``: the URL string itself, unchanged."""
    key = url
    return key
 | 
						|
 | 
						|
 | 
						|
@cache_with_key(cache_key_func, cache_name=CACHE_NAME, with_statsd_key="urlpreview_data")
def get_link_embed_data(url: str,
                        maxwidth: Optional[int]=640,
                        maxheight: Optional[int]=480) -> Optional[Dict[Any, Any]]:
    """Collect preview metadata for ``url``.

    Three sources are consulted, in this order:

    1. OEmbed (``get_oembed_data``), used as the base dictionary.
    2. Open Graph tags from the fetched page, merged over the base.
    3. Generic meta tags, used only to fill in a missing ``title``,
       ``description`` or ``image``.

    Args:
        url: The address to preview; must satisfy ``is_link``.
        maxwidth: Forwarded to ``get_oembed_data``.
        maxheight: Forwarded to ``get_oembed_data``.

    Returns:
        ``None`` if ``url`` is not a link or the OEmbed lookup raised a
        ``requests`` exception; otherwise a dict (possibly empty) with
        whatever metadata could be gathered.
    """
    if not is_link(url):
        return None

    msg = 'Unable to fetch information from url {0}, traceback: {1}'

    try:
        data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    except requests.exceptions.RequestException:
        logging.error(msg.format(url, traceback.format_exc()))
        return None
    data = data or {}

    # Fetching the page itself can fail just like the OEmbed lookup
    # (connection error, invalid redirect, ...).  Treat that the same way
    # as a non-OK response: log it and keep the data gathered so far,
    # rather than letting the exception escape to the caller.
    # NOTE(review): no timeout is passed here, so a stalled server can block
    # this call indefinitely -- consider adding one.
    try:
        response = requests.get(url)
    except requests.exceptions.RequestException:
        logging.error(msg.format(url, traceback.format_exc()))
        return data

    if response.ok:
        html = response.text

        og_data = OpenGraphParser(html).extract_data()
        if og_data:
            data.update(og_data)

        # Generic meta tags only fill gaps; they never override values
        # that OEmbed or Open Graph already supplied.
        generic_data = GenericParser(html).extract_data() or {}
        for key in ['title', 'description', 'image']:
            if not data.get(key) and generic_data.get(key):
                data[key] = generic_data[key]
    return data
 | 
						|
 | 
						|
 | 
						|
@get_cache_with_key(cache_key_func, cache_name=CACHE_NAME)
def link_embed_data_from_cache(url: str,
                               maxwidth: Optional[int]=640,
                               maxheight: Optional[int]=480) -> Any:
    """Cache-only counterpart of ``get_link_embed_data``.

    The body deliberately computes nothing and returns ``None``.
    NOTE(review): presumably ``get_cache_with_key`` supplies the value
    stored under ``cache_key_func(url)`` in ``CACHE_NAME`` instead of this
    body's result -- confirm against ``zerver.lib.cache``.
    """
    return None
 |