mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	There are several things we'll want to clean up with this feature, but for now we're content to just make this not crash.
		
			
				
	
	
		
			47 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			47 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from typing import Dict, Optional
 | 
						|
from zerver.lib.url_preview.parsers.base import BaseParser
 | 
						|
 | 
						|
 | 
						|
class GenericParser(BaseParser):
    """Fallback URL-preview parser that relies on plain HTML structure.

    Reads the parsed document from ``self._soup`` (not set in this class;
    presumably a BeautifulSoup tree provided by ``BaseParser`` -- confirm
    there) and derives preview fields from ordinary tags: ``<title>``,
    ``<h1>``, ``<meta name="description">``, ``<p>`` and ``<img>``.
    """

    def extract_data(self) -> Dict[str, Optional[str]]:
        """Return the preview fields extracted from the page.

        The result always contains the keys ``'title'``, ``'description'``
        and ``'image'``; a value is ``None`` when nothing usable was found.
        """
        return {
            'title': self._get_title(),
            'description': self._get_description(),
            'image': self._get_image()}

    def _get_title(self) -> Optional[str]:
        """Return the ``<title>`` text, else the first ``<h1>`` text, else None."""
        soup = self._soup
        if (soup.title and soup.title.text != ''):
            return soup.title.text
        if (soup.h1 and soup.h1.text != ''):
            return soup.h1.text
        return None

    def _get_description(self) -> Optional[str]:
        """Return a short description of the page, or None.

        Candidates are tried in order:

        1. the ``content`` of ``<meta name="description">``;
        2. the text of the first ``<p>`` following the first ``<h1>``;
        3. the text of the first ``<p>`` anywhere in the document.

        A candidate is skipped when it is missing or empty.
        """
        soup = self._soup
        meta_description = soup.find('meta', attrs={'name': 'description'})
        if meta_description:
            # .get() so a <meta name="description"> without a content
            # attribute is skipped rather than raising.
            content = meta_description.get('content', '')
            if content != '':
                return content
        first_h1 = soup.find('h1')
        if first_h1:
            first_p = first_h1.find_next('p')
            # Compare .text, not .string: .string is None for a paragraph
            # with several children, which made the old `!= ''` check pass
            # even for paragraphs whose text is empty.
            if (first_p and first_p.text != ''):
                return first_p.text
        first_p = soup.find('p')
        # Same reasoning as above; returning .text (instead of .string) also
        # keeps paragraphs that contain inline markup such as <b> or <a>.
        if (first_p and first_p.text != ''):
            return first_p.text
        return None

    def _get_image(self) -> Optional[str]:
        """
        Finding a first image after the h1 header.
        Presumably it will be the main image.

        Returns the ``src`` of the first ``<img>`` that is a later sibling
        of the first ``<h1>``, or None when there is no ``<h1>``, no such
        image, or the image has a missing or empty ``src``.
        """
        soup = self._soup
        first_h1 = soup.find('h1')
        if first_h1:
            first_image = first_h1.find_next_sibling('img')
            if first_image:
                # An <img> without a src attribute would raise KeyError on
                # first_image['src']; .get() turns that into "no image".
                src = first_image.get('src', '')
                if src != '':
                    return src
        return None
 |