mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	We were using Google's diff-match-patch library to diff HTML. The problem with that approach is that it is a text differ, not an HTML differ, so it ends up mangling the HTML tags. `lxml` is a safer option. Fixes: #7219.
		
			
				
	
	
		
			26 lines
		
	
	
		
			824 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			26 lines
		
	
	
		
			824 B
		
	
	
	
		
			Python
		
	
	
	
	
	
import lxml
 | 
						|
 | 
						|
from lxml.html.diff import htmldiff
 | 
						|
from typing import Optional, Text
 | 
						|
 | 
						|
def highlight_with_class(text, klass):
    # type: (Text, Text) -> Text
    """Wrap `text` in a <span> element whose class attribute is `klass`.

    Neither argument is escaped; both are interpolated into the markup
    exactly as given.
    """
    span_template = '<span class="{css_class}">{content}</span>'
    return span_template.format(css_class=klass, content=text)
 | 
						|
 | 
						|
def highlight_html_differences(s1, s2, msg_id=None):
    # type: (Text, Text, Optional[int]) -> Text
    """Return an HTML diff of `s1` against `s2` with changes wrapped in spans.

    The two HTML strings are compared with lxml's `htmldiff`. Every <del>
    element in the result is rewritten to
    <span class="highlight_text_deleted"> and every <ins> element to
    <span class="highlight_text_inserted">.

    `s1` is the old HTML and `s2` the new HTML. `msg_id` is accepted for
    interface compatibility but is not used by this function.

    Returns the highlighted markup as text (not a byte string).
    """
    diff_markup = htmldiff(s1, s2)
    fragment = lxml.html.fromstring(diff_markup)  # type: ignore # https://github.com/python/typeshed/issues/525

    # Map each diff tag to the CSS class its replacement <span> receives.
    replacements = (
        ('del', 'highlight_text_deleted'),
        ('ins', 'highlight_text_inserted'),
    )
    for diff_tag, css_class in replacements:
        for elem in fragment.cssselect(diff_tag):
            elem.tag = 'span'
            elem.set('class', css_class)

    # Without an explicit encoding, tostring() serializes to a byte string,
    # which contradicts the declared Text return type; 'unicode' yields text.
    return lxml.html.tostring(fragment, encoding='unicode')  # type: ignore # https://github.com/python/typeshed/issues/525
 |