mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	A lot of care has been taken to ensure we're using the realm that the message is being sent into, not the realm of the sender, to correctly handle the logic for cross-realm bot users such as the notifications bot.
		
			
				
	
	
		
			131 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			131 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from __future__ import absolute_import
 | 
						|
 | 
						|
from typing import Callable, Tuple, Text
 | 
						|
 | 
						|
from django.conf import settings
 | 
						|
 | 
						|
from diff_match_patch import diff_match_patch
 | 
						|
import platform
 | 
						|
import logging
 | 
						|
 | 
						|
# TODO: handle changes in link hrefs
 | 
						|
 | 
						|
def highlight_with_class(klass, text):
    # type: (Text, Text) -> Text
    """Wrap ``text`` in a ``<span>`` element whose class attribute is ``klass``.

    Both values are interpolated as-is; no escaping is performed here.
    """
    span_template = '<span class="%s">%s</span>'
    return span_template % (klass, text)
 | 
						|
 | 
						|
def highlight_inserted(text):
    # type: (Text) -> Text
    """Mark ``text`` as inserted.

    Delegates to highlight_with_class using the
    ``highlight_text_inserted`` CSS class.
    """
    return highlight_with_class(klass='highlight_text_inserted', text=text)
 | 
						|
 | 
						|
def highlight_deleted(text):
    # type: (Text) -> Text
    """Mark ``text`` as deleted.

    Delegates to highlight_with_class using the
    ``highlight_text_deleted`` CSS class.
    """
    return highlight_with_class(klass='highlight_text_deleted', text=text)
 | 
						|
 | 
						|
def highlight_replaced(text):
    # type: (Text) -> Text
    """Mark ``text`` as replaced.

    Delegates to highlight_with_class using the
    ``highlight_text_replaced`` CSS class.
    """
    return highlight_with_class(klass='highlight_text_replaced', text=text)
 | 
						|
 | 
						|
def chunkize(text, in_tag):
    # type: (Text, bool) -> Tuple[List[Tuple[Text, Text]], bool]
    """Split ``text`` into consecutive ``(kind, piece)`` tuples.

    ``kind`` is ``'tag'`` or ``'text'``.  A ``'<'`` closes any pending piece
    as ``'text'`` and begins a new one; a ``'>'`` closes the pending piece
    (including the ``'>'`` itself) as ``'tag'``.  Whatever is left over at
    the end is labelled according to the final tag state.

    ``in_tag`` says whether ``text`` begins inside an already-open tag.  The
    updated state is returned alongside the pieces so that the caller can
    feed it into the next call.
    """
    pieces = []  # type: List[Tuple[Text, Text]]
    begin = 0
    for pos, char in enumerate(text):
        if char == '<':
            in_tag = True
            # Flush whatever preceded the opening bracket, if anything.
            if begin != pos:
                pieces.append(('text', text[begin:pos]))
            begin = pos
        elif char == '>':
            in_tag = False
            # The slice always contains at least the '>' itself, so it is
            # never empty.
            after = pos + 1
            pieces.append(('tag', text[begin:after]))
            begin = after

    remainder = text[begin:]
    if remainder:
        pieces.append(('tag' if in_tag else 'text', remainder))
    return pieces, in_tag
 | 
						|
 | 
						|
def highlight_chunks(chunks, highlight_func):
    # type: (List[Tuple[Text, Text]], Callable[[Text], Text]) -> Text
    """Reassemble ``chunks`` into one string, highlighting only the text parts.

    ``chunks`` is a sequence of ``(kind, piece)`` tuples.  Pieces whose kind
    is ``'text'`` are passed through ``highlight_func``; every other piece is
    emitted unchanged.  Returns the concatenation in the original order (an
    empty string for an empty sequence).
    """
    # `kind` rather than `type` so the builtin is not shadowed; a single join
    # replaces the repeated string concatenation.
    return u''.join(
        highlight_func(piece) if kind == 'text' else piece
        for kind, piece in chunks
    )
 | 
						|
 | 
						|
def verify_html(html):
    # type: (Text) -> bool
    """Cheap sanity check that the angle brackets in ``html`` are balanced.

    Returns False if a ``'<'`` appears while a tag is already open, if a
    ``'>'`` appears while no tag is open, or if the string ends with a tag
    still open; returns True otherwise.
    """
    # TODO: Really parse the produced HTML so that we are sure it is
    # well-formed.  That is hard, since we want strict parsing and also
    # need to accept html5 fragments, so for now only the bracket
    # structure is checked.
    inside = False
    for char in html:
        if char != '<' and char != '>':
            continue
        opening = (char == '<')
        # Opening while already inside, or closing while outside, is invalid.
        if opening == inside:
            return False
        inside = opening
    return not inside
 | 
						|
 | 
						|
def highlight_html_differences(s1, s2):
    # type: (Text, Text) -> Text
    """Return ``s2`` with its differences from ``s1`` wrapped in highlight spans.

    The two strings are diffed with diff_match_patch and the resulting
    operations are replayed to build the output:

    * a delete immediately followed by an insert (or the reverse) is shown as
      a replacement of the inserted text;
    * a lone insert is shown as inserted text;
    * a lone delete is shown as a highlighted single space, since the removed
      text itself is not part of the new content;
    * equal text is copied through unchanged.

    Highlighting is applied only to the non-tag pieces reported by chunkize.
    If the result fails verify_html, an error is logged, a message is sent
    to the "errors" stream when settings.ERROR_BOT is set, and the
    unhighlighted ``s2`` is returned instead.
    """
    differ = diff_match_patch()
    ops = differ.diff_main(s1, s2)
    # The return value is ignored; `ops` is presumably cleaned up in place.
    differ.diff_cleanupSemantic(ops)
    retval = u''
    # True while the output built so far ends inside an unclosed '<...' tag.
    in_tag = False

    idx = 0
    while idx < len(ops):
        op, text = ops[idx]
        next_op = None
        next_text = None
        if idx != len(ops) - 1:
            next_op, next_text = ops[idx + 1]
        if op == diff_match_patch.DIFF_DELETE and next_op == diff_match_patch.DIFF_INSERT:
            # Replace operation: show the inserted text, skip the paired op.
            chunks, in_tag = chunkize(next_text, in_tag)
            retval += highlight_chunks(chunks, highlight_replaced)
            idx += 1
        elif op == diff_match_patch.DIFF_INSERT and next_op == diff_match_patch.DIFF_DELETE:
            # Replace operation
            # I have no idea whether diff_match_patch generates inserts followed
            # by deletes, but it doesn't hurt to handle them
            chunks, in_tag = chunkize(text, in_tag)
            retval += highlight_chunks(chunks, highlight_replaced)
            idx += 1
        elif op == diff_match_patch.DIFF_DELETE:
            # Only emit the deletion marker between tags.  Emitting a <span>
            # while a tag is still open (e.g. a character removed from an
            # attribute value) would put '<' inside the tag, which fails
            # verify_html and throws away the whole highlighted result.
            if not in_tag:
                retval += highlight_deleted(' ')
        elif op == diff_match_patch.DIFF_INSERT:
            chunks, in_tag = chunkize(text, in_tag)
            retval += highlight_chunks(chunks, highlight_inserted)
        elif op == diff_match_patch.DIFF_EQUAL:
            # Only the updated tag state is needed; the text is copied as-is.
            _, in_tag = chunkize(text, in_tag)
            retval += text
        idx += 1

    if not verify_html(retval):
        # Imported here rather than at module level.
        from zerver.lib.actions import internal_send_message
        from zerver.models import get_user_profile_by_email
        # We probably want more information here
        logging.getLogger('').error('HTML diff produced mal-formed HTML')

        if settings.ERROR_BOT is not None:
            subject = "HTML diff failure on %s" % (platform.node(),)
            realm = get_user_profile_by_email(settings.ERROR_BOT).realm
            internal_send_message(realm, settings.ERROR_BOT, "stream",
                                  "errors", subject, "HTML diff produced malformed HTML")
        # Fall back to the new content without any highlighting.
        return s2

    return retval
 |