mirror of
https://github.com/zulip/zulip.git
synced 2025-11-23 16:01:24 +00:00
Just before this is pushed to prod, we need to rename the Humbug error bot in the database using:
    ./manage.py change_user_email humbug+errors@humbughq.com error-bot@zulip.com
    /etc/init.d/memcached restart
No action is required on pushing this to staging, but in between when this is pushed to staging and when it is pushed to prod (and that transition performed), error reporting to humbug will not work on staging. (imported from commit 93044bb01797c981067f359676826d4a5791e235)
115 lines
3.7 KiB
Python
115 lines
3.7 KiB
Python
from diff_match_patch import diff_match_patch
|
|
import platform
|
|
import logging
|
|
|
|
# TODO: handle changes in link hrefs
|
|
|
|
def highlight_with_class(klass, text):
    """Wrap ``text`` in a ``<span>`` element whose class attribute is ``klass``.

    Both values are interpolated as-is; no HTML escaping is performed here.
    """
    span_template = '<span class="%s">%s</span>'
    return span_template % (klass, text)
|
|
|
|
def highlight_inserted(text):
    """Return ``text`` wrapped in the span used to mark inserted content."""
    css_class = 'highlight_text_inserted'
    return highlight_with_class(css_class, text)
|
|
|
|
def highlight_deleted(text):
    """Return ``text`` wrapped in the span used to mark deleted content."""
    css_class = 'highlight_text_deleted'
    return highlight_with_class(css_class, text)
|
|
|
|
def highlight_replaced(text):
    """Return ``text`` wrapped in the span used to mark replaced content."""
    css_class = 'highlight_text_replaced'
    return highlight_with_class(css_class, text)
|
|
|
|
def chunkize(text, in_tag):
    """Split ``text`` into ``('tag', ...)`` and ``('text', ...)`` pieces.

    ``in_tag`` says whether ``text`` begins in the middle of a tag, which
    lets a caller feed consecutive fragments of one document through this
    function.  A ``'<'`` closes any pending piece as ``'text'`` and starts a
    new piece; a ``'>'`` closes the pending piece (including the ``'>'``) as
    ``'tag'``.  Whatever is left over at the end is labelled according to
    the final tag state.

    Returns a ``(pieces, in_tag)`` pair, where ``in_tag`` is the tag state
    after the last character of ``text``.
    """
    pieces = []
    begin = 0
    for pos, char in enumerate(text):
        if char == '<':
            in_tag = True
            # Flush whatever preceded the opening bracket.
            if pos > begin:
                pieces.append(('text', text[begin:pos]))
            begin = pos
        elif char == '>':
            in_tag = False
            # The closing bracket belongs to the tag piece.
            end = pos + 1
            if end != begin:
                pieces.append(('tag', text[begin:end]))
            begin = end

    # Trailing remainder: an unterminated tag or plain text.
    if begin != len(text):
        kind = 'tag' if in_tag else 'text'
        pieces.append((kind, text[begin:]))
    return pieces, in_tag
|
|
|
|
def highlight_chunks(chunks, highlight_func):
    """Reassemble ``chunks`` into one string, highlighting only the text.

    ``chunks`` is an iterable of ``(kind, content)`` pairs.  Content whose
    kind is ``'text'`` is passed through ``highlight_func``; everything else
    (i.e. tag markup) is emitted unchanged so that highlighting never lands
    inside a tag.

    Returns the concatenation of all processed pieces ('' for no chunks).
    """
    # A single join avoids repeated string concatenation, and the loop
    # variable no longer shadows the ``type`` builtin.
    return ''.join(
        highlight_func(content) if kind == 'text' else content
        for kind, content in chunks
    )
|
|
|
|
def verify_html(html):
    """Run a basic bracket-balance sanity check over ``html``.

    Returns False if a ``'<'`` appears while a tag is already open, if a
    ``'>'`` appears while no tag is open, or if the string ends with a tag
    still open; returns True otherwise.
    """
    # TODO: Actually parse the resulting HTML to ensure we don't
    # create mal-formed markup.  This is unfortunately hard because
    # we both want pretty strict parsing and we want to parse html5
    # fragments.  For now, we do a basic sanity check.
    inside_tag = False
    for char in html:
        if char == '<':
            opening = True
        elif char == '>':
            opening = False
        else:
            continue
        # An opening bracket is only legal outside a tag and a closing
        # bracket only inside one, so the bracket kind must differ from
        # the current state.
        if opening == inside_tag:
            return False
        inside_tag = opening
    return not inside_tag
|
|
|
|
def highlight_html_differences(s1, s2):
    """Return ``s2`` with its differences from ``s1`` wrapped in highlight spans.

    The two strings are diffed with diff_match_patch and the resulting
    operations are walked in order:

    * a delete immediately followed by an insert (or the reverse) is treated
      as a replacement and the new text is highlighted as replaced;
    * a lone insert is highlighted as inserted;
    * a lone delete leaves a highlighted placeholder where text was removed;
    * equal text is copied through unchanged.

    Highlighting is applied only to text outside of tags; tag markup taken
    from ``s2`` is always emitted verbatim.  If the assembled result fails
    ``verify_html``, the failure is logged and reported to the "errors"
    stream, and ``s2`` is returned unmodified.
    """
    differ = diff_match_patch()
    ops = differ.diff_main(s1, s2)
    differ.diff_cleanupSemantic(ops)

    pieces = []
    # Whether the output assembled so far ends in the middle of a tag.
    in_tag = False

    idx = 0
    while idx < len(ops):
        op, text = ops[idx]
        next_op = None
        if idx != len(ops) - 1:
            next_op, next_text = ops[idx + 1]

        if op == diff_match_patch.DIFF_DELETE and next_op == diff_match_patch.DIFF_INSERT:
            # Replace operation: only the inserted half appears in the output.
            chunks, in_tag = chunkize(next_text, in_tag)
            pieces.append(highlight_chunks(chunks, highlight_replaced))
            # Skip the insert that was just consumed.
            idx += 1
        elif op == diff_match_patch.DIFF_INSERT and next_op == diff_match_patch.DIFF_DELETE:
            # Replace operation
            # I have no idea whether diff_match_patch generates inserts followed
            # by deletes, but it doesn't hurt to handle them
            chunks, in_tag = chunkize(text, in_tag)
            pieces.append(highlight_chunks(chunks, highlight_replaced))
            # Skip the delete that was just consumed.
            idx += 1
        elif op == diff_match_patch.DIFF_DELETE:
            # A marker span placed inside a tag would always produce
            # malformed markup (and so discard the whole diff), so the
            # placeholder is only emitted when we are between tags.
            if not in_tag:
                pieces.append(highlight_deleted(' '))
        elif op == diff_match_patch.DIFF_INSERT:
            chunks, in_tag = chunkize(text, in_tag)
            pieces.append(highlight_chunks(chunks, highlight_inserted))
        elif op == diff_match_patch.DIFF_EQUAL:
            # Only the updated tag state is needed; the text is copied as-is.
            _, in_tag = chunkize(text, in_tag)
            pieces.append(text)
        idx += 1

    retval = ''.join(pieces)

    if not verify_html(retval):
        from zephyr.lib.actions import internal_send_message
        # We probably want more information here
        logging.getLogger('').error('HTML diff produced mal-formed HTML')

        subject = "HTML diff failure on %s" % (platform.node(),)
        internal_send_message("error-bot@zulip.com", "stream",
                              "errors", subject, "HTML diff produced malformed HTML")
        # Fall back to the new content without any highlighting.
        return s2

    return retval
|
|
|