Files
zulip/zephyr/lib/html_diff.py
Tim Abbott b17741a5c4 [manual] Rename error bot to zulip.com domain.
Just before this is pushed to prod, we need to rename the Humbug error
bot in the database using:

./manage.py change_user_email humbug+errors@humbughq.com error-bot@zulip.com
/etc/init.d/memcached restart

No action is required on pushing this to staging, but in between when
this is pushed to staging and when it is pushed to prod (and that
transition performed), error reporting to humbug will not work on
staging.

(imported from commit 93044bb01797c981067f359676826d4a5791e235)
2013-07-25 17:16:54 -04:00

115 lines
3.7 KiB
Python

from diff_match_patch import diff_match_patch
import platform
import logging
# TODO: handle changes in link hrefs
def highlight_with_class(klass, text):
return '<span class="%s">%s</span>' % (klass, text)
def highlight_inserted(text):
return highlight_with_class('highlight_text_inserted', text)
def highlight_deleted(text):
return highlight_with_class('highlight_text_deleted', text)
def highlight_replaced(text):
return highlight_with_class('highlight_text_replaced', text)
def chunkize(text, in_tag):
start = 0
idx = 0
chunks = []
for c in text:
if c == '<':
in_tag = True
if start != idx:
chunks.append(('text', text[start:idx]))
start = idx
elif c == '>':
in_tag = False
if start != idx + 1:
chunks.append(('tag', text[start:idx + 1]))
start = idx + 1
idx += 1
if start != idx:
chunks.append(('tag' if in_tag else 'text', text[start:idx]))
return chunks, in_tag
def highlight_chunks(chunks, highlight_func):
retval = ''
for type, text in chunks:
if type == 'text':
retval += highlight_func(text)
else:
retval += text
return retval
def verify_html(html):
# TODO: Actually parse the resulting HTML to ensure we don't
# create mal-formed markup. This is unfortunately hard because
# we both want pretty strict parsing and we want to parse html5
# fragments. For now, we do a basic sanity check.
in_tag = False
for c in html:
if c == '<':
if in_tag:
return False
in_tag = True
elif c == '>':
if not in_tag:
return False
in_tag = False
if in_tag:
return False
return True
def highlight_html_differences(s1, s2):
differ = diff_match_patch()
ops = differ.diff_main(s1, s2)
differ.diff_cleanupSemantic(ops)
retval = ''
in_tag = False
idx = 0
while idx < len(ops):
op, text = ops[idx]
next_op = None
if idx != len(ops) - 1:
next_op, next_text = ops[idx + 1]
if op == diff_match_patch.DIFF_DELETE and next_op == diff_match_patch.DIFF_INSERT:
# Replace operation
chunks, in_tag = chunkize(next_text, in_tag)
retval += highlight_chunks(chunks, highlight_replaced)
idx += 1
elif op == diff_match_patch.DIFF_INSERT and next_op == diff_match_patch.DIFF_DELETE:
# Replace operation
# I have no idea whether diff_match_patch generates inserts followed
# by deletes, but it doesn't hurt to handle them
chunks, in_tag = chunkize(text, in_tag)
retval += highlight_chunks(chunks, highlight_replaced)
idx += 1
elif op == diff_match_patch.DIFF_DELETE:
retval += highlight_deleted('&nbsp;')
elif op == diff_match_patch.DIFF_INSERT:
chunks, in_tag = chunkize(text, in_tag)
retval += highlight_chunks(chunks, highlight_inserted)
elif op == diff_match_patch.DIFF_EQUAL:
chunks, in_tag = chunkize(text, in_tag)
retval += text
idx += 1
if not verify_html(retval):
from zephyr.lib.actions import internal_send_message
# We probably want more information here
logging.getLogger('').error('HTML diff produced mal-formed HTML')
subject = "HTML diff failure on %s" % (platform.node(),)
internal_send_message("error-bot@zulip.com", "stream",
"errors", subject, "HTML diff produced malformed HTML")
return s2
return retval