Fix bug with tools/html-grep.

We were ignoring singleton tags, such as "input" tags, in
html-grep.  This was an artifact of our tokenizer originally
being built to check indentation of templates, for which
singleton tags had been a distraction. This fix also cleans up
the template checking logic, since it can now rely
on the tokenizer to classify special tags and singleton tags.
The tokenizer is now more complete and more specific.
This commit is contained in:
Steve Howell
2016-08-07 06:28:17 -07:00
committed by Tim Abbott
parent acd76eb604
commit 184e45b774

View File

@@ -66,6 +66,11 @@ def tokenize(text):
if looking_at_html_start(): if looking_at_html_start():
s = get_html_tag(text, state.i) s = get_html_tag(text, state.i)
tag = s[1:-1].split()[0] tag = s[1:-1].split()[0]
if is_special_html_tag(s, tag):
kind = 'html_special'
elif s.endswith('/>'):
kind = 'html_singleton'
else:
kind = 'html_start' kind = 'html_start'
elif looking_at_html_end(): elif looking_at_html_end():
s = get_html_tag(text, state.i) s = get_html_tag(text, state.i)
@@ -171,10 +176,8 @@ def validate(fn=None, text=None, check_indent=True):
for token in tokens: for token in tokens:
kind = token.kind kind = token.kind
tag = token.tag tag = token.tag
s = token.s
if kind == 'html_start': if kind == 'html_start':
if not is_special_html_tag(s, tag):
start_tag_matcher(token) start_tag_matcher(token)
elif kind == 'html_end': elif kind == 'html_end':
state.matcher(token) state.matcher(token)
@@ -204,7 +207,6 @@ def validate(fn=None, text=None, check_indent=True):
def is_special_html_tag(s, tag): def is_special_html_tag(s, tag):
# type: (str, str) -> bool # type: (str, str) -> bool
return (s.startswith('<!--') or return (s.startswith('<!--') or
s.endswith('/>') or
tag in ['link', 'meta', '!DOCTYPE']) tag in ['link', 'meta', '!DOCTYPE'])
def is_django_block_tag(tag): def is_django_block_tag(tag):
@@ -384,11 +386,12 @@ def html_tag_tree(text):
stack = [top_level] stack = [top_level]
for token in tokens: for token in tokens:
if token.kind == 'html_start': if token.kind in ('html_start', 'html_singleton'):
if not is_special_html_tag(token.s, token.tag): if not is_special_html_tag(token.s, token.tag):
parent = stack[-1] parent = stack[-1]
node= Node(token=token, parent=parent) node= Node(token=token, parent=parent)
parent.children.append(node) parent.children.append(node)
if token.kind == 'html_start':
stack.append(node) stack.append(node)
elif token.kind == 'html_end': elif token.kind == 'html_end':
stack.pop() stack.pop()