template parser: Recognize comments in Handlebars and Jinja2 templates.

This stops the parser from picking up code tags that are commented out in template files. It also fixes overly deep nesting of Jinja2 logic statements: only genuinely nested Jinja2 logic statements now get a deeper indentation level, and tags preceding a nested Handlebars or Django tag no longer get the wrong indent. Closing tags whose offsets did not initially match the indent level of their start tag are now handled correctly, tags occurring between 'pre' tags are ignored when computing deeper indent levels, and Django non-block tags are filtered out so they no longer add an indentation level.
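For example (a minimal sketch; the import path is an assumption based on where this module lives in the tree), a tag that is commented out no longer produces start/end tokens and cannot unbalance validation:

    from tools.lib.template_parser import tokenize, validate

    text = '<div>{{! <code>commented out</code> }}</div>'
    print([t.kind for t in tokenize(text)])
    # ['html_start', 'handlebar_comment', 'html_end']
    validate(text=text)  # passes; the <code> tag inside the comment is ignored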
from __future__ import absolute_import
from __future__ import print_function

from typing import Callable, List, Optional

from six.moves import range

import re


class TemplateParserException(Exception):
    def __init__(self, message):
        # type: (str) -> None
        self.message = message

    def __str__(self):
        # type: () -> str
        return self.message


class TokenizationException(Exception):
    def __init__(self, message, line_content=None):
        # type: (str, Optional[str]) -> None
        self.message = message
        self.line_content = line_content


class TokenizerState(object):
    def __init__(self):
        # type: () -> None
        self.i = 0
        self.line = 1
        self.col = 1


class Token(object):
    def __init__(self, kind, s, tag, line, col, line_span):
        # type: (str, str, str, int, int, int) -> None
        self.kind = kind
        self.s = s
        self.tag = tag
        self.line = line
        self.col = col
        self.line_span = line_span


def tokenize(text):
    # type: (str) -> List[Token]
    def advance(n):
        # type: (int) -> None
        for _ in range(n):
            state.i += 1
            if state.i >= 0 and text[state.i - 1] == '\n':
                state.line += 1
                state.col = 1
            else:
                state.col += 1

    def looking_at(s):
        # type: (str) -> bool
        return text[state.i:state.i+len(s)] == s

    def looking_at_htmlcomment():
        # type: () -> bool
        return looking_at("<!--")

    def looking_at_handlebarcomment():
        # type: () -> bool
        return looking_at("{{!")

    def looking_at_djangocomment():
        # type: () -> bool
        return looking_at("{#")

    def looking_at_html_start():
        # type: () -> bool
        return looking_at("<") and not looking_at("</")

    def looking_at_html_end():
        # type: () -> bool
        return looking_at("</")

    def looking_at_handlebars_start():
        # type: () -> bool
        return looking_at("{{#") or looking_at("{{^")

    def looking_at_handlebars_end():
        # type: () -> bool
        return looking_at("{{/")

    def looking_at_django_start():
        # type: () -> bool
        return looking_at("{% ") and not looking_at("{% end")

    def looking_at_django_end():
        # type: () -> bool
        return looking_at("{% end")

    state = TokenizerState()
    tokens = []

    while state.i < len(text):
        try:
            if looking_at_htmlcomment():
                s = get_html_comment(text, state.i)
                tag = s[4:-3]
                kind = 'html_comment'
            elif looking_at_handlebarcomment():
                s = get_handlebar_comment(text, state.i)
                tag = s[3:-2]
                kind = 'handlebar_comment'
            elif looking_at_djangocomment():
                s = get_django_comment(text, state.i)
                tag = s[2:-2]
                kind = 'django_comment'
            elif looking_at_html_start():
                s = get_html_tag(text, state.i)
                tag_parts = s[1:-1].split()

                if not tag_parts:
                    raise TemplateParserException("Tag name missing")

                tag = tag_parts[0]

                if is_special_html_tag(s, tag):
                    kind = 'html_special'
                elif s.endswith('/>'):
                    kind = 'html_singleton'
                else:
                    kind = 'html_start'
            elif looking_at_html_end():
                s = get_html_tag(text, state.i)
                tag = s[2:-1]
                kind = 'html_end'
            elif looking_at_handlebars_start():
                s = get_handlebars_tag(text, state.i)
                tag = s[3:-2].split()[0]
                kind = 'handlebars_start'
            elif looking_at_handlebars_end():
                s = get_handlebars_tag(text, state.i)
                tag = s[3:-2]
                kind = 'handlebars_end'
            elif looking_at_django_start():
                s = get_django_tag(text, state.i)
                tag = s[3:-2].split()[0]
                kind = 'django_start'
            elif looking_at_django_end():
                s = get_django_tag(text, state.i)
                tag = s[6:-3]
                kind = 'django_end'
            else:
                advance(1)
                continue
        except TokenizationException as e:
            raise TemplateParserException('''%s at Line %d Col %d:"%s"''' %
                                          (e.message, state.line, state.col,
                                           e.line_content))

        line_span = len(s.split('\n'))
        token = Token(
            kind=kind,
            s=s,
            tag=tag,
            line=state.line,
            col=state.col,
            line_span=line_span
        )
        tokens.append(token)
        advance(len(s))
        if kind == 'html_singleton':
            # Insert a pseudo html_singleton_end token here so consumers
            # (e.g. our HTML pretty printer) can easily detect where a
            # singleton HTML tag ends.
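            # For example, '<br />' yields an 'html_singleton' token
            # followed by a pseudo '</br>' html_singleton_end token.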
            token = Token(
                kind='html_singleton_end',
                s='</' + tag + '>',
                tag=tag,
                line=state.line,
                col=state.col,
                line_span=1
            )
            tokens.append(token)

    return tokens
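
# Illustrative output: tokenize('<p>{# hidden #}</p>') yields token kinds
# ['html_start', 'django_comment', 'html_end']; the commented-out region
# becomes a single comment token.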


def validate(fn=None, text=None, check_indent=True):
    # type: (Optional[str], Optional[str], bool) -> None
    assert fn or text

    if fn is None:
        fn = '<in memory file>'

    if text is None:
        with open(fn) as f:
            text = f.read()

    tokens = tokenize(text)

    class State(object):
        def __init__(self, func):
            # type: (Callable[[Token], None]) -> None
            self.depth = 0
            self.matcher = func

    def no_start_tag(token):
        # type: (Token) -> None
        raise TemplateParserException('''
            No start tag
            fn: %s
            end tag:
                %s
                line %d, col %d
            ''' % (fn, token.tag, token.line, token.col))

    state = State(no_start_tag)

    def start_tag_matcher(start_token):
        # type: (Token) -> None
        state.depth += 1
        start_tag = start_token.tag
        start_line = start_token.line
        start_col = start_token.col

        old_matcher = state.matcher

        def f(end_token):
            # type: (Token) -> None
            end_tag = end_token.tag
            end_line = end_token.line
            end_col = end_token.col

            if start_tag == 'a':
                max_lines = 3
            else:
                max_lines = 1

            problem = None
            if (start_tag == 'code') and (end_line == start_line + 1):
                problem = 'Code tag is split across two lines.'
            if start_tag != end_tag:
                problem = 'Mismatched tag.'
            elif check_indent and (end_line > start_line + max_lines):
                if end_col != start_col:
                    problem = 'Bad indentation.'
            if problem:
                raise TemplateParserException('''
                    fn: %s
                    %s
                    start:
                        %s
                        line %d, col %d
                    end tag:
                        %s
                        line %d, col %d
                    ''' % (fn, problem, start_token.s, start_line, start_col,
                           end_tag, end_line, end_col))
            state.matcher = old_matcher
            state.depth -= 1

        state.matcher = f

    for token in tokens:
        kind = token.kind
        tag = token.tag

        if kind == 'html_start':
            start_tag_matcher(token)
        elif kind == 'html_end':
            state.matcher(token)

        elif kind == 'handlebars_start':
            start_tag_matcher(token)
        elif kind == 'handlebars_end':
            state.matcher(token)

        elif kind == 'django_start':
            if is_django_block_tag(tag):
                start_tag_matcher(token)
        elif kind == 'django_end':
            state.matcher(token)

    if state.depth != 0:
        raise TemplateParserException('Missing end tag')
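
# Illustrative failure: validate(text='<b><i>x</b></i>') raises
# TemplateParserException with the 'Mismatched tag.' problem.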


def is_special_html_tag(s, tag):
    # type: (str, str) -> bool
    return tag in ['link', 'meta', '!DOCTYPE']


def is_django_block_tag(tag):
    # type: (str) -> bool
    return tag in [
        'autoescape',
        'block',
        'comment',
        'for',
        'if',
        'ifequal',
        'verbatim',
        'blocktrans',
        'trans',
        'raw',
    ]
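
# e.g. is_django_block_tag('for') returns True, while a non-block tag such
# as 'include' returns False and so never opens a tag matcher in validate().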


def get_handlebars_tag(text, i):
    # type: (str, int) -> str
    end = i + 2
    while end < len(text) - 1 and text[end] != '}':
        end += 1
    # Compare via a slice so a truncated tag at end-of-input raises the
    # TokenizationException below rather than an IndexError.
    if text[end:end+2] != '}}':
        raise TokenizationException('Tag missing "}}"', text[i:end+2])
    s = text[i:end+2]
    return s
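
# e.g. get_handlebars_tag('{{#if foo}} ...', 0) returns '{{#if foo}}'.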


def get_django_tag(text, i):
    # type: (str, int) -> str
    end = i + 2
    while end < len(text) - 1 and text[end] != '%':
        end += 1
    # Compare via a slice, as in get_handlebars_tag, to avoid indexing
    # past the end of a truncated tag.
    if text[end:end+2] != '%}':
        raise TokenizationException('Tag missing "%}"', text[i:end+2])
    s = text[i:end+2]
    return s
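
# e.g. get_django_tag('{% if foo %} ...', 0) returns '{% if foo %}'.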


def get_html_tag(text, i):
    # type: (str, int) -> str
    quote_count = 0
    end = i + 1
    unclosed_end = 0
    # Scan until an unquoted '>': while the double-quote count is odd we
    # are inside an attribute value, so a '>' there does not end the tag.
    while end < len(text) and (text[end] != '>' or (quote_count % 2 != 0 and text[end] != '<')):
        if text[end] == '"':
            quote_count += 1
        if not unclosed_end and text[end] == '<':
            unclosed_end = end
        end += 1
    if quote_count % 2 != 0:
        if unclosed_end:
            raise TokenizationException('Unbalanced Quotes', text[i:unclosed_end])
        else:
            raise TokenizationException('Unbalanced Quotes', text[i:end+1])
    if end == len(text) or text[end] != '>':
        raise TokenizationException('Tag missing ">"', text[i:end+1])
    s = text[i:end+1]
    return s
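
# e.g. get_html_tag('<a href="/x">link</a>', 0) returns '<a href="/x">';
# the quote counting means a '>' inside a quoted attribute value does not
# terminate the tag.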


def get_html_comment(text, i):
    # type: (str, int) -> str
    end = i + 7
    unclosed_end = 0
    while end <= len(text):
        if text[end-3:end] == '-->':
            return text[i:end]
        # Guard the index so an unclosed comment at end-of-file raises a
        # TokenizationException rather than an IndexError.
        if not unclosed_end and end < len(text) and text[end] == '<':
            unclosed_end = end
        end += 1
    raise TokenizationException('Unclosed comment', text[i:unclosed_end])
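
# e.g. get_html_comment('<!-- note -->', 0) returns the full comment,
# delimiters included.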


def get_handlebar_comment(text, i):
    # type: (str, int) -> str
    end = i + 5
    unclosed_end = 0
    while end <= len(text):
        if text[end-2:end] == '}}':
            return text[i:end]
        # End-of-file guard, as in get_html_comment.
        if not unclosed_end and end < len(text) and text[end] == '<':
            unclosed_end = end
        end += 1
    raise TokenizationException('Unclosed comment', text[i:unclosed_end])


def get_django_comment(text, i):
    # type: (str, int) -> str
    end = i + 4
    unclosed_end = 0
    while end <= len(text):
        if text[end-2:end] == '#}':
            return text[i:end]
        # End-of-file guard, as in get_html_comment.
        if not unclosed_end and end < len(text) and text[end] == '<':
            unclosed_end = end
        end += 1
    raise TokenizationException('Unclosed comment', text[i:unclosed_end])