Files
zulip/tools/lib/pretty_print.py
Steve Howell a744e38e67 check-templates: Make parser more thorough (and faster).
We now create tokens for whitespace and text, such that you
could rebuild the template file with "".join(token.s for
token in tokens).

I also fixed a few bugs related to not parsing
whitespace-control tokens.

We no longer ignore template variables, although we could do
a lot better at validating them.

The most immediate use case for the more thorough parser is
to simplify the pretty printer, but it should also make it
less likely for us to skip over new template constructs
(i.e. the tool will fail hard rather than acting strange).

Note that this speeds up the tool by almost 3x, which may be
slightly surprising considering we are building more tokens.
The reason is that we are now munching efficiently through
big chunks of whitespace and text at a time, rather than
checking each individual character to see if it starts one
of the N other token types.

The changes to the pretty_print module here are a bit ugly,
but they should mostly be made irrelevant in subsequent
commits.
2021-12-02 09:49:10 -08:00

231 lines
6.4 KiB
Python

import subprocess
from typing import List, Optional, Set
from zulint.printer import ENDC, GREEN
from .template_parser import Token, is_django_block_tag, tokenize
def requires_indent(line: str) -> bool:
line = line.lstrip()
return line.startswith("<")
def open_token(token: Token) -> bool:
if token.kind in (
"handlebars_start",
"html_start",
):
return True
if token.kind in (
"django_start",
"jinja2_whitespace_stripped_start",
"jinja2_whitespace_stripped_type2_start",
):
return is_django_block_tag(token.tag)
return False
def close_token(token: Token) -> bool:
return token.kind in (
"django_end",
"handlebars_end",
"html_end",
"jinja2_whitespace_stripped_end",
)
def else_token(token: Token) -> bool:
return token.kind in (
"django_else",
"handlebars_else",
)
def pop_unused_tokens(tokens: List[Token], row: int) -> bool:
was_closed = False
while tokens and tokens[-1].line <= row:
token = tokens.pop()
if close_token(token):
was_closed = True
return was_closed
def indent_pref(row: int, tokens: List[Token], line: str) -> str:
opens = 0
closes = 0
is_else = False
while tokens and tokens[-1].line == row:
token = tokens.pop()
if open_token(token):
opens += 1
elif close_token(token):
closes += 1
elif else_token(token):
is_else = True
if is_else:
if opens and closes:
return "neutral"
return "else"
i = opens - closes
if i == 0:
return "neutral"
elif i == 1:
return "open"
elif i == -1:
return "close"
else:
print(i, opens, closes)
raise Exception(f"too many tokens on row {row}")
def indent_level(s: str) -> int:
return len(s) - len(s.lstrip())
def same_indent(s1: str, s2: str) -> bool:
return indent_level(s1) == indent_level(s2)
def next_non_blank_line(lines: List[str], i: int) -> str:
next_line = ""
for j in range(i + 1, len(lines)):
next_line = lines[j]
if next_line.strip() != "":
break
return next_line
def get_exempted_lines(tokens: List[Token]) -> Set[int]:
exempted = set()
for code_tag in ("code", "pre", "script"):
for token in tokens:
if token.kind == "html_start" and token.tag == code_tag:
start: Optional[int] = token.line
if token.kind == "html_end" and token.tag == code_tag:
# The pretty printer expects well-formed HTML, even
# if it's strangely formatted, so we expect start
# to be None.
assert start is not None
# We leave code blocks completely alone, including
# the start and end tags.
for i in range(start, token.line + 1):
exempted.add(i)
start = None
return exempted
def pretty_print_html(html: str) -> str:
tokens = tokenize(html)
exempted_lines = get_exempted_lines(tokens)
tokens.reverse()
lines = html.split("\n")
open_offsets: List[str] = []
formatted_lines = []
next_offset: str = ""
tag_end_row: Optional[int] = None
tag_continuation_offset = ""
def line_offset(row: int, line: str, next_line: str) -> Optional[str]:
nonlocal next_offset
nonlocal tag_end_row
nonlocal tag_continuation_offset
if tag_end_row and row < tag_end_row:
was_closed = pop_unused_tokens(tokens, row)
if was_closed:
next_offset = open_offsets.pop()
return tag_continuation_offset
while tokens and tokens[-1].line < row:
token = tokens.pop()
offset = next_offset
if tokens:
token = tokens[-1]
if token.kind == "indent":
token = tokens[-2]
if (
token.line == row
and token.line_span > 1
and token.kind not in ("template_var", "text")
):
if token.kind in ("django_comment", "handlebar_comment", "html_comment"):
tag_continuation_offset = offset
else:
tag_continuation_offset = offset + " "
tag_end_row = row + token.line_span
pref = indent_pref(row, tokens, line)
if pref == "open":
if same_indent(line, next_line) and not requires_indent(line):
next_offset = offset
else:
next_offset = offset + " " * 4
open_offsets.append(offset)
elif pref == "else":
offset = open_offsets[-1]
if same_indent(line, next_line):
next_offset = offset
else:
next_offset = offset + " " * 4
elif pref == "close":
offset = open_offsets.pop()
next_offset = offset
return offset
def adjusted_line(row: int, line: str, next_line: str) -> str:
if line.strip() == "":
return ""
offset = line_offset(row, line, next_line)
if row in exempted_lines:
return line.rstrip()
if offset is None:
return line.rstrip()
return offset + line.strip()
for i, line in enumerate(lines):
# We use 1-based indexing for both rows and columns.
next_line = next_non_blank_line(lines, i)
row = i + 1
formatted_lines.append(adjusted_line(row, line, next_line))
return "\n".join(formatted_lines)
def validate_indent_html(fn: str, fix: bool) -> bool:
with open(fn) as f:
html = f.read()
phtml = pretty_print_html(html)
if not html.split("\n") == phtml.split("\n"):
if fix:
print(GREEN + f"Automatically fixing indentation for {fn}" + ENDC)
with open(fn, "w") as f:
f.write(phtml)
# Since we successfully fixed the issues, we return True.
return True
print(
"Invalid indentation detected in file: "
f"{fn}\nDiff for the file against expected indented file:",
flush=True,
)
subprocess.run(["diff", fn, "-"], input=phtml, universal_newlines=True)
print()
print("This problem can be fixed with the `--fix` option.")
return False
return True