check-templates: Avoid duplicate tokenizing step.

Now we only tokenize the file once, and we pass **validated** tokens to the pretty printer. There are a few reasons for this:

* It obviously saves a lot of extra computation just in terms of tokenization.
* It allows our validator to add fields to the Token objects that help the pretty printer.

I also removed/tweaked a lot of legacy tests for pretty_print.py that were exercising bizarrely formatted HTML that we now simply ban during the validation phase.
2025-11-14 02:48:00 +00:00 · 2021-12-02 12:19:19 +00:00
parent 0decfa8da0
commit c0d72ba236
4 changed files with 68 additions and 236 deletions
--- a/tools/lib/pretty_print.py
+++ b/tools/lib/pretty_print.py
@@ -3,7 +3,7 @@ from typing import List, Optional, Set

 from zulint.printer import ENDC, GREEN

-from .template_parser import Token, is_django_block_tag, tokenize
+from .template_parser import Token, is_django_block_tag


 def requires_indent(line: str) -> bool:
@@ -122,9 +122,7 @@ def get_exempted_lines(tokens: List[Token]) -> Set[int]:
    return exempted


-def pretty_print_html(html: str) -> str:
-    tokens = tokenize(html)
-
+def pretty_print_html(html: str, tokens: List[Token]) -> str:
    exempted_lines = get_exempted_lines(tokens)

    tokens.reverse()
@@ -207,10 +205,10 @@ def pretty_print_html(html: str) -> str:
    return "\n".join(formatted_lines)


-def validate_indent_html(fn: str, fix: bool) -> bool:
+def validate_indent_html(fn: str, tokens: List[Token], fix: bool) -> bool:
    with open(fn) as f:
        html = f.read()
-    phtml = pretty_print_html(html)
+    phtml = pretty_print_html(html, tokens)
    if not html.split("\n") == phtml.split("\n"):
        if fix:
            print(GREEN + f"Automatically fixing indentation for {fn}" + ENDC)