Files
zulip/tools/lib/html_branches.py
Anders Kaseorg df001db1a9 black: Reformat with Black 23.
Black 23 enforces some slightly more specific rules about empty line
counts and redundant parenthesis removal, but the result is still
compatible with Black 22.

(This does not actually upgrade our Python environment to Black 23
yet.)

Signed-off-by: Anders Kaseorg <anders@zulip.com>
2023-02-02 10:40:13 -08:00

100 lines
2.5 KiB
Python

import re
from collections import defaultdict
from typing import Dict, List
from .template_parser import FormattedError, Token, tokenize
class TagInfo:
    """Describe one tag: its name, its classes, its ids, and its source token.

    Besides the raw constructor arguments, each instance carries ``words``,
    a flat list holding the tag name followed by every class prefixed with
    "." and every id prefixed with "#".
    """

    def __init__(self, tag: str, classes: List[str], ids: List[str], token: Token) -> None:
        self.tag = tag
        self.classes = classes
        self.ids = ids
        self.token = token

        # Tag name first, then ".class" entries, then "#id" entries.
        self.words = [tag]
        self.words.extend("." + class_name for class_name in classes)
        self.words.extend("#" + id_name for id_name in ids)

    def text(self) -> str:
        """Return the tag as one selector-like string, e.g. ``div.a.b#x``."""
        class_part = "".join("." + class_name for class_name in self.classes)
        id_part = "".join("#" + id_name for id_name in self.ids)
        return self.tag + class_part + id_part
def get_tag_info(token: Token) -> TagInfo:
    """Build a TagInfo from the class and id attributes found in a token.

    The token's ``s`` text is searched for ``class`` and ``id`` attributes,
    in both double- and single-quoted form.  Only the first match of each
    pattern is used; its value is split with split_for_id_and_class and
    added to the matching list.
    """
    source = token.s
    tag = token.tag
    classes: List[str] = []
    ids: List[str] = []

    # Each pattern is paired with the list that collects its results.
    attribute_patterns = [
        (classes, ' class="(.*?)"'),
        (classes, " class='(.*?)'"),
        (ids, ' id="(.*?)"'),
        (ids, " id='(.*?)'"),
    ]

    for bucket, pattern in attribute_patterns:
        match = re.search(pattern, source)
        if match is None:
            continue
        # Every pattern has exactly one capture group: the attribute value.
        bucket.extend(split_for_id_and_class(match.group(1)))

    return TagInfo(tag=tag, classes=classes, ids=ids, token=token)
def split_for_id_and_class(element: str) -> List[str]:
    """Split an id/class attribute value into its space-separated words.

    Spaces that appear between a "{" and the next "}" do not split, so
    template variables stay in one piece.  For example,
    'red black {{ a|b|c }}' becomes ['red', 'black', '{{ a|b|c }}'].
    Runs of spaces never produce empty words.
    """
    words: List[str] = []
    inside_braces = False
    word_start = 0  # index where the word currently being scanned begins

    for pos, char in enumerate(element):
        if char == "{":
            inside_braces = True
        elif char == "}":
            inside_braces = False
        elif char == " " and not inside_braces:
            # A separating space ends the current word, if there is one.
            if pos > word_start:
                words.append(element[word_start:pos])
            word_start = pos + 1

    # Whatever follows the last separator is the final word.
    if word_start < len(element):
        words.append(element[word_start:])

    return words
def build_id_dict(templates: List[str]) -> Dict[str, List[str]]:
    """Map each id found in the given template files to where it occurs.

    Every file in ``templates`` is read and tokenized.  For each resulting
    tag, every id reported by get_tag_info is recorded under that id as a
    string of the form ``Line <line>:<filename>``.

    Returns a defaultdict(list) keyed by id.

    Raises Exception, naming the offending file, when tokenize raises
    FormattedError; the original error is attached as the cause.
    """
    template_id_dict: Dict[str, List[str]] = defaultdict(list)

    for fn in templates:
        with open(fn) as f:
            text = f.read()

        try:
            list_tags = tokenize(text)
        except FormattedError as e:
            # Chain explicitly so the traceback shows the tokenizer error as
            # the direct cause of this file-level failure.
            raise Exception(f"\nfn: {fn}\n{e}") from e

        for tag in list_tags:
            info = get_tag_info(tag)
            for id_name in info.ids:
                template_id_dict[id_name].append(f"Line {info.token.line}:{fn}")

    return template_id_dict