mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 16:14:02 +00:00
template_parser: Check for invalid combinations of HTML elements.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
committed by
Tim Abbott
parent
9452e2cbb2
commit
8691c11933
191
tools/lib/html_elements.py
Normal file
191
tools/lib/html_elements.py
Normal file
@@ -0,0 +1,191 @@
|
||||
from collections.abc import Iterator
|
||||
|
||||
# Maps (tag, context the tag appears in) -> context that applies to the tag's
# children.  The validator looks a start/singleton tag up under the current
# context and each of its fallbacks (see HTML_CONTEXT_FALLBACKS); a tag with no
# matching entry is reported as invalid in that context.
#
# Two result values are handled specially by the validator:
#   "transparent": the children keep the context the tag itself appeared in.
#   "void":        the tag has no children and must be written self-closing.
VALID_HTML_CONTEXTS: dict[tuple[str, str], str] = {
    # https://html.spec.whatwg.org/multipage/indices.html#elements-3
    ("a", "phrasing"): "transparent",
    ("abbr", "phrasing"): "phrasing",
    ("address", "flow"): "flow",
    ("area", "phrasing"): "void",
    ("article", "flow"): "flow",
    ("aside", "flow"): "flow",
    ("audio", "phrasing"): "<audio>",
    ("b", "phrasing"): "phrasing",
    ("base", "<head>"): "void",
    ("bdi", "phrasing"): "phrasing",
    ("bdo", "phrasing"): "phrasing",
    ("blockquote", "flow"): "flow",
    ("body", "<html>"): "flow",
    ("br", "phrasing"): "void",
    ("button", "phrasing"): "phrasing",
    ("button", "<select>"): "phrasing",
    ("canvas", "phrasing"): "transparent",
    ("caption", "<table>"): "flow",
    ("center", "flow"): "flow",  # FIXME: obsolete, remove this
    ("cite", "phrasing"): "phrasing",
    ("code", "phrasing"): "phrasing",
    ("col", "<colgroup>"): "void",
    ("colgroup", "<table>"): "<colgroup>",
    ("data", "phrasing"): "phrasing",
    ("datalist", "phrasing"): "<datalist>",
    ("dd", "<dl>"): "flow",
    ("del", "phrasing"): "transparent",
    ("details", "flow"): "<details>",
    ("dfn", "phrasing"): "phrasing",
    ("dialog", "flow"): "flow",
    ("div", "flow"): "flow",
    ("div", "<dl>"): "<dl>",
    ("div", "<select> content"): "<select> content",
    ("div", "<optgroup> content"): "<optgroup> content",
    ("div", "<option> content"): "<option> content",
    ("dl", "flow"): "<dl>",
    ("dt", "<dl>"): "phrasing",
    ("em", "phrasing"): "phrasing",
    ("embed", "phrasing"): "void",
    ("fieldset", "flow"): "<fieldset>",
    ("figcaption", "<figure>"): "flow",
    ("figure", "flow"): "<figure>",
    ("footer", "flow"): "flow",
    ("form", "flow"): "flow",
    ("h1", "plain heading"): "phrasing",
    ("h2", "plain heading"): "phrasing",
    ("h3", "plain heading"): "phrasing",
    ("h4", "plain heading"): "phrasing",
    ("h5", "plain heading"): "phrasing",
    ("h6", "plain heading"): "phrasing",
    ("head", "<html>"): "<head>",
    ("header", "flow"): "flow",
    ("hgroup", "heading"): "<hgroup>",
    ("hr", "flow"): "void",
    ("hr", "<select> content"): "void",
    ("html", "document"): "<html>",
    ("i", "phrasing"): "phrasing",
    ("iframe", "phrasing"): "empty",
    ("img", "phrasing"): "void",
    ("img", "<picture>"): "void",
    ("input", "phrasing"): "void",
    ("ins", "phrasing"): "transparent",
    ("kbd", "phrasing"): "phrasing",
    ("label", "phrasing"): "phrasing",
    ("legend", "<fieldset>"): "phrasing/heading",
    ("li", "list"): "flow",
    ("link", "<head>"): "void",
    ("link", "phrasing"): "void",
    ("main", "flow"): "flow",
    ("map", "phrasing"): "<map>",
    ("mark", "phrasing"): "phrasing",
    ("math", "phrasing"): "MathML",
    ("menu", "flow"): "list",
    ("meta", "<head>"): "void",
    ("meta", "phrasing"): "void",
    ("meter", "phrasing"): "phrasing",
    ("nav", "flow"): "flow",
    ("noscript", "<head>"): "transparent",
    ("noscript", "phrasing"): "transparent",
    ("noscript", "<select> content"): "transparent",
    ("noscript", "<optgroup> content"): "transparent",
    ("object", "phrasing"): "transparent",
    ("ol", "flow"): "list",
    # <optgroup> and <option> are keyed on the "... content" contexts, like
    # their <hr>/<noscript>/<div> siblings.  "<select>" falls back to
    # "<select> content", so direct children of <select> still match, and
    # children wrapped in a <div> (which keeps the "... content" context)
    # match as well.  The children of <optgroup> are given the context
    # "<optgroup> content", so that is the context <option> must be keyed on.
    ("optgroup", "<select> content"): "<optgroup> content",
    ("option", "<select> content"): "<option> content",
    ("option", "<datalist>"): "<option> content",
    ("option", "<optgroup> content"): "<option> content",
    ("output", "phrasing"): "phrasing",
    ("p", "flow"): "phrasing",
    ("p", "<hgroup>"): "phrasing",
    ("picture", "phrasing"): "<picture>",
    ("pre", "flow"): "phrasing",
    ("progress", "phrasing"): "phrasing",
    ("q", "phrasing"): "phrasing",
    ("rp", "<ruby>"): "phrasing",
    ("rt", "<ruby>"): "phrasing",
    ("ruby", "phrasing"): "<ruby>",
    ("s", "phrasing"): "phrasing",
    ("samp", "phrasing"): "phrasing",
    ("script", "<head>"): "<script>",
    ("script", "phrasing"): "<script>",
    ("script", "script-supporting"): "<script>",
    ("search", "flow"): "flow",
    ("section", "flow"): "flow",
    ("select", "phrasing"): "<select>",
    # NOTE(review): no entry in this table yields a "<button>" context, so
    # this key looks unreachable as written -- confirm the intended context.
    ("selectedcontent", "<button>"): "empty",
    ("slot", "phrasing"): "transparent",
    ("small", "phrasing"): "phrasing",
    ("source", "<picture>"): "void",
    ("source", "<video>"): "void",
    ("source", "<audio>"): "void",
    ("span", "phrasing"): "phrasing",
    ("strong", "phrasing"): "phrasing",
    ("style", "<head>"): "<style>",
    ("sub", "phrasing"): "phrasing",
    ("summary", "<details>"): "phrasing/heading",
    ("sup", "phrasing"): "phrasing",
    ("svg", "phrasing"): "SVG",
    ("table", "flow"): "<table>",
    ("tbody", "<table>"): "<tbody>",
    ("td", "<tr>"): "flow",
    ("template", "<head>"): "unknown",
    ("template", "phrasing"): "unknown",
    ("template", "script-supporting"): "unknown",
    ("template", "<colgroup>"): "unknown",
    ("textarea", "phrasing"): "text",
    ("tfoot", "<table>"): "<tfoot>",
    ("th", "<tr>"): "flow",
    ("thead", "<table>"): "<thead>",
    ("time", "phrasing"): "phrasing",
    ("title", "<head>"): "text",
    ("tr", "<table>"): "<tr>",
    ("tr", "<thead>"): "<tr>",
    ("tr", "<tbody>"): "<tr>",
    ("tr", "<tfoot>"): "<tr>",
    ("track", "<audio>"): "void",
    ("track", "<video>"): "void",
    ("u", "phrasing"): "phrasing",
    ("ul", "flow"): "list",
    ("var", "phrasing"): "phrasing",
    ("video", "phrasing"): "<video>",
    ("wbr", "phrasing"): "void",
    # https://html.spec.whatwg.org/multipage/embedded-content-other.html#mathml
    ("annotation-xml", "MathML"): "flow",
    ("mi", "MathML"): "phrasing",
    ("mo", "MathML"): "phrasing",
    ("mn", "MathML"): "phrasing",
    ("ms", "MathML"): "phrasing",
    ("mtext", "MathML"): "phrasing",
    # https://html.spec.whatwg.org/multipage/embedded-content-other.html#svg-0
    ("foreignObject", "SVG"): "flow",
    ("title", "SVG"): "phrasing",
}
|
||||
|
||||
# For each context, the additional contexts whose elements are also accepted
# there.  html_context_fallbacks() follows these links transitively, so e.g.
# "flow" accepts "phrasing" and "heading" elements and, via "heading",
# "plain heading" elements too.
#
# "unknown" is the context the validator starts in; it falls back to a broad
# set of contexts.
HTML_CONTEXT_FALLBACKS: dict[str, list[str]] = {
    "<datalist>": ["phrasing", "script-supporting"],
    "<details>": ["flow"],
    "<dl>": ["script-supporting"],
    "<fieldset>": ["flow"],
    "<figure>": ["flow"],
    "<hgroup>": ["plain heading", "script-supporting"],
    "<optgroup> content": ["script-supporting"],
    "<option> content": ["phrasing"],
    # NOTE(review): nothing in VALID_HTML_CONTEXTS appears to produce the
    # "<option>" context, so this entry may currently be unused -- confirm.
    "<option>": ["<option> content"],
    "<picture>": ["script-supporting"],
    "<ruby>": ["phrasing"],
    "<select> content": ["script-supporting"],
    "<select>": ["<select> content"],
    "<table>": ["script-supporting"],
    "<tbody>": ["script-supporting"],
    "<tfoot>": ["script-supporting"],
    "<thead>": ["script-supporting"],
    "<tr>": ["script-supporting"],
    "flow": ["phrasing", "heading"],
    "heading": ["plain heading"],
    "list": ["script-supporting"],
    "phrasing/heading": ["phrasing", "heading"],
    "unknown": ["document", "flow", "list", "<head>", "<select>", "<table>", "<tr>"],
}

# Contexts for embedded non-HTML content; the validator does not check
# unlisted elements or self-closing rules inside these.
FOREIGN_CONTEXTS = ["MathML", "SVG"]


def html_context_fallbacks(context: str) -> Iterator[str]:
    """Yield ``context`` followed by every context it falls back to.

    The walk is a depth-first, pre-order traversal of HTML_CONTEXT_FALLBACKS,
    so more specific contexts are yielded before the more general ones they
    fall back to.  Each context is yielded at most once, in the position of
    its first occurrence; this keeps lookups that stop at the first match
    unchanged while avoiding repeated work, and guarantees termination even
    if the table ever contains a cycle.
    """
    seen: set[str] = set()
    pending = [context]
    while pending:
        current = pending.pop()
        if current in seen:
            continue
        seen.add(current)
        yield current
        # Push in reverse so the first listed fallback is explored first.
        pending.extend(reversed(HTML_CONTEXT_FALLBACKS.get(current, [])))
|
@@ -2,6 +2,8 @@ from collections.abc import Callable
|
||||
|
||||
from typing_extensions import override
|
||||
|
||||
from .html_elements import FOREIGN_CONTEXTS, VALID_HTML_CONTEXTS, html_context_fallbacks
|
||||
|
||||
|
||||
class FormattedError(Exception):
|
||||
pass
|
||||
@@ -277,25 +279,6 @@ def tokenize(text: str, template_format: str | None = None) -> list[Token]:
|
||||
return tokens
|
||||
|
||||
|
||||
HTML_VOID_TAGS = {
|
||||
"area",
|
||||
"base",
|
||||
"br",
|
||||
"col",
|
||||
"command",
|
||||
"embed",
|
||||
"hr",
|
||||
"img",
|
||||
"input",
|
||||
"keygen",
|
||||
"link",
|
||||
"meta",
|
||||
"param",
|
||||
"source",
|
||||
"track",
|
||||
"wbr",
|
||||
}
|
||||
|
||||
# The following excludes some obscure tags that are never used
|
||||
# in Zulip code.
|
||||
HTML_INLINE_TAGS = {
|
||||
@@ -396,8 +379,8 @@ def validate(
|
||||
class State:
|
||||
def __init__(self, func: Callable[[Token | None], None]) -> None:
|
||||
self.depth = 0
|
||||
self.foreign = False
|
||||
self.matcher = func
|
||||
self.html_context = "unknown"
|
||||
|
||||
def no_start_tag(token: Token | None) -> None:
|
||||
assert token
|
||||
@@ -420,10 +403,7 @@ def validate(
|
||||
start_col = start_token.col
|
||||
|
||||
old_matcher = state.matcher
|
||||
old_foreign = state.foreign
|
||||
|
||||
if start_tag in ["math", "svg"]:
|
||||
state.foreign = True
|
||||
old_html_context = state.html_context
|
||||
|
||||
def f(end_token: Token | None) -> None:
|
||||
if end_token is None:
|
||||
@@ -478,7 +458,7 @@ def validate(
|
||||
|
||||
if not is_else_tag:
|
||||
state.matcher = old_matcher
|
||||
state.foreign = old_foreign
|
||||
state.html_context = old_html_context
|
||||
state.depth -= 1
|
||||
|
||||
# TODO: refine this for the else/elif use cases
|
||||
@@ -491,22 +471,48 @@ def validate(
|
||||
kind = token.kind
|
||||
tag = token.tag
|
||||
|
||||
if not state.foreign:
|
||||
if kind == "html_start" and tag in HTML_VOID_TAGS:
|
||||
raise TemplateParserError(
|
||||
f"Tag must be self-closing: {tag} at {fn} line {token.line}, col {token.col}"
|
||||
)
|
||||
elif kind == "html_singleton" and tag not in HTML_VOID_TAGS:
|
||||
raise TemplateParserError(
|
||||
f"Tag must not be self-closing: {tag} at {fn} line {token.line}, col {token.col}"
|
||||
)
|
||||
|
||||
flavor = tag_flavor(token)
|
||||
if flavor == "start":
|
||||
start_tag_matcher(token)
|
||||
elif flavor == "end":
|
||||
state.matcher(token)
|
||||
|
||||
if kind in ("html_start", "html_singleton"):
|
||||
for context in html_context_fallbacks(state.html_context):
|
||||
if (tag, context) in VALID_HTML_CONTEXTS:
|
||||
new_context = VALID_HTML_CONTEXTS[tag, context]
|
||||
if new_context == "transparent":
|
||||
new_context = state.html_context
|
||||
break
|
||||
else:
|
||||
if "-" in tag and "phrasing" in html_context_fallbacks(state.html_context):
|
||||
new_context = state.html_context # custom elements
|
||||
elif state.html_context in FOREIGN_CONTEXTS:
|
||||
new_context = state.html_context # unchecked foreign elements
|
||||
else:
|
||||
raise TemplateParserError(
|
||||
f"<{tag}> is not valid in {state.html_context} context"
|
||||
+ (
|
||||
' (consider growing HTML_CONTEXT_FALLBACKS["unknown"]?)'
|
||||
if state.html_context == "unknown"
|
||||
else ""
|
||||
)
|
||||
+ f" at {fn} line {token.line}, col {token.col}"
|
||||
)
|
||||
|
||||
if new_context not in FOREIGN_CONTEXTS:
|
||||
if kind == "html_start" and new_context == "void":
|
||||
raise TemplateParserError(
|
||||
f"Tag must be self-closing: {tag} at {fn} line {token.line}, col {token.col}"
|
||||
)
|
||||
elif kind == "html_singleton" and new_context != "void":
|
||||
raise TemplateParserError(
|
||||
f"Tag must not be self-closing: {tag} at {fn} line {token.line}, col {token.col}"
|
||||
)
|
||||
|
||||
if kind == "html_start":
|
||||
state.html_context = new_context
|
||||
|
||||
if state.depth != 0:
|
||||
state.matcher(None)
|
||||
|
||||
|
Reference in New Issue
Block a user