mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 04:52:12 +00:00
template_parser: Check for invalid combinations of HTML elements.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
committed by
Tim Abbott
parent
9452e2cbb2
commit
8691c11933
191
tools/lib/html_elements.py
Normal file
191
tools/lib/html_elements.py
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
from collections.abc import Iterator
|
||||||
|
|
||||||
|
# Table of permitted element placements, keyed by (tag, context).
#
# The key's second item is the content-model context the tag appears in; the
# value is the context in which that element's own children are then checked.
# Contexts are either generic category names ("flow", "phrasing", "list", ...)
# or element-specific names written as "<tag>" / "<tag> content".
#
# Two values are treated specially by the template parser's validate() loop:
#   "transparent" - the element keeps its parent's context for its children.
#   "void"        - the element must be written self-closing, and only "void"
#                   elements may be self-closing (foreign contexts excepted).
# Values such as "empty" and "text" appear in no key and have no entry in
# HTML_CONTEXT_FALLBACKS, so no table entry can match a child element there.
VALID_HTML_CONTEXTS: dict[tuple[str, str], str] = {
    # https://html.spec.whatwg.org/multipage/indices.html#elements-3
    ("a", "phrasing"): "transparent",
    ("abbr", "phrasing"): "phrasing",
    ("address", "flow"): "flow",
    ("area", "phrasing"): "void",
    ("article", "flow"): "flow",
    ("aside", "flow"): "flow",
    ("audio", "phrasing"): "<audio>",
    ("b", "phrasing"): "phrasing",
    ("base", "<head>"): "void",
    ("bdi", "phrasing"): "phrasing",
    ("bdo", "phrasing"): "phrasing",
    ("blockquote", "flow"): "flow",
    ("body", "<html>"): "flow",
    ("br", "phrasing"): "void",
    ("button", "phrasing"): "phrasing",
    ("button", "<select>"): "phrasing",
    ("canvas", "phrasing"): "transparent",
    ("caption", "<table>"): "flow",
    ("center", "flow"): "flow",  # FIXME: obsolete, remove this
    ("cite", "phrasing"): "phrasing",
    ("code", "phrasing"): "phrasing",
    ("col", "<colgroup>"): "void",
    ("colgroup", "<table>"): "<colgroup>",
    ("data", "phrasing"): "phrasing",
    ("datalist", "phrasing"): "<datalist>",
    ("dd", "<dl>"): "flow",
    ("del", "phrasing"): "transparent",
    ("details", "flow"): "<details>",
    ("dfn", "phrasing"): "phrasing",
    ("dialog", "flow"): "flow",
    ("div", "flow"): "flow",
    # <div> used as a grouping wrapper passes the surrounding context through.
    ("div", "<dl>"): "<dl>",
    ("div", "<select> content"): "<select> content",
    ("div", "<optgroup> content"): "<optgroup> content",
    ("div", "<option> content"): "<option> content",
    ("dl", "flow"): "<dl>",
    ("dt", "<dl>"): "phrasing",
    ("em", "phrasing"): "phrasing",
    ("embed", "phrasing"): "void",
    ("fieldset", "flow"): "<fieldset>",
    ("figcaption", "<figure>"): "flow",
    ("figure", "flow"): "<figure>",
    ("footer", "flow"): "flow",
    ("form", "flow"): "flow",
    ("h1", "plain heading"): "phrasing",
    ("h2", "plain heading"): "phrasing",
    ("h3", "plain heading"): "phrasing",
    ("h4", "plain heading"): "phrasing",
    ("h5", "plain heading"): "phrasing",
    ("h6", "plain heading"): "phrasing",
    ("head", "<html>"): "<head>",
    ("header", "flow"): "flow",
    ("hgroup", "heading"): "<hgroup>",
    ("hr", "flow"): "void",
    ("hr", "<select> content"): "void",
    ("html", "document"): "<html>",
    ("i", "phrasing"): "phrasing",
    ("iframe", "phrasing"): "empty",
    ("img", "phrasing"): "void",
    ("img", "<picture>"): "void",
    ("input", "phrasing"): "void",
    ("ins", "phrasing"): "transparent",
    ("kbd", "phrasing"): "phrasing",
    ("label", "phrasing"): "phrasing",
    ("legend", "<fieldset>"): "phrasing/heading",
    ("li", "list"): "flow",
    ("link", "<head>"): "void",
    ("link", "phrasing"): "void",
    ("main", "flow"): "flow",
    ("map", "phrasing"): "<map>",
    ("mark", "phrasing"): "phrasing",
    ("math", "phrasing"): "MathML",
    ("menu", "flow"): "list",
    ("meta", "<head>"): "void",
    ("meta", "phrasing"): "void",
    ("meter", "phrasing"): "phrasing",
    ("nav", "flow"): "flow",
    ("noscript", "<head>"): "transparent",
    ("noscript", "phrasing"): "transparent",
    ("noscript", "<select> content"): "transparent",
    ("noscript", "<optgroup> content"): "transparent",
    ("object", "phrasing"): "transparent",
    ("ol", "flow"): "list",
    ("optgroup", "<select>"): "<optgroup> content",
    ("option", "<select>"): "<option> content",
    ("option", "<datalist>"): "<option> content",
    # NOTE(review): nothing in this table or in HTML_CONTEXT_FALLBACKS yields
    # the "<optgroup>" context (optgroup opens "<optgroup> content"), so this
    # key looks unreachable -- confirm the intended context name.
    ("option", "<optgroup>"): "<option> content",
    ("output", "phrasing"): "phrasing",
    ("p", "flow"): "phrasing",
    ("p", "<hgroup>"): "phrasing",
    ("picture", "phrasing"): "<picture>",
    ("pre", "flow"): "phrasing",
    ("progress", "phrasing"): "phrasing",
    ("q", "phrasing"): "phrasing",
    ("rp", "<ruby>"): "phrasing",
    ("rt", "<ruby>"): "phrasing",
    ("ruby", "phrasing"): "<ruby>",
    ("s", "phrasing"): "phrasing",
    ("samp", "phrasing"): "phrasing",
    ("script", "<head>"): "<script>",
    ("script", "phrasing"): "<script>",
    ("script", "script-supporting"): "<script>",
    ("search", "flow"): "flow",
    ("section", "flow"): "flow",
    ("select", "phrasing"): "<select>",
    # NOTE(review): <button> opens the "phrasing" context above and nothing
    # here yields "<button>", so this key also looks unreachable -- confirm.
    ("selectedcontent", "<button>"): "empty",
    ("slot", "phrasing"): "transparent",
    ("small", "phrasing"): "phrasing",
    ("source", "<picture>"): "void",
    ("source", "<video>"): "void",
    ("source", "<audio>"): "void",
    ("span", "phrasing"): "phrasing",
    ("strong", "phrasing"): "phrasing",
    ("style", "<head>"): "<style>",
    ("sub", "phrasing"): "phrasing",
    ("summary", "<details>"): "phrasing/heading",
    ("sup", "phrasing"): "phrasing",
    ("svg", "phrasing"): "SVG",
    ("table", "flow"): "<table>",
    ("tbody", "<table>"): "<tbody>",
    ("td", "<tr>"): "flow",
    # <template> contents are checked in the permissive "unknown" context.
    ("template", "<head>"): "unknown",
    ("template", "phrasing"): "unknown",
    ("template", "script-supporting"): "unknown",
    ("template", "<colgroup>"): "unknown",
    ("textarea", "phrasing"): "text",
    ("tfoot", "<table>"): "<tfoot>",
    ("th", "<tr>"): "flow",
    ("thead", "<table>"): "<thead>",
    ("time", "phrasing"): "phrasing",
    ("title", "<head>"): "text",
    ("tr", "<table>"): "<tr>",
    ("tr", "<thead>"): "<tr>",
    ("tr", "<tbody>"): "<tr>",
    ("tr", "<tfoot>"): "<tr>",
    ("track", "<audio>"): "void",
    ("track", "<video>"): "void",
    ("u", "phrasing"): "phrasing",
    ("ul", "flow"): "list",
    ("var", "phrasing"): "phrasing",
    ("video", "phrasing"): "<video>",
    ("wbr", "phrasing"): "void",
    # https://html.spec.whatwg.org/multipage/embedded-content-other.html#mathml
    ("annotation-xml", "MathML"): "flow",
    ("mi", "MathML"): "phrasing",
    ("mo", "MathML"): "phrasing",
    ("mn", "MathML"): "phrasing",
    ("ms", "MathML"): "phrasing",
    ("mtext", "MathML"): "phrasing",
    # https://html.spec.whatwg.org/multipage/embedded-content-other.html#svg-0
    ("foreignObject", "SVG"): "flow",
    ("title", "SVG"): "phrasing",
}
|
||||||
|
|
||||||
|
# For each context, the broader contexts whose elements are also acceptable
# there.  Lookups walk these chains depth-first (see html_context_fallbacks),
# so e.g. anything valid in "phrasing" is also valid in "flow".  The "unknown"
# context (the parser's initial state and the inside of <template>) falls back
# to a broad set of contexts because the real parent is not known.
HTML_CONTEXT_FALLBACKS: dict[str, list[str]] = {
    "<datalist>": ["phrasing", "script-supporting"],
    "<details>": ["flow"],
    "<dl>": ["script-supporting"],
    "<fieldset>": ["flow"],
    "<figure>": ["flow"],
    "<hgroup>": ["plain heading", "script-supporting"],
    "<optgroup> content": ["script-supporting"],
    "<option> content": ["phrasing"],
    "<option>": ["<option> content"],
    "<picture>": ["script-supporting"],
    "<ruby>": ["phrasing"],
    "<select> content": ["script-supporting"],
    "<select>": ["<select> content"],
    "<table>": ["script-supporting"],
    "<tbody>": ["script-supporting"],
    "<tfoot>": ["script-supporting"],
    "<thead>": ["script-supporting"],
    "<tr>": ["script-supporting"],
    "flow": ["phrasing", "heading"],
    "heading": ["plain heading"],
    "list": ["script-supporting"],
    "phrasing/heading": ["phrasing", "heading"],
    "unknown": ["document", "flow", "list", "<head>", "<select>", "<table>", "<tr>"],
}

# Contexts opened by <math> and <svg>.  The template parser accepts tags it
# has no table entry for inside these, and skips its self-closing checks for
# elements that open them.
FOREIGN_CONTEXTS = ["MathML", "SVG"]


def html_context_fallbacks(context: str) -> Iterator[str]:
    """Yield ``context`` followed by every context it falls back to.

    The walk is depth-first in the order listed in HTML_CONTEXT_FALLBACKS, so
    more specific contexts are yielded before broader ones.  Each context is
    yielded at most once, at its first position in that order; this avoids
    redundant lookups for contexts reachable along several paths (such as
    "script-supporting") and keeps the walk finite even if the table were to
    contain a cycle.

    Args:
        context: The context name to start from; it need not be a key of
            HTML_CONTEXT_FALLBACKS.

    Yields:
        ``context`` itself, then its transitive fallbacks without repeats.
    """
    seen: set[str] = set()
    pending = [context]
    while pending:
        current = pending.pop()
        if current in seen:
            continue
        seen.add(current)
        yield current
        # Push in reverse so the first listed fallback is explored first,
        # matching a recursive pre-order traversal.
        pending.extend(reversed(HTML_CONTEXT_FALLBACKS.get(current, [])))
|
@@ -2,6 +2,8 @@ from collections.abc import Callable
|
|||||||
|
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
|
|
||||||
|
from .html_elements import FOREIGN_CONTEXTS, VALID_HTML_CONTEXTS, html_context_fallbacks
|
||||||
|
|
||||||
|
|
||||||
class FormattedError(Exception):
|
class FormattedError(Exception):
|
||||||
pass
|
pass
|
||||||
@@ -277,25 +279,6 @@ def tokenize(text: str, template_format: str | None = None) -> list[Token]:
|
|||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
|
|
||||||
HTML_VOID_TAGS = {
|
|
||||||
"area",
|
|
||||||
"base",
|
|
||||||
"br",
|
|
||||||
"col",
|
|
||||||
"command",
|
|
||||||
"embed",
|
|
||||||
"hr",
|
|
||||||
"img",
|
|
||||||
"input",
|
|
||||||
"keygen",
|
|
||||||
"link",
|
|
||||||
"meta",
|
|
||||||
"param",
|
|
||||||
"source",
|
|
||||||
"track",
|
|
||||||
"wbr",
|
|
||||||
}
|
|
||||||
|
|
||||||
# The following excludes some obscure tags that are never used
|
# The following excludes some obscure tags that are never used
|
||||||
# in Zulip code.
|
# in Zulip code.
|
||||||
HTML_INLINE_TAGS = {
|
HTML_INLINE_TAGS = {
|
||||||
@@ -396,8 +379,8 @@ def validate(
|
|||||||
class State:
|
class State:
|
||||||
def __init__(self, func: Callable[[Token | None], None]) -> None:
|
def __init__(self, func: Callable[[Token | None], None]) -> None:
|
||||||
self.depth = 0
|
self.depth = 0
|
||||||
self.foreign = False
|
|
||||||
self.matcher = func
|
self.matcher = func
|
||||||
|
self.html_context = "unknown"
|
||||||
|
|
||||||
def no_start_tag(token: Token | None) -> None:
|
def no_start_tag(token: Token | None) -> None:
|
||||||
assert token
|
assert token
|
||||||
@@ -420,10 +403,7 @@ def validate(
|
|||||||
start_col = start_token.col
|
start_col = start_token.col
|
||||||
|
|
||||||
old_matcher = state.matcher
|
old_matcher = state.matcher
|
||||||
old_foreign = state.foreign
|
old_html_context = state.html_context
|
||||||
|
|
||||||
if start_tag in ["math", "svg"]:
|
|
||||||
state.foreign = True
|
|
||||||
|
|
||||||
def f(end_token: Token | None) -> None:
|
def f(end_token: Token | None) -> None:
|
||||||
if end_token is None:
|
if end_token is None:
|
||||||
@@ -478,7 +458,7 @@ def validate(
|
|||||||
|
|
||||||
if not is_else_tag:
|
if not is_else_tag:
|
||||||
state.matcher = old_matcher
|
state.matcher = old_matcher
|
||||||
state.foreign = old_foreign
|
state.html_context = old_html_context
|
||||||
state.depth -= 1
|
state.depth -= 1
|
||||||
|
|
||||||
# TODO: refine this for the else/elif use cases
|
# TODO: refine this for the else/elif use cases
|
||||||
@@ -491,22 +471,48 @@ def validate(
|
|||||||
kind = token.kind
|
kind = token.kind
|
||||||
tag = token.tag
|
tag = token.tag
|
||||||
|
|
||||||
if not state.foreign:
|
|
||||||
if kind == "html_start" and tag in HTML_VOID_TAGS:
|
|
||||||
raise TemplateParserError(
|
|
||||||
f"Tag must be self-closing: {tag} at {fn} line {token.line}, col {token.col}"
|
|
||||||
)
|
|
||||||
elif kind == "html_singleton" and tag not in HTML_VOID_TAGS:
|
|
||||||
raise TemplateParserError(
|
|
||||||
f"Tag must not be self-closing: {tag} at {fn} line {token.line}, col {token.col}"
|
|
||||||
)
|
|
||||||
|
|
||||||
flavor = tag_flavor(token)
|
flavor = tag_flavor(token)
|
||||||
if flavor == "start":
|
if flavor == "start":
|
||||||
start_tag_matcher(token)
|
start_tag_matcher(token)
|
||||||
elif flavor == "end":
|
elif flavor == "end":
|
||||||
state.matcher(token)
|
state.matcher(token)
|
||||||
|
|
||||||
|
if kind in ("html_start", "html_singleton"):
|
||||||
|
for context in html_context_fallbacks(state.html_context):
|
||||||
|
if (tag, context) in VALID_HTML_CONTEXTS:
|
||||||
|
new_context = VALID_HTML_CONTEXTS[tag, context]
|
||||||
|
if new_context == "transparent":
|
||||||
|
new_context = state.html_context
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
if "-" in tag and "phrasing" in html_context_fallbacks(state.html_context):
|
||||||
|
new_context = state.html_context # custom elements
|
||||||
|
elif state.html_context in FOREIGN_CONTEXTS:
|
||||||
|
new_context = state.html_context # unchecked foreign elements
|
||||||
|
else:
|
||||||
|
# Every segment that interpolates a value must be an f-string: the trailing
# location segment previously lacked the prefix, so the message ended with the
# literal text "{fn} line {token.line}, col {token.col}".
raise TemplateParserError(
    f"<{tag}> is not valid in {state.html_context} context"
    + (
        ' (consider growing HTML_CONTEXT_FALLBACKS["unknown"]?)'
        if state.html_context == "unknown"
        else ""
    )
    + f" at {fn} line {token.line}, col {token.col}"
)
|
||||||
|
|
||||||
|
if new_context not in FOREIGN_CONTEXTS:
|
||||||
|
if kind == "html_start" and new_context == "void":
|
||||||
|
raise TemplateParserError(
|
||||||
|
f"Tag must be self-closing: {tag} at {fn} line {token.line}, col {token.col}"
|
||||||
|
)
|
||||||
|
elif kind == "html_singleton" and new_context != "void":
|
||||||
|
raise TemplateParserError(
|
||||||
|
f"Tag must not be self-closing: {tag} at {fn} line {token.line}, col {token.col}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if kind == "html_start":
|
||||||
|
state.html_context = new_context
|
||||||
|
|
||||||
if state.depth != 0:
|
if state.depth != 0:
|
||||||
state.matcher(None)
|
state.matcher(None)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user