help-beta: Use FlattenList component instead of hacky middleware.

We want to follow the Astro way of doing things; the middleware was
adding more magic, and it was violating the CommonMark spec:
https://spec.commonmark.org/0.31.2/#example-301.

We insert the FlattenList component wherever include files are
treated as part of ordered lists. Astro renders each included file as its
own component, which would result in multiple ordered lists instead of
a single list if we did not use this component. See the Astro component
file itself to learn more about how FlattenList works.

We are not inserting FlattenList component for files with !!! tip
components, since we need to do it inside the include file. There are 4
such files at the time of writing this comment.
`is_include_only_ordered_list` makes sure of that. We can do the
conversion for it manually during cutover or in a followup PR.

All unordered lists at the time of writing this comment are standalone
components and we do not need to do any transformation for them.

We also changed the order of conversion of include and main files.
Include files are now being converted first to calculate
include_files_info.

Relevant topic:
https://chat.zulip.org/#narrow/channel/19-documentation/topic/Stage.202.3A.20New.20syntax.20for.20imports
This commit is contained in:
Shubham Padia
2025-05-21 14:38:36 +00:00
committed by Tim Abbott
parent 9fb06e81b7
commit 2977bf364e
3 changed files with 168 additions and 106 deletions

View File

@@ -0,0 +1,28 @@
---
// FlattenList: merges every list rendered into the default slot into a
// single list element.
//
// The slot content is expected to consist solely of sibling lists of the
// same kind (all <ol> or all <ul>), optionally separated by whitespace.
// The output is the first list (tag and attributes preserved) whose
// children are the concatenated children of all the lists, in order.
//
// Rendering fails with an assertion error if the slot is empty, if the
// first node is not a list, or if any sibling is not a list of the same
// kind as the first one.
import assert from "node:assert/strict";

import { fromHtml } from "hast-util-from-html";
import { toHtml } from "hast-util-to-html";

// Matches text made up only of HTML inter-element (ASCII) whitespace.
const INTER_ELEMENT_WHITESPACE = /^[ \t\n\f\r]*$/;

// Parse as a fragment so the parser does not wrap the slot content in
// <html>/<head>/<body> elements.
const tree = fromHtml(await Astro.slots.render("default"), { fragment: true });

// Whitespace-only text nodes between the lists carry no content; drop
// all of them (not just a lone "\n") so that blank lines or indentation
// between lists do not trip the assertions below.
const tree_with_removed_newlines = {
  type: "root",
  children: tree.children.filter(
    (child) =>
      !(child.type === "text" && INTER_ELEMENT_WHITESPACE.test(child.value)),
  ),
};

const first_element = tree_with_removed_newlines.children[0];
assert(
  first_element?.type === "element" &&
    ["ol", "ul"].includes(first_element.tagName),
  "FlattenList: the first child must be an <ol> or <ul> element",
);

// Reuse the first list as the container and splice in the items of every
// list, including the first one itself.
const flattened = {
  ...first_element,
  children: tree_with_removed_newlines.children.flatMap((other) => {
    assert(
      other.type === "element" && other.tagName === first_element.tagName,
      `FlattenList: every child must be a <${first_element.tagName}> element`,
    );
    return other.children;
  }),
};
---
<Fragment set:html={toHtml(flattened)} />

View File

@@ -1,82 +0,0 @@
import {defineMiddleware} from "astro:middleware";
import type {Element, Root, RootContent} from "hast";
import {fromHtml} from "hast-util-from-html";
import {toHtml} from "hast-util-to-html";
/** Returns true when `node` is an ordered (`ol`) or unordered (`ul`) list. */
function isList(node: Element): boolean {
    return node.tagName === "ol" || node.tagName === "ul";
}

/**
 * Traverses the HTML tree and merges sibling lists of the same type that
 * have nothing but whitespace between them.
 *
 * This is kind of a hack to make file imports work within lists. One of
 * our major use cases for file imports is to have bullet points as
 * partials to import at different places in the project. But when
 * importing the file with Astro, it creates its own lists, so we merge
 * those lists back together.
 *
 * A list of a different type (`ul` after `ol`, or vice versa) ends the
 * current run and is kept as its own list; whitespace that is not
 * followed by a mergeable list is kept as well.
 *
 * @param tree - Parsed document; it is modified in place.
 * @returns The same `tree` object, for convenience.
 */
function mergeAdjacentListsOfSameType(tree: Root): Root {
    function recursiveMergeAdjacentLists(node: Element | Root): void {
        if (!node.children) {
            return;
        }
        const modifiedChildren: RootContent[] = [];
        let currentIndex = 0;
        while (currentIndex < node.children.length) {
            const currentChild = node.children[currentIndex]!;
            if (currentChild.type !== "element" || !isList(currentChild)) {
                modifiedChildren.push(currentChild);
                currentIndex += 1;
                continue;
            }

            // Clone so that appending items does not mutate the original
            // node while its siblings are still being scanned.
            const mergedList = structuredClone(currentChild);
            // Index just past the last sibling that was folded into
            // mergedList. Whitespace is skipped tentatively and only
            // counts as consumed once a mergeable list follows it.
            let consumedUpTo = currentIndex + 1;
            let lookaheadIndex = consumedUpTo;
            while (lookaheadIndex < node.children.length) {
                const lookaheadChild = node.children[lookaheadIndex]!;
                if (
                    lookaheadChild.type === "element" &&
                    isList(lookaheadChild) &&
                    lookaheadChild.tagName === currentChild.tagName
                ) {
                    mergedList.children.push(...lookaheadChild.children);
                    lookaheadIndex += 1;
                    consumedUpTo = lookaheadIndex;
                } else if (
                    lookaheadChild.type === "text" &&
                    /^\s*$/.test(lookaheadChild.value)
                ) {
                    // Whitespace should be allowed in between lists.
                    lookaheadIndex += 1;
                } else {
                    // Anything else, including a list of a different
                    // type, ends the run and is processed on its own by
                    // the outer loop instead of being dropped.
                    break;
                }
            }
            modifiedChildren.push(mergedList);
            currentIndex = consumedUpTo;
        }
        node.children = modifiedChildren;
        for (const child of node.children) {
            if (child.type === "element") {
                recursiveMergeAdjacentLists(child);
            }
        }
    }

    recursiveMergeAdjacentLists(tree);
    return tree;
}
/**
 * Astro middleware that post-processes rendered HTML, merging adjacent
 * lists of the same type (see `mergeAdjacentListsOfSameType`).
 *
 * Responses that explicitly declare a non-HTML content type are passed
 * through untouched. For everything else the body is parsed, rewritten
 * and re-serialized, and the downstream status is preserved rather than
 * being forced to 200.
 */
export const onRequest = defineMiddleware(async (_context, next) => {
    const response = await next();

    // Parsing a non-HTML payload as an HTML document would corrupt it, so
    // only rewrite responses that are HTML or do not state their type.
    const contentType = response.headers.get("content-type");
    if (contentType !== null && !contentType.toLowerCase().includes("text/html")) {
        return response;
    }

    const html = await response.text();
    const tree = fromHtml(html);
    const result = toHtml(mergeAdjacentListsOfSameType(tree));

    // The body length may have changed, so a copied Content-Length would
    // be stale; drop it and let the runtime recompute it.
    const headers = new Headers(response.headers);
    headers.delete("content-length");

    return new Response(result, {
        status: response.status,
        statusText: response.statusText,
        headers,
    });
});

View File

@@ -5,6 +5,7 @@ import re
import shutil
import sys
from textwrap import indent
from typing import TypedDict
import django
from django.template import engines
@@ -26,6 +27,10 @@ from zerver.lib.markdown.tabbed_sections import generate_content_blocks, parse_t
INDENT_SPACES = " "
class IncludeFileInfo(TypedDict):
    """Facts recorded about an include file while it is converted to MDX."""

    # Result of is_include_only_ordered_list() on the file's markdown:
    # True when the stripped content starts with `1.` and contains no
    # `!!!` marker.
    is_only_ordered_list: bool
def convert_kebab_to_pascal(text: str) -> str:
    """
    Convert a kebab-case string to PascalCase.

    `to_pascal` is meant for snake case input, so any hyphens still
    present in its output are removed afterwards.
    """
    pascal_with_hyphens = to_pascal(text)
    return pascal_with_hyphens.replace("-", "")
@@ -241,6 +246,98 @@ def detab(text: str) -> tuple[str, str]:
return "\n".join(newtext), "\n".join(lines[len(newtext) :])
def is_include_only_ordered_list(markdown_string: str) -> bool:
    """
    Tell whether an include file consists of nothing but an ordered list.

    Ignoring surrounding whitespace, the content has to begin with `1.`.
    Markdown in general could contain many other components, but this is
    only run on include files, and the only other component found in
    include files that start with `1.` is a tip component, which is
    detected through its `!!!` marker.
    """
    stripped = markdown_string.strip()
    if not stripped.startswith("1."):
        return False
    return "!!!" not in stripped
def is_line_part_of_an_ordered_list(line: str) -> bool:
    """
    Everywhere in our markdown, we use `1.` for our lists instead of
    explicit numbers, so we only check for that here. A single item
    in a list can be spread across multiple lines with some indentation.
    So if the line starts with at least two spaces, we consider it part
    of the list for this conversion script. Blank (empty or
    whitespace-only) lines can be part of a list, so we return true for
    those too.
    """
    # Two spaces, as documented above: a single leading space is not
    # enough to count as a continuation of a list item.
    return line.startswith(("  ", "1.")) or line.strip() == ""
def insert_flattened_steps_component(
    markdown_string: str,
    include_files_info: dict[str, IncludeFileInfo],
    import_statement_set: set[str],
) -> str:
    """
    We insert FlattenList components where include files
    are being treated as part of ordered lists. Astro renders
    included files as its own component, which would result in
    multiple ordered lists instead of a single list if we did
    not use this component. See the astro component file itself
    to know more how FlattenList works.

    We are not inserting FlattenList components for files
    with !!! tip components, since we need to do it inside the
    include file. There are 4 such files at the time of writing
    this comment. `is_include_only_ordered_list` makes sure of
    that. We can do the conversion for it manually during cutover
    or in a followup PR.

    `include_files_info` is looked up by the filename written inside
    `{!...!}`; a filename that is not a key raises KeyError.
    `import_statement_set` is mutated: the FlattenList import is added
    to it when at least one tag is inserted.

    Returns the lines of `markdown_string` joined with "\\n", with
    `<FlattenList>` / `</FlattenList>` lines inserted. Since the text is
    split with `splitlines()` and re-joined, a trailing newline in the
    input is not carried over to the result.
    """
    file_include_pattern = re.compile(r"^ {,3}\{!([^!]+)!\} *$", re.MULTILINE)
    lines = markdown_string.splitlines()

    def traverse_to_boundary(start: int, step: int) -> int:
        # Walk from `start` in direction `step` for as long as lines are
        # part of an ordered list or include an ordered-list-only file.
        # Returns the index of the first line that is neither, or an
        # out-of-range index (-1 or len(lines)) if the walk runs off
        # either end.
        index = start
        while 0 <= index < len(lines):
            line = lines[index]
            if is_line_part_of_an_ordered_list(line):
                index += step
                continue
            file_match = file_include_pattern.match(line)
            if file_match:
                filename = file_match.group(1)
                if include_files_info[filename]["is_only_ordered_list"]:
                    index += step
                    continue
            break
        return index

    # If a file with `is_only_ordered_list` set to True is followed
    # immediately by a similar file with it set to true, our loop
    # will try to insert the same text at the same position twice
    # resulting in two opening <FlattenList> one after the other.
    # Using a set avoids this problem.
    insertions: set[tuple[int, str]] = set()
    for match in file_include_pattern.finditer(markdown_string):
        filename = match.group(1)
        if not include_files_info[filename]["is_only_ordered_list"]:
            continue
        # Number of newlines before the match == index of the line
        # holding the include statement.
        match_line_index = markdown_string[: match.start()].count("\n")
        # The opening tag goes right after the boundary line found above
        # the include; the closing tag takes the place of the boundary
        # line found below it.
        upper_bound = traverse_to_boundary(match_line_index - 1, step=-1)
        insertions.add((upper_bound + 1, "<FlattenList>"))
        lower_bound = traverse_to_boundary(match_line_index + 1, step=1)
        insertions.add((lower_bound, "</FlattenList>"))

    if insertions:
        import_statement_set.add("import FlattenList from '../../components/FlattenList.astro';")
    # Insert tags in reverse order to avoid index shifting
    for index, tag in sorted(insertions, reverse=True):
        lines.insert(index, tag)
    return "\n".join(lines)
def convert_admonitions_to_asides(
markdown_string: str, import_statement_set: set[str], components_dir_path: str
) -> str:
@@ -358,7 +455,7 @@ def get_markdown_string_from_file(markdown_file_path: str) -> str:
def convert_help_center_file_to_mdx(
markdown_file_path: str,
markdown_file_path: str, include_files_info: dict[str, IncludeFileInfo]
) -> str:
"""
Given a path to a Markdown file, return the equivalent MDX file.
@@ -368,6 +465,15 @@ def convert_help_center_file_to_mdx(
# All imports inserted during conversion should be tracked here.
import_statement_set: set[str] = set()
# We are not inserting FlattenList components for files
# with !!! tip components, since we need to do it inside the
# include file. We can do it during the cutover manually or
# in a followup PR.
# All unordered lists at the time of writing this comment are
# standalone components and we do not need to do any transformation
# for them.
result = insert_flattened_steps_component(result, include_files_info, import_statement_set)
result = fix_file_imports(result, import_statement_set, "./include")
result = convert_admonitions_to_asides(result, import_statement_set, "../../components")
result = convert_tab_syntax(result, import_statement_set)
@@ -384,7 +490,7 @@ def convert_help_center_file_to_mdx(
def convert_include_file_to_mdx(
markdown_file_path: str,
) -> str:
) -> tuple[IncludeFileInfo, str]:
"""
Given a path to a Markdown file, return the equivalent MDX file.
We do not do certain operations that we do on a normal help file
@@ -397,6 +503,9 @@ def convert_include_file_to_mdx(
in files that are not served standalone.
"""
result = get_markdown_string_from_file(markdown_file_path)
include_file_info: IncludeFileInfo = {
"is_only_ordered_list": is_include_only_ordered_list(result)
}
# All imports inserted during conversion should be tracked here.
import_statement_set: set[str] = set()
@@ -410,43 +519,33 @@ def convert_include_file_to_mdx(
result = replace_icons(result, import_statement_set)
result = convert_comments(result)
result = insert_imports(result, import_statement_set, 1)
return result
return include_file_info, result
def run() -> None:
input_dir = os.path.join(BASE_DIR, "help")
output_dir = os.path.join(BASE_DIR, "help-beta/src/content/docs")
include_input_dir = os.path.join(input_dir, "include")
include_output_dir = os.path.join(output_dir, "include")
print("Starting the conversion from MD to MDX...")
converted_count = 0
# We delete the directory first to remove any stale files
# that might have been deleted in the `help` folder but
# their converted mdx files stay around
shutil.rmtree(output_dir)
os.makedirs(output_dir, exist_ok=True)
for name in os.listdir(input_dir):
if os.path.isfile(os.path.join(input_dir, name)):
converted_count += 1
mdx = convert_help_center_file_to_mdx(os.path.join(input_dir, name))
with open(
os.path.join(
output_dir,
os.path.basename(name).split(".")[0] + ".mdx",
),
"w",
) as mdx_file:
mdx_file.write(mdx)
print(f"Converted {converted_count} files. Proceeding to the conversion of include files ...")
include_converted_count = 0
include_input_dir = os.path.join(input_dir, "include")
include_output_dir = os.path.join(output_dir, "include")
converted_count = 0
os.makedirs(include_output_dir, exist_ok=True)
include_files_info: dict[str, IncludeFileInfo] = {}
for name in os.listdir(include_input_dir):
if os.path.isfile(os.path.join(include_input_dir, name)):
include_converted_count += 1
mdx = convert_include_file_to_mdx(os.path.join(include_input_dir, name))
converted_count += 1
include_file_info, mdx = convert_include_file_to_mdx(
os.path.join(include_input_dir, name)
)
include_files_info[name] = include_file_info
with open(
os.path.join(
include_output_dir,
@@ -455,7 +554,24 @@ def run() -> None:
"w",
) as mdx_file:
mdx_file.write(mdx)
print(f"Converted {include_converted_count} include files. Conversion completed.")
print(
f"Converted {converted_count} include files. Proceeding to the conversion of main help files ..."
)
converted_count = 0
for name in os.listdir(input_dir):
if os.path.isfile(os.path.join(input_dir, name)):
converted_count += 1
mdx = convert_help_center_file_to_mdx(os.path.join(input_dir, name), include_files_info)
with open(
os.path.join(
output_dir,
os.path.basename(name).split(".")[0] + ".mdx",
),
"w",
) as mdx_file:
mdx_file.write(mdx)
print(f"Converted {converted_count} main help files. Conversion completed.")
run()