Files
zulip/tools/convert-help-center-docs-to-mdx
Shubham Padia 078a27def2 help-beta: Convert admonitions to asides and KeyboardTip.
Fixes #31249.
We have converted `warn` to `note` since that was the translation that
remains most faithful to how we display `warn` admonitions in our
current help center implementation. See
https://chat.zulip.org/#narrow/channel/19-documentation/topic/Stage.202.3A.20New.20syntax.20for.20!!!tip.20in.20help-beta/near/2174415
for more details.

We've also duplicated code from Python-Markdown to represent how
admonitions worked there more accurately. Duplication should be fine
since this is a one-off script and Python-Markdown is not a project
that is likely to change drastically in the future.
2025-05-15 11:53:40 -07:00

382 lines
14 KiB
Python
Executable File

#!/usr/bin/env python3
import os
import re
import shutil
import sys
import django
from django.template import engines
from django.template.backends.jinja2 import Jinja2
from pydantic.alias_generators import to_pascal
# Parent of the directory containing this script; `run()` resolves the
# `help` and `help-beta` directories relative to it.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Put the parent directory of this script's directory on the import path
# so that the `scripts.lib` import below resolves.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from scripts.lib.setup_path import setup_path
# NOTE(review): presumably adjusts sys.path for the project's Python
# environment -- confirm against scripts/lib/setup_path.
setup_path()
# Django is configured here, before any function below looks up
# `engines["Jinja2"]`.
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
django.setup()
def replace_emoticon_translation_table(markdown_string: str, import_statement_set: set[str]) -> str:
    """
    Swap the `emoticon_translations` placeholder for the
    <EmoticonTranslations> Astro component.

    The placeholder is looked up in its brace-escaped form, i.e. with
    a backslash in front of each curly brace. When at least one
    placeholder is found, the import for the component is added to
    `import_statement_set`; otherwise the input is returned untouched
    and the set is left alone.
    """
    placeholder = "\\{emoticon_translations\\}"
    if placeholder not in markdown_string:
        return markdown_string

    import_statement_set.add(
        "import EmoticonTranslations from '../../components/EmoticonTranslations.astro';"
    )
    # The component sits on its own line, surrounded by newlines.
    return markdown_string.replace(placeholder, "\n<EmoticonTranslations />\n")
def replace_image_path(markdown_string: str, replacement_path: str) -> str:
    """
    Point image references at `replacement_path`.

    Until the cutover we keep using the existing image folder.
    Afterwards the images will be copied into the src folder for
    help-beta so that Astro's image optimization can be used.
    See https://chat.zulip.org/#narrow/stream/6-frontend/topic/Handling.20images.20in.20help.20center.20starlight.20migration.2E/near/1915130
    """
    old_prefix = "/static/images/help-beta"
    # Only rewrite the prefix when it directly follows `(` (Markdown
    # link/image target) or `="` (HTML attribute). A bare replace of
    # /static/images would also hit the few zulip.com links in the
    # documentation that contain that blurb as part of their URL.
    for opener in ("(", '="'):
        markdown_string = markdown_string.replace(opener + old_prefix, opener + replacement_path)
    return markdown_string
def fix_file_imports(
    markdown_string: str, import_statement_set: set[str], import_relative_base_path: str
) -> str:
    """
    Turn `{!file.md!}` include macros into MDX component tags.

    A macro is only recognized when it is alone on its line, indented
    by at most three spaces. Each macro is replaced with a
    self-closing tag named after the included file, and an import of
    `<import_relative_base_path>/_<file>x` is recorded in
    `import_statement_set`.
    """
    include_re = re.compile(r"^ {,3}\{!([^!]+)!\} *$", re.MULTILINE)

    def component_name(file_name: str) -> str:
        # presumably maps e.g. `foo-bar.md` to `FooBar` -- depends on
        # pydantic's to_pascal; TODO confirm.
        return to_pascal(file_name).replace("-", "").replace(".Md", "")

    def to_component_tag(include: re.Match[str]) -> str:
        file_name = include.group(1)
        name = component_name(file_name)
        # Appending "x" turns the `.md` name into the `.mdx` file name.
        import_statement_set.add(
            f'import {name} from "{import_relative_base_path}/_{file_name}x"'
        )
        return "<" + name + " />"

    return include_re.sub(to_component_tag, markdown_string)
def escape_curly_braces(markdown_string: str) -> str:
    """
    Put a backslash in front of every curly brace.

    MDX evaluates the contents of curly braces as a JS expression,
    so literal braces have to be escaped to survive as plain text.
    """
    brace_escapes = str.maketrans({"{": r"\{", "}": r"\}"})
    return markdown_string.translate(brace_escapes)
def fix_relative_path(markdown_string: str) -> str:
    """
    Rewrite every `help/` occurrence to `help-beta/`.

    The converted docs are served under the `help-beta/` url until
    the project is migrated completely.
    """
    pieces = markdown_string.split("help/")
    return "help-beta/".join(pieces)
def insert_string_at_line(text: str, destination_str: str, n: int) -> str:
    """
    Insert `text` as a new line before 1-indexed line `n` of
    `destination_str`.

    When `n` is outside `1..number_of_lines`, nothing is inserted.
    Line breaks in the result are always `\\n`. A trailing line break
    on `destination_str` is kept, so inserting a line does not strip
    the final newline of the document.
    """
    lines = destination_str.splitlines()
    if 1 <= n <= len(lines):
        lines.insert(n - 1, text)
    result = "\n".join(lines)
    # splitlines() discards the final line terminator; put it back so
    # that the output ends the same way the input did.
    if destination_str.endswith(("\n", "\r")):
        result += "\n"
    return result
def replace_icon_with_unplugin_component(
    match: re.Match[str],
    icon_package_name: str,
    icon_component_prefix: str,
    import_statement_set: set[str],
) -> str:
    """
    Build the component tag that replaces one matched icon element.

    The icon name is taken from the first capture group of `match`.
    The component is named `icon_component_prefix` followed by the
    pascal-cased icon name with hyphens removed, and its import from
    `~icons/<icon_package_name>/<icon_name>` is added to
    `import_statement_set`.
    """
    icon_name = match.group(1)
    pascal_icon_name = to_pascal(icon_name).replace("-", "")
    component_name = f"{icon_component_prefix}{pascal_icon_name}"
    import_statement_set.add(
        f'import {component_name} from "~icons/{icon_package_name}/{icon_name}"'
    )
    return f"<{component_name} />"
def replace_icons(markdown_string: str, import_statement_set: set[str]) -> str:
    """
    Replace Font Awesome and Zulip icon `<i>` elements with icon
    components, recording the import for each one.

    For example, `<i class="fa fa-gear"></i>` is handed to
    replace_icon_with_unplugin_component with package `fa` and prefix
    `Fa`, and `<i class="zulip-icon zulip-icon-mute"></i>` with
    package `zulip-icon` and prefix `ZulipIcons`.

    Assumptions baked into the patterns:
    - The class attribute uses double quotes.
    - The base class (`fa` or `zulip-icon`) is immediately followed,
      after whitespace, by the specific `fa-<name>` or
      `zulip-icon-<name>` class. Other classes may come before or
      after that pair.
    - `<name>` consists of lowercase letters, digits and hyphens.
    - The element body is empty, or starts with whitespace and
      contains no nested tag.
    Icon elements that do not fit are left untouched.
    """
    font_awesome_pattern = re.compile(
        r'<i[^>]*class="(?:[^"]*\s)?fa(?:\s+fa-([a-z0-9\-]+))(?:\s[^"]*)?"[^>]*>(?:\s[^<]*)?</i>',
    )
    zulip_icon_pattern = re.compile(
        r'<i[^>]*class="(?:[^"]*\s)?zulip-icon(?:\s+zulip-icon-([a-z0-9\-]+))(?:\s[^"]*)?"[^>]*>(?:\s[^<]*)?</i>',
    )

    # Font Awesome icons are converted first, then Zulip icons on the
    # already-converted text.
    with_font_awesome = font_awesome_pattern.sub(
        lambda icon: replace_icon_with_unplugin_component(
            icon, "fa", "Fa", import_statement_set
        ),
        markdown_string,
    )
    return zulip_icon_pattern.sub(
        lambda icon: replace_icon_with_unplugin_component(
            icon, "zulip-icon", "ZulipIcons", import_statement_set
        ),
        with_font_awesome,
    )
def detab(text: str) -> tuple[str, str]:
    """
    Strip one level of indentation (four spaces) from the leading
    indented run of `text`.

    Returns a pair: the dedented leading lines, and the untouched
    remainder starting at the first line that is neither indented
    nor blank. Blank lines inside the leading run are kept as empty
    lines.

    Adapted from
    https://github.com/Python-Markdown/markdown/blob/64a3c0fbc00327fbfee1fd6b44da0e5453287fe4/markdown/blockprocessors.py#L85
    This is needed to convert admonitions to asides; duplicating the
    code is acceptable for this script.
    """
    tab_length = 4
    indent = " " * tab_length
    all_lines = text.split("\n")

    dedented: list[str] = []
    for current in all_lines:
        if current.startswith(indent):
            dedented.append(current[tab_length:])
        elif current.strip():
            # First non-blank line without the indent ends the block.
            break
        else:
            dedented.append("")

    remainder = all_lines[len(dedented) :]
    return "\n".join(dedented), "\n".join(remainder)
def convert_admonitions_to_asides(
    markdown_string: str, import_statement_set: set[str], components_dir_path: str
) -> str:
    """
    Rewrite every `!!! <class>` admonition as an MDX component.

    `warn` and `tip` become `<Aside>` elements and `keyboard_tip`
    becomes `<KeyboardTip>` (imported from `components_dir_path`).
    The import each component needs is added to
    `import_statement_set`. Any other admonition class raises an
    Exception.

    Much of the parsing is taken from
    https://github.com/Python-Markdown/markdown/blob/64a3c0fbc00327fbfee1fd6b44da0e5453287fe4/markdown/extensions/admonition.py
    """
    declaration_re = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)')
    spaces_re = re.compile(" +")

    # Admonition class -> (Aside type, Aside title).
    #
    # `warn` maps to `note` since that translation remains most
    # faithful to how `warn` admonitions are displayed in the current
    # help center implementation.
    # See https://chat.zulip.org/#narrow/channel/19-documentation/topic/Stage.202.3A.20New.20syntax.20for.20!!!tip.20in.20help-beta/near/2174415
    # for more details.
    aside_variants = {
        "warn": ("note", ""),
        "tip": ("tip", "Tip"),
    }

    converted = markdown_string
    # Convert one admonition per pass, always searching the whole
    # text again, until no declaration line is left.
    while (declaration := declaration_re.search(converted)) is not None:
        before = converted[: declaration.start()]
        # Everything after the declaration line: the indented run is
        # the admonition body, the rest is ordinary content.
        body, after = detab(converted[declaration.end() :])

        # Any title given in the declaration is ignored: the current
        # help center files never specify one, and this script only
        # handles cases that exist in those files.
        klass = spaces_re.sub(" ", declaration.group(1).lower())

        if klass in aside_variants:
            aside_type, aside_title = aside_variants[klass]
            replacement = (
                f'\n<Aside type="{aside_type}" title="{aside_title}">\n{body}\n</Aside>\n'
            )
            import_statement_set.add("import { Aside } from '@astrojs/starlight/components'")
        elif klass == "keyboard_tip":
            replacement = f"\n<KeyboardTip>\n{body}\n</KeyboardTip>\n"
            import_statement_set.add(
                f"import KeyboardTip from '{components_dir_path}/KeyboardTip.astro';"
            )
        else:
            raise Exception(f"Unexpected admonition class during conversion: {klass}")

        converted = before + replacement + after

    return converted
def insert_imports(markdown_string: str, import_statement_set: set[str], line_number: int) -> str:
    """
    Insert the collected import statements, followed by one empty
    line, starting at 1-indexed `line_number` of `markdown_string`.

    Statements are emitted in sorted order so that converting the
    same input always produces the same output. The input is returned
    unchanged when there is nothing to import.

    This runs before the frontmatter is inserted. For standalone help
    files the first line is the heading, which insert_frontmatter
    reads later, so convert_help_center_file_to_mdx passes 2 as
    `line_number`; convert_include_file_to_mdx passes 1.
    """
    if not import_statement_set:
        return markdown_string

    # Iterating the set directly would yield an order that can differ
    # between runs. Every statement is inserted at the same line and
    # pushes the previous ones down, so walking the sorted statements
    # backwards leaves them in ascending order in the output.
    for import_statement in sorted(import_statement_set, reverse=True):
        markdown_string = insert_string_at_line(import_statement, markdown_string, line_number)

    # Add empty line at the end of import statement list.
    markdown_string = insert_string_at_line(
        "", markdown_string, line_number + len(import_statement_set)
    )
    return markdown_string
def insert_frontmatter(markdown_string: str) -> str:
    """
    Replace the first line of the document with frontmatter whose
    `title` is the text of that line.

    The first line is taken to be the heading: leading `#` characters
    and surrounding whitespace are stripped to obtain the title. The
    heading line itself is dropped since starlight will display the
    `title` as `H1` anyways. A description is not added to the
    frontmatter yet.
    """
    # partition (rather than split) keeps the body empty when the
    # document consists of the heading line only, so the heading is
    # never repeated below the frontmatter.
    heading_line, _, body = markdown_string.partition("\n")
    heading = heading_line.lstrip("#").strip()
    return f"---\ntitle: {heading}\n---\n{body}"
def get_markdown_string_from_file(markdown_file_path: str) -> str:
    """
    Return the raw Markdown text for `markdown_file_path`.

    A path starting with `/` is read straight from disk as UTF-8.
    Any other path is resolved through the loader of the configured
    Jinja2 template engine, and the first element of what
    `get_source` returns is used.
    """
    jinja = engines["Jinja2"]
    # Narrows the engine type so that `.env` is known to exist.
    assert isinstance(jinja, Jinja2)
    if markdown_file_path.startswith("/"):
        # Be explicit about the encoding instead of relying on the
        # locale default, which is not UTF-8 everywhere.
        with open(markdown_file_path, encoding="utf-8") as fp:
            return fp.read()
    return jinja.env.loader.get_source(jinja.env, markdown_file_path)[0]
def convert_help_center_file_to_mdx(
    markdown_file_path: str,
) -> str:
    """
    Convert one standalone help center Markdown file to MDX and
    return the MDX text.

    The steps below depend on each other's output, so their order
    matters.
    """
    mdx = get_markdown_string_from_file(markdown_file_path)

    # Every step that introduces a component records its import here.
    imports: set[str] = set()

    # Include macros are matched in their raw `{!...!}` form, so this
    # has to happen before the braces are escaped.
    mdx = fix_file_imports(mdx, imports, "./include")
    mdx = escape_curly_braces(mdx)
    mdx = fix_relative_path(mdx)
    # Looks for the placeholder in its brace-escaped form.
    mdx = replace_emoticon_translation_table(mdx, imports)
    # Looks for the `/static/images/help-beta` prefix that
    # fix_relative_path has just produced.
    mdx = replace_image_path(mdx, "../../../../static/images/help")
    mdx = replace_icons(mdx, imports)
    mdx = convert_admonitions_to_asides(mdx, imports, "../../components")
    # Imports go on line 2: line 1 is still the heading, which
    # insert_frontmatter turns into the title next.
    mdx = insert_imports(mdx, imports, 2)
    return insert_frontmatter(mdx)
def convert_include_file_to_mdx(
    markdown_file_path: str,
) -> str:
    """
    Convert one include (macro) Markdown file to MDX and return the
    MDX text.

    Include files are never served standalone; they are only pulled
    into other files. Compared to a normal help file, two steps are
    therefore left out:
    - replace_emoticon_translation_table, since it only applies to
      one file and that file is not an include file.
    - insert_frontmatter, since frontmatter is not needed in files
      that are not served standalone.
    """
    mdx = get_markdown_string_from_file(markdown_file_path)

    # Every step that introduces a component records its import here.
    imports: set[str] = set()

    # Include macros are matched in their raw `{!...!}` form, so this
    # has to happen before the braces are escaped.
    mdx = fix_file_imports(mdx, imports, ".")
    mdx = escape_curly_braces(mdx)
    mdx = fix_relative_path(mdx)
    # Looks for the `/static/images/help-beta` prefix that
    # fix_relative_path has just produced.
    mdx = replace_image_path(mdx, "../../../../../static/images/help")
    mdx = replace_icons(mdx, imports)
    mdx = convert_admonitions_to_asides(mdx, imports, "../../../components")
    # No heading line is reserved here, so imports start at line 1.
    return insert_imports(mdx, imports, 1)
def run() -> None:
    """
    Convert every file in `help/` and `help/include/` to MDX under
    `help-beta/src/content/docs`.

    The output directory is wiped first. Standalone files are written
    as `<name>.mdx`; include files are written to the `include`
    subdirectory as `_<name>.mdx`. Only regular files directly inside
    each input directory are converted. Progress is printed to
    stdout.
    """
    input_dir = os.path.join(BASE_DIR, "help")
    output_dir = os.path.join(BASE_DIR, "help-beta/src/content/docs")
    print("Starting the conversion from MD to MDX...")

    # We delete the directory first to remove any stale files
    # that might have been deleted in the `help` folder but
    # their converted mdx files stay around
    try:
        shutil.rmtree(output_dir)
    except FileNotFoundError:
        # Nothing to clean up, e.g. on the very first run.
        pass
    os.makedirs(output_dir, exist_ok=True)

    converted_count = 0
    for name in os.listdir(input_dir):
        source_path = os.path.join(input_dir, name)
        if not os.path.isfile(source_path):
            continue
        converted_count += 1
        mdx = convert_help_center_file_to_mdx(source_path)
        # Everything from the first "." of the file name on is replaced
        # with the `.mdx` extension.
        target_path = os.path.join(
            output_dir,
            os.path.basename(name).split(".")[0] + ".mdx",
        )
        with open(target_path, "w", encoding="utf-8") as mdx_file:
            mdx_file.write(mdx)
    print(f"Converted {converted_count} files. Proceeding to the conversion of include files ...")

    include_input_dir = os.path.join(input_dir, "include")
    include_output_dir = os.path.join(output_dir, "include")
    os.makedirs(include_output_dir, exist_ok=True)

    include_converted_count = 0
    for name in os.listdir(include_input_dir):
        source_path = os.path.join(include_input_dir, name)
        if not os.path.isfile(source_path):
            continue
        include_converted_count += 1
        mdx = convert_include_file_to_mdx(source_path)
        # The "_" prefix matches the `_<file>` import paths generated
        # by fix_file_imports.
        target_path = os.path.join(
            include_output_dir,
            "_" + os.path.basename(name).split(".")[0] + ".mdx",
        )
        with open(target_path, "w", encoding="utf-8") as mdx_file:
            mdx_file.write(mdx)
    print(f"Converted {include_converted_count} include files. Conversion completed.")
# Runs the whole conversion when this file is executed. There is no
# `__main__` guard, so importing the file would trigger it as well.
run()