mirror of
https://github.com/zulip/zulip.git
synced 2025-11-01 20:44:04 +00:00
Fixes #31248. For cases with zero tabs in our current help center, we were using the tab syntax just to put a border around the instructions without any tab label. We do not want that border anymore since Tabs also don't have any borders. So, we just remove the old tab syntax in that case during our conversion. We also move the admonition-to-asides conversion before the tabs syntax conversion, since running the tabs conversion first was interfering with the asides conversion and resulting in some missing closing tags. We could modify things to work regardless of order, but that doesn't seem worth digging into for a conversion script.
450 lines
17 KiB
Python
Executable File
450 lines
17 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import os
|
|
import re
|
|
import shutil
|
|
import sys
|
|
from textwrap import indent
|
|
|
|
import django
|
|
from django.template import engines
|
|
from django.template.backends.jinja2 import Jinja2
|
|
from pydantic.alias_generators import to_pascal
|
|
|
|
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
|
|
from scripts.lib.setup_path import setup_path
|
|
|
|
setup_path()
|
|
|
|
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
|
|
django.setup()
|
|
|
|
from zerver.lib.markdown.tabbed_sections import generate_content_blocks, parse_tabs
|
|
|
|
INDENT_SPACES = " "
|
|
|
|
|
|
def replace_emoticon_translation_table(markdown_string: str, import_statement_set: set[str]) -> str:
    """
    Swap the emoticon_translations macro for the <EmoticonTranslations>
    Astro component.

    The macro is looked up in its brace-escaped form (backslash before
    each curly brace). When at least one occurrence is found, the
    component's import statement is recorded in `import_statement_set`;
    otherwise the input is returned unchanged and the set is untouched.
    """
    escaped_macro = "\\{emoticon_translations\\}"
    if escaped_macro not in markdown_string:
        return markdown_string

    import_statement_set.add(
        "import EmoticonTranslations from '../../components/EmoticonTranslations.astro';"
    )
    # The component is surrounded by newlines so it sits on its own line.
    return markdown_string.replace(escaped_macro, "\n<EmoticonTranslations />\n")
|
|
|
|
|
|
def replace_image_path(markdown_string: str, replacement_path: str) -> str:
    """
    Point help-center image references at `replacement_path`.

    Until the cutover we keep pointing to the existing image folder.
    Afterwards, the images will be copied to the src folder for
    help-beta in order to take advantage of Astro's image optimization.
    See https://chat.zulip.org/#narrow/stream/6-frontend/topic/Handling.20images.20in.20help.20center.20starlight.20migration.2E/near/1915130

    Only two contexts are rewritten: Markdown link targets (the prefix
    directly after an opening parenthesis) and attribute values (the
    prefix directly after `="`).
    """
    # We do not replace /static/images directly since there are a few
    # instances in the documentation where zulip.com links are
    # referenced with that blurb as a part of the url.
    markdown_links_fixed = markdown_string.replace(
        "(/static/images/help-beta", "(" + replacement_path
    )
    attributes_fixed = markdown_links_fixed.replace(
        '="/static/images/help-beta', '="' + replacement_path
    )
    return attributes_fixed
|
|
|
|
|
|
def fix_file_imports(
    markdown_string: str, import_statement_set: set[str], import_relative_base_path: str
) -> str:
    """
    Convert `{!file.md!}` include macros into Astro component tags.

    A macro is recognised only when it is alone on its line, preceded
    by at most three spaces and followed only by spaces. Each macro is
    replaced with a self-closing tag named after the included file, and
    an import of `<import_relative_base_path>/_<file>x` is added to
    `import_statement_set` for every file referenced.
    """
    include_macro = re.compile(r"^ {,3}\{!([^!]+)!\} *$", re.MULTILINE)

    def component_name_for(file_name: str) -> str:
        # Pascal-case the file name, then drop hyphens and the
        # (title-cased) Markdown extension to get an identifier.
        pascal_name = to_pascal(file_name)
        return pascal_name.replace("-", "").replace(".Md", "")

    for included_file in include_macro.findall(markdown_string):
        import_statement_set.add(
            f'import {component_name_for(included_file)} from "{import_relative_base_path}/_{included_file}x"'
        )

    return include_macro.sub(
        lambda found: "<" + component_name_for(found.group(1)) + " />",
        markdown_string,
    )
|
|
|
|
|
|
def escape_curly_braces(markdown_string: str) -> str:
    """
    Backslash-escape every curly brace in the given text.

    MDX treats curly braces as the delimiters of a JS expression, so
    literal braces must be escaped to be rendered as plain text.
    """
    brace_escapes = str.maketrans({"{": r"\{", "}": r"\}"})
    return markdown_string.translate(brace_escapes)
|
|
|
|
|
|
def fix_relative_path(markdown_string: str) -> str:
    """
    Rewrite `help/` to `help-beta/` everywhere in the text.

    The docs live at the `help-beta/` url until the project is migrated
    completely. This is a plain substring replacement, so every
    occurrence of `help/` is rewritten, not only those inside links.
    """
    old_prefix, new_prefix = "help/", "help-beta/"
    return markdown_string.replace(old_prefix, new_prefix)
|
|
|
|
|
|
def insert_string_at_line(text: str, destination_str: str, n: int) -> str:
    """
    Insert `text` into `destination_str` so that it becomes line `n`.

    `n` is 1-indexed. Valid positions are 1 through one past the last
    line; the latter appends `text` as a new final line. For any other
    `n` nothing is inserted.

    The result is rebuilt from `splitlines()`, so lines are joined with
    "\\n" and a trailing newline of `destination_str` is not preserved.
    """
    lines = destination_str.splitlines()
    # `len(lines) + 1` is the append position. Accepting it means text
    # is not silently dropped for short (or empty) documents.
    if 1 <= n <= len(lines) + 1:
        lines.insert(n - 1, text)
    return "\n".join(lines)
|
|
|
|
|
|
def replace_icon_with_unplugin_component(
    match: re.Match[str],
    icon_package_name: str,
    icon_component_prefix: str,
    import_statement_set: set[str],
) -> str:
    """
    Build the component tag for one matched icon and register its import.

    The icon name is taken from the first capture group of `match`. The
    component name is `icon_component_prefix` followed by the
    Pascal-cased icon name with hyphens removed; it is imported from
    `~icons/<icon_package_name>/<icon name>`. Returns the self-closing
    component tag that replaces the matched text.
    """
    icon_name = match.group(1)
    pascal_icon_name = to_pascal(icon_name).replace("-", "")
    component_name = f"{icon_component_prefix}{pascal_icon_name}"
    import_statement_set.add(
        f'import {component_name} from "~icons/{icon_package_name}/{icon_name}"'
    )
    return f"<{component_name} />"
|
|
|
|
|
|
def replace_icons(markdown_string: str, import_statement_set: set[str]) -> str:
    """
    Replace `<i>` icon tags with unplugin-icons components.

    Two icon families are handled, Font Awesome first and then Zulip
    icons. A tag is recognised when its `class` attribute contains the
    family class immediately followed by the specific icon class, e.g.
    `<i class="fa fa-gear"></i>` or
    `<i class="zulip-icon zulip-icon-smile"></i>`.

    Assumptions baked into the patterns:
    - the icon name consists of lowercase letters, digits and hyphens;
    - other classes may appear before or after the icon classes;
    - the tag body is either empty or starts with whitespace and
      contains no `<`.

    The import for every generated component is added to
    `import_statement_set`.
    """
    font_awesome_pattern = re.compile(
        r'<i[^>]*class="(?:[^"]*\s)?fa(?:\s+fa-([a-z0-9\-]+))(?:\s[^"]*)?"[^>]*>(?:\s[^<]*)?</i>',
    )
    zulip_icon_pattern = re.compile(
        r'<i[^>]*class="(?:[^"]*\s)?zulip-icon(?:\s+zulip-icon-([a-z0-9\-]+))(?:\s[^"]*)?"[^>]*>(?:\s[^<]*)?</i>',
    )

    with_font_awesome_replaced = font_awesome_pattern.sub(
        lambda found: replace_icon_with_unplugin_component(
            found, "fa", "Fa", import_statement_set
        ),
        markdown_string,
    )
    return zulip_icon_pattern.sub(
        lambda found: replace_icon_with_unplugin_component(
            found, "zulip-icon", "ZulipIcons", import_statement_set
        ),
        with_font_awesome_replaced,
    )
|
|
|
|
|
|
def convert_tab_syntax(markdown_string: str, import_statement_set: set[str]) -> str:
    """
    Convert our custom tab syntax to the relevant MDX equivalent.

    This function is inspired by `tabbed_section.py`'s run method.
    Sections are converted one at a time: `parse_tabs` is asked for the
    next section, that section is replaced in the line list, and the
    process repeats until `parse_tabs` returns a falsy value.

    The Tabs/TabItem import is added to `import_statement_set` whenever
    a section that actually contains tabs is converted.
    """
    TABBED_SECTION_TEMPLATE = """<Tabs>
{blocks}
</Tabs>"""

    TAB_ITEM_TEMPLATE = """<TabItem label="{tab_label}">
{content}
</TabItem>"""

    def indent_content_inside_blocks(content_block: str) -> str:
        """
        `generate_content_blocks` does no indenting, and we need the
        content between our TabItems to be indented. A bit of string
        manipulation here is a better alternative than duplicating
        that function just to indent.
        """
        tab_item_tags = ("<TabItem", "</TabItem>")
        return "\n".join(
            line if line.startswith(tab_item_tags) else INDENT_SPACES + line
            for line in content_block.splitlines()
        )

    lines = markdown_string.splitlines()
    while tab_section := parse_tabs(lines):
        if "tabs" in tab_section:
            tab_items = generate_content_blocks(tab_section, lines, TAB_ITEM_TEMPLATE)
            tab_items = indent_content_inside_blocks(tab_items)
            # Second indentation level: everything sits inside <Tabs>.
            tab_items = indent(tab_items, INDENT_SPACES)
            tabs_mdx = TABBED_SECTION_TEMPLATE.format(blocks=tab_items)
            import_statement_set.add(
                "import { Tabs, TabItem } from '@astrojs/starlight/components'"
            )
        else:
            # No tabs at all: `start_tabs` / `end_tabs` were only used
            # to draw a border around the instructions. We don't want
            # that border anymore, so keep the enclosed content as is.
            tabs_mdx = "\n".join(
                lines[tab_section["start_tabs_index"] + 1 : tab_section["end_tabs_index"]]
            )

        section_start = tab_section["start_tabs_index"]
        section_end = tab_section["end_tabs_index"] + 1
        # The whole section, markers included, collapses into a single
        # (multi-line) list element.
        lines = [*lines[:section_start], tabs_mdx, *lines[section_end:]]

    return "\n".join(lines)
|
|
|
|
|
|
def detab(text: str) -> tuple[str, str]:
    """
    Split off the leading indented block of `text` and dedent it.

    Lines are consumed from the start for as long as they either begin
    with four spaces (which are removed) or are blank (kept as empty
    lines). Returns a tuple of the dedented block and the remaining,
    untouched text.

    Taken directly from
    https://github.com/Python-Markdown/markdown/blob/64a3c0fbc00327fbfee1fd6b44da0e5453287fe4/markdown/blockprocessors.py#L85
    We need this function for converting admonitions to asides; it is
    okay to be duplicating this code for this script.
    """
    tab_length = 4
    indent_prefix = " " * tab_length
    source_lines = text.split("\n")

    dedented_lines = []
    for line in source_lines:
        if line.startswith(indent_prefix):
            dedented_lines.append(line[tab_length:])
        elif line.strip():
            # First non-blank line without the indent ends the block.
            break
        else:
            dedented_lines.append("")

    consumed = len(dedented_lines)
    return "\n".join(dedented_lines), "\n".join(source_lines[consumed:])
|
|
|
|
|
|
def convert_admonitions_to_asides(
    markdown_string: str, import_statement_set: set[str], components_dir_path: str
) -> str:
    """
    Convert `!!! <class>` admonitions into MDX components.

    Lots of code in this function is taken from
    https://github.com/Python-Markdown/markdown/blob/64a3c0fbc00327fbfee1fd6b44da0e5453287fe4/markdown/extensions/admonition.py

    Supported classes are `warn`, `tip` (both become `<Aside>`) and
    `keyboard_tip` (becomes `<KeyboardTip>`, imported from
    `components_dir_path`). Any other class raises an Exception. The
    imports needed by the generated components are added to
    `import_statement_set`.
    """
    RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)')
    RE_SPACES = re.compile(" +")
    ASIDE_IMPORT = "import { Aside } from '@astrojs/starlight/components'"

    def render_aside(aside_type: str, title: str, body: str) -> str:
        return f'\n<Aside type="{aside_type}" title="{title}">\n{body}\n</Aside>\n'

    text = markdown_string
    # Convert one admonition per pass, always searching again from the
    # start of the updated text, until none is left.
    while (match := RE.search(text)) is not None:
        before_declaration = text[: match.start()]
        # Everything after the `!!!` line: the indented body comes
        # first, followed by the rest of the document.
        body, after_admonition = detab(text[match.end() :])
        body = indent(body, INDENT_SPACES)

        # Lowercase the class and collapse runs of spaces. A title
        # given in the declaration is deliberately ignored: our help
        # center files never specify one, and this script only handles
        # cases that exist in those files.
        klass = RE_SPACES.sub(" ", match.group(1).lower())

        if klass == "warn":
            # `warn` becomes `note` since that is the translation that
            # remains most faithful to how we display `warn`
            # admonitions in our current help center implementation.
            # See https://chat.zulip.org/#narrow/channel/19-documentation/topic/Stage.202.3A.20New.20syntax.20for.20!!!tip.20in.20help-beta/near/2174415
            # for more details.
            replacement = render_aside("note", "", body)
            import_statement_set.add(ASIDE_IMPORT)
        elif klass == "tip":
            replacement = render_aside("tip", "Tip", body)
            import_statement_set.add(ASIDE_IMPORT)
        elif klass == "keyboard_tip":
            replacement = f"\n<KeyboardTip>\n{body}\n</KeyboardTip>\n"
            import_statement_set.add(
                f"import KeyboardTip from '{components_dir_path}/KeyboardTip.astro';"
            )
        else:
            raise Exception(f"Unexpected admonition class during conversion: {klass}")

        text = before_declaration + replacement + after_admonition

    return text
|
|
|
|
|
|
def insert_imports(markdown_string: str, import_statement_set: set[str], line_number: int) -> str:
    """
    Insert the collected import statements, followed by an empty line,
    starting at `line_number` (1-indexed) of `markdown_string`.

    Returns `markdown_string` unchanged when there is nothing to import.
    The statements are emitted in sorted order so that converting the
    same file always produces the same output.
    """
    if not import_statement_set:
        return markdown_string

    # This function is called when the frontmatter has not yet been
    # inserted. For help center files, the first line is always the
    # heading/title of the file, and we rely on that later in the
    # conversion when inserting frontmatter; callers therefore pass a
    # `line_number` that keeps the heading on the first line.
    #
    # Every statement is inserted at the same line, pushing the ones
    # inserted earlier further down, so we walk the statements in
    # reverse sorted order to end up with ascending order in the file.
    # Iterating the set directly would give an order that can change
    # from one run to the next.
    for import_statement in sorted(import_statement_set, reverse=True):
        markdown_string = insert_string_at_line(import_statement, markdown_string, line_number)

    # Add empty line at the end of import statement list.
    markdown_string = insert_string_at_line(
        "", markdown_string, line_number + len(import_statement_set)
    )
    return markdown_string
|
|
|
|
|
|
def insert_frontmatter(markdown_string: str) -> str:
    """
    Replace the heading on the first line with a frontmatter block.

    We use the heading in the first line of the existing files to
    extract the document title: leading `#` characters and surrounding
    whitespace are stripped. We are not adding a description to the
    frontmatter yet.

    The heading line itself is removed from the body since starlight
    will display the `title` as `H1` anyways. This also holds for a
    document that consists of nothing but the heading line.
    """
    heading_line, _, body = markdown_string.partition("\n")
    heading = heading_line.lstrip("#").strip()
    # NOTE(review): the heading is written as a plain YAML scalar;
    # this assumes titles contain nothing YAML would misparse (such as
    # ": ") -- confirm against the help center files.
    return f"---\ntitle: {heading}\n---\n{body}"
|
|
|
|
|
|
def get_markdown_string_from_file(markdown_file_path: str) -> str:
    """
    Return the raw Markdown source for `markdown_file_path`.

    A path starting with "/" is read directly from disk as UTF-8.
    Any other path is resolved through the loader of the configured
    Jinja2 template engine, and the template source is returned.
    """
    jinja = engines["Jinja2"]
    assert isinstance(jinja, Jinja2)
    if markdown_file_path.startswith("/"):
        # Decode explicitly as UTF-8 rather than with the locale's
        # default encoding, so the result does not depend on the
        # environment the script runs in.
        with open(markdown_file_path, encoding="utf-8") as fp:
            return fp.read()

    # `get_source` returns a tuple whose first item is the source text.
    return jinja.env.loader.get_source(jinja.env, markdown_file_path)[0]
|
|
|
|
|
|
def convert_help_center_file_to_mdx(
    markdown_file_path: str,
) -> str:
    """
    Given a path to a help center Markdown file, return its content
    converted to MDX.

    The conversion is a fixed pipeline of string transformations; the
    order of the steps matters (see the comments below).
    """
    # All imports inserted during conversion should be tracked here.
    imports: set[str] = set()

    mdx = get_markdown_string_from_file(markdown_file_path)

    # Custom syntax that relies on unescaped curly braces or on the
    # original line layout is converted first. Admonitions go before
    # tabs: doing the tabs conversion first interfered with the asides
    # conversion and led to missing closing tags.
    mdx = fix_file_imports(mdx, imports, "./include")
    mdx = convert_admonitions_to_asides(mdx, imports, "../../components")
    mdx = convert_tab_syntax(mdx, imports)

    # From here on braces are escaped; the emoticon macro is therefore
    # looked up in its escaped form.
    mdx = escape_curly_braces(mdx)
    mdx = fix_relative_path(mdx)
    mdx = replace_emoticon_translation_table(mdx, imports)
    mdx = replace_image_path(mdx, "../../../../static/images/help")
    mdx = replace_icons(mdx, imports)

    # Imports are added after escaping so their braces stay intact,
    # and on line 2 so the heading is still the first line when the
    # frontmatter is generated from it.
    mdx = insert_imports(mdx, imports, 2)
    return insert_frontmatter(mdx)
|
|
|
|
|
|
def convert_include_file_to_mdx(
    markdown_file_path: str,
) -> str:
    """
    Given a path to an include Markdown file, return its content
    converted to MDX.

    Include files are not served standalone but used as macros in
    other files, so some steps done for a normal help file are skipped:
    - replace_emoticon_translation_table is skipped since that
      function only applies to one file, and that file is not an
      include file.
    - insert_frontmatter is skipped since frontmatter is not needed
      in files that are not served standalone.
    """
    # All imports inserted during conversion should be tracked here.
    imports: set[str] = set()

    mdx = get_markdown_string_from_file(markdown_file_path)

    # Custom syntax is converted before curly braces are escaped.
    mdx = fix_file_imports(mdx, imports, ".")
    mdx = convert_admonitions_to_asides(mdx, imports, "../../../components")
    mdx = convert_tab_syntax(mdx, imports)

    mdx = escape_curly_braces(mdx)
    mdx = fix_relative_path(mdx)
    mdx = replace_image_path(mdx, "../../../../../static/images/help")
    mdx = replace_icons(mdx, imports)

    # There is no heading to preserve here, so imports start on the
    # very first line.
    return insert_imports(mdx, imports, 1)
|
|
|
|
|
|
def run() -> None:
    """
    Convert every help center Markdown file to MDX.

    Files directly inside `help/` are written to
    `help-beta/src/content/docs/<name>.mdx`; files directly inside
    `help/include/` are written to the `include/` subdirectory of that
    output directory as `_<name>.mdx`. Subdirectories are not descended
    into. The output directory is recreated from scratch on every run.
    """
    input_dir = os.path.join(BASE_DIR, "help")
    output_dir = os.path.join(BASE_DIR, "help-beta/src/content/docs")
    print("Starting the conversion from MD to MDX...")

    converted_count = 0

    # We delete the directory first to remove any stale files
    # that might have been deleted in the `help` folder but
    # their converted mdx files stay around. The directory does not
    # exist before the first conversion, which must not be an error.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    for name in os.listdir(input_dir):
        source_path = os.path.join(input_dir, name)
        if not os.path.isfile(source_path):
            continue
        converted_count += 1
        mdx = convert_help_center_file_to_mdx(source_path)
        # The output name is everything before the first "." of the
        # source file name.
        output_name = os.path.basename(name).split(".")[0] + ".mdx"
        with open(os.path.join(output_dir, output_name), "w", encoding="utf-8") as mdx_file:
            mdx_file.write(mdx)
    print(f"Converted {converted_count} files. Proceeding to the conversion of include files ...")

    include_converted_count = 0
    include_input_dir = os.path.join(input_dir, "include")
    include_output_dir = os.path.join(output_dir, "include")
    os.makedirs(include_output_dir, exist_ok=True)
    for name in os.listdir(include_input_dir):
        source_path = os.path.join(include_input_dir, name)
        if not os.path.isfile(source_path):
            continue
        include_converted_count += 1
        mdx = convert_include_file_to_mdx(source_path)
        # Converted include files carry a leading underscore.
        output_name = "_" + os.path.basename(name).split(".")[0] + ".mdx"
        with open(
            os.path.join(include_output_dir, output_name), "w", encoding="utf-8"
        ) as mdx_file:
            mdx_file.write(mdx)
    print(f"Converted {include_converted_count} include files. Conversion completed.")
|
|
|
|
|
|
# Script entry point: the conversion starts as soon as this file is
# executed (there is no `__main__` guard).
run()
|