#!/usr/bin/env python3
"""Convert the Markdown help center files under ``help/`` into MDX files.

Regular help files are written to ``help-beta/src/content/docs`` and include
files (macros) to its ``include`` subdirectory, prefixed with ``_``.
"""

import os
import re
import shutil
import sys
from collections.abc import Callable

import django
from django.template import engines
from django.template.backends.jinja2 import Jinja2
from pydantic.alias_generators import to_pascal

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from scripts.lib.setup_path import setup_path

setup_path()

os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
django.setup()

# Matches an include macro such as `{!some-file.md!}` on a line of its own
# (indented by at most three spaces). Group 1 is the included file name.
_INCLUDE_MACRO_RE = re.compile(r"^ {,3}\{!([^!]+)!\} *$", re.MULTILINE)

# Icon tags carrying `class="... fa fa-NAME ..."` / `class="... zulip-icon
# zulip-icon-NAME ..."`. Group 1 is NAME.
# NOTE(review): the element is assumed to be `<i ...>...</i>`, which is how
# icons are written in the help center Markdown -- confirm.
_FONT_AWESOME_ICON_RE = re.compile(
    r'<i[^>]*class="(?:[^"]*\s)?fa(?:\s+fa-([a-z0-9\-]+))(?:\s[^"]*)?"[^>]*>(?:\s[^<]*)?</i>',
)
_ZULIP_ICON_RE = re.compile(
    r'<i[^>]*class="(?:[^"]*\s)?zulip-icon(?:\s+zulip-icon-([a-z0-9\-]+))(?:\s[^"]*)?"[^>]*>(?:\s[^<]*)?</i>',
)

# Admonition declaration line, e.g. `!!! warn ""`. Taken from
# https://github.com/Python-Markdown/markdown/blob/64a3c0fbc00327fbfee1fd6b44da0e5453287fe4/markdown/extensions/admonition.py
_ADMONITION_RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)')
_SPACES_RE = re.compile(" +")

# Admonition class -> (Aside type, Aside title).
#
# `warn` is converted to `note` since that was the translation that remains
# most faithful to how we display `warn` admonitions in our current help
# center implementation.
# See https://chat.zulip.org/#narrow/channel/19-documentation/topic/Stage.202.3A.20New.20syntax.20for.20!!!tip.20in.20help-beta/near/2174415
# for more details.
_ASIDE_TYPE_AND_TITLE = {
    "warn": ("note", ""),
    "tip": ("tip", "Tip"),
}
_ASIDE_IMPORT_STATEMENT = "import { Aside } from '@astrojs/starlight/components'"


def replace_emoticon_translation_table(markdown_string: str, import_statement_set: set[str]) -> str:
    """
    We will replace emoticon_translations custom syntax in Python with
    <EmoticonTranslations> astro component.

    The (already brace-escaped) `\\{emoticon_translations\\}` marker is
    replaced, and the component import is added to `import_statement_set`
    only when at least one replacement happened.
    """
    result = markdown_string.replace(
        "\\{emoticon_translations\\}",
        "\n<EmoticonTranslations />\n",
    )
    if result != markdown_string:
        import_statement_set.add(
            "import EmoticonTranslations from '../../components/EmoticonTranslations.astro';"
        )
    return result


def replace_image_path(markdown_string: str, replacement_path: str) -> str:
    """
    We will point to the existing image folder till
    the cutover. After that, we will copy the images
    to src folder for help-beta in order to take
    advantage of Astro's image optimization.
    See https://chat.zulip.org/#narrow/stream/6-frontend/topic/Handling.20images.20in.20help.20center.20starlight.20migration.2E/near/1915130
    """
    # We do not replace /static/images directly since there are a few
    # instances in the documentation where zulip.com links are
    # referenced with that blurb as a part of the url.
    #
    # The prefixes say `help-beta` rather than `help` because both
    # conversion pipelines in this file call fix_relative_path() first.
    result = markdown_string.replace("(/static/images/help-beta", f"({replacement_path}")
    return result.replace('="/static/images/help-beta', f'="{replacement_path}')


def fix_file_imports(
    markdown_string: str, import_statement_set: set[str], import_relative_base_path: str
) -> str:
    """
    Replace every `{!file.md!}` include macro with an astro component tag
    and add the matching import statement to `import_statement_set`.

    The imported path is `<import_relative_base_path>/_<file.md>x`, i.e. the
    `_`-prefixed `.mdx` file name.
    """

    def convert_to_pascal(text: str) -> str:
        # Strip the dashes and the `.Md` suffix that to_pascal leaves behind
        # for a name like `some-file.md`.
        return to_pascal(text).replace("-", "").replace(".Md", "")

    def convert_to_astro_tag(match: re.Match[str]) -> str:
        include_file_name = match.group(1)
        component_name = convert_to_pascal(include_file_name)
        import_statement_set.add(
            f'import {component_name} from "{import_relative_base_path}/_{include_file_name}x"'
        )
        return "<" + component_name + " />"

    return _INCLUDE_MACRO_RE.sub(convert_to_astro_tag, markdown_string)


def escape_curly_braces(markdown_string: str) -> str:
    """
    MDX will treat curly braces as a JS expression,
    we need to escape it if we don't want it to be
    treated as such.
    """
    result = markdown_string.replace("{", r"\{")
    return result.replace("}", r"\}")


def fix_relative_path(markdown_string: str) -> str:
    """
    Since the docs will live at the `help-beta/` url
    until we migrate the project completely, we will
    replace `help/` with `help-beta/`
    """
    return markdown_string.replace("help/", "help-beta/")


def insert_string_at_line(text: str, destination_str: str, n: int) -> str:
    """
    Insert `text` as line number `n` (1-indexed) of `destination_str`.

    If `n` is outside `1..number_of_lines`, nothing is inserted. The result
    is always rebuilt by joining the lines with "\\n", so it does not end
    with a trailing newline.
    """
    lines = destination_str.splitlines()
    if 1 <= n <= len(lines):
        lines.insert(n - 1, text)
    return "\n".join(lines)


def replace_icon_with_unplugin_component(
    match: re.Match[str],
    icon_package_name: str,
    icon_component_prefix: str,
    import_statement_set: set[str],
) -> str:
    """
    Return the component tag for the icon named by group 1 of `match`, and
    add its `~icons/<icon_package_name>/<icon_name>` import statement to
    `import_statement_set`.
    """
    icon_name = match.group(1)
    component_name = icon_component_prefix + to_pascal(icon_name).replace("-", "")
    import_statement = f'import {component_name} from "~icons/{icon_package_name}/{icon_name}"'
    import_statement_set.add(import_statement)
    return f"<{component_name} />"


def replace_icons(markdown_string: str, import_statement_set: set[str]) -> str:
    """
    Replace Font Awesome and Zulip icon tags with icon components.

    A tag whose class attribute contains `fa fa-NAME` becomes `<FaName />`
    imported from `~icons/fa/NAME`; one containing `zulip-icon
    zulip-icon-NAME` becomes `<ZulipIconsName />` imported from
    `~icons/zulip-icon/NAME`. The whole tag is replaced, so any other
    attributes and any text inside the tag are dropped.
    """

    def replace_font_awesome_icon(match: re.Match[str]) -> str:
        return replace_icon_with_unplugin_component(match, "fa", "Fa", import_statement_set)

    def replace_zulip_icon(match: re.Match[str]) -> str:
        return replace_icon_with_unplugin_component(
            match, "zulip-icon", "ZulipIcons", import_statement_set
        )

    result = _FONT_AWESOME_ICON_RE.sub(replace_font_awesome_icon, markdown_string)
    return _ZULIP_ICON_RE.sub(replace_zulip_icon, result)


def detab(text: str) -> tuple[str, str]:
    """
    Remove a tab from the front of each line of the given text.
    Taken directly from
    https://github.com/Python-Markdown/markdown/blob/64a3c0fbc00327fbfee1fd6b44da0e5453287fe4/markdown/blockprocessors.py#L85
    We need this function for converting admonitions to asides, it is
    okay to be duplicating this code for this script.

    Returns a tuple of (the leading indented lines with one level of
    indentation removed, the untouched remainder of the text).
    """
    tab_length = 4
    indent = " " * tab_length
    newtext = []
    lines = text.split("\n")
    for line in lines:
        if line.startswith(indent):
            newtext.append(line[tab_length:])
        elif not line.strip():
            # Blank lines are kept as part of the indented block.
            newtext.append("")
        else:
            break
    return "\n".join(newtext), "\n".join(lines[len(newtext) :])


def _get_admonition_class_and_title(match: re.Match[str]) -> tuple[str, str | None]:
    """Extract the normalized class and the title from an admonition declaration match."""
    klass, title = match.group(1).lower(), match.group(2)
    klass = _SPACES_RE.sub(" ", klass)
    if title is None:
        # no title was provided, use the capitalized class name as title
        title = klass.split(" ", 1)[0].capitalize()
    elif title == "":
        # an explicit blank title should not be rendered
        title = None
    return klass, title


def convert_admonitions_to_asides(
    markdown_string: str, import_statement_set: set[str], components_dir_path: str
) -> str:
    """
    Convert `!!! warn`, `!!! tip` and `!!! keyboard_tip` admonitions to
    `<Aside>` / `<KeyboardTip>` components, adding the needed imports to
    `import_statement_set`.

    Lots of code in this function is taken from
    https://github.com/Python-Markdown/markdown/blob/64a3c0fbc00327fbfee1fd6b44da0e5453287fe4/markdown/extensions/admonition.py

    Raises an exception for any other admonition class.
    """
    text = markdown_string
    # Convert one admonition per iteration; the replacement no longer
    # matches the declaration pattern, so each search finds the next one.
    while (match := _ADMONITION_RE.search(text)) is not None:
        pre_admonition_declaration_text = text[: match.start()]
        # Everything after the declaration line; detab() splits it into the
        # indented admonition body and the rest of the document.
        admonition_content, post_admonition_content_text = detab(text[match.end() :])

        # We ignore the title obtained here since in our current help
        # center files, we do not specify the title anywhere. This script
        # only handles cases that exist in our help center files, nothing
        # more than that is handled.
        klass, _ignored_title = _get_admonition_class_and_title(match)

        if klass in _ASIDE_TYPE_AND_TITLE:
            aside_type, aside_title = _ASIDE_TYPE_AND_TITLE[klass]
            replacement = (
                f'\n<Aside type="{aside_type}" title="{aside_title}">'
                f"\n{admonition_content}\n</Aside>\n"
            )
            import_statement_set.add(_ASIDE_IMPORT_STATEMENT)
        elif klass == "keyboard_tip":
            replacement = f"\n<KeyboardTip>\n{admonition_content}\n</KeyboardTip>\n"
            import_statement_set.add(
                f"import KeyboardTip from '{components_dir_path}/KeyboardTip.astro';"
            )
        else:
            raise ValueError(f"Unexpected admonition class during conversion: {klass}")

        text = pre_admonition_declaration_text + replacement + post_admonition_content_text
    return text


def insert_imports(markdown_string: str, import_statement_set: set[str], line_number: int) -> str:
    """
    Insert every statement of `import_statement_set`, followed by an empty
    line, starting at `line_number` (1-indexed) of `markdown_string`.

    The statements are emitted in sorted order so that the generated files
    are identical from one run to the next.
    """
    if not import_statement_set:
        return markdown_string

    # This function is called when the frontmatter has not yet been
    # inserted. First line of the file is always the heading/title of
    # the file. We rely on the heading being the first line later in
    # the conversion when inserting frontmatter. For this reason, we
    # add the imports to the second line.
    #
    # Each statement is inserted at the same line and pushes the earlier
    # ones down, so iterate in reverse to end up in ascending order.
    for import_statement in sorted(import_statement_set, reverse=True):
        markdown_string = insert_string_at_line(import_statement, markdown_string, line_number)

    # Add empty line at the end of import statement list.
    return insert_string_at_line("", markdown_string, line_number + len(import_statement_set))


def insert_frontmatter(markdown_string: str) -> str:
    """
    We use the heading in the first line for the
    existing files to extract the document title.
    We are not adding a description to the frontmatter
    yet.
    """
    first_line, _, remaining_text = markdown_string.partition("\n")
    heading = first_line.lstrip("#").strip()
    title = f"---\ntitle: {heading}\n---\n"
    # Remove the first line since starlight will display the
    # `title` as `H1` anyways.
    return title + remaining_text


def get_markdown_string_from_file(markdown_file_path: str) -> str:
    """
    Return the content of `markdown_file_path`.

    Absolute paths are read directly from disk; any other path is resolved
    through the loader of the Jinja2 template engine.
    """
    jinja = engines["Jinja2"]
    assert isinstance(jinja, Jinja2)
    if markdown_file_path.startswith("/"):
        with open(markdown_file_path, encoding="utf-8") as fp:
            return fp.read()
    return jinja.env.loader.get_source(jinja.env, markdown_file_path)[0]


def convert_help_center_file_to_mdx(
    markdown_file_path: str,
) -> str:
    """
    Given a path to a Markdown file, return the equivalent MDX file.
    """
    result = get_markdown_string_from_file(markdown_file_path)

    # All imports inserted during conversion should be tracked here.
    import_statement_set: set[str] = set()

    # Include macros are matched with unescaped braces, so this has to run
    # before escape_curly_braces.
    result = fix_file_imports(result, import_statement_set, "./include")
    result = escape_curly_braces(result)
    result = fix_relative_path(result)
    # Matches the escaped marker, so it has to run after escape_curly_braces.
    result = replace_emoticon_translation_table(result, import_statement_set)
    # Matches `help-beta` image paths, so it has to run after fix_relative_path.
    result = replace_image_path(result, "../../../../static/images/help")
    result = replace_icons(result, import_statement_set)
    result = convert_admonitions_to_asides(result, import_statement_set, "../../components")
    # Imports go on line 2: line 1 is still the heading that
    # insert_frontmatter turns into the title.
    result = insert_imports(result, import_statement_set, 2)
    result = insert_frontmatter(result)
    return result


def convert_include_file_to_mdx(
    markdown_file_path: str,
) -> str:
    """
    Given a path to a Markdown file, return the equivalent MDX file.
    We do not do certain operations that we do on a normal help file
    since these files are not to be served standalone but instead as
    macros in other files.
    - replace_emoticon_translation_table is skipped since that function
      only applies to one file, and that file is not an include file.
    - insert_frontmatter is skipped since frontmatter is not needed in
      files that are not served standalone.
    """
    result = get_markdown_string_from_file(markdown_file_path)

    # All imports inserted during conversion should be tracked here.
    import_statement_set: set[str] = set()

    # Same ordering constraints as in convert_help_center_file_to_mdx.
    result = fix_file_imports(result, import_statement_set, ".")
    result = escape_curly_braces(result)
    result = fix_relative_path(result)
    result = replace_image_path(result, "../../../../../static/images/help")
    result = replace_icons(result, import_statement_set)
    result = convert_admonitions_to_asides(result, import_statement_set, "../../../components")
    result = insert_imports(result, import_statement_set, 1)
    return result


def _convert_directory(
    input_dir: str,
    output_dir: str,
    convert: Callable[[str], str],
    output_prefix: str = "",
) -> int:
    """
    Convert every regular file directly inside `input_dir` with `convert`
    and write the result to `output_dir` as
    `<output_prefix><name up to its first dot>.mdx`.

    Returns the number of files converted.
    """
    os.makedirs(output_dir, exist_ok=True)
    converted_count = 0
    for name in os.listdir(input_dir):
        input_path = os.path.join(input_dir, name)
        if not os.path.isfile(input_path):
            continue
        mdx = convert(input_path)
        output_name = output_prefix + os.path.basename(name).split(".")[0] + ".mdx"
        with open(os.path.join(output_dir, output_name), "w", encoding="utf-8") as mdx_file:
            mdx_file.write(mdx)
        converted_count += 1
    return converted_count


def run() -> None:
    """Convert all help center files and include files from MD to MDX."""
    input_dir = os.path.join(BASE_DIR, "help")
    output_dir = os.path.join(BASE_DIR, "help-beta/src/content/docs")
    print("Starting the conversion from MD to MDX...")

    # We delete the directory first to remove any stale files
    # that might have been deleted in the `help` folder but
    # their converted mdx files stay around
    if os.path.isdir(output_dir):
        # Guarded so that the very first run, when nothing has been
        # generated yet, does not fail.
        shutil.rmtree(output_dir)

    converted_count = _convert_directory(input_dir, output_dir, convert_help_center_file_to_mdx)
    print(f"Converted {converted_count} files. Proceeding to the conversion of include files ...")

    include_converted_count = _convert_directory(
        os.path.join(input_dir, "include"),
        os.path.join(output_dir, "include"),
        convert_include_file_to_mdx,
        output_prefix="_",
    )
    print(f"Converted {include_converted_count} include files. Conversion completed.")


run()