mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	slack importer: Add regex for markdown mapping.
* For strikethrough formatting: Slack's '~strike~' to Zulip's '~~strike~~'.
    * For bold formatting: Slack's '*bold*' to Zulip's '**bold**'.
    * For italic formatting: Slack's '_italic_' to Zulip's '*italic*'.
    * For mentioning formatting: Slack's <@slack_id|short_name> to Zulip's @**full_name**.
    * Checking links.
			
			
This commit is contained in:
		@@ -296,31 +296,74 @@ def channel_message_to_zerver_message(constants: List[Any], channel: str,
 | 
			
		||||
    zerver_message = []
 | 
			
		||||
    zerver_usermessage = []
 | 
			
		||||
 | 
			
		||||
    slack_usermention_regex = r"(<@)([a-zA-Z0-9]+)(\|)?([a-zA-Z0-9]+)?(>)"
 | 
			
		||||
    # Slack link can be in the format <http://www.foo.com|www.foo.com> and <http://foo.com/>
 | 
			
		||||
    LINK_REGEX = r"""
 | 
			
		||||
                  ^<(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?
 | 
			
		||||
                  [a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?
 | 
			
		||||
                  (\/.*)?(\|)?(?:\|([^>]+))?>$
 | 
			
		||||
                  """
 | 
			
		||||
    SLACK_USERMENTION_REGEX = r"""
 | 
			
		||||
                               (<@)                  # Start with <@
 | 
			
		||||
                                   ([a-zA-Z0-9]+)    # Here we have the Slack id
 | 
			
		||||
                               (\|)?                 # We not always have a Vertical line in mention
 | 
			
		||||
                                   ([a-zA-Z0-9]+)?   # If Vertical line is present, this is short name
 | 
			
		||||
                               (>)                   # ends with >
 | 
			
		||||
                               """
 | 
			
		||||
    # Slack doesn't have mid-word message-formatting like Zulip.
 | 
			
		||||
    # Hence, ~stri~ke doesn't format the word in slack, but ~~stri~~ke
 | 
			
		||||
    # formats the word in Zulip
 | 
			
		||||
    SLACK_STRIKETHROUGH_REGEX = r"""
 | 
			
		||||
                                 (^|[ -(]|[+-/]|[:-?]|\{|\[|\||\^)     # Start after specified characters
 | 
			
		||||
                                 (\~)                                  # followed by an asterisk
 | 
			
		||||
                                     ([ -)+-}—]*)([!-}]+)              # any character except asterisk
 | 
			
		||||
                                 (\~)                                  # followed by an asterisk
 | 
			
		||||
                                 ($|[ -']|[+-/]|[:-?]|\}|\)|\]|\||\^)  # ends with specified characters
 | 
			
		||||
                                 """
 | 
			
		||||
    SLACK_ITALIC_REGEX = r"""
 | 
			
		||||
                          (^|[ -(]|[+-/]|[:-?]|\{|\[|\||\^)
 | 
			
		||||
                          (\_)
 | 
			
		||||
                              ([ -~—]*)([!-}]+)                  # any character
 | 
			
		||||
                          (\_)
 | 
			
		||||
                          ($|[ -']|[+-/]|[:-?]|\}|\)|\]|\||\^)
 | 
			
		||||
                          """
 | 
			
		||||
    SLACK_BOLD_REGEX = r"""
 | 
			
		||||
                        (^|[ -(]|[+-/]|[:-?]|\{|\[|\||\^)
 | 
			
		||||
                        (\*)
 | 
			
		||||
                            ([ -~—]*)([!-}]+)                   # any character
 | 
			
		||||
                        (\*)
 | 
			
		||||
                        ($|[ -']|[+-/]|[:-?]|\}|\)|\]|\||\^)
 | 
			
		||||
                        """
 | 
			
		||||
 | 
			
		||||
    # TODO mapping italics, bold, strikethrough etc
 | 
			
		||||
    # Markdown mapping
 | 
			
		||||
    def convert_to_zulip_markdown(text: str) -> Tuple[str, List[int], bool]:
 | 
			
		||||
        tokens = text.split(' ')
 | 
			
		||||
        mentioned_users_id = []
 | 
			
		||||
        has_link = False
 | 
			
		||||
 | 
			
		||||
        text = convert_markdown_syntax(text, SLACK_STRIKETHROUGH_REGEX, "~~")
 | 
			
		||||
        text = convert_markdown_syntax(text, SLACK_ITALIC_REGEX, "*")
 | 
			
		||||
        text = convert_markdown_syntax(text, SLACK_BOLD_REGEX, "**")
 | 
			
		||||
 | 
			
		||||
        # Map Slack's mention all: '<!everyone>' to '@**all** '
 | 
			
		||||
        # No regex for this as it can be present anywhere in the sentence
 | 
			
		||||
        text = text.replace('<!everyone>', '@**all**')
 | 
			
		||||
        tokens = text.split(' ')
 | 
			
		||||
        for iterator in range(len(tokens)):
 | 
			
		||||
            # Check user mentions and change mention format from
 | 
			
		||||
            # '<@slack_id|short_name>' to '@**full_name**'
 | 
			
		||||
            if (re.compile(slack_usermention_regex).match(tokens[iterator])):
 | 
			
		||||
            if (re.compile(SLACK_USERMENTION_REGEX, re.VERBOSE).match(tokens[iterator])):
 | 
			
		||||
                tokens[iterator], user_id = get_user_mentions(tokens[iterator])
 | 
			
		||||
                mentioned_users_id.append(user_id)
 | 
			
		||||
 | 
			
		||||
            # Check link
 | 
			
		||||
            if ('http://' in tokens[iterator] or 'https://' in tokens[iterator]):
 | 
			
		||||
            if (re.compile(LINK_REGEX, re.VERBOSE).match(tokens[iterator])):
 | 
			
		||||
                tokens[iterator] = tokens[iterator].replace('<', '').replace('>', '')
 | 
			
		||||
                has_link = True
 | 
			
		||||
 | 
			
		||||
        token = ' '.join(tokens)
 | 
			
		||||
        return token, mentioned_users_id, has_link
 | 
			
		||||
 | 
			
		||||
    def get_user_mentions(token: str) -> Tuple[str, int]:
 | 
			
		||||
        slack_usermention_match = re.search(slack_usermention_regex, token)
 | 
			
		||||
        slack_usermention_match = re.search(SLACK_USERMENTION_REGEX, token, re.VERBOSE)
 | 
			
		||||
        short_name = slack_usermention_match.group(4)
 | 
			
		||||
        slack_id = slack_usermention_match.group(2)
 | 
			
		||||
        for user in users:
 | 
			
		||||
@@ -329,8 +372,22 @@ def channel_message_to_zerver_message(constants: List[Any], channel: str,
 | 
			
		||||
                full_name = get_user_full_name(user)
 | 
			
		||||
                user_id = added_users[slack_id]
 | 
			
		||||
                mention = "@**" + full_name + "** "
 | 
			
		||||
        token = re.sub(slack_usermention_regex, mention, token)
 | 
			
		||||
        return mention, user_id
 | 
			
		||||
        token = re.sub(SLACK_USERMENTION_REGEX, mention, token, flags=re.VERBOSE)
 | 
			
		||||
        return token, user_id
 | 
			
		||||
 | 
			
		||||
    # Map italic, bold and strikethrough markdown
 | 
			
		||||
    def convert_markdown_syntax(text: str, regex: str, zulip_keyword: str) -> str:
 | 
			
		||||
        """
 | 
			
		||||
        Returns:
 | 
			
		||||
        1. For strikethrough formatting: This maps Slack's '~strike~' to Zulip's '~~strike~~'
 | 
			
		||||
        2. For bold formatting: This maps Slack's '*bold*' to Zulip's '**bold**'
 | 
			
		||||
        3. For italic formatting: This maps Slack's '_italic_' to Zulip's '*italic*'
 | 
			
		||||
        """
 | 
			
		||||
        for match in re.finditer(regex, text, re.VERBOSE):
 | 
			
		||||
            converted_token = (match.group(1) + zulip_keyword + match.group(3)
 | 
			
		||||
                               + match.group(4) + zulip_keyword + match.group(6))
 | 
			
		||||
            text = re.sub(regex, converted_token, text, flags=re.VERBOSE)
 | 
			
		||||
        return text
 | 
			
		||||
 | 
			
		||||
    for json_name in json_names:
 | 
			
		||||
        messages = json.load(open(slack_data_dir + '/%s/%s' % (channel, json_name)))
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user