From 54452fef6c2089b94701fdd9c544d26900faf4be Mon Sep 17 00:00:00 2001 From: Priyansh Garg Date: Sat, 7 Aug 2021 15:21:19 +0530 Subject: [PATCH] data_import: Fix channel mentions in Rocket.Chat import. While the STREAM_LINK_REGEX and STREAM_TOPIC_LINK_REGEX identify the stream and topic mentions in the content correctly (tested by printing out the matches), the stream/topic mentions are still not linked to the corresponding streams/topics for imported messages, as a `zulip_message` instance is required for linking these mentions to actual streams/topics (see `StreamPattern` class in `markdown/__init__.py`) which is not provided while processing the markdown for imported messages. --- zerver/data_import/rocketchat.py | 62 +++++++++++++++++- .../rocketchat_message.bson | Bin 26839 -> 28508 bytes zerver/tests/test_rocketchat_importer.py | 14 ++-- 3 files changed, 67 insertions(+), 9 deletions(-) diff --git a/zerver/data_import/rocketchat.py b/zerver/data_import/rocketchat.py index 2fb53473e3..58b6fa041a 100644 --- a/zerver/data_import/rocketchat.py +++ b/zerver/data_import/rocketchat.py @@ -153,6 +153,10 @@ def convert_channel_data( stream_desc = channel_dict.get("description", "") if channel_dict.get("teamId"): if channel_dict.get("teamMain") is True: + # In case you change this, please also change the stream name + # used while adding the Rocket.Chat channel mention data to + # message_dict in `message_to_dict` inner-function of + # `process_messages` function below. stream_name = "[TEAM] " + stream_name else: stream_desc = "[Team {} channel]. 
{}".format( @@ -430,7 +434,10 @@ def process_raw_message_batch( zerver_attachment: List[ZerverFieldsT], upload_id_to_upload_data_map: Dict[str, Dict[str, Any]], ) -> None: - def fix_mentions(content: str, mention_user_ids: Set[int]) -> str: + def fix_mentions( + content: str, mention_user_ids: Set[int], rc_channel_mention_data: List[Dict[str, str]] + ) -> str: + # Fix user mentions for user_id in mention_user_ids: user = user_handler.get_user(user_id=user_id) rc_mention = "@{short_name}".format(**user) @@ -441,6 +448,13 @@ def process_raw_message_batch( # We don't have an equivalent for Rocket.Chat's @here mention # which mentions all users active in the channel. content = content.replace("@here", "@**all**") + + # Fix channel mentions + for mention_data in rc_channel_mention_data: + rc_mention = mention_data["rc_mention"] + zulip_mention = mention_data["zulip_mention"] + content = content.replace(rc_mention, zulip_mention) + return content mention_map: Dict[int, Set[int]] = {} @@ -454,6 +468,7 @@ def process_raw_message_batch( content = fix_mentions( content=raw_message["content"], mention_user_ids=mention_user_ids, + rc_channel_mention_data=raw_message["rc_channel_mention_data"], ) if len(content) > 10000: # nocoverage @@ -540,6 +555,7 @@ def process_messages( stream_id_to_recipient_id: Dict[int, int], huddle_id_mapper: IdMapper, huddle_id_to_recipient_id: Dict[int, int], + room_id_to_room_map: Dict[str, Dict[str, Any]], dsc_id_to_dsc_map: Dict[str, Dict[str, Any]], direct_id_to_direct_map: Dict[str, Dict[str, Any]], huddle_id_to_huddle_map: Dict[str, Dict[str, Any]], @@ -606,13 +622,17 @@ def process_messages( parent_channel_id = dsc_channel["prid"] stream_id = stream_id_mapper.get(parent_channel_id) message_dict["recipient_id"] = stream_id_to_recipient_id[stream_id] + + # In case you change this, please also change the topic name used + # in discussion mention to topic mention conversion below, while + # adding the Rocket.Chat channel mention data to 
message_dict. message_dict["topic_name"] = f'{dsc_channel["fname"]} (Imported from Rocket.Chat)' else: stream_id = stream_id_mapper.get(message["rid"]) message_dict["recipient_id"] = stream_id_to_recipient_id[stream_id] message_dict["topic_name"] = "Imported from Rocket.Chat" - # Add mentions to message_dict + # Add user mentions to message_dict mention_user_ids = set() for mention in message.get("mentions", []): mention_id = mention["_id"] @@ -622,6 +642,42 @@ def process_messages( mention_user_ids.add(user_id) message_dict["mention_user_ids"] = mention_user_ids + # Add channel mentions to message_dict + rc_channel_mention_data: List[Dict[str, str]] = [] + for mention in message.get("channels", []): + mention_rc_channel_id = mention["_id"] + mention_rc_channel_name = mention["name"] + rc_mention = f"#{mention_rc_channel_name}" + + if mention_rc_channel_id in room_id_to_room_map: + # Channel is converted to a stream. + converted_stream_name = mention_rc_channel_name + + rc_channel = room_id_to_room_map[mention_rc_channel_id] + if rc_channel.get("teamMain") is True: + # Channel is a team's main channel + converted_stream_name = "[TEAM] " + converted_stream_name + + zulip_mention = f"#**{converted_stream_name}**" + elif mention_rc_channel_id in dsc_id_to_dsc_map: + # Channel is a discussion and is converted to a topic. 
+ dsc_channel = dsc_id_to_dsc_map[mention_rc_channel_id] + parent_channel_id = dsc_channel["prid"] + parent_rc_channel = room_id_to_room_map[parent_channel_id] + + converted_topic_name = f'{dsc_channel["fname"]} (Imported from Rocket.Chat)' + parent_stream_name = parent_rc_channel["name"] + + if parent_rc_channel.get("teamMain") is True: + # Parent channel is a team's main channel + parent_stream_name = "[TEAM] " + parent_stream_name + + zulip_mention = f"#**{parent_stream_name}>{converted_topic_name}**" + + mention_data = {"rc_mention": rc_mention, "zulip_mention": zulip_mention} + rc_channel_mention_data.append(mention_data) + message_dict["rc_channel_mention_data"] = rc_channel_mention_data + # Add uploaded file (attachment) to message_dict if message.get("file"): message_dict["file"] = message["file"] @@ -967,6 +1023,7 @@ def do_convert_data(rocketchat_data_dir: str, output_dir: str) -> None: stream_id_to_recipient_id=stream_id_to_recipient_id, huddle_id_mapper=huddle_id_mapper, huddle_id_to_recipient_id=huddle_id_to_recipient_id, + room_id_to_room_map=room_id_to_room_map, dsc_id_to_dsc_map=dsc_id_to_dsc_map, direct_id_to_direct_map=direct_id_to_direct_map, huddle_id_to_huddle_map=huddle_id_to_huddle_map, @@ -991,6 +1048,7 @@ def do_convert_data(rocketchat_data_dir: str, output_dir: str) -> None: stream_id_to_recipient_id=stream_id_to_recipient_id, huddle_id_mapper=huddle_id_mapper, huddle_id_to_recipient_id=huddle_id_to_recipient_id, + room_id_to_room_map=room_id_to_room_map, dsc_id_to_dsc_map=dsc_id_to_dsc_map, direct_id_to_direct_map=direct_id_to_direct_map, huddle_id_to_huddle_map=huddle_id_to_huddle_map, diff --git a/zerver/tests/fixtures/rocketchat_fixtures/rocketchat_message.bson b/zerver/tests/fixtures/rocketchat_fixtures/rocketchat_message.bson index 12425f49e3c9442348fd315dbf1c1d8e48d562eb..83acebae5987f363eaf6f2c622ed869b4490c0a6 100644 GIT binary patch delta 853 zcmcb9k@3zw#tp)9OaY9OMdc2&u2>o?T|N1tx$fjCV($Em3=9mMB{{haolAhijGNiz 
zFL=r=W@2DqiqA}85CY1ENBBBtm}NSL_$Ik#I69dWPxg0GG*Ji2D3_!r=IUl778O)p%$C8`EAOZ9m(0BpH$q%(; zbw5BXiv?L$QdyA7AOK|fy84B9`uoK@dpP>}x%x0LmnG(urZSjMc2tp<0r?%9*+6$_ zfD{-q0~P2f04*_MAl_Ijkg-^N2NY#sXa_nO9_~KoKKWkm*=2zqrY=53X5o304drAe z-;xlrNJ{kbuJH49ij1r<_DxHS%4T57Ely_;0h-{ETB(qcTB)N@o{?ChUaU}BU=I#| zaO{G^{}w2ofzfZkkqJ}}44}lx8;#^R93dP_kk71fnFLe=bRcr#@bNY)@kvi}i!e7X zE;4a6Dg`=HQA-x6%p<=*AwN%{Bws< None: @@ -943,12 +943,12 @@ class RocketChatImporter(ZulipTestCase): for message in messages: self.assertIsNotNone(message.rendered_content) # After removing user_joined, added_user, discussion_created, etc. - # messages. (Total messages were 63.) - self.assert_length(messages, 36) + # messages. (Total messages were 66.) + self.assert_length(messages, 39) stream_messages = messages.filter(recipient__type=Recipient.STREAM).order_by("date_sent") stream_recipients = stream_messages.values_list("recipient", flat=True) - self.assert_length(stream_messages, 30) + self.assert_length(stream_messages, 33) self.assert_length(set(stream_recipients), 5) self.assertEqual(stream_messages[0].sender.email, "priyansh3133@email.com") self.assertEqual(stream_messages[0].content, "Hey everyone, how's it going??")