mirror of
				https://github.com/zulip/zulip.git
				synced 2025-10-30 19:43:47 +00:00 
			
		
		
		
	`./manage.py import` does not take a tarball; it takes a directory.
Making a separate tarball is a waste of CPU time and disk, as it is
never used.
This was included in the commit of the initial Slack conversion code
in 5b37c5562b and propagated from there into every conversion tool.
Remove the unnecessary tarball creation.
		
	
		
			
				
	
	
		
			1259 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1259 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import logging
 | |
| import os
 | |
| import random
 | |
| import secrets
 | |
| import uuid
 | |
| from typing import Any, Dict, List, Set, Tuple
 | |
| 
 | |
| import bson
 | |
| from django.conf import settings
 | |
| from django.forms.models import model_to_dict
 | |
| 
 | |
| from zerver.data_import.import_util import (
 | |
|     SubscriberHandler,
 | |
|     ZerverFieldsT,
 | |
|     build_attachment,
 | |
|     build_huddle,
 | |
|     build_huddle_subscriptions,
 | |
|     build_message,
 | |
|     build_personal_subscriptions,
 | |
|     build_realm,
 | |
|     build_realm_emoji,
 | |
|     build_recipients,
 | |
|     build_stream,
 | |
|     build_stream_subscriptions,
 | |
|     build_user_profile,
 | |
|     build_zerver_realm,
 | |
|     create_converted_data_files,
 | |
|     make_subscriber_map,
 | |
|     make_user_messages,
 | |
| )
 | |
| from zerver.data_import.sequencer import NEXT_ID, IdMapper
 | |
| from zerver.data_import.user_handler import UserHandler
 | |
| from zerver.lib.emoji import name_to_codepoint
 | |
| from zerver.lib.markdown import IMAGE_EXTENSIONS
 | |
| from zerver.lib.upload.base import sanitize_name
 | |
| from zerver.lib.utils import make_safe_digest, process_list_in_batches
 | |
| from zerver.models import Reaction, RealmEmoji, Recipient, UserProfile
 | |
| 
 | |
| 
 | |
| def make_realm(
 | |
|     realm_id: int, realm_subdomain: str, domain_name: str, rc_instance: Dict[str, Any]
 | |
| ) -> ZerverFieldsT:
 | |
|     created_at = float(rc_instance["_createdAt"].timestamp())
 | |
| 
 | |
|     zerver_realm = build_zerver_realm(realm_id, realm_subdomain, created_at, "Rocket.Chat")
 | |
|     realm = build_realm(zerver_realm, realm_id, domain_name)
 | |
| 
 | |
|     # We may override these later.
 | |
|     realm["zerver_defaultstream"] = []
 | |
| 
 | |
|     return realm
 | |
| 
 | |
| 
 | |
| def process_users(
 | |
|     user_id_to_user_map: Dict[str, Dict[str, Any]],
 | |
|     realm_id: int,
 | |
|     domain_name: str,
 | |
|     user_handler: UserHandler,
 | |
|     user_id_mapper: IdMapper,
 | |
| ) -> None:
 | |
|     realm_owners: List[int] = []
 | |
|     bots: List[int] = []
 | |
| 
 | |
|     for rc_user_id in user_id_to_user_map:
 | |
|         user_dict = user_id_to_user_map[rc_user_id]
 | |
|         is_mirror_dummy = False
 | |
|         is_bot = False
 | |
|         is_active = True
 | |
| 
 | |
|         # Rocket.Chat has three user types:
 | |
|         # "user": This is a regular user of the system.
 | |
|         # "bot": A special user types for bots.
 | |
|         # "unknown": This usually represents a livechat guest.
 | |
|         if user_dict["type"] != "user":
 | |
|             is_active = False
 | |
| 
 | |
|             if user_dict["type"] == "bot":
 | |
|                 is_bot = True
 | |
|             else:
 | |
|                 is_mirror_dummy = True
 | |
| 
 | |
|             if not user_dict.get("emails"):
 | |
|                 user_dict["emails"] = [
 | |
|                     {
 | |
|                         "address": "{}-{}@{}".format(
 | |
|                             user_dict["username"], user_dict["type"], domain_name
 | |
|                         )
 | |
|                     }
 | |
|                 ]
 | |
| 
 | |
|         # TODO: Change this to use actual exported avatar
 | |
|         avatar_source = "G"
 | |
|         full_name = user_dict["name"]
 | |
|         id = user_id_mapper.get(rc_user_id)
 | |
|         delivery_email = user_dict["emails"][0]["address"]
 | |
|         email = user_dict["emails"][0]["address"]
 | |
|         short_name = user_dict["username"]
 | |
|         date_joined = float(user_dict["createdAt"].timestamp())
 | |
|         timezone = "UTC"
 | |
| 
 | |
|         role = UserProfile.ROLE_MEMBER
 | |
|         if "admin" in user_dict["roles"]:
 | |
|             role = UserProfile.ROLE_REALM_OWNER
 | |
|             realm_owners.append(id)
 | |
|         elif "guest" in user_dict["roles"]:
 | |
|             role = UserProfile.ROLE_GUEST
 | |
| 
 | |
|         if is_bot:
 | |
|             bots.append(id)
 | |
| 
 | |
|         user = build_user_profile(
 | |
|             avatar_source=avatar_source,
 | |
|             date_joined=date_joined,
 | |
|             delivery_email=delivery_email,
 | |
|             email=email,
 | |
|             full_name=full_name,
 | |
|             id=id,
 | |
|             is_active=is_active,
 | |
|             role=role,
 | |
|             is_mirror_dummy=is_mirror_dummy,
 | |
|             realm_id=realm_id,
 | |
|             short_name=short_name,
 | |
|             timezone=timezone,
 | |
|             is_bot=is_bot,
 | |
|             bot_type=1 if is_bot else None,
 | |
|         )
 | |
|         user_handler.add_user(user)
 | |
| 
 | |
|     # Set the first realm_owner as the owner of
 | |
|     # all the bots.
 | |
|     if realm_owners:
 | |
|         for bot_id in bots:
 | |
|             bot_user = user_handler.get_user(user_id=bot_id)
 | |
|             bot_user["bot_owner"] = realm_owners[0]
 | |
| 
 | |
| 
 | |
| def truncate_name(name: str, name_id: int, max_length: int = 60) -> str:
 | |
|     if len(name) > max_length:
 | |
|         name_id_suffix = f" [{name_id}]"
 | |
|         name = name[0 : max_length - len(name_id_suffix)] + name_id_suffix
 | |
|     return name
 | |
| 
 | |
| 
 | |
| def get_stream_name(rc_channel: Dict[str, Any]) -> str:
 | |
|     if rc_channel.get("teamMain"):
 | |
|         stream_name = f'[TEAM] {rc_channel["name"]}'
 | |
|     else:
 | |
|         stream_name = rc_channel["name"]
 | |
| 
 | |
|     stream_name = truncate_name(stream_name, rc_channel["_id"])
 | |
| 
 | |
|     return stream_name
 | |
| 
 | |
| 
 | |
| def convert_channel_data(
 | |
|     room_id_to_room_map: Dict[str, Dict[str, Any]],
 | |
|     team_id_to_team_map: Dict[str, Dict[str, Any]],
 | |
|     stream_id_mapper: IdMapper,
 | |
|     realm_id: int,
 | |
| ) -> List[ZerverFieldsT]:
 | |
|     streams = []
 | |
| 
 | |
|     for rc_room_id in room_id_to_room_map:
 | |
|         channel_dict = room_id_to_room_map[rc_room_id]
 | |
| 
 | |
|         date_created = float(channel_dict["ts"].timestamp())
 | |
|         stream_id = stream_id_mapper.get(rc_room_id)
 | |
|         invite_only = channel_dict["t"] == "p"
 | |
| 
 | |
|         stream_name = get_stream_name(channel_dict)
 | |
| 
 | |
|         stream_desc = channel_dict.get("description", "")
 | |
|         if channel_dict.get("teamId") and not channel_dict.get("teamMain"):
 | |
|             stream_desc = "[Team {} channel]. {}".format(
 | |
|                 team_id_to_team_map[channel_dict["teamId"]]["name"], stream_desc
 | |
|             )
 | |
| 
 | |
|         # If the channel is read-only, then only admins and moderators
 | |
|         # should be allowed to post in the converted Zulip stream.
 | |
|         # For more details: https://zulip.com/help/stream-sending-policy
 | |
|         #
 | |
|         # See `Stream` model in `zerver/models.py` to know about what each
 | |
|         # number represent.
 | |
|         stream_post_policy = 4 if channel_dict.get("ro", False) else 1
 | |
| 
 | |
|         stream = build_stream(
 | |
|             date_created=date_created,
 | |
|             realm_id=realm_id,
 | |
|             name=stream_name,
 | |
|             description=stream_desc,
 | |
|             stream_id=stream_id,
 | |
|             deactivated=False,
 | |
|             invite_only=invite_only,
 | |
|             stream_post_policy=stream_post_policy,
 | |
|         )
 | |
|         streams.append(stream)
 | |
| 
 | |
|     return streams
 | |
| 
 | |
| 
 | |
| def convert_stream_subscription_data(
 | |
|     user_id_to_user_map: Dict[str, Dict[str, Any]],
 | |
|     dsc_id_to_dsc_map: Dict[str, Dict[str, Any]],
 | |
|     zerver_stream: List[ZerverFieldsT],
 | |
|     stream_id_mapper: IdMapper,
 | |
|     user_id_mapper: IdMapper,
 | |
|     subscriber_handler: SubscriberHandler,
 | |
| ) -> None:
 | |
|     stream_members_map: Dict[int, Set[int]] = {}
 | |
| 
 | |
|     for rc_user_id in user_id_to_user_map:
 | |
|         user_dict = user_id_to_user_map[rc_user_id]
 | |
| 
 | |
|         if not user_dict.get("__rooms"):
 | |
|             continue
 | |
| 
 | |
|         for channel in user_dict["__rooms"]:
 | |
|             if channel in dsc_id_to_dsc_map:
 | |
|                 # Ignore discussion rooms as these are not
 | |
|                 # imported as streams, but topics.
 | |
|                 continue
 | |
|             stream_id = stream_id_mapper.get(channel)
 | |
|             if stream_id not in stream_members_map:
 | |
|                 stream_members_map[stream_id] = set()
 | |
|             stream_members_map[stream_id].add(user_id_mapper.get(rc_user_id))
 | |
| 
 | |
|     for stream in zerver_stream:
 | |
|         if stream["id"] in stream_members_map:
 | |
|             users = stream_members_map[stream["id"]]
 | |
|         else:
 | |
|             users = set()
 | |
|             # Set the stream without any subscribers
 | |
|             # as deactivated.
 | |
|             stream["deactivated"] = True
 | |
|         subscriber_handler.set_info(users=users, stream_id=stream["id"])
 | |
| 
 | |
| 
 | |
| def convert_huddle_data(
 | |
|     huddle_id_to_huddle_map: Dict[str, Dict[str, Any]],
 | |
|     huddle_id_mapper: IdMapper,
 | |
|     user_id_mapper: IdMapper,
 | |
|     subscriber_handler: SubscriberHandler,
 | |
| ) -> List[ZerverFieldsT]:
 | |
|     zerver_huddle: List[ZerverFieldsT] = []
 | |
| 
 | |
|     for rc_huddle_id in huddle_id_to_huddle_map:
 | |
|         huddle_id = huddle_id_mapper.get(rc_huddle_id)
 | |
|         huddle = build_huddle(huddle_id)
 | |
|         zerver_huddle.append(huddle)
 | |
| 
 | |
|         huddle_dict = huddle_id_to_huddle_map[rc_huddle_id]
 | |
|         huddle_user_ids = set()
 | |
|         for rc_user_id in huddle_dict["uids"]:
 | |
|             huddle_user_ids.add(user_id_mapper.get(rc_user_id))
 | |
|         subscriber_handler.set_info(
 | |
|             users=huddle_user_ids,
 | |
|             huddle_id=huddle_id,
 | |
|         )
 | |
| 
 | |
|     return zerver_huddle
 | |
| 
 | |
| 
 | |
| def build_custom_emoji(
 | |
|     realm_id: int, custom_emoji_data: Dict[str, List[Dict[str, Any]]], output_dir: str
 | |
| ) -> List[ZerverFieldsT]:
 | |
|     logging.info("Starting to process custom emoji")
 | |
| 
 | |
|     emoji_folder = os.path.join(output_dir, "emoji")
 | |
|     os.makedirs(emoji_folder, exist_ok=True)
 | |
| 
 | |
|     zerver_realmemoji: List[ZerverFieldsT] = []
 | |
|     emoji_records: List[ZerverFieldsT] = []
 | |
| 
 | |
|     # Map emoji file_id to emoji file data
 | |
|     emoji_file_data = {}
 | |
|     for emoji_file in custom_emoji_data["file"]:
 | |
|         emoji_file_data[emoji_file["_id"]] = {"filename": emoji_file["filename"], "chunks": []}
 | |
|     for emoji_chunk in custom_emoji_data["chunk"]:
 | |
|         emoji_file_data[emoji_chunk["files_id"]]["chunks"].append(emoji_chunk["data"])
 | |
| 
 | |
|     # Build custom emoji
 | |
|     for rc_emoji in custom_emoji_data["emoji"]:
 | |
|         # Subject to change with changes in database
 | |
|         emoji_file_id = ".".join([rc_emoji["name"], rc_emoji["extension"]])
 | |
| 
 | |
|         emoji_file_info = emoji_file_data[emoji_file_id]
 | |
| 
 | |
|         emoji_filename = emoji_file_info["filename"]
 | |
|         emoji_data = b"".join(emoji_file_info["chunks"])
 | |
| 
 | |
|         target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
 | |
|             realm_id=realm_id,
 | |
|             emoji_file_name=emoji_filename,
 | |
|         )
 | |
|         target_path = os.path.join(emoji_folder, target_sub_path)
 | |
| 
 | |
|         os.makedirs(os.path.dirname(target_path), exist_ok=True)
 | |
|         with open(target_path, "wb") as e_file:
 | |
|             e_file.write(emoji_data)
 | |
| 
 | |
|         emoji_aliases = [rc_emoji["name"]]
 | |
|         emoji_aliases.extend(rc_emoji["aliases"])
 | |
| 
 | |
|         for alias in emoji_aliases:
 | |
|             emoji_record = dict(
 | |
|                 path=target_path,
 | |
|                 s3_path=target_path,
 | |
|                 file_name=emoji_filename,
 | |
|                 realm_id=realm_id,
 | |
|                 name=alias,
 | |
|             )
 | |
|             emoji_records.append(emoji_record)
 | |
| 
 | |
|             realmemoji = build_realm_emoji(
 | |
|                 realm_id=realm_id,
 | |
|                 name=alias,
 | |
|                 id=NEXT_ID("realmemoji"),
 | |
|                 file_name=emoji_filename,
 | |
|             )
 | |
|             zerver_realmemoji.append(realmemoji)
 | |
| 
 | |
|     create_converted_data_files(emoji_records, output_dir, "/emoji/records.json")
 | |
|     logging.info("Done processing emoji")
 | |
| 
 | |
|     return zerver_realmemoji
 | |
| 
 | |
| 
 | |
| def build_reactions(
 | |
|     total_reactions: List[ZerverFieldsT],
 | |
|     reactions: List[Dict[str, Any]],
 | |
|     message_id: int,
 | |
|     zerver_realmemoji: List[ZerverFieldsT],
 | |
| ) -> None:
 | |
|     realmemoji = {}
 | |
|     for emoji in zerver_realmemoji:
 | |
|         realmemoji[emoji["name"]] = emoji["id"]
 | |
| 
 | |
|     # For the Unicode emoji codes, we use equivalent of
 | |
|     # function 'emoji_name_to_emoji_code' in 'zerver/lib/emoji' here
 | |
|     for reaction_dict in reactions:
 | |
|         emoji_name = reaction_dict["name"]
 | |
|         user_id = reaction_dict["user_id"]
 | |
|         # Check in realm emoji
 | |
|         if emoji_name in realmemoji:
 | |
|             emoji_code = realmemoji[emoji_name]
 | |
|             reaction_type = Reaction.REALM_EMOJI
 | |
|         # Check in Unicode emoji
 | |
|         elif emoji_name in name_to_codepoint:
 | |
|             emoji_code = name_to_codepoint[emoji_name]
 | |
|             reaction_type = Reaction.UNICODE_EMOJI
 | |
|         else:  # nocoverage
 | |
|             continue
 | |
| 
 | |
|         reaction_id = NEXT_ID("reaction")
 | |
|         reaction = Reaction(
 | |
|             id=reaction_id,
 | |
|             emoji_code=emoji_code,
 | |
|             emoji_name=emoji_name,
 | |
|             reaction_type=reaction_type,
 | |
|         )
 | |
| 
 | |
|         reaction_dict = model_to_dict(reaction, exclude=["message", "user_profile"])
 | |
|         reaction_dict["message"] = message_id
 | |
|         reaction_dict["user_profile"] = user_id
 | |
|         total_reactions.append(reaction_dict)
 | |
| 
 | |
| 
 | |
| def process_message_attachment(
 | |
|     upload: Dict[str, Any],
 | |
|     realm_id: int,
 | |
|     message_id: int,
 | |
|     user_id: int,
 | |
|     user_handler: UserHandler,
 | |
|     zerver_attachment: List[ZerverFieldsT],
 | |
|     uploads_list: List[ZerverFieldsT],
 | |
|     upload_id_to_upload_data_map: Dict[str, Dict[str, Any]],
 | |
|     output_dir: str,
 | |
| ) -> Tuple[str, bool]:
 | |
|     if upload["_id"] not in upload_id_to_upload_data_map:  # nocoverage
 | |
|         logging.info("Skipping unknown attachment of message_id: %s", message_id)
 | |
|         return "", False
 | |
| 
 | |
|     if "type" not in upload:  # nocoverage
 | |
|         logging.info("Skipping attachment without type of message_id: %s", message_id)
 | |
|         return "", False
 | |
| 
 | |
|     upload_file_data = upload_id_to_upload_data_map[upload["_id"]]
 | |
|     file_name = upload["name"]
 | |
|     file_ext = f'.{upload["type"].split("/")[-1]}'
 | |
| 
 | |
|     has_image = False
 | |
|     if file_ext.lower() in IMAGE_EXTENSIONS:
 | |
|         has_image = True
 | |
| 
 | |
|     try:
 | |
|         sanitized_name = sanitize_name(file_name)
 | |
|     except AssertionError:  # nocoverage
 | |
|         logging.info("Replacing invalid attachment name with random uuid: %s", file_name)
 | |
|         sanitized_name = uuid.uuid4().hex
 | |
| 
 | |
|     if len(sanitized_name) >= 255:  # nocoverage
 | |
|         logging.info("Replacing too long attachment name with random uuid: %s", file_name)
 | |
|         sanitized_name = uuid.uuid4().hex
 | |
| 
 | |
|     s3_path = "/".join(
 | |
|         [
 | |
|             str(realm_id),
 | |
|             format(random.randint(0, 255), "x"),
 | |
|             secrets.token_urlsafe(18),
 | |
|             sanitized_name,
 | |
|         ]
 | |
|     )
 | |
| 
 | |
|     # Build the attachment from chunks and save it to s3_path.
 | |
|     file_out_path = os.path.join(output_dir, "uploads", s3_path)
 | |
|     os.makedirs(os.path.dirname(file_out_path), exist_ok=True)
 | |
|     with open(file_out_path, "wb") as upload_file:
 | |
|         upload_file.write(b"".join(upload_file_data["chunk"]))
 | |
| 
 | |
|     attachment_content = (
 | |
|         f'{upload_file_data.get("description", "")}\n\n[{file_name}](/user_uploads/{s3_path})'
 | |
|     )
 | |
| 
 | |
|     fileinfo = {
 | |
|         "name": file_name,
 | |
|         "size": upload_file_data["size"],
 | |
|         "created": float(upload_file_data["_updatedAt"].timestamp()),
 | |
|     }
 | |
| 
 | |
|     upload = dict(
 | |
|         path=s3_path,
 | |
|         realm_id=realm_id,
 | |
|         content_type=upload["type"],
 | |
|         user_profile_id=user_id,
 | |
|         last_modified=fileinfo["created"],
 | |
|         user_profile_email=user_handler.get_user(user_id=user_id)["email"],
 | |
|         s3_path=s3_path,
 | |
|         size=fileinfo["size"],
 | |
|     )
 | |
|     uploads_list.append(upload)
 | |
| 
 | |
|     build_attachment(
 | |
|         realm_id=realm_id,
 | |
|         message_ids={message_id},
 | |
|         user_id=user_id,
 | |
|         fileinfo=fileinfo,
 | |
|         s3_path=s3_path,
 | |
|         zerver_attachment=zerver_attachment,
 | |
|     )
 | |
| 
 | |
|     return attachment_content, has_image
 | |
| 
 | |
| 
 | |
| def process_raw_message_batch(
 | |
|     realm_id: int,
 | |
|     raw_messages: List[Dict[str, Any]],
 | |
|     subscriber_map: Dict[int, Set[int]],
 | |
|     user_handler: UserHandler,
 | |
|     is_pm_data: bool,
 | |
|     output_dir: str,
 | |
|     zerver_realmemoji: List[ZerverFieldsT],
 | |
|     total_reactions: List[ZerverFieldsT],
 | |
|     uploads_list: List[ZerverFieldsT],
 | |
|     zerver_attachment: List[ZerverFieldsT],
 | |
|     upload_id_to_upload_data_map: Dict[str, Dict[str, Any]],
 | |
| ) -> None:
 | |
|     def fix_mentions(
 | |
|         content: str, mention_user_ids: Set[int], rc_channel_mention_data: List[Dict[str, str]]
 | |
|     ) -> str:
 | |
|         # Fix user mentions
 | |
|         for user_id in mention_user_ids:
 | |
|             user = user_handler.get_user(user_id=user_id)
 | |
|             rc_mention = "@{short_name}".format(**user)
 | |
|             zulip_mention = "@**{full_name}**".format(**user)
 | |
|             content = content.replace(rc_mention, zulip_mention)
 | |
| 
 | |
|         content = content.replace("@all", "@**all**")
 | |
|         # We don't have an equivalent for Rocket.Chat's @here mention
 | |
|         # which mentions all users active in the channel.
 | |
|         content = content.replace("@here", "@**all**")
 | |
| 
 | |
|         # Fix channel mentions
 | |
|         for mention_data in rc_channel_mention_data:
 | |
|             rc_mention = mention_data["rc_mention"]
 | |
|             zulip_mention = mention_data["zulip_mention"]
 | |
|             content = content.replace(rc_mention, zulip_mention)
 | |
| 
 | |
|         return content
 | |
| 
 | |
|     user_mention_map: Dict[int, Set[int]] = {}
 | |
|     wildcard_mention_map: Dict[int, bool] = {}
 | |
|     zerver_message: List[ZerverFieldsT] = []
 | |
| 
 | |
|     for raw_message in raw_messages:
 | |
|         message_id = NEXT_ID("message")
 | |
|         mention_user_ids = raw_message["mention_user_ids"]
 | |
|         user_mention_map[message_id] = mention_user_ids
 | |
|         wildcard_mention_map[message_id] = raw_message["wildcard_mention"]
 | |
| 
 | |
|         content = fix_mentions(
 | |
|             content=raw_message["content"],
 | |
|             mention_user_ids=mention_user_ids,
 | |
|             rc_channel_mention_data=raw_message["rc_channel_mention_data"],
 | |
|         )
 | |
| 
 | |
|         if len(content) > 10000:  # nocoverage
 | |
|             logging.info("skipping too-long message of length %s", len(content))
 | |
|             continue
 | |
| 
 | |
|         date_sent = raw_message["date_sent"]
 | |
|         sender_user_id = raw_message["sender_id"]
 | |
|         recipient_id = raw_message["recipient_id"]
 | |
| 
 | |
|         rendered_content = None
 | |
| 
 | |
|         has_attachment = False
 | |
|         has_image = False
 | |
|         has_link = raw_message["has_link"]
 | |
| 
 | |
|         if "file" in raw_message:
 | |
|             has_attachment = True
 | |
|             has_link = True
 | |
| 
 | |
|             attachment_content, has_image = process_message_attachment(
 | |
|                 upload=raw_message["file"],
 | |
|                 realm_id=realm_id,
 | |
|                 message_id=message_id,
 | |
|                 user_id=sender_user_id,
 | |
|                 user_handler=user_handler,
 | |
|                 uploads_list=uploads_list,
 | |
|                 zerver_attachment=zerver_attachment,
 | |
|                 upload_id_to_upload_data_map=upload_id_to_upload_data_map,
 | |
|                 output_dir=output_dir,
 | |
|             )
 | |
| 
 | |
|             content += attachment_content
 | |
| 
 | |
|         topic_name = raw_message["topic_name"]
 | |
| 
 | |
|         message = build_message(
 | |
|             content=content,
 | |
|             message_id=message_id,
 | |
|             date_sent=date_sent,
 | |
|             recipient_id=recipient_id,
 | |
|             realm_id=realm_id,
 | |
|             rendered_content=rendered_content,
 | |
|             topic_name=topic_name,
 | |
|             user_id=sender_user_id,
 | |
|             has_image=has_image,
 | |
|             has_link=has_link,
 | |
|             has_attachment=has_attachment,
 | |
|         )
 | |
|         zerver_message.append(message)
 | |
|         build_reactions(
 | |
|             total_reactions=total_reactions,
 | |
|             reactions=raw_message["reactions"],
 | |
|             message_id=message_id,
 | |
|             zerver_realmemoji=zerver_realmemoji,
 | |
|         )
 | |
| 
 | |
|     zerver_usermessage = make_user_messages(
 | |
|         zerver_message=zerver_message,
 | |
|         subscriber_map=subscriber_map,
 | |
|         is_pm_data=is_pm_data,
 | |
|         mention_map=user_mention_map,
 | |
|         wildcard_mention_map=wildcard_mention_map,
 | |
|     )
 | |
| 
 | |
|     message_json = dict(
 | |
|         zerver_message=zerver_message,
 | |
|         zerver_usermessage=zerver_usermessage,
 | |
|     )
 | |
| 
 | |
|     dump_file_id = NEXT_ID("dump_file_id" + str(realm_id))
 | |
|     message_file = f"/messages-{dump_file_id:06}.json"
 | |
|     create_converted_data_files(message_json, output_dir, message_file)
 | |
| 
 | |
| 
 | |
| def get_topic_name(
 | |
|     message: Dict[str, Any],
 | |
|     dsc_id_to_dsc_map: Dict[str, Dict[str, Any]],
 | |
|     thread_id_mapper: IdMapper,
 | |
|     is_pm_data: bool = False,
 | |
| ) -> str:
 | |
|     if is_pm_data:
 | |
|         return ""
 | |
|     elif message["rid"] in dsc_id_to_dsc_map:
 | |
|         dsc_channel_name = dsc_id_to_dsc_map[message["rid"]]["fname"]
 | |
|         return truncate_name(f"{dsc_channel_name} (Imported from Rocket.Chat)", message["rid"])
 | |
|     elif message.get("replies"):
 | |
|         # Message is the start of a thread
 | |
|         thread_id = thread_id_mapper.get(message["_id"])
 | |
|         return truncate_name(f"Thread {thread_id} (Imported from Rocket.Chat)", message["_id"])
 | |
|     elif message.get("tmid"):
 | |
|         # Message is a part of a thread
 | |
|         thread_id = thread_id_mapper.get(message["tmid"])
 | |
|         return truncate_name(f"Thread {thread_id} (Imported from Rocket.Chat)", message["tmid"])
 | |
|     else:
 | |
|         # Normal channel message
 | |
|         return "Imported from Rocket.Chat"
 | |
| 
 | |
| 
 | |
| def process_messages(
 | |
|     realm_id: int,
 | |
|     messages: List[Dict[str, Any]],
 | |
|     subscriber_map: Dict[int, Set[int]],
 | |
|     is_pm_data: bool,
 | |
|     username_to_user_id_map: Dict[str, str],
 | |
|     user_id_mapper: IdMapper,
 | |
|     user_handler: UserHandler,
 | |
|     user_id_to_recipient_id: Dict[int, int],
 | |
|     stream_id_mapper: IdMapper,
 | |
|     stream_id_to_recipient_id: Dict[int, int],
 | |
|     huddle_id_mapper: IdMapper,
 | |
|     huddle_id_to_recipient_id: Dict[int, int],
 | |
|     thread_id_mapper: IdMapper,
 | |
|     room_id_to_room_map: Dict[str, Dict[str, Any]],
 | |
|     dsc_id_to_dsc_map: Dict[str, Dict[str, Any]],
 | |
|     direct_id_to_direct_map: Dict[str, Dict[str, Any]],
 | |
|     huddle_id_to_huddle_map: Dict[str, Dict[str, Any]],
 | |
|     zerver_realmemoji: List[ZerverFieldsT],
 | |
|     total_reactions: List[ZerverFieldsT],
 | |
|     uploads_list: List[ZerverFieldsT],
 | |
|     zerver_attachment: List[ZerverFieldsT],
 | |
|     upload_id_to_upload_data_map: Dict[str, Dict[str, Any]],
 | |
|     output_dir: str,
 | |
| ) -> None:
 | |
|     def list_reactions(reactions: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
 | |
|         # List of dictionaries of form:
 | |
|         # {"name": "smile", "user_id": 2}
 | |
|         reactions_list: List[Dict[str, Any]] = []
 | |
|         for react_code in reactions:
 | |
|             name = react_code.split(":")[1]
 | |
|             usernames = reactions[react_code]["usernames"]
 | |
| 
 | |
|             for username in usernames:
 | |
|                 if username not in username_to_user_id_map:  # nocoverage
 | |
|                     # This can happen with production data when old user names no longer exist. We cannot do
 | |
|                     # much about it here so we just ignore the unknown user name.
 | |
|                     continue
 | |
| 
 | |
|                 rc_user_id = username_to_user_id_map[username]
 | |
|                 user_id = user_id_mapper.get(rc_user_id)
 | |
|                 reactions_list.append({"name": name, "user_id": user_id})
 | |
| 
 | |
|         return reactions_list
 | |
| 
 | |
|     def message_to_dict(message: Dict[str, Any]) -> Dict[str, Any]:
 | |
|         rc_sender_id = message["u"]["_id"]
 | |
|         sender_id = user_id_mapper.get(rc_sender_id)
 | |
|         if "msg" in message:
 | |
|             content = message["msg"]
 | |
|         else:  # nocoverage
 | |
|             content = "This message imported from Rocket.Chat had no body in the data export."
 | |
|             logging.info(
 | |
|                 "Message %s contains no message content: %s",
 | |
|                 message["_id"],
 | |
|                 message,
 | |
|             )
 | |
| 
 | |
|         if message.get("reactions"):
 | |
|             reactions = list_reactions(message["reactions"])
 | |
|         else:
 | |
|             reactions = []
 | |
| 
 | |
|         message_dict = dict(
 | |
|             sender_id=sender_id,
 | |
|             content=content,
 | |
|             date_sent=int(message["ts"].timestamp()),
 | |
|             reactions=reactions,
 | |
|             has_link=bool(message.get("urls")),
 | |
|         )
 | |
| 
 | |
|         # Add recipient_id to message_dict
 | |
|         if is_pm_data:
 | |
|             # Message is in a PM or a huddle.
 | |
|             rc_channel_id = message["rid"]
 | |
|             if rc_channel_id in huddle_id_to_huddle_map:
 | |
|                 huddle_id = huddle_id_mapper.get(rc_channel_id)
 | |
|                 message_dict["recipient_id"] = huddle_id_to_recipient_id[huddle_id]
 | |
|             else:
 | |
|                 rc_member_ids = direct_id_to_direct_map[rc_channel_id]["uids"]
 | |
| 
 | |
|                 if len(rc_member_ids) == 1:  # nocoverage
 | |
|                     # PMs to yourself only have one user.
 | |
|                     rc_member_ids.append(rc_member_ids[0])
 | |
|                 if rc_sender_id == rc_member_ids[0]:
 | |
|                     zulip_member_id = user_id_mapper.get(rc_member_ids[1])
 | |
|                     message_dict["recipient_id"] = user_id_to_recipient_id[zulip_member_id]
 | |
|                 else:
 | |
|                     zulip_member_id = user_id_mapper.get(rc_member_ids[0])
 | |
|                     message_dict["recipient_id"] = user_id_to_recipient_id[zulip_member_id]
 | |
|         elif message["rid"] in dsc_id_to_dsc_map:
 | |
|             # Message is in a discussion
 | |
|             dsc_channel = dsc_id_to_dsc_map[message["rid"]]
 | |
|             parent_channel_id = dsc_channel["prid"]
 | |
|             stream_id = stream_id_mapper.get(parent_channel_id)
 | |
|             message_dict["recipient_id"] = stream_id_to_recipient_id[stream_id]
 | |
|         else:
 | |
|             stream_id = stream_id_mapper.get(message["rid"])
 | |
|             message_dict["recipient_id"] = stream_id_to_recipient_id[stream_id]
 | |
| 
 | |
|         # Add topic name to message_dict
 | |
|         message_dict["topic_name"] = get_topic_name(
 | |
|             message, dsc_id_to_dsc_map, thread_id_mapper, is_pm_data
 | |
|         )
 | |
| 
 | |
|         # Add user mentions to message_dict
 | |
|         mention_user_ids = set()
 | |
|         wildcard_mention = False
 | |
|         for mention in message.get("mentions", []):
 | |
|             mention_id = mention["_id"]
 | |
|             if mention_id in ["all", "here"]:
 | |
|                 wildcard_mention = True
 | |
|                 continue
 | |
|             if user_id_mapper.has(mention_id):
 | |
|                 user_id = user_id_mapper.get(mention_id)
 | |
|                 mention_user_ids.add(user_id)
 | |
|             else:  # nocoverage
 | |
|                 logging.info(
 | |
|                     "Message %s contains mention of unknown user %s: %s",
 | |
|                     message["_id"],
 | |
|                     mention_id,
 | |
|                     mention,
 | |
|                 )
 | |
| 
 | |
|         message_dict["mention_user_ids"] = mention_user_ids
 | |
|         message_dict["wildcard_mention"] = wildcard_mention
 | |
| 
 | |
|         # Add channel mentions to message_dict
 | |
|         rc_channel_mention_data: List[Dict[str, str]] = []
 | |
|         for mention in message.get("channels", []):
 | |
|             mention_rc_channel_id = mention["_id"]
 | |
|             mention_rc_channel_name = mention["name"]
 | |
|             rc_mention = f"#{mention_rc_channel_name}"
 | |
| 
 | |
|             if mention_rc_channel_id in room_id_to_room_map:
 | |
|                 # Channel is converted to a stream.
 | |
|                 rc_channel = room_id_to_room_map[mention_rc_channel_id]
 | |
|                 converted_stream_name = get_stream_name(rc_channel)
 | |
| 
 | |
|                 zulip_mention = f"#**{converted_stream_name}**"
 | |
|             elif mention_rc_channel_id in dsc_id_to_dsc_map:
 | |
|                 # Channel is a discussion and is converted to a topic.
 | |
|                 dsc_channel = dsc_id_to_dsc_map[mention_rc_channel_id]
 | |
|                 parent_channel_id = dsc_channel["prid"]
 | |
|                 if (
 | |
|                     parent_channel_id in direct_id_to_direct_map
 | |
|                     or parent_channel_id in huddle_id_to_huddle_map
 | |
|                 ):
 | |
|                     # Discussion belongs to a direct channel and thus, should not be
 | |
|                     # linked.
 | |
| 
 | |
|                     # This logging statement serves the side benefit of avoiding the
 | |
|                     # CPython optimization for `continue` so that the coverage reports
 | |
|                     # aren't misleading.
 | |
|                     logging.info(
 | |
|                         "skipping direct messages discussion mention: %s", dsc_channel["fname"]
 | |
|                     )
 | |
|                     continue
 | |
| 
 | |
|                 converted_topic_name = get_topic_name(
 | |
|                     message={"rid": mention_rc_channel_id},
 | |
|                     dsc_id_to_dsc_map=dsc_id_to_dsc_map,
 | |
|                     thread_id_mapper=thread_id_mapper,
 | |
|                 )
 | |
| 
 | |
|                 parent_rc_channel = room_id_to_room_map[parent_channel_id]
 | |
|                 parent_stream_name = get_stream_name(parent_rc_channel)
 | |
| 
 | |
|                 zulip_mention = f"#**{parent_stream_name}>{converted_topic_name}**"
 | |
|             else:  # nocoverage
 | |
|                 logging.info("Failed to map mention '%s' to zulip syntax.", mention)
 | |
|                 continue
 | |
| 
 | |
|             mention_data = {"rc_mention": rc_mention, "zulip_mention": zulip_mention}
 | |
|             rc_channel_mention_data.append(mention_data)
 | |
|         message_dict["rc_channel_mention_data"] = rc_channel_mention_data
 | |
| 
 | |
|         # Add uploaded file (attachment) to message_dict
 | |
|         if message.get("file"):
 | |
|             message_dict["file"] = message["file"]
 | |
| 
 | |
|         return message_dict
 | |
| 
 | |
|     raw_messages: List[Dict[str, Any]] = []
 | |
|     for message in messages:
 | |
|         if message.get("t") is not None:
 | |
|             # Messages with a type are system notifications like user_joined
 | |
|             # that we don't include.
 | |
|             continue
 | |
|         raw_messages.append(message_to_dict(message))
 | |
| 
 | |
|     def process_batch(lst: List[Dict[str, Any]]) -> None:
 | |
|         process_raw_message_batch(
 | |
|             realm_id=realm_id,
 | |
|             raw_messages=lst,
 | |
|             subscriber_map=subscriber_map,
 | |
|             user_handler=user_handler,
 | |
|             is_pm_data=is_pm_data,
 | |
|             output_dir=output_dir,
 | |
|             zerver_realmemoji=zerver_realmemoji,
 | |
|             total_reactions=total_reactions,
 | |
|             uploads_list=uploads_list,
 | |
|             zerver_attachment=zerver_attachment,
 | |
|             upload_id_to_upload_data_map=upload_id_to_upload_data_map,
 | |
|         )
 | |
| 
 | |
|     chunk_size = 1000
 | |
| 
 | |
|     process_list_in_batches(
 | |
|         lst=raw_messages,
 | |
|         chunk_size=chunk_size,
 | |
|         process_batch=process_batch,
 | |
|     )
 | |
| 
 | |
| 
 | |
| def map_upload_id_to_upload_data(
 | |
|     upload_data: Dict[str, List[Dict[str, Any]]],
 | |
| ) -> Dict[str, Dict[str, Any]]:
 | |
|     upload_id_to_upload_data_map: Dict[str, Dict[str, Any]] = {}
 | |
| 
 | |
|     for upload in upload_data["upload"]:
 | |
|         upload_id_to_upload_data_map[upload["_id"]] = {**upload, "chunk": []}
 | |
| 
 | |
|     for chunk in upload_data["chunk"]:
 | |
|         if chunk["files_id"] not in upload_id_to_upload_data_map:  # nocoverage
 | |
|             logging.info("Skipping chunk %s without metadata", chunk["files_id"])
 | |
|             # It's unclear why this apparent data corruption in the
 | |
|             # Rocket.Chat database is possible, but empirically, some
 | |
|             # chunks don't have any associated metadata.
 | |
|             continue
 | |
| 
 | |
|         upload_id_to_upload_data_map[chunk["files_id"]]["chunk"].append(chunk["data"])
 | |
| 
 | |
|     return upload_id_to_upload_data_map
 | |
| 
 | |
| 
 | |
| def separate_channel_private_and_livechat_messages(
 | |
|     messages: List[Dict[str, Any]],
 | |
|     dsc_id_to_dsc_map: Dict[str, Dict[str, Any]],
 | |
|     direct_id_to_direct_map: Dict[str, Dict[str, Any]],
 | |
|     huddle_id_to_huddle_map: Dict[str, Dict[str, Any]],
 | |
|     livechat_id_to_livechat_map: Dict[str, Dict[str, Any]],
 | |
|     channel_messages: List[Dict[str, Any]],
 | |
|     private_messages: List[Dict[str, Any]],
 | |
|     livechat_messages: List[Dict[str, Any]],
 | |
| ) -> None:
 | |
|     private_channels_list = list(direct_id_to_direct_map.keys()) + list(
 | |
|         huddle_id_to_huddle_map.keys()
 | |
|     )
 | |
|     for message in messages:
 | |
|         if not message.get("rid"):
 | |
|             # Message does not belong to any channel (might be
 | |
|             # related to livechat), so ignore all such messages.
 | |
|             continue
 | |
|         if message["rid"] in dsc_id_to_dsc_map:
 | |
|             parent_channel_id = dsc_id_to_dsc_map[message["rid"]]["prid"]
 | |
|             if parent_channel_id in private_channels_list:
 | |
|                 # Messages in discussions originating from direct channels
 | |
|                 # are treated as if they were posted in the parent direct
 | |
|                 # channel only.
 | |
|                 message["rid"] = parent_channel_id
 | |
|         if message["rid"] in private_channels_list:
 | |
|             private_messages.append(message)
 | |
|         elif message["rid"] in livechat_id_to_livechat_map:
 | |
|             livechat_messages.append(message)
 | |
|         else:
 | |
|             channel_messages.append(message)
 | |
| 
 | |
| 
 | |
| def map_receiver_id_to_recipient_id(
 | |
|     zerver_recipient: List[ZerverFieldsT],
 | |
|     stream_id_to_recipient_id: Dict[int, int],
 | |
|     huddle_id_to_recipient_id: Dict[int, int],
 | |
|     user_id_to_recipient_id: Dict[int, int],
 | |
| ) -> None:
 | |
|     # receiver_id represents stream_id/huddle_id/user_id
 | |
|     for recipient in zerver_recipient:
 | |
|         if recipient["type"] == Recipient.STREAM:
 | |
|             stream_id_to_recipient_id[recipient["type_id"]] = recipient["id"]
 | |
|         elif recipient["type"] == Recipient.HUDDLE:
 | |
|             huddle_id_to_recipient_id[recipient["type_id"]] = recipient["id"]
 | |
|         elif recipient["type"] == Recipient.PERSONAL:
 | |
|             user_id_to_recipient_id[recipient["type_id"]] = recipient["id"]
 | |
| 
 | |
| 
 | |
| # This is inspired by get_huddle_hash from zerver/models.py. It
 | |
| # expects strings identifying Rocket.Chat users, like
 | |
| # `LdBZ7kPxtKESyHPEe`, not integer IDs.
 | |
| #
 | |
| # Its purpose is to be a stable map usable for deduplication/merging
 | |
| # of Rocket.Chat threads involving the same set of people. Thus, its
 | |
| # only important property is that if two sets of users S and T are
 | |
| # equal and thus will have the same actual huddle hash once imported,
 | |
| # that get_string_huddle_hash(S) = get_string_huddle_hash(T).
 | |
| def get_string_huddle_hash(id_list: List[str]) -> str:
 | |
|     id_list = sorted(set(id_list))
 | |
|     hash_key = ",".join(str(x) for x in id_list)
 | |
|     return make_safe_digest(hash_key)
 | |
| 
 | |
| 
 | |
| def categorize_channels_and_map_with_id(
 | |
|     channel_data: List[Dict[str, Any]],
 | |
|     room_id_to_room_map: Dict[str, Dict[str, Any]],
 | |
|     team_id_to_team_map: Dict[str, Dict[str, Any]],
 | |
|     dsc_id_to_dsc_map: Dict[str, Dict[str, Any]],
 | |
|     direct_id_to_direct_map: Dict[str, Dict[str, Any]],
 | |
|     huddle_id_to_huddle_map: Dict[str, Dict[str, Any]],
 | |
|     livechat_id_to_livechat_map: Dict[str, Dict[str, Any]],
 | |
| ) -> None:
 | |
|     huddle_hashed_channels: Dict[str, Any] = {}
 | |
|     for channel in channel_data:
 | |
|         if channel.get("prid"):
 | |
|             dsc_id_to_dsc_map[channel["_id"]] = channel
 | |
|         elif channel["t"] == "d":
 | |
|             if len(channel["uids"]) > 2:
 | |
|                 huddle_hash = get_string_huddle_hash(channel["uids"])
 | |
|                 logging.info(
 | |
|                     "Huddle channel found. UIDs: %s -> hash %s", channel["uids"], huddle_hash
 | |
|                 )
 | |
| 
 | |
|                 if channel["msgs"] == 0:  # nocoverage
 | |
|                     # Rocket.Chat exports in the wild sometimes
 | |
|                     # contain duplicates of real huddles, with no
 | |
|                     # messages in the duplicate.  We ignore these
 | |
|                     # minor database corruptions in the Rocket.Chat
 | |
|                     # export. Doing so is safe, because a huddle with no
 | |
|                     # message history has no value in Zulip's data
 | |
|                     # model.
 | |
|                     logging.debug("Skipping huddle with 0 messages: %s", channel)
 | |
|                 elif huddle_hash in huddle_hashed_channels:  # nocoverage
 | |
|                     logging.info(
 | |
|                         "Mapping huddle hash %s to existing channel: %s",
 | |
|                         huddle_hash,
 | |
|                         huddle_hashed_channels[huddle_hash],
 | |
|                     )
 | |
|                     huddle_id_to_huddle_map[channel["_id"]] = huddle_hashed_channels[huddle_hash]
 | |
| 
 | |
|                     # Ideally, we'd merge the duplicate huddles. Doing
 | |
|                     # so correctly requires special handling in
 | |
|                     # convert_huddle_data() and on the message import
 | |
|                     # side as well, since those appear to be mapped
 | |
|                     # via rocketchat channel IDs and not all of that
 | |
|                     # information is resolved via the
 | |
|                     # huddle_id_to_huddle_map.
 | |
|                     #
 | |
|                     # For now, just throw an exception here rather
 | |
|                     # than during the import process.
 | |
|                     raise NotImplementedError(
 | |
|                         "Mapping multiple huddles with messages to one is not fully implemented yet"
 | |
|                     )
 | |
|                 else:
 | |
|                     huddle_id_to_huddle_map[channel["_id"]] = channel
 | |
|                     huddle_hashed_channels[huddle_hash] = channel
 | |
|             else:
 | |
|                 direct_id_to_direct_map[channel["_id"]] = channel
 | |
|         elif channel["t"] == "l":
 | |
|             livechat_id_to_livechat_map[channel["_id"]] = channel
 | |
|         else:
 | |
|             room_id_to_room_map[channel["_id"]] = channel
 | |
|             if channel.get("teamMain"):
 | |
|                 team_id_to_team_map[channel["teamId"]] = channel
 | |
| 
 | |
| 
 | |
| def map_username_to_user_id(user_id_to_user_map: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
 | |
|     username_to_user_id_map: Dict[str, str] = {}
 | |
|     for user_id, user_dict in user_id_to_user_map.items():
 | |
|         username_to_user_id_map[user_dict["username"]] = user_id
 | |
|     return username_to_user_id_map
 | |
| 
 | |
| 
 | |
| def map_user_id_to_user(user_data_list: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
 | |
|     user_id_to_user_map = {}
 | |
|     for user in user_data_list:
 | |
|         user_id_to_user_map[user["_id"]] = user
 | |
|     return user_id_to_user_map
 | |
| 
 | |
| 
 | |
| def rocketchat_data_to_dict(rocketchat_data_dir: str) -> Dict[str, Any]:
 | |
|     codec_options = bson.DEFAULT_CODEC_OPTIONS.with_options(tz_aware=True)
 | |
| 
 | |
|     rocketchat_data: Dict[str, Any] = {}
 | |
|     rocketchat_data["instance"] = []
 | |
|     rocketchat_data["user"] = []
 | |
|     rocketchat_data["avatar"] = {"avatar": [], "file": [], "chunk": []}
 | |
|     rocketchat_data["room"] = []
 | |
|     rocketchat_data["message"] = []
 | |
|     rocketchat_data["custom_emoji"] = {"emoji": [], "file": [], "chunk": []}
 | |
|     rocketchat_data["upload"] = {"upload": [], "file": [], "chunk": []}
 | |
| 
 | |
|     # Get instance
 | |
|     with open(os.path.join(rocketchat_data_dir, "instances.bson"), "rb") as fcache:
 | |
|         rocketchat_data["instance"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|     # Get user
 | |
|     with open(os.path.join(rocketchat_data_dir, "users.bson"), "rb") as fcache:
 | |
|         rocketchat_data["user"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|     # Get avatar
 | |
|     with open(os.path.join(rocketchat_data_dir, "rocketchat_avatars.bson"), "rb") as fcache:
 | |
|         rocketchat_data["avatar"]["avatar"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|     if rocketchat_data["avatar"]["avatar"]:
 | |
|         with open(
 | |
|             os.path.join(rocketchat_data_dir, "rocketchat_avatars.files.bson"), "rb"
 | |
|         ) as fcache:
 | |
|             rocketchat_data["avatar"]["file"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|         with open(
 | |
|             os.path.join(rocketchat_data_dir, "rocketchat_avatars.chunks.bson"), "rb"
 | |
|         ) as fcache:
 | |
|             rocketchat_data["avatar"]["chunk"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|     # Get room
 | |
|     with open(os.path.join(rocketchat_data_dir, "rocketchat_room.bson"), "rb") as fcache:
 | |
|         rocketchat_data["room"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|     # Get messages
 | |
|     with open(os.path.join(rocketchat_data_dir, "rocketchat_message.bson"), "rb") as fcache:
 | |
|         rocketchat_data["message"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|     # Get custom emoji
 | |
|     with open(os.path.join(rocketchat_data_dir, "rocketchat_custom_emoji.bson"), "rb") as fcache:
 | |
|         rocketchat_data["custom_emoji"]["emoji"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|     if rocketchat_data["custom_emoji"]["emoji"]:
 | |
|         with open(os.path.join(rocketchat_data_dir, "custom_emoji.files.bson"), "rb") as fcache:
 | |
|             rocketchat_data["custom_emoji"]["file"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|         with open(os.path.join(rocketchat_data_dir, "custom_emoji.chunks.bson"), "rb") as fcache:
 | |
|             rocketchat_data["custom_emoji"]["chunk"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|     # Get uploads
 | |
|     with open(os.path.join(rocketchat_data_dir, "rocketchat_uploads.bson"), "rb") as fcache:
 | |
|         rocketchat_data["upload"]["upload"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|     if rocketchat_data["upload"]["upload"]:
 | |
|         with open(
 | |
|             os.path.join(rocketchat_data_dir, "rocketchat_uploads.files.bson"), "rb"
 | |
|         ) as fcache:
 | |
|             rocketchat_data["upload"]["file"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|         with open(
 | |
|             os.path.join(rocketchat_data_dir, "rocketchat_uploads.chunks.bson"), "rb"
 | |
|         ) as fcache:
 | |
|             rocketchat_data["upload"]["chunk"] = bson.decode_all(fcache.read(), codec_options)
 | |
| 
 | |
|     return rocketchat_data
 | |
| 
 | |
| 
 | |
| def do_convert_data(rocketchat_data_dir: str, output_dir: str) -> None:
 | |
|     # Get all required exported data in a dictionary
 | |
|     rocketchat_data = rocketchat_data_to_dict(rocketchat_data_dir)
 | |
| 
 | |
|     # Subdomain is set by the user while running the import command
 | |
|     realm_subdomain = ""
 | |
|     realm_id = 0
 | |
|     domain_name = settings.EXTERNAL_HOST
 | |
| 
 | |
|     realm = make_realm(realm_id, realm_subdomain, domain_name, rocketchat_data["instance"][0])
 | |
| 
 | |
|     user_id_to_user_map: Dict[str, Dict[str, Any]] = map_user_id_to_user(rocketchat_data["user"])
 | |
|     username_to_user_id_map: Dict[str, str] = map_username_to_user_id(user_id_to_user_map)
 | |
| 
 | |
|     user_handler = UserHandler()
 | |
|     subscriber_handler = SubscriberHandler()
 | |
|     user_id_mapper = IdMapper()
 | |
|     stream_id_mapper = IdMapper()
 | |
|     huddle_id_mapper = IdMapper()
 | |
|     thread_id_mapper = IdMapper()
 | |
| 
 | |
|     process_users(
 | |
|         user_id_to_user_map=user_id_to_user_map,
 | |
|         realm_id=realm_id,
 | |
|         domain_name=domain_name,
 | |
|         user_handler=user_handler,
 | |
|         user_id_mapper=user_id_mapper,
 | |
|     )
 | |
| 
 | |
|     room_id_to_room_map: Dict[str, Dict[str, Any]] = {}
 | |
|     team_id_to_team_map: Dict[str, Dict[str, Any]] = {}
 | |
|     dsc_id_to_dsc_map: Dict[str, Dict[str, Any]] = {}
 | |
|     direct_id_to_direct_map: Dict[str, Dict[str, Any]] = {}
 | |
|     huddle_id_to_huddle_map: Dict[str, Dict[str, Any]] = {}
 | |
|     livechat_id_to_livechat_map: Dict[str, Dict[str, Any]] = {}
 | |
| 
 | |
|     categorize_channels_and_map_with_id(
 | |
|         channel_data=rocketchat_data["room"],
 | |
|         room_id_to_room_map=room_id_to_room_map,
 | |
|         team_id_to_team_map=team_id_to_team_map,
 | |
|         dsc_id_to_dsc_map=dsc_id_to_dsc_map,
 | |
|         direct_id_to_direct_map=direct_id_to_direct_map,
 | |
|         huddle_id_to_huddle_map=huddle_id_to_huddle_map,
 | |
|         livechat_id_to_livechat_map=livechat_id_to_livechat_map,
 | |
|     )
 | |
| 
 | |
|     zerver_stream = convert_channel_data(
 | |
|         room_id_to_room_map=room_id_to_room_map,
 | |
|         team_id_to_team_map=team_id_to_team_map,
 | |
|         stream_id_mapper=stream_id_mapper,
 | |
|         realm_id=realm_id,
 | |
|     )
 | |
|     realm["zerver_stream"] = zerver_stream
 | |
| 
 | |
|     # Add stream subscription data to `subscriber_handler`
 | |
|     convert_stream_subscription_data(
 | |
|         user_id_to_user_map=user_id_to_user_map,
 | |
|         dsc_id_to_dsc_map=dsc_id_to_dsc_map,
 | |
|         zerver_stream=zerver_stream,
 | |
|         stream_id_mapper=stream_id_mapper,
 | |
|         user_id_mapper=user_id_mapper,
 | |
|         subscriber_handler=subscriber_handler,
 | |
|     )
 | |
| 
 | |
|     zerver_huddle = convert_huddle_data(
 | |
|         huddle_id_to_huddle_map=huddle_id_to_huddle_map,
 | |
|         huddle_id_mapper=huddle_id_mapper,
 | |
|         user_id_mapper=user_id_mapper,
 | |
|         subscriber_handler=subscriber_handler,
 | |
|     )
 | |
|     realm["zerver_huddle"] = zerver_huddle
 | |
| 
 | |
|     all_users = user_handler.get_all_users()
 | |
| 
 | |
|     zerver_recipient = build_recipients(
 | |
|         zerver_userprofile=all_users,
 | |
|         zerver_stream=zerver_stream,
 | |
|         zerver_huddle=zerver_huddle,
 | |
|     )
 | |
|     realm["zerver_recipient"] = zerver_recipient
 | |
| 
 | |
|     stream_subscriptions = build_stream_subscriptions(
 | |
|         get_users=subscriber_handler.get_users,
 | |
|         zerver_recipient=zerver_recipient,
 | |
|         zerver_stream=zerver_stream,
 | |
|     )
 | |
| 
 | |
|     huddle_subscriptions = build_huddle_subscriptions(
 | |
|         get_users=subscriber_handler.get_users,
 | |
|         zerver_recipient=zerver_recipient,
 | |
|         zerver_huddle=zerver_huddle,
 | |
|     )
 | |
| 
 | |
|     personal_subscriptions = build_personal_subscriptions(
 | |
|         zerver_recipient=zerver_recipient,
 | |
|     )
 | |
| 
 | |
|     zerver_subscription = personal_subscriptions + stream_subscriptions + huddle_subscriptions
 | |
|     realm["zerver_subscription"] = zerver_subscription
 | |
| 
 | |
|     zerver_realmemoji = build_custom_emoji(
 | |
|         realm_id=realm_id,
 | |
|         custom_emoji_data=rocketchat_data["custom_emoji"],
 | |
|         output_dir=output_dir,
 | |
|     )
 | |
|     realm["zerver_realmemoji"] = zerver_realmemoji
 | |
| 
 | |
|     subscriber_map = make_subscriber_map(
 | |
|         zerver_subscription=zerver_subscription,
 | |
|     )
 | |
| 
 | |
|     stream_id_to_recipient_id: Dict[int, int] = {}
 | |
|     huddle_id_to_recipient_id: Dict[int, int] = {}
 | |
|     user_id_to_recipient_id: Dict[int, int] = {}
 | |
| 
 | |
|     map_receiver_id_to_recipient_id(
 | |
|         zerver_recipient=zerver_recipient,
 | |
|         stream_id_to_recipient_id=stream_id_to_recipient_id,
 | |
|         huddle_id_to_recipient_id=huddle_id_to_recipient_id,
 | |
|         user_id_to_recipient_id=user_id_to_recipient_id,
 | |
|     )
 | |
| 
 | |
|     channel_messages: List[Dict[str, Any]] = []
 | |
|     private_messages: List[Dict[str, Any]] = []
 | |
|     livechat_messages: List[Dict[str, Any]] = []
 | |
| 
 | |
|     separate_channel_private_and_livechat_messages(
 | |
|         messages=rocketchat_data["message"],
 | |
|         dsc_id_to_dsc_map=dsc_id_to_dsc_map,
 | |
|         direct_id_to_direct_map=direct_id_to_direct_map,
 | |
|         huddle_id_to_huddle_map=huddle_id_to_huddle_map,
 | |
|         livechat_id_to_livechat_map=livechat_id_to_livechat_map,
 | |
|         channel_messages=channel_messages,
 | |
|         private_messages=private_messages,
 | |
|         livechat_messages=livechat_messages,
 | |
|     )
 | |
| 
 | |
|     total_reactions: List[ZerverFieldsT] = []
 | |
|     uploads_list: List[ZerverFieldsT] = []
 | |
|     zerver_attachment: List[ZerverFieldsT] = []
 | |
| 
 | |
|     upload_id_to_upload_data_map = map_upload_id_to_upload_data(rocketchat_data["upload"])
 | |
| 
 | |
|     # Process channel messages
 | |
|     process_messages(
 | |
|         realm_id=realm_id,
 | |
|         messages=channel_messages,
 | |
|         subscriber_map=subscriber_map,
 | |
|         is_pm_data=False,
 | |
|         username_to_user_id_map=username_to_user_id_map,
 | |
|         user_id_mapper=user_id_mapper,
 | |
|         user_handler=user_handler,
 | |
|         user_id_to_recipient_id=user_id_to_recipient_id,
 | |
|         stream_id_mapper=stream_id_mapper,
 | |
|         stream_id_to_recipient_id=stream_id_to_recipient_id,
 | |
|         huddle_id_mapper=huddle_id_mapper,
 | |
|         huddle_id_to_recipient_id=huddle_id_to_recipient_id,
 | |
|         thread_id_mapper=thread_id_mapper,
 | |
|         room_id_to_room_map=room_id_to_room_map,
 | |
|         dsc_id_to_dsc_map=dsc_id_to_dsc_map,
 | |
|         direct_id_to_direct_map=direct_id_to_direct_map,
 | |
|         huddle_id_to_huddle_map=huddle_id_to_huddle_map,
 | |
|         zerver_realmemoji=zerver_realmemoji,
 | |
|         total_reactions=total_reactions,
 | |
|         uploads_list=uploads_list,
 | |
|         zerver_attachment=zerver_attachment,
 | |
|         upload_id_to_upload_data_map=upload_id_to_upload_data_map,
 | |
|         output_dir=output_dir,
 | |
|     )
 | |
|     # Process private messages
 | |
|     process_messages(
 | |
|         realm_id=realm_id,
 | |
|         messages=private_messages,
 | |
|         subscriber_map=subscriber_map,
 | |
|         is_pm_data=True,
 | |
|         username_to_user_id_map=username_to_user_id_map,
 | |
|         user_id_mapper=user_id_mapper,
 | |
|         user_handler=user_handler,
 | |
|         user_id_to_recipient_id=user_id_to_recipient_id,
 | |
|         stream_id_mapper=stream_id_mapper,
 | |
|         stream_id_to_recipient_id=stream_id_to_recipient_id,
 | |
|         huddle_id_mapper=huddle_id_mapper,
 | |
|         huddle_id_to_recipient_id=huddle_id_to_recipient_id,
 | |
|         thread_id_mapper=thread_id_mapper,
 | |
|         room_id_to_room_map=room_id_to_room_map,
 | |
|         dsc_id_to_dsc_map=dsc_id_to_dsc_map,
 | |
|         direct_id_to_direct_map=direct_id_to_direct_map,
 | |
|         huddle_id_to_huddle_map=huddle_id_to_huddle_map,
 | |
|         zerver_realmemoji=zerver_realmemoji,
 | |
|         total_reactions=total_reactions,
 | |
|         uploads_list=uploads_list,
 | |
|         zerver_attachment=zerver_attachment,
 | |
|         upload_id_to_upload_data_map=upload_id_to_upload_data_map,
 | |
|         output_dir=output_dir,
 | |
|     )
 | |
|     realm["zerver_reaction"] = total_reactions
 | |
|     realm["zerver_userprofile"] = user_handler.get_all_users()
 | |
|     realm["sort_by_date"] = True
 | |
| 
 | |
|     create_converted_data_files(realm, output_dir, "/realm.json")
 | |
|     # TODO: Add support for importing avatars
 | |
|     create_converted_data_files([], output_dir, "/avatars/records.json")
 | |
| 
 | |
|     # Import attachments
 | |
|     attachment: Dict[str, List[Any]] = {"zerver_attachment": zerver_attachment}
 | |
|     create_converted_data_files(attachment, output_dir, "/attachment.json")
 | |
|     create_converted_data_files(uploads_list, output_dir, "/uploads/records.json")
 |