mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-03 21:43:21 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1054 lines
		
	
	
		
			46 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			1054 lines
		
	
	
		
			46 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
import os
 | 
						|
import json
 | 
						|
import ujson
 | 
						|
import hashlib
 | 
						|
import sys
 | 
						|
import argparse
 | 
						|
import shutil
 | 
						|
import subprocess
 | 
						|
import re
 | 
						|
import logging
 | 
						|
import random
 | 
						|
import requests
 | 
						|
import random
 | 
						|
 | 
						|
from django.conf import settings
 | 
						|
from django.db import connection
 | 
						|
from django.utils.timezone import now as timezone_now
 | 
						|
from django.forms.models import model_to_dict
 | 
						|
from typing import Any, Dict, List, Optional, Tuple
 | 
						|
from zerver.forms import check_subdomain_available
 | 
						|
from zerver.models import Reaction, RealmEmoji, Realm, UserProfile
 | 
						|
from zerver.lib.slack_message_conversion import convert_to_zulip_markdown, \
 | 
						|
    get_user_full_name
 | 
						|
from zerver.lib.parallel import run_parallel
 | 
						|
from zerver.lib.avatar_hash import user_avatar_path_from_ids
 | 
						|
from zerver.lib.actions import STREAM_ASSIGNMENT_COLORS as stream_colors
 | 
						|
from zerver.lib.upload import random_name, sanitize_name
 | 
						|
from zerver.lib.export import MESSAGE_BATCH_CHUNK_SIZE
 | 
						|
from zerver.lib.emoji import NAME_TO_CODEPOINT_PATH
 | 
						|
 | 
						|
# stubs
 | 
						|
ZerverFieldsT = Dict[str, Any]
 | 
						|
AddedUsersT = Dict[str, int]
 | 
						|
AddedChannelsT = Dict[str, Tuple[str, int]]
 | 
						|
AddedRecipientsT = Dict[str, int]
 | 
						|
 | 
						|
def rm_tree(path: str) -> None:
 | 
						|
    if os.path.exists(path):
 | 
						|
        shutil.rmtree(path)
 | 
						|
 | 
						|
def slack_workspace_to_realm(domain_name: str, realm_id: int, user_list: List[ZerverFieldsT],
 | 
						|
                             realm_subdomain: str, slack_data_dir: str,
 | 
						|
                             custom_emoji_list: ZerverFieldsT)-> Tuple[ZerverFieldsT, AddedUsersT,
 | 
						|
                                                                       AddedRecipientsT,
 | 
						|
                                                                       AddedChannelsT,
 | 
						|
                                                                       List[ZerverFieldsT],
 | 
						|
                                                                       ZerverFieldsT]:
 | 
						|
    """
 | 
						|
    Returns:
 | 
						|
    1. realm, Converted Realm data
 | 
						|
    2. added_users, which is a dictionary to map from slack user id to zulip user id
 | 
						|
    3. added_recipient, which is a dictionary to map from channel name to zulip recipient_id
 | 
						|
    4. added_channels, which is a dictionary to map from channel name to channel id, zulip stream_id
 | 
						|
    5. avatars, which is list to map avatars to zulip avatar records.json
 | 
						|
    6. emoji_url_map, which is maps emoji name to its slack url
 | 
						|
    """
 | 
						|
    NOW = float(timezone_now().timestamp())
 | 
						|
 | 
						|
    zerver_realm = build_zerver_realm(realm_id, realm_subdomain, NOW)
 | 
						|
 | 
						|
    realm = dict(zerver_client=[{"name": "populate_db", "id": 1},
 | 
						|
                                {"name": "website", "id": 2},
 | 
						|
                                {"name": "API", "id": 3}],
 | 
						|
                 zerver_userpresence=[],  # shows last logged in data, which is not available in slack
 | 
						|
                 zerver_userprofile_mirrordummy=[],
 | 
						|
                 zerver_realmdomain=[{"realm": realm_id,
 | 
						|
                                      "allow_subdomains": False,
 | 
						|
                                      "domain": domain_name,
 | 
						|
                                      "id": realm_id}],
 | 
						|
                 zerver_useractivity=[],
 | 
						|
                 zerver_realm=zerver_realm,
 | 
						|
                 zerver_huddle=[],
 | 
						|
                 zerver_userprofile_crossrealm=[],
 | 
						|
                 zerver_useractivityinterval=[],
 | 
						|
                 zerver_realmfilter=[])
 | 
						|
 | 
						|
    zerver_userprofile, avatars, added_users, zerver_customprofilefield, \
 | 
						|
        zerver_customprofilefield_value = users_to_zerver_userprofile(slack_data_dir, user_list,
 | 
						|
                                                                      realm_id, int(NOW), domain_name)
 | 
						|
    channels_to_zerver_stream_fields = channels_to_zerver_stream(slack_data_dir,
 | 
						|
                                                                 realm_id,
 | 
						|
                                                                 added_users,
 | 
						|
                                                                 zerver_userprofile)
 | 
						|
    zerver_realmemoji, emoji_url_map = build_realmemoji(custom_emoji_list, realm_id)
 | 
						|
    realm['zerver_realmemoji'] = zerver_realmemoji
 | 
						|
 | 
						|
    # See https://zulipchat.com/help/set-default-streams-for-new-users
 | 
						|
    # for documentation on zerver_defaultstream
 | 
						|
    realm['zerver_userprofile'] = zerver_userprofile
 | 
						|
 | 
						|
    # Custom profile fields
 | 
						|
    realm['zerver_customprofilefield'] = zerver_customprofilefield
 | 
						|
    realm['zerver_customprofilefieldvalue'] = zerver_customprofilefield_value
 | 
						|
 | 
						|
    realm['zerver_defaultstream'] = channels_to_zerver_stream_fields[0]
 | 
						|
    realm['zerver_stream'] = channels_to_zerver_stream_fields[1]
 | 
						|
    realm['zerver_subscription'] = channels_to_zerver_stream_fields[3]
 | 
						|
    realm['zerver_recipient'] = channels_to_zerver_stream_fields[4]
 | 
						|
    added_channels = channels_to_zerver_stream_fields[2]
 | 
						|
    added_recipient = channels_to_zerver_stream_fields[5]
 | 
						|
 | 
						|
    return realm, added_users, added_recipient, added_channels, avatars, emoji_url_map
 | 
						|
 | 
						|
def build_zerver_realm(realm_id: int, realm_subdomain: str,
 | 
						|
                       time: float) -> List[ZerverFieldsT]:
 | 
						|
    realm = Realm(id=realm_id, date_created=time,
 | 
						|
                  name=realm_subdomain, string_id=realm_subdomain,
 | 
						|
                  description="Organization imported from Slack!")
 | 
						|
    auth_methods = [[flag[0], flag[1]] for flag in realm.authentication_methods]
 | 
						|
    realm_dict = model_to_dict(realm, exclude='authentication_methods')
 | 
						|
    realm_dict['authentication_methods'] = auth_methods
 | 
						|
    return[realm_dict]
 | 
						|
 | 
						|
def build_realmemoji(custom_emoji_list: ZerverFieldsT,
 | 
						|
                     realm_id: int) -> Tuple[List[ZerverFieldsT],
 | 
						|
                                             ZerverFieldsT]:
 | 
						|
    zerver_realmemoji = []
 | 
						|
    emoji_url_map = {}
 | 
						|
    emoji_id = 0
 | 
						|
    for emoji_name, url in custom_emoji_list.items():
 | 
						|
        if 'emoji.slack-edge.com' in url:
 | 
						|
            # Some of the emojis we get from the api have invalid links
 | 
						|
            # this is to prevent errors related to them
 | 
						|
            realmemoji = dict(
 | 
						|
                name=emoji_name,
 | 
						|
                id=emoji_id,
 | 
						|
                author=None,
 | 
						|
                realm=realm_id,
 | 
						|
                file_name=os.path.basename(url),
 | 
						|
                deactivated=False)
 | 
						|
            emoji_url_map[emoji_name] = url
 | 
						|
            zerver_realmemoji.append(realmemoji)
 | 
						|
            emoji_id += 1
 | 
						|
    return zerver_realmemoji, emoji_url_map
 | 
						|
 | 
						|
def users_to_zerver_userprofile(slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int,
 | 
						|
                                timestamp: Any, domain_name: str) -> Tuple[List[ZerverFieldsT],
 | 
						|
                                                                           List[ZerverFieldsT],
 | 
						|
                                                                           AddedUsersT,
 | 
						|
                                                                           List[ZerverFieldsT],
 | 
						|
                                                                           List[ZerverFieldsT]]:
 | 
						|
    """
 | 
						|
    Returns:
 | 
						|
    1. zerver_userprofile, which is a list of user profile
 | 
						|
    2. avatar_list, which is list to map avatars to zulip avatard records.json
 | 
						|
    3. added_users, which is a dictionary to map from slack user id to zulip
 | 
						|
       user id
 | 
						|
    4. zerver_customprofilefield, which is a list of all custom profile fields
 | 
						|
    5. zerver_customprofilefield_values, which is a list of user profile fields
 | 
						|
    """
 | 
						|
    logging.info('######### IMPORTING USERS STARTED #########\n')
 | 
						|
    zerver_userprofile = []
 | 
						|
    zerver_customprofilefield = []  # type: List[ZerverFieldsT]
 | 
						|
    zerver_customprofilefield_values = []  # type: List[ZerverFieldsT]
 | 
						|
    avatar_list = []  # type: List[ZerverFieldsT]
 | 
						|
    added_users = {}
 | 
						|
 | 
						|
    # The user data we get from the slack api does not contain custom profile data
 | 
						|
    # Hence we get it from the slack zip file
 | 
						|
    slack_data_file_user_list = get_data_file(slack_data_dir + '/users.json')
 | 
						|
 | 
						|
    # To map user id with the custom profile fields of the corresponding user
 | 
						|
    slack_user_custom_field_map = {}  # type: ZerverFieldsT
 | 
						|
    # To store custom fields corresponding to their ids
 | 
						|
    custom_field_map = {}  # type: ZerverFieldsT
 | 
						|
 | 
						|
    for user in slack_data_file_user_list:
 | 
						|
        process_slack_custom_fields(user, slack_user_custom_field_map)
 | 
						|
 | 
						|
    # We have only one primary owner in slack, see link
 | 
						|
    # https://get.slack.help/hc/en-us/articles/201912948-Owners-and-Administrators
 | 
						|
    # This is to import the primary owner first from all the users
 | 
						|
    user_id_count = custom_field_id_count = customprofilefield_id = 0
 | 
						|
    primary_owner_id = user_id_count
 | 
						|
    user_id_count += 1
 | 
						|
 | 
						|
    for user in users:
 | 
						|
        slack_user_id = user['id']
 | 
						|
 | 
						|
        if user.get('is_primary_owner', False):
 | 
						|
            user_id = primary_owner_id
 | 
						|
        else:
 | 
						|
            user_id = user_id_count
 | 
						|
 | 
						|
        # email
 | 
						|
        email = get_user_email(user, domain_name)
 | 
						|
 | 
						|
        # avatar
 | 
						|
        # ref: https://chat.zulip.org/help/change-your-avatar
 | 
						|
        avatar_url = build_avatar_url(slack_user_id, user['team_id'],
 | 
						|
                                      user['profile']['avatar_hash'])
 | 
						|
        build_avatar(user_id, realm_id, email, avatar_url, timestamp, avatar_list)
 | 
						|
 | 
						|
        # check if user is the admin
 | 
						|
        realm_admin = get_admin(user)
 | 
						|
 | 
						|
        # timezone
 | 
						|
        timezone = get_user_timezone(user)
 | 
						|
 | 
						|
        # Check for custom profile fields
 | 
						|
        if slack_user_id in slack_user_custom_field_map:
 | 
						|
            # For processing the fields
 | 
						|
            custom_field_map, customprofilefield_id = build_customprofile_field(
 | 
						|
                zerver_customprofilefield, slack_user_custom_field_map[slack_user_id],
 | 
						|
                customprofilefield_id, realm_id, custom_field_map)
 | 
						|
            # Store the custom field values for the corresponding user
 | 
						|
            custom_field_id_count = build_customprofilefields_values(
 | 
						|
                custom_field_map, slack_user_custom_field_map[slack_user_id], user_id,
 | 
						|
                custom_field_id_count, zerver_customprofilefield_values)
 | 
						|
 | 
						|
        userprofile = UserProfile(
 | 
						|
            full_name=get_user_full_name(user),
 | 
						|
            short_name=user['name'],
 | 
						|
            is_active=not user['deleted'],
 | 
						|
            id=user_id,
 | 
						|
            email=email,
 | 
						|
            avatar_source='U',
 | 
						|
            is_bot=user.get('is_bot', False),
 | 
						|
            pointer=-1,
 | 
						|
            is_realm_admin=realm_admin,
 | 
						|
            bot_type=1 if user.get('is_bot', False) else None,
 | 
						|
            date_joined=timestamp,
 | 
						|
            timezone=timezone,
 | 
						|
            last_login=timestamp)
 | 
						|
        userprofile_dict = model_to_dict(userprofile)
 | 
						|
        # Set realm id separately as the corresponding realm is not yet a Realm model instance
 | 
						|
        userprofile_dict['realm'] = realm_id
 | 
						|
 | 
						|
        zerver_userprofile.append(userprofile_dict)
 | 
						|
        added_users[slack_user_id] = user_id
 | 
						|
        if not user.get('is_primary_owner', False):
 | 
						|
            user_id_count += 1
 | 
						|
 | 
						|
        logging.info(u"{} -> {}".format(user['name'], userprofile_dict['email']))
 | 
						|
 | 
						|
    process_customprofilefields(zerver_customprofilefield, zerver_customprofilefield_values)
 | 
						|
    logging.info('######### IMPORTING USERS FINISHED #########\n')
 | 
						|
    return zerver_userprofile, avatar_list, added_users, zerver_customprofilefield, \
 | 
						|
        zerver_customprofilefield_values
 | 
						|
 | 
						|
def build_customprofile_field(customprofile_field: List[ZerverFieldsT], fields: ZerverFieldsT,
 | 
						|
                              customprofilefield_id: int, realm_id: int,
 | 
						|
                              custom_field_map: ZerverFieldsT) -> Tuple[ZerverFieldsT, int]:
 | 
						|
    # The name of the custom profile field is not provided in the slack data
 | 
						|
    # Hash keys of the fields are provided
 | 
						|
    # Reference: https://api.slack.com/methods/users.profile.set
 | 
						|
    for field, value in fields.items():
 | 
						|
        if field not in custom_field_map:
 | 
						|
            slack_custom_fields = ['phone', 'skype']
 | 
						|
            if field in slack_custom_fields:
 | 
						|
                field_name = field
 | 
						|
            else:
 | 
						|
                field_name = ("slack custom field %s" % str(customprofilefield_id + 1))
 | 
						|
            customprofilefield = dict(
 | 
						|
                id=customprofilefield_id,
 | 
						|
                realm=realm_id,
 | 
						|
                name=field_name,
 | 
						|
                field_type=1  # For now this is defaulted to 'SHORT_TEXT'
 | 
						|
                              # Processing is done in the function 'process_customprofilefields'
 | 
						|
            )
 | 
						|
            custom_field_map[field] = customprofilefield_id
 | 
						|
            customprofilefield_id += 1
 | 
						|
            customprofile_field.append(customprofilefield)
 | 
						|
    return custom_field_map, customprofilefield_id
 | 
						|
 | 
						|
def process_slack_custom_fields(user: ZerverFieldsT,
 | 
						|
                                slack_user_custom_field_map: ZerverFieldsT) -> None:
 | 
						|
    slack_user_custom_field_map[user['id']] = {}
 | 
						|
    if user['profile'].get('fields'):
 | 
						|
        slack_user_custom_field_map[user['id']] = user['profile']['fields']
 | 
						|
 | 
						|
    slack_custom_fields = ['phone', 'skype']
 | 
						|
    for field in slack_custom_fields:
 | 
						|
        if field in user['profile']:
 | 
						|
            slack_user_custom_field_map[user['id']][field] = {'value': user['profile'][field]}
 | 
						|
 | 
						|
def build_customprofilefields_values(custom_field_map: ZerverFieldsT, fields: ZerverFieldsT,
 | 
						|
                                     user_id: int, custom_field_id: int,
 | 
						|
                                     custom_field_values: List[ZerverFieldsT]) -> int:
 | 
						|
    for field, value in fields.items():
 | 
						|
        custom_field_value = dict(
 | 
						|
            id=custom_field_id,
 | 
						|
            user_profile=user_id,
 | 
						|
            field=custom_field_map[field],
 | 
						|
            value=value['value'])
 | 
						|
        custom_field_values.append(custom_field_value)
 | 
						|
        custom_field_id += 1
 | 
						|
    return custom_field_id
 | 
						|
 | 
						|
def process_customprofilefields(customprofilefield: List[ZerverFieldsT],
 | 
						|
                                customprofilefield_value: List[ZerverFieldsT]) -> None:
 | 
						|
    # Process the field types by checking all field values
 | 
						|
    for field in customprofilefield:
 | 
						|
        for field_value in customprofilefield_value:
 | 
						|
            if field_value['field'] == field['id'] and len(field_value['value']) > 50:
 | 
						|
                field['field_type'] = 2  # corresponding to Long text
 | 
						|
                break
 | 
						|
 | 
						|
def get_user_email(user: ZerverFieldsT, domain_name: str) -> str:
 | 
						|
    if 'email' in user['profile']:
 | 
						|
        return user['profile']['email']
 | 
						|
    if 'bot_id' in user['profile']:
 | 
						|
        if 'real_name_normalized' in user['profile']:
 | 
						|
            slack_bot_name = user['profile']['real_name_normalized']
 | 
						|
        elif 'first_name' in user['profile']:
 | 
						|
            slack_bot_name = user['profile']['first_name']
 | 
						|
        else:
 | 
						|
            raise AssertionError("Could not identify bot type")
 | 
						|
        return slack_bot_name.replace("Bot", "").replace(" ", "") + "-bot@%s" % (domain_name,)
 | 
						|
    # TODO: Do we need this fallback case at all?
 | 
						|
    return (hashlib.sha256(user['real_name'].encode()).hexdigest() +
 | 
						|
            "@%s" % (domain_name,))
 | 
						|
 | 
						|
def build_avatar_url(slack_user_id: str, team_id: str, avatar_hash: str) -> str:
 | 
						|
    avatar_url = "https://ca.slack-edge.com/{}-{}-{}".format(team_id, slack_user_id,
 | 
						|
                                                             avatar_hash)
 | 
						|
    return avatar_url
 | 
						|
 | 
						|
def build_avatar(zulip_user_id: int, realm_id: int, email: str, avatar_url: str,
 | 
						|
                 timestamp: Any, avatar_list: List[ZerverFieldsT]) -> None:
 | 
						|
    avatar = dict(
 | 
						|
        path=avatar_url,  # Save slack's url here, which is used later while processing
 | 
						|
        realm_id=realm_id,
 | 
						|
        content_type=None,
 | 
						|
        user_profile_id=zulip_user_id,
 | 
						|
        last_modified=timestamp,
 | 
						|
        user_profile_email=email,
 | 
						|
        s3_path="",
 | 
						|
        size="")
 | 
						|
    avatar_list.append(avatar)
 | 
						|
 | 
						|
def get_admin(user: ZerverFieldsT) -> bool:
 | 
						|
    admin = user.get('is_admin', False)
 | 
						|
    owner = user.get('is_owner', False)
 | 
						|
    primary_owner = user.get('is_primary_owner', False)
 | 
						|
 | 
						|
    if admin or owner or primary_owner:
 | 
						|
        return True
 | 
						|
    return False
 | 
						|
 | 
						|
def get_user_timezone(user: ZerverFieldsT) -> str:
 | 
						|
    _default_timezone = "America/New_York"
 | 
						|
    timezone = user.get("tz", _default_timezone)
 | 
						|
    if timezone is None or '/' not in timezone:
 | 
						|
        timezone = _default_timezone
 | 
						|
    return timezone
 | 
						|
 | 
						|
def channels_to_zerver_stream(slack_data_dir: str, realm_id: int, added_users: AddedUsersT,
 | 
						|
                              zerver_userprofile: List[ZerverFieldsT]) -> Tuple[List[ZerverFieldsT],
 | 
						|
                                                                                List[ZerverFieldsT],
 | 
						|
                                                                                AddedChannelsT,
 | 
						|
                                                                                List[ZerverFieldsT],
 | 
						|
                                                                                List[ZerverFieldsT],
 | 
						|
                                                                                AddedRecipientsT]:
 | 
						|
    """
 | 
						|
    Returns:
 | 
						|
    1. zerver_defaultstream, which is a list of the default streams
 | 
						|
    2. zerver_stream, while is a list of all streams
 | 
						|
    3. added_channels, which is a dictionary to map from channel name to channel id, zulip stream_id
 | 
						|
    4. zerver_subscription, which is a list of the subscriptions
 | 
						|
    5. zerver_recipient, which is a list of the recipients
 | 
						|
    6. added_recipient, which is a dictionary to map from channel name to zulip recipient_id
 | 
						|
    """
 | 
						|
    logging.info('######### IMPORTING CHANNELS STARTED #########\n')
 | 
						|
    channels = get_data_file(slack_data_dir + '/channels.json')
 | 
						|
 | 
						|
    added_channels = {}
 | 
						|
    added_recipient = {}
 | 
						|
 | 
						|
    zerver_stream = []
 | 
						|
    zerver_subscription = []  # type: List[ZerverFieldsT]
 | 
						|
    zerver_recipient = []
 | 
						|
    zerver_defaultstream = []
 | 
						|
 | 
						|
    stream_id_count = subscription_id_count = recipient_id_count = defaultstream_id = 0
 | 
						|
 | 
						|
    for channel in channels:
 | 
						|
        # slack_channel_id = channel['id']
 | 
						|
 | 
						|
        # map Slack's topic and purpose content into Zulip's stream description.
 | 
						|
        # WARN This mapping is lossy since the topic.creator, topic.last_set,
 | 
						|
        # purpose.creator, purpose.last_set fields are not preserved.
 | 
						|
        description = channel["purpose"]["value"]
 | 
						|
        stream_id = stream_id_count
 | 
						|
        recipient_id = recipient_id_count
 | 
						|
 | 
						|
        # construct the stream object and append it to zerver_stream
 | 
						|
        stream = dict(
 | 
						|
            realm=realm_id,
 | 
						|
            name=channel["name"],
 | 
						|
            deactivated=channel["is_archived"],
 | 
						|
            description=description,
 | 
						|
            invite_only=False,  # TODO: private channels are not
 | 
						|
                                # exported with Slack's standard plan;
 | 
						|
                                # so this field is always false
 | 
						|
            date_created=float(channel["created"]),
 | 
						|
            id=stream_id)
 | 
						|
 | 
						|
        # construct defaultstream object
 | 
						|
        # slack has the default channel 'general' and 'random'
 | 
						|
        # where every user is subscribed
 | 
						|
        default_channels = ['general', 'random']  # Slack specific
 | 
						|
        if channel['name'] in default_channels:
 | 
						|
            defaultstream = build_defaultstream(channel['name'], realm_id, stream_id,
 | 
						|
                                                defaultstream_id)
 | 
						|
            zerver_defaultstream.append(defaultstream)
 | 
						|
            defaultstream_id += 1
 | 
						|
 | 
						|
        zerver_stream.append(stream)
 | 
						|
        added_channels[stream['name']] = (channel['id'], stream_id)
 | 
						|
 | 
						|
        # construct the recipient object and append it to zerver_recipient
 | 
						|
        # type 1: private
 | 
						|
        # type 2: stream
 | 
						|
        # type 3: huddle
 | 
						|
        recipient = dict(
 | 
						|
            type_id=stream_id,
 | 
						|
            id=recipient_id,
 | 
						|
            type=2)
 | 
						|
        zerver_recipient.append(recipient)
 | 
						|
        added_recipient[stream['name']] = recipient_id
 | 
						|
        # TODO add recipients for private message and huddles
 | 
						|
 | 
						|
        # construct the subscription object and append it to zerver_subscription
 | 
						|
        subscription_id_count = build_subscription(channel['members'], zerver_subscription,
 | 
						|
                                                   recipient_id, added_users,
 | 
						|
                                                   subscription_id_count)
 | 
						|
        # TODO add zerver_subscription which correspond to
 | 
						|
        # huddles type recipient
 | 
						|
        # For huddles:
 | 
						|
        # sub['recipient']=recipient['id'] where recipient['type_id']=added_users[member]
 | 
						|
 | 
						|
        stream_id_count += 1
 | 
						|
        recipient_id_count += 1
 | 
						|
        logging.info(u"{} -> created".format(channel['name']))
 | 
						|
 | 
						|
        # TODO map Slack's pins to Zulip's stars
 | 
						|
        # There is the security model that Slack's pins are known to the team owner
 | 
						|
        # as evident from where it is stored at (channels)
 | 
						|
        # "pins": [
 | 
						|
        #         {
 | 
						|
        #             "id": "1444755381.000003",
 | 
						|
        #             "type": "C",
 | 
						|
        #             "user": "U061A5N1G",
 | 
						|
        #             "owner": "U061A5N1G",
 | 
						|
        #             "created": "1444755463"
 | 
						|
        #         }
 | 
						|
        #         ],
 | 
						|
 | 
						|
    for user in zerver_userprofile:
 | 
						|
        zulip_user_id = user['id']
 | 
						|
        # this maps the recipients and subscriptions
 | 
						|
        # related to private messages
 | 
						|
        recipient_id = recipient_id_count
 | 
						|
        subscription_id = subscription_id_count
 | 
						|
 | 
						|
        recipient, sub = build_pm_recipient_sub_from_user(zulip_user_id, recipient_id,
 | 
						|
                                                          subscription_id)
 | 
						|
        zerver_recipient.append(recipient)
 | 
						|
        zerver_subscription.append(sub)
 | 
						|
        subscription_id_count += 1
 | 
						|
        recipient_id_count += 1
 | 
						|
 | 
						|
    logging.info('######### IMPORTING STREAMS FINISHED #########\n')
 | 
						|
    return zerver_defaultstream, zerver_stream, added_channels, zerver_subscription, \
 | 
						|
        zerver_recipient, added_recipient
 | 
						|
 | 
						|
def build_defaultstream(channel_name: str, realm_id: int, stream_id: int,
 | 
						|
                        defaultstream_id: int) -> ZerverFieldsT:
 | 
						|
    defaultstream = dict(
 | 
						|
        stream=stream_id,
 | 
						|
        realm=realm_id,
 | 
						|
        id=defaultstream_id)
 | 
						|
    return defaultstream
 | 
						|
 | 
						|
def build_pm_recipient_sub_from_user(zulip_user_id: int, recipient_id: int,
 | 
						|
                                     subscription_id: int) -> Tuple[ZerverFieldsT,
 | 
						|
                                                                    ZerverFieldsT]:
 | 
						|
    recipient = dict(
 | 
						|
        type_id=zulip_user_id,
 | 
						|
        id=recipient_id,
 | 
						|
        type=1)
 | 
						|
 | 
						|
    sub = dict(
 | 
						|
        recipient=recipient_id,
 | 
						|
        notifications=False,
 | 
						|
        color=random.choice(stream_colors),
 | 
						|
        desktop_notifications=True,
 | 
						|
        pin_to_top=False,
 | 
						|
        in_home_view=True,
 | 
						|
        active=True,
 | 
						|
        user_profile=zulip_user_id,
 | 
						|
        id=subscription_id)
 | 
						|
 | 
						|
    return recipient, sub
 | 
						|
 | 
						|
def build_subscription(channel_members: List[str], zerver_subscription: List[ZerverFieldsT],
 | 
						|
                       recipient_id: int, added_users: AddedUsersT,
 | 
						|
                       subscription_id: int) -> int:
 | 
						|
    for member in channel_members:
 | 
						|
        sub = dict(
 | 
						|
            recipient=recipient_id,
 | 
						|
            notifications=False,
 | 
						|
            color=random.choice(stream_colors),
 | 
						|
            desktop_notifications=True,
 | 
						|
            pin_to_top=False,
 | 
						|
            in_home_view=True,
 | 
						|
            active=True,
 | 
						|
            user_profile=added_users[member],
 | 
						|
            id=subscription_id)
 | 
						|
        # The recipient corresponds to a stream for stream-readable message.
 | 
						|
        zerver_subscription.append(sub)
 | 
						|
        subscription_id += 1
 | 
						|
    return subscription_id
 | 
						|
 | 
						|
def convert_slack_workspace_messages(slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int,
 | 
						|
                                     added_users: AddedUsersT, added_recipient: AddedRecipientsT,
 | 
						|
                                     added_channels: AddedChannelsT, realm: ZerverFieldsT,
 | 
						|
                                     zerver_realmemoji: List[ZerverFieldsT], domain_name: str,
 | 
						|
                                     output_dir: str,
 | 
						|
                                     chunk_size: int=MESSAGE_BATCH_CHUNK_SIZE) -> Tuple[List[ZerverFieldsT],
 | 
						|
                                                                                        List[ZerverFieldsT],
 | 
						|
                                                                                        List[ZerverFieldsT]]:
 | 
						|
    """
 | 
						|
    Returns:
 | 
						|
    1. reactions, which is a list of the reactions
 | 
						|
    2. uploads, which is a list of uploads to be mapped in uploads records.json
 | 
						|
    3. attachment, which is a list of the attachments
 | 
						|
    """
 | 
						|
    all_messages = get_all_messages(slack_data_dir, added_channels)
 | 
						|
 | 
						|
    # we sort the messages according to the timestamp to show messages with
 | 
						|
    # the proper date order
 | 
						|
    all_messages = sorted(all_messages, key=lambda message: message['ts'])
 | 
						|
 | 
						|
    logging.info('######### IMPORTING MESSAGES STARTED #########\n')
 | 
						|
 | 
						|
    total_reactions = []  # type: List[ZerverFieldsT]
 | 
						|
    total_attachments = []  # type: List[ZerverFieldsT]
 | 
						|
    total_uploads = []  # type: List[ZerverFieldsT]
 | 
						|
 | 
						|
    message_id = usermessage_id = reaction_id = attachment_id = 0
 | 
						|
    id_list = (message_id, usermessage_id, reaction_id, attachment_id)
 | 
						|
 | 
						|
    # The messages are stored in batches
 | 
						|
    low_index = 0
 | 
						|
    upper_index = low_index + chunk_size
 | 
						|
    dump_file_id = 1
 | 
						|
 | 
						|
    while True:
 | 
						|
        message_data = all_messages[low_index:upper_index]
 | 
						|
        if len(message_data) == 0:
 | 
						|
            break
 | 
						|
        zerver_message, zerver_usermessage, attachment, uploads, \
 | 
						|
            reactions, id_list = channel_message_to_zerver_message(
 | 
						|
                realm_id, users, added_users, added_recipient, message_data,
 | 
						|
                zerver_realmemoji, realm['zerver_subscription'], added_channels,
 | 
						|
                id_list, domain_name)
 | 
						|
 | 
						|
        message_json = dict(
 | 
						|
            zerver_message=zerver_message,
 | 
						|
            zerver_usermessage=zerver_usermessage)
 | 
						|
 | 
						|
        message_file = "/messages-%06d.json" % (dump_file_id,)
 | 
						|
        logging.info("Writing Messages to %s\n" % (output_dir + message_file))
 | 
						|
        create_converted_data_files(message_json, output_dir, message_file)
 | 
						|
 | 
						|
        total_reactions += reactions
 | 
						|
        total_attachments += attachment
 | 
						|
        total_uploads += uploads
 | 
						|
 | 
						|
        low_index = upper_index
 | 
						|
        upper_index = chunk_size + low_index
 | 
						|
        dump_file_id += 1
 | 
						|
 | 
						|
    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
 | 
						|
    return total_reactions, total_uploads, total_attachments
 | 
						|
 | 
						|
def get_all_messages(slack_data_dir: str, added_channels: AddedChannelsT) -> List[ZerverFieldsT]:
 | 
						|
    all_messages = []  # type: List[ZerverFieldsT]
 | 
						|
    for channel_name in added_channels.keys():
 | 
						|
        channel_dir = os.path.join(slack_data_dir, channel_name)
 | 
						|
        json_names = os.listdir(channel_dir)
 | 
						|
        for json_name in json_names:
 | 
						|
            message_dir = os.path.join(channel_dir, json_name)
 | 
						|
            messages = get_data_file(message_dir)
 | 
						|
            for message in messages:
 | 
						|
                # To give every message the channel information
 | 
						|
                message['channel_name'] = channel_name
 | 
						|
            all_messages += messages
 | 
						|
    return all_messages
 | 
						|
 | 
						|
def channel_message_to_zerver_message(realm_id: int, users: List[ZerverFieldsT],
 | 
						|
                                      added_users: AddedUsersT,
 | 
						|
                                      added_recipient: AddedRecipientsT,
 | 
						|
                                      all_messages: List[ZerverFieldsT],
 | 
						|
                                      zerver_realmemoji: List[ZerverFieldsT],
 | 
						|
                                      zerver_subscription: List[ZerverFieldsT],
 | 
						|
                                      added_channels: AddedChannelsT,
 | 
						|
                                      id_list: Tuple[int, int, int, int],
 | 
						|
                                      domain_name: str) -> Tuple[List[ZerverFieldsT],
 | 
						|
                                                                 List[ZerverFieldsT],
 | 
						|
                                                                 List[ZerverFieldsT],
 | 
						|
                                                                 List[ZerverFieldsT],
 | 
						|
                                                                 List[ZerverFieldsT],
 | 
						|
                                                                 Tuple[int, int, int, int]]:
 | 
						|
    """
 | 
						|
    Returns:
 | 
						|
    1. zerver_message, which is a list of the messages
 | 
						|
    2. zerver_usermessage, which is a list of the usermessages
 | 
						|
    3. zerver_attachment, which is a list of the attachments
 | 
						|
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
 | 
						|
    5. reaction_list, which is a list of all user reactions
 | 
						|
    6. id_list, which is a tuple of max ids of messages, usermessages, reactions and attachments
 | 
						|
    """
 | 
						|
    message_id_count, usermessage_id_count, reaction_id_count, attachment_id_count = id_list
 | 
						|
    zerver_message = []
 | 
						|
    zerver_usermessage = []  # type: List[ZerverFieldsT]
 | 
						|
    uploads_list = []  # type: List[ZerverFieldsT]
 | 
						|
    zerver_attachment = []  # type: List[ZerverFieldsT]
 | 
						|
    reaction_list = []  # type: List[ZerverFieldsT]
 | 
						|
 | 
						|
    # For unicode emoji
 | 
						|
    with open(NAME_TO_CODEPOINT_PATH) as fp:
 | 
						|
        name_to_codepoint = ujson.load(fp)
 | 
						|
 | 
						|
    for message in all_messages:
 | 
						|
        user = get_message_sending_user(message)
 | 
						|
        if not user:
 | 
						|
            # Ignore messages without user names
 | 
						|
            # These are Sometimes produced by slack
 | 
						|
            continue
 | 
						|
        if message.get('subtype') in [
 | 
						|
                # Zulip doesn't have a pinned_item concept
 | 
						|
                "pinned_item",
 | 
						|
                "unpinned_item",
 | 
						|
                # Slack's channel join/leave notices are spammy
 | 
						|
                "channel_join",
 | 
						|
                "channel_leave",
 | 
						|
                "channel_name"
 | 
						|
        ]:
 | 
						|
            continue
 | 
						|
 | 
						|
        has_attachment = has_image = False
 | 
						|
        try:
 | 
						|
            content, mentioned_users_id, has_link = convert_to_zulip_markdown(
 | 
						|
                message['text'], users, added_channels, added_users)
 | 
						|
        except Exception:
 | 
						|
            print("Slack message unexpectedly missing text representation:")
 | 
						|
            print(json.dumps(message, indent=4))
 | 
						|
            continue
 | 
						|
        rendered_content = None
 | 
						|
 | 
						|
        recipient_id = added_recipient[message['channel_name']]
 | 
						|
        message_id = message_id_count
 | 
						|
 | 
						|
        # Process message reactions
 | 
						|
        if 'reactions' in message.keys():
 | 
						|
            reaction_id_count = build_reactions(reaction_list, message['reactions'], added_users,
 | 
						|
                                                message_id, reaction_id_count, name_to_codepoint,
 | 
						|
                                                zerver_realmemoji)
 | 
						|
 | 
						|
        # Process different subtypes of slack messages
 | 
						|
        if 'subtype' in message.keys():
 | 
						|
            subtype = message['subtype']
 | 
						|
            # Subtypes which have only the action in the message should
 | 
						|
            # be rendered with '/me' in the content initially
 | 
						|
            # For example "sh_room_created" has the message 'started a call'
 | 
						|
            # which should be displayed as '/me started a call'
 | 
						|
            if subtype in ["bot_add", "sh_room_created", "me_message"]:
 | 
						|
                content = ('/me %s' % (content))
 | 
						|
 | 
						|
            # For attachments with slack download link
 | 
						|
            elif subtype == "file_share" and 'files.slack.com' in message['file']['url_private']:
 | 
						|
                fileinfo = message['file']
 | 
						|
 | 
						|
                has_attachment = has_link = True
 | 
						|
                has_image = True if 'image' in fileinfo['mimetype'] else False
 | 
						|
 | 
						|
                file_user = [iterate_user for iterate_user in users if message['user'] == user]
 | 
						|
                file_user_email = get_user_email(file_user[0], domain_name)
 | 
						|
 | 
						|
                s3_path, content = get_attachment_path_and_content(fileinfo, realm_id)
 | 
						|
 | 
						|
                # construct attachments
 | 
						|
                build_uploads(added_users[user], realm_id, file_user_email, fileinfo, s3_path,
 | 
						|
                              uploads_list)
 | 
						|
 | 
						|
                attachment_id = attachment_id_count
 | 
						|
                build_zerver_attachment(realm_id, message_id, attachment_id, added_users[user],
 | 
						|
                                        fileinfo, s3_path, zerver_attachment)
 | 
						|
                attachment_id_count += 1
 | 
						|
 | 
						|
            # For attachments with link not from slack
 | 
						|
            # Example: Google drive integration
 | 
						|
            elif subtype == "file_share":
 | 
						|
                fileinfo = message['file']
 | 
						|
                has_link = True
 | 
						|
                if 'title' in fileinfo:
 | 
						|
                    file_name = fileinfo['title']
 | 
						|
                else:
 | 
						|
                    file_name = fileinfo['name']
 | 
						|
                content = '[%s](%s)' % (file_name, fileinfo['url_private'])
 | 
						|
 | 
						|
        # construct message
 | 
						|
        zulip_message = dict(
 | 
						|
            sending_client=1,
 | 
						|
            rendered_content_version=1,  # This is Zulip-specific
 | 
						|
            has_image=has_image,
 | 
						|
            subject='imported from slack',  # This is Zulip-specific
 | 
						|
            pub_date=float(message['ts']),
 | 
						|
            id=message_id,
 | 
						|
            has_attachment=has_attachment,  # attachment will be posted in the subsequent message;
 | 
						|
                                            # this is how Slack does it, i.e. less like email
 | 
						|
            edit_history=None,
 | 
						|
            sender=added_users[user],  # map slack id to zulip id
 | 
						|
            content=content,
 | 
						|
            rendered_content=rendered_content,  # slack doesn't cache this
 | 
						|
            recipient=recipient_id,
 | 
						|
            last_edit_time=None,
 | 
						|
            has_link=has_link)
 | 
						|
        zerver_message.append(zulip_message)
 | 
						|
 | 
						|
        # construct usermessages
 | 
						|
        usermessage_id_count = build_zerver_usermessage(
 | 
						|
            zerver_usermessage, usermessage_id_count, zerver_subscription,
 | 
						|
            recipient_id, mentioned_users_id, message_id)
 | 
						|
 | 
						|
        message_id_count += 1
 | 
						|
 | 
						|
    id_list = (message_id_count, usermessage_id_count,
 | 
						|
               reaction_id_count, attachment_id_count)
 | 
						|
    return zerver_message, zerver_usermessage, zerver_attachment, uploads_list, \
 | 
						|
        reaction_list, id_list
 | 
						|
 | 
						|
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str,
 | 
						|
                                                                                     str]:
 | 
						|
    # Should be kept in sync with its equivalent in zerver/lib/uploads in the function
 | 
						|
    # 'upload_message_file'
 | 
						|
    s3_path = "/".join([
 | 
						|
        str(realm_id),
 | 
						|
        'SlackImportAttachment',  # This is a special placeholder which should be kept
 | 
						|
                                  # in sync with 'exports.py' function 'import_message_data'
 | 
						|
        format(random.randint(0, 255), 'x'),
 | 
						|
        random_name(18),
 | 
						|
        sanitize_name(fileinfo['name'])
 | 
						|
    ])
 | 
						|
    attachment_path = ('/user_uploads/%s' % (s3_path))
 | 
						|
    content = '[%s](%s)' % (fileinfo['title'], attachment_path)
 | 
						|
 | 
						|
    return s3_path, content
 | 
						|
 | 
						|
def build_reactions(reaction_list: List[ZerverFieldsT], reactions: List[ZerverFieldsT],
 | 
						|
                    added_users: AddedUsersT, message_id: int, reaction_id: int,
 | 
						|
                    name_to_codepoint: ZerverFieldsT,
 | 
						|
                    zerver_realmemoji: List[ZerverFieldsT]) -> int:
 | 
						|
    realmemoji = {}
 | 
						|
    for realm_emoji in zerver_realmemoji:
 | 
						|
        realmemoji[realm_emoji['name']] = realm_emoji['id']
 | 
						|
 | 
						|
    # For the unicode emoji codes, we use equivalent of
 | 
						|
    # function 'emoji_name_to_emoji_code' in 'zerver/lib/emoji' here
 | 
						|
    for slack_reaction in reactions:
 | 
						|
        emoji_name = slack_reaction['name']
 | 
						|
        # Check in unicode emoji
 | 
						|
        if emoji_name in name_to_codepoint:
 | 
						|
            emoji_code = name_to_codepoint[emoji_name]
 | 
						|
            reaction_type = Reaction.UNICODE_EMOJI
 | 
						|
        # Check in realm emoji
 | 
						|
        elif emoji_name in realmemoji:
 | 
						|
            emoji_code = realmemoji[emoji_name]
 | 
						|
            reaction_type = Reaction.REALM_EMOJI
 | 
						|
        else:
 | 
						|
            continue
 | 
						|
 | 
						|
        for user in slack_reaction['users']:
 | 
						|
            reaction = dict(
 | 
						|
                id=reaction_id,
 | 
						|
                emoji_code=emoji_code,
 | 
						|
                emoji_name=emoji_name,
 | 
						|
                message=message_id,
 | 
						|
                reaction_type=reaction_type,
 | 
						|
                user_profile=added_users[user])
 | 
						|
            reaction_id += 1
 | 
						|
            reaction_list.append(reaction)
 | 
						|
    return reaction_id
 | 
						|
 | 
						|
def build_uploads(user_id: int, realm_id: int, email: str, fileinfo: ZerverFieldsT, s3_path: str,
 | 
						|
                  uploads_list: List[ZerverFieldsT]) -> None:
 | 
						|
    upload = dict(
 | 
						|
        path=fileinfo['url_private'],  # Save slack's url here, which is used later while processing
 | 
						|
        realm_id=realm_id,
 | 
						|
        content_type=None,
 | 
						|
        user_profile_id=user_id,
 | 
						|
        last_modified=fileinfo['timestamp'],
 | 
						|
        user_profile_email=email,
 | 
						|
        s3_path=s3_path,
 | 
						|
        size=fileinfo['size'])
 | 
						|
    uploads_list.append(upload)
 | 
						|
 | 
						|
def build_zerver_attachment(realm_id: int, message_id: int, attachment_id: int,
 | 
						|
                            user_id: int, fileinfo: ZerverFieldsT, s3_path: str,
 | 
						|
                            zerver_attachment: List[ZerverFieldsT]) -> None:
 | 
						|
    attachment = dict(
 | 
						|
        owner=user_id,
 | 
						|
        messages=[message_id],
 | 
						|
        id=attachment_id,
 | 
						|
        size=fileinfo['size'],
 | 
						|
        create_time=fileinfo['created'],
 | 
						|
        is_realm_public=True,  # is always true for stream message
 | 
						|
        path_id=s3_path,
 | 
						|
        realm=realm_id,
 | 
						|
        file_name=fileinfo['name'])
 | 
						|
    zerver_attachment.append(attachment)
 | 
						|
 | 
						|
def get_message_sending_user(message: ZerverFieldsT) -> Optional[str]:
 | 
						|
    if 'user' in message:
 | 
						|
        return message['user']
 | 
						|
    if message.get('file'):
 | 
						|
        return message['file'].get('user')
 | 
						|
    return None
 | 
						|
 | 
						|
def build_zerver_usermessage(zerver_usermessage: List[ZerverFieldsT], usermessage_id: int,
 | 
						|
                             zerver_subscription: List[ZerverFieldsT], recipient_id: int,
 | 
						|
                             mentioned_users_id: List[int], message_id: int) -> int:
 | 
						|
    for subscription in zerver_subscription:
 | 
						|
        if subscription['recipient'] == recipient_id:
 | 
						|
            flags_mask = 1  # For read
 | 
						|
            if subscription['user_profile'] in mentioned_users_id:
 | 
						|
                flags_mask = 9  # For read and mentioned
 | 
						|
 | 
						|
            usermessage = dict(
 | 
						|
                user_profile=subscription['user_profile'],
 | 
						|
                id=usermessage_id,
 | 
						|
                flags_mask=flags_mask,
 | 
						|
                message=message_id)
 | 
						|
            usermessage_id += 1
 | 
						|
            zerver_usermessage.append(usermessage)
 | 
						|
    return usermessage_id
 | 
						|
 | 
						|
def do_convert_data(slack_zip_file: str, output_dir: str, token: str, threads: int=6) -> None:
 | 
						|
    # Subdomain is set by the user while running the import command
 | 
						|
    realm_subdomain = ""
 | 
						|
    realm_id = 0
 | 
						|
    domain_name = settings.EXTERNAL_HOST
 | 
						|
 | 
						|
    slack_data_dir = slack_zip_file.replace('.zip', '')
 | 
						|
    if not os.path.exists(slack_data_dir):
 | 
						|
        os.makedirs(slack_data_dir)
 | 
						|
 | 
						|
    os.makedirs(output_dir, exist_ok=True)
 | 
						|
    # output directory should be empty initially
 | 
						|
    if os.listdir(output_dir):
 | 
						|
        raise Exception('Output directory should be empty!')
 | 
						|
 | 
						|
    subprocess.check_call(['unzip', '-q', slack_zip_file, '-d', slack_data_dir])
 | 
						|
    # with zipfile.ZipFile(slack_zip_file, 'r') as zip_ref:
 | 
						|
    #     zip_ref.extractall(slack_data_dir)
 | 
						|
 | 
						|
    # We get the user data from the legacy token method of slack api, which is depreciated
 | 
						|
    # but we use it as the user email data is provided only in this method
 | 
						|
    user_list = get_slack_api_data(token, "https://slack.com/api/users.list", "members")
 | 
						|
    # Get custom emoji from slack api
 | 
						|
    custom_emoji_list = get_slack_api_data(token, "https://slack.com/api/emoji.list", "emoji")
 | 
						|
 | 
						|
    realm, added_users, added_recipient, added_channels, avatar_list, \
 | 
						|
        emoji_url_map = slack_workspace_to_realm(domain_name, realm_id, user_list,
 | 
						|
                                                 realm_subdomain,
 | 
						|
                                                 slack_data_dir, custom_emoji_list)
 | 
						|
 | 
						|
    reactions, uploads_list, zerver_attachment = convert_slack_workspace_messages(
 | 
						|
        slack_data_dir, user_list, realm_id, added_users, added_recipient, added_channels,
 | 
						|
        realm, realm['zerver_realmemoji'], domain_name, output_dir)
 | 
						|
 | 
						|
    # Move zerver_reactions to realm.json file
 | 
						|
    realm['zerver_reaction'] = reactions
 | 
						|
 | 
						|
    emoji_folder = os.path.join(output_dir, 'emoji')
 | 
						|
    os.makedirs(emoji_folder, exist_ok=True)
 | 
						|
    emoji_records = process_emojis(realm['zerver_realmemoji'], emoji_folder, emoji_url_map, threads)
 | 
						|
 | 
						|
    avatar_folder = os.path.join(output_dir, 'avatars')
 | 
						|
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
 | 
						|
    os.makedirs(avatar_realm_folder, exist_ok=True)
 | 
						|
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads)
 | 
						|
 | 
						|
    uploads_folder = os.path.join(output_dir, 'uploads')
 | 
						|
    os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
 | 
						|
    uploads_records = process_uploads(uploads_list, uploads_folder, threads)
 | 
						|
    attachment = {"zerver_attachment": zerver_attachment}
 | 
						|
 | 
						|
    # IO realm.json
 | 
						|
    create_converted_data_files(realm, output_dir, '/realm.json')
 | 
						|
    # IO emoji records
 | 
						|
    create_converted_data_files(emoji_records, output_dir, '/emoji/records.json')
 | 
						|
    # IO avatar records
 | 
						|
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
 | 
						|
    # IO uploads records
 | 
						|
    create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
 | 
						|
    # IO attachments records
 | 
						|
    create_converted_data_files(attachment, output_dir, '/attachment.json')
 | 
						|
 | 
						|
    # remove slack dir
 | 
						|
    rm_tree(slack_data_dir)
 | 
						|
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
 | 
						|
 | 
						|
    logging.info('######### DATA CONVERSION FINISHED #########\n')
 | 
						|
    logging.info("Zulip data dump created at %s" % (output_dir))
 | 
						|
 | 
						|
def process_emojis(zerver_realmemoji: List[ZerverFieldsT], emoji_dir: str,
 | 
						|
                   emoji_url_map: ZerverFieldsT, threads: int) -> List[ZerverFieldsT]:
 | 
						|
    """
 | 
						|
    This function gets the custom emojis and saves in the output emoji folder
 | 
						|
    """
 | 
						|
    def get_emojis(upload: List[str]) -> int:
 | 
						|
        slack_emoji_url = upload[0]
 | 
						|
        emoji_path = upload[1]
 | 
						|
        upload_emoji_path = os.path.join(emoji_dir, emoji_path)
 | 
						|
 | 
						|
        response = requests.get(slack_emoji_url, stream=True)
 | 
						|
        os.makedirs(os.path.dirname(upload_emoji_path), exist_ok=True)
 | 
						|
        with open(upload_emoji_path, 'wb') as emoji_file:
 | 
						|
            shutil.copyfileobj(response.raw, emoji_file)
 | 
						|
        return 0
 | 
						|
 | 
						|
    emoji_records = []
 | 
						|
    upload_emoji_list = []
 | 
						|
    logging.info('######### GETTING EMOJIS #########\n')
 | 
						|
    logging.info('DOWNLOADING EMOJIS .......\n')
 | 
						|
    for emoji in zerver_realmemoji:
 | 
						|
        slack_emoji_url = emoji_url_map[emoji['name']]
 | 
						|
        emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
 | 
						|
            realm_id=emoji['realm'],
 | 
						|
            emoji_file_name=emoji['name'])
 | 
						|
 | 
						|
        upload_emoji_list.append([slack_emoji_url, emoji_path])
 | 
						|
 | 
						|
        emoji_record = dict(emoji)
 | 
						|
        emoji_record['path'] = emoji_path
 | 
						|
        emoji_record['s3_path'] = emoji_path
 | 
						|
        emoji_record['realm_id'] = emoji_record['realm']
 | 
						|
        emoji_record.pop('realm')
 | 
						|
 | 
						|
        emoji_records.append(emoji_record)
 | 
						|
 | 
						|
    # Run downloads parallely
 | 
						|
    output = []
 | 
						|
    for (status, job) in run_parallel(get_emojis, upload_emoji_list, threads=threads):
 | 
						|
        output.append(job)
 | 
						|
 | 
						|
    logging.info('######### GETTING EMOJIS FINISHED #########\n')
 | 
						|
    return emoji_records
 | 
						|
 | 
						|
def process_avatars(avatar_list: List[ZerverFieldsT], avatar_dir: str,
 | 
						|
                    realm_id: int, threads: int) -> List[ZerverFieldsT]:
 | 
						|
    """
 | 
						|
    This function gets the avatar of size 512 px and saves it in the
 | 
						|
    user's avatar directory with both the extensions
 | 
						|
    '.png' and '.original'
 | 
						|
    """
 | 
						|
    def get_avatar(avatar_upload_list: List[str]) -> int:
 | 
						|
        # get avatar of size 512
 | 
						|
        slack_avatar_url = avatar_upload_list[0]
 | 
						|
        image_path = avatar_upload_list[1]
 | 
						|
        original_image_path = avatar_upload_list[2]
 | 
						|
        response = requests.get(slack_avatar_url + '-512', stream=True)
 | 
						|
        with open(image_path, 'wb') as image_file:
 | 
						|
            shutil.copyfileobj(response.raw, image_file)
 | 
						|
        shutil.copy(image_path, original_image_path)
 | 
						|
        return 0
 | 
						|
 | 
						|
    logging.info('######### GETTING AVATARS #########\n')
 | 
						|
    logging.info('DOWNLOADING AVATARS .......\n')
 | 
						|
    avatar_original_list = []
 | 
						|
    avatar_upload_list = []
 | 
						|
    for avatar in avatar_list:
 | 
						|
        avatar_hash = user_avatar_path_from_ids(avatar['user_profile_id'], realm_id)
 | 
						|
        slack_avatar_url = avatar['path']
 | 
						|
        avatar_original = dict(avatar)
 | 
						|
 | 
						|
        image_path = ('%s/%s.png' % (avatar_dir, avatar_hash))
 | 
						|
        original_image_path = ('%s/%s.original' % (avatar_dir, avatar_hash))
 | 
						|
 | 
						|
        avatar_upload_list.append([slack_avatar_url, image_path, original_image_path])
 | 
						|
 | 
						|
        # We don't add the size field here in avatar's records.json,
 | 
						|
        # since the metadata is not needed on the import end, and we
 | 
						|
        # don't have it until we've downloaded the files anyway.
 | 
						|
        avatar['path'] = image_path
 | 
						|
        avatar['s3_path'] = image_path
 | 
						|
 | 
						|
        avatar_original['path'] = original_image_path
 | 
						|
        avatar_original['s3_path'] = original_image_path
 | 
						|
        avatar_original_list.append(avatar_original)
 | 
						|
 | 
						|
    # Run downloads parallely
 | 
						|
    output = []
 | 
						|
    for (status, job) in run_parallel(get_avatar, avatar_upload_list, threads=threads):
 | 
						|
        output.append(job)
 | 
						|
 | 
						|
    logging.info('######### GETTING AVATARS FINISHED #########\n')
 | 
						|
    return avatar_list + avatar_original_list
 | 
						|
 | 
						|
def process_uploads(upload_list: List[ZerverFieldsT], upload_dir: str,
 | 
						|
                    threads: int) -> List[ZerverFieldsT]:
 | 
						|
    """
 | 
						|
    This function gets the uploads and saves it in the realm's upload directory
 | 
						|
    """
 | 
						|
    def get_uploads(upload: List[str]) -> int:
 | 
						|
        upload_url = upload[0]
 | 
						|
        upload_path = upload[1]
 | 
						|
        upload_path = os.path.join(upload_dir, upload_path)
 | 
						|
 | 
						|
        response = requests.get(upload_url, stream=True)
 | 
						|
        os.makedirs(os.path.dirname(upload_path), exist_ok=True)
 | 
						|
        with open(upload_path, 'wb') as upload_file:
 | 
						|
            shutil.copyfileobj(response.raw, upload_file)
 | 
						|
        return 0
 | 
						|
 | 
						|
    logging.info('######### GETTING ATTACHMENTS #########\n')
 | 
						|
    logging.info('DOWNLOADING ATTACHMENTS .......\n')
 | 
						|
    upload_url_list = []
 | 
						|
    for upload in upload_list:
 | 
						|
        upload_url = upload['path']
 | 
						|
        upload_s3_path = upload['s3_path']
 | 
						|
        upload_url_list.append([upload_url, upload_s3_path])
 | 
						|
        upload['path'] = upload_s3_path
 | 
						|
 | 
						|
    # Run downloads parallely
 | 
						|
    output = []
 | 
						|
    for (status, job) in run_parallel(get_uploads, upload_url_list, threads=threads):
 | 
						|
        output.append(job)
 | 
						|
 | 
						|
    logging.info('######### GETTING ATTACHMENTS FINISHED #########\n')
 | 
						|
    return upload_list
 | 
						|
 | 
						|
def get_data_file(path: str) -> Any:
 | 
						|
    data = json.load(open(path))
 | 
						|
    return data
 | 
						|
 | 
						|
def get_slack_api_data(token: str, slack_api_url: str, get_param: str) -> Any:
 | 
						|
    data = requests.get('%s?token=%s' % (slack_api_url, token))
 | 
						|
    if data.status_code == requests.codes.ok:
 | 
						|
        if 'error' in data.json():
 | 
						|
            raise Exception('Enter a valid token!')
 | 
						|
        json_data = data.json()[get_param]
 | 
						|
        return json_data
 | 
						|
    else:
 | 
						|
        raise Exception('Something went wrong. Please try again!')
 | 
						|
 | 
						|
def create_converted_data_files(data: Any, output_dir: str, file_path: str) -> None:
 | 
						|
    output_file = output_dir + file_path
 | 
						|
    json.dump(data, open(output_file, 'w'), indent=4)
 |