mirror of
https://github.com/zulip/zulip.git
synced 2025-11-04 14:03:30 +00:00
This is a very early version of a tool to convert HipChat
tar files into data files that can be used by the Zulip
import process.
We include the most fundamental entities--users and
streams. Customers who don't care about past messages
or customizations could start an instance off of this
and start communicating.
Of course, there are a lot of things missing in the
initial version:
* messages!
* file assets -- avatars, emojis, attachments
* probably lots of other minor things
We currently ignore any incoming dates from the HipChat data
and just use the current time. This is consistent with
other imports.
We also don't have any docs yet, although the process
will be extremely similar to the "Slack" process:
https://zulipchat.com/help/import-from-slack
Also, there's a comment at the top of convert_hipchat_data.py
that describes how to test this in dev mode.
I tested this by following the steps in the comment above.
The users just "show up" in /devlogin, so that's nice, and
you can send messages to other users. To verify the stream
data you have to go into the gear menu and click on "All
Streams", then you can subscribe and send a message.
Production users will need to get new passwords and
re-subscribe to streams. We will probably auto-subscribe
all users to public streams.
440 lines
16 KiB
Python
440 lines
16 KiB
Python
import random
|
|
import requests
|
|
import shutil
|
|
import logging
|
|
import os
|
|
|
|
from typing import List, Dict, Any, Optional
|
|
from django.forms.models import model_to_dict
|
|
|
|
from zerver.models import Realm, RealmEmoji, Subscription, Recipient, \
|
|
Attachment, Stream, Message, UserProfile
|
|
from zerver.lib.actions import STREAM_ASSIGNMENT_COLORS as stream_colors
|
|
from zerver.lib.avatar_hash import user_avatar_path_from_ids
|
|
from zerver.lib.parallel import run_parallel
|
|
|
|
# stubs
|
|
# Generic record type used throughout this file: a dict keyed by field
# name, either produced by model_to_dict() or assembled by hand.
ZerverFieldsT = Dict[str, Any]
|
|
|
|
def build_zerver_realm(realm_id: int, realm_subdomain: str, time: float,
                       other_product: str) -> List[ZerverFieldsT]:
    """
    Build the one-element `zerver_realm` list for an imported organization.

    realm_id: id given to the Realm record.
    realm_subdomain: used for both the realm's `name` and its `string_id`.
    time: stored as the realm's `date_created`.
    other_product: name of the product being imported from; it only
        appears in the generated description text.

    Returns a list containing a single dict of Realm fields.
    """
    realm = Realm(id=realm_id, date_created=time,
                  name=realm_subdomain, string_id=realm_subdomain,
                  description=("Organization imported from %s!" % (other_product)))
    # authentication_methods is serialized by hand: the first two items of
    # each entry are kept as a two-element list.
    auth_methods = [[flag[0], flag[1]] for flag in realm.authentication_methods]
    # `exclude` has to be a list of field names.  A bare string would be
    # matched with a substring test against every field name.
    realm_dict = model_to_dict(realm, exclude=['authentication_methods'])
    realm_dict['authentication_methods'] = auth_methods
    return [realm_dict]
|
|
|
|
def build_user(avatar_source: str,
               date_joined: Any,
               delivery_email: str,
               email: str,
               full_name: str,
               id: int,
               is_realm_admin: bool,
               realm_id: int,
               short_name: str,
               timezone: str) -> ZerverFieldsT:
    """
    Return the dict form of a UserProfile built from the given fields.

    Every argument is passed straight through to the UserProfile
    constructor under the same name.  `pointer` is not configurable
    and is always set to -1.
    """
    user_fields = dict(
        avatar_source=avatar_source,
        date_joined=date_joined,
        delivery_email=delivery_email,
        email=email,
        full_name=full_name,
        id=id,
        is_realm_admin=is_realm_admin,
        pointer=-1,
        realm_id=realm_id,
        short_name=short_name,
        timezone=timezone,
    )
    return model_to_dict(UserProfile(**user_fields))
|
|
|
|
def build_avatar(zulip_user_id: int, realm_id: int, email: str, avatar_url: str,
                 timestamp: Any, avatar_list: List[ZerverFieldsT]) -> None:
    """
    Append one avatar record for a user to `avatar_list`.

    The record's `path` temporarily holds the original avatar URL; the
    file itself is downloaded later.  `s3_path` and `size` start out as
    empty strings and `content_type` as None.  Nothing is returned; the
    list passed in is modified in place.
    """
    avatar_list.append({
        # Original avatar URL for now; the download happens later.
        'path': avatar_url,
        'realm_id': realm_id,
        'content_type': None,
        'user_profile_id': zulip_user_id,
        'last_modified': timestamp,
        'user_profile_email': email,
        's3_path': "",
        'size': "",
    })
|
|
|
|
def build_subscription(recipient_id: int, user_id: int,
                       subscription_id: int) -> ZerverFieldsT:
    """
    Return the dict form of a Subscription linking a user to a recipient.

    recipient_id: stored under the 'recipient' key.
    user_id: stored under the 'user_profile' key.
    subscription_id: id of the Subscription record.

    The subscription color is picked at random from the stream colors.
    """
    subscription = Subscription(
        color=random.choice(stream_colors),
        id=subscription_id)
    # model_to_dict() matches `exclude` against field names, so the
    # foreign keys are named 'user_profile' and 'recipient' here (not
    # 'recipient_id'); both are filled in explicitly below.
    subscription_dict = model_to_dict(subscription, exclude=['user_profile', 'recipient'])
    subscription_dict['user_profile'] = user_id
    subscription_dict['recipient'] = recipient_id
    return subscription_dict
|
|
|
|
def build_subscriptions(
        zerver_userprofile: List[ZerverFieldsT],
        zerver_recipient: List[ZerverFieldsT],
        zerver_stream: List[ZerverFieldsT]) -> List[ZerverFieldsT]:
    """
    Return Subscription dicts that subscribe every user to every
    public stream, plus one subscription per personal recipient.

    This is only used for the Hipchat conversion at the moment, though
    it may suit future conversions.  The Hipchat export often lacks
    full subscriber data, which is why everybody is simply subscribed
    to all streams that are not invite-only.

    For finer control over who gets subscribed to what, look at the
    code in slack.py.
    """
    subscriptions = []  # type: List[ZerverFieldsT]

    def subscribe(recipient_id: int, user_id: int) -> None:
        # Subscription ids are assigned sequentially, starting at 1.
        subscriptions.append(build_subscription(
            recipient_id=recipient_id,
            user_id=user_id,
            subscription_id=len(subscriptions) + 1,
        ))

    public_stream_ids = {
        row['id'] for row in zerver_stream if not row['invite_only']
    }
    public_stream_recipient_ids = {
        row['id']
        for row in zerver_recipient
        if row['type'] == Recipient.STREAM and row['type_id'] in public_stream_ids
    }
    all_user_ids = [row['id'] for row in zerver_userprofile]

    # Stream subscriptions: every user joins every public stream.
    for stream_recipient_id in public_stream_recipient_ids:
        for member_id in all_user_ids:
            subscribe(stream_recipient_id, member_id)

    # Personal subscriptions: a personal recipient's type_id is used as
    # the subscribing user's id.
    for row in zerver_recipient:
        if row['type'] == Recipient.PERSONAL:
            subscribe(row['id'], row['type_id'])

    return subscriptions
|
|
|
|
def build_recipient(type_id: int, recipient_id: int, type: int) -> ZerverFieldsT:
    """
    Return the dict form of a single Recipient.

    type_id: id of the object the recipient points at (the original
        comment calls it a stream id; build_recipients() in this file
        also uses user ids for personal recipients).
    recipient_id: id of the Recipient record itself.
    type: the Recipient type constant.
    """
    return model_to_dict(Recipient(id=recipient_id, type=type, type_id=type_id))
|
|
|
|
def build_recipients(zerver_userprofile: List[ZerverFieldsT],
                     zerver_stream: List[ZerverFieldsT]) -> List[ZerverFieldsT]:
    """
    Return Recipient dicts for all users followed by all streams.

    Each user gets a PERSONAL recipient and each stream a STREAM
    recipient.  Ids are assigned sequentially from 1, with the users
    numbered first.

    As of this writing this is only used by the HipChat conversion;
    the Slack and Gitter conversions create their recipients while
    building other objects.
    """
    recipients = []  # type: List[ZerverFieldsT]

    sources = [
        (Recipient.PERSONAL, zerver_userprofile),
        (Recipient.STREAM, zerver_stream),
    ]
    for recipient_type, rows in sources:
        for row in rows:
            recipient = Recipient(
                type_id=row['id'],
                # The next free id is one more than the number built so far.
                id=len(recipients) + 1,
                type=recipient_type,
            )
            recipients.append(model_to_dict(recipient))

    return recipients
|
|
|
|
def build_realm(zerver_realm: List[ZerverFieldsT], realm_id: int,
                domain_name: str) -> ZerverFieldsT:
    """
    Assemble the top-level realm data dict for an import.

    zerver_realm: stored as-is under 'zerver_realm'.
    realm_id: used for both the 'realm' and the 'id' of the single
        realm-domain entry.
    domain_name: domain of that realm-domain entry.

    Three fixed clients are always listed, and every other table
    starts out as an empty list.
    """
    clients = [
        {"name": "populate_db", "id": 1},
        {"name": "website", "id": 2},
        {"name": "API", "id": 3},
    ]
    realm_domain = {
        "realm": realm_id,
        "allow_subdomains": False,
        "domain": domain_name,
        "id": realm_id,
    }
    return {
        'zerver_client': clients,
        'zerver_customprofilefield': [],
        'zerver_customprofilefieldvalue': [],
        # Presence shows last-logged-in data, which is not available.
        'zerver_userpresence': [],
        'zerver_userprofile_mirrordummy': [],
        'zerver_realmdomain': [realm_domain],
        'zerver_useractivity': [],
        'zerver_realm': zerver_realm,
        'zerver_huddle': [],
        'zerver_userprofile_crossrealm': [],
        'zerver_useractivityinterval': [],
        'zerver_reaction': [],
        'zerver_realmemoji': [],
        'zerver_realmfilter': [],
    }
|
|
|
|
def build_usermessages(zerver_usermessage: List[ZerverFieldsT], usermessage_id: int,
                       zerver_subscription: List[ZerverFieldsT], recipient_id: int,
                       mentioned_users_id: List[int], message_id: int) -> int:
    """
    Append one usermessage record per subscriber of `recipient_id`.

    zerver_usermessage: list the new records are appended to (in place).
    usermessage_id: id for the first record created; it is incremented
        for each further record.
    zerver_subscription: subscription dicts; only those whose
        'recipient' equals `recipient_id` produce a record.
    mentioned_users_id: ids of users mentioned in the message.
    message_id: id of the message the records belong to.

    Returns the next unused usermessage id.
    """
    # Build the lookup once instead of scanning the list per subscriber.
    mentioned = set(mentioned_users_id)

    for subscription in zerver_subscription:
        if subscription['recipient'] != recipient_id:
            continue

        user_id = subscription['user_profile']
        # 1 marks the message as read; 9 marks it as read and mentioned.
        flags_mask = 9 if user_id in mentioned else 1

        zerver_usermessage.append(dict(
            user_profile=user_id,
            id=usermessage_id,
            flags_mask=flags_mask,
            message=message_id))
        usermessage_id += 1

    return usermessage_id
|
|
|
|
def build_defaultstream(realm_id: int, stream_id: int,
                        defaultstream_id: int) -> ZerverFieldsT:
    """
    Return a default-stream record tying `stream_id` to `realm_id`,
    with `defaultstream_id` as the record's own id.
    """
    return {
        'stream': stream_id,
        'realm': realm_id,
        'id': defaultstream_id,
    }
|
|
|
|
def build_stream(date_created: Any, realm_id: int, name: str,
                 description: str, stream_id: int, deactivated: bool=False,
                 invite_only: bool=False) -> ZerverFieldsT:
    """
    Return the dict form of a Stream.

    The Stream object is built without a realm; the 'realm' key of the
    resulting dict is then set to `realm_id` directly.  All other
    arguments are passed through to the Stream constructor
    (`stream_id` as its `id`).
    """
    stream_fields = dict(
        name=name,
        deactivated=deactivated,
        description=description,
        date_created=date_created,
        invite_only=invite_only,
        id=stream_id,
    )
    stream_dict = model_to_dict(Stream(**stream_fields), exclude=['realm'])
    stream_dict['realm'] = realm_id
    return stream_dict
|
|
|
|
def build_message(subject: str, pub_date: float, message_id: int, content: str,
                  rendered_content: Optional[str], user_id: int, recipient_id: int,
                  has_image: bool=False, has_link: bool=False,
                  has_attachment: bool=True) -> ZerverFieldsT:
    """
    Return the dict form of a Message.

    The sender, recipient and sending client are left out of the model
    conversion and written into the dict afterwards: 'sender' is
    `user_id`, 'recipient' is `recipient_id`, and 'sending_client' is
    always 1.

    Note that `has_attachment` defaults to True, unlike the other two
    flags.
    """
    message_fields = dict(
        rendered_content_version=1,  # this is Zulip specific
        subject=subject,
        pub_date=pub_date,
        id=message_id,
        content=content,
        rendered_content=rendered_content,
        has_image=has_image,
        has_attachment=has_attachment,
        has_link=has_link,
    )
    zulip_message_dict = model_to_dict(
        Message(**message_fields),
        exclude=['recipient', 'sender', 'sending_client'])

    zulip_message_dict['sender'] = user_id
    # Client id 1; build_realm() in this file lists id 1 as "populate_db".
    zulip_message_dict['sending_client'] = 1
    zulip_message_dict['recipient'] = recipient_id
    return zulip_message_dict
|
|
|
|
def build_attachment(realm_id: int, message_id: int, attachment_id: int,
                     user_id: int, fileinfo: ZerverFieldsT, s3_path: str,
                     zerver_attachment: List[ZerverFieldsT]) -> None:
    """
    Append the dict form of one Attachment to `zerver_attachment`.

    `fileinfo` must be a dictionary with the keys 'size', 'created'
    (creation time) and 'name' (file name).  `s3_path` becomes the
    attachment's `path_id`.  The attachment is always marked as
    realm-public, is owned by `user_id`, belongs to `realm_id`, and is
    linked to the single message `message_id`.
    """
    attachment_fields = dict(
        id=attachment_id,
        size=fileinfo['size'],
        create_time=fileinfo['created'],
        is_realm_public=True,
        path_id=s3_path,
        file_name=fileinfo['name'],
    )
    attachment_dict = model_to_dict(
        Attachment(**attachment_fields),
        exclude=['owner', 'messages', 'realm'])

    # The relational fields are written as plain ids.
    attachment_dict['owner'] = user_id
    attachment_dict['messages'] = [message_id]
    attachment_dict['realm'] = realm_id

    zerver_attachment.append(attachment_dict)
|
|
|
|
def process_avatars(avatar_list: List[ZerverFieldsT], avatar_dir: str, realm_id: int,
                    threads: int, size_url_suffix: str='') -> List[ZerverFieldsT]:
    """
    Download every user's avatar and save it under `avatar_dir` twice,
    once with the '.png' extension and once with '.original'.

    Parameters:

    1. avatar_list: List of avatars to be mapped in avatars records.json file.
       Each record's 'path' must hold the avatar URL on entry; 'path' and
       's3_path' are rewritten in place to the '.png' path.
    2. avatar_dir: Folder where the downloaded avatars are saved
    3. realm_id: Realm ID.
    4. threads: passed through to run_parallel().
    5. size_url_suffix: appended to each avatar URL before downloading.

    Returns `avatar_list` followed by a copy of each record that points
    at the '.original' file instead.
    """

    def get_avatar(avatar_upload_item: List[str]) -> int:
        # Each item is [url, png path, original path], as built below.
        avatar_url = avatar_upload_item[0]

        image_path = os.path.join(avatar_dir, avatar_upload_item[1])
        original_image_path = os.path.join(avatar_dir, avatar_upload_item[2])

        # Close the streamed response once the body has been copied.
        with requests.get(avatar_url + size_url_suffix, stream=True) as response:
            # Make sure the target folder exists, like the other downloaders do.
            image_dir = os.path.dirname(image_path)
            if image_dir:
                os.makedirs(image_dir, exist_ok=True)
            with open(image_path, 'wb') as image_file:
                shutil.copyfileobj(response.raw, image_file)
        shutil.copy(image_path, original_image_path)
        return 0

    logging.info('######### GETTING AVATARS #########\n')
    logging.info('DOWNLOADING AVATARS .......\n')
    avatar_original_list = []  # type: List[ZerverFieldsT]
    avatar_upload_list = []  # type: List[List[str]]
    for avatar in avatar_list:
        avatar_hash = user_avatar_path_from_ids(avatar['user_profile_id'], realm_id)
        avatar_url = avatar['path']
        avatar_original = dict(avatar)

        image_path = ('%s.png' % (avatar_hash))
        original_image_path = ('%s.original' % (avatar_hash))

        avatar_upload_list.append([avatar_url, image_path, original_image_path])
        # We don't add the size field here in avatar's records.json,
        # since the metadata is not needed on the import end, and we
        # don't have it until we've downloaded the files anyway.
        avatar['path'] = image_path
        avatar['s3_path'] = image_path

        avatar_original['path'] = original_image_path
        avatar_original['s3_path'] = original_image_path
        avatar_original_list.append(avatar_original)

    # Run downloads in parallel.  The results are iterated so that every
    # job is processed; the yielded (status, job) pairs are not used.
    for (status, job) in run_parallel(get_avatar, avatar_upload_list, threads=threads):
        pass

    logging.info('######### GETTING AVATARS FINISHED #########\n')
    return avatar_list + avatar_original_list
|
|
|
|
def process_uploads(upload_list: List[ZerverFieldsT], upload_dir: str,
                    threads: int) -> List[ZerverFieldsT]:
    """
    Download the uploads and save them in the realm's upload directory.

    Parameters:

    1. upload_list: List of uploads to be mapped in uploads records.json file.
       Each record's 'path' must hold the download URL on entry and is
       rewritten in place to the record's 's3_path'.
    2. upload_dir: Folder where the downloaded uploads are saved
    3. threads: passed through to run_parallel().

    Returns the same `upload_list` object.
    """
    def get_uploads(upload: List[str]) -> int:
        # Each item is [url, path relative to upload_dir], as built below.
        upload_url = upload[0]
        upload_path = os.path.join(upload_dir, upload[1])

        # Close the streamed response once the body has been copied.
        with requests.get(upload_url, stream=True) as response:
            os.makedirs(os.path.dirname(upload_path), exist_ok=True)
            with open(upload_path, 'wb') as upload_file:
                shutil.copyfileobj(response.raw, upload_file)
        return 0

    logging.info('######### GETTING ATTACHMENTS #########\n')
    logging.info('DOWNLOADING ATTACHMENTS .......\n')
    upload_url_list = []  # type: List[List[str]]
    for upload in upload_list:
        upload_url = upload['path']
        upload_s3_path = upload['s3_path']
        upload_url_list.append([upload_url, upload_s3_path])
        upload['path'] = upload_s3_path

    # Run downloads in parallel.  The results are iterated so that every
    # job is processed; the yielded (status, job) pairs are not used.
    for (status, job) in run_parallel(get_uploads, upload_url_list, threads=threads):
        pass

    logging.info('######### GETTING ATTACHMENTS FINISHED #########\n')
    return upload_list
|
|
|
|
def process_emojis(zerver_realmemoji: List[ZerverFieldsT], emoji_dir: str,
                   emoji_url_map: ZerverFieldsT, threads: int) -> List[ZerverFieldsT]:
    """
    Download the custom emojis and save them in the output emoji folder.

    Parameters:

    1. zerver_realmemoji: List of all RealmEmoji objects to be imported
    2. emoji_dir: Folder where the downloaded emojis are saved
    3. emoji_url_map: Maps emoji name to its url
    4. threads: passed through to run_parallel().

    Returns one record per emoji: a copy of the input dict with 'path'
    and 's3_path' added and the 'realm' key renamed to 'realm_id'.  The
    input dicts themselves are not modified.
    """
    def get_emojis(upload: List[str]) -> int:
        # Each item is [url, path relative to emoji_dir], as built below.
        emoji_url = upload[0]
        upload_emoji_path = os.path.join(emoji_dir, upload[1])

        # Close the streamed response once the body has been copied.
        with requests.get(emoji_url, stream=True) as response:
            os.makedirs(os.path.dirname(upload_emoji_path), exist_ok=True)
            with open(upload_emoji_path, 'wb') as emoji_file:
                shutil.copyfileobj(response.raw, emoji_file)
        return 0

    emoji_records = []  # type: List[ZerverFieldsT]
    upload_emoji_list = []  # type: List[List[str]]
    logging.info('######### GETTING EMOJIS #########\n')
    logging.info('DOWNLOADING EMOJIS .......\n')
    for emoji in zerver_realmemoji:
        emoji_url = emoji_url_map[emoji['name']]
        emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=emoji['realm'],
            emoji_file_name=emoji['name'])

        upload_emoji_list.append([emoji_url, emoji_path])

        emoji_record = dict(emoji)
        emoji_record['path'] = emoji_path
        emoji_record['s3_path'] = emoji_path
        emoji_record['realm_id'] = emoji_record.pop('realm')

        emoji_records.append(emoji_record)

    # Run downloads in parallel.  The results are iterated so that every
    # job is processed; the yielded (status, job) pairs are not used.
    for (status, job) in run_parallel(get_emojis, upload_emoji_list, threads=threads):
        pass

    logging.info('######### GETTING EMOJIS FINISHED #########\n')
    return emoji_records
|