mirror of
https://github.com/zulip/zulip.git
synced 2025-10-25 00:53:56 +00:00
import realm: Use processes for resizing avatar images.
This should significantly improve the data import performance when importing large open source realms from Slack. Fixes #11009.
This commit is contained in:
@@ -26,6 +26,7 @@ from zerver.lib.bugdown import version as bugdown_version
|
||||
from zerver.lib.upload import random_name, sanitize_name, \
|
||||
guess_type, BadImageError
|
||||
from zerver.lib.utils import generate_api_key, process_list_in_batches
|
||||
from zerver.lib.parallel import run_parallel
|
||||
from zerver.models import UserProfile, Realm, Client, Huddle, Stream, \
|
||||
UserMessage, Subscription, Message, RealmEmoji, \
|
||||
RealmDomain, Recipient, get_user_profile_by_id, \
|
||||
@@ -532,7 +533,7 @@ def bulk_import_client(data: TableData, model: Any, table: TableName) -> None:
|
||||
client = Client.objects.create(name=item['name'])
|
||||
update_id_map(table='client', old_id=item['id'], new_id=client.id)
|
||||
|
||||
def import_uploads(import_dir: Path, processing_avatars: bool=False,
|
||||
def import_uploads(import_dir: Path, processes: int, processing_avatars: bool=False,
|
||||
processing_emojis: bool=False) -> None:
|
||||
if processing_avatars and processing_emojis:
|
||||
raise AssertionError("Cannot import avatars and emojis at the same time!")
|
||||
@@ -640,7 +641,8 @@ def import_uploads(import_dir: Path, processing_avatars: bool=False,
|
||||
# avatar. TODO: This implementation is hacky, both in that it
|
||||
# does get_user_profile_by_id for each user, and in that it
|
||||
# might be better to require the export to just have these.
|
||||
for record in records:
|
||||
|
||||
def process_avatars(record: Dict[Any, Any]) -> int:
|
||||
if record['s3_path'].endswith('.original'):
|
||||
user_profile = get_user_profile_by_id(record['user_profile_id'])
|
||||
if settings.LOCAL_UPLOADS_DIR is not None:
|
||||
@@ -662,6 +664,16 @@ def import_uploads(import_dir: Path, processing_avatars: bool=False,
|
||||
user_profile.id))
|
||||
# Delete the record of the avatar to avoid 404s.
|
||||
do_change_avatar_fields(user_profile, UserProfile.AVATAR_FROM_GRAVATAR)
|
||||
return 0
|
||||
|
||||
if processes == 1:
|
||||
for record in records:
|
||||
process_avatars(record)
|
||||
else:
|
||||
connection.close()
|
||||
output = []
|
||||
for (status, job) in run_parallel(process_avatars, records, processes):
|
||||
output.append(job)
|
||||
|
||||
# Importing data suffers from a difficult ordering problem because of
|
||||
# models that reference each other circularly. Here is a correct order.
|
||||
@@ -681,7 +693,7 @@ def import_uploads(import_dir: Path, processing_avatars: bool=False,
|
||||
# Because the Python object => JSON conversion process is not fully
|
||||
# faithful, we have to use a set of fixers (e.g. on DateTime objects
|
||||
# and Foreign Keys) to do the import correctly.
|
||||
def do_import_realm(import_dir: Path, subdomain: str) -> Realm:
|
||||
def do_import_realm(import_dir: Path, subdomain: str, processes: int=1) -> Realm:
|
||||
logging.info("Importing realm dump %s" % (import_dir,))
|
||||
if not os.path.exists(import_dir):
|
||||
raise Exception("Missing import directory!")
|
||||
@@ -922,14 +934,14 @@ def do_import_realm(import_dir: Path, subdomain: str) -> Realm:
|
||||
bulk_import_model(data, CustomProfileFieldValue)
|
||||
|
||||
# Import uploaded files and avatars
|
||||
import_uploads(os.path.join(import_dir, "avatars"), processing_avatars=True)
|
||||
import_uploads(os.path.join(import_dir, "uploads"))
|
||||
import_uploads(os.path.join(import_dir, "avatars"), processes, processing_avatars=True)
|
||||
import_uploads(os.path.join(import_dir, "uploads"), processes)
|
||||
|
||||
# We need to have this check as the emoji files are only present in the data
|
||||
# importer from Slack
|
||||
# For Zulip export, this doesn't exist
|
||||
if os.path.exists(os.path.join(import_dir, "emoji")):
|
||||
import_uploads(os.path.join(import_dir, "emoji"), processing_emojis=True)
|
||||
import_uploads(os.path.join(import_dir, "emoji"), processes, processing_emojis=True)
|
||||
|
||||
sender_map = {
|
||||
user['id']: user
|
||||
|
||||
@@ -7,7 +7,7 @@ from typing import Any
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import BaseCommand, CommandParser
|
||||
from django.core.management.base import BaseCommand, CommandParser, CommandError
|
||||
|
||||
from zerver.lib.import_realm import do_import_realm, do_import_system_bots
|
||||
from zerver.forms import check_subdomain_available
|
||||
@@ -37,6 +37,11 @@ import a database dump from one or more JSON files."""
|
||||
parser.add_argument('export_paths', nargs='+',
|
||||
metavar='<export path>',
|
||||
help="list of export directories to import")
|
||||
parser.add_argument('--processes',
|
||||
dest='processes',
|
||||
action="store",
|
||||
default=6,
|
||||
help='Number of processes to use for uploading Avatars to S3 in parallel')
|
||||
parser.formatter_class = argparse.RawTextHelpFormatter
|
||||
|
||||
def do_destroy_and_rebuild_database(self, db_name: str) -> None:
|
||||
@@ -44,6 +49,10 @@ import a database dump from one or more JSON files."""
|
||||
subprocess.check_call([os.path.join(settings.DEPLOY_ROOT, "scripts/setup/flush-memcached")])
|
||||
|
||||
def handle(self, *args: Any, **options: Any) -> None:
|
||||
num_processes = int(options['processes'])
|
||||
if num_processes < 1:
|
||||
raise CommandError('You must have at least one process.')
|
||||
|
||||
subdomain = options['subdomain']
|
||||
|
||||
if options["destroy_rebuild_database"]:
|
||||
@@ -68,6 +77,6 @@ import a database dump from one or more JSON files."""
|
||||
|
||||
for path in paths:
|
||||
print("Processing dump: %s ..." % (path,))
|
||||
realm = do_import_realm(path, subdomain)
|
||||
realm = do_import_realm(path, subdomain, num_processes)
|
||||
print("Checking the system bots.")
|
||||
do_import_system_bots(realm)
|
||||
|
||||
Reference in New Issue
Block a user