mirror of
https://github.com/zulip/zulip.git
synced 2025-11-03 13:33:24 +00:00
import_realm: Migrate from run_parallel to multiprocessing.
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
This commit is contained in:
committed by
Tim Abbott
parent
9a2aad58d0
commit
7f410ff0de
@@ -1,5 +1,6 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
|
import multiprocessing
|
||||||
import os
|
import os
|
||||||
import secrets
|
import secrets
|
||||||
import shutil
|
import shutil
|
||||||
@@ -27,7 +28,6 @@ from zerver.lib.bulk_create import bulk_create_users, bulk_set_users_or_streams_
|
|||||||
from zerver.lib.export import DATE_FIELDS, Field, Path, Record, TableData, TableName
|
from zerver.lib.export import DATE_FIELDS, Field, Path, Record, TableData, TableName
|
||||||
from zerver.lib.markdown import markdown_convert
|
from zerver.lib.markdown import markdown_convert
|
||||||
from zerver.lib.markdown import version as markdown_version
|
from zerver.lib.markdown import version as markdown_version
|
||||||
from zerver.lib.parallel import run_parallel
|
|
||||||
from zerver.lib.server_initialization import create_internal_realm, server_initialized
|
from zerver.lib.server_initialization import create_internal_realm, server_initialized
|
||||||
from zerver.lib.streams import render_stream_description
|
from zerver.lib.streams import render_stream_description
|
||||||
from zerver.lib.timestamp import datetime_to_timestamp
|
from zerver.lib.timestamp import datetime_to_timestamp
|
||||||
@@ -601,6 +601,32 @@ def bulk_import_client(data: TableData, model: Any, table: TableName) -> None:
|
|||||||
client = Client.objects.create(name=item['name'])
|
client = Client.objects.create(name=item['name'])
|
||||||
update_id_map(table='client', old_id=item['id'], new_id=client.id)
|
update_id_map(table='client', old_id=item['id'], new_id=client.id)
|
||||||
|
|
||||||
|
def process_avatars(record: Dict[str, Any]) -> None:
|
||||||
|
from zerver.lib.upload import upload_backend
|
||||||
|
if record['s3_path'].endswith('.original'):
|
||||||
|
user_profile = get_user_profile_by_id(record['user_profile_id'])
|
||||||
|
if settings.LOCAL_UPLOADS_DIR is not None:
|
||||||
|
avatar_path = user_avatar_path_from_ids(user_profile.id, record['realm_id'])
|
||||||
|
medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
|
||||||
|
avatar_path) + '-medium.png'
|
||||||
|
if os.path.exists(medium_file_path):
|
||||||
|
# We remove the image here primarily to deal with
|
||||||
|
# issues when running the import script multiple
|
||||||
|
# times in development (where one might reuse the
|
||||||
|
# same realm ID from a previous iteration).
|
||||||
|
os.remove(medium_file_path)
|
||||||
|
try:
|
||||||
|
upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
|
||||||
|
if record.get("importer_should_thumbnail"):
|
||||||
|
upload_backend.ensure_basic_avatar_image(user_profile=user_profile)
|
||||||
|
except BadImageError:
|
||||||
|
logging.warning(
|
||||||
|
"Could not thumbnail avatar image for user %s; ignoring",
|
||||||
|
user_profile.id,
|
||||||
|
)
|
||||||
|
# Delete the record of the avatar to avoid 404s.
|
||||||
|
do_change_avatar_fields(user_profile, UserProfile.AVATAR_FROM_GRAVATAR, acting_user=None)
|
||||||
|
|
||||||
def import_uploads(realm: Realm, import_dir: Path, processes: int, processing_avatars: bool=False,
|
def import_uploads(realm: Realm, import_dir: Path, processes: int, processing_avatars: bool=False,
|
||||||
processing_emojis: bool=False, processing_realm_icons: bool=False) -> None:
|
processing_emojis: bool=False, processing_realm_icons: bool=False) -> None:
|
||||||
if processing_avatars and processing_emojis:
|
if processing_avatars and processing_emojis:
|
||||||
@@ -723,47 +749,19 @@ def import_uploads(realm: Realm, import_dir: Path, processes: int, processing_av
|
|||||||
shutil.copy(orig_file_path, file_path)
|
shutil.copy(orig_file_path, file_path)
|
||||||
|
|
||||||
if processing_avatars:
|
if processing_avatars:
|
||||||
from zerver.lib.upload import upload_backend
|
|
||||||
|
|
||||||
# Ensure that we have medium-size avatar images for every
|
# Ensure that we have medium-size avatar images for every
|
||||||
# avatar. TODO: This implementation is hacky, both in that it
|
# avatar. TODO: This implementation is hacky, both in that it
|
||||||
# does get_user_profile_by_id for each user, and in that it
|
# does get_user_profile_by_id for each user, and in that it
|
||||||
# might be better to require the export to just have these.
|
# might be better to require the export to just have these.
|
||||||
|
|
||||||
def process_avatars(record: Dict[Any, Any]) -> int:
|
|
||||||
if record['s3_path'].endswith('.original'):
|
|
||||||
user_profile = get_user_profile_by_id(record['user_profile_id'])
|
|
||||||
if settings.LOCAL_UPLOADS_DIR is not None:
|
|
||||||
avatar_path = user_avatar_path_from_ids(user_profile.id, record['realm_id'])
|
|
||||||
medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
|
|
||||||
avatar_path) + '-medium.png'
|
|
||||||
if os.path.exists(medium_file_path):
|
|
||||||
# We remove the image here primarily to deal with
|
|
||||||
# issues when running the import script multiple
|
|
||||||
# times in development (where one might reuse the
|
|
||||||
# same realm ID from a previous iteration).
|
|
||||||
os.remove(medium_file_path)
|
|
||||||
try:
|
|
||||||
upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
|
|
||||||
if record.get("importer_should_thumbnail"):
|
|
||||||
upload_backend.ensure_basic_avatar_image(user_profile=user_profile)
|
|
||||||
except BadImageError:
|
|
||||||
logging.warning(
|
|
||||||
"Could not thumbnail avatar image for user %s; ignoring",
|
|
||||||
user_profile.id,
|
|
||||||
)
|
|
||||||
# Delete the record of the avatar to avoid 404s.
|
|
||||||
do_change_avatar_fields(user_profile, UserProfile.AVATAR_FROM_GRAVATAR, acting_user=None)
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if processes == 1:
|
if processes == 1:
|
||||||
for record in records:
|
for record in records:
|
||||||
process_avatars(record)
|
process_avatars(record)
|
||||||
else:
|
else:
|
||||||
connection.close()
|
connection.close()
|
||||||
output = []
|
with multiprocessing.Pool(processes) as p:
|
||||||
for (status, job) in run_parallel(process_avatars, records, processes):
|
for out in p.imap_unordered(process_avatars, records):
|
||||||
output.append(job)
|
pass
|
||||||
|
|
||||||
# Importing data suffers from a difficult ordering problem because of
|
# Importing data suffers from a difficult ordering problem because of
|
||||||
# models that reference each other circularly. Here is a correct order.
|
# models that reference each other circularly. Here is a correct order.
|
||||||
|
|||||||
Reference in New Issue
Block a user