From 493aae2958b037790c7ba77d890237ae7aee402e Mon Sep 17 00:00:00 2001 From: Steve Howell Date: Fri, 12 Oct 2018 21:42:17 +0000 Subject: [PATCH] imports: Make loading UserMessage faster and more robust. We use UserMessageLite to avoid Django overhead, and we do updates in chunks of 10000. (The export may be broken into several files already, but a reasonable chunking at import time is good defense against running out of memory.) --- zerver/lib/actions.py | 5 +++-- zerver/lib/import_realm.py | 32 +++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/zerver/lib/actions.py b/zerver/lib/actions.py index 734ffd21d2..6cf3ec8493 100644 --- a/zerver/lib/actions.py +++ b/zerver/lib/actions.py @@ -1443,10 +1443,10 @@ class UserMessageLite: is optimized for the simple use case of inserting a bunch of rows into zerver_usermessage. ''' - def __init__(self, user_profile_id: int, message_id: int) -> None: + def __init__(self, user_profile_id: int, message_id: int, flags: int) -> None: self.user_profile_id = user_profile_id self.message_id = message_id - self.flags = 0 + self.flags = flags def flags_list(self) -> List[str]: return UserMessage.flags_list_for_flags(self.flags) @@ -1462,6 +1462,7 @@ def create_user_messages(message: Message, um = UserMessageLite( user_profile_id=user_profile_id, message_id=message.id, + flags=0, ) ums_to_create.append(um) diff --git a/zerver/lib/import_realm.py b/zerver/lib/import_realm.py index 87d4f03b3a..ff30a47521 100644 --- a/zerver/lib/import_realm.py +++ b/zerver/lib/import_realm.py @@ -13,6 +13,7 @@ from django.utils.timezone import utc as timezone_utc, now as timezone_now from typing import Any, Dict, List, Optional, Set, Tuple, \ Iterable +from zerver.lib.actions import UserMessageLite, bulk_insert_ums from zerver.lib.avatar_hash import user_avatar_path_from_ids from zerver.lib.bulk_create import bulk_create_users from zerver.lib.timestamp import datetime_to_timestamp @@ -418,6 +419,34 @@ def 
def bulk_import_user_message_data(data: TableData, dump_file_id: int) -> None:
    '''
    Insert the rows stored under data['zerver_usermessage'] into the
    database in batches.

    Each row is wrapped in a UserMessageLite (built from its
    'user_profile_id', 'message_id' and 'flags' keys) and handed to
    bulk_insert_ums, at most 10000 rows per call.

    dump_file_id is only used in the final log message, to identify
    which dump file the rows came from.
    '''
    model = UserMessage
    table = 'zerver_usermessage'
    rows = data[table]

    # Bounded batches keep the number of UserMessageLite objects alive at
    # any one time small, as a defense against running out of memory on
    # large dump files.
    batch_size = 10000

    # An empty table yields an empty range, so nothing is inserted.
    for start in range(0, len(rows), batch_size):
        batch = rows[start:start + batch_size]
        bulk_insert_ums([
            UserMessageLite(
                user_profile_id=row['user_profile_id'],
                message_id=row['message_id'],
                flags=row['flags'],
            )
            for row in batch
        ])

    logging.info("Successfully imported %s from %s[%s]." % (model, table, dump_file_id))