imports: Make loading UserMessage faster and more robust.

We use UserMessageLite to avoid Django overhead, and we
do updates in chunks of 10000.  (The export may already be broken
into several files, but a reasonable chunking at
import time is a good defense against running out of memory.)
This commit is contained in:
Steve Howell
2018-10-12 21:42:17 +00:00
committed by Tim Abbott
parent 0d8ce4201c
commit 493aae2958
2 changed files with 34 additions and 3 deletions

View File

@@ -1443,10 +1443,10 @@ class UserMessageLite:
is optimized for the simple use case of inserting a bunch of
rows into zerver_usermessage.
'''
def __init__(self, user_profile_id: int, message_id: int) -> None:
def __init__(self, user_profile_id: int, message_id: int, flags: int) -> None:
self.user_profile_id = user_profile_id
self.message_id = message_id
self.flags = 0
self.flags = flags
def flags_list(self) -> List[str]:
    # Decode the integer bitfield into flag-name strings by delegating
    # to UserMessage.flags_list_for_flags (defined on the Django model).
    return UserMessage.flags_list_for_flags(self.flags)
@@ -1462,6 +1462,7 @@ def create_user_messages(message: Message,
um = UserMessageLite(
user_profile_id=user_profile_id,
message_id=message.id,
flags=0,
)
ums_to_create.append(um)

View File

@@ -13,6 +13,7 @@ from django.utils.timezone import utc as timezone_utc, now as timezone_now
from typing import Any, Dict, List, Optional, Set, Tuple, \
Iterable
from zerver.lib.actions import UserMessageLite, bulk_insert_ums
from zerver.lib.avatar_hash import user_avatar_path_from_ids
from zerver.lib.bulk_create import bulk_create_users
from zerver.lib.timestamp import datetime_to_timestamp
@@ -418,6 +419,34 @@ def update_model_ids(model: Any, data: TableData, related_table: TableName) -> N
update_id_map(related_table, old_id_list[item], allocated_id_list[item])
re_map_foreign_keys(data, table, 'id', related_table=related_table, id_field=True)
def bulk_import_user_message_data(data: TableData, dump_file_id: int) -> None:
    """Insert zerver_usermessage rows from an export dump.

    Uses UserMessageLite instead of full Django model instances to keep
    per-row overhead low, and inserts in chunks of 10000 rows so a large
    dump file cannot exhaust memory.

    :param data: export table data; rows are read from data['zerver_usermessage'].
    :param dump_file_id: identifier of the dump file being imported (used
        only for the progress log message).
    """
    model = UserMessage
    table = 'zerver_usermessage'
    lst = data[table]

    def process_batch(items: List[Dict[str, Any]]) -> None:
        # Build the lightweight row objects and hand them to the
        # bulk-insert helper in one call.
        ums = [
            UserMessageLite(
                user_profile_id=item['user_profile_id'],
                message_id=item['message_id'],
                flags=item['flags'],
            )
            for item in items
        ]
        bulk_insert_ums(ums)

    chunk_size = 10000
    # Walk the row list in fixed-size slices; range() handles the
    # short (or empty) final slice naturally.
    for offset in range(0, len(lst), chunk_size):
        process_batch(lst[offset:offset + chunk_size])

    # Lazy %-style logging args: formatting is skipped when INFO is disabled.
    logging.info("Successfully imported %s from %s[%s].", model, table, dump_file_id)
def bulk_import_model(data: TableData, model: Any, dump_file_id: Optional[str]=None) -> None:
table = get_db_table(model)
# TODO, deprecate dump_file_id
@@ -931,8 +960,9 @@ def import_message_data(import_dir: Path) -> None:
re_map_foreign_keys(data, 'zerver_usermessage', 'message', related_table="message")
re_map_foreign_keys(data, 'zerver_usermessage', 'user_profile', related_table="user_profile")
fix_bitfield_keys(data, 'zerver_usermessage', 'flags')
update_model_ids(UserMessage, data, 'usermessage')
bulk_import_model(data, UserMessage)
bulk_import_user_message_data(data, dump_file_id)
dump_file_id += 1
def import_attachments(data: TableData) -> None: