diff --git a/zerver/lib/slack_data_to_zulip_data.py b/zerver/lib/slack_data_to_zulip_data.py index c0ef594b20..5889566ba3 100755 --- a/zerver/lib/slack_data_to_zulip_data.py +++ b/zerver/lib/slack_data_to_zulip_data.py @@ -516,9 +516,9 @@ def convert_slack_workspace_messages(slack_data_dir: str, users: List[ZerverFiel added_users: AddedUsersT, added_recipient: AddedRecipientsT, added_channels: AddedChannelsT, realm: ZerverFieldsT, zerver_realmemoji: List[ZerverFieldsT], domain_name: str, - output_dir: str) -> Tuple[List[ZerverFieldsT], - List[ZerverFieldsT], - List[ZerverFieldsT]]: + output_dir: str, chunk_size: int=800) -> Tuple[List[ZerverFieldsT], + List[ZerverFieldsT], + List[ZerverFieldsT]]: """ Returns: 1. reactions, which is a list of the reactions @@ -533,24 +533,46 @@ def convert_slack_workspace_messages(slack_data_dir: str, users: List[ZerverFiel logging.info('######### IMPORTING MESSAGES STARTED #########\n') + total_reactions = [] # type: List[ZerverFieldsT] + total_attachments = [] # type: List[ZerverFieldsT] + total_uploads = [] # type: List[ZerverFieldsT] + message_id = usermessage_id = reaction_id = attachment_id = 0 id_list = (message_id, usermessage_id, reaction_id, attachment_id) - zerver_message, zerver_usermessage, attachment, uploads, \ - reactions, id_list = channel_message_to_zerver_message( - realm_id, users, added_users, added_recipient, all_messages, - zerver_realmemoji, realm['zerver_subscription'], added_channels, - id_list, domain_name) + # The messages are stored in batches + low_index = 0 + upper_index = low_index + chunk_size + dump_file_id = 1 - message_json = dict( - zerver_message=zerver_message, - zerver_usermessage=zerver_usermessage) - message_filename = os.path.join(output_dir, "messages-000001.json") - logging.info("Writing Messages to %s\n" % (message_filename,)) - create_converted_data_files(message_json, output_dir, '/messages-000001.json') + while True: + message_data = all_messages[low_index:upper_index] + if len(message_data) == 0: + break + zerver_message, zerver_usermessage, attachment, uploads, \ + reactions, id_list = channel_message_to_zerver_message( + realm_id, users, added_users, added_recipient, message_data, + zerver_realmemoji, realm['zerver_subscription'], added_channels, + id_list, domain_name) + + message_json = dict( + zerver_message=zerver_message, + zerver_usermessage=zerver_usermessage) + + message_file = "/messages-%06d.json" % (dump_file_id,) + logging.info("Writing Messages to %s\n" % (output_dir + message_file)) + create_converted_data_files(message_json, output_dir, message_file) + + total_reactions += reactions + total_attachments += attachment + total_uploads += uploads + + low_index = upper_index + upper_index = chunk_size + low_index + dump_file_id += 1 logging.info('######### IMPORTING MESSAGES FINISHED #########\n') - return reactions, uploads, attachment + return total_reactions, total_uploads, total_attachments def get_all_messages(slack_data_dir: str, added_channels: AddedChannelsT) -> List[ZerverFieldsT]: all_messages = [] # type: List[ZerverFieldsT] diff --git a/zerver/tests/test_slack_importer.py b/zerver/tests/test_slack_importer.py index 4428850f89..af4ca6d248 100644 --- a/zerver/tests/test_slack_importer.py +++ b/zerver/tests/test_slack_importer.py @@ -465,8 +465,10 @@ class SlackImporter(ZulipTestCase): @mock.patch("zerver.lib.slack_data_to_zulip_data.get_all_messages") def test_convert_slack_workspace_messages(self, mock_get_all_messages: mock.Mock, mock_message: mock.Mock) -> None: + os.makedirs('var/test-slack-import', exist_ok=True) added_channels = {'random': ('c5', 1), 'general': ('c6', 2)} # type: Dict[str, Tuple[str, int]] - zerver_message = [{'id': 1}, {'id': 5}] + time = float(timezone_now().timestamp()) + zerver_message = [{'id': 1, 'ts': time}, {'id': 5, 'ts': time}] realm = {'zerver_subscription': []} # type: Dict[str, Any] user_list = [] # type: List[Dict[str, Any]] @@ -476,17 +478,29 @@ class SlackImporter(ZulipTestCase): zerver_usermessage = [{'id': 3}, {'id': 5}, {'id': 6}, {'id': 9}] - mock_message.side_effect = [[zerver_message, zerver_usermessage, attachments, uploads, - reactions, id_list]] + mock_get_all_messages.side_effect = [zerver_message] + mock_message.side_effect = [[zerver_message[:1], zerver_usermessage[:2], + attachments, uploads, reactions[:1], id_list], + [zerver_message[1:2], zerver_usermessage[2:5], + attachments, uploads, reactions[1:1], id_list]] test_reactions, uploads, zerver_attachment = convert_slack_workspace_messages( - './random_path', user_list, 2, {}, {}, added_channels, realm, [], 'domain', 'var/test-slack-import') - messages_file = os.path.join('var', 'test-slack-import', 'messages-000001.json') - self.assertTrue(os.path.exists(messages_file)) + './random_path', user_list, 2, {}, {}, added_channels, + realm, [], 'domain', 'var/test-slack-import', chunk_size=1) + messages_file_1 = os.path.join('var', 'test-slack-import', 'messages-000001.json') + self.assertTrue(os.path.exists(messages_file_1)) + messages_file_2 = os.path.join('var', 'test-slack-import', 'messages-000002.json') + self.assertTrue(os.path.exists(messages_file_2)) - with open(messages_file) as f: + with open(messages_file_1) as f: message_json = ujson.load(f) - self.assertEqual(message_json['zerver_message'], zerver_message) - self.assertEqual(message_json['zerver_usermessage'], zerver_usermessage) + self.assertEqual(message_json['zerver_message'], zerver_message[:1]) + self.assertEqual(message_json['zerver_usermessage'], zerver_usermessage[:2]) + + with open(messages_file_2) as f: + message_json = ujson.load(f) + self.assertEqual(message_json['zerver_message'], zerver_message[1:2]) + self.assertEqual(message_json['zerver_usermessage'], zerver_usermessage[2:5]) + self.assertEqual(test_reactions, reactions) @mock.patch("zerver.lib.slack_data_to_zulip_data.process_uploads", return_value = [])