From 6289a551aa21183fd94eb344f5fbbd6345299161 Mon Sep 17 00:00:00 2001 From: PieterCK Date: Mon, 30 Sep 2024 15:26:03 +0700 Subject: [PATCH] data_import: Add email validation to third-party data converters. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit makes the third-party data converters check for invalid user emails. If it finds any, it’ll raise an Exception and show an error message with all the bad emails listed out. Fixes: #31783 --- zerver/data_import/import_util.py | 18 ++++++++++++++++++ zerver/data_import/mattermost.py | 14 +++++--------- zerver/data_import/rocketchat.py | 1 + zerver/data_import/slack.py | 2 ++ zerver/data_import/user_handler.py | 6 ++++++ zerver/tests/test_mattermost_importer.py | 10 ++++++++++ zerver/tests/test_rocketchat_importer.py | 16 ++++++++++++++++ zerver/tests/test_slack_importer.py | 11 ++++++++++- 8 files changed, 68 insertions(+), 10 deletions(-) diff --git a/zerver/data_import/import_util.py b/zerver/data_import/import_util.py index 99a2f6a388..3953f58697 100644 --- a/zerver/data_import/import_util.py +++ b/zerver/data_import/import_util.py @@ -10,6 +10,8 @@ from typing import Any, Protocol, TypeAlias, TypeVar import orjson import requests +from django.core.exceptions import ValidationError +from django.core.validators import validate_email from django.forms.models import model_to_dict from django.utils.timezone import now as timezone_now @@ -825,3 +827,19 @@ def long_term_idle_helper( user_profile_row["last_active_message_id"] = 1 return long_term_idle + + +def validate_user_emails_for_import(user_emails: list[str]) -> None: + invalid_emails = [] + for email in user_emails: + try: + validate_email(email) + except ValidationError: + invalid_emails.append(email) + + if invalid_emails: + details = ", ".join(invalid_emails) + error_log = ( + f"Invalid email format, please fix the following email(s) and try again: {details}" + ) + raise ValidationError(error_log) diff --git a/zerver/data_import/mattermost.py b/zerver/data_import/mattermost.py index 4d45465de7..9dcf0feeb0 100644 --- a/zerver/data_import/mattermost.py +++ b/zerver/data_import/mattermost.py @@ -132,15 +132,11 @@ def convert_user_data( realm_id: int, team_name: str, ) -> None: - user_data_list = [] - for username in user_data_map: - user = user_data_map[username] - if check_user_in_team(user, team_name) or user["is_mirror_dummy"]: - user_data_list.append(user) - - for raw_item in user_data_list: - user = process_user(raw_item, realm_id, team_name, user_id_mapper) - user_handler.add_user(user) + for user_data in user_data_map.values(): + if check_user_in_team(user_data, team_name) or user_data["is_mirror_dummy"]: + user = process_user(user_data, realm_id, team_name, user_id_mapper) + user_handler.add_user(user) + user_handler.validate_user_emails() def convert_channel_data( diff --git a/zerver/data_import/rocketchat.py b/zerver/data_import/rocketchat.py index 1ec5cc4aaa..906f0a1182 100644 --- a/zerver/data_import/rocketchat.py +++ b/zerver/data_import/rocketchat.py @@ -128,6 +128,7 @@ def process_users( ) user_handler.add_user(user) + user_handler.validate_user_emails() # Set the first realm_owner as the owner of # all the bots. if realm_owners: diff --git a/zerver/data_import/slack.py b/zerver/data_import/slack.py index 00606fc26d..fb66f342b3 100644 --- a/zerver/data_import/slack.py +++ b/zerver/data_import/slack.py @@ -39,6 +39,7 @@ from zerver.data_import.import_util import ( process_avatars, process_emojis, process_uploads, + validate_user_emails_for_import, ) from zerver.data_import.sequencer import NEXT_ID from zerver.data_import.slack_message_conversion import ( @@ -358,6 +359,7 @@ def users_to_zerver_userprofile( logging.info("%s: %s -> %s", slack_user_id, user["name"], userprofile_dict["email"]) + validate_user_emails_for_import(list(found_emails)) process_customprofilefields(zerver_customprofilefield, zerver_customprofilefield_values) logging.info("######### IMPORTING USERS FINISHED #########\n") return ( diff --git a/zerver/data_import/user_handler.py b/zerver/data_import/user_handler.py index 6a934d6834..385ef74215 100644 --- a/zerver/data_import/user_handler.py +++ b/zerver/data_import/user_handler.py @@ -1,5 +1,7 @@ from typing import Any +from zerver.data_import.import_util import validate_user_emails_for_import + class UserHandler: """ @@ -25,3 +27,7 @@ class UserHandler: def get_all_users(self) -> list[dict[str, Any]]: users = list(self.id_to_user_map.values()) return users + + def validate_user_emails(self) -> None: + all_users = self.get_all_users() + validate_user_emails_for_import([user["delivery_email"] for user in all_users]) diff --git a/zerver/tests/test_mattermost_importer.py b/zerver/tests/test_mattermost_importer.py index b368db337d..36019b00e6 100644 --- a/zerver/tests/test_mattermost_importer.py +++ b/zerver/tests/test_mattermost_importer.py @@ -202,6 +202,16 @@ class MatterMostImporter(ZulipTestCase): convert_user_data(user_handler, user_id_mapper, username_to_user, realm_id, team_name) self.assert_length(user_handler.get_all_users(), 3) + # Importer should raise error when user emails are malformed + team_name = "gryffindor" + bad_email1 = username_to_user["harry"]["email"] = "harry.ceramicist@zuL1[p.c0m" + bad_email2 = username_to_user["ron"]["email"] = "ron.ferret@zulup...com" + with self.assertRaises(Exception) as e: + convert_user_data(user_handler, user_id_mapper, username_to_user, realm_id, team_name) + error_message = str(e.exception) + expected_error_message = f"['Invalid email format, please fix the following email(s) and try again: {bad_email2}, {bad_email1}']" + self.assertEqual(error_message, expected_error_message) + def test_convert_channel_data(self) -> None: fixture_file_name = self.fixture_file_name("export.json", "mattermost_fixtures") mattermost_data = mattermost_data_file_to_dict(fixture_file_name) diff --git a/zerver/tests/test_rocketchat_importer.py b/zerver/tests/test_rocketchat_importer.py index b943249603..1fa8b83bc1 100644 --- a/zerver/tests/test_rocketchat_importer.py +++ b/zerver/tests/test_rocketchat_importer.py @@ -180,6 +180,22 @@ class RocketChatImporter(ZulipTestCase): self.assertEqual(user["is_mirror_dummy"], True) self.assertEqual(user["is_bot"], False) + # Importer should raise error when user emails are malformed + bad_email1 = rocketchat_data["user"][0]["emails"][0]["address"] = "test1@hotmai,l.om" + bad_email2 = rocketchat_data["user"][1]["emails"][0]["address"] = "test2@gmail.c" + user_id_to_user_map = map_user_id_to_user(rocketchat_data["user"]) + with self.assertRaises(Exception) as e: + process_users( + user_id_to_user_map=user_id_to_user_map, + realm_id=realm_id, + domain_name=domain_name, + user_handler=user_handler, + user_id_mapper=user_id_mapper, + ) + error_message = str(e.exception) + expected_error_message = f"['Invalid email format, please fix the following email(s) and try again: {bad_email1}, {bad_email2}']" + self.assertEqual(error_message, expected_error_message) + def test_categorize_channels_and_map_with_id(self) -> None: fixture_dir_name = self.fixture_file_name("", "rocketchat_fixtures") rocketchat_data = rocketchat_data_to_dict(fixture_dir_name) diff --git a/zerver/tests/test_slack_importer.py b/zerver/tests/test_slack_importer.py index a89d23d952..434f4e1a8a 100644 --- a/zerver/tests/test_slack_importer.py +++ b/zerver/tests/test_slack_importer.py @@ -374,7 +374,7 @@ class SlackImporter(ZulipTestCase): "Xf06054BBB": {"value": "random2"}, "Xf023DSCdd": {"value": "employer"}, } - user_data = [ + user_data: list[dict[str, Any]] = [ { "id": "U08RGD1RD", "team_id": "T5YFFM2QY", @@ -636,6 +636,15 @@ class SlackImporter(ZulipTestCase): self.assertEqual(zerver_userprofile[7]["is_active"], True) self.assertEqual(zerver_userprofile[7]["is_mirror_dummy"], False) + # Importer should raise error when user emails are malformed + bad_email1 = user_data[0]["profile"]["email"] = "jon@gmail,com" + bad_email2 = user_data[1]["profile"]["email"] = "jane@gmail.m" + with self.assertRaises(Exception) as e, self.assertLogs(level="INFO"): + users_to_zerver_userprofile(slack_data_dir, user_data, 1, timestamp, "test_domain") + error_message = str(e.exception) + expected_error_message = f"['Invalid email format, please fix the following email(s) and try again: {bad_email1}, {bad_email2}']" + self.assertEqual(error_message, expected_error_message) + def test_build_defaultstream(self) -> None: realm_id = 1 stream_id = 1