export: Add detailed tests for export of public vs private data.

Adds detailed tests for the work in the prior commits fixing the treatment of private data in various tables in exports with consent and public exports.
2025-10-23 04:52:12 +00:00 · 2025-03-18 03:38:17 +08:00
parent 38de0ce7af
commit 28fee7aab8
1 changed files with 582 additions and 19 deletions
--- a/zerver/tests/test_import_export.py
+++ b/zerver/tests/test_import_export.py
@@ -13,6 +13,7 @@ from django.conf import settings
 from django.core.exceptions import ValidationError
 from django.core.management.base import CommandError
 from django.db.models import Q, QuerySet
+from django.forms.models import model_to_dict
 from django.utils.timezone import now as timezone_now
 from typing_extensions import override

@@ -36,8 +37,9 @@ from zerver.actions.realm_settings import (
 )
 from zerver.actions.saved_snippets import do_create_saved_snippet
 from zerver.actions.scheduled_messages import check_schedule_message
+from zerver.actions.streams import do_change_stream_description
 from zerver.actions.user_activity import do_update_user_activity_interval
-from zerver.actions.user_settings import do_change_user_setting
+from zerver.actions.user_settings import do_change_user_delivery_email, do_change_user_setting
 from zerver.actions.user_status import do_update_user_status
 from zerver.actions.user_topics import do_set_user_topic_visibility_policy
 from zerver.actions.users import do_deactivate_user
@@ -45,7 +47,14 @@ from zerver.lib import upload
 from zerver.lib.avatar_hash import user_avatar_path
 from zerver.lib.bot_config import set_bot_config
 from zerver.lib.bot_lib import StateHandler
-from zerver.lib.export import Record, do_export_realm, do_export_user, export_usermessages_batch
+from zerver.lib.export import (
+    PRESERVED_AUDIT_LOG_EVENT_TYPES,
+    Record,
+    do_export_realm,
+    do_export_user,
+    export_usermessages_batch,
+    get_consented_user_ids,
+)
 from zerver.lib.import_realm import do_import_realm, get_incoming_message_ids
 from zerver.lib.migration_status import STALE_MIGRATIONS, AppMigrations, MigrationStatusJson
 from zerver.lib.streams import create_stream_if_needed
@@ -61,7 +70,7 @@ from zerver.lib.test_helpers import (
 )
 from zerver.lib.thumbnail import BadImageError
 from zerver.lib.upload import claim_attachment, upload_avatar_image, upload_message_attachment
-from zerver.lib.utils import assert_is_not_none
+from zerver.lib.utils import assert_is_not_none, get_fk_field_name
 from zerver.models import (
    AlertWord,
    Attachment,
@@ -95,7 +104,7 @@ from zerver.models import (
    UserStatus,
    UserTopic,
 )
-from zerver.models.clients import get_client
+from zerver.models.clients import Client, get_client
 from zerver.models.groups import SystemGroups
 from zerver.models.messages import ImageAttachment
 from zerver.models.presence import PresenceSequence
@@ -397,6 +406,9 @@ class RealmImportExportTest(ExportFile):
        exportable_user_ids: set[int] | None = None,
    ) -> None:
        output_dir = make_export_output_dir()
+        if export_type == RealmExport.EXPORT_FULL_WITH_CONSENT:
+            assert exportable_user_ids is not None
+
        with patch("zerver.lib.export.create_soft_link"), self.assertLogs(level="INFO"):
            do_export_realm(
                realm=realm,
@@ -416,6 +428,7 @@ class RealmImportExportTest(ExportFile):
                input_path=os.path.join(output_dir, "messages-000001.json.partial"),
                output_path=os.path.join(output_dir, "messages-000001.json"),
                export_full_with_consent=export_type == RealmExport.EXPORT_FULL_WITH_CONSENT,
+                consented_user_ids=exportable_user_ids,
            )

    def export_realm_and_create_auditlog(
@@ -649,6 +662,229 @@ class RealmImportExportTest(ExportFile):
        for user_profile_id in [cordelia.id, hamlet.id, iago.id, othello.id, polonius.id]:
            self.assertIn(user_profile_id, personal_recipient_type_ids)

+    def test_get_consented_user_ids(self) -> None:
+        realm = get_realm("zulip")
+        consented_user = self.example_user("iago")
+        do_change_user_setting(consented_user, "allow_private_data_export", True, acting_user=None)
+
+        non_consented_user = self.example_user("hamlet")
+        do_change_user_setting(
+            non_consented_user, "allow_private_data_export", False, acting_user=None
+        )
+
+        bot_of_consented_user = self.create_test_bot(
+            "bot-of-consented-user", consented_user, full_name="Bot of consented user"
+        )
+        # Bots don't really use allow_private_data_export setting (and it should be False by default)
+        # but set explicitly just to be clear on the setup.
+        # A bot of a consented user is considered consented no matter what.
+        do_change_user_setting(
+            bot_of_consented_user, "allow_private_data_export", False, acting_user=None
+        )
+
+        deactivated_bot_of_consented_user = self.create_test_bot(
+            "deactivated-bot-of-consented-user",
+            consented_user,
+            full_name="Deactivated bot of consented user",
+        )
+        do_change_user_setting(
+            deactivated_bot_of_consented_user, "allow_private_data_export", False, acting_user=None
+        )
+        do_deactivate_user(deactivated_bot_of_consented_user, acting_user=None)
+
+        # A bot of a non-consented user is considered not consented.
+        bot_of_non_consented_user = self.create_test_bot(
+            "bot-of-non-consented-user", non_consented_user, full_name="Bot of non-consented user"
+        )
+        do_change_user_setting(
+            bot_of_consented_user, "allow_private_data_export", False, acting_user=None
+        )
+
+        # Unless the bot has allow_private_data_export explicitly set to True. This is a rather
+        # unlikely case to encounter, since bots don't have a UI for editing such settings; but it could
+        # be flipped by a server admin.
+        consented_bot_of_non_consented_user = self.create_test_bot(
+            "consented-bot-of-non-consented-user",
+            non_consented_user,
+            full_name="Consented bot of non-consented user",
+        )
+        do_change_user_setting(
+            consented_bot_of_non_consented_user, "allow_private_data_export", True, acting_user=None
+        )
+
+        consented_user_ids = get_consented_user_ids(realm)
+
+        self.assertIn(consented_user.id, consented_user_ids)
+        self.assertNotIn(non_consented_user.id, consented_user_ids)
+
+        self.assertIn(bot_of_consented_user.id, consented_user_ids)
+        self.assertIn(deactivated_bot_of_consented_user.id, consented_user_ids)
+
+        self.assertNotIn(bot_of_non_consented_user.id, consented_user_ids)
+        self.assertIn(consented_bot_of_non_consented_user.id, consented_user_ids)
+
+        # A deactivated consented user is still considered consented.
+        do_deactivate_user(consented_user, acting_user=None)
+        self.assertIn(consented_user.id, get_consented_user_ids(realm))
+
+        # A mirror dummy is always considered consented, no matter the setting.
+        do_deactivate_user(non_consented_user, acting_user=None)
+        non_consented_user.is_mirror_dummy = True
+        non_consented_user.save(update_fields=["is_mirror_dummy"])
+        self.assertIn(non_consented_user.id, get_consented_user_ids(realm))
+
+    def test_client_objects_export(self) -> None:
+        """
+        Client objects require some special handling when exporting. They aren't
+        scoped to a realm - e.g. a Client object "website" can be used by multiple
+        realms. Any Message sent from the web app in any realm will have sending_client
+        pointing to the "website" Client row.
+
+        However, we cannot just export the whole server table when exporting a realm,
+        as it'll leak information about Clients which might only be used by some other realm(s).
+        Instead, the export system needs to carefully determine the set of Clients which are pointed
+        to by other objects in the export and only export those.
+
+        In an export with consent, this means only exporting Clients which were used by at least
+        one consented user in the realm - omitting those that may have only been used by a non-consenting
+        user, as those should be considered private.
+        """
+
+        realm = get_realm("zulip")
+        lear_realm = get_realm("lear")
+
+        # Set up a message in the lear realm for more ease of testing.
+        cordelia_lear = self.lear_user("cordelia")
+        king_lear = self.lear_user("king")
+        self.send_personal_message(cordelia_lear, king_lear)
+
+        # Consented users:
+        hamlet = self.example_user("hamlet")
+        othello = self.example_user("othello")
+        # Iago will be non-consenting.
+        iago = self.example_user("iago")
+        cordelia = self.example_user("cordelia")
+        do_change_user_setting(hamlet, "allow_private_data_export", True, acting_user=None)
+        do_change_user_setting(othello, "allow_private_data_export", True, acting_user=None)
+        do_change_user_setting(iago, "allow_private_data_export", False, acting_user=None)
+
+        a_message_id = self.send_personal_message(hamlet, othello)
+
+        # This Client won't be used by the realm.
+        non_realm_client = Client.objects.create(name="Non-realm client")
+
+        # This Client will be used by both of the realms.
+        realm_shared_client = Client.objects.create(name="Client shared between realms")
+        last_realm_message = Message.objects.get(id=a_message_id)
+        last_second_realm_message = Message.objects.filter(realm=lear_realm).latest("id")
+        last_realm_message.sending_client = realm_shared_client
+        last_second_realm_message.sending_client = realm_shared_client
+        last_realm_message.save()
+        last_second_realm_message.save()
+
+        # This Client will be used by a message sent to a consenting user.
+        b_message_id = self.send_personal_message(iago, hamlet)
+        consented_user_client = Client.objects.create(name="Consented user client")
+        b_message = Message.objects.get(id=b_message_id)
+        b_message.sending_client = consented_user_client
+        b_message.save()
+
+        # This Client will be used in a message sent between non-consented users
+        # and therefore should not be exported an export with consent.
+        c_message_id = self.send_personal_message(iago, cordelia)
+        non_consented_user_client = Client.objects.create(name="Non-consented uesr client")
+        c_message = Message.objects.get(id=c_message_id)
+        c_message.sending_client = non_consented_user_client
+        c_message.save()
+
+        all_our_client_ids = [
+            non_realm_client.id,
+            realm_shared_client.id,
+            consented_user_client.id,
+            non_consented_user_client.id,
+        ]
+        Client.objects.exclude(id__in=all_our_client_ids).delete()
+
+        self.export_realm_and_create_auditlog(
+            realm,
+            export_type=RealmExport.EXPORT_FULL_WITH_CONSENT,
+            exportable_user_ids=get_consented_user_ids(realm),
+        )
+
+        realm_data = read_json("realm.json")
+
+        exported_client_ids = self.get_set(realm_data["zerver_client"], "id")
+        self.assertEqual({realm_shared_client.id, consented_user_client.id}, exported_client_ids)
+
+        # Now verify that in a full export without consent, also the non_consented_user_client
+        # is included.
+        self.export_realm_and_create_auditlog(
+            realm,
+            export_type=RealmExport.EXPORT_FULL_WITHOUT_CONSENT,
+        )
+
+        realm_data = read_json("realm.json")
+
+        exported_client_ids = self.get_set(realm_data["zerver_client"], "id")
+        self.assertEqual(
+            {non_consented_user_client.id, realm_shared_client.id, consented_user_client.id},
+            exported_client_ids,
+        )
+
+    def test_public_export_private_and_public_data(self) -> None:
+        """
+        Public exports are essentially a special case of exports with consent: where
+        none of the users are consenting. The difference is that in a public export
+        we don't turn these non-consenting users into is_mirror_dummy=True.
+
+        Therefore it's enough to just test the basics here, as the detailed logic
+        is covered in tests for exports with consent.
+        """
+
+        realm = get_realm("zulip")
+
+        # Consented users:
+        hamlet = self.example_user("hamlet")
+        othello = self.example_user("othello")
+        # Iago will be non-consenting.
+        iago = self.example_user("iago")
+
+        do_change_user_setting(hamlet, "allow_private_data_export", True, acting_user=None)
+        do_change_user_setting(othello, "allow_private_data_export", True, acting_user=None)
+        do_change_user_setting(iago, "allow_private_data_export", False, acting_user=None)
+
+        # Despite both hamlet and othello having consent enabled, in a public export
+        # everyone is non-consenting - so a Client object used only in a DM will not
+        # be exported.
+        a_message_id = self.send_personal_message(hamlet, othello)
+        private_client = Client.objects.create(name="private client")
+        a_message = Message.objects.get(id=a_message_id)
+        a_message.sending_client = private_client
+        a_message.save()
+
+        # SavedSnippets are private content - so in a public export, despite
+        # hamlet having consent enabled, such objects should not be exported.
+        saved_snippet = do_create_saved_snippet("test", "test", hamlet)
+
+        # Data of some other tables (e.g. UserPresence) is public, so will
+        # be exported regardless of consent - even in a public export.
+        iago_presence = UserPresence.objects.create(user_profile=iago, realm=realm)
+
+        self.export_realm_and_create_auditlog(realm, export_type=RealmExport.EXPORT_PUBLIC)
+
+        realm_data = read_json("realm.json")
+
+        exported_client_ids = self.get_set(realm_data["zerver_client"], "id")
+        self.assertNotEqual(len(exported_client_ids), 0)
+        self.assertNotIn(private_client.id, exported_client_ids)
+
+        exported_saved_snippet_ids = self.get_set(realm_data["zerver_savedsnippet"], "id")
+        self.assertNotIn(saved_snippet.id, exported_saved_snippet_ids)
+        self.assertEqual(exported_saved_snippet_ids, set())
+
+        exported_user_presence_ids = self.get_set(realm_data["zerver_userpresence"], "id")
+        self.assertIn(iago_presence.id, exported_user_presence_ids)
+
    def test_export_realm_with_member_consent(self) -> None:
        realm = Realm.objects.get(string_id="zulip")

@@ -703,6 +939,7 @@ class RealmImportExportTest(ExportFile):
            self.example_user("AARON"),
            [self.example_user("cordelia"), self.example_user("ZOE"), self.example_user("othello")],
        )
+        direct_message_group_c = DirectMessageGroup.objects.last()

        # Create direct messages
        pm_a_msg_id = self.send_personal_message(
@@ -718,8 +955,15 @@ class RealmImportExportTest(ExportFile):
            self.example_user("iago"), self.example_user("hamlet")
        )

+        # Create some non-message private data for users. We will use SavedSnippet objects as they're simple
+        # to create and are private data that should not be exported for non-consenting users. There are many
+        # such private types of data (e.g. UserTopic, Draft) - we could test any of them equivalently.
+        iago_saved_snippet = do_create_saved_snippet("test", "test", self.example_user("iago"))
+        cordelia_saved_snippet = do_create_saved_snippet(
+            "test", "test", self.example_user("cordelia")
+        )
+
        # Iago and Hamlet consented to export their private data.
-        consented_user_ids = [self.example_user(user).id for user in ["iago", "hamlet"]]
        do_change_user_setting(
            self.example_user("iago"), "allow_private_data_export", True, acting_user=None
        )
@@ -727,23 +971,113 @@ class RealmImportExportTest(ExportFile):
            self.example_user("hamlet"), "allow_private_data_export", True, acting_user=None
        )

-        self.export_realm_and_create_auditlog(
-            realm, export_type=RealmExport.EXPORT_FULL_WITH_CONSENT
+        # Additionally, we set prospero's email visibility to NOBODY in order to test this is respected by the
+        # export. His real email value should not be exported.
+        non_consented_user_with_private_email = self.example_user("prospero")
+        do_change_user_setting(
+            non_consented_user_with_private_email,
+            "email_address_visibility",
+            UserProfile.EMAIL_ADDRESS_VISIBILITY_NOBODY,
+            acting_user=None,
+        )
+        # Do the same for the consenting iago, just to verify consent causes the private email address to be
+        # exported
+        do_change_user_setting(
+            self.example_user("iago"),
+            "email_address_visibility",
+            UserProfile.EMAIL_ADDRESS_VISIBILITY_NOBODY,
+            acting_user=None,
        )

-        data = read_json("realm.json")
+        # default-bot is a bot of a non-consenting user - let's also set up a bot for a consenting user
+        # to verify the bot gets treated as consenting.
+        consented_bot = self.create_test_bot(
+            "non-consented-bot", self.example_user("iago"), "Non consented bot"
+        )
+        consented_user_ids = {self.example_user(user).id for user in ["iago", "hamlet"]}
+        consented_user_ids.add(consented_bot.id)

-        self.assert_length(data["zerver_userprofile_crossrealm"], 3)
-        self.assert_length(data["zerver_userprofile_mirrordummy"], 0)
+        # Set up a non-consented, deactivated user to test the special behavior we have for them. In order to prevent
+        # originally deactivated users from being able to reactivate their account via signup after
+        # export->import cycle, we don't turn them into mirror dummies. Instead, they stay as regular deactivated
+        # users.
+        # At the same time, we have to be careful - despite ending up in zerver_userprofile in the export,
+        # they might not be consented to exporting their private data - so we have to test that this is handled
+        # correctly.
+        deactivated_non_consented_user = self.example_user("polonius")
+        deactivated_non_consented_user_saved_snippet = do_create_saved_snippet(
+            "test", "test", deactivated_non_consented_user
+        )
+        do_deactivate_user(deactivated_non_consented_user, acting_user=None)

-        exported_user_emails = self.get_set(data["zerver_userprofile"], "delivery_email")
-        self.assertIn(self.example_email("cordelia"), exported_user_emails)
+        self.assertEqual(get_consented_user_ids(realm), consented_user_ids)
+
+        self.export_realm_and_create_auditlog(
+            realm,
+            export_type=RealmExport.EXPORT_FULL_WITH_CONSENT,
+            exportable_user_ids=consented_user_ids,
+        )
+
+        realm_data = read_json("realm.json")
+
+        user_count = UserProfile.objects.filter(realm=realm).count()
+
+        self.assert_length(realm_data["zerver_userprofile_crossrealm"], 3)
+        # Non-consenting users will become mirror dummy users - with the exception of deactivated users.
+        # Those stay as regular, deactivated users.
+        # We offset the counts below by 1 exactly to account for deactivated_non_consented_user.
+        self.assert_length(
+            realm_data["zerver_userprofile_mirrordummy"], user_count - len(consented_user_ids) - 1
+        )
+        self.assert_length(realm_data["zerver_userprofile"], len(consented_user_ids) + 1)
+
+        exported_user_emails = self.get_set(realm_data["zerver_userprofile"], "delivery_email")
+        exported_mirror_dummy_user_emails = self.get_set(
+            realm_data["zerver_userprofile_mirrordummy"], "delivery_email"
+        )
+
+        self.assertIn(self.example_email("cordelia"), exported_mirror_dummy_user_emails)
        self.assertIn(self.example_email("hamlet"), exported_user_emails)
        self.assertIn(self.example_email("iago"), exported_user_emails)
-        self.assertIn(self.example_email("othello"), exported_user_emails)
-        self.assertIn("default-bot@zulip.com", exported_user_emails)
+        self.assertIn(consented_bot.delivery_email, exported_user_emails)
+        self.assertIn(self.example_email("othello"), exported_mirror_dummy_user_emails)
+        self.assertIn(deactivated_non_consented_user.delivery_email, exported_user_emails)
+        self.assertIn("default-bot@zulip.com", exported_mirror_dummy_user_emails)

-        exported_streams = self.get_set(data["zerver_stream"], "name")
+        # Verify that the _mirrordummy tables is the table of mirror dummy users, as expected.
+        self.assertTrue(
+            all(row["is_active"] is False for row in realm_data["zerver_userprofile_mirrordummy"])
+        )
+        self.assertTrue(
+            all(
+                row["is_mirror_dummy"] is True
+                for row in realm_data["zerver_userprofile_mirrordummy"]
+            )
+        )
+        self.assertTrue(
+            all(row["is_mirror_dummy"] is False for row in realm_data["zerver_userprofile"])
+        )
+
+        # Verify that the deactivated_non_consented_user did not not become a mirror dummy.
+        exported_deactivated_non_consented_user = next(
+            user
+            for user in realm_data["zerver_userprofile"]
+            if user["id"] == deactivated_non_consented_user.id
+        )
+        self.assertEqual(exported_deactivated_non_consented_user["is_active"], False)
+        self.assertEqual(exported_deactivated_non_consented_user["is_mirror_dummy"], False)
+
+        exported_non_consented_user_with_private_email = next(
+            user
+            for user in realm_data["zerver_userprofile_mirrordummy"]
+            if user["id"] == non_consented_user_with_private_email.id
+        )
+        self.assertRegex(
+            exported_non_consented_user_with_private_email["delivery_email"],
+            r"exported-user-[a-zA-Z0-9]+@zulip\.testserver",
+        )
+
+        exported_streams = self.get_set(realm_data["zerver_stream"], "name")
        self.assertEqual(
            exported_streams,
            {
@@ -817,9 +1151,13 @@ class RealmImportExportTest(ExportFile):
            .values_list("id", flat=True)
        )

+        assert (
+            direct_message_group_a is not None
+            and direct_message_group_b is not None
+            and direct_message_group_c is not None
+        )
        # Third direct message group is not exported since none of
        # the members gave consent
-        assert direct_message_group_a is not None and direct_message_group_b is not None
        direct_message_group_recipients = Recipient.objects.filter(
            type_id__in=[direct_message_group_a.id, direct_message_group_b.id],
            type=Recipient.DIRECT_MESSAGE_GROUP,
@@ -827,7 +1165,7 @@ class RealmImportExportTest(ExportFile):
        pm_query = Q(recipient__in=direct_message_group_recipients) | Q(
            sender__in=consented_user_ids
        )
-        exported_direct_message_group_ids = (
+        exported_dm_group_message_ids = (
            Message.objects.filter(pm_query, realm=realm.id)
            .values_list("id", flat=True)
            .values_list("id", flat=True)
@@ -838,7 +1176,7 @@ class RealmImportExportTest(ExportFile):
            *private_stream_message_ids,
            stream_b_second_message_id,
            *exported_pm_ids,
-            *exported_direct_message_group_ids,
+            *exported_dm_group_message_ids,
        }
        self.assertEqual(self.get_set(data["zerver_message"], "id"), exported_msg_ids)

@@ -852,6 +1190,211 @@ class RealmImportExportTest(ExportFile):
        self.assertIn(pm_c_msg_id, exported_msg_ids)
        self.assertIn(pm_d_msg_id, exported_msg_ids)

+        # iago is the only consented user with a SavedSnippet. cordelia didn't consent so her SavedSnippet
+        # should not be exported.
+        exported_saved_snippet_ids = self.get_set(realm_data["zerver_savedsnippet"], "id")
+        self.assertNotIn(cordelia_saved_snippet.id, exported_saved_snippet_ids)
+        self.assertNotIn(
+            deactivated_non_consented_user_saved_snippet.id, exported_saved_snippet_ids
+        )
+        self.assertEqual(exported_saved_snippet_ids, {iago_saved_snippet.id})
+
+        exported_direct_message_group_ids = self.get_set(realm_data["zerver_huddle"], "id")
+        self.assertNotIn(direct_message_group_c.id, exported_direct_message_group_ids)
+        self.assertEqual(
+            exported_direct_message_group_ids,
+            {direct_message_group_a.id, direct_message_group_b.id},
+        )
+
+        # We also want to verify Subscriptions to the DirectMessageGroups were exported correctly.
+        # As long as a DirectMessageGroup is exported (due to having at least one consenting user
+        # in it), *all* the Subscriptions to it should be exported - to maintain the expected
+        # structure of the data.
+        exported_direct_message_group_a_recipient = next(
+            huddle["recipient"]
+            for huddle in realm_data["zerver_huddle"]
+            if huddle["id"] == direct_message_group_a.id
+        )
+        exported_direct_message_group_a_sub_ids = [
+            sub["id"]
+            for sub in realm_data["zerver_subscription"]
+            if sub["recipient"] == exported_direct_message_group_a_recipient
+        ]
+        self.assert_length(exported_direct_message_group_a_sub_ids, 3)
+
+        exported_direct_message_group_b_recipient = next(
+            huddle["recipient"]
+            for huddle in realm_data["zerver_huddle"]
+            if huddle["id"] == direct_message_group_b.id
+        )
+        exported_direct_message_group_b_sub_ids = [
+            sub["id"]
+            for sub in realm_data["zerver_subscription"]
+            if sub["recipient"] == exported_direct_message_group_b_recipient
+        ]
+        self.assert_length(exported_direct_message_group_b_sub_ids, 4)
+
+    def test_export_realm_data_scrubbing(self) -> None:
+        realm = get_realm("zulip")
+        consented_user = self.example_user("iago")
+        non_consented_user = self.example_user("hamlet")
+        do_change_user_setting(
+            non_consented_user,
+            "email_address_visibility",
+            UserProfile.EMAIL_ADDRESS_VISIBILITY_NOBODY,
+            acting_user=None,
+        )
+
+        # Change some user settings to test the scrubbing of settings for non-consenting users.
+        do_change_user_setting(consented_user, "web_font_size_px", 123, acting_user=None)
+        do_change_user_setting(non_consented_user, "web_font_size_px", 456, acting_user=None)
+
+        # For testing the scrubbing of RealmAuditLogs, we generate some events that will get
+        # RealmAuditLog events.
+        # RealmAuditLogs where the modified_user is consenting will be preserved, while those
+        # where the modified_user is not consenting will not be included in the export, with
+        # the exception of a few event types such as SUBSCRIPTION_CREATED - which are exported.
+        do_change_user_delivery_email(consented_user, "iago-new@zulip.com", acting_user=None)
+        consented_user_email_change_log = RealmAuditLog.objects.last()
+        assert consented_user_email_change_log is not None
+
+        self.subscribe(consented_user, "some-new-stream")
+        consented_user_subscription_event = RealmAuditLog.objects.last()
+        assert consented_user_subscription_event is not None
+        assert (
+            consented_user_subscription_event.event_type == AuditLogEventType.SUBSCRIPTION_CREATED
+        )
+
+        do_change_user_delivery_email(non_consented_user, "hamlet-new@zulip.com", acting_user=None)
+        non_consented_user_email_change_log = RealmAuditLog.objects.last()
+        assert non_consented_user_email_change_log is not None
+
+        self.subscribe(non_consented_user, "some-new-stream")
+        non_consented_user_subscription_event = RealmAuditLog.objects.last()
+        assert non_consented_user_subscription_event is not None
+        assert (
+            non_consented_user_subscription_event.event_type
+            == AuditLogEventType.SUBSCRIPTION_CREATED
+        )
+
+        # Make sure we also have a RealmAuditLog of a type with modified_user=None to test that edge case
+        # and ensure we don't accidentally drop such entries.
+        stream = get_stream("Denmark", realm)
+        do_change_stream_description(
+            # Ensure that logs with non-consenting acting_acting user aren't accidentally dropped either.
+            stream,
+            "some new description",
+            acting_user=non_consented_user,
+        )
+        stream_description_change_log = RealmAuditLog.objects.last()
+        assert stream_description_change_log is not None
+
+        consented_user_original_subs_count = Subscription.objects.filter(
+            user_profile=consented_user
+        ).count()
+        non_consented_user_original_subs_count = Subscription.objects.filter(
+            user_profile=non_consented_user
+        ).count()
+
+        self.assertNotEqual(consented_user_original_subs_count, 0)
+        self.assertNotEqual(non_consented_user_original_subs_count, 0)
+
+        # Change the color of subscription as an easy reference point to use to detect if they Subscription
+        # attributes are getting scrubbed or not.
+        Subscription.objects.filter(user_profile__in=[consented_user, non_consented_user]).update(
+            color="#foo"
+        )
+        consented_user_ids = {consented_user.id}
+        do_change_user_setting(consented_user, "allow_private_data_export", True, acting_user=None)
+
+        self.export_realm_and_create_auditlog(
+            realm,
+            export_type=RealmExport.EXPORT_FULL_WITH_CONSENT,
+            exportable_user_ids=consented_user_ids,
+        )
+
+        realm_data = read_json("realm.json")
+
+        exported_realm_audit_logs = realm_data["zerver_realmauditlog"]
+        exported_realm_audit_log_ids = self.get_set(exported_realm_audit_logs, "id")
+        self.assertNotIn(non_consented_user_email_change_log.id, exported_realm_audit_log_ids)
+        self.assertIn(non_consented_user_subscription_event.id, exported_realm_audit_log_ids)
+        self.assertIn(consented_user_subscription_event.id, exported_realm_audit_log_ids)
+        self.assertIn(consented_user_email_change_log.id, exported_realm_audit_log_ids)
+        self.assertIn(stream_description_change_log.id, exported_realm_audit_log_ids)
+
+        # More general assertions on the entire sets of audit logs pertaining to our users.
+        for log in RealmAuditLog.objects.filter(modified_user=non_consented_user).exclude(
+            event_type__in=PRESERVED_AUDIT_LOG_EVENT_TYPES
+        ):
+            self.assertNotIn(log.id, exported_realm_audit_log_ids)
+        for log in RealmAuditLog.objects.filter(modified_user=consented_user):
+            self.assertIn(log.id, exported_realm_audit_log_ids)
+
+        # Now check scrubbing of Subscriptions data.
+        consented_user_exported_subscriptions = {
+            sub["id"]: sub
+            for sub in realm_data["zerver_subscription"]
+            if sub["user_profile"] == consented_user.id
+        }
+        non_consented_user_exported_subscriptions = {
+            sub["id"]: sub
+            for sub in realm_data["zerver_subscription"]
+            if sub["user_profile"] == non_consented_user.id
+        }
+        self.assertEqual(
+            len(consented_user_exported_subscriptions), consented_user_original_subs_count
+        )
+        self.assertEqual(
+            len(non_consented_user_exported_subscriptions), non_consented_user_original_subs_count
+        )
+        for sub in Subscription.objects.filter(user_profile=consented_user):
+            original_sub = model_to_dict(sub)
+            exported_sub = consented_user_exported_subscriptions[sub.id]
+            # Subscriptions of the consenting users get exported fully as they are,
+            # so the #foo color should be preserved.
+            self.assertEqual(original_sub, exported_sub)
+            self.assertEqual(exported_sub["color"], "#foo")
+        for sub in Subscription.objects.filter(user_profile=non_consented_user):
+            original_sub = model_to_dict(sub)
+            exported_sub = non_consented_user_exported_subscriptions[sub.id]
+            self.assertNotEqual(original_sub, exported_sub)
+            # Subscriptions of the non-consenting users get get scrubbed to replace
+            # "user setting"-type attributes with default values, so the #foo color
+            # will be overwritten in the process.
+            self.assertNotEqual(exported_sub["color"], "#foo")
+
+        # Verify the export/scrubbing of user settings for consenting/non-consenting users.
+        realm_user_default = RealmUserDefault.objects.get(realm=realm)
+        exported_consented_user = next(
+            user for user in realm_data["zerver_userprofile"] if user["id"] == consented_user.id
+        )
+        exported_non_consented_user = next(
+            user
+            for user in realm_data["zerver_userprofile_mirrordummy"]
+            if user["id"] == non_consented_user.id
+        )
+
+        # Settings get exported with the user-set values for consenting users, but are
+        # scrubbed to realm default values for non-consenting users.
+        self.assertEqual(exported_consented_user["web_font_size_px"], 123)
+        self.assertNotEqual(exported_non_consented_user["web_font_size_px"], 456)
+        self.assertEqual(
+            exported_non_consented_user["web_font_size_px"], realm_user_default.web_font_size_px
+        )
+        # Email visibility is an exception, as we should preserve user's choice in who to show their real
+        # email address to across the export->import cycle - regardless of export consent.
+        self.assertEqual(
+            exported_non_consented_user["email_address_visibility"],
+            UserProfile.EMAIL_ADDRESS_VISIBILITY_NOBODY,
+        )
+        # Sanity check that this doesn't match the realm default - since with a matching realm default, the
+        # above asserion would be moot and not testing the preservation of the email_address_visibility
+        # setting.
+        self.assertNotEqual(
+            realm_user_default.email_address_visibility, UserProfile.EMAIL_ADDRESS_VISIBILITY_NOBODY
+        )
+
    """
    Tests for import_realm
    """
@@ -1742,9 +2285,16 @@ class RealmImportExportTest(ExportFile):
    def test_import_realm_with_no_realm_user_default_table(self) -> None:
        original_realm = Realm.objects.get(string_id="zulip")

-        RealmUserDefault.objects.get(realm=original_realm).delete()
        self.export_realm_and_create_auditlog(original_realm)

+        # We want to remove the RealmUserDefault object from the export.
+        realm_data = read_json("realm.json")
+        realm_data["zerver_realmuserdefault"] = []
+        output_dir = get_output_dir()
+        full_fn = os.path.join(output_dir, "realm.json")
+        with open(full_fn, "wb") as f:
+            f.write(orjson.dumps(realm_data))
+
        with self.settings(BILLING_ENABLED=False), self.assertLogs(level="INFO"):
            do_import_realm(get_output_dir(), "test-zulip")

@@ -2107,6 +2657,7 @@ class RealmImportExportTest(ExportFile):
            self.export_realm(
                realm,
                export_type=RealmExport.EXPORT_FULL_WITH_CONSENT,
+                exportable_user_ids=get_consented_user_ids(realm),
            )
            do_import_realm(get_output_dir(), "test-zulip")

@@ -2133,6 +2684,7 @@ class RealmImportExportTest(ExportFile):
            self.export_realm(
                realm,
                export_type=RealmExport.EXPORT_FULL_WITH_CONSENT,
+                exportable_user_ids=get_consented_user_ids(realm),
            )
            do_import_realm(get_output_dir(), "test-zulip")
        expected_error_message = self.get_applied_migrations_error_message(
@@ -2156,6 +2708,7 @@ class RealmImportExportTest(ExportFile):
            self.export_realm_and_create_auditlog(
                realm,
                export_type=RealmExport.EXPORT_FULL_WITH_CONSENT,
+                exportable_user_ids=get_consented_user_ids(realm),
            )
            do_import_realm(get_output_dir(), "test-zulip")
        missing_apps_log = [
@@ -2183,6 +2736,7 @@ class RealmImportExportTest(ExportFile):
            self.export_realm_and_create_auditlog(
                realm,
                export_type=RealmExport.EXPORT_FULL_WITH_CONSENT,
+                exportable_user_ids=get_consented_user_ids(realm),
            )
            do_import_realm(get_output_dir(), "test-zulip")
        missing_apps_log = [
@@ -2218,6 +2772,7 @@ class RealmImportExportTest(ExportFile):
            self.export_realm_and_create_auditlog(
                realm,
                export_type=RealmExport.EXPORT_FULL_WITH_CONSENT,
+                exportable_user_ids=get_consented_user_ids(realm),
            )
            do_import_realm(get_output_dir(), "test-zulip")

@@ -2277,6 +2832,7 @@ class RealmImportExportTest(ExportFile):
            self.export_realm_and_create_auditlog(
                realm,
                export_type=RealmExport.EXPORT_FULL_WITH_CONSENT,
+                exportable_user_ids=get_consented_user_ids(realm),
            )
            do_import_realm(get_output_dir(), "test-zulip")

@@ -2652,3 +3208,10 @@ class SingleUserExportTest(ExportFile):
                    Try to mostly keep checkers in alphabetical order.
                    """
                )
+
+
+class GetFKFieldNameTest(ZulipTestCase):
+    def test_get_fk_field_name(self) -> None:
+        self.assertEqual(get_fk_field_name(UserProfile, Realm), "realm")
+        self.assertEqual(get_fk_field_name(Reaction, Stream), None)
+        self.assertEqual(get_fk_field_name(Message, UserProfile), "sender")