mirror of
				https://github.com/zulip/zulip.git
				synced 2025-10-30 19:43:47 +00:00 
			
		
		
		
	This avoids a potentially unnecessary message.recipient fetch required by is_stream_message(). The is_stream_message() method predates the addition of the denormalized is_channel_message column and is now unnecessary. In practice, we usually fetch Message objects with `.recipient` already, so I don't expect any notable performance impact here - but it's still a useful change to make.
		
			
				
	
	
		
			1161 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1161 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from datetime import datetime, timedelta
 | |
| from typing import Any
 | |
| from unittest import mock
 | |
| 
 | |
| import time_machine
 | |
| from django.conf import settings
 | |
| from django.utils.timezone import now as timezone_now
 | |
| from typing_extensions import override
 | |
| 
 | |
| from zerver.actions.create_realm import do_create_realm
 | |
| from zerver.actions.message_delete import do_delete_messages
 | |
| from zerver.actions.message_send import internal_send_private_message
 | |
| from zerver.actions.realm_settings import do_set_realm_property
 | |
| from zerver.actions.scheduled_messages import check_schedule_message, delete_scheduled_message
 | |
| from zerver.actions.submessage import do_add_submessage
 | |
| from zerver.lib.retention import (
 | |
|     archive_messages,
 | |
|     clean_archived_data,
 | |
|     get_realms_and_streams_for_archiving,
 | |
|     move_messages_to_archive,
 | |
|     restore_all_data_from_archive,
 | |
|     restore_retention_policy_deletions_for_stream,
 | |
| )
 | |
| from zerver.lib.test_classes import ZulipTestCase
 | |
| from zerver.lib.test_helpers import zulip_reaction_info
 | |
| from zerver.lib.upload import create_attachment
 | |
| from zerver.models import (
 | |
|     ArchivedAttachment,
 | |
|     ArchivedMessage,
 | |
|     ArchivedReaction,
 | |
|     ArchivedSubMessage,
 | |
|     ArchivedUserMessage,
 | |
|     ArchiveTransaction,
 | |
|     Attachment,
 | |
|     Message,
 | |
|     Reaction,
 | |
|     Realm,
 | |
|     Stream,
 | |
|     SubMessage,
 | |
|     UserMessage,
 | |
| )
 | |
| from zerver.models.clients import get_client
 | |
| from zerver.models.realms import get_realm
 | |
| from zerver.models.streams import get_stream
 | |
| from zerver.models.users import get_system_bot
 | |
| 
 | |
# Message retention periods, in days, that ArchiveMessagesTestingBase.setUp
# assigns to the "zulip" and "zephyr" realms respectively.
ZULIP_REALM_DAYS = 30
MIT_REALM_DAYS = 100
 | |
| 
 | |
| 
 | |
class RetentionTestingBase(ZulipTestCase):
    """Shared helpers for comparing the archive tables with the live tables."""

    def _get_usermessage_ids(self, message_ids: list[int]) -> list[int]:
        """Return the ids of all UserMessage rows that reference ``message_ids``."""
        matching_rows = UserMessage.objects.filter(message_id__in=message_ids)
        return list(matching_rows.values_list("id", flat=True))

    def _verify_archive_data(
        self, expected_message_ids: list[int], expected_usermessage_ids: list[int]
    ) -> None:
        """Assert that the archive holds exactly the expected rows.

        Also asserts that none of the expected rows remain in the live
        Message / UserMessage tables.
        """
        archived_message_ids = set(ArchivedMessage.objects.values_list("id", flat=True))
        self.assertEqual(archived_message_ids, set(expected_message_ids))

        archived_usermessage_ids = set(ArchivedUserMessage.objects.values_list("id", flat=True))
        self.assertEqual(archived_usermessage_ids, set(expected_usermessage_ids))

        # Whatever was archived must no longer be present in the normal tables.
        live_messages = Message.objects.filter(id__in=expected_message_ids)
        self.assertEqual(live_messages.count(), 0)
        live_usermessages = UserMessage.objects.filter(id__in=expected_usermessage_ids)
        self.assertEqual(live_usermessages.count(), 0)

    def _verify_restored_data(
        self, expected_message_ids: list[int], expected_usermessage_ids: list[int]
    ) -> None:
        """Assert that the expected rows are back in the live tables.

        The archive tables are expected to still hold exactly the same
        rows, since restoring does not delete archived data.
        """
        expected_messages = set(expected_message_ids)
        expected_usermessages = set(expected_usermessage_ids)

        # The live tables must contain every expected row again.
        restored_message_ids = set(
            Message.objects.filter(id__in=expected_message_ids).values_list("id", flat=True)
        )
        self.assertEqual(restored_message_ids, expected_messages)

        restored_usermessage_ids = set(
            UserMessage.objects.filter(id__in=expected_usermessage_ids).values_list(
                "id", flat=True
            )
        )
        self.assertEqual(restored_usermessage_ids, expected_usermessages)

        # The archive keeps its copies - restoring does not remove them.
        archived_message_ids = set(ArchivedMessage.objects.values_list("id", flat=True))
        self.assertEqual(archived_message_ids, expected_messages)

        archived_usermessage_ids = set(ArchivedUserMessage.objects.values_list("id", flat=True))
        self.assertEqual(archived_usermessage_ids, expected_usermessages)
 | |
| 
 | |
| 
 | |
class ArchiveMessagesTestingBase(RetentionTestingBase):
    """Fixture helpers for tests of retention-policy driven archiving."""

    @override
    def setUp(self) -> None:
        """Give both realms a retention policy and reset all message dates."""
        super().setUp()
        self.zulip_realm = get_realm("zulip")
        self.mit_realm = get_realm("zephyr")
        self._set_realm_message_retention_value(self.zulip_realm, ZULIP_REALM_DAYS)
        self._set_realm_message_retention_value(self.mit_realm, MIT_REALM_DAYS)

        # Set publication date of all existing messages to "now", so that we have full
        # control over what's expired and what isn't.
        Message.objects.all().update(date_sent=timezone_now())

    def _set_realm_message_retention_value(self, realm: Realm, retention_period: int) -> None:
        """Store ``retention_period`` as the realm's message_retention_days."""
        realm.message_retention_days = retention_period
        realm.save()

    def _set_stream_message_retention_value(
        self, stream: Stream, retention_period: int | None
    ) -> None:
        """Store ``retention_period`` as the stream's message_retention_days."""
        stream.message_retention_days = retention_period
        stream.save(update_fields=["message_retention_days"])

    def _change_messages_date_sent(self, msgs_ids: list[int], date_sent: datetime) -> None:
        """Overwrite date_sent for every message whose id is in ``msgs_ids``."""
        Message.objects.filter(id__in=msgs_ids).update(date_sent=date_sent)

    def _make_mit_messages(self, message_quantity: int, date_sent: datetime) -> list[int]:
        """Send ``message_quantity`` direct messages between two MIT users.

        All of the new messages get ``date_sent`` as their sending date.
        Returns the ids of the messages that were sent.
        """
        sender = self.mit_user("espuser")
        recipient = self.mit_user("starnine")
        msg_ids = [self.send_personal_message(sender, recipient) for _ in range(message_quantity)]

        self._change_messages_date_sent(msg_ids, date_sent)
        return msg_ids

    def _send_cross_realm_personal_message(self) -> int:
        """Send a direct message from the notification bot to hamlet; return its id."""
        # Send message from bot to users from different realm.
        bot_email = "notification-bot@zulip.com"
        internal_realm = get_realm(settings.SYSTEM_BOT_REALM)
        zulip_user = self.example_user("hamlet")
        msg_id = internal_send_private_message(
            sender=get_system_bot(bot_email, internal_realm.id),
            recipient_user=zulip_user,
            content="test message",
        )
        assert msg_id is not None
        return msg_id

    def _send_personal_message_to_cross_realm_bot(self) -> int:
        """Send a direct message from hamlet to the notification bot; return its id."""
        # Send message from a user to a bot that lives in a different realm.
        bot_email = "notification-bot@zulip.com"
        internal_realm = get_realm(settings.SYSTEM_BOT_REALM)
        zulip_user = self.example_user("hamlet")
        msg_id = internal_send_private_message(
            sender=zulip_user,
            recipient_user=get_system_bot(bot_email, internal_realm.id),
            content="test message",
        )
        assert msg_id is not None
        return msg_id

    def _make_expired_zulip_messages(self, message_quantity: int) -> list[int]:
        """Backdate existing zulip-realm messages so that they count as expired.

        Takes up to ``message_quantity`` messages in id order, skipping the
        first three, sets their date_sent to ZULIP_REALM_DAYS + 1 days ago
        and returns their ids.
        """
        msg_ids = list(
            Message.objects.order_by("id")
            .filter(realm=self.zulip_realm)
            .values_list("id", flat=True)[3 : 3 + message_quantity]
        )
        self._change_messages_date_sent(
            msg_ids,
            timezone_now() - timedelta(days=ZULIP_REALM_DAYS + 1),
        )

        return msg_ids

    def _send_messages_with_attachments(self) -> dict[str, int]:
        """Send three Denmark messages that all link the same three uploads.

        Two messages are sent by hamlet and one by othello. Only the first
        of hamlet's messages is backdated. Returns a dict mapping
        "expired_message_id", "actual_message_id" and
        "other_user_message_id" to the corresponding message ids.
        """
        user_profile = self.example_user("hamlet")
        host = user_profile.realm.host
        realm_id = get_realm("zulip").id
        dummy_files = [
            ("zulip.txt", f"{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt"),
            ("temp_file.py", f"{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py"),
            ("abc.py", f"{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py"),
        ]

        for file_name, path_id in dummy_files:
            create_attachment(
                file_name, path_id, "text/plain", b"1234567890", user_profile, user_profile.realm
            )

        self.subscribe(user_profile, "Denmark")
        body = (
            "Some files here ..."
            f" [zulip.txt](http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)"
            f" http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some"
            f" more.... http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py"
        )

        expired_message_id = self.send_stream_message(user_profile, "Denmark", body)
        actual_message_id = self.send_stream_message(user_profile, "Denmark", body)

        othello = self.example_user("othello")
        other_message_id = self.send_stream_message(othello, "Denmark", body)
        # NOTE(review): this backdates by MIT_REALM_DAYS although the messages
        # presumably belong to the zulip realm; the offset is larger than
        # ZULIP_REALM_DAYS as well, so the message is expired either way.
        self._change_messages_date_sent(
            [expired_message_id], timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
        )
        return {
            "expired_message_id": expired_message_id,
            "actual_message_id": actual_message_id,
            "other_user_message_id": other_message_id,
        }
 | |
| 
 | |
| 
 | |
class TestArchiveMessagesGeneral(ArchiveMessagesTestingBase):
    """Tests of archive_messages() under realm and stream retention policies."""

    def test_no_expired_messages(self) -> None:
        """With no backdated messages, archiving leaves the archive tables empty."""
        archive_messages()

        for archive_model in (ArchivedUserMessage, ArchivedMessage):
            self.assertEqual(archive_model.objects.count(), 0)

    def test_expired_messages_in_each_realm(self) -> None:
        """General test for archiving expired messages properly with
        multiple realms involved"""
        mit_expired_date = timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
        mit_fresh_date = timezone_now() - timedelta(days=MIT_REALM_DAYS - 1)

        # Five MIT messages beyond the retention period, and four within it.
        expired_mit_msg_ids = self._make_mit_messages(5, mit_expired_date)
        self._make_mit_messages(4, mit_fresh_date)

        # Backdate seven existing Zulip messages so that they are expired too.
        expired_zulip_msg_ids = self._make_expired_zulip_messages(7)

        all_expired_msg_ids = expired_mit_msg_ids + expired_zulip_msg_ids
        all_expired_usermsg_ids = self._get_usermessage_ids(all_expired_msg_ids)

        archive_messages()
        self._verify_archive_data(all_expired_msg_ids, all_expired_usermsg_ids)

        restore_all_data_from_archive()
        self._verify_restored_data(all_expired_msg_ids, all_expired_usermsg_ids)

    def test_expired_messages_in_one_realm(self) -> None:
        """Test with a retention policy set for only the MIT realm"""
        self._set_realm_message_retention_value(self.zulip_realm, -1)

        mit_expired_date = timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
        mit_fresh_date = timezone_now() - timedelta(days=MIT_REALM_DAYS - 1)

        # Five MIT messages beyond the retention period, and four within it.
        expired_mit_msg_ids = self._make_mit_messages(5, mit_expired_date)
        self._make_mit_messages(4, mit_fresh_date)

        # Backdate some Zulip messages as well; the realm has no retention
        # policy here, so they shouldn't get archived.
        self._make_expired_zulip_messages(7)

        # Only MIT has a retention policy, so only its messages are expected.
        expected_msg_ids = expired_mit_msg_ids
        expected_usermsg_ids = self._get_usermessage_ids(expected_msg_ids)

        archive_messages()
        self._verify_archive_data(expected_msg_ids, expected_usermsg_ids)

        restore_all_data_from_archive()
        self._verify_restored_data(expected_msg_ids, expected_usermsg_ids)

        self._set_realm_message_retention_value(self.zulip_realm, ZULIP_REALM_DAYS)

    def test_different_stream_realm_policies(self) -> None:
        """Check how the stream-level policy combines with the realm-level one."""
        verona = get_stream("Verona", self.zulip_realm)
        hamlet = self.example_user("hamlet")

        msg_id = self.send_stream_message(hamlet, "Verona", "test")
        usermsg_ids = self._get_usermessage_ids([msg_id])
        self._change_messages_date_sent([msg_id], timezone_now() - timedelta(days=2))

        # (realm policy, stream policy) pairs under which nothing gets archived:
        non_archiving_policies: list[tuple[int, int | None]] = [
            (1, -1),  # stream's retention policy set to -1
            (-1, None),  # stream and realm have no retention policy
        ]
        for realm_days, stream_days in non_archiving_policies:
            self._set_realm_message_retention_value(self.zulip_realm, realm_days)
            self._set_stream_message_retention_value(verona, stream_days)
            archive_messages()
            self._verify_archive_data([], [])

        # Archive if stream has a retention policy set:
        self._set_realm_message_retention_value(self.zulip_realm, -1)
        self._set_stream_message_retention_value(verona, 1)
        archive_messages()
        self._verify_archive_data([msg_id], usermsg_ids)

    def test_cross_realm_personal_message_archiving(self) -> None:
        """Check that cross-realm personal messages get correctly archived."""

        # Cover both directions of cross-realm messages: six from a bot to
        # a user, followed by six from a user to a bot.
        bot_to_user_ids = [self._send_cross_realm_personal_message() for _ in range(6)]
        user_to_bot_ids = [self._send_personal_message_to_cross_realm_bot() for _ in range(6)]
        msg_ids = bot_to_user_ids + user_to_bot_ids
        usermsg_ids = self._get_usermessage_ids(msg_ids)

        # Make the messages expired in the Zulip realm.
        expired_date = timezone_now() - timedelta(days=ZULIP_REALM_DAYS + 1)
        self._change_messages_date_sent(msg_ids, expired_date)

        archive_messages()
        self._verify_archive_data(msg_ids, usermsg_ids)

    def test_archiving_interrupted(self) -> None:
        """Check that queries get rolled back to a consistent state
        if archiving gets interrupted in the middle of processing a chunk."""
        expired_msg_ids = self._make_expired_zulip_messages(7)
        expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)

        # Make the archiving process of a chunk blow up near its end:
        failing_delete = mock.patch(
            "zerver.lib.retention.delete_messages", side_effect=Exception("delete_messages error")
        )
        with failing_delete:
            with self.assertRaisesRegex(Exception, r"^delete_messages error$"):
                # A large chunk_size ensures everything happens in a single batch.
                archive_messages(chunk_size=1000)

            # The archiving code ran, but the exception should have rolled everything back:
            self._verify_archive_data([], [])

            surviving_msg_ids = set(
                Message.objects.filter(id__in=expired_msg_ids).values_list("id", flat=True)
            )
            self.assertEqual(surviving_msg_ids, set(expired_msg_ids))

            surviving_usermsg_ids = set(
                UserMessage.objects.filter(id__in=expired_usermsg_ids).values_list(
                    "id", flat=True
                )
            )
            self.assertEqual(surviving_usermsg_ids, set(expired_usermsg_ids))

    def test_archive_message_tool(self) -> None:
        """End-to-end test of the archiving tool, directly calling
        archive_messages."""
        mit_expired_date = timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
        mit_fresh_date = timezone_now() - timedelta(days=MIT_REALM_DAYS - 1)

        # Five MIT messages beyond the retention period, and four within it.
        expired_mit_msg_ids = self._make_mit_messages(5, mit_expired_date)
        self._make_mit_messages(4, mit_fresh_date)

        # Change some Zulip messages to be expired:
        expired_zulip_msg_ids = self._make_expired_zulip_messages(7)

        # One cross-realm message, expired in the recipient's realm:
        expired_crossrealm_msg_id = self._send_cross_realm_personal_message()
        zulip_expired_date = timezone_now() - timedelta(days=ZULIP_REALM_DAYS + 1)
        self._change_messages_date_sent([expired_crossrealm_msg_id], zulip_expired_date)

        expired_msg_ids = [*expired_mit_msg_ids, *expired_zulip_msg_ids, expired_crossrealm_msg_id]
        expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)

        # A low chunk_size exercises the batching logic.
        archive_messages(chunk_size=2)
        # Make sure we archived what needed:
        self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)

        restore_all_data_from_archive()
        self._verify_restored_data(expired_msg_ids, expired_usermsg_ids)

    def test_archiving_attachments(self) -> None:
        """End-to-end test for the logic for archiving attachments.  This test
        is hard to read without first reading _send_messages_with_attachments"""
        msgs_ids = self._send_messages_with_attachments()

        def expire(key: str) -> None:
            # Backdate the named message beyond the retention period.
            self._change_messages_date_sent(
                [msgs_ids[key]], timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
            )

        # First, confirm deleting the oldest message
        # (`expired_message_id`) creates ArchivedAttachment objects
        # and associates that message ID with them, but does not
        # delete the Attachment object.
        archive_messages()
        self.assertEqual(ArchivedAttachment.objects.count(), 3)
        linked_message_ids = ArchivedAttachment.objects.distinct("messages__id").values_list(
            "messages__id", flat=True
        )
        self.assertEqual(list(linked_message_ids), [msgs_ids["expired_message_id"]])
        self.assertEqual(Attachment.objects.count(), 3)

        # Now make `actual_message_id` expired too.  We still don't
        # delete the Attachment objects.
        expire("actual_message_id")
        archive_messages()
        self.assertEqual(Attachment.objects.count(), 3)

        # Finally, make the last message mentioning those attachments
        # expired.  We should now delete the Attachment objects and
        # each ArchivedAttachment object should list all 3 messages.
        expire("other_user_message_id")

        archive_messages()
        self.assertEqual(Attachment.objects.count(), 0)
        self.assertEqual(ArchivedAttachment.objects.count(), 3)
        archived_linked_ids = (
            ArchivedAttachment.objects.distinct("messages__id")
            .order_by("messages__id")
            .values_list("messages__id", flat=True)
        )
        self.assertEqual(list(archived_linked_ids), sorted(msgs_ids.values()))

        restore_all_data_from_archive()
        # Attachments should have been restored:
        self.assertEqual(Attachment.objects.count(), 3)
        # Archived data doesn't get deleted by restoring.
        self.assertEqual(ArchivedAttachment.objects.count(), 3)
        restored_linked_ids = (
            Attachment.objects.distinct("messages__id")
            .order_by("messages__id")
            .values_list("messages__id", flat=True)
        )
        self.assertEqual(list(restored_linked_ids), sorted(msgs_ids.values()))

    def test_restoring_and_rearchiving(self) -> None:
        """Archive, restore, then archive again and check the transaction bookkeeping."""
        expired_date = timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
        expired_msg_ids = self._make_mit_messages(7, expired_date)
        expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)

        archive_messages(chunk_size=4)
        self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)

        # With chunk_size 4, there should be 2 transactions
        first_transactions = list(ArchiveTransaction.objects.all())
        self.assert_length(first_transactions, 2)

        now = timezone_now()
        with time_machine.travel(now, tick=False):
            restore_all_data_from_archive()

        for transaction in first_transactions:
            transaction.refresh_from_db()
        for transaction in first_transactions:
            self.assertTrue(transaction.restored)
        for transaction in first_transactions:
            self.assertEqual(transaction.restored_timestamp, now)

        archive_messages(chunk_size=10)
        self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)

        all_transactions = ArchiveTransaction.objects.order_by("id")
        self.assert_length(all_transactions, 3)

        # Check that the re-archived messages are correctly assigned to the new transaction:
        for archived_message in ArchivedMessage.objects.filter(id__in=expired_msg_ids):
            self.assertEqual(archived_message.archive_transaction_id, all_transactions[2].id)
 | |
| 
 | |
| 
 | |
class TestArchivingSubMessages(ArchiveMessagesTestingBase):
    """Tests that SubMessage rows follow their message into and out of the archive."""

    def test_archiving_submessages(self) -> None:
        """Submessages of expired messages get archived and can be restored."""
        expired_msg_ids = self._make_expired_zulip_messages(2)
        cordelia = self.example_user("cordelia")
        hamlet = self.example_user("hamlet")

        # (author, target message id, content) for each submessage to create.
        submessage_specs = [
            (cordelia, expired_msg_ids[0], '{"name": "alice", "salary": 20}'),
            (hamlet, expired_msg_ids[0], '{"name": "john", "salary": 30}'),
            (cordelia, expired_msg_ids[1], '{"name": "jack", "salary": 10}'),
        ]
        for author, target_message_id, payload in submessage_specs:
            do_add_submessage(
                realm=self.zulip_realm,
                sender_id=author.id,
                message_id=target_message_id,
                msg_type="whatever",
                content=payload,
            )

        submessage_ids = list(
            SubMessage.objects.filter(message_id__in=expired_msg_ids).values_list("id", flat=True),
        )
        self.assert_length(submessage_ids, 3)

        live_submessages = SubMessage.objects.filter(id__in=submessage_ids)
        self.assertEqual(live_submessages.count(), 3)
        archive_messages()
        self.assertEqual(live_submessages.count(), 0)

        archived_ids = set(
            ArchivedSubMessage.objects.filter(id__in=submessage_ids).values_list("id", flat=True)
        )
        self.assertEqual(archived_ids, set(submessage_ids))

        restore_all_data_from_archive()
        restored_ids = set(live_submessages.values_list("id", flat=True))
        self.assertEqual(restored_ids, set(submessage_ids))
 | |
| 
 | |
| 
 | |
class TestArchivingReactions(ArchiveMessagesTestingBase):
    """Tests that Reaction rows follow their message into and out of the archive."""

    def test_archiving_reactions(self) -> None:
        """Reactions on expired messages get archived and can be restored."""
        expired_msg_ids = self._make_expired_zulip_messages(2)

        hamlet = self.example_user("hamlet")
        cordelia = self.example_user("cordelia")

        # (reacting user, target message id) for each reaction to post.
        reaction_specs = [
            (hamlet, expired_msg_ids[0]),
            (cordelia, expired_msg_ids[0]),
            (hamlet, expired_msg_ids[1]),
        ]
        for reacting_user, target_message_id in reaction_specs:
            self.api_post(
                reacting_user,
                f"/api/v1/messages/{target_message_id}/reactions",
                zulip_reaction_info(),
            )

        reaction_ids = list(
            Reaction.objects.filter(message_id__in=expired_msg_ids).values_list("id", flat=True),
        )
        self.assert_length(reaction_ids, 3)

        live_reactions = Reaction.objects.filter(id__in=reaction_ids)
        self.assertEqual(live_reactions.count(), 3)
        archive_messages()
        self.assertEqual(live_reactions.count(), 0)

        archived_ids = set(
            ArchivedReaction.objects.filter(id__in=reaction_ids).values_list("id", flat=True)
        )
        self.assertEqual(archived_ids, set(reaction_ids))

        restore_all_data_from_archive()
        restored_ids = set(live_reactions.values_list("id", flat=True))
        self.assertEqual(restored_ids, set(reaction_ids))
 | |
| 
 | |
| 
 | |
class MoveMessageToArchiveBase(RetentionTestingBase):
    """Fixture helpers for tests that call move_messages_to_archive directly."""

    @override
    def setUp(self) -> None:
        """Pick the sender and recipient used by the tests."""
        super().setUp()
        self.sender = self.example_user("hamlet")
        self.recipient = self.example_user("cordelia")

    def _create_attachments(self) -> None:
        """Create five text attachments uploaded by hamlet."""
        realm_id = get_realm("zulip").id
        uploader = self.example_user("hamlet")
        file_names = ("zulip.txt", "temp_file.py", "abc.py", "hello.txt", "new.py")
        for file_name in file_names:
            path_id = f"{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/{file_name}"
            create_attachment(
                file_name, path_id, "text/plain", b"1234567890", uploader, uploader.realm
            )

    def _assert_archive_empty(self) -> None:
        """Assert that no archived user messages, messages or attachments exist."""
        for archive_model in (ArchivedUserMessage, ArchivedMessage, ArchivedAttachment):
            self.assertFalse(archive_model.objects.exists())
 | |
| 
 | |
| 
 | |
| class MoveMessageToArchiveGeneral(MoveMessageToArchiveBase):
 | |
|     def test_personal_messages_archiving(self) -> None:
 | |
|         msg_ids = [self.send_personal_message(self.sender, self.recipient) for i in range(3)]
 | |
|         usermsg_ids = self._get_usermessage_ids(msg_ids)
 | |
| 
 | |
|         self._assert_archive_empty()
 | |
|         move_messages_to_archive(message_ids=msg_ids)
 | |
|         self._verify_archive_data(msg_ids, usermsg_ids)
 | |
| 
 | |
|         restore_all_data_from_archive()
 | |
|         self._verify_restored_data(msg_ids, usermsg_ids)
 | |
| 
 | |
|     def test_move_messages_to_archive_with_realm_argument(self) -> None:
 | |
|         realm = get_realm("zulip")
 | |
|         msg_ids = [self.send_personal_message(self.sender, self.recipient) for i in range(3)]
 | |
|         usermsg_ids = self._get_usermessage_ids(msg_ids)
 | |
| 
 | |
|         self._assert_archive_empty()
 | |
|         move_messages_to_archive(message_ids=msg_ids, realm=realm)
 | |
|         self._verify_archive_data(msg_ids, usermsg_ids)
 | |
| 
 | |
|         archive_transaction = ArchiveTransaction.objects.last()
 | |
|         assert archive_transaction is not None
 | |
|         self.assertEqual(archive_transaction.realm, realm)
 | |
| 
 | |
|     def test_stream_messages_archiving(self) -> None:
 | |
|         msg_ids = [self.send_stream_message(self.sender, "Verona") for i in range(3)]
 | |
|         usermsg_ids = self._get_usermessage_ids(msg_ids)
 | |
| 
 | |
|         self._assert_archive_empty()
 | |
|         move_messages_to_archive(message_ids=msg_ids)
 | |
|         self._verify_archive_data(msg_ids, usermsg_ids)
 | |
| 
 | |
|         restore_all_data_from_archive()
 | |
|         self._verify_restored_data(msg_ids, usermsg_ids)
 | |
| 
 | |
|     def test_archiving_messages_second_time(self) -> None:
 | |
|         msg_ids = [self.send_stream_message(self.sender, "Verona") for i in range(3)]
 | |
|         usermsg_ids = self._get_usermessage_ids(msg_ids)
 | |
| 
 | |
|         self._assert_archive_empty()
 | |
|         move_messages_to_archive(message_ids=msg_ids)
 | |
|         self._verify_archive_data(msg_ids, usermsg_ids)
 | |
| 
 | |
|         with self.assertRaises(Message.DoesNotExist):
 | |
|             move_messages_to_archive(message_ids=msg_ids)
 | |
| 
 | |
|     def test_archiving_messages_multiple_realms(self) -> None:
 | |
|         """
 | |
|         Verifies that move_messages_to_archive works correctly
 | |
|         if called on messages in multiple realms.
 | |
|         """
 | |
|         iago = self.example_user("iago")
 | |
|         othello = self.example_user("othello")
 | |
| 
 | |
|         cordelia = self.lear_user("cordelia")
 | |
|         king = self.lear_user("king")
 | |
| 
 | |
|         zulip_msg_ids = [self.send_personal_message(iago, othello) for i in range(3)]
 | |
|         lear_msg_ids = [self.send_personal_message(cordelia, king) for i in range(3)]
 | |
|         msg_ids = zulip_msg_ids + lear_msg_ids
 | |
|         usermsg_ids = self._get_usermessage_ids(msg_ids)
 | |
| 
 | |
|         self._assert_archive_empty()
 | |
|         move_messages_to_archive(message_ids=msg_ids)
 | |
|         self._verify_archive_data(msg_ids, usermsg_ids)
 | |
| 
 | |
|         restore_all_data_from_archive()
 | |
|         self._verify_restored_data(msg_ids, usermsg_ids)
 | |
| 
 | |
|     def test_archiving_messages_with_attachment(self) -> None:
 | |
|         self._create_attachments()
 | |
|         realm_id = get_realm("zulip").id
 | |
|         host = get_realm("zulip").host
 | |
|         body1 = f"""Some files here ...[zulip.txt](
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py ....
 | |
|             Some more.... http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py
 | |
|         """
 | |
|         body2 = f"""Some files here
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt ...
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt ....
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py ....
 | |
|         """
 | |
| 
 | |
|         msg_ids = [
 | |
|             self.send_personal_message(self.sender, self.recipient, body1),
 | |
|             self.send_personal_message(self.sender, self.recipient, body2),
 | |
|         ]
 | |
| 
 | |
|         attachment_id_to_message_ids: dict[int, list[int]] = {}
 | |
|         attachment_ids = list(
 | |
|             Attachment.objects.filter(messages__id__in=msg_ids).values_list("id", flat=True),
 | |
|         )
 | |
|         for attachment_id in attachment_ids:
 | |
|             attachment_id_to_message_ids[attachment_id] = list(
 | |
|                 Message.objects.filter(realm_id=realm_id, attachment__id=attachment_id).values_list(
 | |
|                     "id", flat=True
 | |
|                 ),
 | |
|             )
 | |
| 
 | |
|         usermsg_ids = self._get_usermessage_ids(msg_ids)
 | |
| 
 | |
|         self._assert_archive_empty()
 | |
|         move_messages_to_archive(message_ids=msg_ids)
 | |
|         self._verify_archive_data(msg_ids, usermsg_ids)
 | |
| 
 | |
|         self.assertFalse(Attachment.objects.exists())
 | |
|         archived_attachment_ids = list(
 | |
|             ArchivedAttachment.objects.filter(messages__id__in=msg_ids).values_list(
 | |
|                 "id", flat=True
 | |
|             ),
 | |
|         )
 | |
| 
 | |
|         self.assertEqual(set(attachment_ids), set(archived_attachment_ids))
 | |
|         for attachment_id in archived_attachment_ids:
 | |
|             self.assertEqual(
 | |
|                 set(attachment_id_to_message_ids[attachment_id]),
 | |
|                 set(
 | |
|                     ArchivedMessage.objects.filter(attachment__id=attachment_id).values_list(
 | |
|                         "id", flat=True
 | |
|                     )
 | |
|                 ),
 | |
|             )
 | |
| 
 | |
|         restore_all_data_from_archive()
 | |
|         self._verify_restored_data(msg_ids, usermsg_ids)
 | |
| 
 | |
|         restored_attachment_ids = list(
 | |
|             Attachment.objects.filter(messages__id__in=msg_ids).values_list("id", flat=True),
 | |
|         )
 | |
| 
 | |
|         self.assertEqual(set(attachment_ids), set(restored_attachment_ids))
 | |
|         for attachment_id in restored_attachment_ids:
 | |
|             self.assertEqual(
 | |
|                 set(attachment_id_to_message_ids[attachment_id]),
 | |
|                 set(
 | |
|                     Message.objects.filter(
 | |
|                         realm_id=realm_id, attachment__id=attachment_id
 | |
|                     ).values_list("id", flat=True)
 | |
|                 ),
 | |
|             )
 | |
| 
 | |
|     def test_archiving_message_with_shared_attachment(self) -> None:
 | |
|         # Make sure that attachments still in use in other messages don't get deleted:
 | |
|         self._create_attachments()
 | |
|         realm_id = get_realm("zulip").id
 | |
|         host = get_realm("zulip").host
 | |
|         body = f"""Some files here ...[zulip.txt](
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py ....
 | |
|             Some more.... http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py ...
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py ....
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt ....
 | |
|         """
 | |
| 
 | |
|         msg_id = self.send_personal_message(self.sender, self.recipient, body)
 | |
|         # Simulate a reply with the same contents.
 | |
|         reply_msg_id = self.send_personal_message(
 | |
|             from_user=self.recipient,
 | |
|             to_user=self.sender,
 | |
|             content=body,
 | |
|         )
 | |
| 
 | |
|         usermsg_ids = self._get_usermessage_ids([msg_id])
 | |
|         attachment_ids = list(
 | |
|             Attachment.objects.filter(messages__id=msg_id).values_list("id", flat=True),
 | |
|         )
 | |
| 
 | |
|         self._assert_archive_empty()
 | |
|         # Archive one of the messages:
 | |
|         move_messages_to_archive(message_ids=[msg_id])
 | |
|         self._verify_archive_data([msg_id], usermsg_ids)
 | |
|         # Attachments shouldn't have been deleted, as the second message links to them:
 | |
|         self.assertEqual(Attachment.objects.count(), 5)
 | |
| 
 | |
|         self.assertEqual(
 | |
|             set(
 | |
|                 ArchivedAttachment.objects.filter(messages__id=msg_id).values_list("id", flat=True)
 | |
|             ),
 | |
|             set(attachment_ids),
 | |
|         )
 | |
| 
 | |
|         # Restore the first message:
 | |
|         restore_all_data_from_archive()
 | |
|         # Archive the second:
 | |
|         move_messages_to_archive(message_ids=[reply_msg_id])
 | |
|         # The restored messages links to the Attachments, so they shouldn't be deleted:
 | |
|         self.assertEqual(Attachment.objects.count(), 5)
 | |
| 
 | |
|         # Archive the first message again:
 | |
|         move_messages_to_archive(message_ids=[msg_id])
 | |
|         # Now the attachment should have been deleted:
 | |
|         self.assertEqual(Attachment.objects.count(), 0)
 | |
| 
 | |
|         # Restore everything:
 | |
|         restore_all_data_from_archive()
 | |
|         self.assertEqual(
 | |
|             set(Attachment.objects.filter(messages__id=msg_id).values_list("id", flat=True)),
 | |
|             set(attachment_ids),
 | |
|         )
 | |
| 
 | |
|     def test_archiving_message_with_scheduled_message(self) -> None:
 | |
|         # Make sure that attachments referenced by scheduledmessages do't get deleted
 | |
|         self._create_attachments()
 | |
|         realm_id = get_realm("zulip").id
 | |
|         host = get_realm("zulip").host
 | |
|         body = f"""Some files here ...[zulip.txt](
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py ....
 | |
|             Some more.... http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py ...
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py ....
 | |
|             http://{host}/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt ....
 | |
|         """
 | |
| 
 | |
|         msg_id = self.send_personal_message(self.sender, self.recipient, body)
 | |
| 
 | |
|         # Schedule a message with the same contents
 | |
|         scheduled_msg_id = check_schedule_message(
 | |
|             sender=self.sender,
 | |
|             client=get_client("website"),
 | |
|             recipient_type_name="private",
 | |
|             message_to=[self.recipient.id],
 | |
|             topic_name=None,
 | |
|             message_content=body,
 | |
|             deliver_at=timezone_now() + timedelta(hours=1),
 | |
|         )
 | |
| 
 | |
|         usermsg_ids = self._get_usermessage_ids([msg_id])
 | |
|         attachment_ids = list(
 | |
|             Attachment.objects.filter(messages__id=msg_id).values_list("id", flat=True),
 | |
|         )
 | |
| 
 | |
|         self._assert_archive_empty()
 | |
|         # Archive one of the messages:
 | |
|         move_messages_to_archive(message_ids=[msg_id])
 | |
|         self._verify_archive_data([msg_id], usermsg_ids)
 | |
|         # Attachments shouldn't have been deleted, as the scheduled message links to them:
 | |
|         self.assertEqual(Attachment.objects.count(), 5)
 | |
| 
 | |
|         self.assertEqual(
 | |
|             set(
 | |
|                 ArchivedAttachment.objects.filter(messages__id=msg_id).values_list("id", flat=True)
 | |
|             ),
 | |
|             set(attachment_ids),
 | |
|         )
 | |
| 
 | |
|         # Delete the ScheduledMessage
 | |
|         delete_scheduled_message(self.sender, scheduled_msg_id)
 | |
| 
 | |
|         # The Attachment object exists, with no message or scheduledmessage attached
 | |
|         self.assertEqual(Attachment.objects.count(), 5)
 | |
|         self.assertEqual(
 | |
|             Attachment.objects.filter(messages=None, scheduled_messages=None).count(), 5
 | |
|         )
 | |
| 
 | |
|         # There is also the ArchivedAttachment for each of them
 | |
|         self.assertEqual(
 | |
|             set(
 | |
|                 ArchivedAttachment.objects.filter(messages__id=msg_id).values_list("id", flat=True)
 | |
|             ),
 | |
|             set(attachment_ids),
 | |
|         )
 | |
| 
 | |
| 
 | |
class MoveMessageToArchiveWithSubMessages(MoveMessageToArchiveBase):
    """Archiving and restoring a message that has SubMessage rows attached."""

    def test_archiving_message_with_submessages(self) -> None:
        """SubMessage rows follow their message into the archive and back."""
        msg_id = self.send_stream_message(self.sender, "Verona")
        cordelia = self.example_user("cordelia")
        hamlet = self.example_user("hamlet")

        # One submessage per user, attached to the same message.
        submessage_specs = [
            (cordelia, '{"name": "alice", "salary": 20}'),
            (hamlet, '{"name": "john", "salary": 30}'),
        ]
        for user, payload in submessage_specs:
            do_add_submessage(
                realm=get_realm("zulip"),
                sender_id=user.id,
                message_id=msg_id,
                msg_type="whatever",
                content=payload,
            )

        submessage_ids = list(
            SubMessage.objects.filter(message_id=msg_id).values_list("id", flat=True),
        )
        live_submessages = SubMessage.objects.filter(id__in=submessage_ids)

        self.assertEqual(live_submessages.count(), 2)
        move_messages_to_archive(message_ids=[msg_id])

        # Archived copies keep the original ids; the live rows are gone.
        archived_ids = set(
            ArchivedSubMessage.objects.filter(message_id=msg_id).values_list("id", flat=True)
        )
        self.assertEqual(archived_ids, set(submessage_ids))
        self.assertEqual(SubMessage.objects.filter(id__in=submessage_ids).count(), 0)

        restore_all_data_from_archive()
        restored_ids = set(
            SubMessage.objects.filter(id__in=submessage_ids).values_list("id", flat=True)
        )
        self.assertEqual(restored_ids, set(submessage_ids))
 | |
| 
 | |
| 
 | |
class MoveMessageToArchiveWithReactions(MoveMessageToArchiveBase):
    """Archiving and restoring a message that has Reaction rows attached."""

    def test_archiving_message_with_reactions(self) -> None:
        """Reaction rows follow their message into the archive and back."""
        msg_id = self.send_stream_message(self.sender, "Verona")

        # Two users react to the message through the API endpoint.
        reactions_url = f"/api/v1/messages/{msg_id}/reactions"
        for name in ["hamlet", "cordelia"]:
            self.api_post(
                self.example_user(name),
                reactions_url,
                zulip_reaction_info(),
            )

        reaction_ids = list(
            Reaction.objects.filter(message_id=msg_id).values_list("id", flat=True),
        )

        self.assertEqual(Reaction.objects.filter(id__in=reaction_ids).count(), 2)
        move_messages_to_archive(message_ids=[msg_id])

        # Archived copies keep the original ids; the live rows are gone.
        archived_ids = set(
            ArchivedReaction.objects.filter(message_id=msg_id).values_list("id", flat=True)
        )
        self.assertEqual(archived_ids, set(reaction_ids))
        self.assertEqual(Reaction.objects.filter(id__in=reaction_ids).count(), 0)

        restore_all_data_from_archive()
        restored_ids = set(
            Reaction.objects.filter(id__in=reaction_ids).values_list("id", flat=True)
        )
        self.assertEqual(restored_ids, set(reaction_ids))
 | |
| 
 | |
| 
 | |
class TestCleaningArchive(ArchiveMessagesTestingBase):
    """Tests for clean_archived_data()."""

    def test_clean_archived_data(self) -> None:
        """Only old, unprotected archive transactions are removed.

        All transactions but the newest are back-dated beyond
        ARCHIVED_DATA_VACUUMING_DELAY_DAYS, and the second-newest is also
        flagged protect_from_deletion. After cleaning, exactly those two
        newest transactions must remain.
        """
        self._make_expired_zulip_messages(7)
        archive_messages(chunk_size=2)  # Small chunk size to have multiple transactions

        transactions = list(ArchiveTransaction.objects.all().order_by("id"))
        # Back-date every transaction except the most recent one.
        for old_transaction in transactions[:-1]:
            old_transaction.timestamp = timezone_now() - timedelta(
                days=settings.ARCHIVED_DATA_VACUUMING_DELAY_DAYS + 1
            )
            old_transaction.save()

        # This transaction would be up for deletion, but we enable the flag
        # preventing it from automatic deletion:
        protected_transaction = transactions[-2]
        protected_transaction.protect_from_deletion = True
        protected_transaction.save()

        message_ids_to_clean = list(
            ArchivedMessage.objects.filter(archive_transaction__in=transactions[:-2]).values_list(
                "id", flat=True
            )
        )

        clean_archived_data()
        remaining_transactions = list(ArchiveTransaction.objects.order_by("-id"))
        self.assert_length(remaining_transactions, 2)
        # All transactions except the last two were deleted:
        newest, second_newest = remaining_transactions[0], remaining_transactions[1]
        self.assertEqual(newest.id, transactions[-1].id)
        self.assertEqual(second_newest.id, transactions[-2].id)
        # And corresponding ArchivedMessages should have been deleted:
        self.assertFalse(ArchivedMessage.objects.filter(id__in=message_ids_to_clean).exists())
        self.assertFalse(
            ArchivedUserMessage.objects.filter(message_id__in=message_ids_to_clean).exists()
        )

        # Every archived message left belongs to one of the two survivors.
        surviving_transaction_ids = [newest.id, second_newest.id]
        for message in ArchivedMessage.objects.all():
            self.assertIn(message.archive_transaction_id, surviving_transaction_ids)
 | |
| 
 | |
| 
 | |
class TestGetRealmAndStreamsForArchiving(ZulipTestCase):
    """Tests for get_realms_and_streams_for_archiving()."""

    def fix_ordering_of_result(self, result: list[tuple[Realm, list[Stream]]]) -> None:
        """
        Sort, in place, the structure returned by get_realms_and_streams_for_archiving
        so that two results can be compared: tuples by realm id, and the streams
        inside each tuple by stream id.
        """
        result.sort(key=lambda realm_and_streams: realm_and_streams[0].id)

        for _, streams in result:
            streams.sort(key=lambda stream: stream.id)

    def simple_get_realms_and_streams_for_archiving(self) -> list[tuple[Realm, list[Stream]]]:
        """
        Reference implementation of the function under test, written with the
        obvious, unoptimized algorithm. Comparing the output of the two
        implementations gives additional verification of correctness.
        """
        result = []
        for realm in Realm.objects.all():
            realm_streams = Stream.objects.filter(realm=realm)
            if realm.message_retention_days == -1:
                # Realm value is -1: keep only streams that set their own
                # value other than -1, and skip the realm when there are none.
                streams = realm_streams.exclude(message_retention_days__isnull=True).exclude(
                    message_retention_days=-1
                )
                if streams.exists():
                    result.append((realm, list(streams)))
            else:
                # Any other realm value: always list the realm, with every
                # stream except those set to -1.
                streams = realm_streams.exclude(message_retention_days=-1)
                result.append((realm, list(streams)))

        return result

    def test_get_realms_and_streams_for_archiving(self) -> None:
        """Compare the real function with a hand-built expectation and the reference implementation."""
        # zulip: realm-level value of 10, with two per-stream overrides.
        zulip_realm = get_realm("zulip")
        zulip_realm.message_retention_days = 10
        zulip_realm.save()

        verona = get_stream("Verona", zulip_realm)
        verona.message_retention_days = -1  # Block archiving for this stream
        verona.save()
        denmark = get_stream("Denmark", zulip_realm)
        denmark.message_retention_days = 1
        denmark.save()

        # zephyr: realm-level value of -1, with three new streams: one left
        # as created, one set to -1 and one set to 1.
        zephyr_realm = get_realm("zephyr")
        zephyr_realm.message_retention_days = -1
        zephyr_realm.save()
        self.make_stream("normal stream", realm=zephyr_realm)

        archiving_blocked_zephyr_stream = self.make_stream("no archiving", realm=zephyr_realm)
        archiving_blocked_zephyr_stream.message_retention_days = -1
        archiving_blocked_zephyr_stream.save(update_fields=["message_retention_days"])

        archiving_enabled_zephyr_stream = self.make_stream("with archiving", realm=zephyr_realm)
        archiving_enabled_zephyr_stream.message_retention_days = 1
        archiving_enabled_zephyr_stream.save(update_fields=["message_retention_days"])

        # A new realm with a realm-level value of -1.
        no_archiving_realm = do_create_realm(string_id="no_archiving", name="no_archiving")
        do_set_realm_property(no_archiving_realm, "invite_required", False, acting_user=None)
        do_set_realm_property(no_archiving_realm, "message_retention_days", -1, acting_user=None)

        # Realm for testing the edge case where it has a default retention policy,
        # but all streams disable it.
        realm_all_streams_archiving_disabled = do_create_realm(
            string_id="with_archiving", name="with_archiving"
        )
        do_set_realm_property(
            realm_all_streams_archiving_disabled, "invite_required", False, acting_user=None
        )
        do_set_realm_property(
            realm_all_streams_archiving_disabled, "message_retention_days", 1, acting_user=None
        )
        Stream.objects.filter(realm=realm_all_streams_archiving_disabled).update(
            message_retention_days=-1
        )

        # Build the structure get_realms_and_streams_for_archiving should return.
        # The ordering of the elements in the result is not deterministic, so a
        # helper puts every structure into a consistent order before comparing.
        # A set can't be used for this, because it requires hashable elements
        # and the tuples here contain lists.
        zulip_streams_without_verona = list(
            Stream.objects.filter(realm=zulip_realm).exclude(id=verona.id)
        )
        expected_result: list[tuple[Realm, list[Stream]]] = [
            (zulip_realm, zulip_streams_without_verona),
            (zephyr_realm, [archiving_enabled_zephyr_stream]),
            (realm_all_streams_archiving_disabled, []),
        ]
        self.fix_ordering_of_result(expected_result)

        simple_algorithm_result = self.simple_get_realms_and_streams_for_archiving()
        self.fix_ordering_of_result(simple_algorithm_result)

        result = get_realms_and_streams_for_archiving()
        self.fix_ordering_of_result(result)

        # The real result must match the hand-built expectation first, then
        # the reference implementation.
        for reference in (expected_result, simple_algorithm_result):
            self.assert_length(result, len(reference))
            self.assertEqual(result, reference)
 | |
| 
 | |
| 
 | |
class TestRestoreStreamMessages(ArchiveMessagesTestingBase):
    """Tests for restore_retention_policy_deletions_for_stream()."""

    def test_restore_retention_policy_deletions_for_stream(self) -> None:
        """Only messages archived by archive_messages() are restored.

        Two messages are archived with move_messages_to_archive() and two
        more by archive_messages() after being back-dated past the stream's
        retention value. Restoring for the stream must bring back the
        second pair and leave the first pair archived.
        """
        cordelia = self.example_user("cordelia")
        hamlet = self.example_user("hamlet")

        realm = get_realm("zulip")
        stream_name = "Verona"
        stream = get_stream(stream_name, realm)

        message_ids_to_archive_manually = [
            self.send_stream_message(cordelia, stream_name, str(i)) for i in range(2)
        ]
        usermessage_ids_to_archive_manually = self._get_usermessage_ids(
            message_ids_to_archive_manually
        )
        message_ids_to_archive_by_policy = [
            self.send_stream_message(hamlet, stream_name, str(i)) for i in range(2)
        ]
        usermessage_ids_to_archive_by_policy = self._get_usermessage_ids(
            message_ids_to_archive_by_policy
        )

        expected_archived_message_ids = (
            message_ids_to_archive_manually + message_ids_to_archive_by_policy
        )
        expected_archived_usermessage_ids = (
            usermessage_ids_to_archive_manually + usermessage_ids_to_archive_by_policy
        )

        # Retention value of 5 on the stream, with the second pair of
        # messages dated 6 days back.
        self._set_stream_message_retention_value(stream, 5)
        self._change_messages_date_sent(
            message_ids_to_archive_by_policy, timezone_now() - timedelta(days=6)
        )

        # Keyword form, matching every other call in this file.
        move_messages_to_archive(message_ids=message_ids_to_archive_manually)
        archive_messages()

        self._verify_archive_data(expected_archived_message_ids, expected_archived_usermessage_ids)

        restore_retention_policy_deletions_for_stream(stream)

        # Verify that we restore the stream messages that were archived due to retention policy,
        # but not the ones manually deleted.
        self.assert_length(
            Message.objects.filter(id__in=message_ids_to_archive_by_policy),
            len(message_ids_to_archive_by_policy),
        )
        # Use .exists() rather than the queryset's truthiness, so that no
        # rows are fetched just to test for emptiness.
        self.assertFalse(Message.objects.filter(id__in=message_ids_to_archive_manually).exists())
 | |
| 
 | |
| 
 | |
class TestDoDeleteMessages(ZulipTestCase):
    """Tests for do_delete_messages()."""

    def test_do_delete_messages_multiple(self) -> None:
        """Deleting ten messages in one call archives them under one transaction."""
        realm = get_realm("zulip")
        cordelia = self.example_user("cordelia")
        message_ids = [self.send_stream_message(cordelia, "Verona", str(i)) for i in range(10)]
        messages = Message.objects.filter(id__in=message_ids)

        # The whole deletion must stay within this exact number of queries.
        with self.assert_database_query_count(23):
            do_delete_messages(realm, messages, acting_user=None)
        self.assertFalse(Message.objects.filter(id__in=message_ids).exists())

        archived_messages = ArchivedMessage.objects.filter(id__in=message_ids)
        self.assertEqual(archived_messages.count(), len(message_ids))
        # All archived rows share a single archive transaction id.
        transaction_ids = {archived.archive_transaction_id for archived in archived_messages}
        self.assert_length(transaction_ids, 1)
 |