From a352d356604d7c79a1f751f881f5b1912da9bba1 Mon Sep 17 00:00:00 2001 From: Mateusz Mandera Date: Tue, 28 Jan 2025 14:14:48 +0800 Subject: [PATCH] retention: Add flag to ArchiveTransaction to prevent automatic deletion. This adds an index non-concurrently, but the table should be small enough for this to be fine. --- zerver/lib/retention.py | 12 +++++++++-- ...rchivetransaction_protect_from_deletion.py | 17 +++++++++++++++ zerver/models/messages.py | 4 ++++ zerver/tests/test_retention.py | 21 +++++++++++++------ 4 files changed, 46 insertions(+), 8 deletions(-) create mode 100644 zerver/migrations/0661_archivetransaction_protect_from_deletion.py diff --git a/zerver/lib/retention.py b/zerver/lib/retention.py index eb7c19535d..b090343afe 100644 --- a/zerver/lib/retention.py +++ b/zerver/lib/retention.py @@ -680,12 +680,20 @@ def clean_archived_data() -> None: # Associated archived objects will get deleted through the on_delete=CASCADE property: count = 0 transaction_ids = list( - ArchiveTransaction.objects.filter(timestamp__lt=check_date).values_list("id", flat=True) + ArchiveTransaction.objects.filter( + timestamp__lt=check_date, protect_from_deletion=False + ).values_list("id", flat=True) ) while len(transaction_ids) > 0: transaction_block = transaction_ids[0:TRANSACTION_DELETION_BATCH_SIZE] transaction_ids = transaction_ids[TRANSACTION_DELETION_BATCH_SIZE:] - ArchiveTransaction.objects.filter(id__in=transaction_block).delete() + + ArchiveTransaction.objects.filter( + # The protect_from_deletion=False condition is redundant at this point, but can act + # as an extra safeguard against future bugs. + id__in=transaction_block, + protect_from_deletion=False, + ).delete() count += len(transaction_block) logger.info("Deleted %s old ArchiveTransactions.", count) diff --git a/zerver/migrations/0661_archivetransaction_protect_from_deletion.py b/zerver/migrations/0661_archivetransaction_protect_from_deletion.py new file mode 100644 index 0000000000..38dfee89fa --- /dev/null +++ b/zerver/migrations/0661_archivetransaction_protect_from_deletion.py @@ -0,0 +1,17 @@ +# Generated by Django 5.0.10 on 2025-01-28 06:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("zerver", "0660_add_imageattachment_content_type"), + ] + + operations = [ + migrations.AddField( + model_name="archivetransaction", + name="protect_from_deletion", + field=models.BooleanField(db_index=True, default=False), + ), + ] diff --git a/zerver/models/messages.py b/zerver/models/messages.py index 70bc971c97..fc8f71ad1e 100644 --- a/zerver/models/messages.py +++ b/zerver/models/messages.py @@ -112,6 +112,10 @@ class ArchiveTransaction(models.Model): restored = models.BooleanField(default=False, db_index=True) restored_timestamp = models.DateTimeField(null=True, db_index=True) + # ArchiveTransaction objects are regularly deleted. This flag allows tagging + # an ArchiveTransaction as protected from such automated deletion. + protect_from_deletion = models.BooleanField(default=False, db_index=True) + type = models.PositiveSmallIntegerField(db_index=True) # Valid types: RETENTION_POLICY_BASED = 1 # Archiving was executed due to automated retention policies diff --git a/zerver/tests/test_retention.py b/zerver/tests/test_retention.py index f22e318128..277d773003 100644 --- a/zerver/tests/test_retention.py +++ b/zerver/tests/test_retention.py @@ -952,24 +952,30 @@ class TestCleaningArchive(ArchiveMessagesTestingBase): self._make_expired_zulip_messages(7) archive_messages(chunk_size=2) # Small chunk size to have multiple transactions - transactions = list(ArchiveTransaction.objects.all()) + transactions = list(ArchiveTransaction.objects.all().order_by("id")) for transaction in transactions[0:-1]: transaction.timestamp = timezone_now() - timedelta( days=settings.ARCHIVED_DATA_VACUUMING_DELAY_DAYS + 1 ) transaction.save() + # This transaction would up for deletion, but we enable the flag preventing + # it from automatic deletion: + transactions[-2].protect_from_deletion = True + transactions[-2].save() + message_ids_to_clean = list( - ArchivedMessage.objects.filter(archive_transaction__in=transactions[0:-1]).values_list( + ArchivedMessage.objects.filter(archive_transaction__in=transactions[0:-2]).values_list( "id", flat=True ) ) clean_archived_data() - remaining_transactions = list(ArchiveTransaction.objects.all()) - self.assert_length(remaining_transactions, 1) - # All transactions except the last one were deleted: + remaining_transactions = list(ArchiveTransaction.objects.order_by("-id")) + self.assert_length(remaining_transactions, 2) + # All transactions except the last two were deleted: self.assertEqual(remaining_transactions[0].id, transactions[-1].id) + self.assertEqual(remaining_transactions[1].id, transactions[-2].id) # And corresponding ArchivedMessages should have been deleted: self.assertFalse(ArchivedMessage.objects.filter(id__in=message_ids_to_clean).exists()) self.assertFalse( @@ -977,7 +983,10 @@ class TestCleaningArchive(ArchiveMessagesTestingBase): ) for message in ArchivedMessage.objects.all(): - self.assertEqual(message.archive_transaction_id, remaining_transactions[0].id) + self.assertIn( + message.archive_transaction_id, + [remaining_transactions[0].id, remaining_transactions[1].id], + ) class TestGetRealmAndStreamsForArchiving(ZulipTestCase):