retention: Add flag to ArchiveTransaction to prevent automatic deletion.

This adds an index non-concurrently, but the table should be small
enough for this to be fine.
This commit is contained in:
Mateusz Mandera
2025-01-28 14:14:48 +08:00
committed by Tim Abbott
parent bec0326776
commit a352d35660
4 changed files with 46 additions and 8 deletions

View File

@@ -680,12 +680,20 @@ def clean_archived_data() -> None:
# Associated archived objects will get deleted through the on_delete=CASCADE property:
count = 0
transaction_ids = list(
ArchiveTransaction.objects.filter(timestamp__lt=check_date).values_list("id", flat=True)
ArchiveTransaction.objects.filter(
timestamp__lt=check_date, protect_from_deletion=False
).values_list("id", flat=True)
)
while len(transaction_ids) > 0:
transaction_block = transaction_ids[0:TRANSACTION_DELETION_BATCH_SIZE]
transaction_ids = transaction_ids[TRANSACTION_DELETION_BATCH_SIZE:]
ArchiveTransaction.objects.filter(id__in=transaction_block).delete()
ArchiveTransaction.objects.filter(
# The protect_from_deletion=False condition is redundant at this point, but can act
# as an extra safeguard against future bugs.
id__in=transaction_block,
protect_from_deletion=False,
).delete()
count += len(transaction_block)
logger.info("Deleted %s old ArchiveTransactions.", count)

View File

@@ -0,0 +1,17 @@
# Generated by Django 5.0.10 on 2025-01-28 06:10
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("zerver", "0660_add_imageattachment_content_type"),
]
operations = [
migrations.AddField(
model_name="archivetransaction",
name="protect_from_deletion",
field=models.BooleanField(db_index=True, default=False),
),
]

View File

@@ -112,6 +112,10 @@ class ArchiveTransaction(models.Model):
restored = models.BooleanField(default=False, db_index=True)
restored_timestamp = models.DateTimeField(null=True, db_index=True)
# ArchiveTransaction objects are regularly deleted. This flag allows tagging
# an ArchiveTransaction as protected from such automated deletion.
protect_from_deletion = models.BooleanField(default=False, db_index=True)
type = models.PositiveSmallIntegerField(db_index=True)
# Valid types:
RETENTION_POLICY_BASED = 1 # Archiving was executed due to automated retention policies

View File

@@ -952,24 +952,30 @@ class TestCleaningArchive(ArchiveMessagesTestingBase):
self._make_expired_zulip_messages(7)
archive_messages(chunk_size=2) # Small chunk size to have multiple transactions
transactions = list(ArchiveTransaction.objects.all())
transactions = list(ArchiveTransaction.objects.all().order_by("id"))
for transaction in transactions[0:-1]:
transaction.timestamp = timezone_now() - timedelta(
days=settings.ARCHIVED_DATA_VACUUMING_DELAY_DAYS + 1
)
transaction.save()
# This transaction would up for deletion, but we enable the flag preventing
# it from automatic deletion:
transactions[-2].protect_from_deletion = True
transactions[-2].save()
message_ids_to_clean = list(
ArchivedMessage.objects.filter(archive_transaction__in=transactions[0:-1]).values_list(
ArchivedMessage.objects.filter(archive_transaction__in=transactions[0:-2]).values_list(
"id", flat=True
)
)
clean_archived_data()
remaining_transactions = list(ArchiveTransaction.objects.all())
self.assert_length(remaining_transactions, 1)
# All transactions except the last one were deleted:
remaining_transactions = list(ArchiveTransaction.objects.order_by("-id"))
self.assert_length(remaining_transactions, 2)
# All transactions except the last two were deleted:
self.assertEqual(remaining_transactions[0].id, transactions[-1].id)
self.assertEqual(remaining_transactions[1].id, transactions[-2].id)
# And corresponding ArchivedMessages should have been deleted:
self.assertFalse(ArchivedMessage.objects.filter(id__in=message_ids_to_clean).exists())
self.assertFalse(
@@ -977,7 +983,10 @@ class TestCleaningArchive(ArchiveMessagesTestingBase):
)
for message in ArchivedMessage.objects.all():
self.assertEqual(message.archive_transaction_id, remaining_transactions[0].id)
self.assertIn(
message.archive_transaction_id,
[remaining_transactions[0].id, remaining_transactions[1].id],
)
class TestGetRealmAndStreamsForArchiving(ZulipTestCase):