diff --git a/zerver/actions/realm_settings.py b/zerver/actions/realm_settings.py index d3016853a2..3f2ff9fd53 100644 --- a/zerver/actions/realm_settings.py +++ b/zerver/actions/realm_settings.py @@ -16,7 +16,7 @@ from zerver.actions.user_settings import do_delete_avatar_image from zerver.lib.message import parse_message_time_limit_setting, update_first_visible_message_id from zerver.lib.send_email import FromAddress, send_email_to_admins from zerver.lib.sessions import delete_user_sessions -from zerver.lib.upload import delete_message_attachment +from zerver.lib.upload import delete_message_attachments from zerver.lib.user_counts import realm_user_count_by_role from zerver.models import ( ArchivedAttachment, @@ -334,14 +334,22 @@ def do_add_deactivated_redirect(realm: Realm, redirect_url: str) -> None: realm.save(update_fields=["deactivated_redirect"]) -def do_delete_all_realm_attachments(realm: Realm) -> None: +def do_delete_all_realm_attachments(realm: Realm, *, batch_size: int = 1000) -> None: # Delete attachment files from the storage backend, so that we # don't leave them dangling. for obj_class in Attachment, ArchivedAttachment: - for path_id in obj_class.objects.filter(realm_id=realm.id).values_list( - "path_id", flat=True - ): - delete_message_attachment(path_id) + last_id = 0 + while True: + to_delete = ( + obj_class.objects.filter(realm_id=realm.id, id__gt=last_id) # type: ignore[misc] # Does not recognize shared 'id' PK column + .order_by("id") + .values_list("id", "path_id")[:batch_size] + ) + if len(to_delete) > 0: + delete_message_attachments([row[1] for row in to_delete]) + last_id = to_delete[len(to_delete) - 1][0] + if len(to_delete) < batch_size: + break obj_class.objects.filter(realm=realm).delete() diff --git a/zerver/lib/upload/__init__.py b/zerver/lib/upload/__init__.py index f37e99359b..65ddae14d6 100644 --- a/zerver/lib/upload/__init__.py +++ b/zerver/lib/upload/__init__.py @@ -2,7 +2,7 @@ import io import logging import urllib from mimetypes import guess_type -from typing import IO, Any, Callable, Optional, Tuple +from typing import IO, Any, Callable, List, Optional, Tuple from urllib.parse import urljoin from django.conf import settings @@ -110,6 +110,10 @@ def delete_message_attachment(path_id: str) -> bool: return upload_backend.delete_message_attachment(path_id) +def delete_message_attachments(path_ids: List[str]) -> None: + return upload_backend.delete_message_attachments(path_ids) + + # Avatar image uploads diff --git a/zerver/lib/upload/base.py b/zerver/lib/upload/base.py index dba840f983..244bc87549 100644 --- a/zerver/lib/upload/base.py +++ b/zerver/lib/upload/base.py @@ -2,7 +2,7 @@ import io import os import re import unicodedata -from typing import IO, Any, Callable, Optional, Tuple +from typing import IO, Any, Callable, List, Optional, Tuple from django.utils.translation import gettext as _ from markupsafe import Markup @@ -206,6 +206,10 @@ class ZulipUploadBackend: def delete_message_attachment(self, path_id: str) -> bool: raise NotImplementedError + def delete_message_attachments(self, path_ids: List[str]) -> None: + for path_id in path_ids: + self.delete_message_attachment(path_id) + # Avatar image uploads def get_avatar_url(self, hash_key: str, medium: bool = False) -> str: raise NotImplementedError diff --git a/zerver/lib/upload/s3.py b/zerver/lib/upload/s3.py index 3643ecdfbb..cccbf87c25 100644 --- a/zerver/lib/upload/s3.py +++ b/zerver/lib/upload/s3.py @@ -3,7 +3,7 @@ import os import secrets import urllib from mimetypes import guess_type -from typing import IO, Any, Callable, Optional +from typing import IO, Any, Callable, List, Optional import boto3 import botocore @@ -233,6 +233,11 @@ class S3UploadBackend(ZulipUploadBackend): def delete_message_attachment(self, path_id: str) -> bool: return self.delete_file_from_s3(path_id, self.uploads_bucket) + def delete_message_attachments(self, path_ids: List[str]) -> None: + self.uploads_bucket.delete_objects( + Delete={"Objects": [{"Key": path_id} for path_id in path_ids]} + ) + def write_avatar_images( self, s3_file_name: str, diff --git a/zerver/tests/test_realm.py b/zerver/tests/test_realm.py index 27a4818ae0..00acb4a493 100644 --- a/zerver/tests/test_realm.py +++ b/zerver/tests/test_realm.py @@ -16,6 +16,7 @@ from zerver.actions.realm_settings import ( do_change_realm_org_type, do_change_realm_plan_type, do_deactivate_realm, + do_delete_all_realm_attachments, do_reactivate_realm, do_scrub_realm, do_send_realm_reactivation_email, @@ -27,7 +28,7 @@ from zerver.lib.realm_description import get_realm_rendered_description, get_rea from zerver.lib.send_email import send_future_email from zerver.lib.streams import create_stream_if_needed from zerver.lib.test_classes import ZulipTestCase -from zerver.lib.upload import upload_message_attachment +from zerver.lib.upload import delete_message_attachments, upload_message_attachment from zerver.models import ( Attachment, CustomProfileField, @@ -1417,6 +1418,42 @@ class RealmAPITest(ZulipTestCase): class ScrubRealmTest(ZulipTestCase): + def test_do_delete_all_realm_attachments(self) -> None: + realm = get_realm("zulip") + hamlet = self.example_user("hamlet") + Attachment.objects.filter(realm=realm).delete() + assert settings.LOCAL_UPLOADS_DIR is not None + assert settings.LOCAL_FILES_DIR is not None + + path_ids = [] + for n in range(1, 4): + content = f"content{n}".encode() + url = upload_message_attachment( + f"dummy{n}.txt", len(content), "text/plain", content, hamlet + ) + base = "/user_uploads/" + self.assertEqual(base, url[: len(base)]) + path_id = re.sub("/user_uploads/", "", url) + self.assertTrue(os.path.isfile(os.path.join(settings.LOCAL_FILES_DIR, path_id))) + path_ids.append(path_id) + + with mock.patch( + "zerver.actions.realm_settings.delete_message_attachments", + side_effect=delete_message_attachments, + ) as p: + do_delete_all_realm_attachments(realm, batch_size=2) + + self.assertEqual(p.call_count, 2) + p.assert_has_calls( + [ + mock.call([path_ids[0], path_ids[1]]), + mock.call([path_ids[2]]), + ] + ) + self.assertEqual(Attachment.objects.filter(realm=realm).count(), 0) + for file_path in path_ids: + self.assertFalse(os.path.isfile(os.path.join(settings.LOCAL_FILES_DIR, path_id))) + def test_scrub_realm(self) -> None: zulip = get_realm("zulip") lear = get_realm("lear") diff --git a/zerver/tests/test_upload_local.py b/zerver/tests/test_upload_local.py index 4947c2fddd..f79eebe228 100644 --- a/zerver/tests/test_upload_local.py +++ b/zerver/tests/test_upload_local.py @@ -19,6 +19,7 @@ from zerver.lib.test_helpers import ( from zerver.lib.upload import ( delete_export_tarball, delete_message_attachment, + delete_message_attachments, upload_emoji_image, upload_export_tarball, upload_message_attachment, @@ -86,7 +87,35 @@ class LocalStorageTest(UploadSerializeMixin, ZulipTestCase): response_dict = self.assert_json_success(result) path_id = re.sub("/user_uploads/", "", response_dict["uri"]) + + assert settings.LOCAL_FILES_DIR is not None + file_path = os.path.join(settings.LOCAL_FILES_DIR, path_id) + self.assertTrue(os.path.isfile(file_path)) + self.assertTrue(delete_message_attachment(path_id)) + self.assertFalse(os.path.isfile(file_path)) + + def test_delete_message_attachments(self) -> None: + assert settings.LOCAL_UPLOADS_DIR is not None + assert settings.LOCAL_FILES_DIR is not None + + user_profile = self.example_user("hamlet") + path_ids = [] + for n in range(1, 1005): + uri = upload_message_attachment( + "dummy.txt", len(b"zulip!"), "text/plain", b"zulip!", user_profile + ) + base = "/user_uploads/" + self.assertEqual(base, uri[: len(base)]) + path_id = re.sub("/user_uploads/", "", uri) + path_ids.append(path_id) + file_path = os.path.join(settings.LOCAL_FILES_DIR, path_id) + self.assertTrue(os.path.isfile(file_path)) + + delete_message_attachments(path_ids) + for path_id in path_ids: + file_path = os.path.join(settings.LOCAL_FILES_DIR, path_id) + self.assertFalse(os.path.isfile(file_path)) def test_avatar_url(self) -> None: self.login("hamlet") diff --git a/zerver/tests/test_upload_s3.py b/zerver/tests/test_upload_s3.py index 07c0fcd3ee..ea63a98203 100644 --- a/zerver/tests/test_upload_s3.py +++ b/zerver/tests/test_upload_s3.py @@ -3,6 +3,7 @@ import os import re import urllib from io import StringIO +from unittest.mock import patch import botocore.exceptions from django.conf import settings @@ -22,6 +23,7 @@ from zerver.lib.test_helpers import ( from zerver.lib.upload import ( delete_export_tarball, delete_message_attachment, + delete_message_attachments, upload_export_tarball, upload_message_attachment, ) @@ -31,6 +33,7 @@ from zerver.lib.upload.base import ( MEDIUM_AVATAR_SIZE, resize_avatar, ) +from zerver.lib.upload.s3 import S3UploadBackend from zerver.models import ( Attachment, RealmEmoji, @@ -96,7 +99,7 @@ class S3Test(ZulipTestCase): @use_s3_backend def test_delete_message_attachment(self) -> None: - create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET) + bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0] user_profile = self.example_user("hamlet") uri = upload_message_attachment( @@ -104,10 +107,37 @@ class S3Test(ZulipTestCase): ) path_id = re.sub("/user_uploads/", "", uri) + self.assertIsNotNone(bucket.Object(path_id).get()) self.assertTrue(delete_message_attachment(path_id)) + with self.assertRaises(botocore.exceptions.ClientError): + bucket.Object(path_id).load() + + @use_s3_backend + def test_delete_message_attachments(self) -> None: + bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0] + + user_profile = self.example_user("hamlet") + path_ids = [] + for n in range(1, 5): + uri = upload_message_attachment( + "dummy.txt", len(b"zulip!"), "text/plain", b"zulip!", user_profile + ) + path_id = re.sub("/user_uploads/", "", uri) + self.assertIsNotNone(bucket.Object(path_id).get()) + path_ids.append(path_id) + + with patch.object(S3UploadBackend, "delete_message_attachment") as single_delete: + delete_message_attachments(path_ids) + single_delete.assert_not_called() + for path_id in path_ids: + with self.assertRaises(botocore.exceptions.ClientError): + bucket.Object(path_id).load() @use_s3_backend def test_delete_message_attachment_when_file_doesnt_exist(self) -> None: + bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0] + with self.assertRaises(botocore.exceptions.ClientError): + bucket.Object("non-existent-file").load() with self.assertLogs(level="WARNING") as warn_log: self.assertEqual(False, delete_message_attachment("non-existent-file")) self.assertEqual(