mirror of
https://github.com/zulip/zulip.git
synced 2025-11-10 08:56:10 +00:00
delete_realm: Optimize attachment cleanup by batching.
This commit is contained in:
committed by
Tim Abbott
parent
cdda4bc089
commit
c9d1755a12
@@ -16,7 +16,7 @@ from zerver.actions.user_settings import do_delete_avatar_image
|
||||
from zerver.lib.message import parse_message_time_limit_setting, update_first_visible_message_id
|
||||
from zerver.lib.send_email import FromAddress, send_email_to_admins
|
||||
from zerver.lib.sessions import delete_user_sessions
|
||||
from zerver.lib.upload import delete_message_attachment
|
||||
from zerver.lib.upload import delete_message_attachments
|
||||
from zerver.lib.user_counts import realm_user_count_by_role
|
||||
from zerver.models import (
|
||||
ArchivedAttachment,
|
||||
@@ -334,14 +334,22 @@ def do_add_deactivated_redirect(realm: Realm, redirect_url: str) -> None:
|
||||
realm.save(update_fields=["deactivated_redirect"])
|
||||
|
||||
|
||||
def do_delete_all_realm_attachments(realm: Realm, *, batch_size: int = 1000) -> None:
    """Delete every attachment belonging to ``realm``.

    For both ``Attachment`` and ``ArchivedAttachment``, the stored files
    are removed through ``delete_message_attachments`` in batches of at
    most ``batch_size`` path ids, and then the database rows for the
    realm are deleted.

    Args:
        realm: The realm whose attachments are removed.
        batch_size: Maximum number of rows fetched (and path ids passed
            to the storage backend) per iteration. Must be at least 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # With batch_size == 0 every fetched batch is empty and the
    # "short batch" exit condition below can never become true, so the
    # loop would spin forever; reject such values before doing any work.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Delete attachment files from the storage backend, so that we
    # don't leave them dangling.
    for obj_class in Attachment, ArchivedAttachment:
        last_id = 0
        while True:
            # Walk the table in primary-key order, resuming after the
            # last id seen, so each query fetches at most one batch.
            batch = list(
                obj_class.objects.filter(realm_id=realm.id, id__gt=last_id)  # type: ignore[misc] # Does not recognize shared 'id' PK column
                .order_by("id")
                .values_list("id", "path_id")[:batch_size]
            )
            if batch:
                delete_message_attachments([path_id for _, path_id in batch])
                last_id = batch[-1][0]
            # A short (or empty) batch means there is nothing left to fetch.
            if len(batch) < batch_size:
                break
        obj_class.objects.filter(realm=realm).delete()
|
||||
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ import io
|
||||
import logging
|
||||
import urllib
|
||||
from mimetypes import guess_type
|
||||
from typing import IO, Any, Callable, Optional, Tuple
|
||||
from typing import IO, Any, Callable, List, Optional, Tuple
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from django.conf import settings
|
||||
@@ -110,6 +110,10 @@ def delete_message_attachment(path_id: str) -> bool:
|
||||
return upload_backend.delete_message_attachment(path_id)
|
||||
|
||||
|
||||
def delete_message_attachments(path_ids: List[str]) -> None:
    """Forward ``path_ids`` to ``upload_backend.delete_message_attachments``.

    Returns whatever the backend call returns (annotated as ``None``).
    """
    bulk_delete = upload_backend.delete_message_attachments
    return bulk_delete(path_ids)
|
||||
|
||||
|
||||
# Avatar image uploads
|
||||
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ import io
|
||||
import os
|
||||
import re
|
||||
import unicodedata
|
||||
from typing import IO, Any, Callable, Optional, Tuple
|
||||
from typing import IO, Any, Callable, List, Optional, Tuple
|
||||
|
||||
from django.utils.translation import gettext as _
|
||||
from markupsafe import Markup
|
||||
@@ -206,6 +206,10 @@ class ZulipUploadBackend:
|
||||
def delete_message_attachment(self, path_id: str) -> bool:
    """Placeholder for deleting a single attachment by ``path_id``.

    This version has no implementation and always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()
|
||||
|
||||
def delete_message_attachments(self, path_ids: List[str]) -> None:
    """Delete several attachments by calling ``delete_message_attachment`` once per path id.

    The path ids are processed in the order given; the per-item return
    values are discarded.
    """
    remove_one = self.delete_message_attachment
    for attachment_path in path_ids:
        remove_one(attachment_path)
|
||||
|
||||
# Avatar image uploads
|
||||
def get_avatar_url(self, hash_key: str, medium: bool = False) -> str:
    """Placeholder for building an avatar URL from ``hash_key``.

    This version has no implementation and always raises
    ``NotImplementedError``, regardless of ``medium``.
    """
    raise NotImplementedError()
|
||||
|
||||
@@ -3,7 +3,7 @@ import os
|
||||
import secrets
|
||||
import urllib
|
||||
from mimetypes import guess_type
|
||||
from typing import IO, Any, Callable, Optional
|
||||
from typing import IO, Any, Callable, List, Optional
|
||||
|
||||
import boto3
|
||||
import botocore
|
||||
@@ -233,6 +233,11 @@ class S3UploadBackend(ZulipUploadBackend):
|
||||
def delete_message_attachment(self, path_id: str) -> bool:
    """Delete ``path_id`` from ``self.uploads_bucket``.

    Returns the result of ``self.delete_file_from_s3`` unchanged.
    """
    target_bucket = self.uploads_bucket
    return self.delete_file_from_s3(path_id, target_bucket)
|
||||
|
||||
def delete_message_attachments(self, path_ids: List[str]) -> None:
    """Delete many attachments from ``self.uploads_bucket`` using bulk requests.

    The keys are sent through ``delete_objects`` in chunks rather than
    one request per key. An empty ``path_ids`` list results in no
    request at all.

    Args:
        path_ids: Object keys to delete from the uploads bucket.
    """
    # S3's DeleteObjects API accepts at most 1000 keys per request and
    # rejects a request with no keys, so split the input accordingly.
    max_keys_per_request = 1000
    for start in range(0, len(path_ids), max_keys_per_request):
        chunk = path_ids[start : start + max_keys_per_request]
        self.uploads_bucket.delete_objects(
            Delete={"Objects": [{"Key": path_id} for path_id in chunk]}
        )
|
||||
|
||||
def write_avatar_images(
|
||||
self,
|
||||
s3_file_name: str,
|
||||
|
||||
@@ -16,6 +16,7 @@ from zerver.actions.realm_settings import (
|
||||
do_change_realm_org_type,
|
||||
do_change_realm_plan_type,
|
||||
do_deactivate_realm,
|
||||
do_delete_all_realm_attachments,
|
||||
do_reactivate_realm,
|
||||
do_scrub_realm,
|
||||
do_send_realm_reactivation_email,
|
||||
@@ -27,7 +28,7 @@ from zerver.lib.realm_description import get_realm_rendered_description, get_rea
|
||||
from zerver.lib.send_email import send_future_email
|
||||
from zerver.lib.streams import create_stream_if_needed
|
||||
from zerver.lib.test_classes import ZulipTestCase
|
||||
from zerver.lib.upload import upload_message_attachment
|
||||
from zerver.lib.upload import delete_message_attachments, upload_message_attachment
|
||||
from zerver.models import (
|
||||
Attachment,
|
||||
CustomProfileField,
|
||||
@@ -1417,6 +1418,42 @@ class RealmAPITest(ZulipTestCase):
|
||||
|
||||
|
||||
class ScrubRealmTest(ZulipTestCase):
|
||||
def test_do_delete_all_realm_attachments(self) -> None:
    """Check that realm attachment cleanup deletes files in batches.

    Uploads three files, runs ``do_delete_all_realm_attachments`` with
    ``batch_size=2``, and verifies that the storage layer was called
    once with two path ids and once with the remaining one, that no
    ``Attachment`` rows remain for the realm, and that every uploaded
    file is gone from disk.
    """
    realm = get_realm("zulip")
    hamlet = self.example_user("hamlet")
    # Start from a clean slate so the batch contents are predictable.
    Attachment.objects.filter(realm=realm).delete()
    assert settings.LOCAL_UPLOADS_DIR is not None
    assert settings.LOCAL_FILES_DIR is not None

    path_ids = []
    for n in range(1, 4):
        content = f"content{n}".encode()
        url = upload_message_attachment(
            f"dummy{n}.txt", len(content), "text/plain", content, hamlet
        )
        base = "/user_uploads/"
        self.assertEqual(base, url[: len(base)])
        path_id = re.sub("/user_uploads/", "", url)
        self.assertTrue(os.path.isfile(os.path.join(settings.LOCAL_FILES_DIR, path_id)))
        path_ids.append(path_id)

    # Wrap the real function so the files are actually deleted while
    # the calls are still recorded.
    with mock.patch(
        "zerver.actions.realm_settings.delete_message_attachments",
        side_effect=delete_message_attachments,
    ) as p:
        do_delete_all_realm_attachments(realm, batch_size=2)

    self.assertEqual(p.call_count, 2)
    p.assert_has_calls(
        [
            mock.call([path_ids[0], path_ids[1]]),
            mock.call([path_ids[2]]),
        ]
    )
    self.assertEqual(Attachment.objects.filter(realm=realm).count(), 0)
    # Check each uploaded file, not just the last one from the upload loop.
    for path_id in path_ids:
        self.assertFalse(os.path.isfile(os.path.join(settings.LOCAL_FILES_DIR, path_id)))
|
||||
|
||||
def test_scrub_realm(self) -> None:
|
||||
zulip = get_realm("zulip")
|
||||
lear = get_realm("lear")
|
||||
|
||||
@@ -19,6 +19,7 @@ from zerver.lib.test_helpers import (
|
||||
from zerver.lib.upload import (
|
||||
delete_export_tarball,
|
||||
delete_message_attachment,
|
||||
delete_message_attachments,
|
||||
upload_emoji_image,
|
||||
upload_export_tarball,
|
||||
upload_message_attachment,
|
||||
@@ -86,7 +87,35 @@ class LocalStorageTest(UploadSerializeMixin, ZulipTestCase):
|
||||
|
||||
response_dict = self.assert_json_success(result)
|
||||
path_id = re.sub("/user_uploads/", "", response_dict["uri"])
|
||||
|
||||
assert settings.LOCAL_FILES_DIR is not None
|
||||
file_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
|
||||
self.assertTrue(os.path.isfile(file_path))
|
||||
|
||||
self.assertTrue(delete_message_attachment(path_id))
|
||||
self.assertFalse(os.path.isfile(file_path))
|
||||
|
||||
def test_delete_message_attachments(self) -> None:
    """Upload 1004 files locally, bulk-delete them, and check none remain on disk."""
    assert settings.LOCAL_UPLOADS_DIR is not None
    assert settings.LOCAL_FILES_DIR is not None

    files_dir = settings.LOCAL_FILES_DIR
    base = "/user_uploads/"
    uploader = self.example_user("hamlet")

    path_ids = []
    for _ in range(1004):
        uri = upload_message_attachment(
            "dummy.txt", len(b"zulip!"), "text/plain", b"zulip!", uploader
        )
        self.assertEqual(base, uri[: len(base)])
        uploaded_path_id = re.sub("/user_uploads/", "", uri)
        path_ids.append(uploaded_path_id)
        # Each upload must exist on disk before the bulk delete runs.
        self.assertTrue(os.path.isfile(os.path.join(files_dir, uploaded_path_id)))

    delete_message_attachments(path_ids)

    for deleted_path_id in path_ids:
        self.assertFalse(os.path.isfile(os.path.join(files_dir, deleted_path_id)))
|
||||
|
||||
def test_avatar_url(self) -> None:
|
||||
self.login("hamlet")
|
||||
|
||||
@@ -3,6 +3,7 @@ import os
|
||||
import re
|
||||
import urllib
|
||||
from io import StringIO
|
||||
from unittest.mock import patch
|
||||
|
||||
import botocore.exceptions
|
||||
from django.conf import settings
|
||||
@@ -22,6 +23,7 @@ from zerver.lib.test_helpers import (
|
||||
from zerver.lib.upload import (
|
||||
delete_export_tarball,
|
||||
delete_message_attachment,
|
||||
delete_message_attachments,
|
||||
upload_export_tarball,
|
||||
upload_message_attachment,
|
||||
)
|
||||
@@ -31,6 +33,7 @@ from zerver.lib.upload.base import (
|
||||
MEDIUM_AVATAR_SIZE,
|
||||
resize_avatar,
|
||||
)
|
||||
from zerver.lib.upload.s3 import S3UploadBackend
|
||||
from zerver.models import (
|
||||
Attachment,
|
||||
RealmEmoji,
|
||||
@@ -96,7 +99,7 @@ class S3Test(ZulipTestCase):
|
||||
|
||||
@use_s3_backend
|
||||
def test_delete_message_attachment(self) -> None:
|
||||
create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)
|
||||
bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0]
|
||||
|
||||
user_profile = self.example_user("hamlet")
|
||||
uri = upload_message_attachment(
|
||||
@@ -104,10 +107,37 @@ class S3Test(ZulipTestCase):
|
||||
)
|
||||
|
||||
path_id = re.sub("/user_uploads/", "", uri)
|
||||
self.assertIsNotNone(bucket.Object(path_id).get())
|
||||
self.assertTrue(delete_message_attachment(path_id))
|
||||
with self.assertRaises(botocore.exceptions.ClientError):
|
||||
bucket.Object(path_id).load()
|
||||
|
||||
@use_s3_backend
def test_delete_message_attachments(self) -> None:
    """Bulk-delete four uploads and check the single-delete method is never used."""
    bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0]
    uploader = self.example_user("hamlet")

    path_ids = []
    for _ in range(4):
        uri = upload_message_attachment(
            "dummy.txt", len(b"zulip!"), "text/plain", b"zulip!", uploader
        )
        uploaded_key = re.sub("/user_uploads/", "", uri)
        # The object must be present in the bucket before deletion.
        self.assertIsNotNone(bucket.Object(uploaded_key).get())
        path_ids.append(uploaded_key)

    with patch.object(S3UploadBackend, "delete_message_attachment") as single_delete:
        delete_message_attachments(path_ids)
        single_delete.assert_not_called()

    # Loading a deleted object is expected to raise ClientError.
    for deleted_key in path_ids:
        with self.assertRaises(botocore.exceptions.ClientError):
            bucket.Object(deleted_key).load()
|
||||
|
||||
@use_s3_backend
|
||||
def test_delete_message_attachment_when_file_doesnt_exist(self) -> None:
|
||||
bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0]
|
||||
with self.assertRaises(botocore.exceptions.ClientError):
|
||||
bucket.Object("non-existent-file").load()
|
||||
with self.assertLogs(level="WARNING") as warn_log:
|
||||
self.assertEqual(False, delete_message_attachment("non-existent-file"))
|
||||
self.assertEqual(
|
||||
|
||||
Reference in New Issue
Block a user