delete_realm: Optimize attachment cleanup by batching.

This commit is contained in:
Alex Vandiver
2023-02-28 03:44:29 +00:00
committed by Tim Abbott
parent cdda4bc089
commit c9d1755a12
7 changed files with 128 additions and 11 deletions

View File

@@ -16,7 +16,7 @@ from zerver.actions.user_settings import do_delete_avatar_image
from zerver.lib.message import parse_message_time_limit_setting, update_first_visible_message_id from zerver.lib.message import parse_message_time_limit_setting, update_first_visible_message_id
from zerver.lib.send_email import FromAddress, send_email_to_admins from zerver.lib.send_email import FromAddress, send_email_to_admins
from zerver.lib.sessions import delete_user_sessions from zerver.lib.sessions import delete_user_sessions
from zerver.lib.upload import delete_message_attachment from zerver.lib.upload import delete_message_attachments
from zerver.lib.user_counts import realm_user_count_by_role from zerver.lib.user_counts import realm_user_count_by_role
from zerver.models import ( from zerver.models import (
ArchivedAttachment, ArchivedAttachment,
@@ -334,14 +334,22 @@ def do_add_deactivated_redirect(realm: Realm, redirect_url: str) -> None:
realm.save(update_fields=["deactivated_redirect"]) realm.save(update_fields=["deactivated_redirect"])
def do_delete_all_realm_attachments(realm: Realm) -> None: def do_delete_all_realm_attachments(realm: Realm, *, batch_size: int = 1000) -> None:
# Delete attachment files from the storage backend, so that we # Delete attachment files from the storage backend, so that we
# don't leave them dangling. # don't leave them dangling.
for obj_class in Attachment, ArchivedAttachment: for obj_class in Attachment, ArchivedAttachment:
for path_id in obj_class.objects.filter(realm_id=realm.id).values_list( last_id = 0
"path_id", flat=True while True:
): to_delete = (
delete_message_attachment(path_id) obj_class.objects.filter(realm_id=realm.id, id__gt=last_id) # type: ignore[misc] # Does not recognize shared 'id' PK column
.order_by("id")
.values_list("id", "path_id")[:batch_size]
)
if len(to_delete) > 0:
delete_message_attachments([row[1] for row in to_delete])
last_id = to_delete[len(to_delete) - 1][0]
if len(to_delete) < batch_size:
break
obj_class.objects.filter(realm=realm).delete() obj_class.objects.filter(realm=realm).delete()

View File

@@ -2,7 +2,7 @@ import io
import logging import logging
import urllib import urllib
from mimetypes import guess_type from mimetypes import guess_type
from typing import IO, Any, Callable, Optional, Tuple from typing import IO, Any, Callable, List, Optional, Tuple
from urllib.parse import urljoin from urllib.parse import urljoin
from django.conf import settings from django.conf import settings
@@ -110,6 +110,10 @@ def delete_message_attachment(path_id: str) -> bool:
return upload_backend.delete_message_attachment(path_id) return upload_backend.delete_message_attachment(path_id)
def delete_message_attachments(path_ids: List[str]) -> None:
return upload_backend.delete_message_attachments(path_ids)
# Avatar image uploads # Avatar image uploads

View File

@@ -2,7 +2,7 @@ import io
import os import os
import re import re
import unicodedata import unicodedata
from typing import IO, Any, Callable, Optional, Tuple from typing import IO, Any, Callable, List, Optional, Tuple
from django.utils.translation import gettext as _ from django.utils.translation import gettext as _
from markupsafe import Markup from markupsafe import Markup
@@ -206,6 +206,10 @@ class ZulipUploadBackend:
def delete_message_attachment(self, path_id: str) -> bool: def delete_message_attachment(self, path_id: str) -> bool:
raise NotImplementedError raise NotImplementedError
def delete_message_attachments(self, path_ids: List[str]) -> None:
for path_id in path_ids:
self.delete_message_attachment(path_id)
# Avatar image uploads # Avatar image uploads
def get_avatar_url(self, hash_key: str, medium: bool = False) -> str: def get_avatar_url(self, hash_key: str, medium: bool = False) -> str:
raise NotImplementedError raise NotImplementedError

View File

@@ -3,7 +3,7 @@ import os
import secrets import secrets
import urllib import urllib
from mimetypes import guess_type from mimetypes import guess_type
from typing import IO, Any, Callable, Optional from typing import IO, Any, Callable, List, Optional
import boto3 import boto3
import botocore import botocore
@@ -233,6 +233,11 @@ class S3UploadBackend(ZulipUploadBackend):
def delete_message_attachment(self, path_id: str) -> bool: def delete_message_attachment(self, path_id: str) -> bool:
return self.delete_file_from_s3(path_id, self.uploads_bucket) return self.delete_file_from_s3(path_id, self.uploads_bucket)
def delete_message_attachments(self, path_ids: List[str]) -> None:
self.uploads_bucket.delete_objects(
Delete={"Objects": [{"Key": path_id} for path_id in path_ids]}
)
def write_avatar_images( def write_avatar_images(
self, self,
s3_file_name: str, s3_file_name: str,

View File

@@ -16,6 +16,7 @@ from zerver.actions.realm_settings import (
do_change_realm_org_type, do_change_realm_org_type,
do_change_realm_plan_type, do_change_realm_plan_type,
do_deactivate_realm, do_deactivate_realm,
do_delete_all_realm_attachments,
do_reactivate_realm, do_reactivate_realm,
do_scrub_realm, do_scrub_realm,
do_send_realm_reactivation_email, do_send_realm_reactivation_email,
@@ -27,7 +28,7 @@ from zerver.lib.realm_description import get_realm_rendered_description, get_rea
from zerver.lib.send_email import send_future_email from zerver.lib.send_email import send_future_email
from zerver.lib.streams import create_stream_if_needed from zerver.lib.streams import create_stream_if_needed
from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.upload import upload_message_attachment from zerver.lib.upload import delete_message_attachments, upload_message_attachment
from zerver.models import ( from zerver.models import (
Attachment, Attachment,
CustomProfileField, CustomProfileField,
@@ -1417,6 +1418,42 @@ class RealmAPITest(ZulipTestCase):
class ScrubRealmTest(ZulipTestCase): class ScrubRealmTest(ZulipTestCase):
def test_do_delete_all_realm_attachments(self) -> None:
realm = get_realm("zulip")
hamlet = self.example_user("hamlet")
Attachment.objects.filter(realm=realm).delete()
assert settings.LOCAL_UPLOADS_DIR is not None
assert settings.LOCAL_FILES_DIR is not None
path_ids = []
for n in range(1, 4):
content = f"content{n}".encode()
url = upload_message_attachment(
f"dummy{n}.txt", len(content), "text/plain", content, hamlet
)
base = "/user_uploads/"
self.assertEqual(base, url[: len(base)])
path_id = re.sub("/user_uploads/", "", url)
self.assertTrue(os.path.isfile(os.path.join(settings.LOCAL_FILES_DIR, path_id)))
path_ids.append(path_id)
with mock.patch(
"zerver.actions.realm_settings.delete_message_attachments",
side_effect=delete_message_attachments,
) as p:
do_delete_all_realm_attachments(realm, batch_size=2)
self.assertEqual(p.call_count, 2)
p.assert_has_calls(
[
mock.call([path_ids[0], path_ids[1]]),
mock.call([path_ids[2]]),
]
)
self.assertEqual(Attachment.objects.filter(realm=realm).count(), 0)
for file_path in path_ids:
self.assertFalse(os.path.isfile(os.path.join(settings.LOCAL_FILES_DIR, path_id)))
def test_scrub_realm(self) -> None: def test_scrub_realm(self) -> None:
zulip = get_realm("zulip") zulip = get_realm("zulip")
lear = get_realm("lear") lear = get_realm("lear")

View File

@@ -19,6 +19,7 @@ from zerver.lib.test_helpers import (
from zerver.lib.upload import ( from zerver.lib.upload import (
delete_export_tarball, delete_export_tarball,
delete_message_attachment, delete_message_attachment,
delete_message_attachments,
upload_emoji_image, upload_emoji_image,
upload_export_tarball, upload_export_tarball,
upload_message_attachment, upload_message_attachment,
@@ -86,7 +87,35 @@ class LocalStorageTest(UploadSerializeMixin, ZulipTestCase):
response_dict = self.assert_json_success(result) response_dict = self.assert_json_success(result)
path_id = re.sub("/user_uploads/", "", response_dict["uri"]) path_id = re.sub("/user_uploads/", "", response_dict["uri"])
assert settings.LOCAL_FILES_DIR is not None
file_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
self.assertTrue(os.path.isfile(file_path))
self.assertTrue(delete_message_attachment(path_id)) self.assertTrue(delete_message_attachment(path_id))
self.assertFalse(os.path.isfile(file_path))
def test_delete_message_attachments(self) -> None:
assert settings.LOCAL_UPLOADS_DIR is not None
assert settings.LOCAL_FILES_DIR is not None
user_profile = self.example_user("hamlet")
path_ids = []
for n in range(1, 1005):
uri = upload_message_attachment(
"dummy.txt", len(b"zulip!"), "text/plain", b"zulip!", user_profile
)
base = "/user_uploads/"
self.assertEqual(base, uri[: len(base)])
path_id = re.sub("/user_uploads/", "", uri)
path_ids.append(path_id)
file_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
self.assertTrue(os.path.isfile(file_path))
delete_message_attachments(path_ids)
for path_id in path_ids:
file_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
self.assertFalse(os.path.isfile(file_path))
def test_avatar_url(self) -> None: def test_avatar_url(self) -> None:
self.login("hamlet") self.login("hamlet")

View File

@@ -3,6 +3,7 @@ import os
import re import re
import urllib import urllib
from io import StringIO from io import StringIO
from unittest.mock import patch
import botocore.exceptions import botocore.exceptions
from django.conf import settings from django.conf import settings
@@ -22,6 +23,7 @@ from zerver.lib.test_helpers import (
from zerver.lib.upload import ( from zerver.lib.upload import (
delete_export_tarball, delete_export_tarball,
delete_message_attachment, delete_message_attachment,
delete_message_attachments,
upload_export_tarball, upload_export_tarball,
upload_message_attachment, upload_message_attachment,
) )
@@ -31,6 +33,7 @@ from zerver.lib.upload.base import (
MEDIUM_AVATAR_SIZE, MEDIUM_AVATAR_SIZE,
resize_avatar, resize_avatar,
) )
from zerver.lib.upload.s3 import S3UploadBackend
from zerver.models import ( from zerver.models import (
Attachment, Attachment,
RealmEmoji, RealmEmoji,
@@ -96,7 +99,7 @@ class S3Test(ZulipTestCase):
@use_s3_backend @use_s3_backend
def test_delete_message_attachment(self) -> None: def test_delete_message_attachment(self) -> None:
create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET) bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0]
user_profile = self.example_user("hamlet") user_profile = self.example_user("hamlet")
uri = upload_message_attachment( uri = upload_message_attachment(
@@ -104,10 +107,37 @@ class S3Test(ZulipTestCase):
) )
path_id = re.sub("/user_uploads/", "", uri) path_id = re.sub("/user_uploads/", "", uri)
self.assertIsNotNone(bucket.Object(path_id).get())
self.assertTrue(delete_message_attachment(path_id)) self.assertTrue(delete_message_attachment(path_id))
with self.assertRaises(botocore.exceptions.ClientError):
bucket.Object(path_id).load()
@use_s3_backend
def test_delete_message_attachments(self) -> None:
bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0]
user_profile = self.example_user("hamlet")
path_ids = []
for n in range(1, 5):
uri = upload_message_attachment(
"dummy.txt", len(b"zulip!"), "text/plain", b"zulip!", user_profile
)
path_id = re.sub("/user_uploads/", "", uri)
self.assertIsNotNone(bucket.Object(path_id).get())
path_ids.append(path_id)
with patch.object(S3UploadBackend, "delete_message_attachment") as single_delete:
delete_message_attachments(path_ids)
single_delete.assert_not_called()
for path_id in path_ids:
with self.assertRaises(botocore.exceptions.ClientError):
bucket.Object(path_id).load()
@use_s3_backend @use_s3_backend
def test_delete_message_attachment_when_file_doesnt_exist(self) -> None: def test_delete_message_attachment_when_file_doesnt_exist(self) -> None:
bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0]
with self.assertRaises(botocore.exceptions.ClientError):
bucket.Object("non-existent-file").load()
with self.assertLogs(level="WARNING") as warn_log: with self.assertLogs(level="WARNING") as warn_log:
self.assertEqual(False, delete_message_attachment("non-existent-file")) self.assertEqual(False, delete_message_attachment("non-existent-file"))
self.assertEqual( self.assertEqual(