mirror of
https://github.com/zulip/zulip.git
synced 2025-11-11 01:16:19 +00:00
Actions like deleting realms may leave unreferenced uploads in the attachment storage backend. Fix these by walking the complete contents of the attachment storage backend, and removing files which are no longer present in the database. This may take quite some time, as it is necessarily O(n) in the number of files uploaded to the system.
92 lines
3.6 KiB
Python
92 lines
3.6 KiB
Python
import datetime
|
|
from argparse import ArgumentParser
|
|
from typing import Any
|
|
|
|
from django.core.management.base import BaseCommand, CommandError
|
|
from django.utils.timezone import now as timezone_now
|
|
|
|
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
|
|
from zerver.lib.upload import all_message_attachments, delete_message_attachments
|
|
from zerver.models import ArchivedAttachment, Attachment, get_old_unclaimed_attachments
|
|
|
|
|
|
class Command(BaseCommand):
    """Management command that removes old unclaimed attachments.

    By default this is a dry run: it only lists what would be removed and
    then raises ``CommandError``. Pass ``--for-real`` to actually delete.
    With ``--clean-up-storage`` it additionally walks every file in the
    attachment storage backend and removes files that have no matching
    ``Attachment`` or ``ArchivedAttachment`` row.
    """

    help = """Remove unclaimed attachments from storage older than a supplied
              numerical value indicating the limit of how old the attachment can be.
              Five weeks is taken as the default value."""

    # Number of path ids handed to delete_message_attachments() per call.
    # S3's multi-object delete accepts at most 1000 keys per request; it is
    # not visible from this file whether the backend re-chunks the list, so
    # batches are kept within that limit.
    DELETE_BATCH_SIZE = 1000

    def add_arguments(self, parser: ArgumentParser) -> None:
        """Register the command-line options of this command."""
        parser.add_argument(
            "-w",
            "--weeks",
            dest="delta_weeks",
            default=5,
            type=int,
            help="Limiting value of how old the file can be.",
        )

        parser.add_argument(
            "-f",
            "--for-real",
            action="store_true",
            help="Actually remove the files from the storage.",
        )

        parser.add_argument(
            "-C",
            "--clean-up-storage",
            action="store_true",
            help="Examine all attachments in storage (local disk or S3) and remove "
            "any files which are not in the database. This may take a very long time!",
        )

    def handle(self, *args: Any, **options: Any) -> None:
        """List, and with ``--for-real`` delete, old unclaimed attachments.

        Raises:
            CommandError: at the end of a dry run (``--for-real`` not given),
                after everything that would be deleted has been printed.
        """
        delta_weeks = options["delta_weeks"]
        print(f"Deleting unclaimed attached files older than {delta_weeks} weeks")

        # print the list of files that are going to be removed
        old_attachments, old_archived_attachments = get_old_unclaimed_attachments(delta_weeks)
        for old_attachment in old_attachments:
            print(f"* {old_attachment.file_name} created at {old_attachment.create_time}")
        for old_archived_attachment in old_archived_attachments:
            print(
                f"* {old_archived_attachment.file_name} created at {old_archived_attachment.create_time}"
            )

        if options["for_real"]:
            do_delete_old_unclaimed_attachments(delta_weeks)
            print("")
            print("Unclaimed files deleted.")

        if options["clean_up_storage"]:
            print("")
            self.clean_attachment_upload_backend(dry_run=not options["for_real"])

        if not options["for_real"]:
            # Signal the dry run as an error so that nobody mistakes the
            # listing above for a completed deletion.
            print("")
            raise CommandError("This was a dry run. Pass -f to actually delete.")

    def clean_attachment_upload_backend(self, dry_run: bool = True) -> None:
        """Remove stored files that have no corresponding database row.

        Iterates over every ``(path_id, modified_at)`` pair produced by
        ``all_message_attachments()`` and prints each file that has neither
        an ``Attachment`` nor an ``ArchivedAttachment`` row and was modified
        more than five minutes ago.

        Args:
            dry_run: when true (the default), only print the candidates;
                when false, also delete them in batches.
        """
        cutoff = timezone_now() - datetime.timedelta(minutes=5)
        print(f"Removing extra files in storage back-end older than {cutoff.isoformat()}")
        to_delete: list[str] = []
        for path_id, modified_at in all_message_attachments():
            if Attachment.objects.filter(path_id=path_id).exists():
                continue
            if ArchivedAttachment.objects.filter(path_id=path_id).exists():
                continue
            if modified_at > cutoff:
                # We upload files to the backend storage and _then_
                # make the database entry, so must give some leeway to
                # recently-added files which do not have DB rows.
                continue
            print(f"* {path_id} modified at {modified_at}")
            if dry_run:
                continue
            to_delete.append(path_id)
            # Flush as soon as the batch is full, so that a single call never
            # carries more than DELETE_BATCH_SIZE path ids.
            if len(to_delete) >= self.DELETE_BATCH_SIZE:
                delete_message_attachments(to_delete)
                to_delete = []
        # Only populated when not in dry-run mode; flush the final partial batch.
        if to_delete:
            delete_message_attachments(to_delete)
|