Files
zulip/zerver/management/commands/delete_old_unclaimed_attachments.py
Alex Vandiver 3bf3f47b49 delete_old_unclaimed_attachments: Add flag to clean up storage.
Actions like deleting realms may leave unreferenced uploads in the
attachment storage backend.

Fix these by walking the complete contents of the attachment storage
backend, and removing files which are no longer present in the
database.  This may take quite some time, as it is necessarily O(n) in
the number of files uploaded to the system.
2023-03-02 16:36:19 -08:00

92 lines
3.6 KiB
Python

import datetime
from argparse import ArgumentParser
from typing import Any
from django.core.management.base import BaseCommand, CommandError
from django.utils.timezone import now as timezone_now
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
from zerver.lib.upload import all_message_attachments, delete_message_attachments
from zerver.models import ArchivedAttachment, Attachment, get_old_unclaimed_attachments
class Command(BaseCommand):
    """Delete old unclaimed attachments, optionally sweeping the storage backend.

    Without ``--for-real`` the command only lists what it would remove and
    then exits with a ``CommandError`` so that a dry run is never mistaken
    for a real deletion.
    """

    help = (
        "Remove unclaimed attachments from storage older than a supplied\n"
        "numerical value indicating the limit of how old the attachment can be.\n"
        "Five weeks is taken as the default value."
    )

    def add_arguments(self, parser: ArgumentParser) -> None:
        """Register the ``--weeks``, ``--for-real`` and ``--clean-up-storage`` flags."""
        parser.add_argument(
            "-w",
            "--weeks",
            dest="delta_weeks",
            default=5,
            type=int,
            help="Limiting value of how old the file can be.",
        )
        parser.add_argument(
            "-f",
            "--for-real",
            action="store_true",
            help="Actually remove the files from the storage.",
        )
        parser.add_argument(
            "-C",
            "--clean-up-storage",
            action="store_true",
            help="Examine all attachments in storage (local disk or S3) and remove "
            "any files which are not in the database. This may take a very long time!",
        )

    def handle(self, *args: Any, **options: Any) -> None:
        """List (and, with ``--for-real``, delete) old unclaimed attachments.

        Raises:
            CommandError: when run without ``--for-real``, to signal that
                nothing was actually deleted.
        """
        delta_weeks = options["delta_weeks"]
        for_real = options["for_real"]
        print(f"Deleting unclaimed attached files older than {delta_weeks} weeks")

        # print the list of files that are going to be removed
        old_attachments, old_archived_attachments = get_old_unclaimed_attachments(delta_weeks)
        for old_attachment in old_attachments:
            print(f"* {old_attachment.file_name} created at {old_attachment.create_time}")
        for old_archived_attachment in old_archived_attachments:
            print(
                f"* {old_archived_attachment.file_name} created at {old_archived_attachment.create_time}"
            )

        if for_real:
            do_delete_old_unclaimed_attachments(delta_weeks)
            print("")
            print("Unclaimed files deleted.")

        if options["clean_up_storage"]:
            print("")
            self.clean_attachment_upload_backend(dry_run=not for_real)

        if not for_real:
            print("")
            raise CommandError("This was a dry run. Pass -f to actually delete.")

    def clean_attachment_upload_backend(self, dry_run: bool = True) -> None:
        """Remove files in the storage backend that have no database row.

        Walks everything yielded by ``all_message_attachments()`` and treats a
        file as orphaned when neither ``Attachment`` nor ``ArchivedAttachment``
        has a row with its ``path_id``.

        Args:
            dry_run: when true, only print the orphaned files; delete nothing.
        """
        # The help text names S3 as a possible backend, and S3's DeleteObjects
        # call accepts at most 1000 keys per request, so a batch must never
        # grow past this size before being flushed.
        batch_size = 1000

        cutoff = timezone_now() - datetime.timedelta(minutes=5)
        print(f"Removing extra files in storage back-end older than {cutoff.isoformat()}")
        to_delete = []
        for path_id, modified_at in all_message_attachments():
            if modified_at > cutoff:
                # We upload files to the backend storage and _then_
                # make the database entry, so must give some leeway to
                # recently-added files which do not have DB rows.
                # Checked first because it needs no database query.
                continue
            if Attachment.objects.filter(path_id=path_id).exists():
                continue
            if ArchivedAttachment.objects.filter(path_id=path_id).exists():
                continue
            print(f"* {path_id} modified at {modified_at}")
            if dry_run:
                continue
            to_delete.append(path_id)
            if len(to_delete) >= batch_size:
                delete_message_attachments(to_delete)
                to_delete = []
        # Flush the final partial batch; the list is only ever populated
        # when this is not a dry run.
        if to_delete:
            delete_message_attachments(to_delete)