models: Move some functions to zerver.lib.attachments.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg
2023-12-15 11:03:19 -08:00
committed by Tim Abbott
parent 09d0abfe70
commit cff0b78771
8 changed files with 180 additions and 175 deletions

View File

@@ -1,17 +1,10 @@
import logging
from typing import Any, Dict, List, Union
from zerver.lib.attachments import get_old_unclaimed_attachments, validate_attachment_request
from zerver.lib.markdown import MessageRenderingResult
from zerver.lib.upload import claim_attachment, delete_message_attachment
from zerver.models import (
Attachment,
Message,
ScheduledMessage,
Stream,
UserProfile,
get_old_unclaimed_attachments,
validate_attachment_request,
)
from zerver.models import Attachment, Message, ScheduledMessage, Stream, UserProfile
from zerver.tornado.django_api import send_event

View File

@@ -1,10 +1,25 @@
from typing import Any, Dict, List
from datetime import timedelta
from typing import Any, Dict, List, Optional, Tuple, Union
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.db.models import Exists, OuterRef, QuerySet
from django.utils.timezone import now as timezone_now
from django.utils.translation import gettext as _
from zerver.lib.exceptions import JsonableError
from zerver.lib.exceptions import JsonableError, RateLimitedError
from zerver.lib.upload import delete_message_attachment
from zerver.models import Attachment, UserProfile
from zerver.models import (
ArchivedAttachment,
Attachment,
Message,
Realm,
Recipient,
Stream,
Subscription,
UserMessage,
UserProfile,
)
def user_attachments(user_profile: UserProfile) -> List[Dict[str, Any]]:
@@ -33,3 +48,150 @@ def remove_attachment(user_profile: UserProfile, attachment: Attachment) -> None
_("An error occurred while deleting the attachment. Please try again later.")
)
attachment.delete()
def validate_attachment_request_for_spectator_access(
realm: Realm, attachment: Attachment
) -> Optional[bool]:
if attachment.realm != realm:
return False
# Update cached is_web_public property, if necessary.
if attachment.is_web_public is None:
# Fill the cache in a single query. This is important to avoid
# a potential race condition between checking and setting,
# where the attachment could have been moved again.
Attachment.objects.filter(id=attachment.id, is_web_public__isnull=True).update(
is_web_public=Exists(
Message.objects.filter(
# Uses index: zerver_attachment_messages_attachment_id_message_id_key
realm_id=realm.id,
attachment=OuterRef("id"),
recipient__stream__invite_only=False,
recipient__stream__is_web_public=True,
),
),
)
attachment.refresh_from_db()
if not attachment.is_web_public:
return False
if settings.RATE_LIMITING:
try:
from zerver.lib.rate_limiter import rate_limit_spectator_attachment_access_by_file
rate_limit_spectator_attachment_access_by_file(attachment.path_id)
except RateLimitedError:
return False
return True
def validate_attachment_request(
maybe_user_profile: Union[UserProfile, AnonymousUser],
path_id: str,
realm: Optional[Realm] = None,
) -> Optional[bool]:
try:
attachment = Attachment.objects.get(path_id=path_id)
except Attachment.DoesNotExist:
return None
if isinstance(maybe_user_profile, AnonymousUser):
assert realm is not None
return validate_attachment_request_for_spectator_access(realm, attachment)
user_profile = maybe_user_profile
assert isinstance(user_profile, UserProfile)
# Update cached is_realm_public property, if necessary.
if attachment.is_realm_public is None:
# Fill the cache in a single query. This is important to avoid
# a potential race condition between checking and setting,
# where the attachment could have been moved again.
Attachment.objects.filter(id=attachment.id, is_realm_public__isnull=True).update(
is_realm_public=Exists(
Message.objects.filter(
# Uses index: zerver_attachment_messages_attachment_id_message_id_key
realm_id=user_profile.realm_id,
attachment=OuterRef("id"),
recipient__stream__invite_only=False,
),
),
)
attachment.refresh_from_db()
if user_profile == attachment.owner:
# If you own the file, you can access it.
return True
if (
attachment.is_realm_public
and attachment.realm == user_profile.realm
and user_profile.can_access_public_streams()
):
# Any user in the realm can access realm-public files
return True
messages = attachment.messages.all()
if UserMessage.objects.filter(user_profile=user_profile, message__in=messages).exists():
# If it was sent in a direct message or private stream
# message, then anyone who received that message can access it.
return True
# The user didn't receive any of the messages that included this
# attachment. But they might still have access to it, if it was
# sent to a stream they are on where history is public to
# subscribers.
# These are subscriptions to a stream one of the messages was sent to
relevant_stream_ids = Subscription.objects.filter(
user_profile=user_profile,
active=True,
recipient__type=Recipient.STREAM,
recipient__in=[m.recipient_id for m in messages],
).values_list("recipient__type_id", flat=True)
if len(relevant_stream_ids) == 0:
return False
return Stream.objects.filter(
id__in=relevant_stream_ids, history_public_to_subscribers=True
).exists()
def get_old_unclaimed_attachments(
weeks_ago: int,
) -> Tuple[QuerySet[Attachment], QuerySet[ArchivedAttachment]]:
"""
The logic in this function is fairly tricky. The essence is that
a file should be cleaned up if and only if it not referenced by any
Message, ScheduledMessage or ArchivedMessage. The way to find that out is through the
Attachment and ArchivedAttachment tables.
The queries are complicated by the fact that an uploaded file
may have either only an Attachment row, only an ArchivedAttachment row,
or both - depending on whether some, all or none of the messages
linking to it have been archived.
"""
delta_weeks_ago = timezone_now() - timedelta(weeks=weeks_ago)
# The Attachment vs ArchivedAttachment queries are asymmetric because only
# Attachment has the scheduled_messages relation.
old_attachments = Attachment.objects.annotate(
has_other_messages=Exists(
ArchivedAttachment.objects.filter(id=OuterRef("id")).exclude(messages=None)
)
).filter(
messages=None,
scheduled_messages=None,
create_time__lt=delta_weeks_ago,
has_other_messages=False,
)
old_archived_attachments = ArchivedAttachment.objects.annotate(
has_other_messages=Exists(
Attachment.objects.filter(id=OuterRef("id")).exclude(
messages=None, scheduled_messages=None
)
)
).filter(messages=None, create_time__lt=delta_weeks_ago, has_other_messages=False)
return old_attachments, old_archived_attachments

View File

@@ -7,8 +7,9 @@ from django.utils.timezone import now as timezone_now
from typing_extensions import override
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
from zerver.lib.attachments import get_old_unclaimed_attachments
from zerver.lib.upload import all_message_attachments, delete_message_attachments
from zerver.models import ArchivedAttachment, Attachment, get_old_unclaimed_attachments
from zerver.models import ArchivedAttachment, Attachment
class Command(BaseCommand):

View File

@@ -6,12 +6,11 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, TypedDict, TypeVa
import orjson
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.core.exceptions import ValidationError
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models
from django.db.backends.base.base import BaseDatabaseWrapper
from django.db.models import CASCADE, Exists, OuterRef, Q, QuerySet
from django.db.models import CASCADE, Q, QuerySet
from django.db.models.signals import post_delete, post_save
from django.db.models.sql.compiler import SQLCompiler
from django.utils.timezone import now as timezone_now
@@ -26,7 +25,6 @@ from zerver.lib.cache import (
realm_alert_words_cache_key,
)
from zerver.lib.display_recipient import get_recipient_ids
from zerver.lib.exceptions import RateLimitedError
from zerver.lib.timestamp import datetime_to_timestamp
from zerver.lib.types import (
ExtendedFieldElement,
@@ -150,153 +148,6 @@ def query_for_ids(
return query
def validate_attachment_request_for_spectator_access(
realm: Realm, attachment: Attachment
) -> Optional[bool]:
if attachment.realm != realm:
return False
# Update cached is_web_public property, if necessary.
if attachment.is_web_public is None:
# Fill the cache in a single query. This is important to avoid
# a potential race condition between checking and setting,
# where the attachment could have been moved again.
Attachment.objects.filter(id=attachment.id, is_web_public__isnull=True).update(
is_web_public=Exists(
Message.objects.filter(
# Uses index: zerver_attachment_messages_attachment_id_message_id_key
realm_id=realm.id,
attachment=OuterRef("id"),
recipient__stream__invite_only=False,
recipient__stream__is_web_public=True,
),
),
)
attachment.refresh_from_db()
if not attachment.is_web_public:
return False
if settings.RATE_LIMITING:
try:
from zerver.lib.rate_limiter import rate_limit_spectator_attachment_access_by_file
rate_limit_spectator_attachment_access_by_file(attachment.path_id)
except RateLimitedError:
return False
return True
def validate_attachment_request(
maybe_user_profile: Union[UserProfile, AnonymousUser],
path_id: str,
realm: Optional[Realm] = None,
) -> Optional[bool]:
try:
attachment = Attachment.objects.get(path_id=path_id)
except Attachment.DoesNotExist:
return None
if isinstance(maybe_user_profile, AnonymousUser):
assert realm is not None
return validate_attachment_request_for_spectator_access(realm, attachment)
user_profile = maybe_user_profile
assert isinstance(user_profile, UserProfile)
# Update cached is_realm_public property, if necessary.
if attachment.is_realm_public is None:
# Fill the cache in a single query. This is important to avoid
# a potential race condition between checking and setting,
# where the attachment could have been moved again.
Attachment.objects.filter(id=attachment.id, is_realm_public__isnull=True).update(
is_realm_public=Exists(
Message.objects.filter(
# Uses index: zerver_attachment_messages_attachment_id_message_id_key
realm_id=user_profile.realm_id,
attachment=OuterRef("id"),
recipient__stream__invite_only=False,
),
),
)
attachment.refresh_from_db()
if user_profile == attachment.owner:
# If you own the file, you can access it.
return True
if (
attachment.is_realm_public
and attachment.realm == user_profile.realm
and user_profile.can_access_public_streams()
):
# Any user in the realm can access realm-public files
return True
messages = attachment.messages.all()
if UserMessage.objects.filter(user_profile=user_profile, message__in=messages).exists():
# If it was sent in a direct message or private stream
# message, then anyone who received that message can access it.
return True
# The user didn't receive any of the messages that included this
# attachment. But they might still have access to it, if it was
# sent to a stream they are on where history is public to
# subscribers.
# These are subscriptions to a stream one of the messages was sent to
relevant_stream_ids = Subscription.objects.filter(
user_profile=user_profile,
active=True,
recipient__type=Recipient.STREAM,
recipient__in=[m.recipient_id for m in messages],
).values_list("recipient__type_id", flat=True)
if len(relevant_stream_ids) == 0:
return False
return Stream.objects.filter(
id__in=relevant_stream_ids, history_public_to_subscribers=True
).exists()
def get_old_unclaimed_attachments(
weeks_ago: int,
) -> Tuple[QuerySet[Attachment], QuerySet[ArchivedAttachment]]:
"""
The logic in this function is fairly tricky. The essence is that
a file should be cleaned up if and only if it not referenced by any
Message, ScheduledMessage or ArchivedMessage. The way to find that out is through the
Attachment and ArchivedAttachment tables.
The queries are complicated by the fact that an uploaded file
may have either only an Attachment row, only an ArchivedAttachment row,
or both - depending on whether some, all or none of the messages
linking to it have been archived.
"""
delta_weeks_ago = timezone_now() - timedelta(weeks=weeks_ago)
# The Attachment vs ArchivedAttachment queries are asymmetric because only
# Attachment has the scheduled_messages relation.
old_attachments = Attachment.objects.annotate(
has_other_messages=Exists(
ArchivedAttachment.objects.filter(id=OuterRef("id")).exclude(messages=None)
)
).filter(
messages=None,
scheduled_messages=None,
create_time__lt=delta_weeks_ago,
has_other_messages=False,
)
old_archived_attachments = ArchivedAttachment.objects.annotate(
has_other_messages=Exists(
Attachment.objects.filter(id=OuterRef("id")).exclude(
messages=None, scheduled_messages=None
)
)
).filter(messages=None, create_time__lt=delta_weeks_ago, has_other_messages=False)
return old_attachments, old_archived_attachments
class UserActivity(models.Model):
"""Data table recording the last time each user hit Zulip endpoints
via which Clients; unlike UserPresence, these data are not exposed

View File

@@ -38,6 +38,10 @@ from zerver.actions.streams import (
)
from zerver.actions.user_groups import add_subgroups_to_user_group, check_add_user_group
from zerver.actions.users import do_change_user_role, do_deactivate_user
from zerver.lib.attachments import (
validate_attachment_request,
validate_attachment_request_for_spectator_access,
)
from zerver.lib.default_streams import (
get_default_stream_ids_for_realm,
get_default_streams_for_realm_as_dicts,
@@ -104,8 +108,6 @@ from zerver.models import (
UserGroup,
UserMessage,
UserProfile,
validate_attachment_request,
validate_attachment_request_for_spectator_access,
)
from zerver.models.realms import get_realm
from zerver.models.streams import get_default_stream_groups, get_stream

View File

@@ -21,6 +21,7 @@ from zerver.actions.realm_icon import do_change_icon_source
from zerver.actions.realm_logo import do_change_logo_source
from zerver.actions.realm_settings import do_change_realm_plan_type, do_set_realm_property
from zerver.actions.user_settings import do_delete_avatar_image
from zerver.lib.attachments import validate_attachment_request
from zerver.lib.avatar import avatar_url, get_avatar_field
from zerver.lib.cache import cache_get, get_realm_used_upload_space_cache_key
from zerver.lib.create_user import copy_default_settings
@@ -39,14 +40,7 @@ from zerver.lib.upload.base import BadImageError, ZulipUploadBackend, resize_emo
from zerver.lib.upload.local import LocalUploadBackend
from zerver.lib.upload.s3 import S3UploadBackend
from zerver.lib.users import get_api_key
from zerver.models import (
Attachment,
Message,
Realm,
RealmDomain,
UserProfile,
validate_attachment_request,
)
from zerver.models import Attachment, Message, Realm, RealmDomain, UserProfile
from zerver.models.realms import get_realm
from zerver.models.users import get_system_bot, get_user_by_delivery_email

View File

@@ -6,9 +6,10 @@ from django.shortcuts import redirect
from django.utils.translation import gettext as _
from zerver.context_processors import get_valid_realm_from_request
from zerver.lib.attachments import validate_attachment_request
from zerver.lib.request import REQ, has_request_variables
from zerver.lib.thumbnail import generate_thumbnail_url
from zerver.models import Realm, UserProfile, validate_attachment_request
from zerver.models import Realm, UserProfile
def validate_thumbnail_request(

View File

@@ -26,6 +26,7 @@ from django.utils.translation import gettext as _
from zerver.context_processors import get_valid_realm_from_request
from zerver.decorator import zulip_redirect_to_login
from zerver.lib.attachments import validate_attachment_request
from zerver.lib.exceptions import JsonableError
from zerver.lib.response import json_success
from zerver.lib.storage import static_path
@@ -37,7 +38,7 @@ from zerver.lib.upload import (
from zerver.lib.upload.base import INLINE_MIME_TYPES
from zerver.lib.upload.local import assert_is_local_storage_path
from zerver.lib.upload.s3 import get_signed_upload_url
from zerver.models import UserProfile, validate_attachment_request
from zerver.models import UserProfile
def patch_disposition_header(response: HttpResponse, url: str, is_attachment: bool) -> None: