thumbnail: Support checking for images from streaming sources.

We may not always have trivial access to all of the bytes of the
uploaded file -- for instance, if the file was uploaded previously, or
by some other process.  Downloading the entire image in order to check
its headers is an inefficient use of time and bandwidth.

Adjust `maybe_thumbnail` and dependencies to potentially take a
`pyvips.Source` which supports streaming data from S3 or disk.  This
allows making the ImageAttachment row, if deemed appropriate, based on
only a few KB of data, and not the entire image.
This commit is contained in:
Alex Vandiver
2024-09-10 18:33:25 +00:00
committed by Tim Abbott
parent 758aa36cbe
commit 9a1f78db22
8 changed files with 113 additions and 10 deletions

View File

@@ -8,6 +8,7 @@ from datetime import datetime
from typing import IO, Any, BinaryIO
from urllib.parse import unquote, urljoin
import pyvips
from django.conf import settings
from django.core.files.uploadedfile import UploadedFile
from django.db import transaction
@@ -26,7 +27,7 @@ from zerver.lib.thumbnail import (
resize_avatar,
resize_emoji,
)
from zerver.lib.upload.base import INLINE_MIME_TYPES, ZulipUploadBackend
from zerver.lib.upload.base import INLINE_MIME_TYPES, StreamingSourceWithSize, ZulipUploadBackend
from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile
from zerver.models.users import is_cross_realm_bot_email
@@ -48,22 +49,28 @@ def create_attachment(
file_name: str,
path_id: str,
content_type: str,
file_data: bytes,
file_data: bytes | StreamingSourceWithSize,
user_profile: UserProfile,
realm: Realm,
) -> None:
assert (user_profile.realm_id == realm.id) or is_cross_realm_bot_email(
user_profile.delivery_email
)
if isinstance(file_data, bytes):
file_size = len(file_data)
file_real_data: bytes | pyvips.Source = file_data
else:
file_size = file_data.size
file_real_data = file_data.source
attachment = Attachment.objects.create(
file_name=file_name,
path_id=path_id,
owner=user_profile,
realm=realm,
size=len(file_data),
size=file_size,
content_type=content_type,
)
maybe_thumbnail(attachment, file_data)
maybe_thumbnail(attachment, file_real_data)
from zerver.actions.uploads import notify_attachment_update
notify_attachment_update(user_profile, "add", attachment.to_dict())
@@ -194,6 +201,10 @@ def upload_message_attachment_from_request(
)
def attachment_vips_source(path_id: str) -> StreamingSourceWithSize:
    """Return a pyvips source, together with its size, for an uploaded file.

    This is a thin wrapper: the work is done by the
    ``attachment_vips_source`` method of ``upload_backend``, whose
    result is handed back unchanged.
    """
    source_with_size = upload_backend.attachment_vips_source(path_id)
    return source_with_size
def save_attachment_contents(path_id: str, filehandle: BinaryIO) -> None:
    """Write the stored contents of ``path_id`` into ``filehandle``.

    Delegates to the ``save_attachment_contents`` method of
    ``upload_backend`` and passes its return value through.
    """
    return upload_backend.save_attachment_contents(
        path_id,
        filehandle,
    )

View File

@@ -1,8 +1,11 @@
import os
from collections.abc import Callable, Iterator
from dataclasses import dataclass
from datetime import datetime
from typing import IO, Any, BinaryIO
import pyvips
from zerver.models import Realm, UserProfile
INLINE_MIME_TYPES = [
@@ -27,6 +30,12 @@ INLINE_MIME_TYPES = [
]
@dataclass
class StreamingSourceWithSize:
    """A pyvips source bundled with the size of the data behind it.

    Lets callers learn the size of an upload without reading all of
    its bytes out of the source.
    """

    # Size, in bytes, of the underlying file or object.
    size: int
    # The pyvips source from which the data can be read.
    source: pyvips.Source
class ZulipUploadBackend:
# Message attachment uploads
def get_public_upload_root_url(self) -> str:
@@ -48,6 +57,9 @@ class ZulipUploadBackend:
def save_attachment_contents(self, path_id: str, filehandle: BinaryIO) -> None:
    """Write the attachment stored at ``path_id`` into ``filehandle``.

    The base class has no implementation of its own.

    Raises:
        NotImplementedError: always, in this base class.
    """
    raise NotImplementedError()
def attachment_vips_source(self, path_id: str) -> StreamingSourceWithSize:
    """Return a pyvips source, with its size, for the file at ``path_id``.

    The base class has no implementation of its own.

    Raises:
        NotImplementedError: always, in this base class.
    """
    raise NotImplementedError()
def delete_message_attachment(self, path_id: str) -> bool:
    """Delete the message attachment stored at ``path_id``.

    The base class has no implementation of its own.

    Raises:
        NotImplementedError: always, in this base class.
    """
    raise NotImplementedError()

View File

@@ -7,13 +7,14 @@ from collections.abc import Callable, Iterator
from datetime import datetime
from typing import IO, Any, BinaryIO, Literal
import pyvips
from django.conf import settings
from typing_extensions import override
from zerver.lib.mime_types import guess_type
from zerver.lib.thumbnail import resize_avatar, resize_logo
from zerver.lib.timestamp import timestamp_to_datetime
from zerver.lib.upload.base import ZulipUploadBackend
from zerver.lib.upload.base import StreamingSourceWithSize, ZulipUploadBackend
from zerver.lib.utils import assert_is_not_none
from zerver.models import Realm, RealmEmoji, UserProfile
@@ -100,6 +101,13 @@ class LocalUploadBackend(ZulipUploadBackend):
def save_attachment_contents(self, path_id: str, filehandle: BinaryIO) -> None:
    """Write the locally stored file for ``path_id`` into ``filehandle``.

    The data comes from ``read_local_file`` for the "files" area and is
    passed to ``filehandle.write`` in a single call.
    """
    stored_contents = read_local_file("files", path_id)
    filehandle.write(stored_contents)
@override
def attachment_vips_source(self, path_id: str) -> StreamingSourceWithSize:
    """Return a file-backed pyvips source and the on-disk size for ``path_id``.

    The path is built as ``LOCAL_UPLOADS_DIR/files/<path_id>`` and run
    through ``assert_is_local_storage_path`` before it is opened.
    """
    uploads_root = assert_is_not_none(settings.LOCAL_UPLOADS_DIR)
    file_path = os.path.join(uploads_root, "files", path_id)
    # NOTE(review): presumably guards against paths escaping local
    # storage -- confirm against the helper's definition.
    assert_is_local_storage_path("files", file_path)

    # Open the source before asking for the size, as the original did.
    vips_source = pyvips.Source.new_from_file(file_path)
    byte_count = os.path.getsize(file_path)
    return StreamingSourceWithSize(size=byte_count, source=vips_source)
@override
def delete_message_attachment(self, path_id: str) -> bool:
    """Delete the local file for ``path_id`` from the "files" area.

    Returns whatever boolean ``delete_local_file`` reports.
    """
    outcome = delete_local_file("files", path_id)
    return outcome

View File

@@ -8,14 +8,17 @@ from urllib.parse import urljoin, urlsplit, urlunsplit
import boto3
import botocore
import pyvips
from botocore.client import Config
from botocore.response import StreamingBody
from django.conf import settings
from django.utils.http import content_disposition_header
from mypy_boto3_s3.service_resource import Bucket
from typing_extensions import override
from zerver.lib.partial import partial
from zerver.lib.thumbnail import resize_avatar, resize_logo
from zerver.lib.upload.base import INLINE_MIME_TYPES, ZulipUploadBackend
from zerver.lib.upload.base import INLINE_MIME_TYPES, StreamingSourceWithSize, ZulipUploadBackend
from zerver.models import Realm, RealmEmoji, UserProfile
# Duration that the signed upload URLs that we redirect to when
@@ -236,6 +239,17 @@ class S3UploadBackend(ZulipUploadBackend):
for chunk in self.uploads_bucket.Object(path_id).get()["Body"]:
filehandle.write(chunk)
@override
def attachment_vips_source(self, path_id: str) -> StreamingSourceWithSize:
    """Return a pyvips source that reads ``path_id`` from the uploads bucket.

    Issues a GET for the object and connects its streaming body to a
    custom pyvips source through a read callback.  The reported size
    is the ``ContentLength`` of the GET response.
    """
    response = self.uploads_bucket.Object(path_id).get()
    body: StreamingBody = response["Body"]

    def read_from_body(size: int) -> bytes:
        # Forward the requested byte count to the S3 streaming body.
        return body.read(amt=size)

    source: pyvips.Source = pyvips.SourceCustom()
    source.on_read(read_from_body)
    return StreamingSourceWithSize(size=response["ContentLength"], source=source)
@override
def delete_message_attachment(self, path_id: str) -> bool:
    """Delete the object at ``path_id`` from the uploads bucket.

    Returns whatever boolean ``delete_file_from_s3`` reports.
    """
    bucket = self.uploads_bucket
    return self.delete_file_from_s3(path_id, bucket)