diff --git a/zerver/lib/thumbnail.py b/zerver/lib/thumbnail.py index 52a42fd033..779fc69069 100644 --- a/zerver/lib/thumbnail.py +++ b/zerver/lib/thumbnail.py @@ -9,14 +9,13 @@ from typing import TypeVar import pyvips from bs4 import BeautifulSoup from bs4.formatter import EntitySubstitution, HTMLFormatter -from django.db.models import OuterRef, Subquery from django.utils.translation import gettext as _ from typing_extensions import override from zerver.lib.exceptions import ErrorCode, JsonableError from zerver.lib.mime_types import INLINE_MIME_TYPES from zerver.lib.queue import queue_event_on_commit -from zerver.models import Attachment, ImageAttachment +from zerver.models import ImageAttachment DEFAULT_AVATAR_SIZE = 100 MEDIUM_AVATAR_SIZE = 500 @@ -293,14 +292,14 @@ def resize_emoji( def missing_thumbnails( - image_attachment: ImageAttachment, original_content_type: str | None = None + image_attachment: ImageAttachment, ) -> list[ThumbnailFormat]: seen_thumbnails: set[StoredThumbnailFormat] = set() for existing_thumbnail in image_attachment.thumbnail_metadata: seen_thumbnails.add(StoredThumbnailFormat(**existing_thumbnail)) potential_output_formats = list(THUMBNAIL_OUTPUT_FORMATS) - if original_content_type is None or original_content_type not in INLINE_MIME_TYPES: + if image_attachment.content_type not in INLINE_MIME_TYPES: if image_attachment.original_width_px >= image_attachment.original_height_px: additional_format = ThumbnailFormat( TRANSCODED_IMAGE_FORMAT.extension, @@ -368,6 +367,7 @@ def maybe_thumbnail( original_height_px=height, frames=image.get_n_pages(), thumbnail_metadata=[], + content_type=content_type, ) queue_event_on_commit("thumbnail", {"id": image_row.id}) return image_row @@ -415,15 +415,9 @@ def get_user_upload_previews( upload_preview_data: dict[str, MarkdownImageMetadata] = {} - image_attachments = ( - ImageAttachment.objects.filter(realm_id=realm_id, path_id__in=path_ids) - .annotate( - original_content_type=Subquery( - Attachment.objects.filter(path_id=OuterRef("path_id")).values("content_type") - ) - ) - .order_by("id") - ) + image_attachments = ImageAttachment.objects.filter( + realm_id=realm_id, path_id__in=path_ids + ).order_by("id") if lock: image_attachments = image_attachments.select_for_update(of=("self",)) for image_attachment in image_attachments: @@ -436,7 +430,7 @@ def get_user_upload_previews( is_animated=False, original_width_px=image_attachment.original_width_px, original_height_px=image_attachment.original_height_px, - original_content_type=image_attachment.original_content_type, + original_content_type=image_attachment.content_type, ) # We re-queue the row for thumbnailing to make sure that @@ -454,10 +448,8 @@ def get_user_upload_previews( is_animated=is_animated, original_width_px=image_attachment.original_width_px, original_height_px=image_attachment.original_height_px, - original_content_type=image_attachment.original_content_type, - transcoded_image=get_transcoded_format( - image_attachment, image_attachment.original_content_type - ), + original_content_type=image_attachment.content_type, + transcoded_image=get_transcoded_format(image_attachment), ) return upload_preview_data @@ -484,7 +476,7 @@ def get_default_thumbnail_url(image_attachment: ImageAttachment) -> tuple[str, b def get_transcoded_format( - image_attachment: ImageAttachment, original_content_type: str | None + image_attachment: ImageAttachment, ) -> StoredThumbnailFormat | None: # Returns None if the original content-type is judged to be # renderable inline. Otherwise, we return the largest thumbnail @@ -492,7 +484,7 @@ def get_transcoded_format( # not in INLINE_MIME_TYPES get an extra large-resolution thumbnail # added to their list of formats, this is thus either None or a # high-resolution thumbnail. - if original_content_type is None or original_content_type in INLINE_MIME_TYPES: + if image_attachment.content_type is None or image_attachment.content_type in INLINE_MIME_TYPES: return None thumbs_by_size = sorted( diff --git a/zerver/migrations/0660_add_imageattachment_content_type.py b/zerver/migrations/0660_add_imageattachment_content_type.py new file mode 100644 index 0000000000..f3e26abb7e --- /dev/null +++ b/zerver/migrations/0660_add_imageattachment_content_type.py @@ -0,0 +1,48 @@ +from django.db import migrations, models +from django.db.backends.base.schema import BaseDatabaseSchemaEditor +from django.db.migrations.state import StateApps +from django.db.models import OuterRef, Subquery + + +def populate_content_type(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None: + ImageAttachment = apps.get_model("zerver", "ImageAttachment") + Attachment = apps.get_model("zerver", "Attachment") + + batch_size = 1000 + min_id = 0 + while True: + # Update content_types from corresponding Attachments in bulk + rows_updated = ImageAttachment.objects.filter( + id__gt=min_id, id__lte=min_id + batch_size + ).update( + content_type=Subquery( + Attachment.objects.filter(path_id=OuterRef("path_id")).values("content_type") + ), + ) + + if not rows_updated: + break + + min_id += batch_size + print(f"Processed ImageAttachments through id {min_id}") + + +class Migration(migrations.Migration): + atomic = False + + dependencies = [ + ("zerver", "0659_remove_realm_bot_creation_policy"), + ] + + operations = [ + migrations.AddField( + model_name="imageattachment", + name="content_type", + field=models.TextField(null=True), + ), + migrations.RunPython( + populate_content_type, + reverse_code=migrations.RunPython.noop, + elidable=True, + ), + ] diff --git a/zerver/models/messages.py b/zerver/models/messages.py index 5de34749f3..70bc971c97 100644 --- a/zerver/models/messages.py +++ b/zerver/models/messages.py @@ -667,6 +667,7 @@ class ArchivedUserMessage(AbstractUserMessage): class ImageAttachment(models.Model): realm = models.ForeignKey(Realm, on_delete=CASCADE) path_id = models.TextField(db_index=True, unique=True) + content_type = models.TextField(null=True) original_width_px = models.IntegerField() original_height_px = models.IntegerField() diff --git a/zerver/tests/test_markdown_thumbnail.py b/zerver/tests/test_markdown_thumbnail.py index 715606173c..8ba520e163 100644 --- a/zerver/tests/test_markdown_thumbnail.py +++ b/zerver/tests/test_markdown_thumbnail.py @@ -192,7 +192,7 @@ class MarkdownThumbnailTest(ZulipTestCase): # Re-thumbnail with a non-overlapping set of sizes with self.thumbnail_formats(ThumbnailFormat("jpg", 100, 75, animated=False)): - ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/gif") + ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) # We generate a new size but leave the old ones self.assert_length(ImageAttachment.objects.get(path_id=path_id).thumbnail_metadata, 3) @@ -227,7 +227,7 @@ class MarkdownThumbnailTest(ZulipTestCase): ) # Complete thumbnailing the second image first -- replacing only that spinner - ensure_thumbnails(ImageAttachment.objects.get(path_id=second_path_id), "image/jpeg") + ensure_thumbnails(ImageAttachment.objects.get(path_id=second_path_id)) self.assert_message_content_is( message_id, ( @@ -247,7 +247,7 @@ class MarkdownThumbnailTest(ZulipTestCase): ) # Finish the other thumbnail - ensure_thumbnails(ImageAttachment.objects.get(path_id=first_path_id), "image/png") + ensure_thumbnails(ImageAttachment.objects.get(path_id=first_path_id)) self.assert_message_content_is( message_id, ( @@ -283,7 +283,7 @@ class MarkdownThumbnailTest(ZulipTestCase): # Completing rendering after it is deleted should work, and # update the rendered content in the archived message - ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png") + ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) expected = ( f'

image

\n' f'
' @@ -313,7 +313,7 @@ class MarkdownThumbnailTest(ZulipTestCase): ) as thumb_mock, self.assertLogs("zerver.worker.thumbnail", "ERROR") as thumbnail_logs, ): - ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png") + ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) thumb_mock.assert_called_once() self.assert_length(thumbnail_logs.output, 1) self.assertTrue( @@ -360,7 +360,7 @@ class MarkdownThumbnailTest(ZulipTestCase): ThumbnailFormat("webp", 200, 150, animated=False), ), ): - ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png") + ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) # Called once per format self.assertEqual(thumb_mock.call_count, 2) @@ -410,7 +410,7 @@ class MarkdownThumbnailTest(ZulipTestCase): # Thumbnail the image. The message does not exist yet, so # nothing is re-written. - ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png") + ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) # Send the message; this should re-check the ImageAttachment # data, find the thumbnails, and update the rendered_content diff --git a/zerver/tests/test_thumbnail.py b/zerver/tests/test_thumbnail.py index 165df76497..cf9c8e53ac 100644 --- a/zerver/tests/test_thumbnail.py +++ b/zerver/tests/test_thumbnail.py @@ -368,11 +368,11 @@ class TestStoreThumbnail(ZulipTestCase): self.assertEqual(thumbnailed_image.get_n_pages(), 2) with self.thumbnail_formats(ThumbnailFormat("webp", 100, 75, animated=True)): - self.assertEqual(ensure_thumbnails(image_attachment, "image/webp"), 0) + self.assertEqual(ensure_thumbnails(image_attachment), 0) self.assert_length(image_attachment.thumbnail_metadata, 1) with self.thumbnail_formats(ThumbnailFormat("webp", 150, 100, opts="Q=90", animated=False)): - self.assertEqual(ensure_thumbnails(image_attachment, "image/webp"), 1) + self.assertEqual(ensure_thumbnails(image_attachment), 1) self.assert_length(image_attachment.thumbnail_metadata, 2) bigger_thumbnail = StoredThumbnailFormat(**image_attachment.thumbnail_metadata[1]) @@ -594,12 +594,13 @@ class TestStoreThumbnail(ZulipTestCase): original_width_px=128, frames=1, thumbnail_metadata=[], + content_type="image/gif", ) with self.thumbnail_formats(ThumbnailFormat("webp", 100, 75, animated=False)): - self.assert_length(missing_thumbnails(image_attachment, "image/gif"), 1) + self.assert_length(missing_thumbnails(image_attachment), 1) with self.assertLogs("zerver.worker.thumbnail", level="ERROR") as error_log: - self.assertEqual(ensure_thumbnails(image_attachment, "image/gif"), 0) + self.assertEqual(ensure_thumbnails(image_attachment), 0) libvips_version = (pyvips.version(0), pyvips.version(1)) # This error message changed @@ -619,25 +620,24 @@ class TestStoreThumbnail(ZulipTestCase): original_height_px=100, frames=1, thumbnail_metadata=[], + content_type="image/png", ) with self.thumbnail_formats(): - self.assertEqual(missing_thumbnails(image_attachment, "image/png"), []) + self.assertEqual(missing_thumbnails(image_attachment), []) still_webp = ThumbnailFormat("webp", 100, 75, animated=False, opts="Q=90") with self.thumbnail_formats(still_webp): - self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [still_webp]) + self.assertEqual(missing_thumbnails(image_attachment), [still_webp]) anim_webp = ThumbnailFormat("webp", 100, 75, animated=True, opts="Q=90") with self.thumbnail_formats(still_webp, anim_webp): # It's not animated, so the animated format doesn't appear at all - self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [still_webp]) + self.assertEqual(missing_thumbnails(image_attachment), [still_webp]) still_jpeg = ThumbnailFormat("jpeg", 100, 75, animated=False, opts="Q=90") with self.thumbnail_formats(still_webp, anim_webp, still_jpeg): # But other still formats do - self.assertEqual( - missing_thumbnails(image_attachment, "image/png"), [still_webp, still_jpeg] - ) + self.assertEqual(missing_thumbnails(image_attachment), [still_webp, still_jpeg]) # If we have a rendered 150x100.webp, then we're not missing it rendered_still_webp = StoredThumbnailFormat( @@ -652,14 +652,12 @@ class TestStoreThumbnail(ZulipTestCase): ) image_attachment.thumbnail_metadata = [asdict(rendered_still_webp)] with self.thumbnail_formats(still_webp, anim_webp, still_jpeg): - self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [still_jpeg]) + self.assertEqual(missing_thumbnails(image_attachment), [still_jpeg]) # If we have the still, and it's animated, we do still need the animated image_attachment.frames = 10 with self.thumbnail_formats(still_webp, anim_webp, still_jpeg): - self.assertEqual( - missing_thumbnails(image_attachment, "image/png"), [anim_webp, still_jpeg] - ) + self.assertEqual(missing_thumbnails(image_attachment), [anim_webp, still_jpeg]) def test_transcoded_format(self) -> None: image_attachment = ImageAttachment( @@ -668,34 +666,29 @@ class TestStoreThumbnail(ZulipTestCase): original_height_px=100, frames=1, thumbnail_metadata=[], + content_type="image/tiff", ) still_webp = ThumbnailFormat("webp", 100, 75, animated=False, opts="Q=90") with self.thumbnail_formats(still_webp): # We add a high-resolution transcoded format if the image isn't in INLINE_MIME_TYPES: transcoded = ThumbnailFormat("webp", 4032, 3024, animated=False) - self.assertEqual( - missing_thumbnails(image_attachment, "image/tiff"), [still_webp, transcoded] - ) + self.assertEqual(missing_thumbnails(image_attachment), [still_webp, transcoded]) # We flip to being portrait if the image is higher than it is wide transcoded = ThumbnailFormat("webp", 3024, 4032, animated=False) image_attachment.original_height_px = 300 - self.assertEqual( - missing_thumbnails(image_attachment, "image/tiff"), [still_webp, transcoded] - ) + self.assertEqual(missing_thumbnails(image_attachment), [still_webp, transcoded]) # The format is not animated, even if the original was image_attachment.original_height_px = 100 image_attachment.frames = 10 transcoded = ThumbnailFormat("webp", 4032, 3024, animated=False) - self.assertEqual( - missing_thumbnails(image_attachment, "image/tiff"), [still_webp, transcoded] - ) + self.assertEqual(missing_thumbnails(image_attachment), [still_webp, transcoded]) # We do not store on the image_attachment if we generated # a transcoded version; it just picks the largest format # if one is called for. - self.assertEqual(get_transcoded_format(image_attachment, "image/tiff"), None) + self.assertEqual(get_transcoded_format(image_attachment), None) image_attachment.thumbnail_metadata = [ asdict( StoredThumbnailFormat( @@ -734,12 +727,14 @@ class TestStoreThumbnail(ZulipTestCase): ) ), ] - self.assertEqual(get_transcoded_format(image_attachment, "image/png"), None) - self.assertEqual(get_transcoded_format(image_attachment, None), None) self.assertEqual( - get_transcoded_format(image_attachment, "image/tiff"), + get_transcoded_format(image_attachment), ThumbnailFormat("webp", 4032, 3024, animated=False), ) + image_attachment.content_type = "image/png" + self.assertEqual(get_transcoded_format(image_attachment), None) + image_attachment.content_type = None + self.assertEqual(get_transcoded_format(image_attachment), None) def test_maybe_thumbnail_from_stream(self) -> None: # If we put the file in place directly (e.g. simulating a diff --git a/zerver/views/upload.py b/zerver/views/upload.py index 6e1248393a..38dea8703b 100644 --- a/zerver/views/upload.py +++ b/zerver/views/upload.py @@ -342,7 +342,6 @@ def serve_file( with transaction.atomic(savepoint=False): ensure_thumbnails( ImageAttachment.objects.select_for_update().get(id=image_attachment.id), - attachment.content_type, ) # Update the path that we are fetching to be the thumbnail diff --git a/zerver/worker/thumbnail.py b/zerver/worker/thumbnail.py index 2b216eee5a..cedc57a160 100644 --- a/zerver/worker/thumbnail.py +++ b/zerver/worker/thumbnail.py @@ -6,7 +6,6 @@ from typing import Any import pyvips from django.db import transaction -from django.db.models import OuterRef, Subquery from typing_extensions import override from zerver.actions.message_edit import do_update_embedded_data @@ -22,7 +21,7 @@ from zerver.lib.thumbnail import ( rewrite_thumbnailed_images, ) from zerver.lib.upload import save_attachment_contents, upload_backend -from zerver.models import ArchivedMessage, Attachment, ImageAttachment, Message +from zerver.models import ArchivedMessage, ImageAttachment, Message from zerver.worker.base import QueueProcessingWorker, assign_queue logger = logging.getLogger(__name__) @@ -40,21 +39,11 @@ class ThumbnailWorker(QueueProcessingWorker): # directly to a thumbnail URL we have not made yet. # This may mean that we may generate 0 thumbnail # images once we get the lock. - row = ( - ImageAttachment.objects.select_for_update(of=("self",)) - .annotate( - original_content_type=Subquery( - Attachment.objects.filter(path_id=OuterRef("path_id")).values( - "content_type" - ) - ) - ) - .get(id=event["id"]) - ) + row = ImageAttachment.objects.select_for_update(of=("self",)).get(id=event["id"]) except ImageAttachment.DoesNotExist: # nocoverage logger.info("ImageAttachment row %d missing", event["id"]) return - uploaded_thumbnails = ensure_thumbnails(row, row.original_content_type) + uploaded_thumbnails = ensure_thumbnails(row) end = time.time() logger.info( "Processed %d thumbnails (%dms)", @@ -63,10 +52,8 @@ class ThumbnailWorker(QueueProcessingWorker): ) -def ensure_thumbnails( - image_attachment: ImageAttachment, original_content_type: str | None = None -) -> int: - needed_thumbnails = missing_thumbnails(image_attachment, original_content_type) +def ensure_thumbnails(image_attachment: ImageAttachment) -> int: + needed_thumbnails = missing_thumbnails(image_attachment) if not needed_thumbnails: return 0 @@ -174,8 +161,8 @@ def ensure_thumbnails( is_animated=is_animated, original_width_px=image_attachment.original_width_px, original_height_px=image_attachment.original_height_px, - original_content_type=original_content_type, - transcoded_image=get_transcoded_format(image_attachment, original_content_type), + original_content_type=image_attachment.content_type, + transcoded_image=get_transcoded_format(image_attachment), ), ) return written_images