diff --git a/zerver/lib/thumbnail.py b/zerver/lib/thumbnail.py
index 52a42fd033..779fc69069 100644
--- a/zerver/lib/thumbnail.py
+++ b/zerver/lib/thumbnail.py
@@ -9,14 +9,13 @@ from typing import TypeVar
import pyvips
from bs4 import BeautifulSoup
from bs4.formatter import EntitySubstitution, HTMLFormatter
-from django.db.models import OuterRef, Subquery
from django.utils.translation import gettext as _
from typing_extensions import override
from zerver.lib.exceptions import ErrorCode, JsonableError
from zerver.lib.mime_types import INLINE_MIME_TYPES
from zerver.lib.queue import queue_event_on_commit
-from zerver.models import Attachment, ImageAttachment
+from zerver.models import ImageAttachment
DEFAULT_AVATAR_SIZE = 100
MEDIUM_AVATAR_SIZE = 500
@@ -293,14 +292,14 @@ def resize_emoji(
def missing_thumbnails(
- image_attachment: ImageAttachment, original_content_type: str | None = None
+ image_attachment: ImageAttachment,
) -> list[ThumbnailFormat]:
seen_thumbnails: set[StoredThumbnailFormat] = set()
for existing_thumbnail in image_attachment.thumbnail_metadata:
seen_thumbnails.add(StoredThumbnailFormat(**existing_thumbnail))
potential_output_formats = list(THUMBNAIL_OUTPUT_FORMATS)
- if original_content_type is None or original_content_type not in INLINE_MIME_TYPES:
+ if image_attachment.content_type not in INLINE_MIME_TYPES:
if image_attachment.original_width_px >= image_attachment.original_height_px:
additional_format = ThumbnailFormat(
TRANSCODED_IMAGE_FORMAT.extension,
@@ -368,6 +367,7 @@ def maybe_thumbnail(
original_height_px=height,
frames=image.get_n_pages(),
thumbnail_metadata=[],
+ content_type=content_type,
)
queue_event_on_commit("thumbnail", {"id": image_row.id})
return image_row
@@ -415,15 +415,9 @@ def get_user_upload_previews(
upload_preview_data: dict[str, MarkdownImageMetadata] = {}
- image_attachments = (
- ImageAttachment.objects.filter(realm_id=realm_id, path_id__in=path_ids)
- .annotate(
- original_content_type=Subquery(
- Attachment.objects.filter(path_id=OuterRef("path_id")).values("content_type")
- )
- )
- .order_by("id")
- )
+ image_attachments = ImageAttachment.objects.filter(
+ realm_id=realm_id, path_id__in=path_ids
+ ).order_by("id")
if lock:
image_attachments = image_attachments.select_for_update(of=("self",))
for image_attachment in image_attachments:
@@ -436,7 +430,7 @@ def get_user_upload_previews(
is_animated=False,
original_width_px=image_attachment.original_width_px,
original_height_px=image_attachment.original_height_px,
- original_content_type=image_attachment.original_content_type,
+ original_content_type=image_attachment.content_type,
)
# We re-queue the row for thumbnailing to make sure that
@@ -454,10 +448,8 @@ def get_user_upload_previews(
is_animated=is_animated,
original_width_px=image_attachment.original_width_px,
original_height_px=image_attachment.original_height_px,
- original_content_type=image_attachment.original_content_type,
- transcoded_image=get_transcoded_format(
- image_attachment, image_attachment.original_content_type
- ),
+ original_content_type=image_attachment.content_type,
+ transcoded_image=get_transcoded_format(image_attachment),
)
return upload_preview_data
@@ -484,7 +476,7 @@ def get_default_thumbnail_url(image_attachment: ImageAttachment) -> tuple[str, b
def get_transcoded_format(
- image_attachment: ImageAttachment, original_content_type: str | None
+ image_attachment: ImageAttachment,
) -> StoredThumbnailFormat | None:
# Returns None if the original content-type is judged to be
# renderable inline. Otherwise, we return the largest thumbnail
@@ -492,7 +484,7 @@ def get_transcoded_format(
# not in INLINE_MIME_TYPES get an extra large-resolution thumbnail
# added to their list of formats, this is thus either None or a
# high-resolution thumbnail.
- if original_content_type is None or original_content_type in INLINE_MIME_TYPES:
+ if image_attachment.content_type is None or image_attachment.content_type in INLINE_MIME_TYPES:
return None
thumbs_by_size = sorted(
diff --git a/zerver/migrations/0660_add_imageattachment_content_type.py b/zerver/migrations/0660_add_imageattachment_content_type.py
new file mode 100644
index 0000000000..f3e26abb7e
--- /dev/null
+++ b/zerver/migrations/0660_add_imageattachment_content_type.py
@@ -0,0 +1,48 @@
+from django.db import migrations, models
+from django.db.backends.base.schema import BaseDatabaseSchemaEditor
+from django.db.migrations.state import StateApps
+from django.db.models import OuterRef, Subquery
+
+
+def populate_content_type(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
+ ImageAttachment = apps.get_model("zerver", "ImageAttachment")
+ Attachment = apps.get_model("zerver", "Attachment")
+
+ batch_size = 1000
+ min_id = 0
+ while True:
+ # Update content_types from corresponding Attachments in bulk
+ rows_updated = ImageAttachment.objects.filter(
+ id__gt=min_id, id__lte=min_id + batch_size
+ ).update(
+ content_type=Subquery(
+ Attachment.objects.filter(path_id=OuterRef("path_id")).values("content_type")
+ ),
+ )
+
+ if not rows_updated:
+ break
+
+ min_id += batch_size
+ print(f"Processed ImageAttachments through id {min_id}")
+
+
+class Migration(migrations.Migration):
+ atomic = False
+
+ dependencies = [
+ ("zerver", "0659_remove_realm_bot_creation_policy"),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name="imageattachment",
+ name="content_type",
+ field=models.TextField(null=True),
+ ),
+ migrations.RunPython(
+ populate_content_type,
+ reverse_code=migrations.RunPython.noop,
+ elidable=True,
+ ),
+ ]
diff --git a/zerver/models/messages.py b/zerver/models/messages.py
index 5de34749f3..70bc971c97 100644
--- a/zerver/models/messages.py
+++ b/zerver/models/messages.py
@@ -667,6 +667,7 @@ class ArchivedUserMessage(AbstractUserMessage):
class ImageAttachment(models.Model):
realm = models.ForeignKey(Realm, on_delete=CASCADE)
path_id = models.TextField(db_index=True, unique=True)
+ content_type = models.TextField(null=True)
original_width_px = models.IntegerField()
original_height_px = models.IntegerField()
diff --git a/zerver/tests/test_markdown_thumbnail.py b/zerver/tests/test_markdown_thumbnail.py
index 715606173c..8ba520e163 100644
--- a/zerver/tests/test_markdown_thumbnail.py
+++ b/zerver/tests/test_markdown_thumbnail.py
@@ -192,7 +192,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
# Re-thumbnail with a non-overlapping set of sizes
with self.thumbnail_formats(ThumbnailFormat("jpg", 100, 75, animated=False)):
- ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/gif")
+ ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
# We generate a new size but leave the old ones
self.assert_length(ImageAttachment.objects.get(path_id=path_id).thumbnail_metadata, 3)
@@ -227,7 +227,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
)
# Complete thumbnailing the second image first -- replacing only that spinner
- ensure_thumbnails(ImageAttachment.objects.get(path_id=second_path_id), "image/jpeg")
+ ensure_thumbnails(ImageAttachment.objects.get(path_id=second_path_id))
self.assert_message_content_is(
message_id,
(
@@ -247,7 +247,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
)
# Finish the other thumbnail
- ensure_thumbnails(ImageAttachment.objects.get(path_id=first_path_id), "image/png")
+ ensure_thumbnails(ImageAttachment.objects.get(path_id=first_path_id))
self.assert_message_content_is(
message_id,
(
@@ -283,7 +283,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
# Completing rendering after it is deleted should work, and
# update the rendered content in the archived message
- ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png")
+ ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
expected = (
f'
image
\n'
f''
@@ -313,7 +313,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
) as thumb_mock,
self.assertLogs("zerver.worker.thumbnail", "ERROR") as thumbnail_logs,
):
- ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png")
+ ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
thumb_mock.assert_called_once()
self.assert_length(thumbnail_logs.output, 1)
self.assertTrue(
@@ -360,7 +360,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
ThumbnailFormat("webp", 200, 150, animated=False),
),
):
- ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png")
+ ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
# Called once per format
self.assertEqual(thumb_mock.call_count, 2)
@@ -410,7 +410,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
# Thumbnail the image. The message does not exist yet, so
# nothing is re-written.
- ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png")
+ ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
# Send the message; this should re-check the ImageAttachment
# data, find the thumbnails, and update the rendered_content
diff --git a/zerver/tests/test_thumbnail.py b/zerver/tests/test_thumbnail.py
index 165df76497..cf9c8e53ac 100644
--- a/zerver/tests/test_thumbnail.py
+++ b/zerver/tests/test_thumbnail.py
@@ -368,11 +368,11 @@ class TestStoreThumbnail(ZulipTestCase):
self.assertEqual(thumbnailed_image.get_n_pages(), 2)
with self.thumbnail_formats(ThumbnailFormat("webp", 100, 75, animated=True)):
- self.assertEqual(ensure_thumbnails(image_attachment, "image/webp"), 0)
+ self.assertEqual(ensure_thumbnails(image_attachment), 0)
self.assert_length(image_attachment.thumbnail_metadata, 1)
with self.thumbnail_formats(ThumbnailFormat("webp", 150, 100, opts="Q=90", animated=False)):
- self.assertEqual(ensure_thumbnails(image_attachment, "image/webp"), 1)
+ self.assertEqual(ensure_thumbnails(image_attachment), 1)
self.assert_length(image_attachment.thumbnail_metadata, 2)
bigger_thumbnail = StoredThumbnailFormat(**image_attachment.thumbnail_metadata[1])
@@ -594,12 +594,13 @@ class TestStoreThumbnail(ZulipTestCase):
original_width_px=128,
frames=1,
thumbnail_metadata=[],
+ content_type="image/gif",
)
with self.thumbnail_formats(ThumbnailFormat("webp", 100, 75, animated=False)):
- self.assert_length(missing_thumbnails(image_attachment, "image/gif"), 1)
+ self.assert_length(missing_thumbnails(image_attachment), 1)
with self.assertLogs("zerver.worker.thumbnail", level="ERROR") as error_log:
- self.assertEqual(ensure_thumbnails(image_attachment, "image/gif"), 0)
+ self.assertEqual(ensure_thumbnails(image_attachment), 0)
libvips_version = (pyvips.version(0), pyvips.version(1))
# This error message changed
@@ -619,25 +620,24 @@ class TestStoreThumbnail(ZulipTestCase):
original_height_px=100,
frames=1,
thumbnail_metadata=[],
+ content_type="image/png",
)
with self.thumbnail_formats():
- self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [])
+ self.assertEqual(missing_thumbnails(image_attachment), [])
still_webp = ThumbnailFormat("webp", 100, 75, animated=False, opts="Q=90")
with self.thumbnail_formats(still_webp):
- self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [still_webp])
+ self.assertEqual(missing_thumbnails(image_attachment), [still_webp])
anim_webp = ThumbnailFormat("webp", 100, 75, animated=True, opts="Q=90")
with self.thumbnail_formats(still_webp, anim_webp):
# It's not animated, so the animated format doesn't appear at all
- self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [still_webp])
+ self.assertEqual(missing_thumbnails(image_attachment), [still_webp])
still_jpeg = ThumbnailFormat("jpeg", 100, 75, animated=False, opts="Q=90")
with self.thumbnail_formats(still_webp, anim_webp, still_jpeg):
# But other still formats do
- self.assertEqual(
- missing_thumbnails(image_attachment, "image/png"), [still_webp, still_jpeg]
- )
+ self.assertEqual(missing_thumbnails(image_attachment), [still_webp, still_jpeg])
# If we have a rendered 150x100.webp, then we're not missing it
rendered_still_webp = StoredThumbnailFormat(
@@ -652,14 +652,12 @@ class TestStoreThumbnail(ZulipTestCase):
)
image_attachment.thumbnail_metadata = [asdict(rendered_still_webp)]
with self.thumbnail_formats(still_webp, anim_webp, still_jpeg):
- self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [still_jpeg])
+ self.assertEqual(missing_thumbnails(image_attachment), [still_jpeg])
# If we have the still, and it's animated, we do still need the animated
image_attachment.frames = 10
with self.thumbnail_formats(still_webp, anim_webp, still_jpeg):
- self.assertEqual(
- missing_thumbnails(image_attachment, "image/png"), [anim_webp, still_jpeg]
- )
+ self.assertEqual(missing_thumbnails(image_attachment), [anim_webp, still_jpeg])
def test_transcoded_format(self) -> None:
image_attachment = ImageAttachment(
@@ -668,34 +666,29 @@ class TestStoreThumbnail(ZulipTestCase):
original_height_px=100,
frames=1,
thumbnail_metadata=[],
+ content_type="image/tiff",
)
still_webp = ThumbnailFormat("webp", 100, 75, animated=False, opts="Q=90")
with self.thumbnail_formats(still_webp):
# We add a high-resolution transcoded format if the image isn't in INLINE_MIME_TYPES:
transcoded = ThumbnailFormat("webp", 4032, 3024, animated=False)
- self.assertEqual(
- missing_thumbnails(image_attachment, "image/tiff"), [still_webp, transcoded]
- )
+ self.assertEqual(missing_thumbnails(image_attachment), [still_webp, transcoded])
# We flip to being portrait if the image is higher than it is wide
transcoded = ThumbnailFormat("webp", 3024, 4032, animated=False)
image_attachment.original_height_px = 300
- self.assertEqual(
- missing_thumbnails(image_attachment, "image/tiff"), [still_webp, transcoded]
- )
+ self.assertEqual(missing_thumbnails(image_attachment), [still_webp, transcoded])
# The format is not animated, even if the original was
image_attachment.original_height_px = 100
image_attachment.frames = 10
transcoded = ThumbnailFormat("webp", 4032, 3024, animated=False)
- self.assertEqual(
- missing_thumbnails(image_attachment, "image/tiff"), [still_webp, transcoded]
- )
+ self.assertEqual(missing_thumbnails(image_attachment), [still_webp, transcoded])
# We do not store on the image_attachment if we generated
# a transcoded version; it just picks the largest format
# if one is called for.
- self.assertEqual(get_transcoded_format(image_attachment, "image/tiff"), None)
+ self.assertEqual(get_transcoded_format(image_attachment), None)
image_attachment.thumbnail_metadata = [
asdict(
StoredThumbnailFormat(
@@ -734,12 +727,14 @@ class TestStoreThumbnail(ZulipTestCase):
)
),
]
- self.assertEqual(get_transcoded_format(image_attachment, "image/png"), None)
- self.assertEqual(get_transcoded_format(image_attachment, None), None)
self.assertEqual(
- get_transcoded_format(image_attachment, "image/tiff"),
+ get_transcoded_format(image_attachment),
ThumbnailFormat("webp", 4032, 3024, animated=False),
)
+ image_attachment.content_type = "image/png"
+ self.assertEqual(get_transcoded_format(image_attachment), None)
+ image_attachment.content_type = None
+ self.assertEqual(get_transcoded_format(image_attachment), None)
def test_maybe_thumbnail_from_stream(self) -> None:
# If we put the file in place directly (e.g. simulating a
diff --git a/zerver/views/upload.py b/zerver/views/upload.py
index 6e1248393a..38dea8703b 100644
--- a/zerver/views/upload.py
+++ b/zerver/views/upload.py
@@ -342,7 +342,6 @@ def serve_file(
with transaction.atomic(savepoint=False):
ensure_thumbnails(
ImageAttachment.objects.select_for_update().get(id=image_attachment.id),
- attachment.content_type,
)
# Update the path that we are fetching to be the thumbnail
diff --git a/zerver/worker/thumbnail.py b/zerver/worker/thumbnail.py
index 2b216eee5a..cedc57a160 100644
--- a/zerver/worker/thumbnail.py
+++ b/zerver/worker/thumbnail.py
@@ -6,7 +6,6 @@ from typing import Any
import pyvips
from django.db import transaction
-from django.db.models import OuterRef, Subquery
from typing_extensions import override
from zerver.actions.message_edit import do_update_embedded_data
@@ -22,7 +21,7 @@ from zerver.lib.thumbnail import (
rewrite_thumbnailed_images,
)
from zerver.lib.upload import save_attachment_contents, upload_backend
-from zerver.models import ArchivedMessage, Attachment, ImageAttachment, Message
+from zerver.models import ArchivedMessage, ImageAttachment, Message
from zerver.worker.base import QueueProcessingWorker, assign_queue
logger = logging.getLogger(__name__)
@@ -40,21 +39,11 @@ class ThumbnailWorker(QueueProcessingWorker):
# directly to a thumbnail URL we have not made yet.
# This may mean that we may generate 0 thumbnail
# images once we get the lock.
- row = (
- ImageAttachment.objects.select_for_update(of=("self",))
- .annotate(
- original_content_type=Subquery(
- Attachment.objects.filter(path_id=OuterRef("path_id")).values(
- "content_type"
- )
- )
- )
- .get(id=event["id"])
- )
+ row = ImageAttachment.objects.select_for_update(of=("self",)).get(id=event["id"])
except ImageAttachment.DoesNotExist: # nocoverage
logger.info("ImageAttachment row %d missing", event["id"])
return
- uploaded_thumbnails = ensure_thumbnails(row, row.original_content_type)
+ uploaded_thumbnails = ensure_thumbnails(row)
end = time.time()
logger.info(
"Processed %d thumbnails (%dms)",
@@ -63,10 +52,8 @@ class ThumbnailWorker(QueueProcessingWorker):
)
-def ensure_thumbnails(
- image_attachment: ImageAttachment, original_content_type: str | None = None
-) -> int:
- needed_thumbnails = missing_thumbnails(image_attachment, original_content_type)
+def ensure_thumbnails(image_attachment: ImageAttachment) -> int:
+ needed_thumbnails = missing_thumbnails(image_attachment)
if not needed_thumbnails:
return 0
@@ -174,8 +161,8 @@ def ensure_thumbnails(
is_animated=is_animated,
original_width_px=image_attachment.original_width_px,
original_height_px=image_attachment.original_height_px,
- original_content_type=original_content_type,
- transcoded_image=get_transcoded_format(image_attachment, original_content_type),
+ original_content_type=image_attachment.content_type,
+ transcoded_image=get_transcoded_format(image_attachment),
),
)
return written_images