mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 04:52:12 +00:00
models: Add content_type to ImageAttachment.
This means that only ImageAttachment row needs to be fetched, and removes the need to pass around an extra parameter. This denormalization is safe, since in general Attachment rows are read-only, so we are not concerned with drift between the Attachment and ImageAttachment tables. We cannot make content_type non-null, since while the both the `content_type` column in Attachment and populating that from requests predates the ImageAttachment table, we have both backfilled ImageAttachment rows to consider, and imports may also leave files with no `content_type`. Any backfill of currently-null `content_type` values will thus need to update both tables. This change fixes a race condition when importing. ImageAttachment rows are imported before rendering Messages, which are both before importing Attachment rows; if the thumbnailing finished after the Message was imported but before Attachment rows were imported, then the re-rendering step would not know the image's content-type.
This commit is contained in:
committed by
Tim Abbott
parent
0788942a68
commit
98362de185
@@ -9,14 +9,13 @@ from typing import TypeVar
|
||||
import pyvips
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.formatter import EntitySubstitution, HTMLFormatter
|
||||
from django.db.models import OuterRef, Subquery
|
||||
from django.utils.translation import gettext as _
|
||||
from typing_extensions import override
|
||||
|
||||
from zerver.lib.exceptions import ErrorCode, JsonableError
|
||||
from zerver.lib.mime_types import INLINE_MIME_TYPES
|
||||
from zerver.lib.queue import queue_event_on_commit
|
||||
from zerver.models import Attachment, ImageAttachment
|
||||
from zerver.models import ImageAttachment
|
||||
|
||||
DEFAULT_AVATAR_SIZE = 100
|
||||
MEDIUM_AVATAR_SIZE = 500
|
||||
@@ -293,14 +292,14 @@ def resize_emoji(
|
||||
|
||||
|
||||
def missing_thumbnails(
|
||||
image_attachment: ImageAttachment, original_content_type: str | None = None
|
||||
image_attachment: ImageAttachment,
|
||||
) -> list[ThumbnailFormat]:
|
||||
seen_thumbnails: set[StoredThumbnailFormat] = set()
|
||||
for existing_thumbnail in image_attachment.thumbnail_metadata:
|
||||
seen_thumbnails.add(StoredThumbnailFormat(**existing_thumbnail))
|
||||
|
||||
potential_output_formats = list(THUMBNAIL_OUTPUT_FORMATS)
|
||||
if original_content_type is None or original_content_type not in INLINE_MIME_TYPES:
|
||||
if image_attachment.content_type not in INLINE_MIME_TYPES:
|
||||
if image_attachment.original_width_px >= image_attachment.original_height_px:
|
||||
additional_format = ThumbnailFormat(
|
||||
TRANSCODED_IMAGE_FORMAT.extension,
|
||||
@@ -368,6 +367,7 @@ def maybe_thumbnail(
|
||||
original_height_px=height,
|
||||
frames=image.get_n_pages(),
|
||||
thumbnail_metadata=[],
|
||||
content_type=content_type,
|
||||
)
|
||||
queue_event_on_commit("thumbnail", {"id": image_row.id})
|
||||
return image_row
|
||||
@@ -415,15 +415,9 @@ def get_user_upload_previews(
|
||||
|
||||
upload_preview_data: dict[str, MarkdownImageMetadata] = {}
|
||||
|
||||
image_attachments = (
|
||||
ImageAttachment.objects.filter(realm_id=realm_id, path_id__in=path_ids)
|
||||
.annotate(
|
||||
original_content_type=Subquery(
|
||||
Attachment.objects.filter(path_id=OuterRef("path_id")).values("content_type")
|
||||
)
|
||||
)
|
||||
.order_by("id")
|
||||
)
|
||||
image_attachments = ImageAttachment.objects.filter(
|
||||
realm_id=realm_id, path_id__in=path_ids
|
||||
).order_by("id")
|
||||
if lock:
|
||||
image_attachments = image_attachments.select_for_update(of=("self",))
|
||||
for image_attachment in image_attachments:
|
||||
@@ -436,7 +430,7 @@ def get_user_upload_previews(
|
||||
is_animated=False,
|
||||
original_width_px=image_attachment.original_width_px,
|
||||
original_height_px=image_attachment.original_height_px,
|
||||
original_content_type=image_attachment.original_content_type,
|
||||
original_content_type=image_attachment.content_type,
|
||||
)
|
||||
|
||||
# We re-queue the row for thumbnailing to make sure that
|
||||
@@ -454,10 +448,8 @@ def get_user_upload_previews(
|
||||
is_animated=is_animated,
|
||||
original_width_px=image_attachment.original_width_px,
|
||||
original_height_px=image_attachment.original_height_px,
|
||||
original_content_type=image_attachment.original_content_type,
|
||||
transcoded_image=get_transcoded_format(
|
||||
image_attachment, image_attachment.original_content_type
|
||||
),
|
||||
original_content_type=image_attachment.content_type,
|
||||
transcoded_image=get_transcoded_format(image_attachment),
|
||||
)
|
||||
return upload_preview_data
|
||||
|
||||
@@ -484,7 +476,7 @@ def get_default_thumbnail_url(image_attachment: ImageAttachment) -> tuple[str, b
|
||||
|
||||
|
||||
def get_transcoded_format(
|
||||
image_attachment: ImageAttachment, original_content_type: str | None
|
||||
image_attachment: ImageAttachment,
|
||||
) -> StoredThumbnailFormat | None:
|
||||
# Returns None if the original content-type is judged to be
|
||||
# renderable inline. Otherwise, we return the largest thumbnail
|
||||
@@ -492,7 +484,7 @@ def get_transcoded_format(
|
||||
# not in INLINE_MIME_TYPES get an extra large-resolution thumbnail
|
||||
# added to their list of formats, this is thus either None or a
|
||||
# high-resolution thumbnail.
|
||||
if original_content_type is None or original_content_type in INLINE_MIME_TYPES:
|
||||
if image_attachment.content_type is None or image_attachment.content_type in INLINE_MIME_TYPES:
|
||||
return None
|
||||
|
||||
thumbs_by_size = sorted(
|
||||
|
48
zerver/migrations/0660_add_imageattachment_content_type.py
Normal file
48
zerver/migrations/0660_add_imageattachment_content_type.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from django.db import migrations, models
|
||||
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
|
||||
from django.db.migrations.state import StateApps
|
||||
from django.db.models import OuterRef, Subquery
|
||||
|
||||
|
||||
def populate_content_type(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
|
||||
ImageAttachment = apps.get_model("zerver", "ImageAttachment")
|
||||
Attachment = apps.get_model("zerver", "Attachment")
|
||||
|
||||
batch_size = 1000
|
||||
min_id = 0
|
||||
while True:
|
||||
# Update content_types from corresponding Attachments in bulk
|
||||
rows_updated = ImageAttachment.objects.filter(
|
||||
id__gt=min_id, id__lte=min_id + batch_size
|
||||
).update(
|
||||
content_type=Subquery(
|
||||
Attachment.objects.filter(path_id=OuterRef("path_id")).values("content_type")
|
||||
),
|
||||
)
|
||||
|
||||
if not rows_updated:
|
||||
break
|
||||
|
||||
min_id += batch_size
|
||||
print(f"Processed ImageAttachments through id {min_id}")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
atomic = False
|
||||
|
||||
dependencies = [
|
||||
("zerver", "0659_remove_realm_bot_creation_policy"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="imageattachment",
|
||||
name="content_type",
|
||||
field=models.TextField(null=True),
|
||||
),
|
||||
migrations.RunPython(
|
||||
populate_content_type,
|
||||
reverse_code=migrations.RunPython.noop,
|
||||
elidable=True,
|
||||
),
|
||||
]
|
@@ -667,6 +667,7 @@ class ArchivedUserMessage(AbstractUserMessage):
|
||||
class ImageAttachment(models.Model):
|
||||
realm = models.ForeignKey(Realm, on_delete=CASCADE)
|
||||
path_id = models.TextField(db_index=True, unique=True)
|
||||
content_type = models.TextField(null=True)
|
||||
|
||||
original_width_px = models.IntegerField()
|
||||
original_height_px = models.IntegerField()
|
||||
|
@@ -192,7 +192,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
|
||||
|
||||
# Re-thumbnail with a non-overlapping set of sizes
|
||||
with self.thumbnail_formats(ThumbnailFormat("jpg", 100, 75, animated=False)):
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/gif")
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
|
||||
|
||||
# We generate a new size but leave the old ones
|
||||
self.assert_length(ImageAttachment.objects.get(path_id=path_id).thumbnail_metadata, 3)
|
||||
@@ -227,7 +227,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
|
||||
)
|
||||
|
||||
# Complete thumbnailing the second image first -- replacing only that spinner
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=second_path_id), "image/jpeg")
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=second_path_id))
|
||||
self.assert_message_content_is(
|
||||
message_id,
|
||||
(
|
||||
@@ -247,7 +247,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
|
||||
)
|
||||
|
||||
# Finish the other thumbnail
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=first_path_id), "image/png")
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=first_path_id))
|
||||
self.assert_message_content_is(
|
||||
message_id,
|
||||
(
|
||||
@@ -283,7 +283,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
|
||||
|
||||
# Completing rendering after it is deleted should work, and
|
||||
# update the rendered content in the archived message
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png")
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
|
||||
expected = (
|
||||
f'<p><a href="/user_uploads/{path_id}">image</a></p>\n'
|
||||
f'<div class="message_inline_image"><a href="/user_uploads/{path_id}" title="image">'
|
||||
@@ -313,7 +313,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
|
||||
) as thumb_mock,
|
||||
self.assertLogs("zerver.worker.thumbnail", "ERROR") as thumbnail_logs,
|
||||
):
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png")
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
|
||||
thumb_mock.assert_called_once()
|
||||
self.assert_length(thumbnail_logs.output, 1)
|
||||
self.assertTrue(
|
||||
@@ -360,7 +360,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
|
||||
ThumbnailFormat("webp", 200, 150, animated=False),
|
||||
),
|
||||
):
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png")
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
|
||||
|
||||
# Called once per format
|
||||
self.assertEqual(thumb_mock.call_count, 2)
|
||||
@@ -410,7 +410,7 @@ class MarkdownThumbnailTest(ZulipTestCase):
|
||||
|
||||
# Thumbnail the image. The message does not exist yet, so
|
||||
# nothing is re-written.
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id), "image/png")
|
||||
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
|
||||
|
||||
# Send the message; this should re-check the ImageAttachment
|
||||
# data, find the thumbnails, and update the rendered_content
|
||||
|
@@ -368,11 +368,11 @@ class TestStoreThumbnail(ZulipTestCase):
|
||||
self.assertEqual(thumbnailed_image.get_n_pages(), 2)
|
||||
|
||||
with self.thumbnail_formats(ThumbnailFormat("webp", 100, 75, animated=True)):
|
||||
self.assertEqual(ensure_thumbnails(image_attachment, "image/webp"), 0)
|
||||
self.assertEqual(ensure_thumbnails(image_attachment), 0)
|
||||
self.assert_length(image_attachment.thumbnail_metadata, 1)
|
||||
|
||||
with self.thumbnail_formats(ThumbnailFormat("webp", 150, 100, opts="Q=90", animated=False)):
|
||||
self.assertEqual(ensure_thumbnails(image_attachment, "image/webp"), 1)
|
||||
self.assertEqual(ensure_thumbnails(image_attachment), 1)
|
||||
self.assert_length(image_attachment.thumbnail_metadata, 2)
|
||||
|
||||
bigger_thumbnail = StoredThumbnailFormat(**image_attachment.thumbnail_metadata[1])
|
||||
@@ -594,12 +594,13 @@ class TestStoreThumbnail(ZulipTestCase):
|
||||
original_width_px=128,
|
||||
frames=1,
|
||||
thumbnail_metadata=[],
|
||||
content_type="image/gif",
|
||||
)
|
||||
with self.thumbnail_formats(ThumbnailFormat("webp", 100, 75, animated=False)):
|
||||
self.assert_length(missing_thumbnails(image_attachment, "image/gif"), 1)
|
||||
self.assert_length(missing_thumbnails(image_attachment), 1)
|
||||
|
||||
with self.assertLogs("zerver.worker.thumbnail", level="ERROR") as error_log:
|
||||
self.assertEqual(ensure_thumbnails(image_attachment, "image/gif"), 0)
|
||||
self.assertEqual(ensure_thumbnails(image_attachment), 0)
|
||||
|
||||
libvips_version = (pyvips.version(0), pyvips.version(1))
|
||||
# This error message changed
|
||||
@@ -619,25 +620,24 @@ class TestStoreThumbnail(ZulipTestCase):
|
||||
original_height_px=100,
|
||||
frames=1,
|
||||
thumbnail_metadata=[],
|
||||
content_type="image/png",
|
||||
)
|
||||
with self.thumbnail_formats():
|
||||
self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [])
|
||||
self.assertEqual(missing_thumbnails(image_attachment), [])
|
||||
|
||||
still_webp = ThumbnailFormat("webp", 100, 75, animated=False, opts="Q=90")
|
||||
with self.thumbnail_formats(still_webp):
|
||||
self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [still_webp])
|
||||
self.assertEqual(missing_thumbnails(image_attachment), [still_webp])
|
||||
|
||||
anim_webp = ThumbnailFormat("webp", 100, 75, animated=True, opts="Q=90")
|
||||
with self.thumbnail_formats(still_webp, anim_webp):
|
||||
# It's not animated, so the animated format doesn't appear at all
|
||||
self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [still_webp])
|
||||
self.assertEqual(missing_thumbnails(image_attachment), [still_webp])
|
||||
|
||||
still_jpeg = ThumbnailFormat("jpeg", 100, 75, animated=False, opts="Q=90")
|
||||
with self.thumbnail_formats(still_webp, anim_webp, still_jpeg):
|
||||
# But other still formats do
|
||||
self.assertEqual(
|
||||
missing_thumbnails(image_attachment, "image/png"), [still_webp, still_jpeg]
|
||||
)
|
||||
self.assertEqual(missing_thumbnails(image_attachment), [still_webp, still_jpeg])
|
||||
|
||||
# If we have a rendered 150x100.webp, then we're not missing it
|
||||
rendered_still_webp = StoredThumbnailFormat(
|
||||
@@ -652,14 +652,12 @@ class TestStoreThumbnail(ZulipTestCase):
|
||||
)
|
||||
image_attachment.thumbnail_metadata = [asdict(rendered_still_webp)]
|
||||
with self.thumbnail_formats(still_webp, anim_webp, still_jpeg):
|
||||
self.assertEqual(missing_thumbnails(image_attachment, "image/png"), [still_jpeg])
|
||||
self.assertEqual(missing_thumbnails(image_attachment), [still_jpeg])
|
||||
|
||||
# If we have the still, and it's animated, we do still need the animated
|
||||
image_attachment.frames = 10
|
||||
with self.thumbnail_formats(still_webp, anim_webp, still_jpeg):
|
||||
self.assertEqual(
|
||||
missing_thumbnails(image_attachment, "image/png"), [anim_webp, still_jpeg]
|
||||
)
|
||||
self.assertEqual(missing_thumbnails(image_attachment), [anim_webp, still_jpeg])
|
||||
|
||||
def test_transcoded_format(self) -> None:
|
||||
image_attachment = ImageAttachment(
|
||||
@@ -668,34 +666,29 @@ class TestStoreThumbnail(ZulipTestCase):
|
||||
original_height_px=100,
|
||||
frames=1,
|
||||
thumbnail_metadata=[],
|
||||
content_type="image/tiff",
|
||||
)
|
||||
still_webp = ThumbnailFormat("webp", 100, 75, animated=False, opts="Q=90")
|
||||
with self.thumbnail_formats(still_webp):
|
||||
# We add a high-resolution transcoded format if the image isn't in INLINE_MIME_TYPES:
|
||||
transcoded = ThumbnailFormat("webp", 4032, 3024, animated=False)
|
||||
self.assertEqual(
|
||||
missing_thumbnails(image_attachment, "image/tiff"), [still_webp, transcoded]
|
||||
)
|
||||
self.assertEqual(missing_thumbnails(image_attachment), [still_webp, transcoded])
|
||||
|
||||
# We flip to being portrait if the image is higher than it is wide
|
||||
transcoded = ThumbnailFormat("webp", 3024, 4032, animated=False)
|
||||
image_attachment.original_height_px = 300
|
||||
self.assertEqual(
|
||||
missing_thumbnails(image_attachment, "image/tiff"), [still_webp, transcoded]
|
||||
)
|
||||
self.assertEqual(missing_thumbnails(image_attachment), [still_webp, transcoded])
|
||||
|
||||
# The format is not animated, even if the original was
|
||||
image_attachment.original_height_px = 100
|
||||
image_attachment.frames = 10
|
||||
transcoded = ThumbnailFormat("webp", 4032, 3024, animated=False)
|
||||
self.assertEqual(
|
||||
missing_thumbnails(image_attachment, "image/tiff"), [still_webp, transcoded]
|
||||
)
|
||||
self.assertEqual(missing_thumbnails(image_attachment), [still_webp, transcoded])
|
||||
|
||||
# We do not store on the image_attachment if we generated
|
||||
# a transcoded version; it just picks the largest format
|
||||
# if one is called for.
|
||||
self.assertEqual(get_transcoded_format(image_attachment, "image/tiff"), None)
|
||||
self.assertEqual(get_transcoded_format(image_attachment), None)
|
||||
image_attachment.thumbnail_metadata = [
|
||||
asdict(
|
||||
StoredThumbnailFormat(
|
||||
@@ -734,12 +727,14 @@ class TestStoreThumbnail(ZulipTestCase):
|
||||
)
|
||||
),
|
||||
]
|
||||
self.assertEqual(get_transcoded_format(image_attachment, "image/png"), None)
|
||||
self.assertEqual(get_transcoded_format(image_attachment, None), None)
|
||||
self.assertEqual(
|
||||
get_transcoded_format(image_attachment, "image/tiff"),
|
||||
get_transcoded_format(image_attachment),
|
||||
ThumbnailFormat("webp", 4032, 3024, animated=False),
|
||||
)
|
||||
image_attachment.content_type = "image/png"
|
||||
self.assertEqual(get_transcoded_format(image_attachment), None)
|
||||
image_attachment.content_type = None
|
||||
self.assertEqual(get_transcoded_format(image_attachment), None)
|
||||
|
||||
def test_maybe_thumbnail_from_stream(self) -> None:
|
||||
# If we put the file in place directly (e.g. simulating a
|
||||
|
@@ -342,7 +342,6 @@ def serve_file(
|
||||
with transaction.atomic(savepoint=False):
|
||||
ensure_thumbnails(
|
||||
ImageAttachment.objects.select_for_update().get(id=image_attachment.id),
|
||||
attachment.content_type,
|
||||
)
|
||||
|
||||
# Update the path that we are fetching to be the thumbnail
|
||||
|
@@ -6,7 +6,6 @@ from typing import Any
|
||||
|
||||
import pyvips
|
||||
from django.db import transaction
|
||||
from django.db.models import OuterRef, Subquery
|
||||
from typing_extensions import override
|
||||
|
||||
from zerver.actions.message_edit import do_update_embedded_data
|
||||
@@ -22,7 +21,7 @@ from zerver.lib.thumbnail import (
|
||||
rewrite_thumbnailed_images,
|
||||
)
|
||||
from zerver.lib.upload import save_attachment_contents, upload_backend
|
||||
from zerver.models import ArchivedMessage, Attachment, ImageAttachment, Message
|
||||
from zerver.models import ArchivedMessage, ImageAttachment, Message
|
||||
from zerver.worker.base import QueueProcessingWorker, assign_queue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -40,21 +39,11 @@ class ThumbnailWorker(QueueProcessingWorker):
|
||||
# directly to a thumbnail URL we have not made yet.
|
||||
# This may mean that we may generate 0 thumbnail
|
||||
# images once we get the lock.
|
||||
row = (
|
||||
ImageAttachment.objects.select_for_update(of=("self",))
|
||||
.annotate(
|
||||
original_content_type=Subquery(
|
||||
Attachment.objects.filter(path_id=OuterRef("path_id")).values(
|
||||
"content_type"
|
||||
)
|
||||
)
|
||||
)
|
||||
.get(id=event["id"])
|
||||
)
|
||||
row = ImageAttachment.objects.select_for_update(of=("self",)).get(id=event["id"])
|
||||
except ImageAttachment.DoesNotExist: # nocoverage
|
||||
logger.info("ImageAttachment row %d missing", event["id"])
|
||||
return
|
||||
uploaded_thumbnails = ensure_thumbnails(row, row.original_content_type)
|
||||
uploaded_thumbnails = ensure_thumbnails(row)
|
||||
end = time.time()
|
||||
logger.info(
|
||||
"Processed %d thumbnails (%dms)",
|
||||
@@ -63,10 +52,8 @@ class ThumbnailWorker(QueueProcessingWorker):
|
||||
)
|
||||
|
||||
|
||||
def ensure_thumbnails(
|
||||
image_attachment: ImageAttachment, original_content_type: str | None = None
|
||||
) -> int:
|
||||
needed_thumbnails = missing_thumbnails(image_attachment, original_content_type)
|
||||
def ensure_thumbnails(image_attachment: ImageAttachment) -> int:
|
||||
needed_thumbnails = missing_thumbnails(image_attachment)
|
||||
|
||||
if not needed_thumbnails:
|
||||
return 0
|
||||
@@ -174,8 +161,8 @@ def ensure_thumbnails(
|
||||
is_animated=is_animated,
|
||||
original_width_px=image_attachment.original_width_px,
|
||||
original_height_px=image_attachment.original_height_px,
|
||||
original_content_type=original_content_type,
|
||||
transcoded_image=get_transcoded_format(image_attachment, original_content_type),
|
||||
original_content_type=image_attachment.content_type,
|
||||
transcoded_image=get_transcoded_format(image_attachment),
|
||||
),
|
||||
)
|
||||
return written_images
|
||||
|
Reference in New Issue
Block a user