mirror of
https://github.com/zulip/zulip.git
synced 2025-11-13 02:17:19 +00:00
thumbnail: Resolve a race condition when rendering messages.
Messages are rendered outside of a transaction, for performance reasons, and then sent inside of one. This opens thumbnailing up to a race where the thumbnails have not yet been written when the message is rendered, but the message has not been sent when thumbnailing completes, causing `rewrite_thumbnailed_images` to be a no-op and the message being left with a spinner which never resolves. Explicitly lock and use he ImageAttachment data inside the message-sending transaction, to rewrite the message content with the latest information about the existing thumbnails. Despite the thumbnailing worker taking a lock on Message rows to update them, this does not lead to deadlocks -- the INSERT of the Message rows happens in a transaction, ensuring that either the message rending blocks the thumbnailing until the Message row is created, or that the `rewrite_thumbnailed_images` and Message INSERT waits until thumbnailing is complete (and updated no Message rows).
This commit is contained in:
committed by
Tim Abbott
parent
2a14a08e63
commit
6f20c15ae9
@@ -68,6 +68,7 @@ from zerver.lib.stream_subscription import (
|
|||||||
from zerver.lib.stream_topic import StreamTopicTarget
|
from zerver.lib.stream_topic import StreamTopicTarget
|
||||||
from zerver.lib.streams import access_stream_for_send_message, ensure_stream, subscribed_to_stream
|
from zerver.lib.streams import access_stream_for_send_message, ensure_stream, subscribed_to_stream
|
||||||
from zerver.lib.string_validation import check_stream_name
|
from zerver.lib.string_validation import check_stream_name
|
||||||
|
from zerver.lib.thumbnail import get_user_upload_previews, rewrite_thumbnailed_images
|
||||||
from zerver.lib.timestamp import timestamp_to_datetime
|
from zerver.lib.timestamp import timestamp_to_datetime
|
||||||
from zerver.lib.topic import participants_for_topic
|
from zerver.lib.topic import participants_for_topic
|
||||||
from zerver.lib.url_preview.types import UrlEmbedData
|
from zerver.lib.url_preview.types import UrlEmbedData
|
||||||
@@ -864,7 +865,31 @@ def do_send_messages(
|
|||||||
send_request.message, send_request.rendering_result.potential_attachment_path_ids
|
send_request.message, send_request.rendering_result.potential_attachment_path_ids
|
||||||
):
|
):
|
||||||
send_request.message.has_attachment = True
|
send_request.message.has_attachment = True
|
||||||
send_request.message.save(update_fields=["has_attachment"])
|
update_fields = ["has_attachment"]
|
||||||
|
|
||||||
|
# Lock the ImageAttachment rows that we pulled when rendering
|
||||||
|
# outside of the transaction; they may be out of date now, and we
|
||||||
|
# need to post-process the rendered_content to swap out the
|
||||||
|
# spinner. We would ideally take a `FOR SHARE` lock here
|
||||||
|
# (https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-ROWS),
|
||||||
|
# which would block UPDATEs but not other `FOR SHARE`, but Django
|
||||||
|
# does not support this yet: (https://code.djangoproject.com/ticket/10088)
|
||||||
|
assert send_request.message.rendered_content is not None
|
||||||
|
if send_request.rendering_result.thumbnail_spinners:
|
||||||
|
previews = get_user_upload_previews(
|
||||||
|
send_request.message.realm_id,
|
||||||
|
send_request.message.content,
|
||||||
|
lock=True,
|
||||||
|
path_ids=list(send_request.rendering_result.thumbnail_spinners),
|
||||||
|
)
|
||||||
|
new_rendered_content = rewrite_thumbnailed_images(
|
||||||
|
send_request.message.rendered_content, previews
|
||||||
|
)[0]
|
||||||
|
if new_rendered_content is not None:
|
||||||
|
send_request.message.rendered_content = new_rendered_content
|
||||||
|
update_fields.append("rendered_content")
|
||||||
|
|
||||||
|
send_request.message.save(update_fields=update_fields)
|
||||||
|
|
||||||
ums: list[UserMessageLite] = []
|
ums: list[UserMessageLite] = []
|
||||||
for send_request in send_message_requests:
|
for send_request in send_message_requests:
|
||||||
|
|||||||
@@ -118,6 +118,7 @@ class MessageRenderingResult:
|
|||||||
links_for_preview: set[str]
|
links_for_preview: set[str]
|
||||||
user_ids_with_alert_words: set[int]
|
user_ids_with_alert_words: set[int]
|
||||||
potential_attachment_path_ids: list[str]
|
potential_attachment_path_ids: list[str]
|
||||||
|
thumbnail_spinners: set[str]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -2625,6 +2626,7 @@ def do_convert(
|
|||||||
links_for_preview=set(),
|
links_for_preview=set(),
|
||||||
user_ids_with_alert_words=set(),
|
user_ids_with_alert_words=set(),
|
||||||
potential_attachment_path_ids=[],
|
potential_attachment_path_ids=[],
|
||||||
|
thumbnail_spinners=set(),
|
||||||
)
|
)
|
||||||
|
|
||||||
_md_engine.zulip_message = message
|
_md_engine.zulip_message = message
|
||||||
@@ -2683,9 +2685,10 @@ def do_convert(
|
|||||||
|
|
||||||
# Post-process the result with the rendered image previews:
|
# Post-process the result with the rendered image previews:
|
||||||
if user_upload_previews is not None:
|
if user_upload_previews is not None:
|
||||||
content_with_thumbnails = rewrite_thumbnailed_images(
|
content_with_thumbnails, thumbnail_spinners = rewrite_thumbnailed_images(
|
||||||
rendering_result.rendered_content, user_upload_previews
|
rendering_result.rendered_content, user_upload_previews
|
||||||
)
|
)
|
||||||
|
rendering_result.thumbnail_spinners = thumbnail_spinners
|
||||||
if content_with_thumbnails is not None:
|
if content_with_thumbnails is not None:
|
||||||
rendering_result.rendered_content = content_with_thumbnails
|
rendering_result.rendered_content = content_with_thumbnails
|
||||||
|
|
||||||
|
|||||||
@@ -333,11 +333,22 @@ class MarkdownImageMetadata:
|
|||||||
|
|
||||||
|
|
||||||
def get_user_upload_previews(
|
def get_user_upload_previews(
|
||||||
realm_id: int, content: str
|
realm_id: int,
|
||||||
|
content: str,
|
||||||
|
lock: bool = False,
|
||||||
|
path_ids: list[str] | None = None,
|
||||||
) -> dict[str, MarkdownImageMetadata | None]:
|
) -> dict[str, MarkdownImageMetadata | None]:
|
||||||
matches = re.findall(r"/user_uploads/(\d+/[/\w.-]+)", content)
|
if path_ids is None:
|
||||||
|
path_ids = re.findall(r"/user_uploads/(\d+/[/\w.-]+)", content)
|
||||||
|
if not path_ids:
|
||||||
|
return {}
|
||||||
|
|
||||||
upload_preview_data: dict[str, MarkdownImageMetadata | None] = {}
|
upload_preview_data: dict[str, MarkdownImageMetadata | None] = {}
|
||||||
for image_attachment in ImageAttachment.objects.filter(realm_id=realm_id, path_id__in=matches):
|
|
||||||
|
image_attachments = ImageAttachment.objects.filter(realm_id=realm_id, path_id__in=path_ids)
|
||||||
|
if lock:
|
||||||
|
image_attachments = image_attachments.select_for_update()
|
||||||
|
for image_attachment in image_attachments:
|
||||||
if image_attachment.thumbnail_metadata == []:
|
if image_attachment.thumbnail_metadata == []:
|
||||||
# Image exists, and header of it parsed as a valid image,
|
# Image exists, and header of it parsed as a valid image,
|
||||||
# but has not been thumbnailed yet; we will render a
|
# but has not been thumbnailed yet; we will render a
|
||||||
@@ -379,10 +390,11 @@ def rewrite_thumbnailed_images(
|
|||||||
rendered_content: str,
|
rendered_content: str,
|
||||||
images: dict[str, MarkdownImageMetadata | None],
|
images: dict[str, MarkdownImageMetadata | None],
|
||||||
to_delete: set[str] | None = None,
|
to_delete: set[str] | None = None,
|
||||||
) -> str | None:
|
) -> tuple[str | None, set[str]]:
|
||||||
if not images and not to_delete:
|
if not images and not to_delete:
|
||||||
return None
|
return None, set()
|
||||||
|
|
||||||
|
remaining_thumbnails = set()
|
||||||
parsed_message = BeautifulSoup(rendered_content, "html.parser")
|
parsed_message = BeautifulSoup(rendered_content, "html.parser")
|
||||||
|
|
||||||
changed = False
|
changed = False
|
||||||
@@ -419,7 +431,7 @@ def rewrite_thumbnailed_images(
|
|||||||
# This happens routinely when a message contained multiple
|
# This happens routinely when a message contained multiple
|
||||||
# unthumbnailed images, and only one of those images just
|
# unthumbnailed images, and only one of those images just
|
||||||
# completed thumbnailing.
|
# completed thumbnailing.
|
||||||
pass
|
remaining_thumbnails.add(path_id)
|
||||||
else:
|
else:
|
||||||
changed = True
|
changed = True
|
||||||
del image_tag["class"]
|
del image_tag["class"]
|
||||||
@@ -432,6 +444,6 @@ def rewrite_thumbnailed_images(
|
|||||||
|
|
||||||
if changed:
|
if changed:
|
||||||
# The formatter="html5" means we do not produce self-closing tags
|
# The formatter="html5" means we do not produce self-closing tags
|
||||||
return parsed_message.encode(formatter="html5").decode().strip()
|
return parsed_message.encode(formatter="html5").decode().strip(), remaining_thumbnails
|
||||||
else:
|
else:
|
||||||
return None
|
return None, remaining_thumbnails
|
||||||
|
|||||||
@@ -4,13 +4,22 @@ from unittest.mock import patch
|
|||||||
import pyvips
|
import pyvips
|
||||||
|
|
||||||
from zerver.actions.message_delete import do_delete_messages
|
from zerver.actions.message_delete import do_delete_messages
|
||||||
|
from zerver.actions.message_send import check_message, do_send_messages
|
||||||
|
from zerver.lib.addressee import Addressee
|
||||||
from zerver.lib.camo import get_camo_url
|
from zerver.lib.camo import get_camo_url
|
||||||
from zerver.lib.markdown import render_message_markdown
|
from zerver.lib.markdown import render_message_markdown
|
||||||
from zerver.lib.test_classes import ZulipTestCase
|
from zerver.lib.test_classes import ZulipTestCase
|
||||||
from zerver.lib.test_helpers import get_test_image_file, read_test_image_file
|
from zerver.lib.test_helpers import get_test_image_file, read_test_image_file
|
||||||
from zerver.lib.thumbnail import ThumbnailFormat
|
from zerver.lib.thumbnail import ThumbnailFormat
|
||||||
from zerver.lib.upload import upload_message_attachment
|
from zerver.lib.upload import upload_message_attachment
|
||||||
from zerver.models import ArchivedAttachment, ArchivedMessage, Attachment, ImageAttachment, Message
|
from zerver.models import (
|
||||||
|
ArchivedAttachment,
|
||||||
|
ArchivedMessage,
|
||||||
|
Attachment,
|
||||||
|
Client,
|
||||||
|
ImageAttachment,
|
||||||
|
Message,
|
||||||
|
)
|
||||||
from zerver.models.clients import get_client
|
from zerver.models.clients import get_client
|
||||||
from zerver.worker.thumbnail import ensure_thumbnails
|
from zerver.worker.thumbnail import ensure_thumbnails
|
||||||
|
|
||||||
@@ -334,3 +343,42 @@ class MarkdownThumbnailTest(ZulipTestCase):
|
|||||||
private_message_id,
|
private_message_id,
|
||||||
f'<p>This <a href="/user_uploads/{path_id}">image</a> is private</p>\n{rendered_thumb}',
|
f'<p>This <a href="/user_uploads/{path_id}">image</a> is private</p>\n{rendered_thumb}',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_thumbnail_race(self) -> None:
|
||||||
|
"""Test what happens when thumbnailing happens between rendering and sending"""
|
||||||
|
path_id = self.upload_image("img.png")
|
||||||
|
|
||||||
|
self.assert_length(ImageAttachment.objects.get(path_id=path_id).thumbnail_metadata, 0)
|
||||||
|
|
||||||
|
# Render, but do not send, the message referencing the image.
|
||||||
|
# This will render as a spinner, since the thumbnail has not
|
||||||
|
# been generated yet.
|
||||||
|
send_request = check_message(
|
||||||
|
self.example_user("othello"),
|
||||||
|
Client.objects.get_or_create(name="test suite")[0],
|
||||||
|
Addressee.for_stream_name("Verona", "test"),
|
||||||
|
f"[image](/user_uploads/{path_id})",
|
||||||
|
)
|
||||||
|
expected = (
|
||||||
|
f'<p><a href="/user_uploads/{path_id}">image</a></p>\n'
|
||||||
|
f'<div class="message_inline_image"><a href="/user_uploads/{path_id}" title="image">'
|
||||||
|
'<img class="image-loading-placeholder" src="/static/images/loading/loader-black.svg"></a></div>'
|
||||||
|
)
|
||||||
|
self.assertEqual(send_request.message.rendered_content, expected)
|
||||||
|
|
||||||
|
# Thumbnail the image. The message does not exist yet, so
|
||||||
|
# nothing is re-written.
|
||||||
|
ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id))
|
||||||
|
|
||||||
|
# Send the message; this should re-check the ImageAttachment
|
||||||
|
# data, find the thumbnails, and update the rendered_content
|
||||||
|
# to no longer contain a spinner.
|
||||||
|
message_id = do_send_messages([send_request])[0].message_id
|
||||||
|
|
||||||
|
rendered_thumb = (
|
||||||
|
f'<div class="message_inline_image"><a href="/user_uploads/{path_id}" title="image">'
|
||||||
|
f'<img data-original-dimensions="128x128" src="/user_uploads/thumbnail/{path_id}/840x560.webp"></a></div>'
|
||||||
|
)
|
||||||
|
self.assert_message_content_is(
|
||||||
|
message_id, f'<p><a href="/user_uploads/{path_id}">image</a></p>\n{rendered_thumb}'
|
||||||
|
)
|
||||||
|
|||||||
@@ -170,7 +170,7 @@ def update_message_rendered_content(
|
|||||||
message.rendered_content,
|
message.rendered_content,
|
||||||
{} if image_data is None else {path_id: image_data},
|
{} if image_data is None else {path_id: image_data},
|
||||||
{path_id} if image_data is None else set(),
|
{path_id} if image_data is None else set(),
|
||||||
)
|
)[0]
|
||||||
if rendered_content is None:
|
if rendered_content is None:
|
||||||
# There were no updates -- for instance, if we re-run
|
# There were no updates -- for instance, if we re-run
|
||||||
# ensure_thumbnails on an ImageAttachment we already
|
# ensure_thumbnails on an ImageAttachment we already
|
||||||
|
|||||||
Reference in New Issue
Block a user