mirror of
				https://github.com/zulip/zulip.git
				synced 2025-10-31 03:53:50 +00:00 
			
		
		
		
	thumbnail: Resolve a race condition when rendering messages.
Messages are rendered outside of a transaction, for performance reasons, and then sent inside of one. This opens thumbnailing up to a race where the thumbnails have not yet been written when the message is rendered, but the message has not been sent when thumbnailing completes, causing `rewrite_thumbnailed_images` to be a no-op and the message being left with a spinner which never resolves. Explicitly lock and use he ImageAttachment data inside the message-sending transaction, to rewrite the message content with the latest information about the existing thumbnails. Despite the thumbnailing worker taking a lock on Message rows to update them, this does not lead to deadlocks -- the INSERT of the Message rows happens in a transaction, ensuring that either the message rending blocks the thumbnailing until the Message row is created, or that the `rewrite_thumbnailed_images` and Message INSERT waits until thumbnailing is complete (and updated no Message rows).
This commit is contained in:
		
				
					committed by
					
						 Tim Abbott
						Tim Abbott
					
				
			
			
				
	
			
			
			
						parent
						
							2a14a08e63
						
					
				
				
					commit
					6f20c15ae9
				
			| @@ -68,6 +68,7 @@ from zerver.lib.stream_subscription import ( | ||||
| from zerver.lib.stream_topic import StreamTopicTarget | ||||
| from zerver.lib.streams import access_stream_for_send_message, ensure_stream, subscribed_to_stream | ||||
| from zerver.lib.string_validation import check_stream_name | ||||
| from zerver.lib.thumbnail import get_user_upload_previews, rewrite_thumbnailed_images | ||||
| from zerver.lib.timestamp import timestamp_to_datetime | ||||
| from zerver.lib.topic import participants_for_topic | ||||
| from zerver.lib.url_preview.types import UrlEmbedData | ||||
| @@ -864,7 +865,31 @@ def do_send_messages( | ||||
|             send_request.message, send_request.rendering_result.potential_attachment_path_ids | ||||
|         ): | ||||
|             send_request.message.has_attachment = True | ||||
|             send_request.message.save(update_fields=["has_attachment"]) | ||||
|             update_fields = ["has_attachment"] | ||||
|  | ||||
|             # Lock the ImageAttachment rows that we pulled when rendering | ||||
|             # outside of the transaction; they may be out of date now, and we | ||||
|             # need to post-process the rendered_content to swap out the | ||||
|             # spinner.  We would ideally take a `FOR SHARE` lock here | ||||
|             # (https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-ROWS), | ||||
|             # which would block UPDATEs but not other `FOR SHARE`, but Django | ||||
|             # does not support this yet: (https://code.djangoproject.com/ticket/10088) | ||||
|             assert send_request.message.rendered_content is not None | ||||
|             if send_request.rendering_result.thumbnail_spinners: | ||||
|                 previews = get_user_upload_previews( | ||||
|                     send_request.message.realm_id, | ||||
|                     send_request.message.content, | ||||
|                     lock=True, | ||||
|                     path_ids=list(send_request.rendering_result.thumbnail_spinners), | ||||
|                 ) | ||||
|                 new_rendered_content = rewrite_thumbnailed_images( | ||||
|                     send_request.message.rendered_content, previews | ||||
|                 )[0] | ||||
|                 if new_rendered_content is not None: | ||||
|                     send_request.message.rendered_content = new_rendered_content | ||||
|                     update_fields.append("rendered_content") | ||||
|  | ||||
|             send_request.message.save(update_fields=update_fields) | ||||
|  | ||||
|     ums: list[UserMessageLite] = [] | ||||
|     for send_request in send_message_requests: | ||||
|   | ||||
| @@ -118,6 +118,7 @@ class MessageRenderingResult: | ||||
|     links_for_preview: set[str] | ||||
|     user_ids_with_alert_words: set[int] | ||||
|     potential_attachment_path_ids: list[str] | ||||
|     thumbnail_spinners: set[str] | ||||
|  | ||||
|  | ||||
| @dataclass | ||||
| @@ -2625,6 +2626,7 @@ def do_convert( | ||||
|         links_for_preview=set(), | ||||
|         user_ids_with_alert_words=set(), | ||||
|         potential_attachment_path_ids=[], | ||||
|         thumbnail_spinners=set(), | ||||
|     ) | ||||
|  | ||||
|     _md_engine.zulip_message = message | ||||
| @@ -2683,9 +2685,10 @@ def do_convert( | ||||
|  | ||||
|         # Post-process the result with the rendered image previews: | ||||
|         if user_upload_previews is not None: | ||||
|             content_with_thumbnails = rewrite_thumbnailed_images( | ||||
|             content_with_thumbnails, thumbnail_spinners = rewrite_thumbnailed_images( | ||||
|                 rendering_result.rendered_content, user_upload_previews | ||||
|             ) | ||||
|             rendering_result.thumbnail_spinners = thumbnail_spinners | ||||
|             if content_with_thumbnails is not None: | ||||
|                 rendering_result.rendered_content = content_with_thumbnails | ||||
|  | ||||
|   | ||||
| @@ -333,11 +333,22 @@ class MarkdownImageMetadata: | ||||
|  | ||||
|  | ||||
| def get_user_upload_previews( | ||||
|     realm_id: int, content: str | ||||
|     realm_id: int, | ||||
|     content: str, | ||||
|     lock: bool = False, | ||||
|     path_ids: list[str] | None = None, | ||||
| ) -> dict[str, MarkdownImageMetadata | None]: | ||||
|     matches = re.findall(r"/user_uploads/(\d+/[/\w.-]+)", content) | ||||
|     if path_ids is None: | ||||
|         path_ids = re.findall(r"/user_uploads/(\d+/[/\w.-]+)", content) | ||||
|     if not path_ids: | ||||
|         return {} | ||||
|  | ||||
|     upload_preview_data: dict[str, MarkdownImageMetadata | None] = {} | ||||
|     for image_attachment in ImageAttachment.objects.filter(realm_id=realm_id, path_id__in=matches): | ||||
|  | ||||
|     image_attachments = ImageAttachment.objects.filter(realm_id=realm_id, path_id__in=path_ids) | ||||
|     if lock: | ||||
|         image_attachments = image_attachments.select_for_update() | ||||
|     for image_attachment in image_attachments: | ||||
|         if image_attachment.thumbnail_metadata == []: | ||||
|             # Image exists, and header of it parsed as a valid image, | ||||
|             # but has not been thumbnailed yet; we will render a | ||||
| @@ -379,10 +390,11 @@ def rewrite_thumbnailed_images( | ||||
|     rendered_content: str, | ||||
|     images: dict[str, MarkdownImageMetadata | None], | ||||
|     to_delete: set[str] | None = None, | ||||
| ) -> str | None: | ||||
| ) -> tuple[str | None, set[str]]: | ||||
|     if not images and not to_delete: | ||||
|         return None | ||||
|         return None, set() | ||||
|  | ||||
|     remaining_thumbnails = set() | ||||
|     parsed_message = BeautifulSoup(rendered_content, "html.parser") | ||||
|  | ||||
|     changed = False | ||||
| @@ -419,7 +431,7 @@ def rewrite_thumbnailed_images( | ||||
|             # This happens routinely when a message contained multiple | ||||
|             # unthumbnailed images, and only one of those images just | ||||
|             # completed thumbnailing. | ||||
|             pass | ||||
|             remaining_thumbnails.add(path_id) | ||||
|         else: | ||||
|             changed = True | ||||
|             del image_tag["class"] | ||||
| @@ -432,6 +444,6 @@ def rewrite_thumbnailed_images( | ||||
|  | ||||
|     if changed: | ||||
|         # The formatter="html5" means we do not produce self-closing tags | ||||
|         return parsed_message.encode(formatter="html5").decode().strip() | ||||
|         return parsed_message.encode(formatter="html5").decode().strip(), remaining_thumbnails | ||||
|     else: | ||||
|         return None | ||||
|         return None, remaining_thumbnails | ||||
|   | ||||
| @@ -4,13 +4,22 @@ from unittest.mock import patch | ||||
| import pyvips | ||||
|  | ||||
| from zerver.actions.message_delete import do_delete_messages | ||||
| from zerver.actions.message_send import check_message, do_send_messages | ||||
| from zerver.lib.addressee import Addressee | ||||
| from zerver.lib.camo import get_camo_url | ||||
| from zerver.lib.markdown import render_message_markdown | ||||
| from zerver.lib.test_classes import ZulipTestCase | ||||
| from zerver.lib.test_helpers import get_test_image_file, read_test_image_file | ||||
| from zerver.lib.thumbnail import ThumbnailFormat | ||||
| from zerver.lib.upload import upload_message_attachment | ||||
| from zerver.models import ArchivedAttachment, ArchivedMessage, Attachment, ImageAttachment, Message | ||||
| from zerver.models import ( | ||||
|     ArchivedAttachment, | ||||
|     ArchivedMessage, | ||||
|     Attachment, | ||||
|     Client, | ||||
|     ImageAttachment, | ||||
|     Message, | ||||
| ) | ||||
| from zerver.models.clients import get_client | ||||
| from zerver.worker.thumbnail import ensure_thumbnails | ||||
|  | ||||
| @@ -334,3 +343,42 @@ class MarkdownThumbnailTest(ZulipTestCase): | ||||
|             private_message_id, | ||||
|             f'<p>This <a href="/user_uploads/{path_id}">image</a> is private</p>\n{rendered_thumb}', | ||||
|         ) | ||||
|  | ||||
|     def test_thumbnail_race(self) -> None: | ||||
|         """Test what happens when thumbnailing happens between rendering and sending""" | ||||
|         path_id = self.upload_image("img.png") | ||||
|  | ||||
|         self.assert_length(ImageAttachment.objects.get(path_id=path_id).thumbnail_metadata, 0) | ||||
|  | ||||
|         # Render, but do not send, the message referencing the image. | ||||
|         # This will render as a spinner, since the thumbnail has not | ||||
|         # been generated yet. | ||||
|         send_request = check_message( | ||||
|             self.example_user("othello"), | ||||
|             Client.objects.get_or_create(name="test suite")[0], | ||||
|             Addressee.for_stream_name("Verona", "test"), | ||||
|             f"[image](/user_uploads/{path_id})", | ||||
|         ) | ||||
|         expected = ( | ||||
|             f'<p><a href="/user_uploads/{path_id}">image</a></p>\n' | ||||
|             f'<div class="message_inline_image"><a href="/user_uploads/{path_id}" title="image">' | ||||
|             '<img class="image-loading-placeholder" src="/static/images/loading/loader-black.svg"></a></div>' | ||||
|         ) | ||||
|         self.assertEqual(send_request.message.rendered_content, expected) | ||||
|  | ||||
|         # Thumbnail the image.  The message does not exist yet, so | ||||
|         # nothing is re-written. | ||||
|         ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) | ||||
|  | ||||
|         # Send the message; this should re-check the ImageAttachment | ||||
|         # data, find the thumbnails, and update the rendered_content | ||||
|         # to no longer contain a spinner. | ||||
|         message_id = do_send_messages([send_request])[0].message_id | ||||
|  | ||||
|         rendered_thumb = ( | ||||
|             f'<div class="message_inline_image"><a href="/user_uploads/{path_id}" title="image">' | ||||
|             f'<img data-original-dimensions="128x128" src="/user_uploads/thumbnail/{path_id}/840x560.webp"></a></div>' | ||||
|         ) | ||||
|         self.assert_message_content_is( | ||||
|             message_id, f'<p><a href="/user_uploads/{path_id}">image</a></p>\n{rendered_thumb}' | ||||
|         ) | ||||
|   | ||||
| @@ -170,7 +170,7 @@ def update_message_rendered_content( | ||||
|                 message.rendered_content, | ||||
|                 {} if image_data is None else {path_id: image_data}, | ||||
|                 {path_id} if image_data is None else set(), | ||||
|             ) | ||||
|             )[0] | ||||
|             if rendered_content is None: | ||||
|                 # There were no updates -- for instance, if we re-run | ||||
|                 # ensure_thumbnails on an ImageAttachment we already | ||||
|   | ||||
		Reference in New Issue
	
	Block a user