thumbnail: Resolve a race condition when rendering messages.

Messages are rendered outside of a transaction, for performance
reasons, and then sent inside of one.  This opens thumbnailing up to a
race where the thumbnails have not yet been written when the message
is rendered, but the message has not been sent when thumbnailing
completes, causing `rewrite_thumbnailed_images` to be a no-op and
leaving the message with a spinner which never resolves.

Explicitly lock and use the ImageAttachment data inside the
message-sending transaction, to rewrite the message content with the
latest information about the existing thumbnails.

Despite the thumbnailing worker taking a lock on Message rows to
update them, this does not lead to deadlocks -- the INSERT of the
Message rows happens in a transaction, ensuring that either the
message rendering blocks the thumbnailing until the Message row is
created, or that the `rewrite_thumbnailed_images` and Message INSERT
wait until thumbnailing is complete (having updated no Message rows).
This commit is contained in:
Alex Vandiver
2024-07-31 16:07:10 +00:00
committed by Tim Abbott
parent 2a14a08e63
commit 6f20c15ae9
5 changed files with 100 additions and 12 deletions

View File

@@ -333,11 +333,22 @@ class MarkdownImageMetadata:
def get_user_upload_previews(
realm_id: int, content: str
realm_id: int,
content: str,
lock: bool = False,
path_ids: list[str] | None = None,
) -> dict[str, MarkdownImageMetadata | None]:
matches = re.findall(r"/user_uploads/(\d+/[/\w.-]+)", content)
if path_ids is None:
path_ids = re.findall(r"/user_uploads/(\d+/[/\w.-]+)", content)
if not path_ids:
return {}
upload_preview_data: dict[str, MarkdownImageMetadata | None] = {}
for image_attachment in ImageAttachment.objects.filter(realm_id=realm_id, path_id__in=matches):
image_attachments = ImageAttachment.objects.filter(realm_id=realm_id, path_id__in=path_ids)
if lock:
image_attachments = image_attachments.select_for_update()
for image_attachment in image_attachments:
if image_attachment.thumbnail_metadata == []:
# Image exists, and header of it parsed as a valid image,
# but has not been thumbnailed yet; we will render a
@@ -379,10 +390,11 @@ def rewrite_thumbnailed_images(
rendered_content: str,
images: dict[str, MarkdownImageMetadata | None],
to_delete: set[str] | None = None,
) -> str | None:
) -> tuple[str | None, set[str]]:
if not images and not to_delete:
return None
return None, set()
remaining_thumbnails = set()
parsed_message = BeautifulSoup(rendered_content, "html.parser")
changed = False
@@ -419,7 +431,7 @@ def rewrite_thumbnailed_images(
# This happens routinely when a message contained multiple
# unthumbnailed images, and only one of those images just
# completed thumbnailing.
pass
remaining_thumbnails.add(path_id)
else:
changed = True
del image_tag["class"]
@@ -432,6 +444,6 @@ def rewrite_thumbnailed_images(
if changed:
# The formatter="html5" means we do not produce self-closing tags
return parsed_message.encode(formatter="html5").decode().strip()
return parsed_message.encode(formatter="html5").decode().strip(), remaining_thumbnails
else:
return None
return None, remaining_thumbnails