From ef741bf317e6f3bcc7f7fbead2aae4feb4f74d87 Mon Sep 17 00:00:00 2001 From: Udit107710 Date: Thu, 26 Mar 2020 22:16:23 +0000 Subject: [PATCH] messages: Return shallow copy of message object. When more than one outgoing webhook is configured, the message which is send to the webhook bot passes through finalize_payload function multiple times, which mutated the message dict in a way that many keys were lost from the dict obj. This commit fixes that problem by having `finalize_payload` return a shallow copy of the incoming dict, instead of mutating it. We still mutate dicts inside of `post_process_dicts`, though, for performance reasons. This was slightly modified by @showell to fix the `test_both_codepaths` test that was added concurrently to this work. (I used a slightly verbose style in the tests to emphasize the transformation from `wide_dict` to `narrow_dict`.) I also removed a deepcopy call inside `get_client_payload`, since we now no longer mutate in `finalize_payload`. Finally, I added some comments here and there. For testing, I mostly protect against the root cause of the bug happening again, by adding a line to make sure that `sender_realm_id` does not get wiped out from the "wide" dictionary. A better test would exercise the actual code that exposed the bug here by sending a message to a bot with two or more services attached to it. I will do that in a future commit. Fixes #14384 --- zerver/lib/message.py | 34 +++++++++++++++++-- zerver/lib/outgoing_webhook.py | 4 +-- zerver/tests/test_messages.py | 10 ++---- zerver/tests/test_narrow.py | 10 ++++-- .../tests/test_outgoing_webhook_interfaces.py | 3 ++ zerver/tornado/event_queue.py | 9 ++--- 6 files changed, 51 insertions(+), 19 deletions(-) diff --git a/zerver/lib/message.py b/zerver/lib/message.py index f3d3066420..2d30a13e92 100644 --- a/zerver/lib/message.py +++ b/zerver/lib/message.py @@ -2,6 +2,7 @@ import datetime import ujson import zlib import ahocorasick +import copy from django.utils.translation import ugettext as _ from django.utils.timezone import now as timezone_now @@ -190,17 +191,46 @@ class MessageDict: @staticmethod def post_process_dicts(objs: List[Dict[str, Any]], apply_markdown: bool, client_gravatar: bool) -> None: + ''' + NOTE: This function mutates the objects in + the `objs` list, rather than making + shallow copies. It might be safer to + make shallow copies here, but performance + is somewhat important here, as we are + often fetching several messages. + ''' MessageDict.bulk_hydrate_sender_info(objs) MessageDict.bulk_hydrate_recipient_info(objs) for obj in objs: - MessageDict.finalize_payload(obj, apply_markdown, client_gravatar) + MessageDict._finalize_payload(obj, apply_markdown, client_gravatar) @staticmethod def finalize_payload(obj: Dict[str, Any], apply_markdown: bool, client_gravatar: bool, - keep_rendered_content: bool=False) -> None: + keep_rendered_content: bool=False) -> Dict[str, Any]: + ''' + Make a shallow copy of the incoming dict to avoid + mutation-related bugs. This function is often + called when we're sending out message events to + multiple clients, who often want the final dictionary + to have different shapes here based on the parameters. + ''' + new_obj = copy.copy(obj) + + # Next call our worker, which mutates the record in place. + MessageDict._finalize_payload( + new_obj, + apply_markdown=apply_markdown, + client_gravatar=client_gravatar, + keep_rendered_content=keep_rendered_content + ) + return new_obj + + @staticmethod + def _finalize_payload(obj: Dict[str, Any], apply_markdown: bool, client_gravatar: bool, + keep_rendered_content: bool=False) -> None: MessageDict.set_sender_avatar(obj, client_gravatar) if apply_markdown: obj['content_type'] = 'text/html' diff --git a/zerver/lib/outgoing_webhook.py b/zerver/lib/outgoing_webhook.py index 9c89b7f9aa..e5f80bf1fd 100644 --- a/zerver/lib/outgoing_webhook.py +++ b/zerver/lib/outgoing_webhook.py @@ -37,7 +37,7 @@ class GenericOutgoingWebhookService(OutgoingWebhookServiceInterface): info to the clients (so they don't have to compute it themselves). ''' - MessageDict.finalize_payload( + message_dict = MessageDict.finalize_payload( event['message'], apply_markdown=False, client_gravatar=False, @@ -45,7 +45,7 @@ class GenericOutgoingWebhookService(OutgoingWebhookServiceInterface): ) request_data = {"data": event['command'], - "message": event['message'], + "message": message_dict, "bot_email": self.user_profile.email, "token": self.token, "trigger": event['trigger']} diff --git a/zerver/tests/test_messages.py b/zerver/tests/test_messages.py index c1236994d7..bccccbd64c 100644 --- a/zerver/tests/test_messages.py +++ b/zerver/tests/test_messages.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -import copy - from django.db import IntegrityError from django.db.models import Q from django.conf import settings @@ -1302,12 +1300,8 @@ class MessageDictTest(ZulipTestCase): msg = reload_message(msg_id) wide_dict = MessageDict.wide_dict(msg) - # TODO: Have finalize_payload make this - # copy for us, rather than mutating - # in place. - narrow_dict = copy.copy(wide_dict) - MessageDict.finalize_payload( - narrow_dict, + narrow_dict = MessageDict.finalize_payload( + wide_dict, apply_markdown=apply_markdown, client_gravatar=client_gravatar, ) diff --git a/zerver/tests/test_narrow.py b/zerver/tests/test_narrow.py index 3ab67890a1..958239c704 100644 --- a/zerver/tests/test_narrow.py +++ b/zerver/tests/test_narrow.py @@ -2416,9 +2416,13 @@ class GetOldMessagesTest(ZulipTestCase): m = self.get_last_message() m.rendered_content = m.rendered_content_version = None m.content = 'test content' - d = MessageDict.wide_dict(m) - MessageDict.finalize_payload(d, apply_markdown=True, client_gravatar=False) - self.assertEqual(d['content'], '

test content

') + wide_dict = MessageDict.wide_dict(m) + final_dict = MessageDict.finalize_payload( + wide_dict, + apply_markdown=True, + client_gravatar=False, + ) + self.assertEqual(final_dict['content'], '

test content

') def common_check_get_messages_query(self, query_params: Dict[str, object], expected: str) -> None: user_profile = self.example_user('hamlet') diff --git a/zerver/tests/test_outgoing_webhook_interfaces.py b/zerver/tests/test_outgoing_webhook_interfaces.py index a119446321..36358fd3cb 100644 --- a/zerver/tests/test_outgoing_webhook_interfaces.py +++ b/zerver/tests/test_outgoing_webhook_interfaces.py @@ -122,6 +122,9 @@ class TestGenericOutgoingWebhookService(ZulipTestCase): self.assertEqual(request_data['token'], "abcdef") self.assertEqual(request_data['message'], expected_message_data) + # Make sure we didn't accidentally mutate wide_message_dict. + self.assertEqual(wide_message_dict['sender_realm_id'], othello.realm_id) + def test_process_success(self) -> None: response = dict(response_not_required=True) # type: Dict[str, Any] success_response = self.handler.process_success(response) diff --git a/zerver/tornado/event_queue.py b/zerver/tornado/event_queue.py index c5d8c398df..c80fc5f6dc 100644 --- a/zerver/tornado/event_queue.py +++ b/zerver/tornado/event_queue.py @@ -840,10 +840,11 @@ def process_message_event(event_template: Mapping[str, Any], users: Iterable[Map @cachify def get_client_payload(apply_markdown: bool, client_gravatar: bool) -> Dict[str, Any]: - dct = copy.deepcopy(wide_dict) - - MessageDict.finalize_payload(dct, apply_markdown, client_gravatar) - return dct + return MessageDict.finalize_payload( + wide_dict, + apply_markdown=apply_markdown, + client_gravatar=client_gravatar + ) # Extra user-specific data to include extra_user_data = {} # type: Dict[int, Any]