messages: Don't use display_recipient values from cached message_dicts.

The user information in display_recipient in cached message_dicts
becomes outdated if the information is changed in any way.

In particular, since we don't have a way to find all the message
objects that might contain PMs after an organization toggles the
setting to hide user email addresses from other users, we had a
situation where client might see inaccurate cached data from before
the transition for a period of up to hours.

We address this by using our generic_bulk_cached_fetch toolchain to
ensure we always are fetching display_recipient data from the database
(and/or a special recipient_id -> display_recipient cache, which we
can flush easily).

Fixes #12818.
This commit is contained in:
Mateusz Mandera
2019-08-07 00:18:13 +02:00
committed by Tim Abbott
parent 48efd46bc6
commit c779bb1959
6 changed files with 326 additions and 25 deletions

View File

@@ -322,6 +322,11 @@ def preview_url_cache_key(url: str) -> str:
def display_recipient_cache_key(recipient_id: int) -> str:
return "display_recipient_dict:%d" % (recipient_id,)
def display_recipient_bulk_get_users_by_id_cache_key(user_id: int) -> str:
# Cache key function for a function for bulk fetching users, used internally
# by display_recipient code.
return 'bulk_fetch_display_recipients:' + user_profile_by_id_cache_key(user_id)
def user_profile_by_email_cache_key(email: str) -> str:
# See the comment in zerver/lib/avatar_hash.py:gravatar_hash for why we
# are proactively encoding email addresses even though they will
@@ -399,6 +404,7 @@ def delete_display_recipient_cache(user_profile: 'UserProfile') -> None:
recipient_ids = Subscription.objects.filter(user_profile=user_profile)
recipient_ids = recipient_ids.values_list('recipient_id', flat=True)
keys = [display_recipient_cache_key(rid) for rid in recipient_ids]
keys.append(display_recipient_bulk_get_users_by_id_cache_key(user_profile.id))
cache_delete_many(keys)
def changed(kwargs: Any, fields: List[str]) -> bool:

View File

@@ -1,7 +1,8 @@
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Set, Tuple, Union
from zerver.lib.cache import cache_with_key, display_recipient_cache_key
from zerver.models import Recipient, Stream, UserProfile
from zerver.lib.cache import cache_with_key, display_recipient_cache_key, generic_bulk_cached_fetch, \
display_recipient_bulk_get_users_by_id_cache_key
from zerver.models import Recipient, Stream, UserProfile, bulk_get_huddle_user_ids
DisplayRecipientCacheT = Union[str, List[Dict[str, Any]]]
@cache_with_key(lambda *args: display_recipient_cache_key(args[0]),
@@ -32,3 +33,131 @@ def user_profile_to_display_recipient_dict(user_profile: 'UserProfile') -> Dict[
'short_name': user_profile.short_name,
'id': user_profile.id,
'is_mirror_dummy': user_profile.is_mirror_dummy}
def bulk_get_user_profile_by_id(uids: List[int]) -> Dict[int, UserProfile]:
return generic_bulk_cached_fetch(
# Use a separate cache key to protect us from conflicts with
# the get_user_profile_by_id cache.
# (Since we fetch without select_related() here)
cache_key_function=display_recipient_bulk_get_users_by_id_cache_key,
query_function=lambda ids: list(UserProfile.objects.filter(id__in=ids)),
object_ids=uids
)
def bulk_fetch_display_recipients(recipient_tuples: Set[Tuple[int, int, int]]
) -> Dict[int, DisplayRecipientCacheT]:
"""
Takes set of tuples of the form (recipient_id, recipient_type, recipient_type_id)
Returns dict mapping recipient_id to corresponding display_recipient
"""
# Build dict mapping recipient id to (type, type_id) of the corresponding recipient:
recipient_id_to_type_pair_dict = {
recipient[0]: (recipient[1], recipient[2])
for recipient in recipient_tuples
}
# And the inverse mapping:
type_pair_to_recipient_id_dict = {
(recipient[1], recipient[2]): recipient[0]
for recipient in recipient_tuples
}
stream_recipients = set(
recipient for recipient in recipient_tuples if recipient[1] == Recipient.STREAM
)
personal_and_huddle_recipients = recipient_tuples - stream_recipients
def stream_query_function(recipient_ids: List[int]) -> List[Stream]:
stream_ids = [
recipient_id_to_type_pair_dict[recipient_id][1] for recipient_id in recipient_ids
]
return Stream.objects.filter(id__in=stream_ids)
def stream_id_fetcher(stream: Stream) -> int:
return type_pair_to_recipient_id_dict[(Recipient.STREAM, stream.id)]
def stream_cache_transformer(stream: Stream) -> str:
return stream.name
# ItemT = Stream, CacheItemT = str (name), ObjKT = int (recipient_id)
stream_display_recipients = generic_bulk_cached_fetch(
cache_key_function=display_recipient_cache_key,
query_function=stream_query_function,
object_ids=[recipient[0] for recipient in stream_recipients],
id_fetcher=stream_id_fetcher,
cache_transformer=stream_cache_transformer,
) # type: Dict[int, str]
# Now we have to create display_recipients for personal and huddle messages.
# We do this via generic_bulk_cached_fetch, supplying apprioprate functions to it.
def personal_and_huddle_query_function(recipient_ids: List[int]) -> List[Tuple[int, List[UserProfile]]]:
"""
Return a list of tuples of the form (recipient_id, [list of UserProfiles])
where [list of UserProfiles] has users corresponding to the recipient,
so the receiving userin Recipient.PERSONAL case,
or in Personal.HUDDLE case - users in the huddle.
This is a pretty hacky return value, but it needs to be in this form,
for this function to work as the query_function in generic_bulk_cached_fetch.
"""
recipients = [Recipient(
id=recipient_id,
type=recipient_id_to_type_pair_dict[recipient_id][0],
type_id=recipient_id_to_type_pair_dict[recipient_id][1]
) for recipient_id in recipient_ids]
# Find all user ids whose UserProfiles we will need to fetch:
user_ids_to_fetch = set() # type: Set[int]
huddle_user_ids = {} # type: Dict[int, List[int]]
huddle_user_ids = bulk_get_huddle_user_ids([recipient for recipient in recipients
if recipient.type == Recipient.HUDDLE])
for recipient in recipients:
if recipient.type == Recipient.PERSONAL:
user_ids_to_fetch.add(recipient.type_id)
else:
user_ids_to_fetch = user_ids_to_fetch.union(huddle_user_ids[recipient.id])
# Fetch the needed UserProfiles:
user_profiles = bulk_get_user_profile_by_id(list(user_ids_to_fetch)) # maps user id to UserProfile
# Build the return value:
result = [] # type: List[Tuple[int, List[UserProfile]]]
for recipient in recipients:
if recipient.type == Recipient.PERSONAL:
result.append((recipient.id, [user_profiles[recipient.type_id]]))
else:
result.append((recipient.id, [user_profiles[user_id]
for user_id in huddle_user_ids[recipient.id]]))
return result
def personal_and_huddle_cache_transformer(db_object: Tuple[int, List[UserProfile]]
) -> List[Dict[str, Any]]:
"""
Takes an element of the list returned by the query_function, maps it to the final
display_recipient list.
"""
user_profile_list = db_object[1]
display_recipient = [user_profile_to_display_recipient_dict(user_profile)
for user_profile in user_profile_list]
return display_recipient
def personal_and_huddle_id_fetcher(db_object: Tuple[int, List[UserProfile]]) -> int:
# db_object is a tuple, with recipient_id in the first position
return db_object[0]
# ItemT = Tuple[int, List[UserProfile]] (recipient_id, list of corresponding users)
# CacheItemT = List[Dict[str, Any]] (display_recipient list)
# ObjKT = int (recipient_id)
personal_and_huddle_display_recipients = generic_bulk_cached_fetch(
cache_key_function=display_recipient_cache_key,
query_function=personal_and_huddle_query_function,
object_ids=[recipient[0] for recipient in personal_and_huddle_recipients],
id_fetcher=personal_and_huddle_id_fetcher,
cache_transformer=personal_and_huddle_cache_transformer
)
# Glue the dicts together and return:
return {**stream_display_recipients, **personal_and_huddle_display_recipients}

View File

@@ -18,6 +18,7 @@ from zerver.lib.cache import (
to_dict_cache_key,
to_dict_cache_key_id,
)
from zerver.lib.display_recipient import DisplayRecipientCacheT, bulk_fetch_display_recipients
from zerver.lib.request import JsonableError
from zerver.lib.stream_subscription import (
get_stream_subscriptions_for_user,
@@ -182,16 +183,16 @@ class MessageDict:
processor.
'''
MessageDict.bulk_hydrate_sender_info([obj])
MessageDict.hydrate_recipient_info(obj)
MessageDict.bulk_hydrate_recipient_info([obj])
return obj
@staticmethod
def post_process_dicts(objs: List[Dict[str, Any]], apply_markdown: bool, client_gravatar: bool) -> None:
MessageDict.bulk_hydrate_sender_info(objs)
MessageDict.bulk_hydrate_recipient_info(objs)
for obj in objs:
MessageDict.hydrate_recipient_info(obj)
MessageDict.finalize_payload(obj, apply_markdown, client_gravatar)
@staticmethod
@@ -419,7 +420,7 @@ class MessageDict:
obj['sender_is_mirror_dummy'] = user_row['is_mirror_dummy']
@staticmethod
def hydrate_recipient_info(obj: Dict[str, Any]) -> None:
def hydrate_recipient_info(obj: Dict[str, Any], display_recipient: DisplayRecipientCacheT) -> None:
'''
This method hyrdrates recipient info with things
like full names and emails of senders. Eventually
@@ -427,7 +428,6 @@ class MessageDict:
themselves with info they already have on users.
'''
display_recipient = obj['raw_display_recipient']
recipient_type = obj['recipient_type']
recipient_type_id = obj['recipient_type_id']
sender_is_mirror_dummy = obj['sender_is_mirror_dummy']
@@ -461,6 +461,20 @@ class MessageDict:
if obj['type'] == 'stream':
obj['stream_id'] = recipient_type_id
@staticmethod
def bulk_hydrate_recipient_info(objs: List[Dict[str, Any]]) -> None:
recipient_tuples = set( # We use set to eliminate duplicate tuples.
(
obj['recipient_id'],
obj['recipient_type'],
obj['recipient_type_id']
) for obj in objs
)
display_recipients = bulk_fetch_display_recipients(recipient_tuples)
for obj in objs:
MessageDict.hydrate_recipient_info(obj, display_recipients[obj['recipient_id']])
@staticmethod
def set_sender_avatar(obj: Dict[str, Any], client_gravatar: bool) -> None:
sender_id = obj['sender_id']

View File

@@ -87,7 +87,8 @@ from zerver.models import (
RealmAuditLog, RealmDomain, get_realm, UserPresence, Subscription,
get_stream, get_stream_recipient, get_system_bot, get_user, Reaction,
flush_per_request_caches, ScheduledMessage, get_huddle_recipient,
bulk_get_huddle_user_ids, get_huddle_user_ids
bulk_get_huddle_user_ids, get_huddle_user_ids,
get_personal_recipient, get_display_recipient
)
@@ -105,7 +106,7 @@ import datetime
import mock
import time
import ujson
from typing import Any, Dict, List, Optional, Set
from typing import Any, Dict, List, Optional, Set, Union
from collections import namedtuple
@@ -953,7 +954,7 @@ class StreamMessagesTest(ZulipTestCase):
body=content,
)
self.assert_length(queries, 14)
self.assert_length(queries, 15)
def test_stream_message_dict(self) -> None:
user_profile = self.example_user('iago')
@@ -1219,7 +1220,7 @@ class MessageDictTest(ZulipTestCase):
# slower.
error_msg = "Number of ids: {}. Time delay: {}".format(num_ids, delay)
self.assertTrue(delay < 0.0015 * num_ids, error_msg)
self.assert_length(queries, 7)
self.assert_length(queries, 9)
self.assertEqual(len(rows), num_ids)
def test_applying_markdown(self) -> None:
@@ -4186,7 +4187,6 @@ class MessageHydrationTest(ZulipTestCase):
stream_id = get_stream('Verona', realm).id
obj = dict(
raw_display_recipient='Verona',
recipient_type=Recipient.STREAM,
recipient_type_id=stream_id,
sender_is_mirror_dummy=False,
@@ -4196,21 +4196,21 @@ class MessageHydrationTest(ZulipTestCase):
sender_id=cordelia.id,
)
MessageDict.hydrate_recipient_info(obj)
MessageDict.hydrate_recipient_info(obj, 'Verona')
self.assertEqual(obj['display_recipient'], 'Verona')
self.assertEqual(obj['type'], 'stream')
def test_hydrate_pm_recipient_info(self) -> None:
cordelia = self.example_user('cordelia')
display_recipient = [
dict(
email='aaron@example.com',
full_name='Aaron Smith',
),
]
obj = dict(
raw_display_recipient=[
dict(
email='aaron@example.com',
full_name='Aaron Smith',
),
],
recipient_type=Recipient.PERSONAL,
recipient_type_id=None,
sender_is_mirror_dummy=False,
@@ -4220,7 +4220,7 @@ class MessageHydrationTest(ZulipTestCase):
sender_id=cordelia.id,
)
MessageDict.hydrate_recipient_info(obj)
MessageDict.hydrate_recipient_info(obj, display_recipient)
self.assertEqual(
obj['display_recipient'],
@@ -4282,6 +4282,158 @@ class MessageHydrationTest(ZulipTestCase):
self.assertIn('class="user-mention"', new_message['content'])
self.assertEqual(new_message['flags'], ['mentioned'])
def test_display_recipient_up_to_date(self) -> None:
"""
This is a test for a bug where due to caching of message_dicts,
after updating a user's information, fetching those cached messages
via messages_for_ids would return message_dicts with display_recipient
still having the old information. The returned message_dicts should have
up-to-date display_recipients and we check for that here.
"""
hamlet = self.example_user('hamlet')
cordelia = self.example_user('cordelia')
message_id = self.send_personal_message(hamlet.email, cordelia.email, 'test')
cordelia_recipient = get_personal_recipient(cordelia.id)
# Cause the display_recipient to get cached:
get_display_recipient(cordelia_recipient)
# Change cordelia's email:
cordelia_new_email = 'new-cordelia@zulip.com'
cordelia.email = cordelia_new_email
cordelia.save()
# Local display_recipient cache needs to be flushed.
# flush_per_request_caches() is called after every request,
# so it makes sense to run it here.
flush_per_request_caches()
messages = messages_for_ids(
message_ids=[message_id],
user_message_flags={message_id: ['read']},
search_fields={},
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
)
message = messages[0]
# Find which display_recipient in the list is cordelia:
for display_recipient in message['display_recipient']:
if display_recipient['short_name'] == 'cordelia':
cordelia_display_recipient = display_recipient
# Make sure the email is up-to-date.
self.assertEqual(cordelia_display_recipient['email'], cordelia_new_email)
class TestMessageForIdsDisplayRecipientFetching(ZulipTestCase):
def _verify_display_recipient(self, display_recipient: Union[str, List[Dict[str, Any]]],
expected_recipient_objects: Union[Stream, List[UserProfile]]) -> None:
if isinstance(expected_recipient_objects, Stream):
self.assertEqual(display_recipient, expected_recipient_objects.name)
else:
for user_profile in expected_recipient_objects:
recipient_dict = {'email': user_profile.email,
'full_name': user_profile.full_name,
'short_name': user_profile.short_name,
'id': user_profile.id,
'is_mirror_dummy': user_profile.is_mirror_dummy}
self.assertTrue(recipient_dict in display_recipient)
def test_display_recipient_personal(self) -> None:
hamlet = self.example_user('hamlet')
cordelia = self.example_user('cordelia')
othello = self.example_user('othello')
message_ids = [
self.send_personal_message(hamlet.email, cordelia.email, 'test'),
self.send_personal_message(cordelia.email, othello.email, 'test')
]
messages = messages_for_ids(
message_ids=message_ids,
user_message_flags={message_id: ['read'] for message_id in message_ids},
search_fields={},
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
)
self._verify_display_recipient(messages[0]['display_recipient'], [hamlet, cordelia])
self._verify_display_recipient(messages[1]['display_recipient'], [cordelia, othello])
def test_display_recipient_stream(self) -> None:
cordelia = self.example_user('cordelia')
message_ids = [
self.send_stream_message(cordelia.email, "Verona", content='test'),
self.send_stream_message(cordelia.email, "Denmark", content='test')
]
messages = messages_for_ids(
message_ids=message_ids,
user_message_flags={message_id: ['read'] for message_id in message_ids},
search_fields={},
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
)
self._verify_display_recipient(messages[0]['display_recipient'], get_stream("Verona", cordelia.realm))
self._verify_display_recipient(messages[1]['display_recipient'], get_stream("Denmark", cordelia.realm))
def test_display_recipient_huddle(self) -> None:
hamlet = self.example_user('hamlet')
cordelia = self.example_user('cordelia')
othello = self.example_user('othello')
iago = self.example_user('iago')
message_ids = [
self.send_huddle_message(hamlet.email, [cordelia.email, othello.email], 'test'),
self.send_huddle_message(cordelia.email, [hamlet.email, othello.email, iago.email], 'test')
]
messages = messages_for_ids(
message_ids=message_ids,
user_message_flags={message_id: ['read'] for message_id in message_ids},
search_fields={},
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
)
self._verify_display_recipient(messages[0]['display_recipient'], [hamlet, cordelia, othello])
self._verify_display_recipient(messages[1]['display_recipient'], [hamlet, cordelia, othello, iago])
def test_display_recipient_various_types(self) -> None:
hamlet = self.example_user('hamlet')
cordelia = self.example_user('cordelia')
othello = self.example_user('othello')
iago = self.example_user('iago')
message_ids = [
self.send_huddle_message(hamlet.email, [cordelia.email, othello.email], 'test'),
self.send_stream_message(cordelia.email, "Verona", content='test'),
self.send_personal_message(hamlet.email, cordelia.email, 'test'),
self.send_stream_message(cordelia.email, "Denmark", content='test'),
self.send_huddle_message(cordelia.email, [hamlet.email, othello.email, iago.email], 'test'),
self.send_personal_message(cordelia.email, othello.email, 'test')
]
messages = messages_for_ids(
message_ids=message_ids,
user_message_flags={message_id: ['read'] for message_id in message_ids},
search_fields={},
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
)
self._verify_display_recipient(messages[0]['display_recipient'], [hamlet, cordelia, othello])
self._verify_display_recipient(messages[1]['display_recipient'], get_stream("Verona", hamlet.realm))
self._verify_display_recipient(messages[2]['display_recipient'], [hamlet, cordelia])
self._verify_display_recipient(messages[3]['display_recipient'], get_stream("Denmark", hamlet.realm))
self._verify_display_recipient(messages[4]['display_recipient'], [hamlet, cordelia, othello, iago])
self._verify_display_recipient(messages[5]['display_recipient'], [cordelia, othello])
class MessageVisibilityTest(ZulipTestCase):
def test_update_first_visible_message_id(self) -> None:
Message.objects.all().delete()

View File

@@ -454,7 +454,7 @@ class LoginTest(ZulipTestCase):
with queries_captured() as queries:
self.register(self.nonreg_email('test'), "test")
# Ensure the number of queries we make is not O(streams)
self.assertEqual(len(queries), 78)
self.assertEqual(len(queries), 79)
user_profile = self.nonreg_user('test')
self.assert_logged_in_user_id(user_profile.id)
self.assertFalse(user_profile.enable_stream_desktop_notifications)

View File

@@ -2402,7 +2402,7 @@ class SubscriptionAPITest(ZulipTestCase):
streams_to_sub,
dict(principals=ujson.dumps([user1.email, user2.email])),
)
self.assert_length(queries, 45)
self.assert_length(queries, 46)
self.assert_length(events, 7)
for ev in [x for x in events if x['event']['type'] not in ('message', 'stream')]:
@@ -3142,7 +3142,7 @@ class SubscriptionAPITest(ZulipTestCase):
[new_streams[0]],
dict(principals=ujson.dumps([user1.email, user2.email])),
)
self.assert_length(queries, 45)
self.assert_length(queries, 46)
# Test creating private stream.
with queries_captured() as queries:
@@ -3152,7 +3152,7 @@ class SubscriptionAPITest(ZulipTestCase):
dict(principals=ujson.dumps([user1.email, user2.email])),
invite_only=True,
)
self.assert_length(queries, 40)
self.assert_length(queries, 41)
# Test creating a public stream with announce when realm has a notification stream.
notifications_stream = get_stream(self.streams[0], self.test_realm)
@@ -3167,7 +3167,7 @@ class SubscriptionAPITest(ZulipTestCase):
principals=ujson.dumps([user1.email, user2.email])
)
)
self.assert_length(queries, 53)
self.assert_length(queries, 55)
class GetBotOwnerStreamsTest(ZulipTestCase):
def test_streams_api_for_bot_owners(self) -> None: