messages: Don't use display_recipient values from cached message_dicts.

The user information in display_recipient in cached message_dicts
becomes outdated if the information is changed in any way.

In particular, since we don't have a way to find all the message
objects that might contain PMs after an organization toggles the
setting to hide user email addresses from other users, we had a
situation where clients might see inaccurate cached data from before
the transition for a period of up to several hours.

We address this by using our generic_bulk_cached_fetch toolchain to
ensure we always are fetching display_recipient data from the database
(and/or a special recipient_id -> display_recipient cache, which we
can flush easily).

Fixes #12818.
This commit is contained in:
Mateusz Mandera
2019-08-07 00:18:13 +02:00
committed by Tim Abbott
parent 48efd46bc6
commit c779bb1959
6 changed files with 326 additions and 25 deletions

View File

@@ -1,7 +1,8 @@
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Set, Tuple, Union
from zerver.lib.cache import cache_with_key, display_recipient_cache_key
from zerver.models import Recipient, Stream, UserProfile
from zerver.lib.cache import cache_with_key, display_recipient_cache_key, generic_bulk_cached_fetch, \
display_recipient_bulk_get_users_by_id_cache_key
from zerver.models import Recipient, Stream, UserProfile, bulk_get_huddle_user_ids
DisplayRecipientCacheT = Union[str, List[Dict[str, Any]]]
@cache_with_key(lambda *args: display_recipient_cache_key(args[0]),
@@ -32,3 +33,131 @@ def user_profile_to_display_recipient_dict(user_profile: 'UserProfile') -> Dict[
'short_name': user_profile.short_name,
'id': user_profile.id,
'is_mirror_dummy': user_profile.is_mirror_dummy}
def bulk_get_user_profile_by_id(uids: List[int]) -> Dict[int, UserProfile]:
    """
    Bulk-fetch the UserProfile objects for the given user ids via
    generic_bulk_cached_fetch and return its result (annotated as a
    dict mapping user id to UserProfile).
    """
    def fetch_user_profiles(ids: List[int]) -> List[UserProfile]:
        # Plain filter() query; note that no select_related() is applied.
        matching_profiles = UserProfile.objects.filter(id__in=ids)
        return list(matching_profiles)

    # A dedicated cache key function is used here to protect us from
    # conflicts with the get_user_profile_by_id cache, since the objects
    # are fetched without select_related() in this function.
    return generic_bulk_cached_fetch(
        cache_key_function=display_recipient_bulk_get_users_by_id_cache_key,
        query_function=fetch_user_profiles,
        object_ids=uids,
    )
def bulk_fetch_display_recipients(recipient_tuples: Set[Tuple[int, int, int]]
                                  ) -> Dict[int, DisplayRecipientCacheT]:
    """
    Takes set of tuples of the form (recipient_id, recipient_type, recipient_type_id)
    Returns dict mapping recipient_id to corresponding display_recipient

    Stream recipients are resolved to the stream's name; all other
    recipients (personal and huddle) are resolved to a list of dicts built
    with user_profile_to_display_recipient_dict.  Both lookups go through
    generic_bulk_cached_fetch keyed by display_recipient_cache_key.
    """

    # Build dict mapping recipient id to (type, type_id) of the corresponding recipient:
    recipient_id_to_type_pair_dict = {
        recipient[0]: (recipient[1], recipient[2])
        for recipient in recipient_tuples
    }
    # And the inverse mapping:
    type_pair_to_recipient_id_dict = {
        (recipient[1], recipient[2]): recipient[0]
        for recipient in recipient_tuples
    }

    stream_recipients = {
        recipient for recipient in recipient_tuples if recipient[1] == Recipient.STREAM
    }
    # Everything that is not a stream is handled by the personal/huddle path below.
    personal_and_huddle_recipients = recipient_tuples - stream_recipients

    def stream_query_function(recipient_ids: List[int]) -> List[Stream]:
        stream_ids = [
            recipient_id_to_type_pair_dict[recipient_id][1] for recipient_id in recipient_ids
        ]
        # Materialize the queryset so the return value matches the declared
        # List[Stream] type, like the query in bulk_get_user_profile_by_id.
        return list(Stream.objects.filter(id__in=stream_ids))

    def stream_id_fetcher(stream: Stream) -> int:
        # Map the fetched Stream back to the id of its Recipient.
        return type_pair_to_recipient_id_dict[(Recipient.STREAM, stream.id)]

    def stream_cache_transformer(stream: Stream) -> str:
        # The display_recipient of a stream is just its name.
        return stream.name

    # ItemT = Stream, CacheItemT = str (name), ObjKT = int (recipient_id)
    stream_display_recipients = generic_bulk_cached_fetch(
        cache_key_function=display_recipient_cache_key,
        query_function=stream_query_function,
        object_ids=[recipient[0] for recipient in stream_recipients],
        id_fetcher=stream_id_fetcher,
        cache_transformer=stream_cache_transformer,
    )  # type: Dict[int, str]

    # Now we have to create display_recipients for personal and huddle messages.
    # We do this via generic_bulk_cached_fetch, supplying appropriate functions to it.

    def personal_and_huddle_query_function(recipient_ids: List[int]
                                           ) -> List[Tuple[int, List[UserProfile]]]:
        """
        Return a list of tuples of the form (recipient_id, [list of UserProfiles])
        where [list of UserProfiles] has users corresponding to the recipient,
        so the receiving user in the Recipient.PERSONAL case,
        or in the Recipient.HUDDLE case - users in the huddle.
        This is a pretty hacky return value, but it needs to be in this form,
        for this function to work as the query_function in generic_bulk_cached_fetch.
        """

        # Unsaved Recipient objects, built only from the data we were passed.
        recipients = [Recipient(
            id=recipient_id,
            type=recipient_id_to_type_pair_dict[recipient_id][0],
            type_id=recipient_id_to_type_pair_dict[recipient_id][1]
        ) for recipient_id in recipient_ids]

        # Find all user ids whose UserProfiles we will need to fetch:
        huddle_user_ids = bulk_get_huddle_user_ids(
            [recipient for recipient in recipients if recipient.type == Recipient.HUDDLE]
        )  # type: Dict[int, List[int]]

        user_ids_to_fetch = set()  # type: Set[int]
        for recipient in recipients:
            if recipient.type == Recipient.PERSONAL:
                user_ids_to_fetch.add(recipient.type_id)
            else:
                # Extend the set in place rather than rebuilding it with union().
                user_ids_to_fetch.update(huddle_user_ids[recipient.id])

        # Fetch the needed UserProfiles:
        user_profiles = bulk_get_user_profile_by_id(list(user_ids_to_fetch))  # maps user id to UserProfile

        # Build the return value:
        result = []  # type: List[Tuple[int, List[UserProfile]]]
        for recipient in recipients:
            if recipient.type == Recipient.PERSONAL:
                result.append((recipient.id, [user_profiles[recipient.type_id]]))
            else:
                result.append((recipient.id, [user_profiles[user_id]
                                              for user_id in huddle_user_ids[recipient.id]]))

        return result

    def personal_and_huddle_cache_transformer(db_object: Tuple[int, List[UserProfile]]
                                              ) -> List[Dict[str, Any]]:
        """
        Takes an element of the list returned by the query_function, maps it to the final
        display_recipient list.
        """
        user_profile_list = db_object[1]
        display_recipient = [user_profile_to_display_recipient_dict(user_profile)
                             for user_profile in user_profile_list]

        return display_recipient

    def personal_and_huddle_id_fetcher(db_object: Tuple[int, List[UserProfile]]) -> int:
        # db_object is a tuple, with recipient_id in the first position
        return db_object[0]

    # ItemT = Tuple[int, List[UserProfile]] (recipient_id, list of corresponding users)
    # CacheItemT = List[Dict[str, Any]] (display_recipient list)
    # ObjKT = int (recipient_id)
    personal_and_huddle_display_recipients = generic_bulk_cached_fetch(
        cache_key_function=display_recipient_cache_key,
        query_function=personal_and_huddle_query_function,
        object_ids=[recipient[0] for recipient in personal_and_huddle_recipients],
        id_fetcher=personal_and_huddle_id_fetcher,
        cache_transformer=personal_and_huddle_cache_transformer
    )

    # Glue the dicts together and return:
    return {**stream_display_recipients, **personal_and_huddle_display_recipients}