cache: Use a single cache entry for cross-realm bots.

The cross-realm bots rarely change, and there are only
a few of them, so we just query them all at once and
put them in the cache.

Also, we put the dictionaries in the cache, instead of
the user objects, since there is nothing time-sensitive
about the dictionaries, and they are small. This saves
us a little time computing the avatar url and things
like that, not to mention marshalling costs.

This commit also fixes a theoretical bug where we would
have stale cache entries if somebody somehow modified
the cross-realm bots without bumping KEY_PREFIX.

Internally we no longer pre-fetch the realm objects for
the bots, but we don't get overly precise about picking
individual fields from UserProfile, since we rarely hit
the database and since we don't store raw ORM objects
in the cache.

The test diffs make it look like we are hitting the
cache an extra time, but the tests weren't counting
bulk fetches.  Now we only use a single key for all
bots rather a key per bot.
This commit is contained in:
Steve Howell
2023-07-19 12:06:56 +00:00
committed by Tim Abbott
parent 0c92879f2a
commit 61a9f701bd
4 changed files with 40 additions and 22 deletions

View File

@@ -449,6 +449,13 @@ def user_profile_by_api_key_cache_key(api_key: str) -> str:
return f"user_profile_by_api_key:{api_key}" return f"user_profile_by_api_key:{api_key}"
def get_cross_realm_dicts_key() -> str:
emails = list(settings.CROSS_REALM_BOT_EMAILS)
raw_key = ",".join(sorted(emails))
digest = hashlib.sha1(raw_key.encode()).hexdigest()
return f"get_cross_realm_dicts:{digest}"
realm_user_dict_fields: List[str] = [ realm_user_dict_fields: List[str] = [
"id", "id",
"full_name", "full_name",
@@ -525,6 +532,7 @@ def delete_user_profile_caches(user_profiles: Iterable["UserProfile"], realm: "R
if user_profile.is_bot and is_cross_realm_bot_email(user_profile.email): if user_profile.is_bot and is_cross_realm_bot_email(user_profile.email):
# Handle clearing system bots from their special cache. # Handle clearing system bots from their special cache.
keys.append(bot_profile_cache_key(user_profile.email, realm.id)) keys.append(bot_profile_cache_key(user_profile.email, realm.id))
keys.append(get_cross_realm_dicts_key())
cache_delete_many(keys) cache_delete_many(keys)

View File

@@ -15,6 +15,8 @@ from zulip_bots.custom_exceptions import ConfigValidationError
from zerver.lib.avatar import avatar_url, get_avatar_field from zerver.lib.avatar import avatar_url, get_avatar_field
from zerver.lib.cache import ( from zerver.lib.cache import (
bulk_cached_fetch, bulk_cached_fetch,
cache_with_key,
get_cross_realm_dicts_key,
realm_user_dict_fields, realm_user_dict_fields,
user_profile_by_id_cache_key, user_profile_by_id_cache_key,
) )
@@ -174,28 +176,24 @@ def is_administrator_role(role: int) -> bool:
def bulk_get_cross_realm_bots() -> Dict[str, UserProfile]: def bulk_get_cross_realm_bots() -> Dict[str, UserProfile]:
emails = list(settings.CROSS_REALM_BOT_EMAILS) emails = list(settings.CROSS_REALM_BOT_EMAILS)
query = UserProfile.objects.filter(realm__string_id=settings.SYSTEM_BOT_REALM)
def fetch_users_by_email(emails: List[str]) -> QuerySet[UserProfile]: # This should be just
# This should be just #
# # UserProfile.objects.select_related("realm").filter(email__iexact__in=emails,
# UserProfile.objects.select_related("realm").filter(email__iexact__in=emails, # realm=realm)
# realm=realm) #
# # But chaining __in and __iexact doesn't work with Django's
# But chaining __in and __iexact doesn't work with Django's # ORM, so we have the following hack to construct the relevant where clause
# ORM, so we have the following hack to construct the relevant where clause where_clause = (
where_clause = "upper(zerver_userprofile.email::text) IN (SELECT upper(email) FROM unnest(%s) AS email)" "upper(zerver_userprofile.email::text) IN (SELECT upper(email) FROM unnest(%s) AS email)"
return query.select_related("realm").extra(where=[where_clause], params=(emails,))
def user_to_email(user_profile: UserProfile) -> str:
return user_profile.email.lower()
return bulk_cached_fetch(
lambda email: f"bulk_get_cross_realm_bots:{email}",
fetch_users_by_email,
[email.lower() for email in emails],
id_fetcher=user_to_email,
) )
users = list(
UserProfile.objects.filter(realm__string_id=settings.SYSTEM_BOT_REALM).extra(
where=[where_clause], params=(emails,)
)
)
return {user.email.lower(): user for user in users}
def get_user_id(user: UserProfile) -> int: def get_user_id(user: UserProfile) -> int:
@@ -522,6 +520,7 @@ def user_profile_to_user_row(user_profile: UserProfile) -> Dict[str, Any]:
return user_row return user_row
@cache_with_key(get_cross_realm_dicts_key)
def get_cross_realm_dicts() -> List[Dict[str, Any]]: def get_cross_realm_dicts() -> List[Dict[str, Any]]:
user_dict = bulk_get_cross_realm_bots() user_dict = bulk_get_cross_realm_bots()
users = sorted(user_dict.values(), key=lambda user: user.full_name) users = sorted(user_dict.values(), key=lambda user: user.full_name)

View File

@@ -255,7 +255,7 @@ class HomeTest(ZulipTestCase):
set(result["Cache-Control"].split(", ")), {"must-revalidate", "no-store", "no-cache"} set(result["Cache-Control"].split(", ")), {"must-revalidate", "no-store", "no-cache"}
) )
self.assert_length(cache_mock.call_args_list, 4) self.assert_length(cache_mock.call_args_list, 5)
html = result.content.decode() html = result.content.decode()
@@ -441,7 +441,7 @@ class HomeTest(ZulipTestCase):
with patch("zerver.lib.cache.cache_set") as cache_mock: with patch("zerver.lib.cache.cache_set") as cache_mock:
result = self._get_home_page() result = self._get_home_page()
self.check_rendered_logged_in_app(result) self.check_rendered_logged_in_app(result)
self.assert_length(cache_mock.call_args_list, 6) self.assert_length(cache_mock.call_args_list, 7)
def test_num_queries_with_streams(self) -> None: def test_num_queries_with_streams(self) -> None:
main_user = self.example_user("hamlet") main_user = self.example_user("hamlet")

View File

@@ -1351,6 +1351,17 @@ class UserProfileTest(ZulipTestCase):
self.assertEqual(actual_dicts, expected_dicts) self.assertEqual(actual_dicts, expected_dicts)
# Test cache invalidation
welcome_bot = UserProfile.objects.get(email="welcome-bot@zulip.com")
welcome_bot.full_name = "fred"
welcome_bot.save()
with self.assert_database_query_count(1, keep_cache_warm=True):
actual_dicts = get_cross_realm_dicts()
expected_dicts = [user_row(email) for email in expected_emails]
self.assertEqual(actual_dicts, expected_dicts)
def test_get_user_subscription_status(self) -> None: def test_get_user_subscription_status(self) -> None:
self.login("hamlet") self.login("hamlet")
iago = self.example_user("iago") iago = self.example_user("iago")