cache: Use a single cache entry for cross-realm bots.

The cross-realm bots rarely change, and there are only
a few of them, so we just query them all at once and
put them in the cache.

Also, we put the dictionaries in the cache, instead of
the user objects, since there is nothing time-sensitive
about the dictionaries, and they are small. This saves
us a little time computing the avatar url and things
like that, not to mention marshalling costs.

This commit also fixes a theoretical bug where we would
have stale cache entries if somebody somehow modified
the cross-realm bots without bumping KEY_PREFIX.

Internally we no longer pre-fetch the realm objects for
the bots, but we don't get overly precise about picking
individual fields from UserProfile, since we rarely hit
the database and since we don't store raw ORM objects
in the cache.

The test diffs make it look like we are hitting the
cache an extra time, but the tests weren't counting
bulk fetches.  Now we only use a single key for all
bots rather a key per bot.
This commit is contained in:
Steve Howell
2023-07-19 12:06:56 +00:00
committed by Tim Abbott
parent 0c92879f2a
commit 61a9f701bd
4 changed files with 40 additions and 22 deletions

View File

@@ -15,6 +15,8 @@ from zulip_bots.custom_exceptions import ConfigValidationError
from zerver.lib.avatar import avatar_url, get_avatar_field
from zerver.lib.cache import (
bulk_cached_fetch,
cache_with_key,
get_cross_realm_dicts_key,
realm_user_dict_fields,
user_profile_by_id_cache_key,
)
@@ -174,28 +176,24 @@ def is_administrator_role(role: int) -> bool:
def bulk_get_cross_realm_bots() -> Dict[str, UserProfile]:
emails = list(settings.CROSS_REALM_BOT_EMAILS)
query = UserProfile.objects.filter(realm__string_id=settings.SYSTEM_BOT_REALM)
def fetch_users_by_email(emails: List[str]) -> QuerySet[UserProfile]:
# This should be just
#
# UserProfile.objects.select_related("realm").filter(email__iexact__in=emails,
# realm=realm)
#
# But chaining __in and __iexact doesn't work with Django's
# ORM, so we have the following hack to construct the relevant where clause
where_clause = "upper(zerver_userprofile.email::text) IN (SELECT upper(email) FROM unnest(%s) AS email)"
return query.select_related("realm").extra(where=[where_clause], params=(emails,))
def user_to_email(user_profile: UserProfile) -> str:
return user_profile.email.lower()
return bulk_cached_fetch(
lambda email: f"bulk_get_cross_realm_bots:{email}",
fetch_users_by_email,
[email.lower() for email in emails],
id_fetcher=user_to_email,
# This should be just
#
# UserProfile.objects.select_related("realm").filter(email__iexact__in=emails,
# realm=realm)
#
# But chaining __in and __iexact doesn't work with Django's
# ORM, so we have the following hack to construct the relevant where clause
where_clause = (
"upper(zerver_userprofile.email::text) IN (SELECT upper(email) FROM unnest(%s) AS email)"
)
users = list(
UserProfile.objects.filter(realm__string_id=settings.SYSTEM_BOT_REALM).extra(
where=[where_clause], params=(emails,)
)
)
return {user.email.lower(): user for user in users}
def get_user_id(user: UserProfile) -> int:
@@ -522,6 +520,7 @@ def user_profile_to_user_row(user_profile: UserProfile) -> Dict[str, Any]:
return user_row
@cache_with_key(get_cross_realm_dicts_key)
def get_cross_realm_dicts() -> List[Dict[str, Any]]:
user_dict = bulk_get_cross_realm_bots()
users = sorted(user_dict.values(), key=lambda user: user.full_name)