Files
zulip/zerver/lib/cache_helpers.py
Anders Kaseorg fead14951c python: Convert assignment type annotations to Python 3.6 style.
This commit was split by tabbott; this piece covers the vast majority
of files in Zulip, but excludes scripts/, tools/, and puppet/ to help
ensure we at least show the right error messages for Xenial systems.

We can likely further refine the remaining pieces with some testing.

Generated by com2ann, with whitespace fixes and various manual fixes
for runtime issues:

-    invoiced_through: Optional[LicenseLedger] = models.ForeignKey(
+    invoiced_through: Optional["LicenseLedger"] = models.ForeignKey(

-_apns_client: Optional[APNsClient] = None
+_apns_client: Optional["APNsClient"] = None

-    notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
-    signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+    notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+    signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)

-    author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
+    author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)

-    bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+    bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)

-    default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
-    default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+    default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+    default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)

-descriptors_by_handler_id: Dict[int, ClientDescriptor] = {}
+descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {}

-worker_classes: Dict[str, Type[QueueProcessingWorker]] = {}
-queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {}
+worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {}
+queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {}

-AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None
+AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None

Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 11:02:32 -07:00

139 lines
6.5 KiB
Python

# See https://zulip.readthedocs.io/en/latest/subsystems/caching.html for docs
from typing import Any, Callable, Dict, List, Tuple
import datetime
import logging
# This file needs to be different from cache.py because cache.py
# cannot import anything from zerver.models or we'd have an import
# loop
from analytics.models import RealmCount
from django.conf import settings
from zerver.models import Message, UserProfile, Stream, get_stream_cache_key, \
Client, get_client_cache_key, \
Huddle, huddle_hash_cache_key
from zerver.lib.cache import \
user_profile_by_api_key_cache_key, \
user_profile_cache_key, get_remote_cache_time, get_remote_cache_requests, \
cache_set_many, to_dict_cache_key_id
from zerver.lib.message import MessageDict
from zerver.lib.users import get_all_api_keys
from importlib import import_module
from django.contrib.sessions.models import Session
from django.db.models import Q
from django.utils.timezone import now as timezone_now
MESSAGE_CACHE_SIZE = 75000
def message_fetch_objects() -> List[Any]:
try:
max_id = Message.objects.only('id').order_by("-id")[0].id
except IndexError:
return []
return Message.objects.select_related().filter(~Q(sender__email='tabbott/extra@mit.edu'),
id__gt=max_id - MESSAGE_CACHE_SIZE)
def message_cache_items(items_for_remote_cache: Dict[str, Tuple[bytes]],
message: Message) -> None:
'''
Note: this code is untested, and the caller has been
commented out for a while.
'''
key = to_dict_cache_key_id(message.id)
value = MessageDict.to_dict_uncached(message)
items_for_remote_cache[key] = (value,)
def user_cache_items(items_for_remote_cache: Dict[str, Tuple[UserProfile]],
user_profile: UserProfile) -> None:
for api_key in get_all_api_keys(user_profile):
items_for_remote_cache[user_profile_by_api_key_cache_key(api_key)] = (user_profile,)
items_for_remote_cache[user_profile_cache_key(user_profile.email,
user_profile.realm)] = (user_profile,)
# We have other user_profile caches, but none of them are on the
# core serving path for lots of requests.
def stream_cache_items(items_for_remote_cache: Dict[str, Tuple[Stream]],
stream: Stream) -> None:
items_for_remote_cache[get_stream_cache_key(stream.name, stream.realm_id)] = (stream,)
def client_cache_items(items_for_remote_cache: Dict[str, Tuple[Client]],
client: Client) -> None:
items_for_remote_cache[get_client_cache_key(client.name)] = (client,)
def huddle_cache_items(items_for_remote_cache: Dict[str, Tuple[Huddle]],
huddle: Huddle) -> None:
items_for_remote_cache[huddle_hash_cache_key(huddle.huddle_hash)] = (huddle,)
session_engine = import_module(settings.SESSION_ENGINE)
def session_cache_items(items_for_remote_cache: Dict[str, str],
session: Session) -> None:
if settings.SESSION_ENGINE != "django.contrib.sessions.backends.cached_db":
# If we're not using the cached_db session engine, we there
# will be no store.cache_key attribute, and in any case we
# don't need to fill the cache, since it won't exist.
return
store = session_engine.SessionStore(session_key=session.session_key) # type: ignore[attr-defined] # import_module
items_for_remote_cache[store.cache_key] = store.decode(session.session_data)
def get_active_realm_ids() -> List[int]:
"""For servers like zulipchat.com with a lot of realms, it only makes
sense to do cache-filling work for realms that have any currently
active users/clients. Otherwise, we end up with every single-user
trial organization that has ever been created costing us N streams
worth of cache work (where N is the number of default streams for
a new organization).
"""
date = timezone_now() - datetime.timedelta(days=2)
return RealmCount.objects.filter(
end_time__gte=date,
property="1day_actives::day",
value__gt=0).distinct("realm_id").values_list("realm_id", flat=True)
def get_streams() -> List[Stream]:
return Stream.objects.select_related().filter(
realm__in=get_active_realm_ids()).exclude(
# We filter out Zephyr realms, because they can easily
# have 10,000s of streams with only 1 subscriber.
is_in_zephyr_realm=True)
def get_users() -> List[UserProfile]:
return UserProfile.objects.select_related().filter(
long_term_idle=False,
realm__in=get_active_realm_ids())
# Format is (objects query, items filler function, timeout, batch size)
#
# The objects queries are put inside lambdas to prevent Django from
# doing any setup for things we're unlikely to use (without the lambda
# wrapper the below adds an extra 3ms or so to startup time for
# anything importing this file).
cache_fillers: Dict[str, Tuple[Callable[[], List[Any]], Callable[[Dict[str, Any], Any], None], int, int]] = {
'user': (get_users, user_cache_items, 3600*24*7, 10000),
'client': (lambda: Client.objects.select_related().all(), client_cache_items, 3600*24*7, 10000),
'stream': (get_streams, stream_cache_items, 3600*24*7, 10000),
# Message cache fetching disabled until we can fix the fact that it
# does a bunch of inefficient memcached queries as part of filling
# the display_recipient cache
# 'message': (message_fetch_objects, message_cache_items, 3600 * 24, 1000),
'huddle': (lambda: Huddle.objects.select_related().all(), huddle_cache_items, 3600*24*7, 10000),
'session': (lambda: Session.objects.all(), session_cache_items, 3600*24*7, 10000),
}
def fill_remote_cache(cache: str) -> None:
remote_cache_time_start = get_remote_cache_time()
remote_cache_requests_start = get_remote_cache_requests()
items_for_remote_cache: Dict[str, Any] = {}
(objects, items_filler, timeout, batch_size) = cache_fillers[cache]
count = 0
for obj in objects():
items_filler(items_for_remote_cache, obj)
count += 1
if (count % batch_size == 0):
cache_set_many(items_for_remote_cache, timeout=3600*24)
items_for_remote_cache = {}
cache_set_many(items_for_remote_cache, timeout=3600*24*7)
logging.info("Successfully populated %s cache! Consumed %s remote cache queries (%s time)" %
(cache, get_remote_cache_requests() - remote_cache_requests_start,
round(get_remote_cache_time() - remote_cache_time_start, 2)))