mirror of
https://github.com/zulip/zulip.git
synced 2025-11-09 16:37:23 +00:00
cache: Limit which realms we access when filling caches.
For realms that don't have any presence-active users, we know for a fact that there aren't any active clients that will be reloading just after the server restarts, so we can skip filling the cache with data related to that realm. For zulipchat.com, this results in a significant performance optimization for the recipient and stream caches, and a moderate performance improvement for the user caches as well.
This commit is contained in:
@@ -2,9 +2,13 @@
|
|||||||
|
|
||||||
from typing import Any, Callable, Dict, List, Tuple
|
from typing import Any, Callable, Dict, List, Tuple
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
|
||||||
# This file needs to be different from cache.py because cache.py
|
# This file needs to be different from cache.py because cache.py
|
||||||
# cannot import anything from zerver.models or we'd have an import
|
# cannot import anything from zerver.models or we'd have an import
|
||||||
# loop
|
# loop
|
||||||
|
from analytics.models import RealmCount
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from zerver.models import Message, UserProfile, Stream, get_stream_cache_key, \
|
from zerver.models import Message, UserProfile, Stream, get_stream_cache_key, \
|
||||||
Recipient, get_recipient_cache_key, Client, get_client_cache_key, \
|
Recipient, get_recipient_cache_key, Client, get_client_cache_key, \
|
||||||
@@ -16,8 +20,8 @@ from zerver.lib.cache import cache_with_key, cache_set, \
|
|||||||
from zerver.lib.message import MessageDict
|
from zerver.lib.message import MessageDict
|
||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
from django.contrib.sessions.models import Session
|
from django.contrib.sessions.models import Session
|
||||||
import logging
|
|
||||||
from django.db.models import Q
|
from django.db.models import Q
|
||||||
|
from django.utils.timezone import now as timezone_now
|
||||||
|
|
||||||
MESSAGE_CACHE_SIZE = 75000
|
MESSAGE_CACHE_SIZE = 75000
|
||||||
|
|
||||||
@@ -68,6 +72,36 @@ def session_cache_items(items_for_remote_cache: Dict[str, str],
|
|||||||
store = session_engine.SessionStore(session_key=session.session_key) # type: ignore # import_module
|
store = session_engine.SessionStore(session_key=session.session_key) # type: ignore # import_module
|
||||||
items_for_remote_cache[store.cache_key] = store.decode(session.session_data)
|
items_for_remote_cache[store.cache_key] = store.decode(session.session_data)
|
||||||
|
|
||||||
|
def get_active_realm_ids() -> List[int]:
|
||||||
|
"""For servers like zulipchat.com with a lot of realms, it only makes
|
||||||
|
sense to do cache-filling work for realms that have any currently
|
||||||
|
active users/clients. Otherwise, we end up with every single-user
|
||||||
|
trial organization that has ever been created costing us N streams
|
||||||
|
worth of cache work (where N is the number of default streams for
|
||||||
|
a new organization).
|
||||||
|
"""
|
||||||
|
date = timezone_now() - datetime.timedelta(days=2)
|
||||||
|
return RealmCount.objects.filter(
|
||||||
|
end_time__gte=date,
|
||||||
|
property="1day_actives::day",
|
||||||
|
value__gt=0).distinct("realm_id").values_list("realm_id", flat=True)
|
||||||
|
|
||||||
|
def get_streams() -> List[Stream]:
|
||||||
|
return Stream.objects.select_related().filter(
|
||||||
|
realm__in=get_active_realm_ids()).exclude(
|
||||||
|
# We filter out Zephyr realms, because they can easily
|
||||||
|
# have 10,000s of streams with only 1 subscriber.
|
||||||
|
is_in_zephyr_realm=True)
|
||||||
|
|
||||||
|
def get_recipients() -> List[Recipient]:
|
||||||
|
return Recipient.objects.select_related().filter(
|
||||||
|
type_id__in=get_streams().values_list("id", flat=True)) # type: ignore # Should be QuerySet above
|
||||||
|
|
||||||
|
def get_users() -> List[UserProfile]:
|
||||||
|
return UserProfile.objects.select_related().filter(
|
||||||
|
long_term_idle=False,
|
||||||
|
realm__in=get_active_realm_ids())
|
||||||
|
|
||||||
# Format is (objects query, items filler function, timeout, batch size)
|
# Format is (objects query, items filler function, timeout, batch size)
|
||||||
#
|
#
|
||||||
# The objects queries are put inside lambdas to prevent Django from
|
# The objects queries are put inside lambdas to prevent Django from
|
||||||
@@ -75,12 +109,10 @@ def session_cache_items(items_for_remote_cache: Dict[str, str],
|
|||||||
# wrapper the below adds an extra 3ms or so to startup time for
|
# wrapper the below adds an extra 3ms or so to startup time for
|
||||||
# anything importing this file).
|
# anything importing this file).
|
||||||
cache_fillers = {
|
cache_fillers = {
|
||||||
'user': (lambda: UserProfile.objects.select_related().filter(
|
'user': (get_users, user_cache_items, 3600*24*7, 10000),
|
||||||
long_term_idle=False), user_cache_items, 3600*24*7, 10000),
|
|
||||||
'client': (lambda: Client.objects.select_related().all(), client_cache_items, 3600*24*7, 10000),
|
'client': (lambda: Client.objects.select_related().all(), client_cache_items, 3600*24*7, 10000),
|
||||||
'recipient': (lambda: Recipient.objects.select_related().filter(
|
'recipient': (get_recipients, recipient_cache_items, 3600*24*7, 10000),
|
||||||
type=Recipient.STREAM), recipient_cache_items, 3600*24*7, 10000),
|
'stream': (get_streams, stream_cache_items, 3600*24*7, 10000),
|
||||||
'stream': (lambda: Stream.objects.select_related().all(), stream_cache_items, 3600*24*7, 10000),
|
|
||||||
# Message cache fetching disabled until we can fix the fact that it
|
# Message cache fetching disabled until we can fix the fact that it
|
||||||
# does a bunch of inefficient memcached queries as part of filling
|
# does a bunch of inefficient memcached queries as part of filling
|
||||||
# the display_recipient cache
|
# the display_recipient cache
|
||||||
|
|||||||
Reference in New Issue
Block a user