Files
zulip/zerver/lib/cache_helpers.py
Steve Howell df93a99b50 Cache only one row per message.
Before this change, we populated two cache entries for each
message that we sent.  The entries were largely redundant,
with the only difference being whether we sent the content
as raw markdown or as the rendered HTML.

This commit makes it so we only have one cache entry per
message, and it includes both content and rendered_content.

One legacy source on confusion here is that `content`
changes meaning when you're on the front end.  Here is the
situation going forward:

    database:
        content = raw
        rendered_contented = rendered

    cache entry:
        content = raw
        rendered_contented = rendered

    payload for the frontend:
        content = raw (for apply_markdown=False)
        content = rendered (for apply_markdown=True)
2017-10-26 16:35:28 -07:00

110 lines
5.4 KiB
Python

from six import binary_type
from typing import Any, Callable, Dict, List, Tuple, Text
# This file needs to be different from cache.py because cache.py
# cannot import anything from zerver.models or we'd have an import
# loop
from django.conf import settings
from zerver.models import Message, UserProfile, Stream, get_stream_cache_key, \
Recipient, get_recipient_cache_key, Client, get_client_cache_key, \
Huddle, huddle_hash_cache_key
from zerver.lib.cache import cache_with_key, cache_set, \
user_profile_by_api_key_cache_key, \
user_profile_by_email_cache_key, \
user_profile_by_id_cache_key, \
user_profile_cache_key, get_remote_cache_time, get_remote_cache_requests, \
cache_set_many, to_dict_cache_key_id
from zerver.lib.message import MessageDict
from importlib import import_module
from django.contrib.sessions.models import Session
import logging
from django.db.models import Q
MESSAGE_CACHE_SIZE = 75000
def message_fetch_objects():
# type: () -> List[Any]
try:
max_id = Message.objects.only('id').order_by("-id")[0].id
except IndexError:
return []
return Message.objects.select_related().filter(~Q(sender__email='tabbott/extra@mit.edu'),
id__gt=max_id - MESSAGE_CACHE_SIZE)
def message_cache_items(items_for_remote_cache, message):
# type: (Dict[Text, Tuple[binary_type]], Message) -> None
'''
Note: this code is untested, and the caller has been
commented out for a while.
'''
key = to_dict_cache_key_id(message.id)
value = MessageDict.to_dict_uncached(message)
items_for_remote_cache[key] = (value,)
def user_cache_items(items_for_remote_cache, user_profile):
# type: (Dict[Text, Tuple[UserProfile]], UserProfile) -> None
items_for_remote_cache[user_profile_by_email_cache_key(user_profile.email)] = (user_profile,)
items_for_remote_cache[user_profile_by_id_cache_key(user_profile.id)] = (user_profile,)
items_for_remote_cache[user_profile_by_api_key_cache_key(user_profile.api_key)] = (user_profile,)
items_for_remote_cache[user_profile_cache_key(user_profile.email, user_profile.realm)] = (user_profile,)
def stream_cache_items(items_for_remote_cache, stream):
# type: (Dict[Text, Tuple[Stream]], Stream) -> None
items_for_remote_cache[get_stream_cache_key(stream.name, stream.realm_id)] = (stream,)
def client_cache_items(items_for_remote_cache, client):
# type: (Dict[Text, Tuple[Client]], Client) -> None
items_for_remote_cache[get_client_cache_key(client.name)] = (client,)
def huddle_cache_items(items_for_remote_cache, huddle):
# type: (Dict[Text, Tuple[Huddle]], Huddle) -> None
items_for_remote_cache[huddle_hash_cache_key(huddle.huddle_hash)] = (huddle,)
def recipient_cache_items(items_for_remote_cache, recipient):
# type: (Dict[Text, Tuple[Recipient]], Recipient) -> None
items_for_remote_cache[get_recipient_cache_key(recipient.type, recipient.type_id)] = (recipient,)
session_engine = import_module(settings.SESSION_ENGINE)
def session_cache_items(items_for_remote_cache, session):
# type: (Dict[Text, Text], Session) -> None
store = session_engine.SessionStore(session_key=session.session_key) # type: ignore # import_module
items_for_remote_cache[store.cache_key] = store.decode(session.session_data)
# Format is (objects query, items filler function, timeout, batch size)
#
# The objects queries are put inside lambdas to prevent Django from
# doing any setup for things we're unlikely to use (without the lambda
# wrapper the below adds an extra 3ms or so to startup time for
# anything importing this file).
cache_fillers = {
'user': (lambda: UserProfile.objects.select_related().all(), user_cache_items, 3600*24*7, 10000),
'client': (lambda: Client.objects.select_related().all(), client_cache_items, 3600*24*7, 10000),
'recipient': (lambda: Recipient.objects.select_related().all(), recipient_cache_items, 3600*24*7, 10000),
'stream': (lambda: Stream.objects.select_related().all(), stream_cache_items, 3600*24*7, 10000),
# Message cache fetching disabled until we can fix the fact that it
# does a bunch of inefficient memcached queries as part of filling
# the display_recipient cache
# 'message': (message_fetch_objects, message_cache_items, 3600 * 24, 1000),
'huddle': (lambda: Huddle.objects.select_related().all(), huddle_cache_items, 3600*24*7, 10000),
'session': (lambda: Session.objects.all(), session_cache_items, 3600*24*7, 10000),
} # type: Dict[str, Tuple[Callable[[], List[Any]], Callable[[Dict[Text, Any], Any], None], int, int]]
def fill_remote_cache(cache):
# type: (str) -> None
remote_cache_time_start = get_remote_cache_time()
remote_cache_requests_start = get_remote_cache_requests()
items_for_remote_cache = {} # type: Dict[Text, Any]
(objects, items_filler, timeout, batch_size) = cache_fillers[cache]
count = 0
for obj in objects():
items_filler(items_for_remote_cache, obj)
count += 1
if (count % batch_size == 0):
cache_set_many(items_for_remote_cache, timeout=3600*24)
items_for_remote_cache = {}
cache_set_many(items_for_remote_cache, timeout=3600*24*7)
logging.info("Succesfully populated %s cache! Consumed %s remote cache queries (%s time)" %
(cache, get_remote_cache_requests() - remote_cache_requests_start,
round(get_remote_cache_time() - remote_cache_time_start, 2)))