mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 04:52:12 +00:00
tornado: Limit the width of the user queries, when they're needed.
Tornado requests try hard to not make SQL queries -- and if they're necessary, to minimize the number of them. Specifically, both session objects and user objects are cached in memcached, and we expect that both of them will have been filled there by Django before any requests are made to Tornado. In the event that memcached is flushed, or data is otherwise evicted, we perform two database queries -- one for the session, and one for the user. However, the *width* of the latter query has grown significantly over time, as the Realm object grew more fields, and recently with the addition of role groups, which require multiple joins each. This leads to a query which is over 12k of text long, and results in 319 columns. In the event of a memcached flush, this can result in a *significant* amount of SQL traffic, as nearly every active Tornado request will make that query. We do not wish to narrow the default query for Django; we instead tag the request in the REST wrapper, and use that to use a much narrower user cache entry. That narrower cache entry is filled before the queue is created in Django; we also use it to explicitly set the log data, so the second "half" of the continued Tornado request does not need to fetch any user data either when writing its log line. Because they use different cache keys, this only affects the session-based `/json/events` endpoint, which caches by user-id; the `/api/v1/events` endpoint, which uses an API-key cache, keeps its wide user object. The former is 50% of the total request volume, whereas the latter is only 2%, so adding an additional cache for it is unnecessary complexity.
This commit is contained in:
committed by
Tim Abbott
parent
7c20f1d3ea
commit
58bf2a7935
@@ -429,6 +429,10 @@ def user_profile_by_id_cache_key(user_profile_id: int) -> str:
|
||||
return f"user_profile_by_id:{user_profile_id}"
|
||||
|
||||
|
||||
def user_profile_narrow_by_id_cache_key(user_profile_id: int) -> str:
|
||||
return f"user_profile_narrow_by_id:{user_profile_id}"
|
||||
|
||||
|
||||
def user_profile_by_api_key_cache_key(api_key: str) -> str:
|
||||
return f"user_profile_by_api_key:{api_key}"
|
||||
|
||||
@@ -513,6 +517,7 @@ def delete_user_profile_caches(user_profiles: Iterable["UserProfile"], realm_id:
|
||||
keys = []
|
||||
for user_profile in user_profiles:
|
||||
keys.append(user_profile_by_id_cache_key(user_profile.id))
|
||||
keys.append(user_profile_narrow_by_id_cache_key(user_profile.id))
|
||||
keys += map(user_profile_by_api_key_cache_key, get_all_api_keys(user_profile))
|
||||
keys.append(user_profile_cache_key_id(user_profile.email, realm_id))
|
||||
keys.append(user_profile_delivery_email_cache_key(user_profile.delivery_email, realm_id))
|
||||
|
@@ -19,6 +19,7 @@ from zerver.decorator import (
|
||||
from zerver.lib.exceptions import MissingAuthenticationError
|
||||
from zerver.lib.request import RequestNotes
|
||||
from zerver.lib.response import json_method_not_allowed
|
||||
from zerver.lib.sessions import narrow_request_user
|
||||
|
||||
ParamT = ParamSpec("ParamT")
|
||||
METHODS = ("GET", "HEAD", "POST", "PUT", "DELETE", "PATCH")
|
||||
@@ -152,6 +153,11 @@ def rest_dispatch(request: HttpRequest, /, **kwargs: object) -> HttpResponse:
|
||||
# Security implications of this portion of the code are minimal,
|
||||
# as we should worst-case fail closed if we miscategorize a request.
|
||||
|
||||
def check_is_authenticated(request: HttpRequest) -> bool:
|
||||
if "narrow_user_session_cache" not in view_flags:
|
||||
return request.user.is_authenticated
|
||||
return narrow_request_user(request).is_authenticated
|
||||
|
||||
# for some special views (e.g. serving a file that has been
|
||||
# uploaded), we support using the same URL for web and API clients.
|
||||
if "override_api_url_scheme" in view_flags and "Authorization" in request.headers:
|
||||
@@ -168,7 +174,7 @@ def rest_dispatch(request: HttpRequest, /, **kwargs: object) -> HttpResponse:
|
||||
# React Native. See last block for rate limiting notes.
|
||||
target_function = authenticated_uploads_api_view(skip_rate_limiting=True)(target_function)
|
||||
# /json views (web client) validate with a session token (cookie)
|
||||
elif not request.path.startswith("/api") and request.user.is_authenticated:
|
||||
elif not request.path.startswith("/api") and check_is_authenticated(request):
|
||||
# Authenticated via sessions framework, only CSRF check needed
|
||||
auth_kwargs = {}
|
||||
if "override_api_url_scheme" in view_flags:
|
||||
|
@@ -6,13 +6,17 @@ from typing import Any, Protocol, cast
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth import SESSION_KEY, get_user_model
|
||||
from django.contrib.auth.models import AnonymousUser
|
||||
from django.contrib.sessions.backends.base import SessionBase
|
||||
from django.contrib.sessions.models import Session
|
||||
from django.http import HttpRequest
|
||||
from django.utils.functional import LazyObject
|
||||
from django.utils.timezone import now as timezone_now
|
||||
|
||||
from zerver.lib.request import RequestNotes
|
||||
from zerver.lib.timestamp import datetime_to_timestamp, timestamp_to_datetime
|
||||
from zerver.models import Realm, UserProfile
|
||||
from zerver.models.users import get_user_profile_by_id
|
||||
from zerver.models.users import get_user_profile_by_id, get_user_profile_narrow_by_id
|
||||
|
||||
|
||||
class SessionEngine(Protocol):
|
||||
@@ -99,3 +103,33 @@ def get_expirable_session_var(
|
||||
if delete:
|
||||
del session[var_name]
|
||||
return value
|
||||
|
||||
|
||||
def narrow_request_user(
|
||||
request: HttpRequest, *, user_id: int | None = None
|
||||
) -> UserProfile | AnonymousUser:
|
||||
# In Tornado and other performance-critical paths, we want to not
|
||||
# load the extremely wide default UserProfile select_related. We
|
||||
# respect the request.user if it has been explicitly set already,
|
||||
# and otherwise perform a cached lookup of a much narrower view of
|
||||
# the UserProfile; this is faster than the normal UserProfile both
|
||||
# for cache misses (1.8ms vs 15ms) and cache hits (147μs vs
|
||||
# 387μs). We fill the requester_for_logs to skip a session and
|
||||
# user memcached fetch when writing the log lines.
|
||||
if not isinstance(request.user, LazyObject):
|
||||
return request.user # nocoverage
|
||||
|
||||
if user_id is None:
|
||||
user_id = get_session_dict_user(request.session)
|
||||
if user_id is None:
|
||||
return AnonymousUser() # nocoverage
|
||||
|
||||
try:
|
||||
request.user = get_user_profile_narrow_by_id(user_id)
|
||||
RequestNotes.get_notes(
|
||||
request
|
||||
).requester_for_logs = request.user.format_requester_for_logs()
|
||||
except UserProfile.DoesNotExist: # nocoverage
|
||||
request.user = AnonymousUser()
|
||||
|
||||
return request.user
|
||||
|
@@ -25,6 +25,7 @@ from zerver.lib.cache import (
|
||||
user_profile_by_api_key_cache_key,
|
||||
user_profile_by_id_cache_key,
|
||||
user_profile_cache_key,
|
||||
user_profile_narrow_by_id_cache_key,
|
||||
)
|
||||
from zerver.lib.types import ProfileData, RawUserDict
|
||||
from zerver.lib.utils import generate_api_key
|
||||
@@ -955,6 +956,24 @@ def get_user_profile_by_id(user_profile_id: int) -> UserProfile:
|
||||
return base_get_user_queryset().get(id=user_profile_id)
|
||||
|
||||
|
||||
@cache_with_key(user_profile_narrow_by_id_cache_key, timeout=3600 * 24 * 7)
|
||||
def get_user_profile_narrow_by_id(user_profile_id: int) -> UserProfile:
|
||||
return (
|
||||
UserProfile.objects.select_related("realm")
|
||||
.only(
|
||||
"id",
|
||||
"bot_type",
|
||||
"is_active",
|
||||
"rate_limits",
|
||||
"role",
|
||||
"recipient_id",
|
||||
"realm__string_id",
|
||||
"realm__deactivated",
|
||||
)
|
||||
.get(id=user_profile_id)
|
||||
)
|
||||
|
||||
|
||||
def get_user_profile_by_email(email: str) -> UserProfile:
|
||||
"""This function is intended to be used for
|
||||
manual manage.py shell work; robust code must use get_user or
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
import socket
|
||||
from collections.abc import Awaitable, Callable
|
||||
from collections.abc import Awaitable, Callable, Iterator
|
||||
from contextlib import contextmanager
|
||||
from functools import wraps
|
||||
from typing import Any, TypeVar
|
||||
from unittest import TestResult, mock
|
||||
@@ -17,7 +18,10 @@ from tornado.httpclient import AsyncHTTPClient, HTTPResponse
|
||||
from tornado.httpserver import HTTPServer
|
||||
from typing_extensions import ParamSpec, override
|
||||
|
||||
from zerver.lib.cache import user_profile_narrow_by_id_cache_key
|
||||
from zerver.lib.test_classes import ZulipTestCase
|
||||
from zerver.lib.test_helpers import cache_tries_captured, queries_captured
|
||||
from zerver.models import UserProfile
|
||||
from zerver.tornado import event_queue
|
||||
from zerver.tornado.application import create_tornado_application
|
||||
from zerver.tornado.event_queue import process_event
|
||||
@@ -111,6 +115,20 @@ class EventsTestCase(TornadoWebTestCase):
|
||||
queue_id = await self.create_queue()
|
||||
self.assertIn(queue_id, event_queue.clients)
|
||||
|
||||
@contextmanager
|
||||
def mocked_events(self, user_profile: UserProfile, event: dict[str, object]) -> Iterator[None]:
|
||||
def process_events() -> None:
|
||||
users = [user_profile.id]
|
||||
process_event(event, users)
|
||||
|
||||
def wrapped_fetch_events(**query: Any) -> dict[str, Any]:
|
||||
ret = event_queue.fetch_events(**query)
|
||||
asyncio.get_running_loop().call_soon(process_events)
|
||||
return ret
|
||||
|
||||
with mock.patch("zerver.tornado.views.fetch_events", side_effect=wrapped_fetch_events):
|
||||
yield
|
||||
|
||||
@async_to_sync_decorator
|
||||
async def test_events_async(self) -> None:
|
||||
user_profile = await in_django_thread(lambda: self.example_user("hamlet"))
|
||||
@@ -123,20 +141,7 @@ class EventsTestCase(TornadoWebTestCase):
|
||||
|
||||
path = f"/json/events?{urlencode(data)}"
|
||||
|
||||
def process_events() -> None:
|
||||
users = [user_profile.id]
|
||||
event = dict(
|
||||
type="test",
|
||||
data="test data",
|
||||
)
|
||||
process_event(event, users)
|
||||
|
||||
def wrapped_fetch_events(**query: Any) -> dict[str, Any]:
|
||||
ret = event_queue.fetch_events(**query)
|
||||
asyncio.get_running_loop().call_soon(process_events)
|
||||
return ret
|
||||
|
||||
with mock.patch("zerver.tornado.views.fetch_events", side_effect=wrapped_fetch_events):
|
||||
with self.mocked_events(user_profile, {"type": "test", "data": "test data"}):
|
||||
response = await self.fetch_async("GET", path)
|
||||
|
||||
self.assertEqual(response.headers["Vary"], "Accept-Language, Cookie")
|
||||
@@ -148,3 +153,61 @@ class EventsTestCase(TornadoWebTestCase):
|
||||
],
|
||||
)
|
||||
self.assertEqual(data["result"], "success")
|
||||
|
||||
@async_to_sync_decorator
|
||||
async def test_events_caching(self) -> None:
|
||||
user_profile = await in_django_thread(lambda: self.example_user("hamlet"))
|
||||
await in_django_thread(lambda: self.login_user(user_profile))
|
||||
event_queue_id = await self.create_queue()
|
||||
data = {
|
||||
"queue_id": event_queue_id,
|
||||
"last_event_id": -1,
|
||||
}
|
||||
|
||||
path = f"/json/events?{urlencode(data)}"
|
||||
|
||||
with (
|
||||
self.mocked_events(user_profile, {"type": "test", "data": "test data"}),
|
||||
cache_tries_captured() as cache_gets,
|
||||
queries_captured() as queries,
|
||||
):
|
||||
await self.fetch_async("GET", path)
|
||||
|
||||
# Two cache fetches -- for the user and the client. In
|
||||
# production, the session would also be a cache access,
|
||||
# but tests don't use cached sessions.
|
||||
self.assert_length(cache_gets, 2)
|
||||
self.assertEqual(
|
||||
cache_gets[0], ("get", user_profile_narrow_by_id_cache_key(user_profile.id), None)
|
||||
)
|
||||
self.assertEqual(cache_gets[1][0], "get")
|
||||
assert isinstance(cache_gets[1][1], str)
|
||||
self.assertTrue(cache_gets[1][1].startswith("get_client:"))
|
||||
|
||||
# Three database queries -- session, user, and client.
|
||||
# The user query should remain small; it is currently 470
|
||||
# bytes, but anything under 1k should be Fine.
|
||||
self.assert_length(queries, 3)
|
||||
self.assertIn("django_session", queries[0].sql)
|
||||
self.assertIn("zerver_userprofile", queries[1].sql)
|
||||
self.assertLessEqual(len(queries[1].sql), 1024)
|
||||
self.assertIn("zerver_client", queries[2].sql)
|
||||
|
||||
# Perform the same request again, preserving the caches. We
|
||||
# should only see one database query -- the session. As noted
|
||||
# above, in production even that would be cached.
|
||||
with (
|
||||
self.mocked_events(user_profile, {"type": "test", "data": "test data"}),
|
||||
cache_tries_captured() as cache_gets,
|
||||
queries_captured(keep_cache_warm=True) as queries,
|
||||
):
|
||||
await self.fetch_async("GET", path)
|
||||
self.assert_length(cache_gets, 1)
|
||||
self.assertEqual(
|
||||
cache_gets[0], ("get", user_profile_narrow_by_id_cache_key(user_profile.id), None)
|
||||
)
|
||||
# Client is cached in-process-memory, so doesn't even see
|
||||
# a memcached hit
|
||||
|
||||
self.assert_length(queries, 1)
|
||||
self.assertIn("django_session", queries[0].sql)
|
||||
|
@@ -16,6 +16,7 @@ from urllib3.util import Retry
|
||||
from zerver.lib.partial import partial
|
||||
from zerver.lib.queue import queue_json_publish_rollback_unsafe
|
||||
from zerver.models import Client, Realm, UserProfile
|
||||
from zerver.models.users import get_user_profile_narrow_by_id
|
||||
from zerver.tornado.sharding import (
|
||||
get_realm_tornado_ports,
|
||||
get_tornado_url,
|
||||
@@ -97,6 +98,11 @@ def request_event_queue(
|
||||
if not settings.USING_TORNADO:
|
||||
return None
|
||||
|
||||
# We make sure to pre-fill the narrow user cache, to save
|
||||
# session-based Tornado (/json/events) from having to go to the
|
||||
# database.
|
||||
get_user_profile_narrow_by_id(user_profile.id)
|
||||
|
||||
tornado_url = get_tornado_url(get_user_tornado_port(user_profile))
|
||||
req = {
|
||||
"dont_block": "true",
|
||||
@@ -134,6 +140,12 @@ def get_user_events(
|
||||
if not settings.USING_TORNADO:
|
||||
return []
|
||||
|
||||
# Pre-fill the narrow user cache, to save session-based Tornado
|
||||
# (/json/events) from having to go to the database. This is
|
||||
# almost certainly filled already, from above, but there is little
|
||||
# harm in forcing it.
|
||||
get_user_profile_narrow_by_id(user_profile.id)
|
||||
|
||||
tornado_url = get_tornado_url(get_user_tornado_port(user_profile))
|
||||
post_data: dict[str, Any] = {
|
||||
"queue_id": queue_id,
|
||||
|
@@ -14,10 +14,10 @@ from zerver.lib.exceptions import JsonableError
|
||||
from zerver.lib.queue import get_queue_client
|
||||
from zerver.lib.request import RequestNotes
|
||||
from zerver.lib.response import AsynchronousResponse, json_success
|
||||
from zerver.lib.sessions import narrow_request_user
|
||||
from zerver.lib.typed_endpoint import ApiParamConfig, DocumentationStatus, typed_endpoint
|
||||
from zerver.models import UserProfile
|
||||
from zerver.models.clients import get_client
|
||||
from zerver.models.users import get_user_profile_by_id
|
||||
from zerver.tornado.descriptors import is_current_port
|
||||
from zerver.tornado.event_queue import (
|
||||
access_client_descriptor,
|
||||
@@ -100,8 +100,8 @@ def cleanup_event_queue(
|
||||
@internal_api_view(True)
|
||||
@typed_endpoint
|
||||
def get_events_internal(request: HttpRequest, *, user_profile_id: Json[int]) -> HttpResponse:
|
||||
user_profile = get_user_profile_by_id(user_profile_id)
|
||||
RequestNotes.get_notes(request).requester_for_logs = user_profile.format_requester_for_logs()
|
||||
user_profile = narrow_request_user(request, user_id=user_profile_id)
|
||||
assert isinstance(user_profile, UserProfile)
|
||||
assert is_current_port(get_user_tornado_port(user_profile))
|
||||
|
||||
process_client(request, user_profile, client_name="internal")
|
||||
|
@@ -523,7 +523,11 @@ v1_api_and_json_patterns = [
|
||||
# used to register for an event queue in tornado
|
||||
rest_path("register", POST=(events_register_backend, {"allow_anonymous_user_web"})),
|
||||
# events -> zerver.tornado.views
|
||||
rest_path("events", GET=get_events, DELETE=cleanup_event_queue),
|
||||
rest_path(
|
||||
"events",
|
||||
GET=(get_events, {"narrow_user_session_cache"}),
|
||||
DELETE=(cleanup_event_queue, {"narrow_user_session_cache"}),
|
||||
),
|
||||
# Used to generate a Zoom video call URL
|
||||
rest_path("calls/zoom/create", POST=make_zoom_video_call),
|
||||
# Used to generate a BigBlueButton video call URL
|
||||
|
Reference in New Issue
Block a user