mirror of
https://github.com/zulip/zulip.git
synced 2025-10-31 20:13:46 +00:00
performance: Extract subscribers/peers in bulk.
We replace get_peer_user_ids_for_stream_change
with two bulk functions to get peers and/or
subscribers.
Note that we have three codepaths that care about
peers:
subscribing existing users:
we need to tell peers about new subscribers
we need to tell the subscribed users about old subscribers
unsubscribing existing users:
we only need to tell peers who unsubscribed
subscribing new user:
we only need to tell peers about the new user
(right now we generate send_event
calls to tell the new user about existing
subscribers, but this is a waste
of effort that we will fix soon)
The two bulk functions are these:
bulk_get_subscriber_peer_info
bulk_get_peers
They have some overlap in the implementation,
but there are some nuanced differences that are
described in the comments.
Looking up peers/subscribers in bulk leads to some
nice optimizations.
We will save some memcached traffic if you are
subscribing to multiple public streams.
We will save a query in the remove-subscriber
case if you are only dealing with private streams.
This commit is contained in:
@@ -1,10 +1,26 @@
|
||||
import itertools
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from operator import itemgetter
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from django.db.models.query import QuerySet
|
||||
|
||||
from zerver.models import Recipient, Stream, Subscription, UserProfile
|
||||
from zerver.models import (
|
||||
Realm,
|
||||
Recipient,
|
||||
Stream,
|
||||
Subscription,
|
||||
UserProfile,
|
||||
active_non_guest_user_ids,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class SubscriberPeerInfo:
    """Pair of per-stream user-id lookups returned together by bulk queries.

    Both mappings go from an integer key to a set of integer user ids.
    """

    # Users that are actually subscribed, grouped per key.
    subscribed_ids: Dict[int, Set[int]]
    # Users that should be told about subscription changes, grouped per key.
    peer_ids: Dict[int, Set[int]]
|
||||
|
||||
def get_active_subscriptions_for_stream_id(stream_id: int) -> QuerySet:
|
||||
# TODO: Change return type to QuerySet[Subscription]
|
||||
return Subscription.objects.filter(
|
||||
@@ -73,6 +89,98 @@ def num_subscribers_for_stream_id(stream_id: int) -> int:
|
||||
user_profile__is_active=True,
|
||||
).count()
|
||||
|
||||
def get_user_ids_for_streams(stream_ids: Set[int]) -> Dict[int, Set[int]]:
    """Return the ids of active subscribed users, grouped by stream id.

    The result is a ``defaultdict(set)``.  Streams for which the query
    yields no rows get no explicit entry, so callers that must not
    mutate the mapping should read it with ``.get(stream_id, set())``.
    """
    subscriptions = get_active_subscriptions_for_stream_ids(stream_ids)
    subscriptions = subscriptions.filter(user_profile__is_active=True)
    rows = subscriptions.values('recipient__type_id', 'user_profile_id')
    # itertools.groupby only merges *adjacent* rows, so the rows must
    # arrive ordered by the same key we group on.
    rows = rows.order_by('recipient__type_id')

    users_by_stream: Dict[int, Set[int]] = defaultdict(set)
    for stream_id, group in itertools.groupby(rows, key=itemgetter('recipient__type_id')):
        users_by_stream[stream_id] = set(map(itemgetter('user_profile_id'), group))

    return users_by_stream
|
||||
|
||||
def bulk_get_subscriber_peer_info(
    realm: Realm,
    streams: List[Stream],
) -> SubscriberPeerInfo:
    """
    Compute subscribers and peers for many streams at once.

    Glossary:

        subscribed_ids:
            The users who are actually subscribed to each stream;
            this is generally what we send to the person subscribing
            to the stream.

        peer_ids:
            The users who need to hear about a new subscriber.
            It's usually a superset of the subscribers.

    For invite-only streams the peers are the stream's subscribers plus
    whatever realm.get_admin_users_and_bots() returns.  For all other
    streams the peers are the realm's active non-guest users, whether
    or not they are subscribed.
    """
    # Split the stream ids by privacy in a single pass.
    private_ids: Set[int] = set()
    public_ids: Set[int] = set()
    for stream in streams:
        bucket = private_ids if stream.invite_only else public_ids
        bucket.add(stream.id)

    # One lookup covers the subscribers of every stream we were given.
    users_by_stream = get_user_ids_for_streams(private_ids | public_ids)

    subscribed_ids: Dict[int, Set[int]] = {}
    peer_ids: Dict[int, Set[int]] = {}

    if private_ids:
        admin_ids = {admin.id for admin in realm.get_admin_users_and_bots()}
        for stream_id in private_ids:
            members = users_by_stream.get(stream_id, set())
            subscribed_ids[stream_id] = members
            peer_ids[stream_id] = members | admin_ids

    if public_ids:
        non_guest_ids = active_non_guest_user_ids(realm.id)
        for stream_id in public_ids:
            subscribed_ids[stream_id] = users_by_stream.get(stream_id, set())
            # Each stream gets its own copy so callers can mutate one
            # stream's peers without affecting another's.
            peer_ids[stream_id] = set(non_guest_ids)

    return SubscriberPeerInfo(subscribed_ids=subscribed_ids, peer_ids=peer_ids)
|
||||
|
||||
def bulk_get_peers(
    realm: Realm,
    streams: List[Stream],
) -> Dict[int, Set[int]]:
    """Return, per stream id, the users who must be told about changes.

    This is almost a subset of bulk_get_subscriber_peer_info, with the
    nuance that we don't have to query subscribers for public streams.
    (The other function tries to save a query hop.)

    For invite-only streams the peers are the stream's subscribers plus
    whatever realm.get_admin_users_and_bots() returns; for all other
    streams they are the realm's active non-guest users.
    """
    # Split the stream ids by privacy in a single pass.
    private_ids: Set[int] = set()
    public_ids: Set[int] = set()
    for stream in streams:
        bucket = private_ids if stream.invite_only else public_ids
        bucket.add(stream.id)

    peers_by_stream: Dict[int, Set[int]] = {}

    if private_ids:
        admin_ids = {admin.id for admin in realm.get_admin_users_and_bots()}
        # Subscribers are only looked up for the private streams.
        users_by_stream = get_user_ids_for_streams(private_ids)
        for stream_id in private_ids:
            members = users_by_stream.get(stream_id, set())
            peers_by_stream[stream_id] = members | admin_ids

    if public_ids:
        non_guest_ids = active_non_guest_user_ids(realm.id)
        for stream_id in public_ids:
            # Each stream gets its own copy of the set.
            peers_by_stream[stream_id] = set(non_guest_ids)

    return peers_by_stream
|
||||
|
||||
def handle_stream_notifications_compatibility(user_profile: Optional[UserProfile],
|
||||
stream_dict: Dict[str, Any],
|
||||
|
||||
Reference in New Issue
Block a user