mirror of
https://github.com/zulip/zulip.git
synced 2025-11-04 14:03:30 +00:00
performance: Add get_subscriptions_for_send_message.
This new function optimizes how we fetch subscriptions for streams. Basically, it excludes most long-term-idle users from the query. With 8k users, of which all but 400 are long term idle, this speeds up get_recipient_info from about 150ms to 50ms. Overall this change appears to save a factor of 2-3 in the backend processing time for sending or editing a message in large, public streams in chat.zulip.org (at 18K users today).
This commit is contained in:
@@ -133,6 +133,7 @@ from zerver.lib.stream_subscription import (
|
||||
get_stream_subscriptions_for_user,
|
||||
get_stream_subscriptions_for_users,
|
||||
get_subscribed_stream_ids_for_user,
|
||||
get_subscriptions_for_send_message,
|
||||
get_user_ids_for_streams,
|
||||
num_subscribers_for_stream_id,
|
||||
subscriber_ids_with_stream_history_access,
|
||||
@@ -1422,6 +1423,8 @@ class RecipientInfoResult(TypedDict):
|
||||
|
||||
|
||||
def get_recipient_info(
|
||||
*,
|
||||
realm_id: int,
|
||||
recipient: Recipient,
|
||||
sender_id: int,
|
||||
stream_topic: Optional[StreamTopicTarget],
|
||||
@@ -1446,7 +1449,12 @@ def get_recipient_info(
|
||||
user_ids_muting_topic = stream_topic.user_ids_muting_topic()
|
||||
|
||||
subscription_rows = (
|
||||
stream_topic.get_active_subscriptions()
|
||||
get_subscriptions_for_send_message(
|
||||
realm_id=realm_id,
|
||||
stream_id=stream_topic.stream_id,
|
||||
possible_wildcard_mention=possible_wildcard_mention,
|
||||
possibly_mentioned_user_ids=possibly_mentioned_user_ids,
|
||||
)
|
||||
.annotate(
|
||||
user_profile_email_notifications=F(
|
||||
"user_profile__enable_stream_email_notifications"
|
||||
@@ -1732,6 +1740,7 @@ def build_message_send_dict(
|
||||
stream_topic = None
|
||||
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=message_dict["message"].recipient,
|
||||
sender_id=message_dict["message"].sender_id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -5691,10 +5700,12 @@ def do_update_message(
|
||||
|
||||
changed_messages = [target_message]
|
||||
|
||||
realm = user_profile.realm
|
||||
|
||||
stream_being_edited = None
|
||||
if target_message.is_stream_message():
|
||||
stream_id = target_message.recipient.type_id
|
||||
stream_being_edited = get_stream_by_id_in_realm(stream_id, user_profile.realm)
|
||||
stream_being_edited = get_stream_by_id_in_realm(stream_id, realm)
|
||||
event["stream_name"] = stream_being_edited.name
|
||||
|
||||
ums = UserMessage.objects.filter(message=target_message.id)
|
||||
@@ -5753,6 +5764,7 @@ def do_update_message(
|
||||
stream_topic = None
|
||||
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=target_message.recipient,
|
||||
sender_id=target_message.sender_id,
|
||||
stream_topic=stream_topic,
|
||||
|
||||
@@ -2,11 +2,11 @@ import itertools
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from operator import itemgetter
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
from typing import AbstractSet, Any, Dict, List, Optional, Set
|
||||
|
||||
from django.db.models.query import QuerySet
|
||||
from django.db.models import Q, QuerySet
|
||||
|
||||
from zerver.models import Realm, Recipient, Stream, Subscription, UserProfile
|
||||
from zerver.models import AlertWord, Realm, Recipient, Stream, Subscription, UserProfile
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -274,3 +274,62 @@ def subscriber_ids_with_stream_history_access(stream: Stream) -> Set[int]:
|
||||
stream.id, include_deactivated_users=False
|
||||
).values_list("user_profile_id", flat=True)
|
||||
)
|
||||
|
||||
|
||||
def get_subscriptions_for_send_message(
|
||||
*,
|
||||
realm_id: int,
|
||||
stream_id: int,
|
||||
possible_wildcard_mention: bool,
|
||||
possibly_mentioned_user_ids: AbstractSet[int],
|
||||
) -> QuerySet:
|
||||
"""This function optimizes an important use case for large
|
||||
streams. Open realms often have many long_term_idle users, which
|
||||
can result in 10,000s of long_term_idle recipients in default
|
||||
streams. do_send_messages has an optimization to avoid doing work
|
||||
for long_term_idle unless message flags or notifications should be
|
||||
generated.
|
||||
|
||||
However, it's expensive even to fetch and process them all in
|
||||
Python at all. This function returns all recipients of a stream
|
||||
message that could possibly require action in the send-message
|
||||
codepath.
|
||||
|
||||
Basically, it returns all subscribers, excluding all long-term
|
||||
idle users who it can prove will not receive a UserMessage row or
|
||||
notification for the message (i.e. no alert words, mentions, or
|
||||
email/push notifications are configured) and thus are not needed
|
||||
for processing the message send.
|
||||
|
||||
Critically, this function is called before the Markdown
|
||||
processor. As a result, it returns all subscribers who have ANY
|
||||
configured alert words, even if their alert words aren't present
|
||||
in the message. Similarly, it returns all subscribers who match
|
||||
the "possible mention" parameters.
|
||||
|
||||
Downstream logic, which runs after the Markdown processor has
|
||||
parsed the message, will do the precise determination.
|
||||
"""
|
||||
|
||||
query = get_active_subscriptions_for_stream_id(
|
||||
stream_id,
|
||||
include_deactivated_users=False,
|
||||
)
|
||||
|
||||
if possible_wildcard_mention:
|
||||
return query
|
||||
|
||||
query = query.filter(
|
||||
Q(user_profile__long_term_idle=False)
|
||||
| Q(push_notifications=True)
|
||||
| (Q(push_notifications=None) & Q(user_profile__enable_stream_push_notifications=True))
|
||||
| Q(email_notifications=True)
|
||||
| (Q(email_notifications=None) & Q(user_profile__enable_stream_email_notifications=True))
|
||||
| Q(user_profile_id__in=possibly_mentioned_user_ids)
|
||||
| Q(
|
||||
user_profile_id__in=AlertWord.objects.filter(realm_id=realm_id).values_list(
|
||||
"user_profile_id"
|
||||
)
|
||||
)
|
||||
)
|
||||
return query
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
from typing import Set
|
||||
|
||||
from django.db.models.query import QuerySet
|
||||
|
||||
from zerver.lib.stream_subscription import get_active_subscriptions_for_stream_id
|
||||
from zerver.models import MutedTopic
|
||||
|
||||
|
||||
@@ -26,8 +23,3 @@ class StreamTopicTarget:
|
||||
"user_profile_id",
|
||||
)
|
||||
return {row["user_profile_id"] for row in query}
|
||||
|
||||
def get_active_subscriptions(self) -> QuerySet:
|
||||
return get_active_subscriptions_for_stream_id(
|
||||
self.stream_id, include_deactivated_users=True
|
||||
)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from typing import AbstractSet
|
||||
from unittest import mock
|
||||
|
||||
from django.utils.timezone import now as timezone_now
|
||||
@@ -14,6 +15,7 @@ from zerver.lib.soft_deactivation import (
|
||||
get_users_for_soft_deactivation,
|
||||
reactivate_user_if_soft_deactivated,
|
||||
)
|
||||
from zerver.lib.stream_subscription import get_subscriptions_for_send_message
|
||||
from zerver.lib.test_classes import ZulipTestCase
|
||||
from zerver.lib.test_helpers import (
|
||||
get_subscription,
|
||||
@@ -22,6 +24,7 @@ from zerver.lib.test_helpers import (
|
||||
queries_captured,
|
||||
)
|
||||
from zerver.models import (
|
||||
AlertWord,
|
||||
Client,
|
||||
Message,
|
||||
RealmAuditLog,
|
||||
@@ -580,16 +583,21 @@ class SoftDeactivationMessageTest(ZulipTestCase):
|
||||
# In this test we are basically testing out the logic used out in
|
||||
# do_send_messages() in action.py for filtering the messages for which
|
||||
# UserMessage rows should be created for a soft-deactivated user.
|
||||
AlertWord.objects.all().delete()
|
||||
|
||||
long_term_idle_user = self.example_user("hamlet")
|
||||
cordelia = self.example_user("cordelia")
|
||||
sender = self.example_user("iago")
|
||||
stream_name = "Denmark"
|
||||
stream_name = "Brand New Stream"
|
||||
topic_name = "foo"
|
||||
realm_id = cordelia.realm_id
|
||||
|
||||
self.subscribe(long_term_idle_user, stream_name)
|
||||
self.subscribe(cordelia, stream_name)
|
||||
self.subscribe(sender, stream_name)
|
||||
|
||||
stream_id = get_stream(stream_name, cordelia.realm).id
|
||||
|
||||
def send_stream_message(content: str) -> None:
|
||||
self.send_stream_message(sender, stream_name, content, topic_name)
|
||||
|
||||
@@ -620,7 +628,35 @@ class SoftDeactivationMessageTest(ZulipTestCase):
|
||||
else:
|
||||
self.assertEqual(user_messages[-1].content, content)
|
||||
|
||||
def assert_stream_message_sent_to_idle_user(content: str) -> None:
|
||||
def assert_num_possible_users(
|
||||
expected_count: int,
|
||||
*,
|
||||
possible_wildcard_mention: bool = False,
|
||||
possibly_mentioned_user_ids: AbstractSet[int] = set(),
|
||||
) -> None:
|
||||
self.assertEqual(
|
||||
len(
|
||||
get_subscriptions_for_send_message(
|
||||
realm_id=realm_id,
|
||||
stream_id=stream_id,
|
||||
possible_wildcard_mention=possible_wildcard_mention,
|
||||
possibly_mentioned_user_ids=possibly_mentioned_user_ids,
|
||||
)
|
||||
),
|
||||
expected_count,
|
||||
)
|
||||
|
||||
def assert_stream_message_sent_to_idle_user(
|
||||
content: str,
|
||||
*,
|
||||
possible_wildcard_mention: bool = False,
|
||||
possibly_mentioned_user_ids: AbstractSet[int] = set(),
|
||||
) -> None:
|
||||
assert_num_possible_users(
|
||||
expected_count=3,
|
||||
possible_wildcard_mention=possible_wildcard_mention,
|
||||
possibly_mentioned_user_ids=possibly_mentioned_user_ids,
|
||||
)
|
||||
general_user_msg_count = len(get_user_messages(cordelia))
|
||||
soft_deactivated_user_msg_count = len(get_user_messages(long_term_idle_user))
|
||||
send_stream_message(content)
|
||||
@@ -629,7 +665,22 @@ class SoftDeactivationMessageTest(ZulipTestCase):
|
||||
assert_last_um_content(long_term_idle_user, content)
|
||||
assert_last_um_content(cordelia, content)
|
||||
|
||||
def assert_stream_message_not_sent_to_idle_user(content: str) -> None:
|
||||
def assert_stream_message_not_sent_to_idle_user(
|
||||
content: str,
|
||||
*,
|
||||
possibly_mentioned_user_ids: AbstractSet[int] = set(),
|
||||
false_alarm_row: bool = False,
|
||||
) -> None:
|
||||
if false_alarm_row:
|
||||
# We will query for our idle user if he has **ANY** alert
|
||||
# words, but we won't actually write a UserMessage row until
|
||||
# we truly parse the message. We also get false alarms for
|
||||
# messages with quoted mentions.
|
||||
assert_num_possible_users(
|
||||
3, possibly_mentioned_user_ids=possibly_mentioned_user_ids
|
||||
)
|
||||
else:
|
||||
assert_num_possible_users(2)
|
||||
general_user_msg_count = len(get_user_messages(cordelia))
|
||||
soft_deactivated_user_msg_count = len(get_user_messages(long_term_idle_user))
|
||||
send_stream_message(content)
|
||||
@@ -696,7 +747,16 @@ class SoftDeactivationMessageTest(ZulipTestCase):
|
||||
|
||||
# Test UserMessage row is created while user is deactivated if
|
||||
# user itself is mentioned.
|
||||
assert_stream_message_sent_to_idle_user("Test @**King Hamlet** mention")
|
||||
assert_stream_message_sent_to_idle_user(
|
||||
"Test @**King Hamlet** mention",
|
||||
possibly_mentioned_user_ids={long_term_idle_user.id},
|
||||
)
|
||||
|
||||
assert_stream_message_not_sent_to_idle_user(
|
||||
"Test `@**King Hamlet**` mention",
|
||||
possibly_mentioned_user_ids={long_term_idle_user.id},
|
||||
false_alarm_row=True,
|
||||
)
|
||||
|
||||
# Test UserMessage row is not created while user is deactivated if
|
||||
# anyone is mentioned but the user.
|
||||
@@ -704,9 +764,15 @@ class SoftDeactivationMessageTest(ZulipTestCase):
|
||||
|
||||
# Test UserMessage row is created while user is deactivated if
|
||||
# there is a wildcard mention such as @all or @everyone
|
||||
assert_stream_message_sent_to_idle_user("Test @**all** mention")
|
||||
assert_stream_message_sent_to_idle_user("Test @**everyone** mention")
|
||||
assert_stream_message_sent_to_idle_user("Test @**stream** mention")
|
||||
assert_stream_message_sent_to_idle_user(
|
||||
"Test @**all** mention", possible_wildcard_mention=True
|
||||
)
|
||||
assert_stream_message_sent_to_idle_user(
|
||||
"Test @**everyone** mention", possible_wildcard_mention=True
|
||||
)
|
||||
assert_stream_message_sent_to_idle_user(
|
||||
"Test @**stream** mention", possible_wildcard_mention=True
|
||||
)
|
||||
assert_stream_message_not_sent_to_idle_user("Test @**bogus** mention")
|
||||
|
||||
# Test UserMessage row is created while user is deactivated if there
|
||||
@@ -714,6 +780,13 @@ class SoftDeactivationMessageTest(ZulipTestCase):
|
||||
do_add_alert_words(long_term_idle_user, ["test_alert_word"])
|
||||
assert_stream_message_sent_to_idle_user("Testing test_alert_word")
|
||||
|
||||
do_add_alert_words(cordelia, ["cordelia"])
|
||||
assert_stream_message_not_sent_to_idle_user("cordelia", false_alarm_row=True)
|
||||
|
||||
# Test UserMessage row is not created while user is deactivated if
|
||||
# message is a me message.
|
||||
assert_stream_message_not_sent_to_idle_user("/me says test")
|
||||
assert_stream_message_not_sent_to_idle_user("/me says test", false_alarm_row=True)
|
||||
|
||||
# Sanity check after removing the alert word for Hamlet.
|
||||
AlertWord.objects.filter(user_profile=long_term_idle_user).delete()
|
||||
assert_stream_message_not_sent_to_idle_user("no alert words")
|
||||
|
||||
@@ -1510,6 +1510,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
)
|
||||
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1537,6 +1538,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
hamlet.enable_stream_push_notifications = True
|
||||
hamlet.save()
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1546,6 +1548,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
self.assertEqual(info["wildcard_mention_user_ids"], set())
|
||||
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1557,6 +1560,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
sub.push_notifications = False
|
||||
sub.save()
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1569,6 +1573,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
sub.push_notifications = True
|
||||
sub.save()
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1584,6 +1589,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
)
|
||||
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1593,6 +1599,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
self.assertEqual(info["wildcard_mention_user_ids"], set())
|
||||
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1608,6 +1615,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
sub.save()
|
||||
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1623,6 +1631,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
sub.save()
|
||||
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1642,6 +1651,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
)
|
||||
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1665,6 +1675,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
)
|
||||
|
||||
info = get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
@@ -1686,6 +1697,7 @@ class RecipientInfoTest(ZulipTestCase):
|
||||
with self.assertRaisesRegex(ValueError, "Bad recipient type"):
|
||||
invalid_recipient = Recipient(type=999) # 999 is not a valid type
|
||||
get_recipient_info(
|
||||
realm_id=realm.id,
|
||||
recipient=invalid_recipient,
|
||||
sender_id=hamlet.id,
|
||||
stream_topic=stream_topic,
|
||||
|
||||
Reference in New Issue
Block a user