performance: Add get_subscriptions_for_send_message.

This new function optimizes how we fetch subscriptions
for streams.  Basically, it excludes most long-term-idle
users from the query.

With 8k users, of which all but 400 are long term idle,
this speeds up get_recipient_info from about 150ms
to 50ms.

Overall this change appears to save a factor of 2-3 in the backend
processing time for sending or editing a message in large, public
streams in chat.zulip.org (at 18K users today).
This commit is contained in:
Steve Howell
2021-05-11 11:55:49 +00:00
committed by Tim Abbott
parent c690fa1174
commit b4470ac8e1
5 changed files with 169 additions and 21 deletions

View File

@@ -133,6 +133,7 @@ from zerver.lib.stream_subscription import (
get_stream_subscriptions_for_user,
get_stream_subscriptions_for_users,
get_subscribed_stream_ids_for_user,
get_subscriptions_for_send_message,
get_user_ids_for_streams,
num_subscribers_for_stream_id,
subscriber_ids_with_stream_history_access,
@@ -1422,6 +1423,8 @@ class RecipientInfoResult(TypedDict):
def get_recipient_info(
*,
realm_id: int,
recipient: Recipient,
sender_id: int,
stream_topic: Optional[StreamTopicTarget],
@@ -1446,7 +1449,12 @@ def get_recipient_info(
user_ids_muting_topic = stream_topic.user_ids_muting_topic()
subscription_rows = (
stream_topic.get_active_subscriptions()
get_subscriptions_for_send_message(
realm_id=realm_id,
stream_id=stream_topic.stream_id,
possible_wildcard_mention=possible_wildcard_mention,
possibly_mentioned_user_ids=possibly_mentioned_user_ids,
)
.annotate(
user_profile_email_notifications=F(
"user_profile__enable_stream_email_notifications"
@@ -1732,6 +1740,7 @@ def build_message_send_dict(
stream_topic = None
info = get_recipient_info(
realm_id=realm.id,
recipient=message_dict["message"].recipient,
sender_id=message_dict["message"].sender_id,
stream_topic=stream_topic,
@@ -5691,10 +5700,12 @@ def do_update_message(
changed_messages = [target_message]
realm = user_profile.realm
stream_being_edited = None
if target_message.is_stream_message():
stream_id = target_message.recipient.type_id
stream_being_edited = get_stream_by_id_in_realm(stream_id, user_profile.realm)
stream_being_edited = get_stream_by_id_in_realm(stream_id, realm)
event["stream_name"] = stream_being_edited.name
ums = UserMessage.objects.filter(message=target_message.id)
@@ -5753,6 +5764,7 @@ def do_update_message(
stream_topic = None
info = get_recipient_info(
realm_id=realm.id,
recipient=target_message.recipient,
sender_id=target_message.sender_id,
stream_topic=stream_topic,

View File

@@ -2,11 +2,11 @@ import itertools
from collections import defaultdict
from dataclasses import dataclass
from operator import itemgetter
from typing import Any, Dict, List, Optional, Set
from typing import AbstractSet, Any, Dict, List, Optional, Set
from django.db.models.query import QuerySet
from django.db.models import Q, QuerySet
from zerver.models import Realm, Recipient, Stream, Subscription, UserProfile
from zerver.models import AlertWord, Realm, Recipient, Stream, Subscription, UserProfile
@dataclass
@@ -274,3 +274,62 @@ def subscriber_ids_with_stream_history_access(stream: Stream) -> Set[int]:
stream.id, include_deactivated_users=False
).values_list("user_profile_id", flat=True)
)
def get_subscriptions_for_send_message(
*,
realm_id: int,
stream_id: int,
possible_wildcard_mention: bool,
possibly_mentioned_user_ids: AbstractSet[int],
) -> QuerySet:
"""This function optimizes an important use case for large
streams. Open realms often have many long_term_idle users, which
can result in 10,000s of long_term_idle recipients in default
streams. do_send_messages has an optimization to avoid doing work
for long_term_idle unless message flags or notifications should be
generated.
However, it's expensive even to fetch and process them all in
Python at all. This function returns all recipients of a stream
message that could possibly require action in the send-message
codepath.
Basically, it returns all subscribers, excluding all long-term
idle users who it can prove will not receive a UserMessage row or
notification for the message (i.e. no alert words, mentions, or
email/push notifications are configured) and thus are not needed
for processing the message send.
Critically, this function is called before the Markdown
processor. As a result, it returns all subscribers who have ANY
configured alert words, even if their alert words aren't present
in the message. Similarly, it returns all subscribers who match
the "possible mention" parameters.
Downstream logic, which runs after the Markdown processor has
parsed the message, will do the precise determination.
"""
query = get_active_subscriptions_for_stream_id(
stream_id,
include_deactivated_users=False,
)
if possible_wildcard_mention:
return query
query = query.filter(
Q(user_profile__long_term_idle=False)
| Q(push_notifications=True)
| (Q(push_notifications=None) & Q(user_profile__enable_stream_push_notifications=True))
| Q(email_notifications=True)
| (Q(email_notifications=None) & Q(user_profile__enable_stream_email_notifications=True))
| Q(user_profile_id__in=possibly_mentioned_user_ids)
| Q(
user_profile_id__in=AlertWord.objects.filter(realm_id=realm_id).values_list(
"user_profile_id"
)
)
)
return query

View File

@@ -1,8 +1,5 @@
from typing import Set
from django.db.models.query import QuerySet
from zerver.lib.stream_subscription import get_active_subscriptions_for_stream_id
from zerver.models import MutedTopic
@@ -26,8 +23,3 @@ class StreamTopicTarget:
"user_profile_id",
)
return {row["user_profile_id"] for row in query}
def get_active_subscriptions(self) -> QuerySet:
return get_active_subscriptions_for_stream_id(
self.stream_id, include_deactivated_users=True
)

View File

@@ -1,3 +1,4 @@
from typing import AbstractSet
from unittest import mock
from django.utils.timezone import now as timezone_now
@@ -14,6 +15,7 @@ from zerver.lib.soft_deactivation import (
get_users_for_soft_deactivation,
reactivate_user_if_soft_deactivated,
)
from zerver.lib.stream_subscription import get_subscriptions_for_send_message
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.test_helpers import (
get_subscription,
@@ -22,6 +24,7 @@ from zerver.lib.test_helpers import (
queries_captured,
)
from zerver.models import (
AlertWord,
Client,
Message,
RealmAuditLog,
@@ -580,16 +583,21 @@ class SoftDeactivationMessageTest(ZulipTestCase):
# In this test we are basically testing out the logic used out in
# do_send_messages() in action.py for filtering the messages for which
# UserMessage rows should be created for a soft-deactivated user.
AlertWord.objects.all().delete()
long_term_idle_user = self.example_user("hamlet")
cordelia = self.example_user("cordelia")
sender = self.example_user("iago")
stream_name = "Denmark"
stream_name = "Brand New Stream"
topic_name = "foo"
realm_id = cordelia.realm_id
self.subscribe(long_term_idle_user, stream_name)
self.subscribe(cordelia, stream_name)
self.subscribe(sender, stream_name)
stream_id = get_stream(stream_name, cordelia.realm).id
def send_stream_message(content: str) -> None:
self.send_stream_message(sender, stream_name, content, topic_name)
@@ -620,7 +628,35 @@ class SoftDeactivationMessageTest(ZulipTestCase):
else:
self.assertEqual(user_messages[-1].content, content)
def assert_stream_message_sent_to_idle_user(content: str) -> None:
def assert_num_possible_users(
expected_count: int,
*,
possible_wildcard_mention: bool = False,
possibly_mentioned_user_ids: AbstractSet[int] = set(),
) -> None:
self.assertEqual(
len(
get_subscriptions_for_send_message(
realm_id=realm_id,
stream_id=stream_id,
possible_wildcard_mention=possible_wildcard_mention,
possibly_mentioned_user_ids=possibly_mentioned_user_ids,
)
),
expected_count,
)
def assert_stream_message_sent_to_idle_user(
content: str,
*,
possible_wildcard_mention: bool = False,
possibly_mentioned_user_ids: AbstractSet[int] = set(),
) -> None:
assert_num_possible_users(
expected_count=3,
possible_wildcard_mention=possible_wildcard_mention,
possibly_mentioned_user_ids=possibly_mentioned_user_ids,
)
general_user_msg_count = len(get_user_messages(cordelia))
soft_deactivated_user_msg_count = len(get_user_messages(long_term_idle_user))
send_stream_message(content)
@@ -629,7 +665,22 @@ class SoftDeactivationMessageTest(ZulipTestCase):
assert_last_um_content(long_term_idle_user, content)
assert_last_um_content(cordelia, content)
def assert_stream_message_not_sent_to_idle_user(content: str) -> None:
def assert_stream_message_not_sent_to_idle_user(
content: str,
*,
possibly_mentioned_user_ids: AbstractSet[int] = set(),
false_alarm_row: bool = False,
) -> None:
if false_alarm_row:
# We will query for our idle user if he has **ANY** alert
# words, but we won't actually write a UserMessage row until
# we truly parse the message. We also get false alarms for
# messages with quoted mentions.
assert_num_possible_users(
3, possibly_mentioned_user_ids=possibly_mentioned_user_ids
)
else:
assert_num_possible_users(2)
general_user_msg_count = len(get_user_messages(cordelia))
soft_deactivated_user_msg_count = len(get_user_messages(long_term_idle_user))
send_stream_message(content)
@@ -696,7 +747,16 @@ class SoftDeactivationMessageTest(ZulipTestCase):
# Test UserMessage row is created while user is deactivated if
# user itself is mentioned.
assert_stream_message_sent_to_idle_user("Test @**King Hamlet** mention")
assert_stream_message_sent_to_idle_user(
"Test @**King Hamlet** mention",
possibly_mentioned_user_ids={long_term_idle_user.id},
)
assert_stream_message_not_sent_to_idle_user(
"Test `@**King Hamlet**` mention",
possibly_mentioned_user_ids={long_term_idle_user.id},
false_alarm_row=True,
)
# Test UserMessage row is not created while user is deactivated if
# anyone is mentioned but the user.
@@ -704,9 +764,15 @@ class SoftDeactivationMessageTest(ZulipTestCase):
# Test UserMessage row is created while user is deactivated if
# there is a wildcard mention such as @all or @everyone
assert_stream_message_sent_to_idle_user("Test @**all** mention")
assert_stream_message_sent_to_idle_user("Test @**everyone** mention")
assert_stream_message_sent_to_idle_user("Test @**stream** mention")
assert_stream_message_sent_to_idle_user(
"Test @**all** mention", possible_wildcard_mention=True
)
assert_stream_message_sent_to_idle_user(
"Test @**everyone** mention", possible_wildcard_mention=True
)
assert_stream_message_sent_to_idle_user(
"Test @**stream** mention", possible_wildcard_mention=True
)
assert_stream_message_not_sent_to_idle_user("Test @**bogus** mention")
# Test UserMessage row is created while user is deactivated if there
@@ -714,6 +780,13 @@ class SoftDeactivationMessageTest(ZulipTestCase):
do_add_alert_words(long_term_idle_user, ["test_alert_word"])
assert_stream_message_sent_to_idle_user("Testing test_alert_word")
do_add_alert_words(cordelia, ["cordelia"])
assert_stream_message_not_sent_to_idle_user("cordelia", false_alarm_row=True)
# Test UserMessage row is not created while user is deactivated if
# message is a me message.
assert_stream_message_not_sent_to_idle_user("/me says test")
assert_stream_message_not_sent_to_idle_user("/me says test", false_alarm_row=True)
# Sanity check after removing the alert word for Hamlet.
AlertWord.objects.filter(user_profile=long_term_idle_user).delete()
assert_stream_message_not_sent_to_idle_user("no alert words")

View File

@@ -1510,6 +1510,7 @@ class RecipientInfoTest(ZulipTestCase):
)
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1537,6 +1538,7 @@ class RecipientInfoTest(ZulipTestCase):
hamlet.enable_stream_push_notifications = True
hamlet.save()
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1546,6 +1548,7 @@ class RecipientInfoTest(ZulipTestCase):
self.assertEqual(info["wildcard_mention_user_ids"], set())
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1557,6 +1560,7 @@ class RecipientInfoTest(ZulipTestCase):
sub.push_notifications = False
sub.save()
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1569,6 +1573,7 @@ class RecipientInfoTest(ZulipTestCase):
sub.push_notifications = True
sub.save()
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1584,6 +1589,7 @@ class RecipientInfoTest(ZulipTestCase):
)
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1593,6 +1599,7 @@ class RecipientInfoTest(ZulipTestCase):
self.assertEqual(info["wildcard_mention_user_ids"], set())
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1608,6 +1615,7 @@ class RecipientInfoTest(ZulipTestCase):
sub.save()
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1623,6 +1631,7 @@ class RecipientInfoTest(ZulipTestCase):
sub.save()
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1642,6 +1651,7 @@ class RecipientInfoTest(ZulipTestCase):
)
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1665,6 +1675,7 @@ class RecipientInfoTest(ZulipTestCase):
)
info = get_recipient_info(
realm_id=realm.id,
recipient=recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,
@@ -1686,6 +1697,7 @@ class RecipientInfoTest(ZulipTestCase):
with self.assertRaisesRegex(ValueError, "Bad recipient type"):
invalid_recipient = Recipient(type=999) # 999 is not a valid type
get_recipient_info(
realm_id=realm.id,
recipient=invalid_recipient,
sender_id=hamlet.id,
stream_topic=stream_topic,