performance: Add get_subscriptions_for_send_message.

This new function optimizes how we fetch subscriptions for streams. Basically, it excludes most long-term-idle users from the query. With 8k users, of which all but 400 are long term idle, this speeds up get_recipient_info from about 150ms to 50ms. Overall this change appears to save a factor of 2-3 in the backend processing time for sending or editing a message in large, public streams in chat.zulip.org (at 18K users today).
2025-11-04 14:03:30 +00:00 · 2021-05-11 11:55:49 +00:00
parent c690fa1174
commit b4470ac8e1
5 changed files with 169 additions and 21 deletions
--- a/zerver/lib/actions.py
+++ b/zerver/lib/actions.py
@@ -133,6 +133,7 @@ from zerver.lib.stream_subscription import (
    get_stream_subscriptions_for_user,
    get_stream_subscriptions_for_users,
    get_subscribed_stream_ids_for_user,
+    get_subscriptions_for_send_message,
    get_user_ids_for_streams,
    num_subscribers_for_stream_id,
    subscriber_ids_with_stream_history_access,
@@ -1422,6 +1423,8 @@ class RecipientInfoResult(TypedDict):


 def get_recipient_info(
+    *,
+    realm_id: int,
    recipient: Recipient,
    sender_id: int,
    stream_topic: Optional[StreamTopicTarget],
@@ -1446,7 +1449,12 @@ def get_recipient_info(
        user_ids_muting_topic = stream_topic.user_ids_muting_topic()

        subscription_rows = (
-            stream_topic.get_active_subscriptions()
+            get_subscriptions_for_send_message(
+                realm_id=realm_id,
+                stream_id=stream_topic.stream_id,
+                possible_wildcard_mention=possible_wildcard_mention,
+                possibly_mentioned_user_ids=possibly_mentioned_user_ids,
+            )
            .annotate(
                user_profile_email_notifications=F(
                    "user_profile__enable_stream_email_notifications"
@@ -1732,6 +1740,7 @@ def build_message_send_dict(
        stream_topic = None

    info = get_recipient_info(
+        realm_id=realm.id,
        recipient=message_dict["message"].recipient,
        sender_id=message_dict["message"].sender_id,
        stream_topic=stream_topic,
@@ -5691,10 +5700,12 @@ def do_update_message(

    changed_messages = [target_message]

+    realm = user_profile.realm
+
    stream_being_edited = None
    if target_message.is_stream_message():
        stream_id = target_message.recipient.type_id
-        stream_being_edited = get_stream_by_id_in_realm(stream_id, user_profile.realm)
+        stream_being_edited = get_stream_by_id_in_realm(stream_id, realm)
        event["stream_name"] = stream_being_edited.name

    ums = UserMessage.objects.filter(message=target_message.id)
@@ -5753,6 +5764,7 @@ def do_update_message(
            stream_topic = None

        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=target_message.recipient,
            sender_id=target_message.sender_id,
            stream_topic=stream_topic,
--- a/zerver/lib/stream_subscription.py
+++ b/zerver/lib/stream_subscription.py
@@ -2,11 +2,11 @@ import itertools
 from collections import defaultdict
 from dataclasses import dataclass
 from operator import itemgetter
-from typing import Any, Dict, List, Optional, Set
+from typing import AbstractSet, Any, Dict, List, Optional, Set

-from django.db.models.query import QuerySet
+from django.db.models import Q, QuerySet

-from zerver.models import Realm, Recipient, Stream, Subscription, UserProfile
+from zerver.models import AlertWord, Realm, Recipient, Stream, Subscription, UserProfile


@dataclass
@@ -274,3 +274,62 @@ def subscriber_ids_with_stream_history_access(stream: Stream) -> Set[int]:
            stream.id, include_deactivated_users=False
        ).values_list("user_profile_id", flat=True)
    )
+
+
+def get_subscriptions_for_send_message(
+    *,
+    realm_id: int,
+    stream_id: int,
+    possible_wildcard_mention: bool,
+    possibly_mentioned_user_ids: AbstractSet[int],
+) -> QuerySet:
+    """This function optimizes an important use case for large
+    streams. Open realms often have many long_term_idle users, which
+    can result in 10,000s of long_term_idle recipients in default
+    streams. do_send_messages has an optimization to avoid doing work
+    for long_term_idle unless message flags or notifications should be
+    generated.
+
+    However, it's expensive even to fetch and process them all in
+    Python at all. This function returns all recipients of a stream
+    message that could possibly require action in the send-message
+    codepath.
+
+    Basically, it returns all subscribers, excluding all long-term
+    idle users who it can prove will not receive a UserMessage row or
+    notification for the message (i.e. no alert words, mentions, or
+    email/push notifications are configured) and thus are not needed
+    for processing the message send.
+
+    Critically, this function is called before the Markdown
+    processor. As a result, it returns all subscribers who have ANY
+    configured alert words, even if their alert words aren't present
+    in the message. Similarly, it returns all subscribers who match
+    the "possible mention" parameters.
+
+    Downstream logic, which runs after the Markdown processor has
+    parsed the message, will do the precise determination.
+    """
+
+    query = get_active_subscriptions_for_stream_id(
+        stream_id,
+        include_deactivated_users=False,
+    )
+
+    if possible_wildcard_mention:
+        return query
+
+    query = query.filter(
+        Q(user_profile__long_term_idle=False)
+        | Q(push_notifications=True)
+        | (Q(push_notifications=None) & Q(user_profile__enable_stream_push_notifications=True))
+        | Q(email_notifications=True)
+        | (Q(email_notifications=None) & Q(user_profile__enable_stream_email_notifications=True))
+        | Q(user_profile_id__in=possibly_mentioned_user_ids)
+        | Q(
+            user_profile_id__in=AlertWord.objects.filter(realm_id=realm_id).values_list(
+                "user_profile_id"
+            )
+        )
+    )
+    return query
--- a/zerver/lib/stream_topic.py
+++ b/zerver/lib/stream_topic.py
@@ -1,8 +1,5 @@
 from typing import Set

-from django.db.models.query import QuerySet
-
-from zerver.lib.stream_subscription import get_active_subscriptions_for_stream_id
 from zerver.models import MutedTopic


@@ -26,8 +23,3 @@ class StreamTopicTarget:
            "user_profile_id",
        )
        return {row["user_profile_id"] for row in query}
-
-    def get_active_subscriptions(self) -> QuerySet:
-        return get_active_subscriptions_for_stream_id(
-            self.stream_id, include_deactivated_users=True
-        )
--- a/zerver/tests/test_soft_deactivation.py
+++ b/zerver/tests/test_soft_deactivation.py
@@ -1,3 +1,4 @@
+from typing import AbstractSet
 from unittest import mock

 from django.utils.timezone import now as timezone_now
@@ -14,6 +15,7 @@ from zerver.lib.soft_deactivation import (
    get_users_for_soft_deactivation,
    reactivate_user_if_soft_deactivated,
 )
+from zerver.lib.stream_subscription import get_subscriptions_for_send_message
 from zerver.lib.test_classes import ZulipTestCase
 from zerver.lib.test_helpers import (
    get_subscription,
@@ -22,6 +24,7 @@ from zerver.lib.test_helpers import (
    queries_captured,
 )
 from zerver.models import (
+    AlertWord,
    Client,
    Message,
    RealmAuditLog,
@@ -580,16 +583,21 @@ class SoftDeactivationMessageTest(ZulipTestCase):
        # In this test we are basically testing out the logic used out in
        # do_send_messages() in action.py for filtering the messages for which
        # UserMessage rows should be created for a soft-deactivated user.
+        AlertWord.objects.all().delete()
+
        long_term_idle_user = self.example_user("hamlet")
        cordelia = self.example_user("cordelia")
        sender = self.example_user("iago")
-        stream_name = "Denmark"
+        stream_name = "Brand New Stream"
        topic_name = "foo"
+        realm_id = cordelia.realm_id

        self.subscribe(long_term_idle_user, stream_name)
        self.subscribe(cordelia, stream_name)
        self.subscribe(sender, stream_name)

+        stream_id = get_stream(stream_name, cordelia.realm).id
+
        def send_stream_message(content: str) -> None:
            self.send_stream_message(sender, stream_name, content, topic_name)

@@ -620,7 +628,35 @@ class SoftDeactivationMessageTest(ZulipTestCase):
            else:
                self.assertEqual(user_messages[-1].content, content)

-        def assert_stream_message_sent_to_idle_user(content: str) -> None:
+        def assert_num_possible_users(
+            expected_count: int,
+            *,
+            possible_wildcard_mention: bool = False,
+            possibly_mentioned_user_ids: AbstractSet[int] = set(),
+        ) -> None:
+            self.assertEqual(
+                len(
+                    get_subscriptions_for_send_message(
+                        realm_id=realm_id,
+                        stream_id=stream_id,
+                        possible_wildcard_mention=possible_wildcard_mention,
+                        possibly_mentioned_user_ids=possibly_mentioned_user_ids,
+                    )
+                ),
+                expected_count,
+            )
+
+        def assert_stream_message_sent_to_idle_user(
+            content: str,
+            *,
+            possible_wildcard_mention: bool = False,
+            possibly_mentioned_user_ids: AbstractSet[int] = set(),
+        ) -> None:
+            assert_num_possible_users(
+                expected_count=3,
+                possible_wildcard_mention=possible_wildcard_mention,
+                possibly_mentioned_user_ids=possibly_mentioned_user_ids,
+            )
            general_user_msg_count = len(get_user_messages(cordelia))
            soft_deactivated_user_msg_count = len(get_user_messages(long_term_idle_user))
            send_stream_message(content)
@@ -629,7 +665,22 @@ class SoftDeactivationMessageTest(ZulipTestCase):
            assert_last_um_content(long_term_idle_user, content)
            assert_last_um_content(cordelia, content)

-        def assert_stream_message_not_sent_to_idle_user(content: str) -> None:
+        def assert_stream_message_not_sent_to_idle_user(
+            content: str,
+            *,
+            possibly_mentioned_user_ids: AbstractSet[int] = set(),
+            false_alarm_row: bool = False,
+        ) -> None:
+            if false_alarm_row:
+                # We will query for our idle user if he has **ANY** alert
+                # words, but we won't actually write a UserMessage row until
+                # we truly parse the message. We also get false alarms for
+                # messages with quoted mentions.
+                assert_num_possible_users(
+                    3, possibly_mentioned_user_ids=possibly_mentioned_user_ids
+                )
+            else:
+                assert_num_possible_users(2)
            general_user_msg_count = len(get_user_messages(cordelia))
            soft_deactivated_user_msg_count = len(get_user_messages(long_term_idle_user))
            send_stream_message(content)
@@ -696,7 +747,16 @@ class SoftDeactivationMessageTest(ZulipTestCase):

        # Test UserMessage row is created while user is deactivated if
        # user itself is mentioned.
-        assert_stream_message_sent_to_idle_user("Test @**King Hamlet** mention")
+        assert_stream_message_sent_to_idle_user(
+            "Test @**King Hamlet** mention",
+            possibly_mentioned_user_ids={long_term_idle_user.id},
+        )
+
+        assert_stream_message_not_sent_to_idle_user(
+            "Test `@**King Hamlet**` mention",
+            possibly_mentioned_user_ids={long_term_idle_user.id},
+            false_alarm_row=True,
+        )

        # Test UserMessage row is not created while user is deactivated if
        # anyone is mentioned but the user.
@@ -704,9 +764,15 @@ class SoftDeactivationMessageTest(ZulipTestCase):

        # Test UserMessage row is created while user is deactivated if
        # there is a wildcard mention such as @all or @everyone
-        assert_stream_message_sent_to_idle_user("Test @**all** mention")
-        assert_stream_message_sent_to_idle_user("Test @**everyone** mention")
-        assert_stream_message_sent_to_idle_user("Test @**stream** mention")
+        assert_stream_message_sent_to_idle_user(
+            "Test @**all** mention", possible_wildcard_mention=True
+        )
+        assert_stream_message_sent_to_idle_user(
+            "Test @**everyone** mention", possible_wildcard_mention=True
+        )
+        assert_stream_message_sent_to_idle_user(
+            "Test @**stream** mention", possible_wildcard_mention=True
+        )
        assert_stream_message_not_sent_to_idle_user("Test @**bogus** mention")

        # Test UserMessage row is created while user is deactivated if there
@@ -714,6 +780,13 @@ class SoftDeactivationMessageTest(ZulipTestCase):
        do_add_alert_words(long_term_idle_user, ["test_alert_word"])
        assert_stream_message_sent_to_idle_user("Testing test_alert_word")

+        do_add_alert_words(cordelia, ["cordelia"])
+        assert_stream_message_not_sent_to_idle_user("cordelia", false_alarm_row=True)
+
        # Test UserMessage row is not created while user is deactivated if
        # message is a me message.
-        assert_stream_message_not_sent_to_idle_user("/me says test")
+        assert_stream_message_not_sent_to_idle_user("/me says test", false_alarm_row=True)
+
+        # Sanity check after removing the alert word for Hamlet.
+        AlertWord.objects.filter(user_profile=long_term_idle_user).delete()
+        assert_stream_message_not_sent_to_idle_user("no alert words")
--- a/zerver/tests/test_users.py
+++ b/zerver/tests/test_users.py
@@ -1510,6 +1510,7 @@ class RecipientInfoTest(ZulipTestCase):
        )

        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1537,6 +1538,7 @@ class RecipientInfoTest(ZulipTestCase):
        hamlet.enable_stream_push_notifications = True
        hamlet.save()
        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1546,6 +1548,7 @@ class RecipientInfoTest(ZulipTestCase):
        self.assertEqual(info["wildcard_mention_user_ids"], set())

        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1557,6 +1560,7 @@ class RecipientInfoTest(ZulipTestCase):
        sub.push_notifications = False
        sub.save()
        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1569,6 +1573,7 @@ class RecipientInfoTest(ZulipTestCase):
        sub.push_notifications = True
        sub.save()
        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1584,6 +1589,7 @@ class RecipientInfoTest(ZulipTestCase):
        )

        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1593,6 +1599,7 @@ class RecipientInfoTest(ZulipTestCase):
        self.assertEqual(info["wildcard_mention_user_ids"], set())

        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1608,6 +1615,7 @@ class RecipientInfoTest(ZulipTestCase):
        sub.save()

        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1623,6 +1631,7 @@ class RecipientInfoTest(ZulipTestCase):
        sub.save()

        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1642,6 +1651,7 @@ class RecipientInfoTest(ZulipTestCase):
        )

        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1665,6 +1675,7 @@ class RecipientInfoTest(ZulipTestCase):
        )

        info = get_recipient_info(
+            realm_id=realm.id,
            recipient=recipient,
            sender_id=hamlet.id,
            stream_topic=stream_topic,
@@ -1686,6 +1697,7 @@ class RecipientInfoTest(ZulipTestCase):
        with self.assertRaisesRegex(ValueError, "Bad recipient type"):
            invalid_recipient = Recipient(type=999)  # 999 is not a valid type
            get_recipient_info(
+                realm_id=realm.id,
                recipient=invalid_recipient,
                sender_id=hamlet.id,
                stream_topic=stream_topic,