mobile: Add support for recent_private_conversations API.

This adds experimental support in /register for sending key statistical data on the last 1000 private messages that the user is a participant in. Because it's experimental, we require developers to request it explicitly in production (we don't use these data yet in the webapp, and it likely carries some perf cost). We expect this to be extremely helpful in initializing the mobile app user experience for showing recent private message conversations. See the code comments, but this has been heavily optimized to be very efficient and do all the filtering work at the database layer so that we minimize network transit with the database. Fixes #11944.
2025-10-23 04:52:12 +00:00 · 2019-03-19 20:15:58 -07:00
parent 0076b71411
commit 4c3c669b41
5 changed files with 237 additions and 7 deletions
--- a/zerver/lib/events.py
+++ b/zerver/lib/events.py
@@ -22,6 +22,8 @@ from zerver.lib.message import (
    aggregate_unread_data,
    apply_unread_message_event,
    get_raw_unread_data,
+    get_recent_conversations_recipient_id,
+    get_recent_private_conversations,
    get_starred_message_ids,
 )
 from zerver.lib.narrow import check_supported_events_narrow_filter, read_stop_words
@@ -122,6 +124,10 @@ def always_want(msg_type: str) -> bool:
    info for every event type.  Defining this at module
    level makes it easier to mock.
    '''
+    if settings.PRODUCTION and msg_type == "recent_private_conversations":  # nocoverage
+        # Temporary: Don't include recent_private_conversations in production
+        # by default while the feature is still experimental.
+        return False
    return True

 # Fetch initial data.  When event_types is not specified, clients want
@@ -273,6 +279,21 @@ def fetch_initial_state_data(user_profile: UserProfile,
                                        'config': load_bot_config_template(bot.name)})
        state['realm_embedded_bots'] = realm_embedded_bots

+    if want('recent_private_conversations'):
+        # A data structure containing records of this form:
+        #
+        #   [{'max_message_id': 700175, 'user_ids': [801]}]
+        #
+        # for all recent private message conversations, ordered by the
+        # highest message ID in the conversation.  The user_ids list
+        # is the list of users other than the current user in the
+        # private message conversation (so it is [] for PMs to self).
+        # Note that raw_recent_private_conversations is an
+        # intermediate form as a dictionary keyed by recipient_id,
+        # which is more efficient to update, and is rewritten to the
+        # final format in post_process_state.
+        state['raw_recent_private_conversations'] = get_recent_private_conversations(user_profile)
+
    if want('subscription'):
        subscriptions, unsubscribed, never_subscribed = gather_subscriptions_helper(
            user_profile, include_subscribers=include_subscribers)
@@ -371,10 +392,24 @@ def apply_event(state: Dict[str, Any],
                event['flags'],
            )

-        # Below, we handle maintaining first_message_id.
        if event['message']['type'] != "stream":
+            if 'raw_recent_private_conversations' in state:
+                # Handle maintaining the recent_private_conversations data structure.
+                conversations = state['raw_recent_private_conversations']
+                recipient_id = get_recent_conversations_recipient_id(
+                    user_profile, event['message']['recipient_id'],
+                    event['message']["sender_id"])
+
+                if recipient_id not in conversations:
+                    conversations[recipient_id] = dict(
+                        user_ids=[user_dict['id'] for user_dict in
+                                  event['message']['display_recipient'] if
+                                  user_dict['id'] != user_profile.id]
+                    )
+                conversations[recipient_id]['max_message_id'] = event['message']['id']
            return

+        # Below, we handle maintaining first_message_id.
        for sub_dict in state.get('subscriptions', []):
            if event['message']['stream_id'] == sub_dict['stream_id']:
                if sub_dict['first_message_id'] is None:
@@ -636,6 +671,31 @@ def apply_event(state: Dict[str, Any],

        remove_id = event['message_id']
        remove_message_id_from_unread_mgs(state, remove_id)
+
+        # The remainder of this block is about maintaining recent_private_conversations
+        if 'raw_recent_private_conversations' not in state or event['message_type'] != 'private':
+            return
+
+        recipient_id = get_recent_conversations_recipient_id(user_profile, event['recipient_id'],
+                                                             event['sender_id'])
+
+        # Ideally, we'd have test coverage for these two blocks.  To
+        # do that, we'll need a test where we delete not-the-latest
+        # messages or delete a private message not in
+        # recent_private_conversations.
+        if recipient_id not in state['raw_recent_private_conversations']:  # nocoverage
+            return
+
+        old_max_message_id = state['raw_recent_private_conversations'][recipient_id]['max_message_id']
+        if old_max_message_id != event['message_id']:  # nocoverage
+            return
+
+        # OK, we just deleted what had been the max_message_id for
+        # this recent conversation; we need to recompute that value
+        # from scratch.  Definitely don't need to re-query everything,
+        # but this case is likely rare enough that it's reasonable to do so.
+        state['raw_recent_private_conversations'] = \
+            get_recent_private_conversations(user_profile)
    elif event['type'] == "reaction":
        # The client will get the message with the reactions directly
        pass
@@ -807,7 +867,7 @@ def post_process_state(ret: Dict[str, Any]) -> None:
    '''
    See the note above; the same technique applies below.
    '''
-    if 'raw_users'in ret:
+    if 'raw_users' in ret:
        user_dicts = list(ret['raw_users'].values())

        ret['realm_users'] = [d for d in user_dicts if d['is_active']]
@@ -827,3 +887,12 @@ def post_process_state(ret: Dict[str, Any]) -> None:
            d.pop('is_active')

        del ret['raw_users']
+
+    if 'raw_recent_private_conversations' in ret:
+        # Reformat recent_private_conversations to be a list of dictionaries, rather than a dict.
+        ret['recent_private_conversations'] = sorted([
+            dict(
+                **value
+            ) for (recipient_id, value) in ret['raw_recent_private_conversations'].items()
+        ], key = lambda x: -x["max_message_id"])
+        del ret['raw_recent_private_conversations']
--- a/zerver/lib/message.py
+++ b/zerver/lib/message.py
@@ -6,6 +6,7 @@ import ahocorasick

 from django.utils.translation import ugettext as _
 from django.utils.timezone import now as timezone_now
+from django.db import connection
 from django.db.models import Sum

 from analytics.lib.counts import COUNT_STATS, RealmCount
@@ -974,3 +975,123 @@ def update_first_visible_message_id(realm: Realm) -> None:
            first_visible_message_id = 0
        realm.first_visible_message_id = first_visible_message_id
    realm.save(update_fields=["first_visible_message_id"])
+
+
+def get_recent_conversations_recipient_id(user_profile: UserProfile,
+                                          recipient_id: int,
+                                          sender_id: int) -> int:
+    """Helper for doing lookups of the recipient_id that
+    get_recent_private_conversations would have used to record that
+    message in its data structure.
+    """
+    my_recipient_id = Recipient.objects.get(type=Recipient.PERSONAL,
+                                            type_id=user_profile.id).id
+    if recipient_id == my_recipient_id:
+        return Recipient.objects.get(type=Recipient.PERSONAL,
+                                     type_id=sender_id).id
+    return recipient_id
+
+def get_recent_private_conversations(user_profile: UserProfile) -> Dict[int, Dict[str, Any]]:
+    """This function uses some carefully optimized SQL queries, designed
+    to use the UserMessage index on private_messages.  It is
+    significantly complicated by the fact that for 1:1 private
+    messages, we store the message against a recipient_id of whichever
+    user was the recipient, and thus for 1:1 private messages sent
+    directly to us, we need to look up the other user from the
+    sender_id on those messages.  You'll see that pattern repeated
+    both here and also in zerver/lib/events.py.
+
+    Ideally, we would write these queries using Django, but even
+    without the UNION ALL, that seems to not be possible, because the
+    equivalent Django syntax (for the first part of this query):
+
+        message_data = UserMessage.objects.select_related("message__recipient_id").filter(
+            user_profile=user_profile,
+        ).extra(
+            where=[UserMessage.where_private()]
+        ).order_by("-message_id")[:1000].values(
+            "message__recipient_id").annotate(last_message_id=Max("message_id"))
+
+    does not properly nest the GROUP BY (from .annotate) with the slicing.
+
+    We return a dictionary structure for convenient modification
+    below; this structure is converted into its final form by
+    post_process.
+
+    """
+    RECENT_CONVERSATIONS_LIMIT = 1000
+
+    recipient_map = {}
+    my_recipient_id = Recipient.objects.get(type=Recipient.PERSONAL,
+                                            type_id=user_profile.id).id
+
+    query = '''
+    SELECT
+        subquery.recipient_id, MAX(subquery.message_id)
+    FROM (
+        (SELECT
+            um.message_id AS message_id,
+            m.recipient_id AS recipient_id
+        FROM
+            zerver_usermessage um
+        JOIN
+            zerver_message m
+        ON
+            um.message_id = m.id
+        WHERE
+            um.user_profile_id=%(user_profile_id)d AND
+            um.flags & 2048 <> 0 AND
+            m.recipient_id <> %(my_recipient_id)d
+        ORDER BY message_id DESC
+        LIMIT %(conversation_limit)d)
+        UNION ALL
+        (SELECT
+            um.message_id AS message_id,
+            r.id AS recipient_id
+        FROM
+            zerver_usermessage um
+        JOIN
+            zerver_message m
+        ON
+            um.message_id = m.id
+        JOIN
+            zerver_recipient r
+        ON
+            r.type = 1 AND
+            r.type_id = m.sender_id
+        WHERE
+            um.user_profile_id=%(user_profile_id)d AND
+            um.flags & 2048 <> 0 AND
+            m.recipient_id=%(my_recipient_id)d
+        ORDER BY message_id DESC
+        LIMIT %(conversation_limit)d)
+    ) AS subquery
+    GROUP BY subquery.recipient_id
+    ''' % dict(
+        user_profile_id=user_profile.id,
+        conversation_limit=RECENT_CONVERSATIONS_LIMIT,
+        my_recipient_id=my_recipient_id,
+    )
+
+    cursor = connection.cursor()
+    cursor.execute(query)
+    rows = cursor.fetchall()
+    cursor.close()
+
+    # The resulting rows will be (recipient_id, max_message_id)
+    # objects for all parties we've had recent (group?) private
+    # message conversations with, including PMs with yourself (those
+    # will generate an empty list of user_ids).
+    for recipient_id, max_message_id in rows:
+        recipient_map[recipient_id] = dict(
+            max_message_id=max_message_id,
+            user_ids=list(),
+        )
+
+    # Now we need to map all the recipient_id objects to lists of user IDs
+    for (recipient_id, user_profile_id) in Subscription.objects.filter(
+            recipient_id__in=recipient_map.keys()).exclude(
+                user_profile_id=user_profile.id).values_list(
+                    "recipient_id", "user_profile_id"):
+        recipient_map[recipient_id]['user_ids'].append(user_profile_id)
+    return recipient_map
--- a/zerver/tests/test_events.py
+++ b/zerver/tests/test_events.py
@@ -3304,7 +3304,7 @@ class FetchQueriesTest(ZulipTestCase):
                    client_gravatar=False,
                )

-        self.assert_length(queries, 31)
+        self.assert_length(queries, 33)

        expected_counts = dict(
            alert_words=0,
@@ -3324,6 +3324,7 @@ class FetchQueriesTest(ZulipTestCase):
            realm_filters=1,
            realm_user=3,
            realm_user_groups=2,
+            recent_private_conversations=2,
            starred_messages=1,
            stream=2,
            stop_words=0,
--- a/zerver/tests/test_home.py
+++ b/zerver/tests/test_home.py
@@ -178,6 +178,7 @@ class HomeTest(ZulipTestCase):
            "realm_zoom_api_key",
            "realm_zoom_api_secret",
            "realm_zoom_user_id",
+            "recent_private_conversations",
            "root_domain_uri",
            "save_stacktraces",
            "search_pills_enabled",
@@ -227,7 +228,7 @@ class HomeTest(ZulipTestCase):
            with patch('zerver.lib.cache.cache_set') as cache_mock:
                result = self._get_home_page(stream='Denmark')

-        self.assert_length(queries, 43)
+        self.assert_length(queries, 45)
        self.assert_length(cache_mock.call_args_list, 7)

        html = result.content.decode('utf-8')
@@ -293,7 +294,7 @@ class HomeTest(ZulipTestCase):
                result = self._get_home_page()
                self.assertEqual(result.status_code, 200)
                self.assert_length(cache_mock.call_args_list, 6)
-            self.assert_length(queries, 40)
+            self.assert_length(queries, 42)

    @slow("Creates and subscribes 10 users in a loop.  Should use bulk queries.")
    def test_num_queries_with_streams(self) -> None:
@@ -325,7 +326,7 @@ class HomeTest(ZulipTestCase):
        with queries_captured() as queries2:
            result = self._get_home_page()

-        self.assert_length(queries2, 37)
+        self.assert_length(queries2, 39)

        # Do a sanity check that our new streams were in the payload.
        html = result.content.decode('utf-8')
--- a/zerver/tests/test_messages.py
+++ b/zerver/tests/test_messages.py
@@ -47,6 +47,7 @@ from zerver.lib.message import (
    bulk_access_messages,
    get_first_visible_message_id,
    get_raw_unread_data,
+    get_recent_private_conversations,
    maybe_update_first_visible_message_id,
    messages_for_ids,
    sew_messages_and_reactions,
@@ -1154,6 +1155,13 @@ class StreamMessagesTest(ZulipTestCase):
        # only these two messages are present in msg_data
        self.assertEqual(len(msg_data["huddle_dict"].keys()), 2)

+        recent_conversations = get_recent_private_conversations(users[1])
+        self.assertEqual(len(recent_conversations), 1)
+        recent_conversation = list(recent_conversations.values())[0]
+        self.assertEqual(set(recent_conversation['user_ids']), set(user.id for user in users if
+                                                                   user != users[1]))
+        self.assertEqual(recent_conversation['max_message_id'], message2_id)
+
 class MessageDictTest(ZulipTestCase):
    @slow('builds lots of messages')
    def test_bulk_message_fetching(self) -> None:
@@ -1509,12 +1517,42 @@ class MessagePOSTTest(ZulipTestCase):
        """
        Sending a personal message to a valid username is successful.
        """
-        self.login(self.example_email("hamlet"))
+        user_profile = self.example_user("hamlet")
+        self.login(user_profile.email)
        result = self.client_post("/json/messages", {"type": "private",
                                                     "content": "Test message",
                                                     "client": "test suite",
                                                     "to": self.example_email("othello")})
        self.assert_json_success(result)
+        message_id = ujson.loads(result.content.decode())['id']
+
+        recent_conversations = get_recent_private_conversations(user_profile)
+        self.assertEqual(len(recent_conversations), 1)
+        recent_conversation = list(recent_conversations.values())[0]
+        recipient_id = list(recent_conversations.keys())[0]
+        self.assertEqual(set(recent_conversation['user_ids']), set([self.example_user("othello").id]))
+        self.assertEqual(recent_conversation['max_message_id'], message_id)
+
+        # Now send a message to yourself and see how that interacts with the data structure
+        result = self.client_post("/json/messages", {"type": "private",
+                                                     "content": "Test message",
+                                                     "client": "test suite",
+                                                     "to": self.example_email("hamlet")})
+        self.assert_json_success(result)
+        self_message_id = ujson.loads(result.content.decode())['id']
+
+        recent_conversations = get_recent_private_conversations(user_profile)
+        self.assertEqual(len(recent_conversations), 2)
+        recent_conversation = recent_conversations[recipient_id]
+        self.assertEqual(set(recent_conversation['user_ids']), set([self.example_user("othello").id]))
+        self.assertEqual(recent_conversation['max_message_id'], message_id)
+
+        # Now verify we have the appropriate self-pm data structure
+        del recent_conversations[recipient_id]
+        recent_conversation = list(recent_conversations.values())[0]
+        recipient_id = list(recent_conversations.keys())[0]
+        self.assertEqual(set(recent_conversation['user_ids']), set([]))
+        self.assertEqual(recent_conversation['max_message_id'], self_message_id)

    def test_personal_message_by_id(self) -> None:
        """