mobile: Add support for recent_private_conversations API.

This adds experimental support in /register for sending key
statistical data on the last 1000 private messages that the user is a
participant in.  Because it's experimental, we require developers to
request it explicitly in production (we don't use these data yet in
the webapp, and it likely carries some perf cost).

We expect this to be extremely helpful in initializing the mobile app
user experience for showing recent private message conversations.

See the code comments, but this has been heavily optimized to be very
efficient and do all the filtering work at the database layer so that
we minimize network transit with the database.

Fixes #11944.
This commit is contained in:
Tim Abbott
2019-03-19 20:15:58 -07:00
parent 0076b71411
commit 4c3c669b41
5 changed files with 237 additions and 7 deletions

View File

@@ -22,6 +22,8 @@ from zerver.lib.message import (
aggregate_unread_data,
apply_unread_message_event,
get_raw_unread_data,
get_recent_conversations_recipient_id,
get_recent_private_conversations,
get_starred_message_ids,
)
from zerver.lib.narrow import check_supported_events_narrow_filter, read_stop_words
@@ -122,6 +124,10 @@ def always_want(msg_type: str) -> bool:
info for every event type. Defining this at module
level makes it easier to mock.
'''
if settings.PRODUCTION and msg_type == "recent_private_conversations": # nocoverage
# Temporary: Don't include recent_private_conversations in production
# by default while the feature is still experimental.
return False
return True
# Fetch initial data. When event_types is not specified, clients want
@@ -273,6 +279,21 @@ def fetch_initial_state_data(user_profile: UserProfile,
'config': load_bot_config_template(bot.name)})
state['realm_embedded_bots'] = realm_embedded_bots
if want('recent_private_conversations'):
# A data structure containing records of this form:
#
# [{'max_message_id': 700175, 'user_ids': [801]}]
#
# for all recent private message conversations, ordered by the
# highest message ID in the conversation. The user_ids list
# is the list of users other than the current user in the
# private message conversation (so it is [] for PMs to self).
# Note that raw_recent_private_conversations is an
# intermediate form as a dictionary keyed by recipient_id,
# which is more efficient to update, and is rewritten to the
# final format in post_process_state.
state['raw_recent_private_conversations'] = get_recent_private_conversations(user_profile)
if want('subscription'):
subscriptions, unsubscribed, never_subscribed = gather_subscriptions_helper(
user_profile, include_subscribers=include_subscribers)
@@ -371,10 +392,24 @@ def apply_event(state: Dict[str, Any],
event['flags'],
)
# Below, we handle maintaining first_message_id.
if event['message']['type'] != "stream":
if 'raw_recent_private_conversations' in state:
# Handle maintaining the recent_private_conversations data structure.
conversations = state['raw_recent_private_conversations']
recipient_id = get_recent_conversations_recipient_id(
user_profile, event['message']['recipient_id'],
event['message']["sender_id"])
if recipient_id not in conversations:
conversations[recipient_id] = dict(
user_ids=[user_dict['id'] for user_dict in
event['message']['display_recipient'] if
user_dict['id'] != user_profile.id]
)
conversations[recipient_id]['max_message_id'] = event['message']['id']
return
# Below, we handle maintaining first_message_id.
for sub_dict in state.get('subscriptions', []):
if event['message']['stream_id'] == sub_dict['stream_id']:
if sub_dict['first_message_id'] is None:
@@ -636,6 +671,31 @@ def apply_event(state: Dict[str, Any],
remove_id = event['message_id']
remove_message_id_from_unread_mgs(state, remove_id)
# The remainder of this block is about maintaining recent_private_conversations
if 'raw_recent_private_conversations' not in state or event['message_type'] != 'private':
return
recipient_id = get_recent_conversations_recipient_id(user_profile, event['recipient_id'],
event['sender_id'])
# Ideally, we'd have test coverage for these two blocks. To
# do that, we'll need a test where we delete not-the-latest
# messages or delete a private message not in
# recent_private_conversations.
if recipient_id not in state['raw_recent_private_conversations']: # nocoverage
return
old_max_message_id = state['raw_recent_private_conversations'][recipient_id]['max_message_id']
if old_max_message_id != event['message_id']: # nocoverage
return
# OK, we just deleted what had been the max_message_id for
# this recent conversation; we need to recompute that value
# from scratch. Definitely don't need to re-query everything,
# but this case is likely rare enough that it's reasonable to do so.
state['raw_recent_private_conversations'] = \
get_recent_private_conversations(user_profile)
elif event['type'] == "reaction":
# The client will get the message with the reactions directly
pass
@@ -807,7 +867,7 @@ def post_process_state(ret: Dict[str, Any]) -> None:
'''
See the note above; the same technique applies below.
'''
if 'raw_users'in ret:
if 'raw_users' in ret:
user_dicts = list(ret['raw_users'].values())
ret['realm_users'] = [d for d in user_dicts if d['is_active']]
@@ -827,3 +887,12 @@ def post_process_state(ret: Dict[str, Any]) -> None:
d.pop('is_active')
del ret['raw_users']
if 'raw_recent_private_conversations' in ret:
# Reformat recent_private_conversations to be a list of dictionaries, rather than a dict.
ret['recent_private_conversations'] = sorted([
dict(
**value
) for (recipient_id, value) in ret['raw_recent_private_conversations'].items()
], key = lambda x: -x["max_message_id"])
del ret['raw_recent_private_conversations']

View File

@@ -6,6 +6,7 @@ import ahocorasick
from django.utils.translation import ugettext as _
from django.utils.timezone import now as timezone_now
from django.db import connection
from django.db.models import Sum
from analytics.lib.counts import COUNT_STATS, RealmCount
@@ -974,3 +975,123 @@ def update_first_visible_message_id(realm: Realm) -> None:
first_visible_message_id = 0
realm.first_visible_message_id = first_visible_message_id
realm.save(update_fields=["first_visible_message_id"])
def get_recent_conversations_recipient_id(user_profile: UserProfile,
recipient_id: int,
sender_id: int) -> int:
"""Helper for doing lookups of the recipient_id that
get_recent_private_conversations would have used to record that
message in its data structure.
"""
my_recipient_id = Recipient.objects.get(type=Recipient.PERSONAL,
type_id=user_profile.id).id
if recipient_id == my_recipient_id:
return Recipient.objects.get(type=Recipient.PERSONAL,
type_id=sender_id).id
return recipient_id
def get_recent_private_conversations(user_profile: UserProfile) -> Dict[int, Dict[str, Any]]:
"""This function uses some carefully optimized SQL queries, designed
to use the UserMessage index on private_messages. It is
significantly complicated by the fact that for 1:1 private
messages, we store the message against a recipient_id of whichever
user was the recipient, and thus for 1:1 private messages sent
directly to us, we need to look up the other user from the
sender_id on those messages. You'll see that pattern repeated
both here and also in zerver/lib/events.py.
Ideally, we would write these queries using Django, but even
without the UNION ALL, that seems to not be possible, because the
equivalent Django syntax (for the first part of this query):
message_data = UserMessage.objects.select_related("message__recipient_id").filter(
user_profile=user_profile,
).extra(
where=[UserMessage.where_private()]
).order_by("-message_id")[:1000].values(
"message__recipient_id").annotate(last_message_id=Max("message_id"))
does not properly nest the GROUP BY (from .annotate) with the slicing.
We return a dictionary structure for convenient modification
below; this structure is converted into its final form by
post_process.
"""
RECENT_CONVERSATIONS_LIMIT = 1000
recipient_map = {}
my_recipient_id = Recipient.objects.get(type=Recipient.PERSONAL,
type_id=user_profile.id).id
query = '''
SELECT
subquery.recipient_id, MAX(subquery.message_id)
FROM (
(SELECT
um.message_id AS message_id,
m.recipient_id AS recipient_id
FROM
zerver_usermessage um
JOIN
zerver_message m
ON
um.message_id = m.id
WHERE
um.user_profile_id=%(user_profile_id)d AND
um.flags & 2048 <> 0 AND
m.recipient_id <> %(my_recipient_id)d
ORDER BY message_id DESC
LIMIT %(conversation_limit)d)
UNION ALL
(SELECT
um.message_id AS message_id,
r.id AS recipient_id
FROM
zerver_usermessage um
JOIN
zerver_message m
ON
um.message_id = m.id
JOIN
zerver_recipient r
ON
r.type = 1 AND
r.type_id = m.sender_id
WHERE
um.user_profile_id=%(user_profile_id)d AND
um.flags & 2048 <> 0 AND
m.recipient_id=%(my_recipient_id)d
ORDER BY message_id DESC
LIMIT %(conversation_limit)d)
) AS subquery
GROUP BY subquery.recipient_id
''' % dict(
user_profile_id=user_profile.id,
conversation_limit=RECENT_CONVERSATIONS_LIMIT,
my_recipient_id=my_recipient_id,
)
cursor = connection.cursor()
cursor.execute(query)
rows = cursor.fetchall()
cursor.close()
# The resulting rows will be (recipient_id, max_message_id)
# objects for all parties we've had recent (group?) private
# message conversations with, including PMs with yourself (those
# will generate an empty list of user_ids).
for recipient_id, max_message_id in rows:
recipient_map[recipient_id] = dict(
max_message_id=max_message_id,
user_ids=list(),
)
# Now we need to map all the recipient_id objects to lists of user IDs
for (recipient_id, user_profile_id) in Subscription.objects.filter(
recipient_id__in=recipient_map.keys()).exclude(
user_profile_id=user_profile.id).values_list(
"recipient_id", "user_profile_id"):
recipient_map[recipient_id]['user_ids'].append(user_profile_id)
return recipient_map

View File

@@ -3304,7 +3304,7 @@ class FetchQueriesTest(ZulipTestCase):
client_gravatar=False,
)
self.assert_length(queries, 31)
self.assert_length(queries, 33)
expected_counts = dict(
alert_words=0,
@@ -3324,6 +3324,7 @@ class FetchQueriesTest(ZulipTestCase):
realm_filters=1,
realm_user=3,
realm_user_groups=2,
recent_private_conversations=2,
starred_messages=1,
stream=2,
stop_words=0,

View File

@@ -178,6 +178,7 @@ class HomeTest(ZulipTestCase):
"realm_zoom_api_key",
"realm_zoom_api_secret",
"realm_zoom_user_id",
"recent_private_conversations",
"root_domain_uri",
"save_stacktraces",
"search_pills_enabled",
@@ -227,7 +228,7 @@ class HomeTest(ZulipTestCase):
with patch('zerver.lib.cache.cache_set') as cache_mock:
result = self._get_home_page(stream='Denmark')
self.assert_length(queries, 43)
self.assert_length(queries, 45)
self.assert_length(cache_mock.call_args_list, 7)
html = result.content.decode('utf-8')
@@ -293,7 +294,7 @@ class HomeTest(ZulipTestCase):
result = self._get_home_page()
self.assertEqual(result.status_code, 200)
self.assert_length(cache_mock.call_args_list, 6)
self.assert_length(queries, 40)
self.assert_length(queries, 42)
@slow("Creates and subscribes 10 users in a loop. Should use bulk queries.")
def test_num_queries_with_streams(self) -> None:
@@ -325,7 +326,7 @@ class HomeTest(ZulipTestCase):
with queries_captured() as queries2:
result = self._get_home_page()
self.assert_length(queries2, 37)
self.assert_length(queries2, 39)
# Do a sanity check that our new streams were in the payload.
html = result.content.decode('utf-8')

View File

@@ -47,6 +47,7 @@ from zerver.lib.message import (
bulk_access_messages,
get_first_visible_message_id,
get_raw_unread_data,
get_recent_private_conversations,
maybe_update_first_visible_message_id,
messages_for_ids,
sew_messages_and_reactions,
@@ -1154,6 +1155,13 @@ class StreamMessagesTest(ZulipTestCase):
# only these two messages are present in msg_data
self.assertEqual(len(msg_data["huddle_dict"].keys()), 2)
recent_conversations = get_recent_private_conversations(users[1])
self.assertEqual(len(recent_conversations), 1)
recent_conversation = list(recent_conversations.values())[0]
self.assertEqual(set(recent_conversation['user_ids']), set(user.id for user in users if
user != users[1]))
self.assertEqual(recent_conversation['max_message_id'], message2_id)
class MessageDictTest(ZulipTestCase):
@slow('builds lots of messages')
def test_bulk_message_fetching(self) -> None:
@@ -1509,12 +1517,42 @@ class MessagePOSTTest(ZulipTestCase):
"""
Sending a personal message to a valid username is successful.
"""
self.login(self.example_email("hamlet"))
user_profile = self.example_user("hamlet")
self.login(user_profile.email)
result = self.client_post("/json/messages", {"type": "private",
"content": "Test message",
"client": "test suite",
"to": self.example_email("othello")})
self.assert_json_success(result)
message_id = ujson.loads(result.content.decode())['id']
recent_conversations = get_recent_private_conversations(user_profile)
self.assertEqual(len(recent_conversations), 1)
recent_conversation = list(recent_conversations.values())[0]
recipient_id = list(recent_conversations.keys())[0]
self.assertEqual(set(recent_conversation['user_ids']), set([self.example_user("othello").id]))
self.assertEqual(recent_conversation['max_message_id'], message_id)
# Now send a message to yourself and see how that interacts with the data structure
result = self.client_post("/json/messages", {"type": "private",
"content": "Test message",
"client": "test suite",
"to": self.example_email("hamlet")})
self.assert_json_success(result)
self_message_id = ujson.loads(result.content.decode())['id']
recent_conversations = get_recent_private_conversations(user_profile)
self.assertEqual(len(recent_conversations), 2)
recent_conversation = recent_conversations[recipient_id]
self.assertEqual(set(recent_conversation['user_ids']), set([self.example_user("othello").id]))
self.assertEqual(recent_conversation['max_message_id'], message_id)
# Now verify we have the appropriate self-pm data structure
del recent_conversations[recipient_id]
recent_conversation = list(recent_conversations.values())[0]
recipient_id = list(recent_conversations.keys())[0]
self.assertEqual(set(recent_conversation['user_ids']), set([]))
self.assertEqual(recent_conversation['max_message_id'], self_message_id)
def test_personal_message_by_id(self) -> None:
"""