presence: Extract get_legacy_user_info.

This code is a bit flatter and just preps the data
for a single user.  There is never any interaction
between the data for user A and user B, so we can
mostly avoid complicated nested data structures
and do most of the data-crunching on a per-user basis.

We also do an explicit sort of the data before
running it through groupby.  The explicit sort
simplifies how we calculate `most_recent_info`
and also avoids needing to add `dt` to an intermediate
data structure.

Finally, when it comes to the individual client data,
the code has relied on the assumption that there is
only one row per client, which I believe to be true,
but now the code is more explicit about that.
This commit is contained in:
Steve Howell
2020-02-06 20:00:21 +00:00
committed by Tim Abbott
parent bf3baa14ac
commit 7e99e7feb2

View File

@@ -1,11 +1,12 @@
from collections import defaultdict from collections import defaultdict
import datetime import datetime
import itertools
import time import time
from django.utils.timezone import now as timezone_now from django.utils.timezone import now as timezone_now
from typing import Any, DefaultDict, Dict, List, Set from typing import Any, Dict, List, Set
from zerver.lib.timestamp import datetime_to_timestamp from zerver.lib.timestamp import datetime_to_timestamp
from zerver.models import ( from zerver.models import (
@@ -16,23 +17,49 @@ from zerver.models import (
UserProfile, UserProfile,
) )
def get_status_dicts_for_rows(presence_rows: List[Dict[str, Any]], def get_status_dicts_for_rows(all_rows: List[Dict[str, Any]],
mobile_user_ids: Set[int], mobile_user_ids: Set[int],
slim_presence: bool) -> Dict[str, Dict[str, Any]]: slim_presence: bool) -> Dict[str, Dict[str, Any]]:
info_row_dct = defaultdict(list) # type: DefaultDict[str, List[Dict[str, Any]]] # Note that datetime values have sub-second granularity, which is
for row in presence_rows: # mostly important for avoiding test flakes, but it's also technically
# For now slim_presence just means that we will use # more precise for real users.
# user_id as a key instead of email. We will eventually # We could technically do this sort with the database, but doing it
# do other things based on this flag to make things simpler # here prevents us from having to assume the caller is playing nice.
# for the clients. all_rows = sorted(
if slim_presence: all_rows,
# Stringify user_id here, since it's gonna be turned key = lambda row: (row['user_profile__id'], row['timestamp'])
# into a string anyway by JSON, and it keeps mypy happy. )
user_key = str(row['user_profile__id'])
else:
user_key = row['user_profile__email']
# For now slim_presence just means that we will use
# user_id as a key instead of email. We will eventually
# do other things based on this flag to make things simpler
# for the clients.
if slim_presence:
# Stringify user_id here, since it's gonna be turned
# into a string anyway by JSON, and it keeps mypy happy.
get_user_key = lambda row: str(row['user_profile__id'])
else:
get_user_key = lambda row: row['user_profile__email']
user_statuses = dict() # type: Dict[str, Dict[str, Any]]
for user_key, presence_rows in itertools.groupby(all_rows, get_user_key):
info = get_legacy_user_info(
list(presence_rows),
mobile_user_ids
)
user_statuses[user_key] = info
return user_statuses
def get_legacy_user_info(presence_rows: List[Dict[str, Any]],
mobile_user_ids: Set[int]) -> Dict[str, Any]:
# The format of data here is for legacy users of our API,
# including old versions of the mobile app.
info_rows = []
for row in presence_rows:
client_name = row['client__name'] client_name = row['client__name']
status = UserPresence.status_to_string(row['status']) status = UserPresence.status_to_string(row['status'])
dt = row['timestamp'] dt = row['timestamp']
@@ -44,38 +71,31 @@ def get_status_dicts_for_rows(presence_rows: List[Dict[str, Any]],
info = dict( info = dict(
client=client_name, client=client_name,
status=status, status=status,
dt=dt,
timestamp=timestamp, timestamp=timestamp,
pushable=pushable, pushable=pushable,
) )
info_row_dct[user_key].append(info) info_rows.append(info)
user_statuses = dict() # type: Dict[str, Dict[str, Any]] most_recent_info = info_rows[-1]
for user_key, info_rows in info_row_dct.items(): result = dict()
# Note that datetime values have sub-second granularity, which is
# mostly important for avoiding test flakes, but it's also technically
# more precise for real users.
by_time = lambda row: row['dt']
most_recent_info = max(info_rows, key=by_time)
# We don't send datetime values to the client. # The word "aggegrated" here is possibly misleading.
for r in info_rows: # It's really just the most recent client's info.
del r['dt'] result['aggregated'] = dict(
client=most_recent_info['client'],
status=most_recent_info['status'],
timestamp=most_recent_info['timestamp'],
)
client_dict = {info['client']: info for info in info_rows} # Build a dictionary of client -> info. There should
user_statuses[user_key] = client_dict # only be one row per client, but to be on the safe side,
# we always overwrite with rows that are later in our list.
for info in info_rows:
result[info['client']] = info
# The word "aggegrated" here is possibly misleading. return result
# It's really just the most recent client's info.
user_statuses[user_key]['aggregated'] = dict(
client=most_recent_info['client'],
status=most_recent_info['status'],
timestamp=most_recent_info['timestamp'],
)
return user_statuses
def get_presence_for_user(user_profile_id: int, def get_presence_for_user(user_profile_id: int,
slim_presence: bool=False) -> Dict[str, Dict[str, Any]]: slim_presence: bool=False) -> Dict[str, Dict[str, Any]]: