presence: Extract get_legacy_user_info.

This code is a bit flatter and just preps the data for a single user. There is never any interaction between the data for user A and user B, so we can mostly avoid complicated nested data structures and do most of the data-crunching on a per-user basis. We also do an explicit sort of the data before running it through groupby. The explicit sort simplifies how we calculate `most_recent_info` and also avoids needing to add `dt` to an intermediate data structure. Finally, when it comes to the individual client data, the code has relied on the assumption that there is only one row per client, which I believe to be true, but now the code is more explicit about that.
2025-11-02 04:53:36 +00:00 · 2020-02-06 20:00:21 +00:00
parent bf3baa14ac
commit 7e99e7feb2
1 changed files with 57 additions and 37 deletions
--- a/zerver/lib/presence.py
+++ b/zerver/lib/presence.py
@@ -1,11 +1,12 @@
 from collections import defaultdict

 import datetime
+import itertools
 import time

 from django.utils.timezone import now as timezone_now

-from typing import Any, DefaultDict, Dict, List, Set
+from typing import Any, Dict, List, Set

 from zerver.lib.timestamp import datetime_to_timestamp
 from zerver.models import (
@@ -16,12 +17,20 @@ from zerver.models import (
    UserProfile,
 )

-def get_status_dicts_for_rows(presence_rows: List[Dict[str, Any]],
+def get_status_dicts_for_rows(all_rows: List[Dict[str, Any]],
                              mobile_user_ids: Set[int],
                              slim_presence: bool) -> Dict[str, Dict[str, Any]]:

-    info_row_dct = defaultdict(list)  # type: DefaultDict[str, List[Dict[str, Any]]]
-    for row in presence_rows:
+    # Note that datetime values have sub-second granularity, which is
+    # mostly important for avoiding test flakes, but it's also technically
+    # more precise for real users.
+    # We could technically do this sort with the database, but doing it
+    # here prevents us from having to assume the caller is playing nice.
+    all_rows = sorted(
+        all_rows,
+        key = lambda row: (row['user_profile__id'], row['timestamp'])
+    )
+
    # For now slim_presence just means that we will use
    # user_id as a key instead of email.  We will eventually
    # do other things based on this flag to make things simpler
@@ -29,10 +38,28 @@ def get_status_dicts_for_rows(presence_rows: List[Dict[str, Any]],
    if slim_presence:
        # Stringify user_id here, since it's gonna be turned
        # into a string anyway by JSON, and it keeps mypy happy.
-            user_key = str(row['user_profile__id'])
+        get_user_key = lambda row: str(row['user_profile__id'])
    else:
-            user_key = row['user_profile__email']
+        get_user_key = lambda row: row['user_profile__email']

+    user_statuses = dict()  # type: Dict[str, Dict[str, Any]]
+
+    for user_key, presence_rows in itertools.groupby(all_rows, get_user_key):
+        info = get_legacy_user_info(
+            list(presence_rows),
+            mobile_user_ids
+        )
+        user_statuses[user_key] = info
+
+    return user_statuses
+
+def get_legacy_user_info(presence_rows: List[Dict[str, Any]],
+                         mobile_user_ids: Set[int]) -> Dict[str, Any]:
+
+    # The format of data here is for legacy users of our API,
+    # including old versions of the mobile app.
+    info_rows = []
+    for row in presence_rows:
        client_name = row['client__name']
        status = UserPresence.status_to_string(row['status'])
        dt = row['timestamp']
@@ -44,38 +71,31 @@ def get_status_dicts_for_rows(presence_rows: List[Dict[str, Any]],
        info = dict(
            client=client_name,
            status=status,
-            dt=dt,
            timestamp=timestamp,
            pushable=pushable,
        )

-        info_row_dct[user_key].append(info)
+        info_rows.append(info)

-    user_statuses = dict()  # type: Dict[str, Dict[str, Any]]
+    most_recent_info = info_rows[-1]

-    for user_key, info_rows in info_row_dct.items():
-        # Note that datetime values have sub-second granularity, which is
-        # mostly important for avoiding test flakes, but it's also technically
-        # more precise for real users.
-        by_time = lambda row: row['dt']
-        most_recent_info = max(info_rows, key=by_time)
-
-        # We don't send datetime values to the client.
-        for r in info_rows:
-            del r['dt']
-
-        client_dict = {info['client']: info for info in info_rows}
-        user_statuses[user_key] = client_dict
+    result = dict()

    # The word "aggegrated" here is possibly misleading.
    # It's really just the most recent client's info.
-        user_statuses[user_key]['aggregated'] = dict(
+    result['aggregated'] = dict(
        client=most_recent_info['client'],
        status=most_recent_info['status'],
        timestamp=most_recent_info['timestamp'],
    )

-    return user_statuses
+    # Build a dictionary of client -> info.  There should
+    # only be one row per client, but to be on the safe side,
+    # we always overwrite with rows that are later in our list.
+    for info in info_rows:
+        result[info['client']] = info
+
+    return result

 def get_presence_for_user(user_profile_id: int,
                          slim_presence: bool=False) -> Dict[str, Dict[str, Any]]: