diff --git a/zerver/lib/remote_server.py b/zerver/lib/remote_server.py index 7e5cf49c61..a51b03aa18 100644 --- a/zerver/lib/remote_server.py +++ b/zerver/lib/remote_server.py @@ -5,14 +5,13 @@ from urllib.parse import urljoin import orjson import requests from django.conf import settings -from django.forms.models import model_to_dict +from django.db.models import QuerySet from django.utils.translation import gettext as _ -from pydantic import UUID4, BaseModel, ConfigDict, Field, field_validator +from pydantic import UUID4, BaseModel, ConfigDict, Field, Json, field_validator from analytics.models import InstallationCount, RealmCount from version import ZULIP_VERSION from zerver.lib.exceptions import JsonableError, MissingRemoteRealmError -from zerver.lib.export import floatify_datetime_fields from zerver.lib.outgoing_http import OutgoingSession from zerver.lib.queue import queue_json_publish from zerver.lib.types import RemoteRealmDictValue @@ -36,6 +35,32 @@ class PushNotificationBouncerServerError(PushNotificationBouncerRetryLaterError) http_status_code = 502 +class RealmCountDataForAnalytics(BaseModel): + property: str + realm: int + id: int + end_time: float + subgroup: Optional[str] + value: int + + +class InstallationCountDataForAnalytics(BaseModel): + property: str + id: int + end_time: float + subgroup: Optional[str] + value: int + + +class RealmAuditLogDataForAnalytics(BaseModel): + id: int + realm: int + event_time: float + backfilled: bool + extra_data: Optional[Union[str, Dict[str, Any]]] + event_type: int + + class RealmDataForAnalytics(BaseModel): model_config = ConfigDict(extra="forbid") @@ -61,6 +86,14 @@ class RealmDataForAnalytics(BaseModel): return value +class AnalyticsRequest(BaseModel): + realm_counts: Json[List[RealmCountDataForAnalytics]] + installation_counts: Json[List[InstallationCountDataForAnalytics]] + realmauditlog_rows: Optional[Json[List[RealmAuditLogDataForAnalytics]]] = None + realms: Json[List[RealmDataForAnalytics]] + version: Optional[Json[str]] + + class UserDataForRemoteBilling(BaseModel): uuid: UUID4 email: str @@ -189,45 +222,54 @@ def send_json_to_push_bouncer( ) -REALMAUDITLOG_PUSHED_FIELDS = [ - "id", - "realm", - "event_time", - "backfilled", - # Note that we don't need to add extra_data_json here because - # the view remote_server_post_analytics populates extra_data_json - # from the provided extra_data. - "extra_data", - "event_type", -] - - def build_analytics_data( - realm_count_query: Any, installation_count_query: Any, realmauditlog_query: Any -) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]: + realm_count_query: QuerySet[RealmCount], + installation_count_query: QuerySet[InstallationCount], + realmauditlog_query: QuerySet[RealmAuditLog], +) -> Tuple[ + List[RealmCountDataForAnalytics], + List[InstallationCountDataForAnalytics], + List[RealmAuditLogDataForAnalytics], +]: # We limit the batch size on the client side to avoid OOM kills timeouts, etc. MAX_CLIENT_BATCH_SIZE = 10000 - data = {} - data["analytics_realmcount"] = [ - model_to_dict(row) for row in realm_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE] + realm_count_data = [ + RealmCountDataForAnalytics( + property=row.property, + realm=row.realm.id, + id=row.id, + end_time=row.end_time.timestamp(), + subgroup=row.subgroup, + value=row.value, + ) + for row in realm_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE] ] - data["analytics_installationcount"] = [ - model_to_dict(row) + installation_count_data = [ + InstallationCountDataForAnalytics( + property=row.property, + id=row.id, + end_time=row.end_time.timestamp(), + subgroup=row.subgroup, + value=row.value, + ) for row in installation_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE] ] - data["zerver_realmauditlog"] = [ - model_to_dict(row, fields=REALMAUDITLOG_PUSHED_FIELDS) + zerver_realmauditlog = [ + RealmAuditLogDataForAnalytics( + id=row.id, + realm=row.realm.id, + event_time=row.event_time.timestamp(), + backfilled=row.backfilled, + # Note that we don't need to add extra_data_json here because + # the view remote_server_post_analytics populates extra_data_json + # from the provided extra_data. + extra_data=row.extra_data, + event_type=row.event_type, + ) for row in realmauditlog_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE] ] - floatify_datetime_fields(data, "analytics_realmcount") - floatify_datetime_fields(data, "analytics_installationcount") - floatify_datetime_fields(data, "zerver_realmauditlog") - return ( - data["analytics_realmcount"], - data["analytics_installationcount"], - data["zerver_realmauditlog"], - ) + return realm_count_data, installation_count_data, zerver_realmauditlog def get_realms_info_for_push_bouncer(realm_id: Optional[int] = None) -> List[RealmDataForAnalytics]: @@ -287,36 +329,36 @@ def send_analytics_to_push_bouncer() -> None: ) record_count = len(realm_count_data) + len(installation_count_data) + len(realmauditlog_data) - request = { - "realm_counts": orjson.dumps(realm_count_data).decode(), - "installation_counts": orjson.dumps(installation_count_data).decode(), - "realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(), - "realms": orjson.dumps( - [dict(realm_data) for realm_data in get_realms_info_for_push_bouncer()] - ).decode(), - "version": orjson.dumps(ZULIP_VERSION).decode(), - } + request = AnalyticsRequest.model_construct( + realm_counts=realm_count_data, + installation_counts=installation_count_data, + realmauditlog_rows=realmauditlog_data, + realms=get_realms_info_for_push_bouncer(), + version=ZULIP_VERSION, + ) try: - send_to_push_bouncer("POST", "server/analytics", request) + send_to_push_bouncer("POST", "server/analytics", request.model_dump(round_trip=True)) except JsonableError as e: logger.warning(e.msg) logger.info("Reported %d records", record_count) def send_realms_only_to_push_bouncer() -> Dict[str, RemoteRealmDictValue]: - request = { - "realm_counts": "[]", - "installation_counts": "[]", - "realms": orjson.dumps( - [dict(realm_data) for realm_data in get_realms_info_for_push_bouncer()] - ).decode(), - "version": orjson.dumps(ZULIP_VERSION).decode(), - } + request = AnalyticsRequest.model_construct( + realm_counts=[], + installation_counts=[], + realms=get_realms_info_for_push_bouncer(), + version=ZULIP_VERSION, + ) # We don't catch JsonableError here, because we want it to propagate further # to either explicitly, loudly fail or be error-handled by the caller. - response = send_to_push_bouncer("POST", "server/analytics", request) + response = send_to_push_bouncer( + "POST", + "server/analytics", + request.model_dump(round_trip=True, exclude={"realmauditlog_rows"}), + ) assert isinstance(response["realms"], dict) # for mypy return response["realms"] diff --git a/zerver/tests/test_push_notifications.py b/zerver/tests/test_push_notifications.py index 9f4008103d..ac84a3b556 100644 --- a/zerver/tests/test_push_notifications.py +++ b/zerver/tests/test_push_notifications.py @@ -60,6 +60,7 @@ from zerver.lib.push_notifications import ( send_notifications_to_bouncer, ) from zerver.lib.remote_server import ( + AnalyticsRequest, PushNotificationBouncerError, PushNotificationBouncerRetryLaterError, PushNotificationBouncerServerError, @@ -1351,14 +1352,17 @@ class AnalyticsBouncerTest(BouncerTestCase): (realm_count_data, installation_count_data, realmauditlog_data) = build_analytics_data( RealmCount.objects.all(), InstallationCount.objects.all(), RealmAuditLog.objects.all() ) + request = AnalyticsRequest.model_construct( + realm_counts=realm_count_data, + installation_counts=installation_count_data, + realmauditlog_rows=realmauditlog_data, + realms=[], + version=None, + ) result = self.uuid_post( self.server_uuid, "/api/v1/remotes/server/analytics", - { - "realm_counts": orjson.dumps(realm_count_data).decode(), - "installation_counts": orjson.dumps(installation_count_data).decode(), - "realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(), - }, + request.model_dump(round_trip=True, exclude={"realms", "version"}), subdomain="", ) self.assert_json_error(result, "Data is out of order.") @@ -1382,19 +1386,21 @@ class AnalyticsBouncerTest(BouncerTestCase): check_counts(11, 11, 3, 2, 8) # Test that only valid org_type values are accepted - integers defined in OrgTypeEnum. - realms_data = [dict(realm) for realm in get_realms_info_for_push_bouncer()] + realms_data = get_realms_info_for_push_bouncer() # Not a valid org_type value: - realms_data[0]["org_type"] = 11 + realms_data[0].org_type = 11 + request = AnalyticsRequest.model_construct( + realm_counts=[], + installation_counts=[], + realmauditlog_rows=[], + realms=realms_data, + version=None, + ) result = self.uuid_post( self.server_uuid, "/api/v1/remotes/server/analytics", - { - "realm_counts": orjson.dumps([]).decode(), - "installation_counts": orjson.dumps([]).decode(), - "realmauditlog_rows": orjson.dumps([]).decode(), - "realms": orjson.dumps(realms_data).decode(), - }, + request.model_dump(round_trip=True, exclude={"version", "api_feature_level"}), subdomain="", ) self.assert_json_error( @@ -1435,15 +1441,17 @@ class AnalyticsBouncerTest(BouncerTestCase): ) # This first post should fail because of excessive audit log event types. + request = AnalyticsRequest.model_construct( + realm_counts=realm_count_data, + installation_counts=installation_count_data, + realmauditlog_rows=realmauditlog_data, + realms=[], + version=None, + ) result = self.uuid_post( self.server_uuid, "/api/v1/remotes/server/analytics", - { - "realm_counts": orjson.dumps(realm_count_data).decode(), - "installation_counts": orjson.dumps(installation_count_data).decode(), - "realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(), - "realms": orjson.dumps([]).decode(), - }, + request.model_dump(round_trip=True, exclude={"version"}), subdomain="", ) self.assert_json_error(result, "Invalid event type.") @@ -1458,15 +1466,17 @@ class AnalyticsBouncerTest(BouncerTestCase): # Send the data to the bouncer without any realms data. This should lead # to successful saving of the data, but with the remote_realm foreign key # set to NULL. + request = AnalyticsRequest.model_construct( + realm_counts=realm_count_data, + installation_counts=installation_count_data, + realmauditlog_rows=realmauditlog_data, + realms=[], + version=None, + ) result = self.uuid_post( self.server_uuid, "/api/v1/remotes/server/analytics", - { - "realm_counts": orjson.dumps(realm_count_data).decode(), - "installation_counts": orjson.dumps(installation_count_data).decode(), - "realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(), - "realms": orjson.dumps([]).decode(), - }, + request.model_dump(round_trip=True, exclude={"version"}), subdomain="", ) self.assert_json_success(result) diff --git a/zilencer/views.py b/zilencer/views.py index d831662afa..35d11705b6 100644 --- a/zilencer/views.py +++ b/zilencer/views.py @@ -34,7 +34,12 @@ from zerver.lib.push_notifications import ( send_apple_push_notification, send_test_push_notification_directly_to_devices, ) -from zerver.lib.remote_server import RealmDataForAnalytics +from zerver.lib.remote_server import ( + InstallationCountDataForAnalytics, + RealmAuditLogDataForAnalytics, + RealmCountDataForAnalytics, + RealmDataForAnalytics, +) from zerver.lib.request import REQ, has_request_variables from zerver.lib.response import json_success from zerver.lib.timestamp import datetime_to_timestamp, timestamp_to_datetime @@ -659,32 +664,6 @@ def update_remote_realm_data_for_server( RemoteRealmAuditLog.objects.bulk_create(remote_realm_audit_logs) -class RealmAuditLogDataForAnalytics(BaseModel): - id: int - realm: int - event_time: float - backfilled: bool - extra_data: Optional[Union[str, Dict[str, Any]]] - event_type: int - - -class RealmCountDataForAnalytics(BaseModel): - property: str - realm: int - id: int - end_time: float - subgroup: Optional[str] - value: int - - -class InstallationCountDataForAnalytics(BaseModel): - property: str - id: int - end_time: float - subgroup: Optional[str] - value: int - - @typed_endpoint @transaction.atomic def remote_server_post_analytics(