analytics: Slew record reporting by up to 10 minutes.

This reduces the giant load spike at 5 minutes past the hour, when all
remote servers currently attempt to submit their records.

We do not wish to slew over a full hour, because we want to ensure
that we do not hold the lock when the next hour's analytics runs.  It
is also not necessary to have that much variation; 10 minutes is
picked as an arbitrary "long enough" time to spread requests over.
This commit is contained in:
Alex Vandiver
2023-11-17 15:57:59 +00:00
committed by Tim Abbott
parent 85cc8b6a20
commit b363999d19
2 changed files with 14 additions and 1 deletions

View File

@@ -1,3 +1,4 @@
import hashlib
import os
import time
from argparse import ArgumentParser
@@ -96,4 +97,13 @@ class Command(BaseCommand):
logger.info("Finished updating analytics counts through %s", fill_to_time)
if settings.PUSH_NOTIFICATION_BOUNCER_URL and settings.SUBMIT_USAGE_STATISTICS:
# Skew 0-10 minutes based on a hash of settings.ZULIP_ORG_ID, so
# that each server will report in at a somewhat consistent time.
assert settings.ZULIP_ORG_ID
delay = int.from_bytes(
hashlib.sha256(settings.ZULIP_ORG_ID.encode()).digest(), byteorder="big"
) % (60 * 10)
logger.info("Sleeping %d seconds before reporting...", delay)
time.sleep(delay)
send_analytics_to_push_bouncer()

View File

@@ -229,7 +229,9 @@ def send_analytics_to_push_bouncer() -> None:
),
)
if len(realm_count_data) + len(installation_count_data) + len(realmauditlog_data) == 0:
record_count = len(realm_count_data) + len(installation_count_data) + len(realmauditlog_data)
if record_count == 0:
logger.info("No new records to report.")
return
request = {
@@ -246,6 +248,7 @@ def send_analytics_to_push_bouncer() -> None:
send_to_push_bouncer("POST", "server/analytics", request)
except JsonableError as e:
logger.warning(e.msg)
logger.info("Reported %d records", record_count)
def send_realms_only_to_push_bouncer() -> None: