Files
zulip/analytics/management/commands/update_analytics_counts.py
Alex Vandiver a5a5791794 management: Skip hourly crons during deploys.
This is most important for `send_zulip_update_announcements`, which
can race with the version run as a post-deploy hook.  However, all of
these crons can tolerate being slightly delayed, and there's little
benefit to them taking CPU or possibly hitting odd borderline race
conditions when the deploy is in progress.

For safety, we only trust the deploy lockfile if it was created
within the last hour -- deploys should not take more than an hour, and
failing to ever run hourly crons is much worse than perhaps running
them during a real very-long deploy.
2025-07-15 09:07:36 -07:00

100 lines
3.9 KiB
Python

import hashlib
import time
from argparse import ArgumentParser
from datetime import timezone
from typing import Any
from django.conf import settings
from django.utils.dateparse import parse_datetime
from django.utils.timezone import now as timezone_now
from typing_extensions import override
from analytics.lib.counts import ALL_COUNT_STATS, logger, process_count_stat
from zerver.lib.management import ZulipBaseCommand, abort_cron_during_deploy, abort_unless_locked
from zerver.lib.remote_server import send_server_data_to_push_bouncer, should_send_analytics_data
from zerver.lib.timestamp import floor_to_hour
from zerver.models import Realm
class Command(ZulipBaseCommand):
help = """Fills Analytics tables.
Run as a cron job that runs every hour."""
@override
def add_arguments(self, parser: ArgumentParser) -> None:
parser.add_argument(
"--time",
"-t",
help="Update stat tables from current state to --time. Defaults to the current time.",
default=timezone_now().isoformat(),
)
parser.add_argument("--utc", action="store_true", help="Interpret --time in UTC.")
parser.add_argument(
"--stat", "-s", help="CountStat to process. If omitted, all stats are processed."
)
parser.add_argument(
"--verbose", action="store_true", help="Print timing information to stdout."
)
@override
@abort_cron_during_deploy
@abort_unless_locked
def handle(self, *args: Any, **options: Any) -> None:
self.run_update_analytics_counts(options)
def run_update_analytics_counts(self, options: dict[str, Any]) -> None:
# installation_epoch relies on there being at least one realm; we
# shouldn't run the analytics code if that condition isn't satisfied
if not Realm.objects.exists():
logger.info("No realms, stopping update_analytics_counts")
return
fill_to_time = parse_datetime(options["time"])
assert fill_to_time is not None
if options["utc"]:
fill_to_time = fill_to_time.replace(tzinfo=timezone.utc)
if fill_to_time.tzinfo is None:
raise ValueError(
"--time must be time-zone-aware. Maybe you meant to use the --utc option?"
)
fill_to_time = floor_to_hour(fill_to_time.astimezone(timezone.utc))
if options["stat"] is not None:
stats = [ALL_COUNT_STATS[options["stat"]]]
else:
stats = list(ALL_COUNT_STATS.values())
logger.info("Starting updating analytics counts through %s", fill_to_time)
if options["verbose"]:
start = time.time()
last = start
for stat in stats:
process_count_stat(stat, fill_to_time)
if options["verbose"]:
print(f"Updated {stat.property} in {time.time() - last:.3f}s")
last = time.time()
if options["verbose"]:
print(
f"Finished updating analytics counts through {fill_to_time} in {time.time() - start:.3f}s"
)
logger.info("Finished updating analytics counts through %s", fill_to_time)
if should_send_analytics_data():
# Based on the specific value of the setting, the exact details to send
# will be decided. However, we proceed just based on this not being falsey.
# Skew 0-10 minutes based on a hash of settings.ZULIP_ORG_ID, so
# that each server will report in at a somewhat consistent time.
assert settings.ZULIP_ORG_ID
delay = int.from_bytes(
hashlib.sha256(settings.ZULIP_ORG_ID.encode()).digest(), byteorder="big"
) % (60 * 10)
logger.info("Sleeping %d seconds before reporting...", delay)
time.sleep(delay)
send_server_data_to_push_bouncer(consider_usage_statistics=True, raise_on_error=True)