populate_db: Add support for creating messages older than 5 days ago.

Fixes #21646.
2025-11-03 05:23:35 +00:00 · 2022-04-04 15:16:17 -07:00
parent 6e831db55b
commit e4c51488c0
2 changed files with 27 additions and 10 deletions
--- a/zerver/tests/test_populate_db.py
+++ b/zerver/tests/test_populate_db.py
@@ -13,7 +13,7 @@ class TestChoosePubDate(ZulipTestCase):
        """
        tot_messages = 1000000
        datetimes_list = [
-            choose_date_sent(i, tot_messages, 1)
+            choose_date_sent(i, tot_messages, 5, 1)
            for i in range(1, tot_messages, tot_messages // 100)
        ]
--- a/zilencer/management/commands/populate_db.py
+++ b/zilencer/management/commands/populate_db.py
@@ -194,6 +194,14 @@ class Command(BaseCommand):
            "-n", "--num-messages", type=int, default=500, help="The number of messages to create."
        )
+        parser.add_argument(
+            "-o",
+            "--oldest-message-days",
+            type=int,
+            default=5,
+            help="The start of the time range where messages could have been sent.",
+        )
        parser.add_argument(
            "-b",
            "--batch-size",
@@ -1075,7 +1083,9 @@ def generate_and_send_messages(
            message.subject = random.choice(possible_topics[message.recipient.id])
            saved_data["subject"] = message.subject
-        message.date_sent = choose_date_sent(num_messages, tot_messages, options["threads"])
+        message.date_sent = choose_date_sent(
+            num_messages, tot_messages, options["oldest_message_days"], options["threads"]
+        )
        messages.append(message)
        recipients[num_messages] = (message_type, message.recipient.id, saved_data)
@@ -1162,25 +1172,32 @@ def bulk_create_reactions(all_messages: List[Message]) -> None:
    Reaction.objects.bulk_create(reactions)
-def choose_date_sent(num_messages: int, tot_messages: int, threads: int) -> datetime:
+def choose_date_sent(
+    num_messages: int, tot_messages: int, oldest_message_days: int, threads: int
+) -> datetime:
    # Spoofing time not supported with threading
    if threads != 1:
        return timezone_now()
-    # Distrubutes 80% of messages starting from 5 days ago, over a period
+    # We want to ensure that:
-    # of 3 days. Then, distributes remaining messages over past 24 hours.
+    # (1) some messages are sent in the last 4 hours,
+    # (2) there are some >24hr gaps between adjacent messages, and
+    # (3) a decent bulk of messages in the last day so you see adjacent messages with the same date.
+    # So we distribute 80% of messages starting from oldest_message_days days ago, over a period
+    # of the first min(oldest_message_days-2, 1) of those days. Then, distributes remaining messages
+    # over the past 24 hours.
    amount_in_first_chunk = int(tot_messages * 0.8)
    amount_in_second_chunk = tot_messages - amount_in_first_chunk
    if num_messages < amount_in_first_chunk:
-        # Distribute starting from 5 days ago, over a period
+        spoofed_date = timezone_now() - timezone_timedelta(days=oldest_message_days)
-        # of 3 days:
+        num_days_for_first_chunk = min(oldest_message_days - 2, 1)
-        spoofed_date = timezone_now() - timezone_timedelta(days=5)
+        interval_size = num_days_for_first_chunk * 24 * 60 * 60 / amount_in_first_chunk
-        interval_size = 3 * 24 * 60 * 60 / amount_in_first_chunk
        lower_bound = interval_size * num_messages
        upper_bound = interval_size * (num_messages + 1)
    else:
-        # We're in the last 20% of messages, distribute them over the last 24 hours:
+        # We're in the last 20% of messages, so distribute them over the last 24 hours:
        spoofed_date = timezone_now() - timezone_timedelta(days=1)
        interval_size = 24 * 60 * 60 / amount_in_second_chunk
        lower_bound = interval_size * (num_messages - amount_in_first_chunk)