populate_db: Add support for creating messages older than 5 days ago.

Fixes #21646.
This commit is contained in:
evykassirer
2022-04-04 15:16:17 -07:00
committed by Tim Abbott
parent 6e831db55b
commit e4c51488c0
2 changed files with 27 additions and 10 deletions

View File

@@ -13,7 +13,7 @@ class TestChoosePubDate(ZulipTestCase):
""" """
tot_messages = 1000000 tot_messages = 1000000
datetimes_list = [ datetimes_list = [
choose_date_sent(i, tot_messages, 1) choose_date_sent(i, tot_messages, 5, 1)
for i in range(1, tot_messages, tot_messages // 100) for i in range(1, tot_messages, tot_messages // 100)
] ]

View File

@@ -194,6 +194,14 @@ class Command(BaseCommand):
"-n", "--num-messages", type=int, default=500, help="The number of messages to create." "-n", "--num-messages", type=int, default=500, help="The number of messages to create."
) )
parser.add_argument(
"-o",
"--oldest-message-days",
type=int,
default=5,
help="The start of the time range where messages could have been sent.",
)
parser.add_argument( parser.add_argument(
"-b", "-b",
"--batch-size", "--batch-size",
@@ -1075,7 +1083,9 @@ def generate_and_send_messages(
message.subject = random.choice(possible_topics[message.recipient.id]) message.subject = random.choice(possible_topics[message.recipient.id])
saved_data["subject"] = message.subject saved_data["subject"] = message.subject
message.date_sent = choose_date_sent(num_messages, tot_messages, options["threads"]) message.date_sent = choose_date_sent(
num_messages, tot_messages, options["oldest_message_days"], options["threads"]
)
messages.append(message) messages.append(message)
recipients[num_messages] = (message_type, message.recipient.id, saved_data) recipients[num_messages] = (message_type, message.recipient.id, saved_data)
@@ -1162,25 +1172,32 @@ def bulk_create_reactions(all_messages: List[Message]) -> None:
Reaction.objects.bulk_create(reactions) Reaction.objects.bulk_create(reactions)
def choose_date_sent(num_messages: int, tot_messages: int, threads: int) -> datetime: def choose_date_sent(
num_messages: int, tot_messages: int, oldest_message_days: int, threads: int
) -> datetime:
# Spoofing time not supported with threading # Spoofing time not supported with threading
if threads != 1: if threads != 1:
return timezone_now() return timezone_now()
# Distrubutes 80% of messages starting from 5 days ago, over a period # We want to ensure that:
# of 3 days. Then, distributes remaining messages over past 24 hours. # (1) some messages are sent in the last 4 hours,
# (2) there are some >24hr gaps between adjacent messages, and
# (3) a decent bulk of messages in the last day so you see adjacent messages with the same date.
# So we distribute 80% of messages starting from oldest_message_days days ago, over a period
# of the first min(oldest_message_days-2, 1) of those days. Then we distribute the remaining
# messages over the past 24 hours.
amount_in_first_chunk = int(tot_messages * 0.8) amount_in_first_chunk = int(tot_messages * 0.8)
amount_in_second_chunk = tot_messages - amount_in_first_chunk amount_in_second_chunk = tot_messages - amount_in_first_chunk
if num_messages < amount_in_first_chunk: if num_messages < amount_in_first_chunk:
# Distribute starting from 5 days ago, over a period spoofed_date = timezone_now() - timezone_timedelta(days=oldest_message_days)
# of 3 days: num_days_for_first_chunk = min(oldest_message_days - 2, 1)
spoofed_date = timezone_now() - timezone_timedelta(days=5) interval_size = num_days_for_first_chunk * 24 * 60 * 60 / amount_in_first_chunk
interval_size = 3 * 24 * 60 * 60 / amount_in_first_chunk
lower_bound = interval_size * num_messages lower_bound = interval_size * num_messages
upper_bound = interval_size * (num_messages + 1) upper_bound = interval_size * (num_messages + 1)
else: else:
# We're in the last 20% of messages, distribute them over the last 24 hours: # We're in the last 20% of messages, so distribute them over the last 24 hours:
spoofed_date = timezone_now() - timezone_timedelta(days=1) spoofed_date = timezone_now() - timezone_timedelta(days=1)
interval_size = 24 * 60 * 60 / amount_in_second_chunk interval_size = 24 * 60 * 60 / amount_in_second_chunk
lower_bound = interval_size * (num_messages - amount_in_first_chunk) lower_bound = interval_size * (num_messages - amount_in_first_chunk)