From b655bd14ea5aee1a66a0b821fcf34047e1eb6d38 Mon Sep 17 00:00:00 2001 From: Sahil Batra Date: Thu, 22 May 2025 12:35:28 +0530 Subject: [PATCH] messages: Use "\x07" as topic for DMs and group DMs. This commit updates code to use "\x07" as value for "subject" field of Message objects for DMs and group DMs, so that we have a unique value for DMs and group DMs which cannot be used for channel messages. This helps in avoiding having an empty string value as topic for DMs, which is also used for "general chat" channel messages, as large number of DMs in the realm resulted in PostgreSQL query planner thinking that there are too many "general chat" messages and thus generated bad query plans for operations like fetching "general chat" messages in a stream or moving messages to and from "general chat" topic. This change as done for ArchivedMessage and ScheduledMessage objects as well. Note that the clients still get "subject" value as an empty string "". This commit also adds tests for checking that "\x07" cannot be used as topic for channel messages. Fixes #34360. --- zerver/actions/message_send.py | 1 + zerver/data_import/import_util.py | 2 +- zerver/lib/message_cache.py | 8 ++- .../0718_fix_topics_for_direct_messages.py | 53 +++++++++++++++++++ zerver/models/messages.py | 18 +++++++ zerver/models/scheduled_jobs.py | 7 ++- zerver/tests/test_events.py | 6 ++- zerver/tests/test_mattermost_importer.py | 4 +- zerver/tests/test_message_edit.py | 2 + zerver/tests/test_message_move_topic.py | 8 +++ zerver/tests/test_message_send.py | 15 ++++++ zerver/tests/test_reminders.py | 2 + zerver/tests/test_rocketchat_importer.py | 4 +- zerver/tests/test_scheduled_messages.py | 6 ++- zerver/tests/test_slack_importer.py | 6 +-- zerver/views/message_edit.py | 9 ++-- zilencer/management/commands/populate_db.py | 2 + 17 files changed, 136 insertions(+), 17 deletions(-) create mode 100644 zerver/migrations/0718_fix_topics_for_direct_messages.py diff --git a/zerver/actions/message_send.py b/zerver/actions/message_send.py index 82cdf47f3b..2a5ebf2b40 100644 --- a/zerver/actions/message_send.py +++ b/zerver/actions/message_send.py @@ -1837,6 +1837,7 @@ def check_message( message.set_topic_name(topic_name) message.is_channel_message = True else: + message.set_topic_name(Message.DM_TOPIC) message.is_channel_message = False if forged and forged_timestamp is not None: # Forged messages come with a timestamp diff --git a/zerver/data_import/import_util.py b/zerver/data_import/import_util.py index 0c25b1d06b..92fe78f3bc 100644 --- a/zerver/data_import/import_util.py +++ b/zerver/data_import/import_util.py @@ -524,7 +524,7 @@ def build_message( has_link=has_link, ) if is_direct_message_type: - topic_name = "" + topic_name = Message.DM_TOPIC zulip_message.set_topic_name(topic_name) zulip_message_dict = model_to_dict( zulip_message, exclude=["recipient", "sender", "sending_client"] diff --git a/zerver/lib/message_cache.py b/zerver/lib/message_cache.py index 93d683f964..9acec6b50f 100644 --- a/zerver/lib/message_cache.py +++ b/zerver/lib/message_cache.py @@ -372,12 +372,18 @@ class MessageDict: row is a row from a .values() call, and it needs to have all the relevant fields populated """ + + def get_message_topic(row: dict[str, Any]) -> str: + if row["recipient__type"] == Recipient.STREAM: + return row[DB_TOPIC_NAME] + return "" + return MessageDict.build_message_dict( message_id=row["id"], last_edit_time=row["last_edit_time"], edit_history_json=row["edit_history"], content=row["content"], - topic_name=row[DB_TOPIC_NAME], + topic_name=get_message_topic(row), date_sent=row["date_sent"], rendered_content=row["rendered_content"], rendered_content_version=row["rendered_content_version"], diff --git a/zerver/migrations/0718_fix_topics_for_direct_messages.py b/zerver/migrations/0718_fix_topics_for_direct_messages.py new file mode 100644 index 0000000000..785e85054b --- /dev/null +++ b/zerver/migrations/0718_fix_topics_for_direct_messages.py @@ -0,0 +1,53 @@ +# Generated by Django 5.1.8 on 2025-05-23 09:25 + +from django.db import migrations +from django.db.backends.base.schema import BaseDatabaseSchemaEditor +from django.db.migrations.state import StateApps +from django.db.models import Max + + +def fix_topic_for_direct_messages(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None: + Message = apps.get_model("zerver", "Message") + ArchivedMessage = apps.get_model("zerver", "ArchivedMessage") + ScheduledMessage = apps.get_model("zerver", "ScheduledMessage") + + RECIPIENT_STREAM = 2 + + DM_TOPIC = "\x07" + + BATCH_SIZE = 10000 + for message_model in [Message, ArchivedMessage, ScheduledMessage]: + lower_bound = 1 + + max_id = message_model.objects.aggregate(Max("id"))["id__max"] + if max_id is None: + continue + + while lower_bound <= max_id: + upper_bound = lower_bound + BATCH_SIZE - 1 + print(f"Processing batch {lower_bound} to {upper_bound} for {message_model.__name__}") + + message_model.objects.filter( + id__range=(lower_bound, upper_bound), + ).exclude(recipient__type=RECIPIENT_STREAM).update(subject=DM_TOPIC) + + lower_bound += BATCH_SIZE + + +class Migration(migrations.Migration): + atomic = False + + dependencies = [ + ("zerver", "0717_stream_topics_policy"), + ] + + operations = [ + migrations.RunPython( + fix_topic_for_direct_messages, + reverse_code=migrations.RunPython.noop, + elidable=True, + ), + # We need to run an ANALYZE for the query planner statistics + # to no longer think most messages have "" as the topic. + migrations.RunSQL("ANALYZE zerver_message"), + ] diff --git a/zerver/models/messages.py b/zerver/models/messages.py index 563cabe225..4dd9b577d5 100644 --- a/zerver/models/messages.py +++ b/zerver/models/messages.py @@ -58,6 +58,21 @@ class AbstractMessage(models.Model): db_default=MessageType.NORMAL, ) + # Direct messages do not have topics in the API. Originally, all + # DMs had a topic of "" in the database. When we started using "" + # as the topic for "general chat", this caused problems with the + # PostgreSQL query planner. Because a large portion of all + # messages are DMs, PostgreSQL could do very inefficient table + # scans to fetch messages in general chat, ignoring the topic + # index, because it assumed most messages were in general chat. + # + # To avoid that query planner statistics problem, we use an + # unprintable character, which isn't permitted in actual topics, + # as the topic for DMs in the database. The "BEL" character is an + # arbitrary choice, but feels suitable given DMs trigger a + # notification sound. + DM_TOPIC = "\x07" + # The message's topic. # # Early versions of Zulip called this concept a "subject", as in an email @@ -105,6 +120,9 @@ class AbstractMessage(models.Model): @override def __str__(self) -> str: + if not self.is_channel_message: + return f"{self.recipient.label()} / / {self.sender!r}" + return f"{self.recipient.label()} / {self.subject} / {self.sender!r}" diff --git a/zerver/models/scheduled_jobs.py b/zerver/models/scheduled_jobs.py index 53ae1a4f8a..4d39859a75 100644 --- a/zerver/models/scheduled_jobs.py +++ b/zerver/models/scheduled_jobs.py @@ -202,6 +202,9 @@ class ScheduledMessage(models.Model): @override def __str__(self) -> str: + if self.recipient.type != Recipient.STREAM: + return f"{self.recipient.label()} {self.sender!r} {self.scheduled_timestamp}" + return f"{self.recipient.label()} {self.subject} {self.sender!r} {self.scheduled_timestamp}" def topic_name(self) -> str: @@ -217,8 +220,8 @@ class ScheduledMessage(models.Model): recipient, recipient_type_str = get_recipient_ids(self.recipient, self.sender.id) if recipient_type_str == "private": - # The topic for direct messages should always be an empty string. - assert self.topic_name() == "" + # The topic for direct messages should always be "\x07". + assert self.topic_name() == Message.DM_TOPIC return APIScheduledDirectMessageDict( scheduled_message_id=self.id, diff --git a/zerver/tests/test_events.py b/zerver/tests/test_events.py index 05d30d6083..82e8a73745 100644 --- a/zerver/tests/test_events.py +++ b/zerver/tests/test_events.py @@ -630,13 +630,14 @@ class NormalActionsTest(BaseAction): ) def test_pm_send_message_events(self) -> None: - with self.verify_action(): + with self.verify_action() as events: self.send_personal_message( self.example_user("cordelia"), self.example_user("hamlet"), "hola", skip_capture_on_commit_callbacks=True, ) + self.assertEqual(events[0]["message"][TOPIC_NAME], "") # Verify direct message editing - content only edit pm = Message.objects.order_by("-id")[0] @@ -684,13 +685,14 @@ class NormalActionsTest(BaseAction): self.example_user("hamlet"), self.example_user("othello"), ] - with self.verify_action(): + with self.verify_action() as events: self.send_group_direct_message( self.example_user("cordelia"), direct_message_group, "hola", skip_capture_on_commit_callbacks=True, ) + self.assertEqual(events[0]["message"][TOPIC_NAME], "") def test_user_creation_events_on_sending_messages(self) -> None: self.set_up_db_for_testing_user_access() diff --git a/zerver/tests/test_mattermost_importer.py b/zerver/tests/test_mattermost_importer.py index c4eeaafaf7..66281bfcaf 100644 --- a/zerver/tests/test_mattermost_importer.py +++ b/zerver/tests/test_mattermost_importer.py @@ -905,7 +905,7 @@ class MatterMostImporter(ZulipTestCase): self.assertEqual( group_direct_messages[0].content, "Who is going to Hogsmeade this weekend?\n\n" ) - self.assertEqual(group_direct_messages[0].topic_name(), "") + self.assertEqual(group_direct_messages[0].topic_name(), Message.DM_TOPIC) personal_messages = messages.filter(recipient__type=Recipient.PERSONAL).order_by( "date_sent" @@ -918,7 +918,7 @@ class MatterMostImporter(ZulipTestCase): self.assertTrue(personal_messages[0].has_attachment) self.assertTrue(personal_messages[0].has_image) self.assertTrue(personal_messages[0].has_link) - self.assertEqual(personal_messages[0].topic_name(), "") + self.assertEqual(personal_messages[0].topic_name(), Message.DM_TOPIC) def test_do_convert_data_with_masking(self) -> None: mattermost_data_dir = self.fixture_file_name("", "mattermost_fixtures") diff --git a/zerver/tests/test_message_edit.py b/zerver/tests/test_message_edit.py index a1088d5d83..d973b15011 100644 --- a/zerver/tests/test_message_edit.py +++ b/zerver/tests/test_message_edit.py @@ -189,6 +189,7 @@ class EditMessageTest(ZulipTestCase): self.assertEqual(response_dict["message"]["id"], msg_id) self.assertEqual(response_dict["message"]["recipient_id"], cordelia.recipient_id) self.assertEqual(response_dict["message"]["flags"], ["read"]) + self.assertEqual(response_dict["message"][TOPIC_NAME], "") msg_id = self.send_personal_message( from_user=cordelia, to_user=hamlet, content="Incoming direct message" @@ -200,6 +201,7 @@ class EditMessageTest(ZulipTestCase): # Incoming DMs show the recipient_id that outgoing DMs would. self.assertEqual(response_dict["message"]["recipient_id"], cordelia.recipient_id) self.assertEqual(response_dict["message"]["flags"], []) + self.assertEqual(response_dict["message"][TOPIC_NAME], "") # Send message to web-public stream where hamlet is not subscribed. # This will test case of user having no `UserMessage` but having access diff --git a/zerver/tests/test_message_move_topic.py b/zerver/tests/test_message_move_topic.py index 35f96f2d52..847d41e9d5 100644 --- a/zerver/tests/test_message_move_topic.py +++ b/zerver/tests/test_message_move_topic.py @@ -228,6 +228,14 @@ class MessageMoveTopicTest(ZulipTestCase): ) self.assert_json_error(result, "Invalid character in topic, at position 8!") + result = self.client_patch( + "/json/messages/" + str(msg_id), + { + "topic": f"{Message.DM_TOPIC}", + }, + ) + self.assert_json_error(result, "Invalid character in topic, at position 1!") + @mock.patch("zerver.actions.message_edit.send_event_on_commit") def test_edit_topic_public_history_stream(self, mock_send_event: mock.MagicMock) -> None: stream_name = "Macbeth" diff --git a/zerver/tests/test_message_send.py b/zerver/tests/test_message_send.py index 0b39cb18e8..50b00d9ebb 100644 --- a/zerver/tests/test_message_send.py +++ b/zerver/tests/test_message_send.py @@ -948,6 +948,19 @@ class MessagePOSTTest(ZulipTestCase): ) self.assert_json_error(result, "Invalid character in topic, at position 5!") + # Make sure that a stream message cannot be sent with topic set + # to Message.DM_TOPIC. + result = self.client_post( + "/json/messages", + { + "type": "channel", + "to": "Verona", + "topic": f"{Message.DM_TOPIC}", + "content": "Test message", + }, + ) + self.assert_json_error(result, "Invalid character in topic, at position 1!") + def test_invalid_recipient_type(self) -> None: """ Messages other than the type of "direct", "private", "channel" or "stream" are invalid. @@ -2610,6 +2623,8 @@ class PersonalMessageSendTest(ZulipTestCase): self.assertEqual(most_recent_message(sender).recipient, recipient) self.assertEqual(most_recent_message(receiver).recipient, recipient) + self.assertEqual(most_recent_message(sender).topic_name(), Message.DM_TOPIC) + self.assertEqual(most_recent_message(receiver).topic_name(), Message.DM_TOPIC) def test_personal(self) -> None: """ diff --git a/zerver/tests/test_reminders.py b/zerver/tests/test_reminders.py index 32e5cb5d90..233a79f118 100644 --- a/zerver/tests/test_reminders.py +++ b/zerver/tests/test_reminders.py @@ -90,6 +90,7 @@ class RemindersTest(ZulipTestCase): scheduled_message.reminder_target_message_id, message_id, ) + self.assertEqual(scheduled_message.topic_name(), Message.DM_TOPIC) # Scheduling a direct message with user IDs is successful. self.example_user("othello") @@ -110,6 +111,7 @@ class RemindersTest(ZulipTestCase): scheduled_message.reminder_target_message_id, message_id, ) + self.assertEqual(scheduled_message.topic_name(), Message.DM_TOPIC) def test_schedule_reminder_with_bad_timestamp(self) -> None: self.login("hamlet") diff --git a/zerver/tests/test_rocketchat_importer.py b/zerver/tests/test_rocketchat_importer.py index f39fed4625..e382c1a15d 100644 --- a/zerver/tests/test_rocketchat_importer.py +++ b/zerver/tests/test_rocketchat_importer.py @@ -1057,7 +1057,7 @@ class RocketChatImporter(ZulipTestCase): self.assertEqual(group_direct_messages[0].sender.email, "hermionegranger@email.com") self.assertEqual(group_direct_messages[0].content, "Hey people!") - self.assertEqual(group_direct_messages[0].topic_name(), "") + self.assertEqual(group_direct_messages[0].topic_name(), Message.DM_TOPIC) self.assertEqual(group_direct_messages[2].sender.email, "harrypotter@email.com") self.assertRegex( group_direct_messages[2].content, @@ -1078,7 +1078,7 @@ class RocketChatImporter(ZulipTestCase): personal_messages[0].content, "Hey @**Hermione Granger** :grin:, how's everything going?", ) - self.assertEqual(personal_messages[0].topic_name(), "") + self.assertEqual(personal_messages[0].topic_name(), Message.DM_TOPIC) self.verify_emoji_code_foreign_keys() diff --git a/zerver/tests/test_scheduled_messages.py b/zerver/tests/test_scheduled_messages.py index d4ff006c54..32bdbf181d 100644 --- a/zerver/tests/test_scheduled_messages.py +++ b/zerver/tests/test_scheduled_messages.py @@ -86,6 +86,7 @@ class ScheduledMessageTest(ZulipTestCase): scheduled_message.scheduled_timestamp, timestamp_to_datetime(scheduled_delivery_timestamp), ) + self.assertEqual(scheduled_message.topic_name(), Message.DM_TOPIC) # Cannot schedule a direct message with user emails. result = self.do_schedule_message( @@ -487,6 +488,9 @@ class ScheduledMessageTest(ZulipTestCase): scheduled_message = self.get_scheduled_message(str(scheduled_message_id)) self.assertNotEqual(scheduled_message.recipient.type, Recipient.STREAM) + self.assertEqual(scheduled_message.recipient.type, Recipient.PERSONAL) + scheduled_message = self.get_scheduled_message(str(scheduled_message_id)) + self.assertEqual(scheduled_message.topic_name(), Message.DM_TOPIC) # Trying to edit `topic` for direct message is ignored payload = { @@ -496,7 +500,7 @@ class ScheduledMessageTest(ZulipTestCase): self.assert_json_success(result) scheduled_message = self.get_scheduled_message(str(scheduled_message_id)) - self.assertEqual(scheduled_message.topic_name(), "") + self.assertEqual(scheduled_message.topic_name(), Message.DM_TOPIC) # Trying to edit `type` to stream message type without a `topic` returns an error payload = { diff --git a/zerver/tests/test_slack_importer.py b/zerver/tests/test_slack_importer.py index 834387887a..4bf3ca7196 100644 --- a/zerver/tests/test_slack_importer.py +++ b/zerver/tests/test_slack_importer.py @@ -1292,10 +1292,10 @@ class SlackImporter(ZulipTestCase): self.assertEqual(zerver_message[5]["has_link"], False) self.assertEqual(zerver_message[7]["has_link"], False) - # Test that topic_name is an empty string for direct messages and + # Test that topic_name is set to '\x07' for direct messages and # group direct messages. - self.assertEqual(zerver_message[6][EXPORT_TOPIC_NAME], "") - self.assertEqual(zerver_message[8][EXPORT_TOPIC_NAME], "") + self.assertEqual(zerver_message[6][EXPORT_TOPIC_NAME], Message.DM_TOPIC) + self.assertEqual(zerver_message[8][EXPORT_TOPIC_NAME], Message.DM_TOPIC) self.assertEqual(zerver_message[3][EXPORT_TOPIC_NAME], "imported from Slack") self.assertEqual(zerver_message[3]["content"], "/me added bot") diff --git a/zerver/views/message_edit.py b/zerver/views/message_edit.py index 102c25073c..961f6a8960 100644 --- a/zerver/views/message_edit.py +++ b/zerver/views/message_edit.py @@ -45,9 +45,12 @@ def fill_edit_history_entries( prev_content = message.content prev_rendered_content = message.rendered_content is_channel_message = message.is_stream_message() - prev_topic_name = maybe_rename_empty_topic_to_general_chat( - message.topic_name(), is_channel_message, allow_empty_topic_name - ) + if is_channel_message: + prev_topic_name = maybe_rename_empty_topic_to_general_chat( + message.topic_name(), is_channel_message, allow_empty_topic_name + ) + else: + prev_topic_name = "" # Make sure that the latest entry in the history corresponds to the # message's last edit time diff --git a/zilencer/management/commands/populate_db.py b/zilencer/management/commands/populate_db.py index 5790323850..4925a38b57 100644 --- a/zilencer/management/commands/populate_db.py +++ b/zilencer/management/commands/populate_db.py @@ -1297,11 +1297,13 @@ def generate_and_send_messages( if recipient_type == Recipient.DIRECT_MESSAGE_GROUP: sender_id = random.choice(direct_message_group_members[message.recipient.id]) message.sender = get_user_profile_by_id(sender_id) + message.subject = Message.DM_TOPIC elif recipient_type == Recipient.PERSONAL: message.recipient = Recipient.objects.get( type=Recipient.PERSONAL, type_id=personals_pair[0] ) message.sender = get_user_profile_by_id(personals_pair[1]) + message.subject = Message.DM_TOPIC saved_data["personals_pair"] = personals_pair elif recipient_type == Recipient.STREAM: # Pick a random subscriber to the stream