diff --git a/zerver/actions/message_send.py b/zerver/actions/message_send.py index 82cdf47f3b..2a5ebf2b40 100644 --- a/zerver/actions/message_send.py +++ b/zerver/actions/message_send.py @@ -1837,6 +1837,7 @@ def check_message( message.set_topic_name(topic_name) message.is_channel_message = True else: + message.set_topic_name(Message.DM_TOPIC) message.is_channel_message = False if forged and forged_timestamp is not None: # Forged messages come with a timestamp diff --git a/zerver/data_import/import_util.py b/zerver/data_import/import_util.py index 0c25b1d06b..92fe78f3bc 100644 --- a/zerver/data_import/import_util.py +++ b/zerver/data_import/import_util.py @@ -524,7 +524,7 @@ def build_message( has_link=has_link, ) if is_direct_message_type: - topic_name = "" + topic_name = Message.DM_TOPIC zulip_message.set_topic_name(topic_name) zulip_message_dict = model_to_dict( zulip_message, exclude=["recipient", "sender", "sending_client"] diff --git a/zerver/lib/message_cache.py b/zerver/lib/message_cache.py index 93d683f964..9acec6b50f 100644 --- a/zerver/lib/message_cache.py +++ b/zerver/lib/message_cache.py @@ -372,12 +372,18 @@ class MessageDict: row is a row from a .values() call, and it needs to have all the relevant fields populated """ + + def get_message_topic(row: dict[str, Any]) -> str: + if row["recipient__type"] == Recipient.STREAM: + return row[DB_TOPIC_NAME] + return "" + return MessageDict.build_message_dict( message_id=row["id"], last_edit_time=row["last_edit_time"], edit_history_json=row["edit_history"], content=row["content"], - topic_name=row[DB_TOPIC_NAME], + topic_name=get_message_topic(row), date_sent=row["date_sent"], rendered_content=row["rendered_content"], rendered_content_version=row["rendered_content_version"], diff --git a/zerver/migrations/0718_fix_topics_for_direct_messages.py b/zerver/migrations/0718_fix_topics_for_direct_messages.py new file mode 100644 index 0000000000..785e85054b --- /dev/null +++ b/zerver/migrations/0718_fix_topics_for_direct_messages.py @@ -0,0 +1,53 @@ +# Generated by Django 5.1.8 on 2025-05-23 09:25 + +from django.db import migrations +from django.db.backends.base.schema import BaseDatabaseSchemaEditor +from django.db.migrations.state import StateApps +from django.db.models import Max + + +def fix_topic_for_direct_messages(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None: + Message = apps.get_model("zerver", "Message") + ArchivedMessage = apps.get_model("zerver", "ArchivedMessage") + ScheduledMessage = apps.get_model("zerver", "ScheduledMessage") + + RECIPIENT_STREAM = 2 + + DM_TOPIC = "\x07" + + BATCH_SIZE = 10000 + for message_model in [Message, ArchivedMessage, ScheduledMessage]: + lower_bound = 1 + + max_id = message_model.objects.aggregate(Max("id"))["id__max"] + if max_id is None: + continue + + while lower_bound <= max_id: + upper_bound = lower_bound + BATCH_SIZE - 1 + print(f"Processing batch {lower_bound} to {upper_bound} for {message_model.__name__}") + + message_model.objects.filter( + id__range=(lower_bound, upper_bound), + ).exclude(recipient__type=RECIPIENT_STREAM).update(subject=DM_TOPIC) + + lower_bound += BATCH_SIZE + + +class Migration(migrations.Migration): + atomic = False + + dependencies = [ + ("zerver", "0717_stream_topics_policy"), + ] + + operations = [ + migrations.RunPython( + fix_topic_for_direct_messages, + reverse_code=migrations.RunPython.noop, + elidable=True, + ), + # We need to run an ANALYZE for the query planner statistics + # to no longer think most messages have "" as the topic. + migrations.RunSQL("ANALYZE zerver_message"), + ] diff --git a/zerver/models/messages.py b/zerver/models/messages.py index 563cabe225..4dd9b577d5 100644 --- a/zerver/models/messages.py +++ b/zerver/models/messages.py @@ -58,6 +58,21 @@ class AbstractMessage(models.Model): db_default=MessageType.NORMAL, ) + # Direct messages do not have topics in the API. Originally, all + # DMs had a topic of "" in the database. When we started using "" + # as the topic for "general chat", this caused problems with the + # PostgreSQL query planner. Because a large portion of all + # messages are DMs, PostgreSQL could do very inefficient table + # scans to fetch messages in general chat, ignoring the topic + # index, because it assumed most messages were in general chat. + # + # To avoid that query planner statistics problem, we use an + # unprintable character, which isn't permitted in actual topics, + # as the topic for DMs in the database. The "BEL" character is an + # arbitrary choice, but feels suitable given DMs trigger a + # notification sound. + DM_TOPIC = "\x07" + # The message's topic. # # Early versions of Zulip called this concept a "subject", as in an email @@ -105,6 +120,9 @@ class AbstractMessage(models.Model): @override def __str__(self) -> str: + if not self.is_channel_message: + return f"{self.recipient.label()} / / {self.sender!r}" + return f"{self.recipient.label()} / {self.subject} / {self.sender!r}" diff --git a/zerver/models/scheduled_jobs.py b/zerver/models/scheduled_jobs.py index 53ae1a4f8a..4d39859a75 100644 --- a/zerver/models/scheduled_jobs.py +++ b/zerver/models/scheduled_jobs.py @@ -202,6 +202,9 @@ class ScheduledMessage(models.Model): @override def __str__(self) -> str: + if self.recipient.type != Recipient.STREAM: + return f"{self.recipient.label()} {self.sender!r} {self.scheduled_timestamp}" + return f"{self.recipient.label()} {self.subject} {self.sender!r} {self.scheduled_timestamp}" def topic_name(self) -> str: @@ -217,8 +220,8 @@ class ScheduledMessage(models.Model): recipient, recipient_type_str = get_recipient_ids(self.recipient, self.sender.id) if recipient_type_str == "private": - # The topic for direct messages should always be an empty string. - assert self.topic_name() == "" + # The topic for direct messages should always be "\x07". + assert self.topic_name() == Message.DM_TOPIC return APIScheduledDirectMessageDict( scheduled_message_id=self.id, diff --git a/zerver/tests/test_events.py b/zerver/tests/test_events.py index 05d30d6083..82e8a73745 100644 --- a/zerver/tests/test_events.py +++ b/zerver/tests/test_events.py @@ -630,13 +630,14 @@ class NormalActionsTest(BaseAction): ) def test_pm_send_message_events(self) -> None: - with self.verify_action(): + with self.verify_action() as events: self.send_personal_message( self.example_user("cordelia"), self.example_user("hamlet"), "hola", skip_capture_on_commit_callbacks=True, ) + self.assertEqual(events[0]["message"][TOPIC_NAME], "") # Verify direct message editing - content only edit pm = Message.objects.order_by("-id")[0] @@ -684,13 +685,14 @@ class NormalActionsTest(BaseAction): self.example_user("hamlet"), self.example_user("othello"), ] - with self.verify_action(): + with self.verify_action() as events: self.send_group_direct_message( self.example_user("cordelia"), direct_message_group, "hola", skip_capture_on_commit_callbacks=True, ) + self.assertEqual(events[0]["message"][TOPIC_NAME], "") def test_user_creation_events_on_sending_messages(self) -> None: self.set_up_db_for_testing_user_access() diff --git a/zerver/tests/test_mattermost_importer.py b/zerver/tests/test_mattermost_importer.py index c4eeaafaf7..66281bfcaf 100644 --- a/zerver/tests/test_mattermost_importer.py +++ b/zerver/tests/test_mattermost_importer.py @@ -905,7 +905,7 @@ class MatterMostImporter(ZulipTestCase): self.assertEqual( group_direct_messages[0].content, "Who is going to Hogsmeade this weekend?\n\n" ) - self.assertEqual(group_direct_messages[0].topic_name(), "") + self.assertEqual(group_direct_messages[0].topic_name(), Message.DM_TOPIC) personal_messages = messages.filter(recipient__type=Recipient.PERSONAL).order_by( "date_sent" @@ -918,7 +918,7 @@ class MatterMostImporter(ZulipTestCase): self.assertTrue(personal_messages[0].has_attachment) self.assertTrue(personal_messages[0].has_image) self.assertTrue(personal_messages[0].has_link) - self.assertEqual(personal_messages[0].topic_name(), "") + self.assertEqual(personal_messages[0].topic_name(), Message.DM_TOPIC) def test_do_convert_data_with_masking(self) -> None: mattermost_data_dir = self.fixture_file_name("", "mattermost_fixtures") diff --git a/zerver/tests/test_message_edit.py b/zerver/tests/test_message_edit.py index a1088d5d83..d973b15011 100644 --- a/zerver/tests/test_message_edit.py +++ b/zerver/tests/test_message_edit.py @@ -189,6 +189,7 @@ class EditMessageTest(ZulipTestCase): self.assertEqual(response_dict["message"]["id"], msg_id) self.assertEqual(response_dict["message"]["recipient_id"], cordelia.recipient_id) self.assertEqual(response_dict["message"]["flags"], ["read"]) + self.assertEqual(response_dict["message"][TOPIC_NAME], "") msg_id = self.send_personal_message( from_user=cordelia, to_user=hamlet, content="Incoming direct message" @@ -200,6 +201,7 @@ class EditMessageTest(ZulipTestCase): # Incoming DMs show the recipient_id that outgoing DMs would. self.assertEqual(response_dict["message"]["recipient_id"], cordelia.recipient_id) self.assertEqual(response_dict["message"]["flags"], []) + self.assertEqual(response_dict["message"][TOPIC_NAME], "") # Send message to web-public stream where hamlet is not subscribed. # This will test case of user having no `UserMessage` but having access diff --git a/zerver/tests/test_message_move_topic.py b/zerver/tests/test_message_move_topic.py index 35f96f2d52..847d41e9d5 100644 --- a/zerver/tests/test_message_move_topic.py +++ b/zerver/tests/test_message_move_topic.py @@ -228,6 +228,14 @@ class MessageMoveTopicTest(ZulipTestCase): ) self.assert_json_error(result, "Invalid character in topic, at position 8!") + result = self.client_patch( + "/json/messages/" + str(msg_id), + { + "topic": f"{Message.DM_TOPIC}", + }, + ) + self.assert_json_error(result, "Invalid character in topic, at position 1!") + @mock.patch("zerver.actions.message_edit.send_event_on_commit") def test_edit_topic_public_history_stream(self, mock_send_event: mock.MagicMock) -> None: stream_name = "Macbeth" diff --git a/zerver/tests/test_message_send.py b/zerver/tests/test_message_send.py index 0b39cb18e8..50b00d9ebb 100644 --- a/zerver/tests/test_message_send.py +++ b/zerver/tests/test_message_send.py @@ -948,6 +948,19 @@ class MessagePOSTTest(ZulipTestCase): ) self.assert_json_error(result, "Invalid character in topic, at position 5!") + # Make sure that a stream message cannot be sent with topic set + # to Message.DM_TOPIC. + result = self.client_post( + "/json/messages", + { + "type": "channel", + "to": "Verona", + "topic": f"{Message.DM_TOPIC}", + "content": "Test message", + }, + ) + self.assert_json_error(result, "Invalid character in topic, at position 1!") + def test_invalid_recipient_type(self) -> None: """ Messages other than the type of "direct", "private", "channel" or "stream" are invalid. @@ -2610,6 +2623,8 @@ class PersonalMessageSendTest(ZulipTestCase): self.assertEqual(most_recent_message(sender).recipient, recipient) self.assertEqual(most_recent_message(receiver).recipient, recipient) + self.assertEqual(most_recent_message(sender).topic_name(), Message.DM_TOPIC) + self.assertEqual(most_recent_message(receiver).topic_name(), Message.DM_TOPIC) def test_personal(self) -> None: """ diff --git a/zerver/tests/test_reminders.py b/zerver/tests/test_reminders.py index 32e5cb5d90..233a79f118 100644 --- a/zerver/tests/test_reminders.py +++ b/zerver/tests/test_reminders.py @@ -90,6 +90,7 @@ class RemindersTest(ZulipTestCase): scheduled_message.reminder_target_message_id, message_id, ) + self.assertEqual(scheduled_message.topic_name(), Message.DM_TOPIC) # Scheduling a direct message with user IDs is successful. self.example_user("othello") @@ -110,6 +111,7 @@ class RemindersTest(ZulipTestCase): scheduled_message.reminder_target_message_id, message_id, ) + self.assertEqual(scheduled_message.topic_name(), Message.DM_TOPIC) def test_schedule_reminder_with_bad_timestamp(self) -> None: self.login("hamlet") diff --git a/zerver/tests/test_rocketchat_importer.py b/zerver/tests/test_rocketchat_importer.py index f39fed4625..e382c1a15d 100644 --- a/zerver/tests/test_rocketchat_importer.py +++ b/zerver/tests/test_rocketchat_importer.py @@ -1057,7 +1057,7 @@ class RocketChatImporter(ZulipTestCase): self.assertEqual(group_direct_messages[0].sender.email, "hermionegranger@email.com") self.assertEqual(group_direct_messages[0].content, "Hey people!") - self.assertEqual(group_direct_messages[0].topic_name(), "") + self.assertEqual(group_direct_messages[0].topic_name(), Message.DM_TOPIC) self.assertEqual(group_direct_messages[2].sender.email, "harrypotter@email.com") self.assertRegex( group_direct_messages[2].content, @@ -1078,7 +1078,7 @@ class RocketChatImporter(ZulipTestCase): personal_messages[0].content, "Hey @**Hermione Granger** :grin:, how's everything going?", ) - self.assertEqual(personal_messages[0].topic_name(), "") + self.assertEqual(personal_messages[0].topic_name(), Message.DM_TOPIC) self.verify_emoji_code_foreign_keys() diff --git a/zerver/tests/test_scheduled_messages.py b/zerver/tests/test_scheduled_messages.py index d4ff006c54..32bdbf181d 100644 --- a/zerver/tests/test_scheduled_messages.py +++ b/zerver/tests/test_scheduled_messages.py @@ -86,6 +86,7 @@ class ScheduledMessageTest(ZulipTestCase): scheduled_message.scheduled_timestamp, timestamp_to_datetime(scheduled_delivery_timestamp), ) + self.assertEqual(scheduled_message.topic_name(), Message.DM_TOPIC) # Cannot schedule a direct message with user emails. result = self.do_schedule_message( @@ -487,6 +488,9 @@ class ScheduledMessageTest(ZulipTestCase): scheduled_message = self.get_scheduled_message(str(scheduled_message_id)) self.assertNotEqual(scheduled_message.recipient.type, Recipient.STREAM) + self.assertEqual(scheduled_message.recipient.type, Recipient.PERSONAL) + scheduled_message = self.get_scheduled_message(str(scheduled_message_id)) + self.assertEqual(scheduled_message.topic_name(), Message.DM_TOPIC) # Trying to edit `topic` for direct message is ignored payload = { @@ -496,7 +500,7 @@ class ScheduledMessageTest(ZulipTestCase): self.assert_json_success(result) scheduled_message = self.get_scheduled_message(str(scheduled_message_id)) - self.assertEqual(scheduled_message.topic_name(), "") + self.assertEqual(scheduled_message.topic_name(), Message.DM_TOPIC) # Trying to edit `type` to stream message type without a `topic` returns an error payload = { diff --git a/zerver/tests/test_slack_importer.py b/zerver/tests/test_slack_importer.py index 834387887a..4bf3ca7196 100644 --- a/zerver/tests/test_slack_importer.py +++ b/zerver/tests/test_slack_importer.py @@ -1292,10 +1292,10 @@ class SlackImporter(ZulipTestCase): self.assertEqual(zerver_message[5]["has_link"], False) self.assertEqual(zerver_message[7]["has_link"], False) - # Test that topic_name is an empty string for direct messages and + # Test that topic_name is set to '\x07' for direct messages and # group direct messages. - self.assertEqual(zerver_message[6][EXPORT_TOPIC_NAME], "") - self.assertEqual(zerver_message[8][EXPORT_TOPIC_NAME], "") + self.assertEqual(zerver_message[6][EXPORT_TOPIC_NAME], Message.DM_TOPIC) + self.assertEqual(zerver_message[8][EXPORT_TOPIC_NAME], Message.DM_TOPIC) self.assertEqual(zerver_message[3][EXPORT_TOPIC_NAME], "imported from Slack") self.assertEqual(zerver_message[3]["content"], "/me added bot") diff --git a/zerver/views/message_edit.py b/zerver/views/message_edit.py index 102c25073c..961f6a8960 100644 --- a/zerver/views/message_edit.py +++ b/zerver/views/message_edit.py @@ -45,9 +45,12 @@ def fill_edit_history_entries( prev_content = message.content prev_rendered_content = message.rendered_content is_channel_message = message.is_stream_message() - prev_topic_name = maybe_rename_empty_topic_to_general_chat( - message.topic_name(), is_channel_message, allow_empty_topic_name - ) + if is_channel_message: + prev_topic_name = maybe_rename_empty_topic_to_general_chat( + message.topic_name(), is_channel_message, allow_empty_topic_name + ) + else: + prev_topic_name = "" # Make sure that the latest entry in the history corresponds to the # message's last edit time diff --git a/zilencer/management/commands/populate_db.py b/zilencer/management/commands/populate_db.py index 5790323850..4925a38b57 100644 --- a/zilencer/management/commands/populate_db.py +++ b/zilencer/management/commands/populate_db.py @@ -1297,11 +1297,13 @@ def generate_and_send_messages( if recipient_type == Recipient.DIRECT_MESSAGE_GROUP: sender_id = random.choice(direct_message_group_members[message.recipient.id]) message.sender = get_user_profile_by_id(sender_id) + message.subject = Message.DM_TOPIC elif recipient_type == Recipient.PERSONAL: message.recipient = Recipient.objects.get( type=Recipient.PERSONAL, type_id=personals_pair[0] ) message.sender = get_user_profile_by_id(personals_pair[1]) + message.subject = Message.DM_TOPIC saved_data["personals_pair"] = personals_pair elif recipient_type == Recipient.STREAM: # Pick a random subscriber to the stream