Files
zulip/zerver/migrations/0377_message_edit_history_format.py
Tim Abbott afecb2eca6 migrations: Add missing elidable tags on RunPython/RunSQL steps.
This helps the squashmigrations tool know that it can squash these.
2024-08-23 17:15:35 -07:00

170 lines
6.1 KiB
Python

import time
from typing import Any, TypedDict
import orjson
from django.db import migrations, transaction
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db.models import Min
# Distance between the first and last message ID of one chunk. Chunk
# ranges are inclusive on both ends, so a chunk spans BATCH_SIZE + 1 IDs.
BATCH_SIZE = 10_000

# Value of recipient.type that identifies a stream message; only those
# messages have topic / stream data in their edit history.
STREAM = 2
# Legacy TypedDict has "prev_topic" field for any edit_history entries that
# were saved to the database after the legacy "prev_subject" field stopped
# being written to the database in the pre-migration commit.
class LegacyEditHistoryEvent(TypedDict, total=False):
user_id: int
timestamp: int
prev_stream: int
prev_subject: str
prev_topic: str
prev_content: str
prev_rendered_content: str | None
prev_rendered_content_version: int | None
class EditHistoryEvent(TypedDict, total=False):
user_id: int | None
timestamp: int
prev_stream: int
stream: int
prev_topic: str
topic: str
prev_content: str
prev_rendered_content: str | None
prev_rendered_content_version: int | None
@transaction.atomic
def backfill_message_edit_history_chunk(
    first_id: int, last_id: int, message_model: type[Any]
) -> None:
    """
    Convert the stored edit history of one batch of messages to the
    modern format.

    Every row of message_model that has a non-null edit_history and an ID
    in [first_id, last_id] (both ends included) is rewritten so that:
    * The legacy "prev_subject" key is stored as "prev_topic".
    * Each topic edit also records "topic" and each stream edit also
      records "stream": the value in effect right after that edit,
      reconstructed by walking back from the message's current
      topic / stream.

    The whole batch runs inside a single transaction, with the selected
    rows fetched via select_for_update().
    """
    candidates = message_model.objects.select_for_update().only(
        "recipient",
        "recipient__type",
        "recipient__type_id",
        "subject",
        "edit_history",
    )
    messages = candidates.filter(edit_history__isnull=False, id__range=(first_id, last_id))

    for message in messages:
        legacy_events: list[LegacyEditHistoryEvent] = orjson.loads(message.edit_history)

        # Only stream messages carry topic / stream edit data, so the
        # running "value after this edit" trackers exist only for them.
        is_stream_message = message.recipient.type == STREAM
        if is_stream_message:
            topic_after_edit = message.subject
            stream_after_edit = message.recipient.type_id

        converted_events: list[EditHistoryEvent] = []
        for legacy_event in legacy_events:
            converted: EditHistoryEvent = {
                "user_id": legacy_event.get("user_id"),
                "timestamp": legacy_event["timestamp"],
            }

            # Content edits are copied over unchanged.
            if "prev_content" in legacy_event:
                converted["prev_content"] = legacy_event["prev_content"]
                converted["prev_rendered_content"] = legacy_event["prev_rendered_content"]
                converted["prev_rendered_content_version"] = legacy_event[
                    "prev_rendered_content_version"
                ]

            if is_stream_message:
                # A topic edit is stored under the legacy "prev_subject"
                # key or, failing that, under "prev_topic".
                is_topic_edit = True
                if "prev_subject" in legacy_event:
                    topic_before_edit = legacy_event["prev_subject"]
                elif "prev_topic" in legacy_event:
                    topic_before_edit = legacy_event["prev_topic"]
                else:
                    is_topic_edit = False

                if is_topic_edit:
                    converted["topic"] = topic_after_edit
                    converted["prev_topic"] = topic_before_edit
                    # edit_history is ordered most recent to least recent,
                    # so the topic before this edit is the topic after the
                    # next (older) topic edit in the loop.
                    topic_after_edit = topic_before_edit

                if "prev_stream" in legacy_event:
                    converted["stream"] = stream_after_edit
                    converted["prev_stream"] = legacy_event["prev_stream"]
                    # Same walk-back as for the topic tracker.
                    stream_after_edit = legacy_event["prev_stream"]

            converted_events.append(converted)

        message.edit_history = orjson.dumps(converted_events).decode()

    # Write back every rewritten history for this batch.
    message_model.objects.bulk_update(messages, ["edit_history"])
def copy_and_update_message_edit_history(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """
    Modernize edit_history for the Message and ArchivedMessage models.

    For each model, start at the lowest ID that has an edit_history and
    process inclusive ID ranges BATCH_SIZE apart up to the model's highest
    ID, pausing briefly between full chunks. A model with no edit_history
    rows at all is skipped.
    """
    # Resolve both historical models before touching any rows.
    message_models = [
        apps.get_model("zerver", model_name) for model_name in ("Message", "ArchivedMessage")
    ]

    for message_model in message_models:
        rows_with_history = message_model.objects.filter(edit_history__isnull=False)
        if not rows_with_history.exists():
            # No messages with "edit_history"
            continue

        next_unprocessed_id = rows_with_history.aggregate(Min("id"))["id__min"]
        last_id = message_model.objects.latest("id").id

        # Full chunks: each covers [next_unprocessed_id, id_range_upper_bound].
        while (id_range_upper_bound := next_unprocessed_id + BATCH_SIZE) <= last_id:
            backfill_message_edit_history_chunk(
                next_unprocessed_id, id_range_upper_bound, message_model
            )
            print(f"Modernized edit history for {id_range_upper_bound}/{last_id} messages.")
            next_unprocessed_id = id_range_upper_bound + 1
            time.sleep(0.1)

        # Remaining partial chunk; also the only chunk when everything
        # fits within a single batch.
        if next_unprocessed_id <= last_id:
            backfill_message_edit_history_chunk(next_unprocessed_id, last_id, message_model)
class Migration(migrations.Migration):
    """
    Data migration that rewrites message edit history into the modern
    format via copy_and_update_message_edit_history.
    """

    # Not wrapped in one migration-wide transaction; each chunk opens its
    # own transaction in backfill_message_edit_history_chunk.
    atomic = False

    dependencies = [("zerver", "0376_set_realmemoji_author_and_reupload_realmemoji")]

    operations = [
        migrations.RunPython(
            code=copy_and_update_message_edit_history,
            # Reversing is a no-op: the rewritten history is left in place.
            reverse_code=migrations.RunPython.noop,
            # Marks the step as safe to drop when migrations are squashed.
            elidable=True,
        ),
    ]