Files
zulip/zerver/migrations/0257_fix_has_link_attribute.py
Aman Agrawal 47bf111de8 migrations: Mark RunPython statements elidable.
This will make django automatically remove them when we run
squashmigrations. There are still some RunSQL statements which
we will have to take care of manually.
2020-04-29 10:41:20 -07:00

94 lines
3.5 KiB
Python

# Generated by Django 1.11.24 on 2019-10-07 05:25
import time
import lxml
from django.db import migrations
from django.db.backends.postgresql.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
BATCH_SIZE = 1000
def process_batch(apps: StateApps, id_start: int, id_end: int, last_id: int) -> None:
Message = apps.get_model('zerver', 'Message')
for message in Message.objects.filter(id__gte=id_start, id__lte=id_end).order_by("id"):
if message.rendered_content == "":
# There have been bugs in the past that made it possible
# for a message to have "" as its rendered_content; we
# need to skip those because lxml won't process them.
#
# They should safely already have the correct state
# has_link=has_image=has_attachment=False.
continue
if message.id % 1000 == 0:
print("Processed %s / %s" % (message.id, last_id))
# Because we maintain the Attachment table, this should be as
# simple as just just checking if there's any Attachment
# objects associated with this message.
has_attachment = message.attachment_set.exists()
# For has_link and has_image, we need to parse the messages.
# Links are simple -- look for a link in the message.
lxml_obj = lxml.html.fromstring(message.rendered_content)
has_link = False
for link in lxml_obj.xpath("//a"):
has_link = True
break
# has_image refers to inline image previews, so we just check
# for the relevant CSS class.
has_image = False
for img in lxml_obj.find_class("message_inline_image"):
has_image = True
break
if (message.has_link == has_link and
message.has_attachment == has_attachment and
message.has_image == has_image):
# No need to spend time with the database if there aren't changes.
continue
message.has_image = has_image
message.has_link = has_link
message.has_attachment = has_attachment
message.save(update_fields=['has_link', 'has_attachment', 'has_image'])
def fix_has_link(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
Message = apps.get_model('zerver', 'Message')
if not Message.objects.exists():
# Nothing to do, and Message.objects.latest() will crash.
return
# This migration logic assumes that either the server is not
# running, or that it's being run after the logic to correct how
# `has_link` and friends are set for new messages have been
# deployed.
last_id = Message.objects.latest("id").id
id_range_lower_bound = 0
id_range_upper_bound = 0 + BATCH_SIZE
while id_range_upper_bound <= last_id:
process_batch(apps, id_range_lower_bound, id_range_upper_bound, last_id)
id_range_lower_bound = id_range_upper_bound + 1
id_range_upper_bound = id_range_lower_bound + BATCH_SIZE
time.sleep(0.1)
if last_id > id_range_lower_bound:
# Copy for the last batch.
process_batch(apps, id_range_lower_bound, last_id, last_id)
class Migration(migrations.Migration):
atomic = False
dependencies = [
('zerver', '0256_userprofile_stream_set_recipient_column_values'),
]
operations = [
migrations.RunPython(fix_has_link,
reverse_code=migrations.RunPython.noop,
elidable=True),
]