Files
zulip/zerver/migrations/0423_fix_email_gateway_attachment_owner.py
Alex Vandiver 994806c505 migrations: Fix ownership, and re-attach, mis-owned email attachments.
608c787c52 fixed a bug where messages sent by the email gateway "as"
a user failed to properly attribute ownership of their attachments,
leaving the attachments orphaned and thus with nobody with permissions
to view them.

These orphaned attachments only remain longer than a few weeks if the
`delete_unclaimed_attachments` script has not been run reliably.
Since there is currently no shipped cron job for this, that is most
likely all deployments.

Add a migration to find such orphaned attachments, and re-attach them
to their original message.  The attachments could theoretically have
been referenced later in other messages, but finding those messages --
and determining whether they should have had access to the attachment
-- would be very difficult, so we only fix the original message.

In order to make this somewhat performant, we assume that the Message
row associated with an Attachment made by the email gateway was sent
within 5 minutes of the Attachment's creation, since both must have
been made during one HTTP request.

This is complicated by the message potentially having been deleted; in
this case, the Attachment is moved into ArchivedAttachment, so it can
relate to the ArchivedMessage.  The many-to-many
`zerver_archivedattachment_messages` relationship table cannot use its
own `id` sequence for the value, since the `id` is re-used when the
row is inserted into the `zerver_attachment_messages` table -- we
instead consume a value from the `id` sequence of the
`zerver_attachment_messages` table.
2023-01-24 10:49:46 -08:00

177 lines
7.4 KiB
Python

from datetime import timedelta
from django.conf import settings
from django.db import connection, migrations
from django.db.backends.postgresql.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from psycopg2.sql import SQL, Identifier, Literal
def fix_email_gateway_attachment_owner(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """Re-own and re-attach attachments orphaned by the email gateway.

    Looks up every Attachment that is linked to no message and is owned
    by the email gateway bot, finds the message it was uploaded for, and
    then:

    * sets the attachment's owner to that message's sender;
    * sets the attachment's public flags from the message's stream;
    * links the attachment to the message and sets `has_attachment`.

    If the matching message is an ArchivedMessage, the Attachment row is
    first copied into `zerver_archivedattachment` and removed from
    `zerver_attachment`.

    Does nothing when there are no realms or no orphaned attachments.
    Progress is reported with print().
    """
    Realm = apps.get_model("zerver", "Realm")
    UserProfile = apps.get_model("zerver", "UserProfile")
    Client = apps.get_model("zerver", "Client")
    Message = apps.get_model("zerver", "Message")
    ArchivedMessage = apps.get_model("zerver", "ArchivedMessage")
    Stream = apps.get_model("zerver", "Stream")
    Attachment = apps.get_model("zerver", "Attachment")
    ArchivedAttachment = apps.get_model("zerver", "ArchivedAttachment")

    if not Realm.objects.exists():
        return

    mail_gateway_bot = UserProfile.objects.get(email__iexact=settings.EMAIL_GATEWAY_BOT)
    # "Internal" is the client-id of all mail gateway posts
    internal_client, _ = Client.objects.get_or_create(name="Internal")

    # We only look in Attachment and not ArchivedAttachment because,
    # never having been associated with a message, there is no way for
    # the attachments to have been archived.
    orphan_attachments = Attachment.objects.filter(
        messages=None,
        owner_id=mail_gateway_bot.id,
    )
    orphan_count = len(orphan_attachments)
    if orphan_count == 0:
        return

    print("")
    print(f"Found {orphan_count} email gateway attachments to reattach")

    # Loop-invariant pieces of the raw SQL used for archived messages.
    attachment_columns = [field.column for field in Attachment._meta.fields]
    src_fields = SQL(",").join(
        [Identifier("zerver_attachment", column) for column in attachment_columns]
    )
    dst_fields = SQL(",").join([Identifier(column) for column in attachment_columns])
    move_attachment_query = SQL(
        """
        INSERT INTO zerver_archivedattachment ({dst_fields})
        SELECT {src_fields}
        FROM zerver_attachment
        WHERE id = {id}
        ON CONFLICT (id) DO NOTHING
        RETURNING id
        """
    )
    link_archived_query = SQL(
        """
        INSERT INTO zerver_archivedattachment_messages
        (id, archivedattachment_id, archivedmessage_id)
        VALUES (nextval(pg_get_serial_sequence('zerver_attachment_messages', 'id')),
        {attachment_id}, {message_id})
        """
    )

    for attachment in orphan_attachments:
        # We look for the message posted by "Internal" at the same
        # time, in the same realm, which has a link to the attachment
        # but no "has_attachments". There are potentially other,
        # later, messages (possibly from other users, to other
        # places!) which tried to link to the attachment; we do not
        # fix those references, because finding them efficiently is
        # quite hard, as is calculating if they "should" have had
        # access to the attachment at the time.
        print(
            f"Looking for a message to attach {attachment.path_id}, created {attachment.create_time}"
        )
        possible_matches = []
        for model_class in (Message, ArchivedMessage):
            possible_matches.extend(
                # All messages with this bug will have
                # `has_attachment=False`, since they failed to attach
                # the contents. However, we cannot limit to
                # sender=mail_gateway_bot because they were sent "as"
                # some other user.
                model_class.objects.filter(
                    has_attachment=False,
                    realm_id=attachment.realm_id,
                    sending_client_id=internal_client.id,
                    date_sent__gte=attachment.create_time,
                    date_sent__lte=attachment.create_time + timedelta(minutes=5),
                    content__contains="/user_uploads/" + attachment.path_id,
                ).order_by("date_sent")
            )
        if len(possible_matches) == 0:
            print(" No matches!")
            continue

        # If there are 1 or more matches, we assume the earliest is
        # the correct one, since it's ~impossible to have predicted
        # the URL before it was first sent.  The list is the Message
        # results followed by the ArchivedMessage results, each sorted
        # on its own, so the first element is not necessarily the
        # earliest overall; take the minimum across both tables.
        message = min(possible_matches, key=lambda match: match.date_sent)
        print(f" Found {message.id} @ {message.date_sent} by {message.sender.delivery_email})")

        # If this is an ArchivedMessage, then we have to move the
        # Attachment into an ArchivedAttachment. We also have to
        # generate a zerver_archivedattachment_messages row with an id
        # based on the next free from zerver_attachment_messages, since
        # those are one id space.
        if isinstance(message, ArchivedMessage):
            # move_rows
            with connection.cursor() as cursor:
                cursor.execute(
                    move_attachment_query.format(
                        src_fields=src_fields,
                        dst_fields=dst_fields,
                        id=Literal(attachment.id),
                    )
                )
                archived_ids = [row_id for (row_id,) in cursor.fetchall()]
            if len(archived_ids) != 1:
                # The insert was skipped (ON CONFLICT DO NOTHING), so
                # there is no freshly archived copy.  Leave the live
                # Attachment row untouched rather than deleting it and
                # then failing on the missing id.
                print("!!! Did not create one archived attachment row!")
                continue
            attachment.delete()
            attachment = ArchivedAttachment.objects.get(id=archived_ids[0])

        # Determine message (and thus attachment) properties; this is
        # from do_claim_attachments
        is_message_realm_public = False
        is_message_web_public = False
        if message.recipient.type == 2:  # Recipient.STREAM
            stream = Stream.objects.get(id=message.recipient.type_id)
            is_message_realm_public = not stream.invite_only and not stream.is_in_zephyr_realm
            is_message_web_public = stream.is_web_public

        attachment.owner_id = message.sender_id
        attachment.is_web_public = is_message_web_public
        attachment.is_realm_public = is_message_realm_public
        attachment.save(update_fields=["owner_id", "is_web_public", "is_realm_public"])

        if isinstance(attachment, ArchivedAttachment):
            assert isinstance(message, ArchivedMessage)
            # We need to use the sequence from
            # zerver_attachment_messages, since that id is reused when
            # restoring the message.
            with connection.cursor() as cursor:
                cursor.execute(
                    link_archived_query.format(
                        attachment_id=Literal(attachment.id),
                        message_id=Literal(message.id),
                    )
                )
        else:
            assert isinstance(message, Message)
            attachment.messages.add(message)

        message.has_attachment = True
        message.save(update_fields=["has_attachment"])
class Migration(migrations.Migration):
    """
    Repair attachments orphaned by the email gateway.

    When the gateway posted a message "as" a user, the message's
    attachments were left unattached and owned by the email gateway
    bot.  This migration locates each such attachment, gives ownership
    to the message's sender, and links it to the matching message.
    """

    dependencies = [("zerver", "0422_multiuseinvite_status")]

    operations = [
        migrations.RunPython(
            code=fix_email_gateway_attachment_owner,
            reverse_code=migrations.RunPython.noop,
            elidable=True,
        ),
    ]