mirror of
https://github.com/zulip/zulip.git
synced 2025-11-03 21:43:21 +00:00
In #23380, we are changing all occurrences of uri with url in order to follow the latest URL standard. Previous PRs #25038 and #25045 has replaced the occurences of uri that has no direct relation with realm. This commit changes just the model property, which has no API compatibility concerns.
215 lines
8.7 KiB
Python
215 lines
8.7 KiB
Python
import sys
|
|
from contextlib import ExitStack, redirect_stdout
|
|
from typing import TextIO
|
|
|
|
from django.conf import settings
|
|
from django.db import migrations
|
|
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
|
|
from django.db.migrations.state import StateApps
|
|
|
|
BUILD_BAD_MOVES_TABLE = """
|
|
CREATE TEMPORARY TABLE bad_moves_cve_2024_27286 AS (
|
|
WITH messages_with_dangling_usermessages AS (
|
|
SELECT zerver_message.id AS message_id,
|
|
ARRAY_AGG(DISTINCT zerver_usermessage.id) AS extra_usermessage_ids,
|
|
edit_history::jsonb
|
|
|
|
FROM zerver_message
|
|
|
|
JOIN zerver_stream
|
|
ON zerver_stream.recipient_id = zerver_message.recipient_id
|
|
|
|
JOIN zerver_usermessage
|
|
ON zerver_usermessage.message_id = zerver_message.id
|
|
|
|
LEFT JOIN zerver_subscription
|
|
ON zerver_subscription.recipient_id = zerver_stream.recipient_id
|
|
AND zerver_subscription.user_profile_id = zerver_usermessage.user_profile_id
|
|
|
|
WHERE zerver_stream.invite_only
|
|
AND zerver_subscription.id IS NULL
|
|
AND zerver_message.edit_history IS NOT NULL
|
|
|
|
GROUP BY zerver_message.id
|
|
)
|
|
SELECT message_id,
|
|
extra_usermessage_ids,
|
|
(history_entry->>'timestamp') AS timestamp_moved,
|
|
(history_entry->>'prev_stream')::numeric AS moved_from_stream_id,
|
|
(history_entry->>'stream')::numeric AS moved_to_stream_id
|
|
FROM messages_with_dangling_usermessages
|
|
CROSS JOIN JSONB_ARRAY_ELEMENTS(edit_history) AS history_entry
|
|
WHERE history_entry->>'prev_stream' IS NOT NULL
|
|
ORDER BY 1 ASC
|
|
)
|
|
"""
|
|
|
|
|
|
# The SQL query above builds a `bad_moves_cve_2024_27286` temporary table, which
|
|
# finds all moved messages where there are UserMessage rows but no
|
|
# Subscription rows. However, the difficulty is that this has a
|
|
# false-negative: between 2bc3924672fb and e566e985e4d2,
|
|
# multi-message moves only recorded their move on one of the
|
|
# messages. There may thus be messages with dangling UserMessage
|
|
# rows which are in the same topic as ones we found already, but
|
|
# do not record as having moved, so were not found by that filter.
|
|
#
|
|
# We determine when zerver/0310_jsoonfield, the migration next merged
|
|
# after e566e985e4d2 was merged, was run, and examine all messages
|
|
# moved earlier than that migration. We do not limit the early side
|
|
# of moves, since it is already naturally bounded by when message
|
|
# moves were introduced, and it is plausible that servers were running
|
|
# message-move the code before it was merged.
|
|
#
|
|
# For each potential single-message move in this range, we examine all
|
|
# other messages in the topic which were sent before the move, and
|
|
# check them for dangling UserMessage rows from users who are not
|
|
# subscribed. We then compare those newly-found messages against the
|
|
# known bad messages to guess which move was responsible for them.
|
|
BROADEN_MOVES = """
|
|
INSERT INTO bad_moves_cve_2024_27286 (
|
|
WITH other_messages AS (
|
|
SELECT messages_in_topic.id AS message_id,
|
|
messages_in_topic.recipient_id,
|
|
UPPER(messages_in_topic.subject) AS upper_topic,
|
|
messages_in_topic.date_sent
|
|
FROM bad_moves_cve_2024_27286
|
|
|
|
JOIN zerver_message bad_message
|
|
ON bad_moves_cve_2024_27286.message_id = bad_message.id
|
|
|
|
JOIN zerver_message messages_in_topic
|
|
ON bad_message.recipient_id = messages_in_topic.recipient_id
|
|
AND UPPER(bad_message.subject) = UPPER(messages_in_topic.subject)
|
|
|
|
WHERE TO_TIMESTAMP(timestamp_moved::numeric) < (
|
|
SELECT applied FROM django_migrations WHERE app = 'zerver' AND name = '0310_jsonfield'
|
|
)
|
|
AND messages_in_topic.date_sent < TO_TIMESTAMP(timestamp_moved::numeric)
|
|
AND messages_in_topic.id NOT IN (SELECT already.message_id FROM bad_moves_cve_2024_27286 already)
|
|
|
|
GROUP BY 1
|
|
),
|
|
other_bad_messages AS (
|
|
SELECT other_messages.message_id,
|
|
other_messages.recipient_id,
|
|
other_messages.upper_topic,
|
|
other_messages.date_sent,
|
|
ARRAY_AGG(DISTINCT zerver_usermessage.id) as extra_usermessage_ids
|
|
|
|
FROM other_messages
|
|
|
|
JOIN zerver_usermessage
|
|
ON zerver_usermessage.message_id = other_messages.message_id
|
|
|
|
LEFT JOIN zerver_subscription
|
|
ON zerver_subscription.recipient_id = other_messages.recipient_id
|
|
AND zerver_subscription.user_profile_id = zerver_usermessage.user_profile_id
|
|
|
|
WHERE zerver_subscription.id IS NULL
|
|
|
|
GROUP BY 1, 2, 3, 4
|
|
)
|
|
SELECT other_bad_messages.message_id,
|
|
other_bad_messages.extra_usermessage_ids,
|
|
move_trigger.timestamp_moved,
|
|
move_trigger.moved_from_stream_id,
|
|
move_trigger.moved_to_stream_id
|
|
FROM other_bad_messages
|
|
LEFT JOIN LATERAL (
|
|
SELECT bad_moves_cve_2024_27286.*
|
|
FROM bad_moves_cve_2024_27286
|
|
JOIN zerver_message
|
|
ON zerver_message.id = bad_moves_cve_2024_27286.message_id
|
|
JOIN zerver_stream
|
|
ON zerver_stream.recipient_id = zerver_message.recipient_id
|
|
AND bad_moves_cve_2024_27286.moved_to_stream_id = zerver_stream.id
|
|
WHERE other_bad_messages.recipient_id = zerver_message.recipient_id
|
|
AND other_bad_messages.upper_topic = UPPER(zerver_message.subject)
|
|
AND TO_TIMESTAMP(bad_moves_cve_2024_27286.timestamp_moved::numeric) > other_bad_messages.date_sent
|
|
ORDER BY bad_moves_cve_2024_27286.message_id ASC, bad_moves_cve_2024_27286.timestamp_moved ASC
|
|
LIMIT 1
|
|
) move_trigger ON true
|
|
)
|
|
"""
|
|
|
|
|
|
def log_extra_usermessage_rows(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
|
|
Message = apps.get_model("zerver", "message")
|
|
UserMessage = apps.get_model("zerver", "usermessage")
|
|
Stream = apps.get_model("zerver", "stream")
|
|
|
|
messages = Message.objects.raw(
|
|
"SELECT * FROM zerver_message JOIN bad_moves_cve_2024_27286 ON message_id = zerver_message.id"
|
|
)
|
|
if len(messages) == 0: # RawQuerySet does not have .exists() or .count()
|
|
return
|
|
|
|
with ExitStack() as stack:
|
|
if settings.PRODUCTION:
|
|
log_file: TextIO = stack.enter_context(
|
|
open("/var/log/zulip/migrations_0501_delete_dangling_usermessages.log", "w")
|
|
)
|
|
else:
|
|
log_file = sys.stderr
|
|
print("", file=log_file)
|
|
stack.enter_context(redirect_stdout(log_file))
|
|
|
|
for message in messages:
|
|
realm = message.realm
|
|
# Reimplement realm.url
|
|
if realm.string_id == "":
|
|
hostname = settings.EXTERNAL_HOST
|
|
else:
|
|
hostname = settings.REALM_HOSTS.get(
|
|
realm.string_id, f"{realm.string_id}.{settings.EXTERNAL_HOST}"
|
|
)
|
|
|
|
stream = Stream.objects.only("id").get(recipient_id=message.recipient_id)
|
|
print(
|
|
f"{settings.EXTERNAL_URI_SCHEME}{hostname}/#narrow/stream/{stream.id}/near/{message.id}",
|
|
)
|
|
print(
|
|
f" Moved at {message.timestamp_moved} from stream id {message.moved_from_stream_id} to {message.moved_to_stream_id}"
|
|
)
|
|
|
|
# Find out how many of those are users, and not bots
|
|
ums = (
|
|
UserMessage.objects.filter(
|
|
id__in=message.extra_usermessage_ids, user_profile__is_bot=False
|
|
)
|
|
.select_related("user_profile")
|
|
.only("flags", "user_profile__delivery_email")
|
|
)
|
|
print(
|
|
f" Was still readable by {len(ums)} users, {len(message.extra_usermessage_ids) - len(ums)} bots",
|
|
)
|
|
if len(message.extra_usermessage_ids) > 25:
|
|
continue
|
|
for um in ums:
|
|
read = "(read)" if um.flags & 1 else "(unread)"
|
|
print(f" {um.user_profile.delivery_email} {read}")
|
|
print("")
|
|
|
|
|
|
class Migration(migrations.Migration):
|
|
atomic = False
|
|
|
|
dependencies = [
|
|
("zerver", "0496_alter_scheduledmessage_read_by_sender"),
|
|
]
|
|
|
|
operations = [
|
|
migrations.RunSQL(BUILD_BAD_MOVES_TABLE, elidable=True),
|
|
migrations.RunSQL(BROADEN_MOVES, elidable=True),
|
|
migrations.RunPython(log_extra_usermessage_rows, reverse_code=migrations.RunPython.noop),
|
|
migrations.RunSQL(
|
|
"""
|
|
DELETE FROM zerver_usermessage
|
|
WHERE id IN (SELECT UNNEST(extra_usermessage_ids) FROM bad_moves_cve_2024_27286)
|
|
""",
|
|
elidable=True,
|
|
),
|
|
migrations.RunSQL("DROP TABLE bad_moves_cve_2024_27286", elidable=True),
|
|
]
|