scheduled_message: Make export/import work.

Closes #25130 by addressing the import/export part of it.
Author: Mateusz Mandera
Date: 2023-04-29 20:45:22 +02:00
Committed by: Tim Abbott
Parent: 780ef71891
Commit: 0abf60fd01
3 changed files with 175 additions and 43 deletions

File 1 of 3

@@ -54,6 +54,7 @@ from zerver.models import (
     RealmPlayground,
     RealmUserDefault,
     Recipient,
+    ScheduledMessage,
     Service,
     Stream,
     Subscription,
@@ -205,7 +206,6 @@ NON_EXPORTED_TABLES = {
     # sense to export, but is relatively low value.
     "zerver_scheduledemail",
     "zerver_scheduledemail_users",
-    "zerver_scheduledmessage",
     # These tables are related to a user's 2FA authentication
     # configuration, which will need to be set up again on the new
     # server.
@@ -223,8 +223,6 @@ NON_EXPORTED_TABLES = {
     "zerver_archivedreaction",
     "zerver_archivedsubmessage",
     "zerver_archivetransaction",
-    # We don't export this until export of ScheduledMessage in general is implemented.
-    "zerver_attachment_scheduled_messages",
     # Social auth tables are not needed post-export, since we don't
     # use any of this state outside of a direct authentication flow.
     "social_auth_association",
@@ -252,6 +250,7 @@ IMPLICIT_TABLES = {
     # ManyToMany relationships are exported implicitly when importing
     # the parent table.
     "zerver_attachment_messages",
+    "zerver_attachment_scheduled_messages",
 }

 ATTACHMENT_TABLES = {
@@ -292,6 +291,7 @@ DATE_FIELDS: Dict[TableName, List[Field]] = {
     "zerver_muteduser": ["date_muted"],
     "zerver_realmauditlog": ["event_time"],
     "zerver_realm": ["date_created"],
+    "zerver_scheduledmessage": ["scheduled_timestamp"],
     "zerver_stream": ["date_created"],
     "zerver_useractivityinterval": ["start", "end"],
     "zerver_useractivity": ["last_visit"],
@@ -691,6 +691,12 @@ def get_realm_config() -> Config:
         include_rows="realm_id__in",
     )

+    Config(
+        custom_tables=["zerver_scheduledmessage"],
+        virtual_parent=realm_config,
+        custom_fetch=custom_fetch_scheduled_messages,
+    )
+
     Config(
         table="zerver_defaultstream",
         model=DefaultStream,
@@ -1047,10 +1053,13 @@ def custom_fetch_user_profile_cross_realm(response: TableData, context: Context)
 def fetch_attachment_data(
-    response: TableData, realm_id: int, message_ids: Set[int]
+    response: TableData, realm_id: int, message_ids: Set[int], scheduled_message_ids: Set[int]
 ) -> List[Attachment]:
     attachments = list(
-        Attachment.objects.filter(realm_id=realm_id, messages__in=message_ids).distinct()
+        Attachment.objects.filter(
+            Q(messages__in=message_ids) | Q(scheduled_messages__in=scheduled_message_ids),
+            realm_id=realm_id,
+        ).distinct()
     )
     response["zerver_attachment"] = make_raw(attachments)
     floatify_datetime_fields(response, "zerver_attachment")
@@ -1060,11 +1069,18 @@ def fetch_attachment_data(
     quite ALL messages for the realm. So, we need to
     clean up our attachment data to have correct
     values for response['zerver_attachment'][<n>]['messages'].
+
+    Same reasoning applies to scheduled_messages.
     """
     for row in response["zerver_attachment"]:
         filtered_message_ids = set(row["messages"]).intersection(message_ids)
         row["messages"] = sorted(filtered_message_ids)

+        filtered_scheduled_message_ids = set(row["scheduled_messages"]).intersection(
+            scheduled_message_ids
+        )
+        row["scheduled_messages"] = sorted(filtered_scheduled_message_ids)
+
     return attachments
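
To make the cleanup above concrete, a standalone toy example (not Zulip code) of the intersection filtering applied to one attachment row:

message_ids = {1, 2, 3}
scheduled_message_ids = {10}

row = {"messages": [2, 3, 7], "scheduled_messages": [10, 11]}
row["messages"] = sorted(set(row["messages"]).intersection(message_ids))
row["scheduled_messages"] = sorted(set(row["scheduled_messages"]).intersection(scheduled_message_ids))
print(row)  # {'messages': [2, 3], 'scheduled_messages': [10]}
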
@@ -1123,6 +1139,19 @@ def custom_fetch_huddle_objects(response: TableData, context: Context) -> None:
     response["zerver_huddle"] = make_raw(Huddle.objects.filter(id__in=huddle_ids))


+def custom_fetch_scheduled_messages(response: TableData, context: Context) -> None:
+    """
+    Simple custom fetch function to fetch only the ScheduledMessage objects that we're allowed to.
+    """
+    realm = context["realm"]
+    exportable_scheduled_message_ids = context["exportable_scheduled_message_ids"]
+
+    query = ScheduledMessage.objects.filter(realm=realm, id__in=exportable_scheduled_message_ids)
+    rows = make_raw(list(query))
+
+    response["zerver_scheduledmessage"] = rows
+
+
 def fetch_usermessages(
     realm: Realm,
     message_ids: Set[int],
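
A minimal sketch of how this hook gets driven, assuming (as the Config entry added above suggests) that export_from_config passes each custom_fetch function the shared context dict that do_export_realm builds; the realm string_id and the IDs below are hypothetical:

from zerver.lib.export import custom_fetch_scheduled_messages
from zerver.models import Realm

realm = Realm.objects.get(string_id="zulip")  # hypothetical realm
response = {}  # TableData: maps table name -> list of raw row dicts
context = dict(
    realm=realm,
    exportable_user_ids=None,
    exportable_scheduled_message_ids={1, 2, 3},  # hypothetical IDs
)
custom_fetch_scheduled_messages(response=response, context=context)
# response["zerver_scheduledmessage"] now holds rows for just those IDs.
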
@@ -1827,6 +1856,28 @@ def do_write_stats_file_for_realm_export(output_dir: Path) -> None:
         f.write("\n")


+def get_exportable_scheduled_message_ids(
+    realm: Realm, public_only: bool = False, consent_message_id: Optional[int] = None
+) -> Set[int]:
+    """
+    Scheduled messages are private to the sender, so which ones we export depends on the
+    public/consent/full export mode.
+    """
+
+    if public_only:
+        return set()
+
+    if consent_message_id:
+        sender_ids = get_consented_user_ids(consent_message_id)
+        return set(
+            ScheduledMessage.objects.filter(sender_id__in=sender_ids, realm=realm).values_list(
+                "id", flat=True
+            )
+        )
+
+    return set(ScheduledMessage.objects.filter(realm=realm).values_list("id", flat=True))
+
+
 def do_export_realm(
     realm: Realm,
     output_dir: Path,
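
For quick reference, the three export modes the helper distinguishes (a sketch; the realm lookup and consent message ID are hypothetical):

from zerver.lib.export import get_exportable_scheduled_message_ids
from zerver.models import Realm

realm = Realm.objects.get(string_id="zulip")
get_exportable_scheduled_message_ids(realm, public_only=True)       # set(): public exports include none
get_exportable_scheduled_message_ids(realm, consent_message_id=42)  # only IDs whose senders consented
get_exportable_scheduled_message_ids(realm)                         # full export: every ID in the realm
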
@@ -1848,12 +1899,20 @@ def do_export_realm(
     create_soft_link(source=output_dir, in_progress=True)

+    exportable_scheduled_message_ids = get_exportable_scheduled_message_ids(
+        realm, public_only, consent_message_id
+    )
+
     logging.info("Exporting data from get_realm_config()...")
     export_from_config(
         response=response,
         config=realm_config,
         seed_object=realm,
-        context=dict(realm=realm, exportable_user_ids=exportable_user_ids),
+        context=dict(
+            realm=realm,
+            exportable_user_ids=exportable_user_ids,
+            exportable_scheduled_message_ids=exportable_scheduled_message_ids,
+        ),
     )
     logging.info("...DONE with get_realm_config() data")
@@ -1892,7 +1951,10 @@
     # zerver_attachment
     attachments = export_attachment_table(
-        realm=realm, output_dir=output_dir, message_ids=message_ids
+        realm=realm,
+        output_dir=output_dir,
+        message_ids=message_ids,
+        scheduled_message_ids=exportable_scheduled_message_ids,
     )

     logging.info("Exporting uploaded files and avatars")
@@ -1921,11 +1983,14 @@
 def export_attachment_table(
-    realm: Realm, output_dir: Path, message_ids: Set[int]
+    realm: Realm, output_dir: Path, message_ids: Set[int], scheduled_message_ids: Set[int]
 ) -> List[Attachment]:
     response: TableData = {}
     attachments = fetch_attachment_data(
-        response=response, realm_id=realm.id, message_ids=message_ids
+        response=response,
+        realm_id=realm.id,
+        message_ids=message_ids,
+        scheduled_message_ids=scheduled_message_ids,
     )
     output_file = os.path.join(output_dir, "attachment.json")
     write_table_data(output_file=output_file, data=response)

File 2 of 3

@@ -58,6 +58,7 @@ from zerver.models import (
     RealmPlayground,
     RealmUserDefault,
     Recipient,
+    ScheduledMessage,
     Service,
     Stream,
     Subscription,
@@ -136,6 +137,7 @@ ID_MAP: Dict[str, Dict[int, int]] = {
     "analytics_streamcount": {},
     "analytics_usercount": {},
     "realmuserdefault": {},
+    "scheduledmessage": {},
 }

 id_map_to_list: Dict[str, Dict[int, List[int]]] = {
@@ -371,6 +373,9 @@ def fix_message_rendered_content(
             ).rendered_content

             message["rendered_content"] = rendered_content
-            message["rendered_content_version"] = markdown_version
+            if "scheduled_timestamp" not in message:
+                # This logic runs also for ScheduledMessage, which doesn't use
+                # the rendered_content_version field.
+                message["rendered_content_version"] = markdown_version
         except Exception:
             # This generally happens with two possible causes:
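
The new guard keys off scheduled_timestamp, a field only ScheduledMessage rows carry in the export data. A standalone toy illustration (the markdown_version value is hypothetical):

markdown_version = 1  # hypothetical version number
rows = [{"id": 1}, {"id": 2, "scheduled_timestamp": 1700000000.0}]
for message in rows:
    message["rendered_content"] = "<p>hi</p>"
    if "scheduled_timestamp" not in message:
        message["rendered_content_version"] = markdown_version
# Only the regular message (id=1) gets rendered_content_version set.
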
@@ -1312,6 +1317,27 @@ def do_import_realm(import_dir: Path, subdomain: str, processes: int = 1) -> Realm:
     sender_map = {user["id"]: user for user in data["zerver_userprofile"]}

+    if "zerver_scheduledmessage" in data:
+        fix_datetime_fields(data, "zerver_scheduledmessage")
+        re_map_foreign_keys(data, "zerver_scheduledmessage", "sender", related_table="user_profile")
+        re_map_foreign_keys(data, "zerver_scheduledmessage", "recipient", related_table="recipient")
+        re_map_foreign_keys(
+            data, "zerver_scheduledmessage", "sending_client", related_table="client"
+        )
+        re_map_foreign_keys(data, "zerver_scheduledmessage", "stream", related_table="stream")
+        re_map_foreign_keys(data, "zerver_scheduledmessage", "realm", related_table="realm")
+
+        fix_upload_links(data, "zerver_scheduledmessage")
+
+        fix_message_rendered_content(
+            realm=realm,
+            sender_map=sender_map,
+            messages=data["zerver_scheduledmessage"],
+        )
+
+        update_model_ids(ScheduledMessage, data, "scheduledmessage")
+        bulk_import_model(data, ScheduledMessage)
+
     # Import zerver_message and zerver_usermessage
     import_message_data(realm=realm, sender_map=sender_map, import_dir=import_dir)
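
A toy sketch of what the foreign-key remapping step accomplishes here (not the real re_map_foreign_keys implementation): exported rows carry the old server's IDs, which have to be rewritten to the IDs allocated on the importing server before the bulk insert.

# Toy remap: rewrite an exported row's sender from old-server ID to new-server ID.
user_id_map = {501: 11}  # old UserProfile id -> newly allocated id (hypothetical)
scheduled_message_row = {"id": 9001, "sender": 501, "content": "test message"}
scheduled_message_row["sender"] = user_id_map[scheduled_message_row["sender"]]
assert scheduled_message_row["sender"] == 11
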
@@ -1524,11 +1550,7 @@ def import_attachments(data: TableData) -> None:
     parent_model = Attachment
     parent_db_table_name = "zerver_attachment"
     parent_singular = "attachment"
-    child_singular = "message"
-    child_plural = "messages"
-    m2m_table_name = "zerver_attachment_messages"
     parent_id = "attachment_id"
-    child_id = "message_id"

     update_model_ids(parent_model, data, "attachment")
     # We don't bulk_import_model yet, because we need to first compute
@@ -1538,18 +1560,20 @@ def import_attachments(data: TableData) -> None:
     # We do this in a slightly convoluted way to anticipate
     # a future where we may need to call re_map_foreign_keys.

+    def format_m2m_data(
+        child_singular: str, child_plural: str, m2m_table_name: str, child_id: str
+    ) -> Tuple[str, List[Record], str]:
         m2m_rows: List[Record] = []
         for parent_row in data[parent_db_table_name]:
             for fk_id in parent_row[child_plural]:
                 m2m_row: Record = {}
                 m2m_row[parent_singular] = parent_row["id"]
-                m2m_row[child_singular] = ID_MAP["message"][fk_id]
+                # child_singular will generally match the model name (e.g. Message, ScheduledMessage)
+                # after lowercasing, and that's what we enter as ID_MAP keys, so this should be
+                # a reasonable assumption to make.
+                m2m_row[child_singular] = ID_MAP[child_singular][fk_id]
                 m2m_rows.append(m2m_row)
-
-            # TODO: Import of scheduled messages is not implemented yet.
-            if "scheduled_messages" in parent_row:
-                del parent_row["scheduled_messages"]

         # Create our table data for insert.
         m2m_data: TableData = {m2m_table_name: m2m_rows}
         convert_to_id_fields(m2m_data, m2m_table_name, parent_singular)
@@ -1560,6 +1584,18 @@ def import_attachments(data: TableData) -> None:
         for parent_row in data[parent_db_table_name]:
             del parent_row[child_plural]

+        return m2m_table_name, m2m_rows, child_id
+
+    messages_m2m_tuple = format_m2m_data(
+        "message", "messages", "zerver_attachment_messages", "message_id"
+    )
+    scheduled_messages_m2m_tuple = format_m2m_data(
+        "scheduledmessage",
+        "scheduled_messages",
+        "zerver_attachment_scheduled_messages",
+        "scheduledmessage_id",
+    )
+
     # Update 'path_id' for the attachments
     for attachment in data[parent_db_table_name]:
         attachment["path_id"] = path_maps["attachment_path"][attachment["path_id"]]
@@ -1571,6 +1607,10 @@ def import_attachments(data: TableData) -> None:
     # TODO: Do this the kosher Django way. We may find a
     # better way to do this in Django 1.9 particularly.
     with connection.cursor() as cursor:
+        for m2m_table_name, m2m_rows, child_id in [
+            messages_m2m_tuple,
+            scheduled_messages_m2m_tuple,
+        ]:
             sql_template = SQL(
                 """
                 INSERT INTO {m2m_table_name} ({parent_id}, {child_id}) VALUES %s
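
To make the ID_MAP keying assumption above concrete, a standalone toy sketch (not the real data structures): the child_singular string doubles as the ID_MAP key, and the same SQL template then inserts (attachment_id, <child>_id) pairs for both m2m tables.

ID_MAP = {
    "message": {101: 1, 102: 2},  # old exported ID -> new database ID
    "scheduledmessage": {7: 31},
}

def remap(child_singular, old_ids):
    return [ID_MAP[child_singular][old_id] for old_id in old_ids]

print(remap("message", [101, 102]))    # [1, 2]
print(remap("scheduledmessage", [7]))  # [31]
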

File 3 of 3

@@ -27,6 +27,7 @@ from zerver.actions.realm_settings import (
     do_change_realm_plan_type,
     do_set_realm_authentication_methods,
 )
+from zerver.actions.scheduled_messages import check_schedule_message
 from zerver.actions.user_activity import do_update_user_activity, do_update_user_activity_interval
 from zerver.actions.user_status import do_update_user_status
 from zerver.actions.user_topics import do_set_user_topic_visibility_policy
@@ -66,6 +67,7 @@ from zerver.models import (
     RealmEmoji,
     RealmUserDefault,
     Recipient,
+    ScheduledMessage,
     Stream,
     Subscription,
     UserGroup,
@@ -790,6 +792,22 @@ class RealmImportExportTest(ExportFile):
             sample_user, client, timezone_now(), UserPresence.LEGACY_STATUS_ACTIVE_INT
         )

+        # Set up scheduled messages.
+        ScheduledMessage.objects.filter(realm=original_realm).delete()
+        check_schedule_message(
+            sender=hamlet,
+            client=get_client("website"),
+            recipient_type_name="stream",
+            message_to=[Stream.objects.get(name="Denmark", realm=original_realm).id],
+            topic_name="test-import",
+            message_content="test message",
+            scheduled_message_id=None,
+            deliver_at=timezone_now() + datetime.timedelta(days=365),
+            realm=original_realm,
+        )
+        original_scheduled_message = ScheduledMessage.objects.filter(realm=original_realm).last()
+        assert original_scheduled_message is not None
+
         # send Cordelia to the islands
         do_update_user_status(
             user_profile=cordelia,
@@ -940,6 +958,15 @@
             Recipient.objects.get(type=Recipient.HUDDLE, type_id=huddle_object.id).id,
         )

+        self.assertEqual(ScheduledMessage.objects.filter(realm=imported_realm).count(), 1)
+        imported_scheduled_message = ScheduledMessage.objects.first()
+        assert imported_scheduled_message is not None
+        self.assertEqual(imported_scheduled_message.content, original_scheduled_message.content)
+        self.assertEqual(
+            imported_scheduled_message.scheduled_timestamp,
+            original_scheduled_message.scheduled_timestamp,
+        )
+
         for user_profile in UserProfile.objects.filter(realm=imported_realm):
             # Check that all Subscriptions have the correct is_user_active set.
             self.assertEqual(