From f4ad464d82dd30fcbf4bf08a9f16bd74986301fd Mon Sep 17 00:00:00 2001 From: Rhea Parekh Date: Wed, 4 Apr 2018 00:22:16 +0530 Subject: [PATCH] import script: Fix broken links to attachments. The comments explain this pretty well, but basically because we rewrite the realm ID during the import process, we need to edit all the message bodies that link to an attachment to instead link to the post-processed URL where that file will be hosted on the new server. Fixes #8926. --- zerver/lib/export.py | 23 +++++++++++++++++++++++ zerver/lib/slack_data_to_zulip_data.py | 2 ++ 2 files changed, 25 insertions(+) diff --git a/zerver/lib/export.py b/zerver/lib/export.py index 4788c2df25..7d227532a0 100644 --- a/zerver/lib/export.py +++ b/zerver/lib/export.py @@ -1255,6 +1255,26 @@ def fix_datetime_fields(data: TableData, table: TableName) -> None: if item[field_name] is not None: item[field_name] = datetime.datetime.fromtimestamp(item[field_name], tz=timezone_utc) +def fix_slack_upload_links(data: TableData, message_table: TableName) -> None: + """This is slack importer specific for now, though arguably it shouldn't be. + + Because the URLs for uploaded files encode the realm ID of the + organization being imported (which is only determined at import + time), we need to rewrite the URLs of links to uploaded files + during the import process. + """ + for message in data[message_table]: + if message['has_attachment'] is True: + # This code path needs to be kept in sync with the + # specific placeholder 'SlackImportAttachment' in the + # Slack import attachment code path. See the function + # 'get_attachment_path_and_content' in the + # 'slack_data_to_zulip_data' module. 
+ if 'SlackImportAttachment' in message['content']: + for key, value in path_maps['attachment_path'].items(): + if key in message['content']: + message['content'] = message['content'].replace(key, value) + def current_table_ids(data: TableData, table: TableName) -> List[int]: """ Returns the ids present in the current table @@ -1738,6 +1758,9 @@ def import_message_data(import_dir: Path) -> None: re_map_foreign_keys(data, 'zerver_message', 'recipient', related_table="recipient") re_map_foreign_keys(data, 'zerver_message', 'sending_client', related_table='client') fix_datetime_fields(data, 'zerver_message') + # Parser to update message content with the updated attachment urls + fix_slack_upload_links(data, 'zerver_message') + re_map_foreign_keys(data, 'zerver_message', 'id', related_table='message', id_field=True) bulk_import_model(data, Message, 'zerver_message') diff --git a/zerver/lib/slack_data_to_zulip_data.py b/zerver/lib/slack_data_to_zulip_data.py index 93ab95f6c8..fa9a000d20 100755 --- a/zerver/lib/slack_data_to_zulip_data.py +++ b/zerver/lib/slack_data_to_zulip_data.py @@ -646,6 +646,8 @@ def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> T # 'upload_message_file' s3_path = "/".join([ str(realm_id), + 'SlackImportAttachment', # This is a special placeholder which should be kept + # in sync with 'export.py' function 'fix_slack_upload_links' format(random.randint(0, 255), 'x'), random_name(18), sanitize_name(fileinfo['name'])