import script: Fix broken links to attachments.

The comments explain this pretty well, but basically because we
rewrite the realm ID during the import process, we need to edit all
the message bodies that link to an attachment to instead link to the
post-processed URL where that file will be hosted on the new server.

Fixes #8926.
This commit is contained in:
Rhea Parekh
2018-04-04 00:22:16 +05:30
committed by Tim Abbott
parent 5a9cea4134
commit f4ad464d82
2 changed files with 25 additions and 0 deletions

View File

@@ -1255,6 +1255,26 @@ def fix_datetime_fields(data: TableData, table: TableName) -> None:
if item[field_name] is not None:
item[field_name] = datetime.datetime.fromtimestamp(item[field_name], tz=timezone_utc)
def fix_slack_upload_links(data: TableData, message_table: TableName) -> None:
"""This is slack importer specific for now, though arguably it shouldn't be.
Because the URLs for uploaded files encode the realm ID of the
organization being imported (which is only determined at import
time), we need to rewrite the URLs of links to uploaded files
during the import process.
"""
for message in data[message_table]:
if message['has_attachment'] is True:
# This code path needs to be kept in sync with the
# specific placeholder 'SlackImportAttachment' in the
# Slack import attachment code path. See the function
# 'get_attachment_path_and_content' in the
# 'slack_data_to_zulip_data' module.
if 'SlackImportAttachment' in message['content']:
for key, value in path_maps['attachment_path'].items():
if key in message['content']:
message['content'] = message['content'].replace(key, value)
def current_table_ids(data: TableData, table: TableName) -> List[int]:
"""
Returns the ids present in the current table
@@ -1738,6 +1758,9 @@ def import_message_data(import_dir: Path) -> None:
re_map_foreign_keys(data, 'zerver_message', 'recipient', related_table="recipient")
re_map_foreign_keys(data, 'zerver_message', 'sending_client', related_table='client')
fix_datetime_fields(data, 'zerver_message')
# Parser to update message content with the updated attachment urls
fix_slack_upload_links(data, 'zerver_message')
re_map_foreign_keys(data, 'zerver_message', 'id', related_table='message', id_field=True)
bulk_import_model(data, Message, 'zerver_message')

View File

@@ -646,6 +646,8 @@ def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> T
# 'upload_message_file'
s3_path = "/".join([
str(realm_id),
'SlackImportAttachment', # This is a special placeholder which should be kept
# in sync with 'exports.py' function 'import_message_data'
format(random.randint(0, 255), 'x'),
random_name(18),
sanitize_name(fileinfo['name'])