slack import: Extract process_message_files().

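This is mostly an extraction, but it does change the
way we calculate `content`.  Previously, each file's
markdown link overwrote `content`, discarding the
message text and any earlier links.  Now we append the
markdown links from ALL files to any content that came
in the message itself.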

Separating this out also allows us to add more
test coverage for the extracted code.
This commit is contained in:
Steve Howell
2018-10-28 15:48:52 +00:00
committed by Tim Abbott
parent 00f822a26a
commit 2f58eb1057
2 changed files with 148 additions and 34 deletions

View File

@@ -514,7 +514,8 @@ def get_all_messages(slack_data_dir: str, added_channels: AddedChannelsT) -> Lis
all_messages += messages
return all_messages
def channel_message_to_zerver_message(realm_id: int, users: List[ZerverFieldsT],
def channel_message_to_zerver_message(realm_id: int,
users: List[ZerverFieldsT],
added_users: AddedUsersT,
added_recipient: AddedRecipientsT,
all_messages: List[ZerverFieldsT],
@@ -567,7 +568,6 @@ def channel_message_to_zerver_message(realm_id: int, users: List[ZerverFieldsT],
]:
continue
has_attachment = has_image = False
try:
content, mentioned_user_ids, has_link = convert_to_zulip_markdown(
message['text'], users, added_channels, added_users)
@@ -595,39 +595,23 @@ def channel_message_to_zerver_message(realm_id: int, users: List[ZerverFieldsT],
if subtype in ["bot_add", "sh_room_created", "me_message"]:
content = ('/me %s' % (content))
files = message.get('files', [])
if subtype == 'file_share':
# In Slack messages, uploads can either have the subtype as 'file_share' or
# have the upload information in 'files' keyword
files = [message['file']]
file_info = process_message_files(
message=message,
domain_name=domain_name,
realm_id=realm_id,
message_id=message_id,
user=user,
users=users,
added_users=added_users,
zerver_attachment=zerver_attachment,
uploads_list=uploads_list,
)
for fileinfo in files:
url = fileinfo['url_private']
# For attachments with slack download link
if 'files.slack.com' in url:
has_attachment = has_link = True
has_image = True if 'image' in fileinfo['mimetype'] else False
content += file_info['content']
has_link = has_link or file_info['has_link']
file_user = [iterate_user for iterate_user in users if message['user'] == iterate_user['id']]
file_user_email = get_user_email(file_user[0], domain_name)
s3_path, content = get_attachment_path_and_content(fileinfo, realm_id)
# construct attachments
build_uploads(added_users[user], realm_id, file_user_email, fileinfo, s3_path,
uploads_list)
build_attachment(realm_id, {message_id}, added_users[user],
fileinfo, s3_path, zerver_attachment)
# For attachments with link not from slack
# Example: Google drive integration
else:
has_link = True
if 'title' in fileinfo:
file_name = fileinfo['title']
else:
file_name = fileinfo['name']
content = '[%s](%s)' % (file_name, fileinfo['url_private'])
has_attachment = file_info['has_attachment']
has_image = file_info['has_image']
# construct message
subject = 'imported from slack'
@@ -652,6 +636,70 @@ def channel_message_to_zerver_message(realm_id: int, users: List[ZerverFieldsT],
return zerver_message, zerver_usermessage, zerver_attachment, uploads_list, \
reaction_list, id_list
def process_message_files(message: ZerverFieldsT,
                          domain_name: str,
                          realm_id: int,
                          message_id: int,
                          user: str,
                          users: List[ZerverFieldsT],
                          added_users: AddedUsersT,
                          zerver_attachment: List[ZerverFieldsT],
                          uploads_list: List[ZerverFieldsT]) -> Dict[str, Any]:
    """Build markdown links and attachment records for one message's files.

    The files come from ``message['files']``, or from ``message['file']``
    when the message subtype is ``'file_share'``.

    Files whose ``url_private`` contains ``files.slack.com`` are recorded as
    uploads: ``build_uploads`` and ``build_attachment`` are called with
    ``uploads_list`` and ``zerver_attachment`` respectively.  Every other
    file only contributes a markdown link to its ``url_private``.

    Returns a dict with these keys:

    * ``content``: the markdown links of all files, one per line
      (an empty string when the message has no files).
    * ``has_attachment``: True if any file is hosted on files.slack.com.
    * ``has_image``: True if any Slack-hosted file has 'image' in its
      mimetype.
    * ``has_link``: True if the message has at least one file.
    """
    has_attachment = False
    has_image = False
    has_link = False

    files = message.get('files', [])

    subtype = message.get('subtype')

    if subtype == 'file_share':
        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        files = [message['file']]

    markdown_links = []

    for fileinfo in files:
        url = fileinfo['url_private']

        if 'files.slack.com' in url:
            # For attachments with slack download link
            has_attachment = True
            has_link = True

            # Accumulate instead of overwriting: a later non-image upload
            # must not clear the flag set by an earlier image in the same
            # message.
            if 'image' in fileinfo['mimetype']:
                has_image = True

            file_user = [iterate_user for iterate_user in users
                         if message['user'] == iterate_user['id']]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content_for_link = get_attachment_path_and_content(fileinfo, realm_id)
            markdown_links.append(content_for_link)

            # construct attachments
            build_uploads(added_users[user], realm_id, file_user_email, fileinfo, s3_path,
                          uploads_list)

            build_attachment(realm_id, {message_id}, added_users[user],
                             fileinfo, s3_path, zerver_attachment)
        else:
            # For attachments with link not from slack
            # Example: Google drive integration
            has_link = True
            if 'title' in fileinfo:
                file_name = fileinfo['title']
            else:
                file_name = fileinfo['name']
            markdown_links.append('[%s](%s)' % (file_name, url))

    content = '\n'.join(markdown_links)

    return dict(
        content=content,
        has_attachment=has_attachment,
        has_image=has_image,
        has_link=has_link,
    )
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str,
str]:
# Should be kept in sync with its equivalent in zerver/lib/uploads in the function

View File

@@ -19,6 +19,7 @@ from zerver.data_import.slack import (
convert_slack_workspace_messages,
do_convert_data,
process_avatars,
process_message_files,
)
from zerver.data_import.import_util import (
build_zerver_realm,
@@ -495,7 +496,7 @@ class SlackImporter(ZulipTestCase):
user_list = [] # type: List[Dict[str, Any]]
reactions = [{"name": "grinning", "users": ["U061A5N1G"], "count": 1}]
attachments = uploads = [] # type: List[Dict[str, Any]]
id_list = (2, 0, 1)
id_list = (2, 0)
zerver_usermessage = [{'id': 3}, {'id': 5}, {'id': 6}, {'id': 9}]
@@ -574,3 +575,68 @@ class SlackImporter(ZulipTestCase):
# remove tar file created in 'do_convert_data' function
os.remove(output_dir + '.tar.gz')
self.assertFalse(os.path.exists(output_dir))
def test_message_files(self) -> None:
    """Run process_message_files() on a message carrying two files.

    The first file lives on files.slack.com and is expected to yield one
    attachment row and one upload row; the second is an external link and
    is expected to contribute only a markdown link to the content.
    """
    alice_id = 7
    alice = {
        'id': alice_id,
        'profile': {
            'email': 'alice@example.com',
        },
    }

    # File hosted by Slack itself.
    slack_hosted_image = {
        'url_private': 'files.slack.com/apple.png',
        'title': 'Apple',
        'name': 'apple.png',
        'mimetype': 'image/png',
        'timestamp': 9999,
        'created': 8888,
        'size': 3000000,
    }
    # File that merely links somewhere else.
    external_link = {
        'url_private': 'example.com/banana.zip',
        'title': 'banana',
    }

    message = {
        'user': alice_id,
        'files': [slack_hosted_image, external_link],
    }

    realm_id = 5

    zerver_attachment = []  # type: List[Dict[str, Any]]
    uploads_list = []  # type: List[Dict[str, Any]]

    info = process_message_files(
        message=message,
        domain_name='example.com',
        realm_id=realm_id,
        message_id=99,
        user='alice',
        users=[alice],
        added_users={'alice': alice_id},
        zerver_attachment=zerver_attachment,
        uploads_list=uploads_list,
    )

    # Exactly one of the two files is stored as an upload.
    self.assertEqual(len(zerver_attachment), 1)
    self.assertEqual(len(uploads_list), 1)

    image_path = zerver_attachment[0]['path_id']
    self.assertIn('/SlackImportAttachment/', image_path)

    expected_content = '[Apple](/user_uploads/{image_path})\n[banana](example.com/banana.zip)'.format(image_path=image_path)
    self.assertEqual(info['content'], expected_content)

    self.assertTrue(info['has_link'])
    self.assertTrue(info['has_image'])

    upload = uploads_list[0]
    self.assertEqual(upload['s3_path'], image_path)
    self.assertEqual(upload['realm_id'], realm_id)
    self.assertEqual(upload['user_profile_email'], 'alice@example.com')