mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 04:52:12 +00:00
slack_import: Improve topic name for Slack threads.
This updates the topic name format for Slack threads to include a snippet of the original message. Currently the format looks like this; "{date} Slack thread {n}", which provides little to no context about the thread. Currently we also use the `thread_ts_str` key to keep track of Slack threads we're converting, this key format makes it so that it has a small chance of combining threads with the same timestamp under one topic. This commit updates it to use a new key format, `thread_key`. Fixes #27661.
This commit is contained in:
@@ -165,8 +165,8 @@ in mind about the import process:
|
||||
| Multi Channel Guest | Guest |
|
||||
| Channel creator | none |
|
||||
|
||||
- Slack threads are imported as topics with names like "2023-05-30
|
||||
Slack thread 1".
|
||||
- Slack threads are imported as topics with names that include snippets of the
|
||||
original message, such as "2023-05-30 Hi, can anyone reply if you're o…".
|
||||
|
||||
- Message edit history and `@user joined #channel_name` messages are not imported.
|
||||
|
||||
|
@@ -51,6 +51,7 @@ from zerver.data_import.slack_message_conversion import (
|
||||
from zerver.lib.emoji import codepoint_to_name, get_emoji_file_name
|
||||
from zerver.lib.exceptions import SlackImportInvalidFileError
|
||||
from zerver.lib.export import MESSAGE_BATCH_CHUNK_SIZE, do_common_export_processes
|
||||
from zerver.lib.message import truncate_content
|
||||
from zerver.lib.mime_types import guess_type
|
||||
from zerver.lib.storage import static_path
|
||||
from zerver.lib.thumbnail import THUMBNAIL_ACCEPT_IMAGE_TYPES, resize_realm_icon
|
||||
@@ -64,6 +65,7 @@ from zerver.models import (
|
||||
Recipient,
|
||||
UserProfile,
|
||||
)
|
||||
from zerver.models.constants import MAX_TOPIC_NAME_LENGTH
|
||||
|
||||
SlackToZulipUserIDT: TypeAlias = dict[str, int]
|
||||
AddedChannelsT: TypeAlias = dict[str, tuple[str, int]]
|
||||
@@ -898,6 +900,49 @@ def get_messages_iterator(
|
||||
yield from sorted(messages_for_one_day, key=get_timestamp_from_message)
|
||||
|
||||
|
||||
def get_parent_user_id_from_thread_message(thread_message: ZerverFieldsT, subtype: str) -> str:
|
||||
"""
|
||||
This retrieves the user id of the sender of the original thread
|
||||
message.
|
||||
"""
|
||||
if subtype == "thread_broadcast":
|
||||
return thread_message["root"]["user"]
|
||||
elif thread_message["thread_ts"] == thread_message["ts"]:
|
||||
# This is the original thread message
|
||||
return thread_message["user"]
|
||||
else:
|
||||
return thread_message["parent_user_id"]
|
||||
|
||||
|
||||
def get_zulip_thread_topic_name(
|
||||
message_content: str, thread_ts: datetime, thread_counter: dict[str, int]
|
||||
) -> str:
|
||||
"""
|
||||
The topic name format is date + message snippet + counter.
|
||||
|
||||
e.g "2024-05-22 Hello this is a long message that will be c… (1)"
|
||||
"""
|
||||
thread_date = thread_ts.strftime(r"%Y-%m-%d")
|
||||
|
||||
# Truncate
|
||||
truncated_zulip_topic_name = truncate_content(
|
||||
f"{thread_date} {message_content}".strip(), MAX_TOPIC_NAME_LENGTH, "…"
|
||||
)
|
||||
collision = thread_counter[truncated_zulip_topic_name]
|
||||
thread_counter[truncated_zulip_topic_name] += 1
|
||||
count = (f" ({collision + 1})") if collision > 0 else ""
|
||||
|
||||
# Important: The count is at the end, after …, so we need to
|
||||
# subtract its length when doing truncation.
|
||||
final_topic_name = (
|
||||
truncate_content(
|
||||
f"{thread_date} {message_content}".strip(), MAX_TOPIC_NAME_LENGTH - len(f"{count}"), "…"
|
||||
)
|
||||
+ f"{count}"
|
||||
)
|
||||
return final_topic_name
|
||||
|
||||
|
||||
def channel_message_to_zerver_message(
|
||||
realm_id: int,
|
||||
users: list[ZerverFieldsT],
|
||||
@@ -1034,22 +1079,21 @@ def channel_message_to_zerver_message(
|
||||
has_image = file_info["has_image"]
|
||||
|
||||
# Slack's unthreaded messages go into a single topic, while
|
||||
# threads each generate a unique topic labeled by the date and
|
||||
# a counter among topics on that day.
|
||||
# threads each generate a unique topic labeled by the date,
|
||||
# a snippet of the original message and a counter if there
|
||||
# are any thread with the same topic name
|
||||
topic_name = "imported from Slack"
|
||||
if convert_slack_threads and not is_direct_message_type and "thread_ts" in message:
|
||||
thread_ts = datetime.fromtimestamp(float(message["thread_ts"]), tz=timezone.utc)
|
||||
thread_ts_str = thread_ts.strftime(r"%Y/%m/%d %H:%M:%S")
|
||||
# The topic name is "2015-08-18 Slack thread 2", where the counter at the end is to disambiguate
|
||||
# threads with the same date.
|
||||
if thread_ts_str in thread_map:
|
||||
topic_name = thread_map[thread_ts_str]
|
||||
parent_user_id = get_parent_user_id_from_thread_message(message, subtype)
|
||||
thread_key = f"{thread_ts_str}-{parent_user_id}"
|
||||
|
||||
if thread_key in thread_map:
|
||||
topic_name = thread_map[thread_key]
|
||||
else:
|
||||
thread_date = thread_ts.strftime(r"%Y-%m-%d")
|
||||
thread_counter[thread_date] += 1
|
||||
count = thread_counter[thread_date]
|
||||
topic_name = f"{thread_date} Slack thread {count}"
|
||||
thread_map[thread_ts_str] = topic_name
|
||||
topic_name = get_zulip_thread_topic_name(content, thread_ts, thread_counter)
|
||||
thread_map[thread_key] = topic_name
|
||||
|
||||
if is_direct_message_type:
|
||||
topic_name = ""
|
||||
|
@@ -1359,7 +1359,13 @@ class SlackImporter(ZulipTestCase):
|
||||
user_data = [
|
||||
{"id": "U066MTL5U", "name": "john doe", "deleted": False, "real_name": "John"},
|
||||
{"id": "U061A5N1G", "name": "jane doe", "deleted": False, "real_name": "Jane"},
|
||||
{"id": "U061A1R2R", "name": "jon", "deleted": False, "real_name": "Jon"},
|
||||
{
|
||||
"id": "U061A1R2R",
|
||||
"name": "jon",
|
||||
"deleted": False,
|
||||
"real_name": "Jon",
|
||||
"profile": {"email": "jon@example.com"},
|
||||
},
|
||||
]
|
||||
|
||||
slack_user_id_to_zulip_user_id = {"U066MTL5U": 5, "U061A5N1G": 24, "U061A1R2R": 43}
|
||||
@@ -1380,10 +1386,10 @@ class SlackImporter(ZulipTestCase):
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "random",
|
||||
"text": "message body text",
|
||||
"user": "U061A5N1G",
|
||||
"ts": "1439868294.000006",
|
||||
# Thread!
|
||||
"ts": "1434139102.000002",
|
||||
# Start of thread 1!
|
||||
"thread_ts": "1434139102.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
@@ -1391,23 +1397,106 @@ class SlackImporter(ZulipTestCase):
|
||||
"text": "random",
|
||||
"user": "U061A5N1G",
|
||||
"ts": "1439868294.000007",
|
||||
# A reply to thread 1
|
||||
"parent_user_id": "U061A5N1G",
|
||||
"thread_ts": "1434139102.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "random",
|
||||
"text": "random message but it's too long for the thread topic name",
|
||||
"user": "U061A5N1G",
|
||||
"ts": "1439868294.000008",
|
||||
# A different Thread!
|
||||
# Start of thread 2!
|
||||
"thread_ts": "1439868294.000008",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "random",
|
||||
"text": "replying to the second thread :)",
|
||||
"user": "U061A1R2R",
|
||||
"ts": "1439869294.000008",
|
||||
# A reply to thread 2
|
||||
"parent_user_id": "U061A5N1G",
|
||||
"thread_ts": "1439868294.000008",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "message body text",
|
||||
"user": "U061A5N1G",
|
||||
"ts": "1439868295.000008",
|
||||
# Another different Thread!
|
||||
"thread_ts": "1439868295.000008",
|
||||
"ts": "1434139200.000002",
|
||||
# Start of thread 3!
|
||||
"thread_ts": "1434139200.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "The first reply to the third thread",
|
||||
"user": "U061A1R2R",
|
||||
"ts": "1439869295.000008",
|
||||
# A reply to thread 3!
|
||||
"parent_user_id": "U061A5N1G",
|
||||
"thread_ts": "1434139200.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "<@U061A1R2R> please reply to this message",
|
||||
"user": "U061A5N1G",
|
||||
"ts": "1437139200.000002",
|
||||
# Start of thread 4!
|
||||
"thread_ts": "1437139200.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "Yes?",
|
||||
"user": "U061A1R2R",
|
||||
"ts": "1440869295.000008",
|
||||
# A reply to thread 4!
|
||||
"parent_user_id": "U061A5N1G",
|
||||
"thread_ts": "1434139200.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "Look!",
|
||||
"user": "U061A1R2R",
|
||||
"ts": "1537139200.000002",
|
||||
# Start of thread 5!
|
||||
"thread_ts": "1537139200.000002",
|
||||
"has_image": True,
|
||||
"channel_name": "random",
|
||||
"files": [
|
||||
{
|
||||
"url_private": "https://files.slack.com/apple.png",
|
||||
"title": "Apple",
|
||||
"name": "apple.png",
|
||||
"mimetype": "image/png",
|
||||
"timestamp": 9999,
|
||||
"created": 8888,
|
||||
"size": 3000000,
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
"text": "Delicious",
|
||||
"user": "U061A5N1G",
|
||||
"ts": "1637139200.000002",
|
||||
# A reply to thread 5!
|
||||
"parent_user_id": "U061A1R2R",
|
||||
"thread_ts": "1537139200.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "*foo* _bar_ ~baz~ [qux](https://chat.zulip.org)",
|
||||
"user": "U061A1R2R",
|
||||
"ts": "1547139200.000002",
|
||||
# Start of thread 6!
|
||||
"thread_ts": "1547139200.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "Delicious",
|
||||
"user": "U061A5N1G",
|
||||
"ts": "1637139200.000002",
|
||||
# A reply to thread 6!
|
||||
"parent_user_id": "U061A1R2R",
|
||||
"thread_ts": "1547139200.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
]
|
||||
@@ -1445,26 +1534,80 @@ class SlackImporter(ZulipTestCase):
|
||||
# functioning already tested in helper function
|
||||
self.assertEqual(zerver_usermessage, [])
|
||||
# subtype: channel_join is filtered
|
||||
self.assert_length(zerver_message, 5)
|
||||
self.assert_length(zerver_message, 13)
|
||||
|
||||
self.assertEqual(uploads, [])
|
||||
self.assertEqual(attachment, [])
|
||||
self.assert_length(uploads, 1)
|
||||
self.assert_length(attachment, 1)
|
||||
|
||||
# Message conversion already tested in tests.test_slack_message_conversion
|
||||
self.assertEqual(zerver_message[0]["content"], "@**Jane**: hey!")
|
||||
self.assertEqual(zerver_message[0]["has_link"], False)
|
||||
self.assertEqual(zerver_message[1]["content"], "random")
|
||||
self.assertEqual(zerver_message[1][EXPORT_TOPIC_NAME], "2015-06-12 Slack thread 1")
|
||||
self.assertEqual(zerver_message[2][EXPORT_TOPIC_NAME], "2015-06-12 Slack thread 1")
|
||||
# A new thread with a different date from 2015-06-12, starts the counter from 1.
|
||||
self.assertEqual(zerver_message[3][EXPORT_TOPIC_NAME], "2015-08-18 Slack thread 1")
|
||||
# A new thread with a different timestamp, but the same date as 2015-08-18, starts the
|
||||
# counter from 2.
|
||||
self.assertEqual(zerver_message[4][EXPORT_TOPIC_NAME], "2015-08-18 Slack thread 2")
|
||||
self.assertEqual(
|
||||
zerver_message[1]["recipient"], slack_recipient_name_to_zulip_recipient_id["random"]
|
||||
)
|
||||
|
||||
### THREAD 1 CONVERSATION ###
|
||||
# Test thread topic name contains message snippet
|
||||
expected_thread_1_message_1_content = "message body text"
|
||||
expected_thread_1_topic_name = "2015-06-12 message body text"
|
||||
self.assertEqual(zerver_message[1]["content"], expected_thread_1_message_1_content)
|
||||
self.assertEqual(zerver_message[1][EXPORT_TOPIC_NAME], expected_thread_1_topic_name)
|
||||
|
||||
# Thread reply is in the correct thread topic
|
||||
self.assertEqual(zerver_message[2]["content"], "random")
|
||||
self.assertEqual(zerver_message[2][EXPORT_TOPIC_NAME], expected_thread_1_topic_name)
|
||||
|
||||
### THREAD 2 CONVERSATION ###
|
||||
# Test thread topic name cut off
|
||||
expected_thread_2_message_1_content = (
|
||||
"random message but it's too long for the thread topic name"
|
||||
)
|
||||
expected_thread_2_topic_name = (
|
||||
"2015-08-18 random message but it's too long for the thread …"
|
||||
)
|
||||
self.assertEqual(zerver_message[3]["content"], expected_thread_2_message_1_content)
|
||||
self.assertEqual(zerver_message[3][EXPORT_TOPIC_NAME], expected_thread_2_topic_name)
|
||||
# Record that truncation should use the full maximum topic length.
|
||||
self.assert_length(zerver_message[3][EXPORT_TOPIC_NAME], 60)
|
||||
|
||||
expected_thread_2_reply_1_message = "replying to the second thread :)"
|
||||
self.assertEqual(zerver_message[4]["content"], expected_thread_2_reply_1_message)
|
||||
self.assertEqual(zerver_message[4][EXPORT_TOPIC_NAME], expected_thread_2_topic_name)
|
||||
|
||||
### THREAD 3 CONVERSATION ###
|
||||
# Test thread topic name collision
|
||||
expected_thread_3_message_1_content = "message body text"
|
||||
expected_thread_3_topic_name = "2015-06-12 message body text (2)"
|
||||
self.assertEqual(zerver_message[5]["content"], expected_thread_3_message_1_content)
|
||||
self.assertEqual(zerver_message[5][EXPORT_TOPIC_NAME], expected_thread_3_topic_name)
|
||||
|
||||
### THREAD 4 CONVERSATION ###
|
||||
# Test mention syntax in thread topic name
|
||||
expected_thread_4_message_1_content = "@**Jon** please reply to this message"
|
||||
expected_thread_4_topic_name = "2015-07-17 @**Jon** please reply to this message"
|
||||
self.assertEqual(zerver_message[7]["content"], expected_thread_4_message_1_content)
|
||||
self.assertEqual(zerver_message[7][EXPORT_TOPIC_NAME], expected_thread_4_topic_name)
|
||||
|
||||
### THREAD 5 CONVERSATION ###
|
||||
# Test file link in thread topic name
|
||||
expected_thread_4_message_1_content = "Look!\n[Apple](/user_uploads/"
|
||||
expected_thread_4_topic_name = "2018-09-16 Look!\n[Apple](/user_uploads/"
|
||||
self.assertTrue(
|
||||
zerver_message[9]["content"].startswith(expected_thread_4_message_1_content)
|
||||
)
|
||||
self.assertTrue(
|
||||
zerver_message[9][EXPORT_TOPIC_NAME].startswith(expected_thread_4_topic_name)
|
||||
)
|
||||
|
||||
### THREAD 6 CONVERSATION ###
|
||||
# Test various formatting syntaxes in thread topic name
|
||||
expected_thread_4_message_1_content = "**foo** *bar* ~~baz~~ [qux](https://chat.zulip.org)"
|
||||
expected_thread_4_topic_name = (
|
||||
"2019-01-10 **foo** *bar* ~~baz~~ [qux](https://chat.zulip.o…"
|
||||
)
|
||||
self.assertEqual(zerver_message[11]["content"], expected_thread_4_message_1_content)
|
||||
self.assertEqual(zerver_message[11][EXPORT_TOPIC_NAME], expected_thread_4_topic_name)
|
||||
|
||||
@mock.patch("zerver.data_import.slack.build_usermessages", return_value=(2, 4))
|
||||
def test_channel_message_to_zerver_message_with_integration_bots(
|
||||
self, mock_build_usermessage: mock.Mock
|
||||
|
Reference in New Issue
Block a user