slack_data_import: Support converting integration bot users.

Currently, we're unable to convert messages from Slack's integration
bots because this message subtype doesn't come from a Slack "user", that
is they don't have a Slack user profile.

This is a preparatory change to support converting Slack's integration
bot messages. This commit artificially creates Slack user data from the
integration bot's "profile" so that we can create a corresponding Zulip
user for them.

Part of #31311.
This commit is contained in:
PieterCK
2024-08-08 14:50:38 +07:00
committed by Tim Abbott
parent 10946caa3d
commit f988412394
2 changed files with 156 additions and 14 deletions

View File

@@ -50,7 +50,7 @@ from zerver.lib.emoji import codepoint_to_name, get_emoji_file_name
from zerver.lib.export import MESSAGE_BATCH_CHUNK_SIZE, do_common_export_processes
from zerver.lib.mime_types import guess_type
from zerver.lib.storage import static_path
from zerver.lib.thumbnail import resize_realm_icon
from zerver.lib.thumbnail import THUMBNAIL_ACCEPT_IMAGE_TYPES, resize_realm_icon
from zerver.lib.upload import sanitize_name
from zerver.models import (
CustomProfileField,
@@ -472,6 +472,19 @@ def build_avatar_url(slack_user_id: str, user: ZerverFieldsT) -> tuple[str, str]
avatar_hash = user["profile"]["avatar_hash"]
avatar_url = f"https://ca.slack-edge.com/{team_id}-{slack_user_id}-{avatar_hash}"
avatar_source = UserProfile.AVATAR_FROM_USER
elif user.get("is_integration_bot"):
# Unlike other Slack user types, Slacks integration bot avatar URL ends with
# a file type extension (.png, in this case).
# e.g https://avatars.slack-edge.com/2024-05-01/7218497908_deb94eac4c_512.png
avatar_url = user["profile"]["image_72"]
content_type = guess_type(avatar_url)[0]
if content_type not in THUMBNAIL_ACCEPT_IMAGE_TYPES:
logging.info(
"Unsupported avatar type (%s) for user -> %s\n", content_type, user.get("name")
)
avatar_source = UserProfile.AVATAR_FROM_GRAVATAR
else:
avatar_source = UserProfile.AVATAR_FROM_USER
else:
logging.info("Failed to process avatar for user -> %s\n", user.get("name"))
return avatar_source, avatar_url
@@ -1262,12 +1275,39 @@ def get_timestamp_from_message(message: ZerverFieldsT) -> float:
return float(message["ts"])
def is_integration_bot_message(message: ZerverFieldsT) -> bool:
return message.get("subtype") == "bot_message" and "user" not in message and "bot_id" in message
def convert_bot_info_to_slack_user(bot_info: dict[str, Any]) -> ZerverFieldsT:
# We use "image_72," an icon-sized 72x72 pixel image, for the Slack integration
# bots avatar because it is the best available option. As a consequence, this
# will make the avatar appear blurry in places where a medium-sized avatar
# (500x500px) is expected, such as in the user profile menu.
bot_user = {
"id": bot_info["id"],
"name": bot_info["name"],
"deleted": bot_info["deleted"],
"is_mirror_dummy": False,
"real_name": bot_info["name"],
"is_integration_bot": True,
"profile": {
"image_72": bot_info["icons"]["image_72"],
"bot_id": bot_info["id"],
"first_name": bot_info["name"],
},
}
return bot_user
def fetch_shared_channel_users(
user_list: list[ZerverFieldsT], slack_data_dir: str, token: str
) -> None:
normal_user_ids = set()
mirror_dummy_user_ids = set()
added_channels = {}
integration_bot_users: list[str] = []
team_id_to_domain: dict[str, str] = {}
for user in user_list:
user["is_mirror_dummy"] = False
@@ -1296,10 +1336,25 @@ def fetch_shared_channel_users(
all_messages = get_messages_iterator(slack_data_dir, added_channels, {}, {})
for message in all_messages:
user_id = get_message_sending_user(message)
if user_id is None or user_id in normal_user_ids:
continue
mirror_dummy_user_ids.add(user_id)
if is_integration_bot_message(message):
# This message is likely from an integration bot. Since Slack's integration
# bots doesn't have user profiles, we need to artificially create users for
# them to convert their messages.
bot_id = message["bot_id"]
if bot_id in integration_bot_users:
continue
bot_info = get_slack_api_data(
"https://slack.com/api/bots.info", "bot", token=token, bot=bot_id
)
bot_user = convert_bot_info_to_slack_user(bot_info)
user_list.append(bot_user)
integration_bot_users.append(bot_id)
else:
user_id = get_message_sending_user(message)
if user_id is None or user_id in normal_user_ids:
continue
mirror_dummy_user_ids.add(user_id)
# Fetch data on the mirror_dummy_user_ids from the Slack API (it's
# not included in the data export file).

View File

@@ -65,6 +65,7 @@ def request_callback(request: PreparedRequest) -> tuple[int, dict[str, str], byt
"https://slack.com/api/users.list",
"https://slack.com/api/users.info",
"https://slack.com/api/team.info",
"https://slack.com/api/bots.info",
]
for endpoint in endpoints:
if request.url and endpoint in request.url:
@@ -96,6 +97,29 @@ def request_callback(request: PreparedRequest) -> tuple[int, dict[str, str], byt
except KeyError:
return (200, {}, orjson.dumps({"ok": False, "error": "user_not_found"}))
return (200, {}, orjson.dumps({"ok": True, "user": {"id": user_id, "team_id": team_id}}))
if request.url and "https://slack.com/api/bots.info" in request.url:
bot_info_dict = {
"B06NWMNUQ3W": {
"id": "B06NWMNUQ3W",
"deleted": False,
"name": "ClickUp",
"updated": 1714669546,
"app_id": "A3G4A68V9",
"icons": {
"image_36": "https://avatars.slack-edge.com/2024-05-01/7057208497908_a4351f6deb91094eac4c_36.png",
"image_48": "https://avatars.slack-edge.com/2024-05-01/7057208497908_a4351f6deb91094eac4c_48.png",
"image_72": "https://avatars.slack-edge.com/2024-05-01/7057208497908_a4351f6deb91094eac4c_72.png",
},
}
}
try:
bot_id = qs["bot"][0]
bot_info = bot_info_dict[bot_id]
except KeyError:
return (200, {}, orjson.dumps({"ok": False, "error": "bot_not_found"}))
return (200, {}, orjson.dumps({"ok": True, "bot": bot_info}))
# Else, https://slack.com/api/team.info
team_not_found: tuple[int, dict[str, str], bytes] = (
200,
@@ -147,6 +171,13 @@ class SlackImporter(ZulipTestCase):
get_slack_api_data(slack_users_info_url, "user", token=token, user="idontexist")
self.assertEqual(invalid.exception.args, ("Error accessing Slack API: user_not_found",))
# Bot info
slack_bots_info_url = "https://slack.com/api/bots.info"
responses.add_callback(responses.GET, slack_bots_info_url, callback=request_callback)
with self.assertRaises(Exception) as invalid:
get_slack_api_data(slack_bots_info_url, "XXXYYYZZZ", token=token)
self.assertEqual(invalid.exception.args, ("Error accessing Slack API: bot_not_found",))
# Team info
slack_team_info_url = "https://slack.com/api/team.info"
responses.add_callback(responses.GET, slack_team_info_url, callback=request_callback)
@@ -327,9 +358,10 @@ class SlackImporter(ZulipTestCase):
],
]
messages_mock.return_value = [
{"user": "U061A1R2R"},
{"user": "U061A5N1G"},
{"user": "U061A8H1G"},
{"user": "U061A1R2R", "team": "T6LARQE2Z"},
{"user": "U061A5N1G", "team": "T6LARQE2Z"},
{"user": "U061A8H1G", "team": "T6LARQE2Z"},
{"subtype": "bot_message", "text": "", "username": "ClickUp", "bot_id": "B06NWMNUQ3W"},
]
# Users info
slack_users_info_url = "https://slack.com/api/users.info"
@@ -337,11 +369,15 @@ class SlackImporter(ZulipTestCase):
# Team info
slack_team_info_url = "https://slack.com/api/team.info"
responses.add_callback(responses.GET, slack_team_info_url, callback=request_callback)
# Bot info
slack_bot_info_url = "https://slack.com/api/bots.info"
responses.add_callback(responses.GET, slack_bot_info_url, callback=request_callback)
slack_data_dir = self.fixture_file_name("", type="slack_fixtures")
fetch_shared_channel_users(users, slack_data_dir, "xoxb-valid-token")
# Normal users
self.assert_length(users, 8)
self.assert_length(users, 9)
self.assertEqual(users[0]["id"], "U061A1R2R")
self.assertEqual(users[0]["is_mirror_dummy"], False)
self.assertFalse("team_domain" in users[0])
@@ -353,6 +389,7 @@ class SlackImporter(ZulipTestCase):
# not deterministic.
later_users = sorted(users[3:], key=lambda x: x["id"])
expected_users = [
("B06NWMNUQ3W", "ClickUp"),
("U061A3E0G", "foreignteam1"),
("U061A8H1G", "foreignteam2"),
("U11111111", "foreignteam2"),
@@ -361,8 +398,12 @@ class SlackImporter(ZulipTestCase):
]
for expected, found in zip(expected_users, later_users, strict=False):
self.assertEqual(found["id"], expected[0])
self.assertEqual(found["team_domain"], expected[1])
self.assertEqual(found["is_mirror_dummy"], True)
if "bot_id" in found.get("profile", {}):
self.assertEqual(found["real_name"], expected[1])
self.assertEqual(found["is_mirror_dummy"], False)
else:
self.assertEqual(found["team_domain"], expected[1])
self.assertEqual(found["is_mirror_dummy"], True)
@mock.patch("zerver.data_import.slack.get_data_file")
def test_users_to_zerver_userprofile(self, mock_get_data_file: mock.Mock) -> None:
@@ -532,6 +573,35 @@ class SlackImporter(ZulipTestCase):
},
# Missing the `avatar_hash` value.
},
{
"id": "U1MBOTC81",
"name": "Integration Bot",
"deleted": False,
"is_mirror_dummy": False,
"real_name": "Integration Bot",
"is_integration_bot": True,
"profile": {
"image_72": "https://avatars.slack-edge.com/2024-05-01/7057208497908_a4351f6deb91094eac4c_512.png",
"bot_id": "B06NWMNUQ3W",
"first_name": "Integration Bot",
},
},
# Test error-handling for a bot user with an unknown avatar URL format.
# In reality, we expect the file to have a file extension type to be
# `.png` or within the range of `THUMBNAIL_ACCEPT_IMAGE_TYPES`.
{
"id": "U1RDFEC90",
"name": "Unknown Bot",
"deleted": False,
"is_mirror_dummy": False,
"real_name": "Unknown Bot",
"is_integration_bot": True,
"profile": {
"image_72": "https://avatars.slack-edge.com/2024-05-01/dasdasdasdasdXXXXXX",
"bot_id": "B0DSAMNUQ3W",
"first_name": "Unknown Bot",
},
},
]
mock_get_data_file.return_value = user_data
@@ -547,6 +617,8 @@ class SlackImporter(ZulipTestCase):
"U015J7JSE": 6,
"U1RDFEC80": 7,
"U1ZYFEC91": 8,
"U1MBOTC81": 9,
"U1RDFEC90": 10,
}
slack_data_dir = "./random_path"
timestamp = int(timezone_now().timestamp())
@@ -559,7 +631,9 @@ class SlackImporter(ZulipTestCase):
slack_user_id_to_zulip_user_id,
customprofilefield,
customprofilefield_value,
) = users_to_zerver_userprofile(slack_data_dir, user_data, 1, timestamp, "test_domain")
) = users_to_zerver_userprofile(
slack_data_dir, user_data, 1, timestamp, "testdomain.com"
)
# Test custom profile fields
self.assertEqual(customprofilefield[0]["field_type"], 1)
@@ -580,9 +654,9 @@ class SlackImporter(ZulipTestCase):
# test that the primary owner should always be imported first
self.assertDictEqual(slack_user_id_to_zulip_user_id, test_slack_user_id_to_zulip_user_id)
self.assert_length(avatar_list, 8)
self.assert_length(avatar_list, 9)
self.assert_length(zerver_userprofile, 9)
self.assert_length(zerver_userprofile, 11)
self.assertEqual(zerver_userprofile[0]["is_staff"], False)
self.assertEqual(zerver_userprofile[0]["is_bot"], False)
@@ -672,6 +746,19 @@ class SlackImporter(ZulipTestCase):
self.assertEqual(zerver_userprofile[8]["is_active"], True)
self.assertEqual(zerver_userprofile[8]["avatar_source"], "G")
# Test converting Slack's integration bot
self.assertEqual(
zerver_userprofile[9]["id"], test_slack_user_id_to_zulip_user_id["U1MBOTC81"]
)
self.assertEqual(zerver_userprofile[9]["is_active"], True)
self.assertEqual(zerver_userprofile[9]["avatar_source"], "U")
self.assertEqual(
zerver_userprofile[10]["id"], test_slack_user_id_to_zulip_user_id["U1RDFEC90"]
)
self.assertEqual(zerver_userprofile[10]["is_active"], True)
self.assertEqual(zerver_userprofile[10]["avatar_source"], "G")
def test_build_defaultstream(self) -> None:
realm_id = 1
stream_id = 1