mirror of
				https://github.com/zulip/zulip.git
				synced 2025-10-28 10:33:54 +00:00 
			
		
		
		
	slack: Fix bugs during import of thread messages.
Consider the following messages JSON (taken from real data, redacted):
```json
[
    {
        "subtype": "bot_message",
        "text": "",
        "attachments": [
            {
                "fallback": "Open Slack to cast your vote in this Simple Poll",
                "title": "Should we do a thing?",
                "id": 1,
                "color": "6ecadc",
                "fields": [
                    {
                        "title": "",
                        "value": "1️⃣ Yes 👍\n\n"",
                        "short": false
                    },
                    {
                        "title": "",
                        "value": "2️⃣ No 👎\n\n",
                        "short": false
                    },
                    {
                        "title": "",
                        "value": "3️⃣ Abstain :spock-hand:\n\n",
                        "short": false
                    }
                ],
                "mrkdwn_in": [
                    "fields"
                ]
            },
            {
                "callback_id": "12345678-1234-1234-1234-123456789abc",
                "fallback": "Open Slack to cast your vote in this Simple Poll",
                "id": 2,
                "color": "6ecadc",
                "actions": [
                    {
                        "id": "1",
                        "name": "vote",
                        "text": "1️⃣",
                        "type": "button",
                        "value": "1",
                        "style": ""
                    },
                    {
                        "id": "2",
                        "name": "vote",
                        "text": "2️⃣",
                        "type": "button",
                        "value": "2",
                        "style": ""
                    },
                    {
                        "id": "3",
                        "name": "vote",
                        "text": "3️⃣",
                        "type": "button",
                        "value": "3",
                        "style": ""
                    },
                    {
                        "id": "4",
                        "name": "delete-v2",
                        "text": "Delete Poll",
                        "type": "button",
                        "value": "",
                        "style": "danger",
                        "confirm": {
                            "text": "Are you sure you want to delete the Poll?",
                            "title": "Delete Poll?",
                            "ok_text": "Yes",
                            "dismiss_text": "No"
                        }
                    }
                ]
            },
            {
                "callback_id": "12345678-1234-1234-1234-123456789abc",
                "fallback": "Open Slack to cast your vote in this Simple Poll",
                "footer": "Simple Poll        <https:\/\/simplepoll.rocks\/dashboard\/redacted\/settings\/|Edit Settings>",
                "id": 3,
                "footer_icon": "https:\/\/simplepoll.rocks\/static\/main\/favicon.png",
                "color": "6ecadc"
            }
        ],
        "type": "message",
        "ts": "1234567890.123456",
        "bot_id": "B1ABCDEF1",
        "thread_ts": "1234567890.123456",
        "reply_count": 1,
        "reply_users_count": 1,
        "latest_reply": "1234567890.765432",
        "reply_users": [
            "U1ABC1234"
        ],
        "replies": [
            {
                "user": "U1ABC1234",
                "ts": "1234567890.765432"
            }
        ],
        "is_locked": false,
        "subscribed": false
    },
    {
        "user": "U1ABC1234",
        "type": "message",
        "ts": "1234567890.765432",
        "text": "Maybe do qux instead",
        "team": "T1AB23456",
        "user_team": "T1AB23456",
        "source_team": "T1AB23456",
        "user_profile": {
            "avatar_hash": "a123456789ab",
            "image_72": "https:\/\/avatars.slack-edge.com\/2017-01-01\/123456789abc_def123456789abcdef12_72.jpg",
            "first_name": "Alice",
            "real_name": "Alice Smith",
            "display_name": "a.smith",
            "team": "T1AB23456",
            "name": "a.smith",
            "is_restricted": false,
            "is_ultra_restricted": false
        },
        "thread_ts": "1234567890.123456",
        "blocks": [
            {
                "type": "rich_text",
                "block_id": "EoBdt",
                "elements": [
                    {
                        "type": "rich_text_section",
                        "elements": [
                            {
                                "type": "text",
                                "text": "Maybe do qux instead"
                            }
                        ]
                    }
                ]
            }
        ]
    }
]
```
533f177175/zerver/data_import/slack.py (L922-L924)
fails for the first message, because it lacks a 'user' key. It should fall back to the bot_id.
533f177175/zerver/data_import/slack.py (L925-L926)
fails for the second message, because it lacks a 'parent_user_id' key.
However, the thread root will have been processed earlier, so
memoization of thread parents fixes this issue. Because the original
message may not be in the same file, the memoization needs to be global
rather than in `channel_message_to_zerver_message`.
			
			
This commit is contained in:
		| @@ -912,18 +912,58 @@ def get_messages_iterator( | ||||
|         yield from sorted(messages_for_one_day, key=get_timestamp_from_message) | ||||
|  | ||||
|  | ||||
| # This is cached globally so that thread parent lookup works across multiple calls to | ||||
| # channel_message_to_zerver_message, and across multiple message JSON files (e.g. | ||||
| # for responses posted on a date after the thread root was created). | ||||
| # The keys for this map are thread_ts values (timestamps) - as that's what appears to | ||||
| # be the most sensible "thread identifier" for our purposes; Slack doesn't provide | ||||
| # a thread ID. | ||||
| thread_parent_map: dict[str, str] = {} | ||||
|  | ||||
|  | ||||
| def get_parent_user_id_from_thread_message(thread_message: ZerverFieldsT, subtype: str) -> str: | ||||
|     """ | ||||
|     This retrieves the user id of the sender of the original thread | ||||
|     message. | ||||
|     """ | ||||
|     if subtype == "thread_broadcast": | ||||
|         return thread_message["root"]["user"] | ||||
|     elif thread_message["thread_ts"] == thread_message["ts"]: | ||||
|         # This is the original thread message | ||||
|         return thread_message["user"] | ||||
|     else: | ||||
|         return thread_message["parent_user_id"] | ||||
|  | ||||
|     # Some messages posted by bots don't have a user key, but only a bot_id (namely, ones with | ||||
|     # subtype bot_message). For those, use bot_id as fallback when the user field doesn't exist. | ||||
|     try: | ||||
|         if subtype == "thread_broadcast": | ||||
|             try: | ||||
|                 return thread_message["root"]["user"] | ||||
|             except KeyError: | ||||
|                 return thread_message["root"]["bot_id"] | ||||
|         elif thread_message["thread_ts"] == thread_message["ts"]: | ||||
|             # This is the original thread message. We're following the logic recommended | ||||
|             # in Slack's documentation here: | ||||
|             # https://docs.slack.dev/messaging/retrieving-messages/#finding_threads | ||||
|             # - Identify parent messages by comparing the thread_ts and ts values. If they are equal, | ||||
|             #   the message is a parent message. | ||||
|             # - Threaded replies are also identified by comparing the thread_ts and ts values. | ||||
|             #   If they are different, the message is a reply. | ||||
|             try: | ||||
|                 ret = thread_message["user"] | ||||
|             except KeyError: | ||||
|                 ret = thread_message["bot_id"] | ||||
|             # Cache the thread parent's user/bot ID for later use. This will allow us to determine | ||||
|             # the parent user id for thread replies. | ||||
|             thread_parent_map[thread_message["thread_ts"]] = ret | ||||
|             return ret | ||||
|         else: | ||||
|             try: | ||||
|                 return thread_message["parent_user_id"] | ||||
|             except KeyError: | ||||
|                 return thread_message["bot_id"] | ||||
|     except KeyError: | ||||
|         # If Slack doesn't specify the parent user/bot ID in this message, use the cached one. | ||||
|         # | ||||
|         # TODO: Our caching strategy works under the assumption that we visit thread messages | ||||
|         # in the order of oldest-to-newest - so that we see the thread's parent message before | ||||
|         # thread replies. If messages are unsorted, we might process a | ||||
|         # reply before its parent, resulting in KeyError because the parent’s user ID hasn’t been cached yet. | ||||
|         return thread_parent_map[thread_message["thread_ts"]] | ||||
|  | ||||
|  | ||||
| def get_zulip_thread_topic_name( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user