From 551f382721c9aaf9037946e480ee4e4d343749af Mon Sep 17 00:00:00 2001 From: Prakhar Pratyush Date: Thu, 2 May 2024 17:28:38 +0530 Subject: [PATCH] data_import: Remove gitter data import tool. Gitter broke their older API as part of being integrated into Matrix. Their announcement blog says: "Anything left using the Gitter APIs will need to be updated to use the Matrix API" This commit drops the legacy Gitter import tool and we plan to build a new one for Matrix in future. (cherry picked from commit b17a006bf00cb84bcb1749274e9234bc16f75aab) --- docs/production/install.md | 7 +- help/import-from-gitter.md | 196 --------- .../migrating-from-other-chat-tools.md | 1 - help/include/sidebar_index.md | 1 - .../comparison_table_self_hosted.html | 3 +- templates/corporate/for/business.html | 2 +- templates/corporate/for/communities.md | 8 +- templates/corporate/for/open-source.html | 3 +- templates/corporate/for/research.html | 2 +- templates/zerver/create_realm.html | 2 +- tools/test-backend | 1 - zerver/data_import/gitter.py | 397 ------------------ zerver/data_import/import_util.py | 4 +- zerver/lib/import_realm.py | 6 +- .../commands/convert_gitter_data.py | 51 --- zerver/tests/fixtures/gitter_data.json | 44 -- zerver/tests/test_gitter_importer.py | 176 -------- 17 files changed, 14 insertions(+), 890 deletions(-) delete mode 100644 help/import-from-gitter.md delete mode 100644 zerver/data_import/gitter.py delete mode 100644 zerver/management/commands/convert_gitter_data.py delete mode 100644 zerver/tests/fixtures/gitter_data.json delete mode 100644 zerver/tests/test_gitter_importer.py diff --git a/docs/production/install.md b/docs/production/install.md index b6c28a679e..cdb6cdb0a4 100644 --- a/docs/production/install.md +++ b/docs/production/install.md @@ -10,7 +10,7 @@ You can choose from several convenient options for hosting Zulip: - Use [Zulip Cloud](https://zulip.com/plans/) hosting. Read our [guide to choosing between Zulip Cloud and self-hosting](https://zulip.com/help/getting-your-organization-started-with-zulip#choosing-between-zulip-cloud-and-self-hosting). -To **import data** from [Slack][slack-import], [Mattermost][mattermost-import], [Rocket.Chat][rocketchat-import], [Gitter][gitter-import], [Zulip Cloud][zulip-cloud-import], or [another Zulip +To **import data** from [Slack][slack-import], [Mattermost][mattermost-import], [Rocket.Chat][rocketchat-import], [Zulip Cloud][zulip-cloud-import], or [another Zulip server][zulip-server-import], follow the linked instructions. You can **try out Zulip** before setting up your own server by [checking @@ -100,9 +100,7 @@ documentation. If you are importing data, stop here and return to the import instructions for [Slack][slack-import], [Mattermost][mattermost-import], -[Rocket.Chat][rocketchat-import], [Gitter][gitter-import], [Zulip -Cloud][zulip-cloud-import], [a server backup][zulip-backups], or [another Zulip -server][zulip-server-import]. +[Rocket.Chat][rocketchat-import], [Zulip Cloud][zulip-cloud-import], [a server backup][zulip-backups], or [another Zulip server][zulip-server-import]. ::: @@ -114,7 +112,6 @@ server][zulip-server-import]. [slack-import]: https://zulip.com/help/import-from-slack [mattermost-import]: https://zulip.com/help/import-from-mattermost [rocketchat-import]: https://zulip.com/help/import-from-rocketchat -[gitter-import]: https://zulip.com/help/import-from-gitter [zulip-cloud-import]: export-and-import.md#import-into-a-new-zulip-server [zulip-server-import]: export-and-import.md#import-into-a-new-zulip-server diff --git a/help/import-from-gitter.md b/help/import-from-gitter.md deleted file mode 100644 index e22ef2f871..0000000000 --- a/help/import-from-gitter.md +++ /dev/null @@ -1,196 +0,0 @@ -# Import from Gitter (beta) - -You can import your current workspace into a Zulip organization. It's a great way -to preserve your workspace history when you migrate to Zulip, and to -make the transition easy for the members of your organization. - -The import will include your organization's: - -* **Name** -* **Message history**, including attachments and emoji reactions -* **Users**, including names and avatars -* **Channels**, including all user subscriptions - -## Import process overview - -To import your Gitter organization into Zulip, you will need to take the -following steps, which are described in more detail below: - -{start_tabs} - -1. [Export your Gitter data.](#export-your-gitter-data) - -1. [Import your Gitter data into Zulip.](#import-your-data-into-zulip) - -1. [Get your organization started with Zulip!](#get-your-organization-started-with-zulip) - -{end_tabs} - -## Import your organization from Gitter into Zulip - -### Export your Gitter data - -Gitter's [data export tool](https://github.com/minrk/archive-gitter) allows you -to export all public channel messages. - -{start_tabs} - -1. Export your Gitter data. You will receive `.json` files of the public - rooms that you are a part of. - -1. Select the `gitter_data.json` file of the room which you want to - import into Zulip. - -!!! warn "" - - **Note:** You will need a Gitter API token to export data. You can get this - token by following the instructions in the **Getting Started** section of - the [Gitter documentation](https://developer.gitter.im/docs/). - -{end_tabs} - -### Import your data into Zulip - -{!import-your-data-into-zulip.md!} - -{start_tabs} - -{tab|zulip-cloud} - -{!import-into-a-zulip-cloud-organization.md!} - -1. The **username** that will have the [owner role](/help/roles-and-permissions) - in your Zulip organization. - -{!import-zulip-cloud-organization-warning.md!} - -{tab|self-hosting} - -{!import-into-a-self-hosted-zulip-server.md!} - -1. To import into an organization hosted on the root domain - (`EXTERNAL_HOST`) of the Zulip installation, run the following - commands. - - {!import-self-hosted-server-tips.md!} - - ``` - cd /home/zulip/deployments/current - ./scripts/stop-server - ./manage.py convert_gitter_data /tmp/gitter_data.json --output /tmp/converted_gitter_data - ./manage.py import '' /tmp/converted_gitter_data - ./scripts/start-server - ``` - - Alternatively, to import into a custom subdomain, run: - - ``` - cd /home/zulip/deployments/current - ./scripts/stop-server - ./manage.py convert_gitter_data /tmp/gitter_data.json --output /tmp/converted_gitter_data - ./manage.py import /tmp/converted_gitter_data - ./scripts/start-server - ``` - -1. Follow [step 4](https://zulip.readthedocs.io/en/stable/production/install.html#step-4-configure-and-use) - of the guide for [installing a new Zulip - server](https://zulip.readthedocs.io/en/stable/production/install.html). - -{end_tabs} - -#### Import details - -Whether you are using Zulip Cloud or self-hosting Zulip, here are a few notes to -keep in mind about the import process: - -- [Gitter's export tool](https://github.com/minrk/archive-gitter) does not export - workspace settings, so you will need to [configure the settings for your Zulip - organization](/help/customize-organization-settings). This includes settings - like [email visibility](/help/configure-email-visibility), - [message editing permissions](/help/restrict-message-editing-and-deletion), - and [how users can join your organization](/help/restrict-account-creation). - -- Gitter's export tool does not export user settings, so users in your organization - may want to [customize their account settings](/help/getting-started-with-zulip). - -- The [Gitter API][gitter-api-user-data] doesn't contain data on which users are - administrators of a Gitter channel. As a result, all Gitter users are imported - into Zulip as [members](/help/roles-and-permissions). - -- Gitter's export tool doesn't export email addresses, only GitHub usernames. - Zulip's import tool will set GitHub as the only authentication method enabled - by default to avoid user confusion. - -- You can merge multiple Gitter channels into a single Zulip - organization using [this - tool](https://github.com/minrk/archive-gitter/pull/5). - -- Zulip's import tool doesn't translate Gitter's Markdown format into Zulip's - Markdown format (there are a few corner cases where the syntax is different). - Additionally, Gitter's issue mention syntax isn't translated. - -- Message edit history is not imported. - -[grant-admin-access]: https://zulip.readthedocs.io/en/stable/production/management-commands.html#other-useful-manage-py-commands -[gitter-api-user-data]: https://developer.gitter.im/docs/user-resource - -## Get your organization started with Zulip - -Once the import process is completed, you will need to: - -{start_tabs} - -1. [Configure the settings for your organization](/help/customize-organization-settings), - which are not exported. This includes settings like [email - visibility](/help/configure-email-visibility), [message editing - permissions](/help/restrict-message-editing-and-deletion), - and [how users can join your organization](/help/restrict-account-creation). - -2. [Configure user roles](/help/change-a-users-role). Only organization owners - and administrators can do this. - * If you [import into Zulip Cloud](#import-your-data-into-zulip), you will - specify the user whose account will have the owner role when you request the - import. - * If you self-host, you can follow the Zulip documentation on [making a user an - organization owner from the terminal][grant-admin-access] to mark the appropriate - users as organization owners. - -3. All users from your previous workspace will have accounts in your new Zulip - organization. However, you will need to let users know about their new - accounts, and [how they will log in for the first time - ](#how-users-will-log-in-for-the-first-time). - -4. Share the URL for your new Zulip organization, and (recommended) the [Getting - started with Zulip guide](/help/getting-started-with-zulip). - -5. Migrate any [integrations](/integrations/). - -{end_tabs} - -## How users will log in for the first time - -When you create your organization, users will immediately be able to log in -without a password using GitHub as the [authentication method -](/help/configure-authentication-methods). Once they log -in, users whose accounts have been imported will need to [change their Zulip -email address](/help/change-your-email-address) in order to receive [email -notifications](/help/email-notifications). - -!!! warn "" - - A user's email notifications will not work until they update the email - associated with their Zulip account. - -When user accounts are imported, users initially do not have passwords -configured. Users can [reset their own passwords](/help/change-your-password) by -following the instructions on your Zulip organization's login page. - -!!! tip "" - - For security reasons, passwords are never exported. - -## Related articles - -* [Choosing between Zulip Cloud and self-hosting](/help/zulip-cloud-or-self-hosting) -* [Setting up your organization](/help/getting-your-organization-started-with-zulip) -* [Getting started with Zulip](/help/getting-started-with-zulip) diff --git a/help/include/migrating-from-other-chat-tools.md b/help/include/migrating-from-other-chat-tools.md index 47e6cbb5f5..9c89d8ce94 100644 --- a/help/include/migrating-from-other-chat-tools.md +++ b/help/include/migrating-from-other-chat-tools.md @@ -6,7 +6,6 @@ more. Follow the detailed import guides: incoming webhook](https://zulip.com/integrations/doc/slack_incoming) also makes it easy to migrate any integrations. * [Import from Mattermost](/help/import-from-mattermost) -* [Import from Gitter](/help/import-from-gitter) * [Import from Rocket.Chat](/help/import-from-rocketchat) !!! warn "" diff --git a/help/include/sidebar_index.md b/help/include/sidebar_index.md index a2ece81ab5..afedad7aa7 100644 --- a/help/include/sidebar_index.md +++ b/help/include/sidebar_index.md @@ -167,7 +167,6 @@ * [Communities directory](/help/communities-directory) * [Import from Mattermost](/help/import-from-mattermost) * [Import from Slack](/help/import-from-slack) -* [Import from Gitter](/help/import-from-gitter) * [Import from Rocket.Chat](/help/import-from-rocketchat) * [Configure authentication methods](/help/configure-authentication-methods) * [SAML authentication](/help/saml-authentication) diff --git a/templates/corporate/comparison_table_self_hosted.html b/templates/corporate/comparison_table_self_hosted.html index 14450ece4f..a184b2046d 100644 --- a/templates/corporate/comparison_table_self_hosted.html +++ b/templates/corporate/comparison_table_self_hosted.html @@ -1024,8 +1024,7 @@ Import data from Slack, - Mattermost, - Gitter or + Mattermost, or Rocket.Chat diff --git a/templates/corporate/for/business.html b/templates/corporate/for/business.html index 4c1979b505..3b35a45371 100644 --- a/templates/corporate/for/business.html +++ b/templates/corporate/for/business.html @@ -553,7 +553,7 @@

Make the move today

-

Getting started or moving from another platform is easy! Import your existing organization from Slack, Mattermost, Gitter, or Rocket.Chat.

+

Getting started or moving from another platform is easy! Import your existing organization from Slack, Mattermost, or Rocket.Chat.

If you have any questions, please contact us at sales@zulip.com. You can also drop by our friendly development community at chat.zulip.org to ask for help or suggest improvements!

diff --git a/templates/corporate/for/communities.md b/templates/corporate/for/communities.md index 9fb1d89894..db6df09648 100644 --- a/templates/corporate/for/communities.md +++ b/templates/corporate/for/communities.md @@ -132,10 +132,10 @@ most of their time integrating improvements into Zulip. Many communities that migrated from [Slack](/help/import-from-slack), [Mattermost](/help/import-from-mattermost), -[Gitter](/help/import-from-gitter), or -[Rocket.Chat](/help/import-from-rocketchat) to Zulip tell us -that Zulip helped them manage and grow an inclusive, healthy -community. We hope Zulip can help your community succeed too! +[Rocket.Chat](/help/import-from-rocketchat), or +Gitter to Zulip tell us that Zulip helped them manage +and grow an inclusive, healthy community. We hope Zulip +can help your community succeed too! > “I highly recommend Zulip to other communities. We’re coming from diff --git a/templates/corporate/for/open-source.html b/templates/corporate/for/open-source.html index f2290f8523..b68f2d08f9 100644 --- a/templates/corporate/for/open-source.html +++ b/templates/corporate/for/open-source.html @@ -670,8 +670,7 @@ from another platform is easy! Import your existing organization from Slack, - Mattermost, - Gitter, or + Mattermost, or Rocket.Chat. diff --git a/templates/corporate/for/research.html b/templates/corporate/for/research.html index 247f91a5cb..113264616e 100644 --- a/templates/corporate/for/research.html +++ b/templates/corporate/for/research.html @@ -383,7 +383,7 @@ conferences. -
  • Getting started or moving from another platform is easy! Import your existing organization from Slack, Mattermost, Gitter, or Rocket.Chat.
  • +
  • Getting started or moving from another platform is easy! Import your existing organization from Slack, Mattermost, or Rocket.Chat.
  • If you have any questions, please contact us at sales@zulip.com. You can also drop by our friendly development community at chat.zulip.org to ask for help or suggest improvements!
  • diff --git a/templates/zerver/create_realm.html b/templates/zerver/create_realm.html index cc9fbe546c..27523d2fe3 100644 --- a/templates/zerver/create_realm.html +++ b/templates/zerver/create_realm.html @@ -43,7 +43,7 @@ {% trans %} Or import from Slack, Mattermost, - Gitter, or Rocket.Chat. + or Rocket.Chat. {% endtrans %} diff --git a/tools/test-backend b/tools/test-backend index 089ed75bb7..21c3366f28 100755 --- a/tools/test-backend +++ b/tools/test-backend @@ -112,7 +112,6 @@ not_yet_fully_covered = [ # Data import files; relatively low priority "zerver/data_import/sequencer.py", "zerver/data_import/slack.py", - "zerver/data_import/gitter.py", "zerver/data_import/import_util.py", # Webhook integrations with incomplete coverage "zerver/webhooks/greenhouse/view.py", diff --git a/zerver/data_import/gitter.py b/zerver/data_import/gitter.py deleted file mode 100644 index f827a894e9..0000000000 --- a/zerver/data_import/gitter.py +++ /dev/null @@ -1,397 +0,0 @@ -import logging -import os -from typing import Any, Dict, List, Set, Tuple - -import dateutil.parser -import orjson -from django.conf import settings -from django.forms.models import model_to_dict -from django.utils.timezone import now as timezone_now -from typing_extensions import TypeAlias - -from zerver.data_import.import_util import ( - ZerverFieldsT, - build_avatar, - build_defaultstream, - build_message, - build_realm, - build_recipient, - build_stream, - build_subscription, - build_usermessages, - build_zerver_realm, - create_converted_data_files, - long_term_idle_helper, - make_subscriber_map, - process_avatars, -) -from zerver.lib.export import MESSAGE_BATCH_CHUNK_SIZE -from zerver.models import Recipient, UserProfile -from zproject.backends import GitHubAuthBackend - -# stubs -GitterDataT: TypeAlias = List[Dict[str, Any]] - -realm_id = 0 - - -def gitter_workspace_to_realm( - domain_name: str, gitter_data: GitterDataT, realm_subdomain: str -) -> Tuple[ZerverFieldsT, List[ZerverFieldsT], Dict[str, int], Dict[str, int]]: - """ - Returns: - 1. realm, converted realm data - 2. avatars, which is list to map avatars to Zulip avatar records.json - 3. user_map, which is a dictionary to map from Gitter user id to Zulip user id - 4. stream_map, which is a dictionary to map from Gitter rooms to Zulip stream id - """ - NOW = float(timezone_now().timestamp()) - zerver_realm: List[ZerverFieldsT] = build_zerver_realm(realm_id, realm_subdomain, NOW, "Gitter") - - realm = build_realm(zerver_realm, realm_id, domain_name) - - # Users will have GitHub's generated noreply email addresses so their only way to log in - # at first is via GitHub. So we set GitHub to be the only authentication method enabled - # default to avoid user confusion. - realm["zerver_realmauthenticationmethod"] = [ - { - "name": GitHubAuthBackend.auth_backend_name, - "realm": realm_id, - # The id doesn't matter since it gets set by the import later properly, but we need to set - # it to something in the dict. - "id": 1, - } - ] - - zerver_userprofile, avatars, user_map = build_userprofile(int(NOW), domain_name, gitter_data) - zerver_stream, zerver_defaultstream, stream_map = build_stream_map(int(NOW), gitter_data) - zerver_recipient, zerver_subscription = build_recipient_and_subscription( - zerver_userprofile, zerver_stream - ) - - realm["zerver_userprofile"] = zerver_userprofile - realm["zerver_stream"] = zerver_stream - realm["zerver_defaultstream"] = zerver_defaultstream - realm["zerver_recipient"] = zerver_recipient - realm["zerver_subscription"] = zerver_subscription - - return realm, avatars, user_map, stream_map - - -def build_userprofile( - timestamp: Any, domain_name: str, gitter_data: GitterDataT -) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], Dict[str, int]]: - """ - Returns: - 1. zerver_userprofile, which is a list of user profile - 2. avatar_list, which is list to map avatars to Zulip avatars records.json - 3. added_users, which is a dictionary to map from Gitter user id to Zulip id - """ - logging.info("######### IMPORTING USERS STARTED #########\n") - zerver_userprofile = [] - avatar_list: List[ZerverFieldsT] = [] - user_map: Dict[str, int] = {} - user_id = 0 - - for data in gitter_data: - if get_user_from_message(data) not in user_map: - user_data = data["fromUser"] - user_map[user_data["id"]] = user_id - - email = get_user_email(user_data, domain_name) - if user_data.get("avatarUrl"): - build_avatar( - user_id, realm_id, email, user_data["avatarUrl"], timestamp, avatar_list - ) - - # Build userprofile object - userprofile = UserProfile( - full_name=user_data["displayName"], - id=user_id, - email=email, - delivery_email=email, - avatar_source="U", - date_joined=timestamp, - last_login=timestamp, - ) - userprofile_dict = model_to_dict(userprofile) - # Set realm id separately as the corresponding realm is not yet a Realm model - # instance - userprofile_dict["realm"] = realm_id - - # We use this later, even though Zulip doesn't - # support short_name - userprofile_dict["short_name"] = user_data["username"] - - zerver_userprofile.append(userprofile_dict) - user_id += 1 - logging.info("######### IMPORTING USERS FINISHED #########\n") - return zerver_userprofile, avatar_list, user_map - - -def get_user_email(user_data: ZerverFieldsT, domain_name: str) -> str: - # TODO Get user email from github - email = "{}@users.noreply.github.com".format(user_data["username"]) - return email - - -def build_stream_map( - timestamp: Any, gitter_data: GitterDataT -) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], Dict[str, int]]: - """ - Returns: - 1. stream, which is the list of streams - 2. defaultstreams, which is the list of default streams - 3. stream_map, which is a dictionary to map from Gitter rooms to Zulip stream id - """ - logging.info("######### IMPORTING STREAM STARTED #########\n") - stream_id = 0 - stream: List[ZerverFieldsT] = [] - - # Default stream when no "room" field is present - stream.append( - build_stream(timestamp, realm_id, "from gitter", "Imported from Gitter", stream_id) - ) - defaultstream = build_defaultstream(realm_id=realm_id, stream_id=stream_id, defaultstream_id=0) - stream_id += 1 - - # Gathering streams from gitter_data - stream_map: Dict[str, int] = {} - for data in gitter_data: - if "room" in data and data["room"] not in stream_map: - stream.append( - build_stream( - timestamp, realm_id, data["room"], f'Gitter room {data["room"]}', stream_id - ) - ) - stream_map[data["room"]] = stream_id - stream_id += 1 - logging.info("######### IMPORTING STREAMS FINISHED #########\n") - - return stream, [defaultstream], stream_map - - -def build_recipient_and_subscription( - zerver_userprofile: List[ZerverFieldsT], zerver_stream: List[ZerverFieldsT] -) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT]]: - """ - Assumes that there is at least one stream with 'stream_id' = 0, - and that this stream is the only defaultstream, with 'defaultstream_id' = 0 - Returns: - 1. zerver_recipient, which is a list of mapped recipient - 2. zerver_subscription, which is a list of mapped subscription - """ - zerver_recipient = [] - zerver_subscription = [] - recipient_id = subscription_id = 0 - - # For streams - - # Initial recipients correspond to initial streams - # We enumerate all streams, and build a recipient for each - # Hence 'recipient_id'=n corresponds to 'stream_id'=n - for stream in zerver_stream: - zerver_recipient.append(build_recipient(recipient_id, recipient_id, Recipient.STREAM)) - recipient_id += 1 - - # For users - for user in zerver_userprofile: - zerver_recipient.append(build_recipient(user["id"], recipient_id, Recipient.PERSONAL)) - zerver_subscription.append(build_subscription(recipient_id, user["id"], subscription_id)) - recipient_id += 1 - subscription_id += 1 - - # As suggested in #14830, we subscribe every user to every stream. - # We rely on the above invariant: 'recipient_id'=n corresponds to 'stream_id'=n - # - # TODO: For multi-stream imports, subscribe users to streams - # based either on Gitter API data or who sent messages where. - for user in zerver_userprofile: - for stream in zerver_stream: - zerver_subscription.append( - build_subscription(stream["id"], user["id"], subscription_id) - ) - subscription_id += 1 - - return zerver_recipient, zerver_subscription - - -def get_timestamp_from_message(message: ZerverFieldsT) -> float: - # Gitter's timestamps are in UTC - return float(dateutil.parser.parse(message["sent"]).timestamp()) - - -def get_user_from_message(message: ZerverFieldsT) -> str: - return message["fromUser"]["id"] - - -def convert_gitter_workspace_messages( - gitter_data: GitterDataT, - output_dir: str, - subscriber_map: Dict[int, Set[int]], - user_map: Dict[str, int], - stream_map: Dict[str, int], - user_short_name_to_full_name: Dict[str, str], - zerver_userprofile: List[ZerverFieldsT], - realm_id: int, - chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE, -) -> None: - """ - Messages are stored in batches - """ - logging.info("######### IMPORTING MESSAGES STARTED #########\n") - - long_term_idle = long_term_idle_helper( - iter(gitter_data), - get_user_from_message, - get_timestamp_from_message, - lambda id: user_map[id], - iter(user_map.keys()), - zerver_userprofile, - ) - - message_id = 0 - - low_index = 0 - upper_index = low_index + chunk_size - dump_file_id = 1 - - while True: - message_json = {} - zerver_message = [] - zerver_usermessage: List[ZerverFieldsT] = [] - message_data = gitter_data[low_index:upper_index] - if len(message_data) == 0: - break - for message in message_data: - message_time = get_timestamp_from_message(message) - mentioned_user_ids = get_usermentions(message, user_map, user_short_name_to_full_name) - rendered_content = None - topic_name = "imported from Gitter" + ( - f' room {message["room"]}' if "room" in message else "" - ) - user_id = user_map[get_user_from_message(message)] - recipient_id = stream_map[message["room"]] if "room" in message else 0 - zulip_message = build_message( - topic_name=topic_name, - date_sent=message_time, - message_id=message_id, - content=message["text"], - rendered_content=rendered_content, - user_id=user_id, - recipient_id=recipient_id, - realm_id=realm_id, - ) - zerver_message.append(zulip_message) - - build_usermessages( - zerver_usermessage=zerver_usermessage, - subscriber_map=subscriber_map, - recipient_id=recipient_id, - mentioned_user_ids=mentioned_user_ids, - message_id=message_id, - is_private=False, - long_term_idle=long_term_idle, - ) - - message_id += 1 - - message_json["zerver_message"] = zerver_message - message_json["zerver_usermessage"] = zerver_usermessage - message_filename = os.path.join(output_dir, f"messages-{dump_file_id:06}.json") - logging.info("Writing messages to %s\n", message_filename) - write_data_to_file(os.path.join(message_filename), message_json) - - low_index = upper_index - upper_index = chunk_size + low_index - dump_file_id += 1 - - logging.info("######### IMPORTING MESSAGES FINISHED #########\n") - - -def get_usermentions( - message: Dict[str, Any], user_map: Dict[str, int], user_short_name_to_full_name: Dict[str, str] -) -> List[int]: - mentioned_user_ids = [] - if "mentions" in message: - for mention in message["mentions"]: - if mention.get("userId") in user_map: - gitter_mention = "@{}".format(mention["screenName"]) - if mention["screenName"] not in user_short_name_to_full_name: - logging.info( - "Mentioned user %s never sent any messages, so has no full name data", - mention["screenName"], - ) - full_name = mention["screenName"] - else: - full_name = user_short_name_to_full_name[mention["screenName"]] - zulip_mention = f"@**{full_name}**" - message["text"] = message["text"].replace(gitter_mention, zulip_mention) - - mentioned_user_ids.append(user_map[mention["userId"]]) - return mentioned_user_ids - - -def do_convert_data(gitter_data_file: str, output_dir: str, threads: int = 6) -> None: - # Subdomain is set by the user while running the import commands - realm_subdomain = "" - domain_name = settings.EXTERNAL_HOST - - os.makedirs(output_dir, exist_ok=True) - # output directory should be empty initially - if os.listdir(output_dir): - raise Exception("Output directory should be empty!") - - # Read data from the Gitter file - with open(gitter_data_file, "rb") as fp: - gitter_data = orjson.loads(fp.read()) - - realm, avatar_list, user_map, stream_map = gitter_workspace_to_realm( - domain_name, gitter_data, realm_subdomain - ) - - subscriber_map = make_subscriber_map( - zerver_subscription=realm["zerver_subscription"], - ) - - # For user mentions - user_short_name_to_full_name = {} - for userprofile in realm["zerver_userprofile"]: - user_short_name_to_full_name[userprofile["short_name"]] = userprofile["full_name"] - - convert_gitter_workspace_messages( - gitter_data, - output_dir, - subscriber_map, - user_map, - stream_map, - user_short_name_to_full_name, - realm["zerver_userprofile"], - realm_id=realm_id, - ) - - avatar_folder = os.path.join(output_dir, "avatars") - avatar_realm_folder = os.path.join(avatar_folder, str(realm_id)) - os.makedirs(avatar_realm_folder, exist_ok=True) - avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads) - - attachment: Dict[str, List[Any]] = {"zerver_attachment": []} - - # IO realm.json - create_converted_data_files(realm, output_dir, "/realm.json") - # IO emoji records - create_converted_data_files([], output_dir, "/emoji/records.json") - # IO avatar records - create_converted_data_files(avatar_records, output_dir, "/avatars/records.json") - # IO uploads records - create_converted_data_files([], output_dir, "/uploads/records.json") - # IO attachments records - create_converted_data_files(attachment, output_dir, "/attachment.json") - - logging.info("######### DATA CONVERSION FINISHED #########\n") - logging.info("Zulip data dump created at %s", output_dir) - - -def write_data_to_file(output_file: str, data: Any) -> None: - with open(output_file, "wb") as f: - f.write(orjson.dumps(data, option=orjson.OPT_INDENT_2)) diff --git a/zerver/data_import/import_util.py b/zerver/data_import/import_util.py index de4f47a2f1..9dcf2b3301 100644 --- a/zerver/data_import/import_util.py +++ b/zerver/data_import/import_util.py @@ -310,7 +310,7 @@ def build_recipients( ) -> List[ZerverFieldsT]: """ This function was only used HipChat import, this function may be - required for future conversions. The Slack and Gitter conversions do it more + required for future conversions. The Slack conversions do it more tightly integrated with creating other objects. """ @@ -582,7 +582,7 @@ def process_avatars( 2. avatar_dir: Folder where the downloaded avatars are saved 3. realm_id: Realm ID. - We use this for Slack and Gitter conversions, where avatars need to be + We use this for Slack conversions, where avatars need to be downloaded. For simpler conversions see write_avatar_png. """ diff --git a/zerver/lib/import_realm.py b/zerver/lib/import_realm.py index 0651e2e317..3fd234b2f1 100644 --- a/zerver/lib/import_realm.py +++ b/zerver/lib/import_realm.py @@ -206,7 +206,7 @@ def create_subscription_events(data: TableData, realm_id: int) -> None: type event for all the existing Stream subscriptions. This is needed for all the export tools which do not include the - table `zerver_realmauditlog` (Slack, Gitter, etc.) because the appropriate + table `zerver_realmauditlog` (e.g. Slack) because the appropriate data about when a user was subscribed is not exported by the third-party service. """ @@ -830,10 +830,6 @@ def import_uploads( # so, it is an error, default_user_profile_id will be # None, and we assert. For emoji / realm icons, we # fall back to default_user_profile_id. - # default_user_profile_id can be None in Gitter - # imports, which do not create any owners; but Gitter - # does not have emoji which we would need to allocate - # a user to. assert default_user_profile_id is not None metadata["user_profile_id"] = str(default_user_profile_id) else: diff --git a/zerver/management/commands/convert_gitter_data.py b/zerver/management/commands/convert_gitter_data.py deleted file mode 100644 index 158d3eba35..0000000000 --- a/zerver/management/commands/convert_gitter_data.py +++ /dev/null @@ -1,51 +0,0 @@ -import argparse -import os -import tempfile -from typing import Any - -from django.conf import settings -from django.core.management.base import BaseCommand, CommandError, CommandParser -from typing_extensions import override - -from zerver.data_import.gitter import do_convert_data - - -class Command(BaseCommand): - help = """Convert the Gitter data into Zulip data format.""" - - @override - def add_arguments(self, parser: CommandParser) -> None: - parser.add_argument( - "gitter_data", nargs="+", metavar="", help="Gitter data in json format" - ) - - parser.add_argument( - "--output", dest="output_dir", help="Directory to write exported data to." - ) - - parser.add_argument( - "--threads", - default=settings.DEFAULT_DATA_EXPORT_IMPORT_PARALLELISM, - help="Threads to download avatars and attachments faster", - ) - - parser.formatter_class = argparse.RawTextHelpFormatter - - @override - def handle(self, *args: Any, **options: Any) -> None: - output_dir = options["output_dir"] - if output_dir is None: - output_dir = tempfile.mkdtemp(prefix="converted-gitter-data-") - else: - output_dir = os.path.realpath(output_dir) - - num_threads = int(options["threads"]) - if num_threads < 1: - raise CommandError("You must have at least one thread.") - - for path in options["gitter_data"]: - if not os.path.exists(path): - raise CommandError(f"Gitter data file not found: '{path}'") - # TODO add json check - print("Converting data ...") - do_convert_data(path, output_dir, num_threads) diff --git a/zerver/tests/fixtures/gitter_data.json b/zerver/tests/fixtures/gitter_data.json deleted file mode 100644 index d7c5a10d33..0000000000 --- a/zerver/tests/fixtures/gitter_data.json +++ /dev/null @@ -1,44 +0,0 @@ -[ -{ - "fromUser": { - "avatarUrl": "https://avatars-02.gitter.im/gh/uv/4/username", - "displayName": "User Full Name", - "gv": "4", - "id": "54d7876c15522ed4b3dbbefb", - "url": "/user1", - "username": "username1", - "v": 17 - }, - "html": "test message", - "id": "57509d0ef44fde236e52f395", - "issues": [], - "meta": [], - "readBy": 8, - "sent": "2015-06-02T20:54:38.747Z", - "text": "test message", - "unread": false, - "urls": [], - "v": 1 -}, -{ - "fromUser": { - "avatarUrl": "https://avatars-02.gitter.im/gh/uv/4/username", - "displayName": "User Full Name 2", - "gv": "3", - "id": "54d7876c15522ed4b3dbtest", - "url": "/user2", - "username": "username2", - "v": 17 - }, - "html": "test message 2", - "id": "57509d0ef44fde236e52f395", - "issues": [], - "meta": [], - "readBy": 8, - "sent": "2016-07-02T19:54:38.747Z", - "text": "test message 2", - "unread": false, - "urls": [], - "v": 1 -} -] diff --git a/zerver/tests/test_gitter_importer.py b/zerver/tests/test_gitter_importer.py deleted file mode 100644 index 75e950acbd..0000000000 --- a/zerver/tests/test_gitter_importer.py +++ /dev/null @@ -1,176 +0,0 @@ -import os -from datetime import timedelta -from typing import Any -from unittest import mock - -import dateutil.parser -import orjson -import time_machine - -from zerver.data_import.gitter import do_convert_data, get_usermentions -from zerver.lib.import_realm import do_import_realm -from zerver.lib.test_classes import ZulipTestCase -from zerver.models import Message, UserProfile -from zerver.models.realms import get_realm -from zproject.backends import ( - AUTH_BACKEND_NAME_MAP, - GitHubAuthBackend, - auth_enabled_helper, - github_auth_enabled, -) - - -class GitterImporter(ZulipTestCase): - @mock.patch("zerver.data_import.gitter.process_avatars", return_value=[]) - def test_gitter_import_data_conversion(self, mock_process_avatars: mock.Mock) -> None: - output_dir = self.make_import_output_dir("gitter") - gitter_file = os.path.join(os.path.dirname(__file__), "fixtures/gitter_data.json") - - # We need some time-mocking to set up user soft-deactivation logic. - # One of the messages in the import data - # is significantly older than the other one. We mock the current time in the relevant module - # to match the sent time of the more recent message - to make it look like one of the messages - # is very recent, while the other one is old. This should cause that the sender of the recent - # message to NOT be soft-deactivated, while the sender of the other one is. - with open(gitter_file) as f: - gitter_data = orjson.loads(f.read()) - sent_datetime = dateutil.parser.parse(gitter_data[1]["sent"]) - with self.assertLogs(level="INFO"), time_machine.travel( - (sent_datetime + timedelta(days=1)), tick=False - ): - do_convert_data(gitter_file, output_dir) - - def read_file(output_file: str) -> Any: - full_path = os.path.join(output_dir, output_file) - with open(full_path, "rb") as f: - return orjson.loads(f.read()) - - self.assertEqual(os.path.exists(os.path.join(output_dir, "avatars")), True) - self.assertEqual(os.path.exists(os.path.join(output_dir, "emoji")), True) - self.assertEqual(os.path.exists(os.path.join(output_dir, "attachment.json")), True) - - realm = read_file("realm.json") - - # test realm - self.assertEqual( - "Organization imported from Gitter!", realm["zerver_realm"][0]["description"] - ) - - # test users - exported_user_ids = self.get_set(realm["zerver_userprofile"], "id") - exported_user_full_name = self.get_set(realm["zerver_userprofile"], "full_name") - self.assertIn("User Full Name", exported_user_full_name) - exported_user_email = self.get_set(realm["zerver_userprofile"], "email") - self.assertIn("username2@users.noreply.github.com", exported_user_email) - - # test stream - self.assert_length(realm["zerver_stream"], 1) - self.assertEqual(realm["zerver_stream"][0]["name"], "from gitter") - self.assertEqual(realm["zerver_stream"][0]["deactivated"], False) - self.assertEqual(realm["zerver_stream"][0]["realm"], realm["zerver_realm"][0]["id"]) - - self.assertEqual( - realm["zerver_defaultstream"][0]["stream"], realm["zerver_stream"][0]["id"] - ) - - # test recipient - exported_recipient_id = self.get_set(realm["zerver_recipient"], "id") - exported_recipient_type = self.get_set(realm["zerver_recipient"], "type") - self.assertEqual({1, 2}, exported_recipient_type) - - # test subscription - exported_subscription_userprofile = self.get_set( - realm["zerver_subscription"], "user_profile" - ) - self.assertEqual({0, 1}, exported_subscription_userprofile) - exported_subscription_recipient = self.get_set(realm["zerver_subscription"], "recipient") - self.assert_length(exported_subscription_recipient, 3) - self.assertIn(realm["zerver_subscription"][1]["recipient"], exported_recipient_id) - - messages = read_file("messages-000001.json") - - # test messages - exported_messages_id = self.get_set(messages["zerver_message"], "id") - self.assertIn(messages["zerver_message"][0]["sender"], exported_user_ids) - self.assertIn(messages["zerver_message"][1]["recipient"], exported_recipient_id) - self.assertIn(messages["zerver_message"][0]["content"], "test message") - - # test usermessages and soft-deactivation of users - [user_should_be_long_term_idle] = ( - user - for user in realm["zerver_userprofile"] - if user["delivery_email"] == "username1@users.noreply.github.com" - ) - [user_should_not_be_long_term_idle] = ( - user - for user in realm["zerver_userprofile"] - if user["delivery_email"] == "username2@users.noreply.github.com" - ) - self.assertEqual(user_should_be_long_term_idle["long_term_idle"], True) - - # Only the user who's not soft-deactivated gets UserMessages. - exported_usermessage_userprofile = self.get_set( - messages["zerver_usermessage"], "user_profile" - ) - self.assertEqual( - {user_should_not_be_long_term_idle["id"]}, exported_usermessage_userprofile - ) - exported_usermessage_message = self.get_set(messages["zerver_usermessage"], "message") - self.assertEqual(exported_usermessage_message, exported_messages_id) - - @mock.patch("zerver.data_import.gitter.process_avatars", return_value=[]) - def test_gitter_import_to_existing_database(self, mock_process_avatars: mock.Mock) -> None: - output_dir = self.make_import_output_dir("gitter") - gitter_file = os.path.join(os.path.dirname(__file__), "fixtures/gitter_data.json") - with self.assertLogs(level="INFO"): - do_convert_data(gitter_file, output_dir) - - with self.assertLogs(level="INFO"): - do_import_realm(output_dir, "test-gitter-import") - - realm = get_realm("test-gitter-import") - - # test rendered_messages - realm_users = UserProfile.objects.filter(realm=realm) - messages = Message.objects.filter(realm_id=realm.id, sender__in=realm_users) - for message in messages: - self.assertIsNotNone(message.rendered_content, None) - - self.assertTrue(github_auth_enabled(realm)) - for auth_backend_name in AUTH_BACKEND_NAME_MAP: - if auth_backend_name == GitHubAuthBackend.auth_backend_name: - continue - - self.assertFalse(auth_enabled_helper([auth_backend_name], realm)) - - def test_get_usermentions(self) -> None: - user_map = {"57124a4": 3, "57124b4": 5, "57124c4": 8} - user_short_name_to_full_name = { - "user": "user name", - "user2": "user2", - "user3": "user name 3", - "user4": "user 4", - } - messages = [ - {"text": "hi @user", "mentions": [{"screenName": "user", "userId": "57124a4"}]}, - { - "text": "hi @user2 @user3", - "mentions": [ - {"screenName": "user2", "userId": "57124b4"}, - {"screenName": "user3", "userId": "57124c4"}, - ], - }, - {"text": "hi @user4", "mentions": [{"screenName": "user4"}]}, - {"text": "hi @user5", "mentions": [{"screenName": "user", "userId": "5712ds4"}]}, - ] - - self.assertEqual(get_usermentions(messages[0], user_map, user_short_name_to_full_name), [3]) - self.assertEqual(messages[0]["text"], "hi @**user name**") - self.assertEqual( - get_usermentions(messages[1], user_map, user_short_name_to_full_name), [5, 8] - ) - self.assertEqual(messages[1]["text"], "hi @**user2** @**user name 3**") - self.assertEqual(get_usermentions(messages[2], user_map, user_short_name_to_full_name), []) - self.assertEqual(messages[2]["text"], "hi @user4") - self.assertEqual(get_usermentions(messages[3], user_map, user_short_name_to_full_name), []) - self.assertEqual(messages[3]["text"], "hi @user5")