mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	hipchat_import: Remove tool from codebase.
Remove functions and scripts used by HipChat import tool and those which will no longer be required in future.
This commit is contained in:
		@@ -27,6 +27,7 @@ in bursts.
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#### Full feature changelog
 | 
					#### Full feature changelog
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Removed HipChat import tool.
 | 
				
			||||||
- Added support for moving topics to private streams.
 | 
					- Added support for moving topics to private streams.
 | 
				
			||||||
- Added support for subscribing another stream's membership to a stream.
 | 
					- Added support for subscribing another stream's membership to a stream.
 | 
				
			||||||
- Added RealmAuditLog for most settings state changes in Zulip; this
 | 
					- Added RealmAuditLog for most settings state changes in Zulip; this
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -78,7 +78,6 @@ backup][zulip-backups] or importing your data from [Slack][slack-import],
 | 
				
			|||||||
or another Zulip server, you should stop here
 | 
					or another Zulip server, you should stop here
 | 
				
			||||||
and return to the import instructions.
 | 
					and return to the import instructions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[hipchat-import]: https://zulip.com/help/import-from-hipchat
 | 
					 | 
				
			||||||
[slack-import]: https://zulip.com/help/import-from-slack
 | 
					[slack-import]: https://zulip.com/help/import-from-slack
 | 
				
			||||||
[zulip-backups]: ../production/export-and-import.html#backups
 | 
					[zulip-backups]: ../production/export-and-import.html#backups
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -59,9 +59,6 @@ httplib2
 | 
				
			|||||||
# Forked to avoid pulling in scipy: https://github.com/mailgun/talon/issues/130
 | 
					# Forked to avoid pulling in scipy: https://github.com/mailgun/talon/issues/130
 | 
				
			||||||
https://github.com/zulip/talon/archive/7d8bdc4dbcfcc5a73298747293b99fe53da55315.zip#egg=talon==1.2.10.zulip1
 | 
					https://github.com/zulip/talon/archive/7d8bdc4dbcfcc5a73298747293b99fe53da55315.zip#egg=talon==1.2.10.zulip1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Needed for HipChat import
 | 
					 | 
				
			||||||
hypchat
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Needed for inlining the CSS in emails
 | 
					# Needed for inlining the CSS in emails
 | 
				
			||||||
premailer
 | 
					premailer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -355,9 +355,6 @@ httplib2==0.18.1 \
 | 
				
			|||||||
    --hash=sha256:8af66c1c52c7ffe1aa5dc4bcd7c769885254b0756e6e69f953c7f0ab49a70ba3 \
 | 
					    --hash=sha256:8af66c1c52c7ffe1aa5dc4bcd7c769885254b0756e6e69f953c7f0ab49a70ba3 \
 | 
				
			||||||
    --hash=sha256:ca2914b015b6247791c4866782fa6042f495b94401a0f0bd3e1d6e0ba2236782 \
 | 
					    --hash=sha256:ca2914b015b6247791c4866782fa6042f495b94401a0f0bd3e1d6e0ba2236782 \
 | 
				
			||||||
    # via -r requirements/common.in
 | 
					    # via -r requirements/common.in
 | 
				
			||||||
hypchat==0.21 \
 | 
					 | 
				
			||||||
    --hash=sha256:ef37a9cd8103bb13ad772b28ba9223ca9d4278371e374450c3ea2918df70a8e9 \
 | 
					 | 
				
			||||||
    # via -r requirements/common.in
 | 
					 | 
				
			||||||
hyper==0.7.0 \
 | 
					hyper==0.7.0 \
 | 
				
			||||||
    --hash=sha256:069514f54231fb7b5df2fb910a114663a83306d5296f588fffcb0a9be19407fc \
 | 
					    --hash=sha256:069514f54231fb7b5df2fb910a114663a83306d5296f588fffcb0a9be19407fc \
 | 
				
			||||||
    --hash=sha256:12c82eacd122a659673484c1ea0d34576430afbe5aa6b8f63fe37fcb06a2458c \
 | 
					    --hash=sha256:12c82eacd122a659673484c1ea0d34576430afbe5aa6b8f63fe37fcb06a2458c \
 | 
				
			||||||
@@ -915,7 +912,7 @@ python-binary-memcached==0.30.1 \
 | 
				
			|||||||
python-dateutil==2.8.1 \
 | 
					python-dateutil==2.8.1 \
 | 
				
			||||||
    --hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
 | 
					    --hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
 | 
				
			||||||
    --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a \
 | 
					    --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a \
 | 
				
			||||||
    # via -r requirements/common.in, arrow, botocore, hypchat, moto
 | 
					    # via -r requirements/common.in, arrow, botocore, moto
 | 
				
			||||||
python-debian==0.1.38 \
 | 
					python-debian==0.1.38 \
 | 
				
			||||||
    --hash=sha256:a1f89336d7675a56cdd92fa90cd8c00b9178dabcc6d3e08a397e80eca2b855f3 \
 | 
					    --hash=sha256:a1f89336d7675a56cdd92fa90cd8c00b9178dabcc6d3e08a397e80eca2b855f3 \
 | 
				
			||||||
    --hash=sha256:a352bb5f9ef19b0272078f516ee0ec42b05e90ac85651d87c10e7041550dcc1d \
 | 
					    --hash=sha256:a352bb5f9ef19b0272078f516ee0ec42b05e90ac85651d87c10e7041550dcc1d \
 | 
				
			||||||
@@ -1043,7 +1040,7 @@ requests-oauthlib==1.3.0 \
 | 
				
			|||||||
requests[security]==2.25.0 \
 | 
					requests[security]==2.25.0 \
 | 
				
			||||||
    --hash=sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8 \
 | 
					    --hash=sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8 \
 | 
				
			||||||
    --hash=sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998 \
 | 
					    --hash=sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998 \
 | 
				
			||||||
    # via -r requirements/common.in, docker, hypchat, matrix-client, moto, premailer, pyoembed, python-digitalocean, python-gcm, python-twitter, requests-oauthlib, responses, semgrep, social-auth-core, sphinx, stripe, twilio, zulip
 | 
					    # via -r requirements/common.in, docker, matrix-client, moto, premailer, pyoembed, python-digitalocean, python-gcm, python-twitter, requests-oauthlib, responses, semgrep, social-auth-core, sphinx, stripe, twilio, zulip
 | 
				
			||||||
responses==0.12.0 \
 | 
					responses==0.12.0 \
 | 
				
			||||||
    --hash=sha256:0de50fbf600adf5ef9f0821b85cc537acca98d66bc7776755924476775c1989c \
 | 
					    --hash=sha256:0de50fbf600adf5ef9f0821b85cc537acca98d66bc7776755924476775c1989c \
 | 
				
			||||||
    --hash=sha256:e80d5276011a4b79ecb62c5f82ba07aa23fb31ecbc95ee7cad6de250a3c97444 \
 | 
					    --hash=sha256:e80d5276011a4b79ecb62c5f82ba07aa23fb31ecbc95ee7cad6de250a3c97444 \
 | 
				
			||||||
@@ -1117,7 +1114,7 @@ sh==1.14.1 \
 | 
				
			|||||||
six==1.15.0 \
 | 
					six==1.15.0 \
 | 
				
			||||||
    --hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259 \
 | 
					    --hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259 \
 | 
				
			||||||
    --hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \
 | 
					    --hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \
 | 
				
			||||||
    # via argon2-cffi, automat, aws-sam-translator, cfn-lint, cryptography, django-bitfield, docker, ecdsa, hypchat, isodate, jsonschema, junit-xml, libthumbor, moto, openapi-core, openapi-schema-validator, openapi-spec-validator, parsel, pip-tools, protego, pyopenssl, python-binary-memcached, python-dateutil, python-debian, python-jose, qrcode, responses, social-auth-app-django, social-auth-core, talon, traitlets, twilio, w3lib, websocket-client, zulip
 | 
					    # via argon2-cffi, automat, aws-sam-translator, cfn-lint, cryptography, django-bitfield, docker, ecdsa, isodate, jsonschema, junit-xml, libthumbor, moto, openapi-core, openapi-schema-validator, openapi-spec-validator, parsel, pip-tools, protego, pyopenssl, python-binary-memcached, python-dateutil, python-debian, python-jose, qrcode, responses, social-auth-app-django, social-auth-core, talon, traitlets, twilio, w3lib, websocket-client, zulip
 | 
				
			||||||
snakeviz==2.1.0 \
 | 
					snakeviz==2.1.0 \
 | 
				
			||||||
    --hash=sha256:8ce375b18ae4a749516d7e6c6fbbf8be6177c53974f53534d8eadb646cd279b1 \
 | 
					    --hash=sha256:8ce375b18ae4a749516d7e6c6fbbf8be6177c53974f53534d8eadb646cd279b1 \
 | 
				
			||||||
    --hash=sha256:92ad876fb6a201a7e23a6b85ea96d9643a51e285667c253a8653643804f7cb68 \
 | 
					    --hash=sha256:92ad876fb6a201a7e23a6b85ea96d9643a51e285667c253a8653643804f7cb68 \
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -243,9 +243,6 @@ httplib2==0.18.1 \
 | 
				
			|||||||
    --hash=sha256:8af66c1c52c7ffe1aa5dc4bcd7c769885254b0756e6e69f953c7f0ab49a70ba3 \
 | 
					    --hash=sha256:8af66c1c52c7ffe1aa5dc4bcd7c769885254b0756e6e69f953c7f0ab49a70ba3 \
 | 
				
			||||||
    --hash=sha256:ca2914b015b6247791c4866782fa6042f495b94401a0f0bd3e1d6e0ba2236782 \
 | 
					    --hash=sha256:ca2914b015b6247791c4866782fa6042f495b94401a0f0bd3e1d6e0ba2236782 \
 | 
				
			||||||
    # via -r requirements/common.in
 | 
					    # via -r requirements/common.in
 | 
				
			||||||
hypchat==0.21 \
 | 
					 | 
				
			||||||
    --hash=sha256:ef37a9cd8103bb13ad772b28ba9223ca9d4278371e374450c3ea2918df70a8e9 \
 | 
					 | 
				
			||||||
    # via -r requirements/common.in
 | 
					 | 
				
			||||||
hyper==0.7.0 \
 | 
					hyper==0.7.0 \
 | 
				
			||||||
    --hash=sha256:069514f54231fb7b5df2fb910a114663a83306d5296f588fffcb0a9be19407fc \
 | 
					    --hash=sha256:069514f54231fb7b5df2fb910a114663a83306d5296f588fffcb0a9be19407fc \
 | 
				
			||||||
    --hash=sha256:12c82eacd122a659673484c1ea0d34576430afbe5aa6b8f63fe37fcb06a2458c \
 | 
					    --hash=sha256:12c82eacd122a659673484c1ea0d34576430afbe5aa6b8f63fe37fcb06a2458c \
 | 
				
			||||||
@@ -646,7 +643,7 @@ python-binary-memcached==0.30.1 \
 | 
				
			|||||||
python-dateutil==2.8.1 \
 | 
					python-dateutil==2.8.1 \
 | 
				
			||||||
    --hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
 | 
					    --hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
 | 
				
			||||||
    --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a \
 | 
					    --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a \
 | 
				
			||||||
    # via -r requirements/common.in, botocore, hypchat
 | 
					    # via -r requirements/common.in, botocore
 | 
				
			||||||
python-gcm==0.4 \
 | 
					python-gcm==0.4 \
 | 
				
			||||||
    --hash=sha256:511c35fc5ae829f7fc3cbdb45c4ec3fda02f85e4fae039864efe82682ccb9c18 \
 | 
					    --hash=sha256:511c35fc5ae829f7fc3cbdb45c4ec3fda02f85e4fae039864efe82682ccb9c18 \
 | 
				
			||||||
    # via -r requirements/common.in
 | 
					    # via -r requirements/common.in
 | 
				
			||||||
@@ -751,7 +748,7 @@ requests-oauthlib==1.3.0 \
 | 
				
			|||||||
requests[security]==2.25.0 \
 | 
					requests[security]==2.25.0 \
 | 
				
			||||||
    --hash=sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8 \
 | 
					    --hash=sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8 \
 | 
				
			||||||
    --hash=sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998 \
 | 
					    --hash=sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998 \
 | 
				
			||||||
    # via -r requirements/common.in, hypchat, matrix-client, premailer, pyoembed, python-gcm, python-twitter, requests-oauthlib, social-auth-core, stripe, twilio, zulip
 | 
					    # via -r requirements/common.in, matrix-client, premailer, pyoembed, python-gcm, python-twitter, requests-oauthlib, social-auth-core, stripe, twilio, zulip
 | 
				
			||||||
s3transfer==0.3.3 \
 | 
					s3transfer==0.3.3 \
 | 
				
			||||||
    --hash=sha256:2482b4259524933a022d59da830f51bd746db62f047d6eb213f2f8855dcb8a13 \
 | 
					    --hash=sha256:2482b4259524933a022d59da830f51bd746db62f047d6eb213f2f8855dcb8a13 \
 | 
				
			||||||
    --hash=sha256:921a37e2aefc64145e7b73d50c71bb4f26f46e4c9f414dc648c6245ff92cf7db \
 | 
					    --hash=sha256:921a37e2aefc64145e7b73d50c71bb4f26f46e4c9f414dc648c6245ff92cf7db \
 | 
				
			||||||
@@ -763,7 +760,7 @@ sentry-sdk==0.19.4 \
 | 
				
			|||||||
six==1.15.0 \
 | 
					six==1.15.0 \
 | 
				
			||||||
    --hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259 \
 | 
					    --hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259 \
 | 
				
			||||||
    --hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \
 | 
					    --hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \
 | 
				
			||||||
    # via argon2-cffi, cryptography, django-bitfield, hypchat, isodate, jsonschema, libthumbor, openapi-core, openapi-schema-validator, openapi-spec-validator, pyopenssl, python-binary-memcached, python-dateutil, qrcode, social-auth-app-django, social-auth-core, talon, traitlets, twilio, zulip
 | 
					    # via argon2-cffi, cryptography, django-bitfield, isodate, jsonschema, libthumbor, openapi-core, openapi-schema-validator, openapi-spec-validator, pyopenssl, python-binary-memcached, python-dateutil, qrcode, social-auth-app-django, social-auth-core, talon, traitlets, twilio, zulip
 | 
				
			||||||
social-auth-app-django==4.0.0 \
 | 
					social-auth-app-django==4.0.0 \
 | 
				
			||||||
    --hash=sha256:2c69e57df0b30c9c1823519c5f1992cbe4f3f98fdc7d95c840e091a752708840 \
 | 
					    --hash=sha256:2c69e57df0b30c9c1823519c5f1992cbe4f3f98fdc7d95c840e091a752708840 \
 | 
				
			||||||
    --hash=sha256:567ad0e028311541d7dfed51d3bf2c60440a6fd236d5d4d06c5a618b3d6c57c5 \
 | 
					    --hash=sha256:567ad0e028311541d7dfed51d3bf2c60440a6fd236d5d4d06c5a618b3d6c57c5 \
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -243,8 +243,7 @@
 | 
				
			|||||||
        <a class="feature-block" href="/help/import-from-slack" target="_blank" rel="noopener noreferrer">
 | 
					        <a class="feature-block" href="/help/import-from-slack" target="_blank" rel="noopener noreferrer">
 | 
				
			||||||
            <h3>DATA IMPORT</h3>
 | 
					            <h3>DATA IMPORT</h3>
 | 
				
			||||||
            <p>
 | 
					            <p>
 | 
				
			||||||
                Import an existing Slack, Mattermost, HipChat, Stride,
 | 
					                Import an existing Slack, Mattermost or Gitter workspace into Zulip.
 | 
				
			||||||
                or Gitter workspace into Zulip.
 | 
					 | 
				
			||||||
            </p>
 | 
					            </p>
 | 
				
			||||||
        </a>
 | 
					        </a>
 | 
				
			||||||
        <a class="feature-block" href="/help/add-custom-profile-fields" target="_blank" rel="noopener noreferrer">
 | 
					        <a class="feature-block" href="/help/add-custom-profile-fields" target="_blank" rel="noopener noreferrer">
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -7,7 +7,7 @@ message is about.
 | 
				
			|||||||
|---|---|---
 | 
					|---|---|---
 | 
				
			||||||
| Zulip | Stream | Topic
 | 
					| Zulip | Stream | Topic
 | 
				
			||||||
| Email | Mailing list | Subject line
 | 
					| Email | Mailing list | Subject line
 | 
				
			||||||
| Slack/IRC/HipChat | Channel/Room | -
 | 
					| Slack/IRC | Channel/Room | -
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Messages with the same stream and topic are shown together as a
 | 
					Messages with the same stream and topic are shown together as a
 | 
				
			||||||
conversational thread. Here is what it looks like in Zulip.
 | 
					conversational thread. Here is what it looks like in Zulip.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,7 +1,7 @@
 | 
				
			|||||||
There are a lot of team chat apps. So why did we build Zulip?
 | 
					There are a lot of team chat apps. So why did we build Zulip?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
We talk about Slack in the discussion below, but the problems apply equally
 | 
					We talk about Slack in the discussion below, but the problems apply equally
 | 
				
			||||||
to other apps with Slack’s conversation model, including HipChat, IRC,
 | 
					to other apps with Slack’s conversation model, including IRC,
 | 
				
			||||||
Mattermost, Discord, Spark, and others.
 | 
					Mattermost, Discord, Spark, and others.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Reading busy Slack channels is extremely inefficient.
 | 
					## Reading busy Slack channels is extremely inefficient.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -120,7 +120,6 @@ not_yet_fully_covered = [
 | 
				
			|||||||
    'zerver/tornado/sharding.py',
 | 
					    'zerver/tornado/sharding.py',
 | 
				
			||||||
    'zerver/tornado/views.py',
 | 
					    'zerver/tornado/views.py',
 | 
				
			||||||
    # Data import files; relatively low priority
 | 
					    # Data import files; relatively low priority
 | 
				
			||||||
    'zerver/data_import/hipchat*.py',
 | 
					 | 
				
			||||||
    'zerver/data_import/sequencer.py',
 | 
					    'zerver/data_import/sequencer.py',
 | 
				
			||||||
    'zerver/data_import/slack.py',
 | 
					    'zerver/data_import/slack.py',
 | 
				
			||||||
    'zerver/data_import/gitter.py',
 | 
					    'zerver/data_import/gitter.py',
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -43,4 +43,4 @@ API_FEATURE_LEVEL = 36
 | 
				
			|||||||
#   historical commits sharing the same major version, in which case a
 | 
					#   historical commits sharing the same major version, in which case a
 | 
				
			||||||
#   minor version bump suffices.
 | 
					#   minor version bump suffices.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PROVISION_VERSION = '119.0'
 | 
					PROVISION_VERSION = '120.0'
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,882 +0,0 @@
 | 
				
			|||||||
import base64
 | 
					 | 
				
			||||||
import glob
 | 
					 | 
				
			||||||
import logging
 | 
					 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import re
 | 
					 | 
				
			||||||
import shutil
 | 
					 | 
				
			||||||
import subprocess
 | 
					 | 
				
			||||||
from typing import Any, Callable, Dict, List, Optional, Set
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import dateutil
 | 
					 | 
				
			||||||
import hypchat
 | 
					 | 
				
			||||||
import orjson
 | 
					 | 
				
			||||||
from django.conf import settings
 | 
					 | 
				
			||||||
from django.utils.timezone import now as timezone_now
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from zerver.data_import.hipchat_attachment import AttachmentHandler
 | 
					 | 
				
			||||||
from zerver.data_import.hipchat_user import UserHandler
 | 
					 | 
				
			||||||
from zerver.data_import.import_util import (
 | 
					 | 
				
			||||||
    SubscriberHandler,
 | 
					 | 
				
			||||||
    build_message,
 | 
					 | 
				
			||||||
    build_personal_subscriptions,
 | 
					 | 
				
			||||||
    build_public_stream_subscriptions,
 | 
					 | 
				
			||||||
    build_realm,
 | 
					 | 
				
			||||||
    build_realm_emoji,
 | 
					 | 
				
			||||||
    build_recipients,
 | 
					 | 
				
			||||||
    build_stream,
 | 
					 | 
				
			||||||
    build_stream_subscriptions,
 | 
					 | 
				
			||||||
    build_user_profile,
 | 
					 | 
				
			||||||
    build_zerver_realm,
 | 
					 | 
				
			||||||
    create_converted_data_files,
 | 
					 | 
				
			||||||
    make_subscriber_map,
 | 
					 | 
				
			||||||
    make_user_messages,
 | 
					 | 
				
			||||||
    write_avatar_png,
 | 
					 | 
				
			||||||
)
 | 
					 | 
				
			||||||
from zerver.data_import.sequencer import NEXT_ID, IdMapper
 | 
					 | 
				
			||||||
from zerver.lib.utils import process_list_in_batches
 | 
					 | 
				
			||||||
from zerver.models import RealmEmoji, Recipient, UserProfile
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# stubs
 | 
					 | 
				
			||||||
ZerverFieldsT = Dict[str, Any]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def str_date_to_float(date_str: str) -> float:
 | 
					 | 
				
			||||||
    '''
 | 
					 | 
				
			||||||
        Dates look like this:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        "2018-08-08T14:23:54Z 626267"
 | 
					 | 
				
			||||||
    '''
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    parts = date_str.split(' ')
 | 
					 | 
				
			||||||
    time_str = parts[0].replace('T', ' ')
 | 
					 | 
				
			||||||
    date_time = dateutil.parser.parse(time_str)
 | 
					 | 
				
			||||||
    timestamp = date_time.timestamp()
 | 
					 | 
				
			||||||
    if len(parts) == 2:
 | 
					 | 
				
			||||||
        microseconds = int(parts[1])
 | 
					 | 
				
			||||||
        timestamp += microseconds / 1000000.0
 | 
					 | 
				
			||||||
    return timestamp
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def untar_input_file(tar_file: str) -> str:
 | 
					 | 
				
			||||||
    data_dir = tar_file.replace('.tar', '')
 | 
					 | 
				
			||||||
    data_dir = os.path.abspath(data_dir)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if os.path.exists(data_dir):
 | 
					 | 
				
			||||||
        logging.info('input data was already untarred to %s, we will use it', data_dir)
 | 
					 | 
				
			||||||
        return data_dir
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    os.makedirs(data_dir)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    subprocess.check_call(['tar', '-xf', tar_file, '-C', data_dir])
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    logging.info('input data was untarred to %s', data_dir)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return data_dir
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def read_user_data(data_dir: str) -> List[ZerverFieldsT]:
 | 
					 | 
				
			||||||
    fn = 'users.json'
 | 
					 | 
				
			||||||
    data_file = os.path.join(data_dir, fn)
 | 
					 | 
				
			||||||
    with open(data_file, "rb") as fp:
 | 
					 | 
				
			||||||
        return orjson.loads(fp.read())
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def convert_user_data(user_handler: UserHandler,
 | 
					 | 
				
			||||||
                      slim_mode: bool,
 | 
					 | 
				
			||||||
                      user_id_mapper: IdMapper,
 | 
					 | 
				
			||||||
                      raw_data: List[ZerverFieldsT],
 | 
					 | 
				
			||||||
                      realm_id: int) -> None:
 | 
					 | 
				
			||||||
    flat_data = [
 | 
					 | 
				
			||||||
        d['User']
 | 
					 | 
				
			||||||
        for d in raw_data
 | 
					 | 
				
			||||||
    ]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def process(in_dict: ZerverFieldsT) -> ZerverFieldsT:
 | 
					 | 
				
			||||||
        delivery_email = in_dict['email']
 | 
					 | 
				
			||||||
        email = in_dict['email']
 | 
					 | 
				
			||||||
        full_name = in_dict['name']
 | 
					 | 
				
			||||||
        id = user_id_mapper.get(in_dict['id'])
 | 
					 | 
				
			||||||
        is_mirror_dummy = False
 | 
					 | 
				
			||||||
        short_name = in_dict['mention_name']
 | 
					 | 
				
			||||||
        timezone = in_dict['timezone']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        role = UserProfile.ROLE_MEMBER
 | 
					 | 
				
			||||||
        if in_dict['account_type'] == 'admin':
 | 
					 | 
				
			||||||
            role = UserProfile.ROLE_REALM_ADMINISTRATOR
 | 
					 | 
				
			||||||
        if in_dict['account_type'] == 'guest':
 | 
					 | 
				
			||||||
            role = UserProfile.ROLE_GUEST
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        date_joined = int(timezone_now().timestamp())
 | 
					 | 
				
			||||||
        is_active = not in_dict['is_deleted']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if not email:
 | 
					 | 
				
			||||||
            if role == UserProfile.ROLE_GUEST:
 | 
					 | 
				
			||||||
                # HipChat guest users don't have emails, so
 | 
					 | 
				
			||||||
                # we just fake them.
 | 
					 | 
				
			||||||
                email = f'guest-{id}@example.com'
 | 
					 | 
				
			||||||
                delivery_email = email
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                # HipChat sometimes doesn't export an email for deactivated users.
 | 
					 | 
				
			||||||
                assert not is_active
 | 
					 | 
				
			||||||
                email = delivery_email = f"deactivated-{id}@example.com"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # unmapped fields:
 | 
					 | 
				
			||||||
        #    title - Developer, Project Manager, etc.
 | 
					 | 
				
			||||||
        #    rooms - no good sample data
 | 
					 | 
				
			||||||
        #    created - we just use "now"
 | 
					 | 
				
			||||||
        #    roles - we just use account_type
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if in_dict.get('avatar'):
 | 
					 | 
				
			||||||
            avatar_source = 'U'
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            avatar_source = 'G'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        return build_user_profile(
 | 
					 | 
				
			||||||
            avatar_source=avatar_source,
 | 
					 | 
				
			||||||
            date_joined=date_joined,
 | 
					 | 
				
			||||||
            delivery_email=delivery_email,
 | 
					 | 
				
			||||||
            email=email,
 | 
					 | 
				
			||||||
            full_name=full_name,
 | 
					 | 
				
			||||||
            id=id,
 | 
					 | 
				
			||||||
            is_active=is_active,
 | 
					 | 
				
			||||||
            role=role,
 | 
					 | 
				
			||||||
            is_mirror_dummy=is_mirror_dummy,
 | 
					 | 
				
			||||||
            realm_id=realm_id,
 | 
					 | 
				
			||||||
            short_name=short_name,
 | 
					 | 
				
			||||||
            timezone=timezone,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for raw_item in flat_data:
 | 
					 | 
				
			||||||
        user = process(raw_item)
 | 
					 | 
				
			||||||
        user_handler.add_user(user)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def convert_avatar_data(avatar_folder: str,
 | 
					 | 
				
			||||||
                        raw_data: List[ZerverFieldsT],
 | 
					 | 
				
			||||||
                        user_id_mapper: IdMapper,
 | 
					 | 
				
			||||||
                        realm_id: int) -> List[ZerverFieldsT]:
 | 
					 | 
				
			||||||
    '''
 | 
					 | 
				
			||||||
    This code is pretty specific to how HipChat sends us data.
 | 
					 | 
				
			||||||
    They give us the avatar payloads in base64 in users.json.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    We process avatars in our own pass of that data, rather
 | 
					 | 
				
			||||||
    than doing it while we're getting other user data.  I
 | 
					 | 
				
			||||||
    chose to keep this separate, as otherwise you have a lot
 | 
					 | 
				
			||||||
    of extraneous data getting passed around.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    This code has MAJOR SIDE EFFECTS--namely writing a bunch
 | 
					 | 
				
			||||||
    of files to the avatars directory.
 | 
					 | 
				
			||||||
    '''
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    avatar_records = []
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for d in raw_data:
 | 
					 | 
				
			||||||
        raw_user = d['User']
 | 
					 | 
				
			||||||
        avatar_payload = raw_user.get('avatar')
 | 
					 | 
				
			||||||
        if not avatar_payload:
 | 
					 | 
				
			||||||
            continue
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        bits = base64.b64decode(avatar_payload)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        raw_user_id = raw_user['id']
 | 
					 | 
				
			||||||
        if not user_id_mapper.has(raw_user_id):
 | 
					 | 
				
			||||||
            continue
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        user_id = user_id_mapper.get(raw_user_id)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        metadata = write_avatar_png(
 | 
					 | 
				
			||||||
            avatar_folder=avatar_folder,
 | 
					 | 
				
			||||||
            realm_id=realm_id,
 | 
					 | 
				
			||||||
            user_id=user_id,
 | 
					 | 
				
			||||||
            bits=bits,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        avatar_records.append(metadata)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return avatar_records
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def read_room_data(data_dir: str) -> List[ZerverFieldsT]:
 | 
					 | 
				
			||||||
    fn = 'rooms.json'
 | 
					 | 
				
			||||||
    data_file = os.path.join(data_dir, fn)
 | 
					 | 
				
			||||||
    with open(data_file, "rb") as f:
 | 
					 | 
				
			||||||
        data = orjson.loads(f.read())
 | 
					 | 
				
			||||||
    return data
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def convert_room_data(raw_data: List[ZerverFieldsT],
 | 
					 | 
				
			||||||
                      subscriber_handler: SubscriberHandler,
 | 
					 | 
				
			||||||
                      stream_id_mapper: IdMapper,
 | 
					 | 
				
			||||||
                      user_id_mapper: IdMapper,
 | 
					 | 
				
			||||||
                      realm_id: int,
 | 
					 | 
				
			||||||
                      api_token: Optional[str]=None) -> List[ZerverFieldsT]:
 | 
					 | 
				
			||||||
    flat_data = [
 | 
					 | 
				
			||||||
        d['Room']
 | 
					 | 
				
			||||||
        for d in raw_data
 | 
					 | 
				
			||||||
    ]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get_invite_only(v: str) -> bool:
 | 
					 | 
				
			||||||
        if v == 'public':
 | 
					 | 
				
			||||||
            return False
 | 
					 | 
				
			||||||
        elif v == 'private':
 | 
					 | 
				
			||||||
            return True
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            raise Exception('unexpected value')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    streams = []
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for in_dict in flat_data:
 | 
					 | 
				
			||||||
        now = int(timezone_now().timestamp())
 | 
					 | 
				
			||||||
        stream_id = stream_id_mapper.get(in_dict['id'])
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        invite_only = get_invite_only(in_dict['privacy'])
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        stream = build_stream(
 | 
					 | 
				
			||||||
            date_created=now,
 | 
					 | 
				
			||||||
            realm_id=realm_id,
 | 
					 | 
				
			||||||
            name=in_dict['name'],
 | 
					 | 
				
			||||||
            description=in_dict['topic'],
 | 
					 | 
				
			||||||
            stream_id=stream_id,
 | 
					 | 
				
			||||||
            deactivated=in_dict['is_archived'],
 | 
					 | 
				
			||||||
            invite_only=invite_only,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if invite_only:
 | 
					 | 
				
			||||||
            users: Set[int] = {
 | 
					 | 
				
			||||||
                user_id_mapper.get(key)
 | 
					 | 
				
			||||||
                for key in in_dict['members']
 | 
					 | 
				
			||||||
                if user_id_mapper.has(key)
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            if user_id_mapper.has(in_dict['owner']):
 | 
					 | 
				
			||||||
                owner = user_id_mapper.get(in_dict['owner'])
 | 
					 | 
				
			||||||
                users.add(owner)
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            users = set()
 | 
					 | 
				
			||||||
            if api_token is not None:
 | 
					 | 
				
			||||||
                hc = hypchat.HypChat(api_token)
 | 
					 | 
				
			||||||
                room_data = hc.fromurl('{}/v2/room/{}/member'.format(hc.endpoint, in_dict['id']))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                for item in room_data['items']:
 | 
					 | 
				
			||||||
                    hipchat_user_id = item['id']
 | 
					 | 
				
			||||||
                    zulip_user_id = user_id_mapper.get(hipchat_user_id)
 | 
					 | 
				
			||||||
                    users.add(zulip_user_id)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if users:
 | 
					 | 
				
			||||||
            subscriber_handler.set_info(
 | 
					 | 
				
			||||||
                stream_id=stream_id,
 | 
					 | 
				
			||||||
                users=users,
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # unmapped fields:
 | 
					 | 
				
			||||||
        #    guest_access_url: no Zulip equivalent
 | 
					 | 
				
			||||||
        #    created: we just use "now"
 | 
					 | 
				
			||||||
        #    participants: no good sample data
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        streams.append(stream)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return streams
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def make_realm(realm_id: int) -> ZerverFieldsT:
    '''
    Build the top-level realm dict for a HipChat import.

    The zerver_realm row is created by build_zerver_realm with an
    empty subdomain, the timestamp taken from timezone_now(), and
    the label 'HipChat'.  It is then wrapped by build_realm together
    with settings.EXTERNAL_HOST as the domain name.
    '''
    created_at = float(timezone_now().timestamp())
    domain_name = settings.EXTERNAL_HOST

    realm = build_realm(
        build_zerver_realm(realm_id, "", created_at, 'HipChat'),
        realm_id,
        domain_name,
    )

    # Start out with no default streams; this may be overridden later.
    realm['zerver_defaultstream'] = []
    return realm
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def write_avatar_data(raw_user_data: List[ZerverFieldsT],
                      output_dir: str,
                      user_id_mapper: IdMapper,
                      realm_id: int) -> None:
    '''
    Convert the users' avatar data and write /avatars/records.json.

    The directory <output_dir>/avatars/<realm_id> is created first
    (if it does not already exist); the records returned by
    convert_avatar_data are then written out with
    create_converted_data_files.
    '''
    avatar_folder = os.path.join(output_dir, 'avatars')
    os.makedirs(os.path.join(avatar_folder, str(realm_id)), exist_ok=True)

    create_converted_data_files(
        convert_avatar_data(
            avatar_folder=avatar_folder,
            raw_data=raw_user_data,
            user_id_mapper=user_id_mapper,
            realm_id=realm_id,
        ),
        output_dir,
        '/avatars/records.json',
    )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def write_emoticon_data(realm_id: int,
                        data_dir: str,
                        output_dir: str) -> List[ZerverFieldsT]:
    '''
    Import the custom emoticons of a HipChat export.

    Most of the work is copying image files into Zulip's more nested
    directory structure.  We also write a JSON file with metadata
    (/emoji/records.json) and return a list of RealmEmoji dicts.
    If emoticons.json is missing we log a warning and return [].

    data_dir is expected to look like this:

        emoticons.json - very simple metadata on each emoji:

          {
            "Emoticon": {
              "id": 9875487,
              "path": "emoticons/yasss.jpg",
              "shortcut": "yasss"
            }
          },
          {
            "Emoticon": {
              "id": 718017,
              "path": "emoticons/yayyyyy.gif",
              "shortcut": "yayyyyy"
            }
          }

        emoticons/ - the image files themselves:

            slytherinsnake.gif
            spanishinquisition.jpg
            sparkle.png
            ...
    '''
    logging.info('Starting to process emoticons')

    data_file = os.path.join(data_dir, 'emoticons.json')
    if not os.path.exists(data_file):
        logging.warning("HipChat export does not contain emoticons.json.")
        logging.warning("As a result, custom emoji cannot be imported.")
        return []

    with open(data_file, "rb") as f:
        data = orjson.loads(f.read())

    if isinstance(data, dict) and 'Emoticons' in data:
        # hc-migrate export format: a dict holding a flat 'Emoticons' list.
        flat_data = [
            {'path': entry['path'], 'name': entry['shortcut']}
            for entry in data['Emoticons']
        ]
    else:
        # Otherwise: a list whose items each wrap the fields in 'Emoticon'.
        flat_data = [
            {'path': entry['Emoticon']['path'], 'name': entry['Emoticon']['shortcut']}
            for entry in data
        ]

    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)

    def copy_emoticon(entry: ZerverFieldsT) -> ZerverFieldsT:
        # Copy one image into the output tree and describe where it went.
        file_name = os.path.basename(entry['path'])
        source_path = os.path.abspath(os.path.join(data_dir, entry['path']))

        # The target location follows RealmEmoji's template:
        # PATH_ID_TEMPLATE = "{realm_id}/emoji/images/{emoji_file_name}"
        relative_target = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=file_name,
        )
        target_path = os.path.join(emoji_folder, relative_target)
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        target_path = os.path.abspath(target_path)

        shutil.copyfile(source_path, target_path)

        return {
            'path': target_path,
            's3_path': target_path,
            'file_name': file_name,
            'realm_id': realm_id,
            'name': entry['name'],
        }

    emoji_records = [copy_emoticon(entry) for entry in flat_data]
    create_converted_data_files(emoji_records, output_dir, '/emoji/records.json')

    realmemoji = []
    for record in emoji_records:
        realmemoji.append(
            build_realm_emoji(
                realm_id=realm_id,
                name=record['name'],
                id=NEXT_ID('realmemoji'),
                file_name=record['file_name'],
            )
        )

    logging.info('Done processing emoticons')
    return realmemoji
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def write_message_data(realm_id: int,
                       slim_mode: bool,
                       message_key: str,
                       zerver_recipient: List[ZerverFieldsT],
                       subscriber_map: Dict[int, Set[int]],
                       data_dir: str,
                       output_dir: str,
                       masking_content: bool,
                       stream_id_mapper: IdMapper,
                       user_id_mapper: IdMapper,
                       user_handler: UserHandler,
                       attachment_handler: AttachmentHandler) -> None:
    '''
    Find every history.json file for one kind of message and hand
    each of them to process_message_file.

    message_key selects what is processed:

        'UserMessage' / 'NotificationMessage' - room history, found
            under <data_dir>/rooms/*/history.json
        'PrivateUserMessage' - private messages, found under
            <data_dir>/users/*/history.json

    Any other message_key raises an Exception.
    '''
    def recipient_ids_for(recipient_type: Any) -> Dict[int, int]:
        # Map type_id -> recipient id for recipients of the given type.
        return {
            row['type_id']: row['id']
            for row in zerver_recipient
            if row['type'] == recipient_type
        }

    stream_id_to_recipient_id = recipient_ids_for(Recipient.STREAM)
    user_id_to_recipient_id = recipient_ids_for(Recipient.PERSONAL)

    def get_stream_recipient_id(raw_message: ZerverFieldsT) -> int:
        # For room history, fn_id is the name of the room's directory.
        stream_id = stream_id_mapper.get(raw_message['fn_id'])
        return stream_id_to_recipient_id[stream_id]

    def get_pm_recipient_id(raw_message: ZerverFieldsT) -> int:
        raw_user_id = raw_message['receiver_id']
        assert(raw_user_id)
        return user_id_to_recipient_id[user_id_mapper.get(raw_user_id)]

    if message_key == 'PrivateUserMessage':
        is_pm_data = True
    elif message_key in ('UserMessage', 'NotificationMessage'):
        is_pm_data = False
    else:
        raise Exception('programming error: invalid message_key: ' + message_key)

    if is_pm_data:
        dir_glob = os.path.join(data_dir, 'users', '*', 'history.json')
        get_recipient_id = get_pm_recipient_id
    else:
        dir_glob = os.path.join(data_dir, 'rooms', '*', 'history.json')
        get_recipient_id = get_stream_recipient_id

    def get_files_dir(fn_id: str) -> str:
        # Private-message attachments share a single directory; room
        # attachments live in a per-room directory.
        if is_pm_data:
            return os.path.join(data_dir, 'users', 'files')
        return os.path.join(data_dir, 'rooms', str(fn_id), 'files')

    for history_file in glob.glob(dir_glob):
        # The name of the directory holding history.json is the id.
        fn_id = os.path.basename(os.path.dirname(history_file))

        process_message_file(
            realm_id=realm_id,
            slim_mode=slim_mode,
            fn=history_file,
            fn_id=fn_id,
            files_dir=get_files_dir(fn_id),
            get_recipient_id=get_recipient_id,
            message_key=message_key,
            subscriber_map=subscriber_map,
            data_dir=data_dir,
            output_dir=output_dir,
            is_pm_data=is_pm_data,
            masking_content=masking_content,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_hipchat_sender_id(realm_id: int,
                          slim_mode: bool,
                          message_dict: Dict[str, Any],
                          user_id_mapper: IdMapper,
                          user_handler: UserHandler) -> Optional[int]:
    '''
    Work out the sender id to use for one exported message.

    The HipChat export is inconsistent in how it renders
    senders, and sometimes we don't even get an id.  There
    are four cases:

        * message_dict['sender'] is a plain string (some HipChat
          instances just give us a person's name, e.g. for
          NotificationMessage),
        * the sender dict has an id of 0,
        * the sender dict has an id that user_id_mapper doesn't know,
        * HAPPY PATH: the sender dict has an id that user_id_mapper
          knows; we return user_id_mapper.get() of it.

    In the first three cases we turn the sender into a mirror user
    via user_handler.get_mirror_user and return that user's 'id',
    unless slim_mode is set, in which case we return None.
    '''
    sender = message_dict['sender']

    def mirror_sender_id(name_key: Optional[str]) -> Optional[int]:
        # Shared fallback for every sender we can't map directly.
        # The slim_mode check comes first so that we neither read the
        # name field nor create a mirror user in slim mode.
        if slim_mode:
            return None
        name = sender if name_key is None else sender[name_key]
        mirror_user = user_handler.get_mirror_user(
            realm_id=realm_id,
            name=name,
        )
        return mirror_user['id']

    if isinstance(sender, str):
        # The sender field itself is the name.
        return mirror_sender_id(None)

    raw_sender_id = sender['id']

    if raw_sender_id == 0:
        return mirror_sender_id('name')

    if not user_id_mapper.has(raw_sender_id):
        # NOTE(review): the raw id (not sender['name']) is what gets
        # passed as the mirror user's name here -- confirm that this
        # is intended.
        return mirror_sender_id('id')

    # HAPPY PATH: HipChat just gave us an ordinary sender_id.
    return user_id_mapper.get(raw_sender_id)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def process_message_file(realm_id: int,
                         slim_mode: bool,
                         fn: str,
                         fn_id: str,
                         files_dir: str,
                         get_recipient_id: Callable[[ZerverFieldsT], int],
                         message_key: str,
                         subscriber_map: Dict[int, Set[int]],
                         data_dir: str,
                         output_dir: str,
                         is_pm_data: bool,
                         masking_content: bool,
                         user_id_mapper: IdMapper,
                         user_handler: UserHandler,
                         attachment_handler: AttachmentHandler) -> None:
    '''
    Load one history.json file (fn), turn its entries into raw
    message dicts, and pass them to process_raw_message_batch in
    chunks of 1000 via process_list_in_batches.

    Only entries that contain message_key are considered.  Entries
    for which get_hipchat_sender_id returns None are dropped, and
    for PM data so are entries whose sender id does not match fn_id.
    '''
    def to_raw_message(d: Dict[str, Any]) -> Optional[ZerverFieldsT]:
        sender_id = get_hipchat_sender_id(
            realm_id=realm_id,
            slim_mode=slim_mode,
            message_dict=d,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
        )
        if sender_id is None:
            return None

        # PMs are in multiple places in the HipChat export, and we
        # only use the copy from the sender.  Both sides go through
        # str() because user IDs are strings in Stride but integers
        # in HipChat, while fn_id is always a string.
        # NOTE(review): sender_id is what get_hipchat_sender_id
        # returned, and fn_id is supplied by our caller -- confirm
        # that the two are in the same id space.
        if is_pm_data and str(sender_id) != str(fn_id):
            return None

        content = d['message']
        if masking_content:
            for pattern, mask in (('[a-z]', 'x'), ('[A-Z]', 'X')):
                content = re.sub(pattern, mask, content)

        return {
            'fn_id': fn_id,
            'sender_id': sender_id,
            'receiver_id': d.get('receiver', {}).get('id'),
            'content': content,
            'mention_user_ids': d.get('mentions', []),
            'date_sent': str_date_to_float(d['timestamp']),
            'attachment': d.get('attachment'),
            'files_dir': files_dir,
        }

    with open(fn, "rb") as f:
        data = orjson.loads(f.read())

    # Unwrap the payloads first, then convert them one by one.
    flat_data = [entry[message_key] for entry in data if message_key in entry]
    raw_messages = [
        raw_message
        for raw_message in map(to_raw_message, flat_data)
        if raw_message is not None
    ]

    def handle_batch(lst: List[Any]) -> None:
        process_raw_message_batch(
            realm_id=realm_id,
            raw_messages=lst,
            subscriber_map=subscriber_map,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
            get_recipient_id=get_recipient_id,
            is_pm_data=is_pm_data,
            output_dir=output_dir,
        )

    process_list_in_batches(
        lst=raw_messages,
        chunk_size=1000,
        process_batch=handle_batch,
    )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def process_raw_message_batch(realm_id: int,
                              raw_messages: List[Dict[str, Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              attachment_handler: AttachmentHandler,
                              get_recipient_id: Callable[[ZerverFieldsT], int],
                              is_pm_data: bool,
                              output_dir: str) -> None:
    '''
    Convert one batch of raw messages and write it to its own
    /messages-NNNNNN.json file in output_dir.

    For each raw message we rewrite mentions, run the content through
    html2text, let attachment_handler process any attachment, and
    build a message row with build_message.  Messages whose converted
    content is longer than 10000 characters, or for which
    get_recipient_id raises KeyError, are skipped.  The file holds the
    message rows plus the rows returned by make_user_messages.
    '''
    def fix_mentions(content: str,
                     mention_user_ids: Set[int]) -> str:
        # Rewrite HipChat-style mentions into Zulip's syntax.
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            content = content.replace(
                '@{short_name}'.format(**user),
                '@**{full_name}**'.format(**user),
            )
        return content.replace('@here', '@**all**')

    mention_map: Dict[int, Set[int]] = {}
    zerver_message = []

    import html2text
    h = html2text.HTML2Text()

    for raw_message in raw_messages:
        # Side effect: NEXT_ID is called before the skip checks
        # below, so it also runs for messages that end up skipped.
        message_id = NEXT_ID('message')

        # Only keep the mentions of users we have a mapping for.
        mention_user_ids = {
            user_id_mapper.get(raw_id)
            for raw_id in set(raw_message['mention_user_ids'])
            if user_id_mapper.has(raw_id)
        }
        mention_map[message_id] = mention_user_ids

        content = h.handle(
            fix_mentions(
                content=raw_message['content'],
                mention_user_ids=mention_user_ids,
            )
        )
        if len(content) > 10000:
            logging.info('skipping too-long message of length %s', len(content))
            continue

        date_sent = raw_message['date_sent']

        try:
            recipient_id = get_recipient_id(raw_message)
        except KeyError:
            logging.debug("Could not find recipient_id for a message, skipping.")
            continue

        topic_name = '' if is_pm_data else 'imported from HipChat'
        user_id = raw_message['sender_id']

        # Another side effect: the attachment handler records the
        # attachment and may give us text to append to the message.
        extra_content = attachment_handler.handle_message_data(
            realm_id=realm_id,
            message_id=message_id,
            sender_id=user_id,
            attachment=raw_message['attachment'],
            files_dir=raw_message['files_dir'],
        )
        has_attachment = bool(extra_content)
        if has_attachment:
            content += '\n' + extra_content

        zerver_message.append(
            build_message(
                content=content,
                message_id=message_id,
                date_sent=date_sent,
                recipient_id=recipient_id,
                rendered_content=None,
                topic_name=topic_name,
                user_id=user_id,
                has_attachment=has_attachment,
            )
        )

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = {
        'zerver_message': zerver_message,
        'zerver_usermessage': zerver_usermessage,
    }

    dump_file_id = NEXT_ID('dump_file_id')
    create_converted_data_files(
        message_json,
        output_dir,
        f"/messages-{dump_file_id:06}.json",
    )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def do_convert_data(input_tar_file: str,
                    output_dir: str,
                    masking_content: bool,
                    api_token: Optional[str]=None,
                    slim_mode: bool=False) -> None:
    '''
    Convert the HipChat export in input_tar_file into converted data
    files under output_dir, then pack output_dir into a .tar.gz
    archive named after it.

    The steps below run in a fixed order; in particular the user
    profiles are written last, because message processing may still
    introduce mirror users.
    '''
    export_dir = untar_input_file(input_tar_file)

    attachment_handler = AttachmentHandler()
    user_handler = UserHandler()
    subscriber_handler = SubscriberHandler()
    user_id_mapper = IdMapper()
    stream_id_mapper = IdMapper()

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # users.json -> UserProfile
    # (zerver_userprofile is deliberately not stored on the realm yet;
    # more users may be added while messages are processed.)
    user_rows = read_user_data(data_dir=export_dir)
    convert_user_data(
        user_handler=user_handler,
        slim_mode=slim_mode,
        user_id_mapper=user_id_mapper,
        raw_data=user_rows,
        realm_id=realm_id,
    )
    regular_users = user_handler.get_normal_users()

    # streams.json -> Stream
    room_rows = read_room_data(data_dir=export_dir)
    zerver_stream = convert_room_data(
        raw_data=room_rows,
        subscriber_handler=subscriber_handler,
        stream_id_mapper=stream_id_mapper,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
        api_token=api_token,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=regular_users,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    if api_token is not None:
        # With a token, every stream goes through the subscriber handler.
        stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=zerver_stream,
        )
    else:
        # Without a token, public streams are either left empty
        # (slim mode) or built by build_public_stream_subscriptions;
        # only invite-only streams use the subscriber handler.
        public_subs: List[ZerverFieldsT] = []
        if not slim_mode:
            public_subs = build_public_stream_subscriptions(
                zerver_userprofile=regular_users,
                zerver_recipient=zerver_recipient,
                zerver_stream=zerver_stream,
            )

        invite_only_streams = [
            stream_dict
            for stream_dict in zerver_stream
            if stream_dict['invite_only']
        ]
        private_subs = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=invite_only_streams,
        )
        stream_subscriptions = public_subs + private_subs

    zerver_subscription = build_personal_subscriptions(
        zerver_recipient=zerver_recipient,
    ) + stream_subscriptions

    realm['zerver_subscription'] = zerver_subscription

    realm['zerver_realmemoji'] = write_emoticon_data(
        realm_id=realm_id,
        data_dir=export_dir,
        output_dir=output_dir,
    )

    subscriber_map = make_subscriber_map(
        zerver_subscription=zerver_subscription,
    )

    logging.info('Start importing message data')
    message_keys = ('UserMessage', 'NotificationMessage', 'PrivateUserMessage')
    for message_key in message_keys:
        write_message_data(
            realm_id=realm_id,
            slim_mode=slim_mode,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            subscriber_map=subscriber_map,
            data_dir=export_dir,
            output_dir=output_dir,
            masking_content=masking_content,
            stream_id_mapper=stream_id_mapper,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )

    # Order matters: users are only collected after everything else
    # has been processed, since handling messages can create mirror
    # users.
    realm['zerver_userprofile'] = user_handler.get_all_users()
    realm['sort_by_date'] = True

    create_converted_data_files(realm, output_dir, '/realm.json')

    logging.info('Start importing avatar data')
    write_avatar_data(
        raw_user_data=user_rows,
        output_dir=output_dir,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
    )

    attachment_handler.write_info(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    logging.info('Start making tarball')
    tar_command = ["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P']
    subprocess.check_call(tar_command)
    logging.info('Done making tarball')
 | 
					 | 
				
			||||||
@@ -1,136 +0,0 @@
 | 
				
			|||||||
import logging
 | 
					 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import shutil
 | 
					 | 
				
			||||||
from typing import Any, Dict, List, Optional
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from zerver.data_import.import_util import build_attachment, create_converted_data_files
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class AttachmentHandler:
    '''
    Tracks the file attachments referenced by HipChat messages and
    writes them out alongside the rest of the converted data.

    handle_message_data() is called per message attachment and
    records one entry per distinct target path; write_info() then
    copies the files and writes the attachment/upload records.
    '''

    def __init__(self) -> None:
        # Keyed by the attachment's target path (relative to the
        # uploads folder); each value is the record built in
        # handle_message_data().
        self.info_dict: Dict[str, Dict[str, Any]] = {}

    def handle_message_data(self,
                            realm_id: int,
                            message_id: int,
                            sender_id: int,
                            attachment: Dict[str, Any],
                            files_dir: str) -> Optional[str]:
        '''
        Register the attachment of one message.

        Returns the markdown link text for the attachment, or None
        when the attachment is empty, has no 'path', or its file is
        not present under files_dir.  A file that was already
        registered just gets message_id added to its record and the
        previously built link text is returned.
        '''
        if not attachment:
            return None

        name = attachment['name']

        if 'path' not in attachment:
            logging.info('Skipping HipChat attachment with missing path data: %s', name)
            return None

        # NOTE: the export's own 'size' field is intentionally not
        # read; it is not completely trustworthy, and the real size
        # is taken from the file system below.
        path = attachment['path']

        local_fn = os.path.join(files_dir, path)

        if not os.path.exists(local_fn):
            # HipChat has an option to not include these in its
            # exports, since file uploads can be very large.
            logging.info('Skipping attachment with no file data: %s', local_fn)
            return None

        target_path = os.path.join(
            str(realm_id),
            'HipChatImportAttachment',
            path,
        )

        if target_path in self.info_dict:
            logging.info("file used multiple times: %s", path)
            info = self.info_dict[target_path]
            info['message_ids'].add(message_id)
            return info['content']

        # HipChat provides size info, but it's not
        # completely trustworthy, so we just
        # ask the OS for file details.
        size = os.path.getsize(local_fn)
        mtime = os.path.getmtime(local_fn)

        content = f'[{name}](/user_uploads/{target_path})'

        info = dict(
            message_ids={message_id},
            sender_id=sender_id,
            local_fn=local_fn,
            target_path=target_path,
            name=name,
            size=size,
            mtime=mtime,
            content=content,
        )
        self.info_dict[target_path] = info

        return content

    def write_info(self, output_dir: str, realm_id: int) -> None:
        '''
        Copy every registered attachment file into
        <output_dir>/uploads/ and write '/uploads/records.json' and
        '/attachment.json' via create_converted_data_files().
        '''
        # Presumably filled in by build_attachment(), which receives
        # this list as zerver_attachment -- confirm against import_util.
        attachments: List[Dict[str, Any]] = []
        uploads_records: List[Dict[str, Any]] = []

        def add_attachment(info: Dict[str, Any]) -> None:
            build_attachment(
                realm_id=realm_id,
                message_ids=info['message_ids'],
                user_id=info['sender_id'],
                fileinfo=dict(
                    created=info['mtime'],  # minor lie
                    size=info['size'],
                    name=info['name'],
                ),
                s3_path=info['target_path'],
                zerver_attachment=attachments,
            )

        def add_upload(info: Dict[str, Any]) -> None:
            target_path = info['target_path']
            upload_rec = dict(
                size=info['size'],
                user_profile_id=info['sender_id'],
                realm_id=realm_id,
                s3_path=target_path,
                path=target_path,
                content_type=None,
            )
            uploads_records.append(upload_rec)

        def make_full_target_path(info: Dict[str, Any]) -> str:
            # Also creates the destination's parent directories.
            target_path = info['target_path']
            full_target_path = os.path.join(
                output_dir,
                'uploads',
                target_path,
            )
            full_target_path = os.path.abspath(full_target_path)
            os.makedirs(os.path.dirname(full_target_path), exist_ok=True)
            return full_target_path

        def copy_file(info: Dict[str, Any]) -> None:
            source_path = info['local_fn']
            target_path = make_full_target_path(info)
            shutil.copyfile(source_path, target_path)

        logging.info('Start processing attachment files')

        for info in self.info_dict.values():
            add_attachment(info)
            add_upload(info)
            copy_file(info)

        # Make sure the realm's uploads folder exists even when there
        # were no attachments to copy.
        uploads_folder = os.path.join(output_dir, 'uploads')
        os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)

        attachment = dict(
            zerver_attachment=attachments,
        )

        create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
        create_converted_data_files(attachment, output_dir, '/attachment.json')

        logging.info('Done processing attachment files')
 | 
					 | 
				
			||||||
@@ -1,84 +0,0 @@
 | 
				
			|||||||
from typing import Any, Dict, List
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from django.utils.timezone import now as timezone_now
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from zerver.data_import.import_util import build_user_profile
 | 
					 | 
				
			||||||
from zerver.models import UserProfile
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class UserHandler:
    '''
    Thin bookkeeping layer over the user dicts that end up in
    zerver_userprofile.

    It keeps the regular users indexed by id (handy for things like
    resolving mentions) and can mint mirror users on demand, keyed
    by name.
    '''

    def __init__(self) -> None:
        self.id_to_user_map: Dict[int, Dict[str, Any]] = {}
        self.name_to_mirror_user_map: Dict[str, Dict[str, Any]] = {}
        # Lowest id that will be tried for the next mirror user.
        self.mirror_user_id = 1

    def add_user(self, user: Dict[str, Any]) -> None:
        '''Index a regular user dict by its 'id'.'''
        self.id_to_user_map[user['id']] = user

    def get_user(self, user_id: int) -> Dict[str, Any]:
        '''Return the regular user with this id (KeyError if unknown).'''
        return self.id_to_user_map[user_id]

    def get_mirror_user(self,
                        realm_id: int,
                        name: str) -> Dict[str, Any]:
        '''
        Return the mirror user for this name, building and caching
        it the first time the name is seen.
        '''
        try:
            return self.name_to_mirror_user_map[name]
        except KeyError:
            pass

        mirror_id = self._new_mirror_user_id()
        mirror_email = f'mirror-{mirror_id}@example.com'

        mirror_user = build_user_profile(
            avatar_source='G',
            date_joined=int(timezone_now().timestamp()),
            delivery_email=mirror_email,
            email=mirror_email,
            full_name=name,
            id=mirror_id,
            is_active=False,
            role=UserProfile.ROLE_MEMBER,
            is_mirror_dummy=True,
            realm_id=realm_id,
            short_name=name,
            timezone='UTC',
        )

        self.name_to_mirror_user_map[name] = mirror_user
        return mirror_user

    def _new_mirror_user_id(self) -> int:
        '''
        Return the next id not taken by a regular user and advance
        the counter past it.
        '''
        candidate = self.mirror_user_id
        while True:
            if candidate not in self.id_to_user_map:
                break
            candidate += 1
        self.mirror_user_id = candidate + 1
        return candidate

    def get_normal_users(self) -> List[Dict[str, Any]]:
        '''Return the regular (non-mirror) users as a new list.'''
        return list(self.id_to_user_map.values())

    def get_all_users(self) -> List[Dict[str, Any]]:
        '''Return the regular users followed by the mirror users.'''
        return self.get_normal_users() + list(self.name_to_mirror_user_map.values())
 | 
					 | 
				
			||||||
@@ -170,8 +170,8 @@ def build_public_stream_subscriptions(
 | 
				
			|||||||
        zerver_recipient: List[ZerverFieldsT],
 | 
					        zerver_recipient: List[ZerverFieldsT],
 | 
				
			||||||
        zerver_stream: List[ZerverFieldsT]) -> List[ZerverFieldsT]:
 | 
					        zerver_stream: List[ZerverFieldsT]) -> List[ZerverFieldsT]:
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
    This function is only used for HipChat now, but it may apply to
 | 
					    This function was only used for HipChat, but it may apply to
 | 
				
			||||||
    future conversions.  We often don't get full subscriber data in
 | 
					    future conversions.  We often didn't get full subscriber data in
 | 
				
			||||||
    the HipChat export, so this function just autosubscribes all
 | 
					    the HipChat export, so this function just autosubscribes all
 | 
				
			||||||
    users to every public stream.  This returns a list of Subscription
 | 
					    users to every public stream.  This returns a list of Subscription
 | 
				
			||||||
    dicts.
 | 
					    dicts.
 | 
				
			||||||
@@ -298,8 +298,8 @@ def build_recipients(zerver_userprofile: Iterable[ZerverFieldsT],
 | 
				
			|||||||
                     zerver_stream: Iterable[ZerverFieldsT],
 | 
					                     zerver_stream: Iterable[ZerverFieldsT],
 | 
				
			||||||
                     zerver_huddle: Iterable[ZerverFieldsT] = []) -> List[ZerverFieldsT]:
 | 
					                     zerver_huddle: Iterable[ZerverFieldsT] = []) -> List[ZerverFieldsT]:
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
    As of this writing, we only use this in the HipChat
 | 
					    This function was only used for the HipChat import, but it may be
 | 
				
			||||||
    conversion.  The Slack and Gitter conversions do it more
 | 
					    required for future conversions. The Slack and Gitter conversions do it more
 | 
				
			||||||
    tightly integrated with creating other objects.
 | 
					    tightly integrated with creating other objects.
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -8,7 +8,7 @@ sequences work.
 | 
				
			|||||||
You need to be a bit careful here, since
 | 
					You need to be a bit careful here, since
 | 
				
			||||||
you're dealing with a big singleton, but
 | 
					you're dealing with a big singleton, but
 | 
				
			||||||
for data imports that's usually easy to
 | 
					for data imports that's usually easy to
 | 
				
			||||||
manage.  See hipchat.py for example usage.
 | 
					manage.
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _seq() -> Callable[[], int]:
 | 
					def _seq() -> Callable[[], int]:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -52,10 +52,6 @@ TAB_DISPLAY_NAMES = {
 | 
				
			|||||||
    'desktop': 'Desktop',
 | 
					    'desktop': 'Desktop',
 | 
				
			||||||
    'mobile': 'Mobile',
 | 
					    'mobile': 'Mobile',
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    'cloud': 'HipChat Cloud',
 | 
					 | 
				
			||||||
    'server': 'HipChat Server or Data Center',
 | 
					 | 
				
			||||||
    'stride': 'Stride',
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    'mm-default': 'Default installation',
 | 
					    'mm-default': 'Default installation',
 | 
				
			||||||
    'mm-docker': 'Docker',
 | 
					    'mm-docker': 'Docker',
 | 
				
			||||||
    'mm-gitlab-omnibus': 'GitLab Omnibus',
 | 
					    'mm-gitlab-omnibus': 'GitLab Omnibus',
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,81 +0,0 @@
 | 
				
			|||||||
import argparse
 | 
					 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
from typing import Any
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
'''
 | 
					 | 
				
			||||||
Example usage for testing purposes:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Move the data:
 | 
					 | 
				
			||||||
    rm -Rf ~/hipchat-data
 | 
					 | 
				
			||||||
    mkdir ~/hipchat-data
 | 
					 | 
				
			||||||
    ./manage.py convert_hipchat_data ~/hipchat-31028-2018-08-08_23-23-22.tar --output ~/hipchat-data
 | 
					 | 
				
			||||||
    ./manage.py import --destroy-rebuild-database hipchat ~/hipchat-data
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Test out the realm:
 | 
					 | 
				
			||||||
    ./tools/run-dev.py
 | 
					 | 
				
			||||||
    go to browser and use your dev url
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
spec:
 | 
					 | 
				
			||||||
    https://confluence.atlassian.com/hipchatkb/
 | 
					 | 
				
			||||||
    exporting-from-hipchat-server-or-data-center-for-data-portability-950821555.html
 | 
					 | 
				
			||||||
'''
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from django.core.management.base import BaseCommand, CommandError, CommandParser
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from zerver.data_import.hipchat import do_convert_data
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Command(BaseCommand):
    help = """Convert the HipChat data into Zulip data format."""

    def add_arguments(self, parser: CommandParser) -> None:
        '''Declare the tarball argument(s) and the conversion options.'''
        parser.add_argument(
            'hipchat_tar',
            nargs='+',
            metavar='<hipchat data tarfile>',
            help="tar of HipChat data",
        )

        parser.add_argument(
            '--output',
            dest='output_dir',
            help='Directory to write exported data to.',
        )

        parser.add_argument(
            '--mask',
            dest='masking_content',
            action="store_true",
            help='Mask the content for privacy during QA.',
        )

        parser.add_argument(
            '--slim-mode',
            action="store_true",
            help=(
                "Default to no public stream subscriptions if no token is available."
                "  See import docs for details."
            ),
        )

        parser.add_argument(
            '--token',
            dest='api_token',
            help='API token for the HipChat API for fetching subscribers.',
        )

        parser.formatter_class = argparse.RawTextHelpFormatter

    def handle(self, *args: Any, **options: Any) -> None:
        '''
        Validate the output directory (it must be given, be a
        directory, and be empty), then convert each tarball in turn.
        '''
        target_dir = options["output_dir"]
        if target_dir is None:
            raise CommandError("You need to specify --output <output directory>")

        if os.path.exists(target_dir) and not os.path.isdir(target_dir):
            raise CommandError(f"{target_dir} is not a directory")

        os.makedirs(target_dir, exist_ok=True)
        if os.listdir(target_dir):
            raise CommandError('Output directory should be empty!')

        target_dir = os.path.realpath(target_dir)

        for tar_path in options['hipchat_tar']:
            if not os.path.exists(tar_path):
                raise CommandError(f"Tar file not found: '{tar_path}'")

            print("Converting data ...")
            do_convert_data(
                input_tar_file=tar_path,
                output_dir=target_dir,
                masking_content=options.get('masking_content', False),
                slim_mode=options['slim_mode'],
                api_token=options.get("api_token"),
            )
 | 
					 | 
				
			||||||
@@ -1,76 +0,0 @@
 | 
				
			|||||||
from typing import Any, Dict
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from zerver.data_import.hipchat import get_hipchat_sender_id
 | 
					 | 
				
			||||||
from zerver.data_import.hipchat_user import UserHandler
 | 
					 | 
				
			||||||
from zerver.data_import.sequencer import IdMapper
 | 
					 | 
				
			||||||
from zerver.lib.test_classes import ZulipTestCase
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class HipChatImporter(ZulipTestCase):
 | 
					 | 
				
			||||||
    def test_sender_ids(self) -> None:
 | 
					 | 
				
			||||||
        realm_id = 5
 | 
					 | 
				
			||||||
        user_handler = UserHandler()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        user_id_mapper = IdMapper()
 | 
					 | 
				
			||||||
        self.assertEqual(user_id_mapper.get(1), 1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Simulate a "normal" user first.
 | 
					 | 
				
			||||||
        user_with_id = dict(
 | 
					 | 
				
			||||||
            id=1,
 | 
					 | 
				
			||||||
            # other fields don't matter here
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        user_handler.add_user(user=user_with_id)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        normal_message: Dict[str, Any] = dict(
 | 
					 | 
				
			||||||
            sender=dict(
 | 
					 | 
				
			||||||
                id=1,
 | 
					 | 
				
			||||||
            ),
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        sender_id = get_hipchat_sender_id(
 | 
					 | 
				
			||||||
            realm_id=realm_id,
 | 
					 | 
				
			||||||
            slim_mode=False,
 | 
					 | 
				
			||||||
            message_dict=normal_message,
 | 
					 | 
				
			||||||
            user_id_mapper=user_id_mapper,
 | 
					 | 
				
			||||||
            user_handler=user_handler,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        self.assertEqual(sender_id, 1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        bot_message = dict(
 | 
					 | 
				
			||||||
            sender='fred_bot',
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Every message from fred_bot should
 | 
					 | 
				
			||||||
        # return the same sender_id.
 | 
					 | 
				
			||||||
        fred_bot_sender_id = 2
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for i in range(3):
 | 
					 | 
				
			||||||
            sender_id = get_hipchat_sender_id(
 | 
					 | 
				
			||||||
                realm_id=realm_id,
 | 
					 | 
				
			||||||
                slim_mode=False,
 | 
					 | 
				
			||||||
                message_dict=bot_message,
 | 
					 | 
				
			||||||
                user_id_mapper=user_id_mapper,
 | 
					 | 
				
			||||||
                user_handler=user_handler,
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            self.assertEqual(sender_id, fred_bot_sender_id)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        id_zero_message = dict(
 | 
					 | 
				
			||||||
            sender=dict(
 | 
					 | 
				
			||||||
                id=0,
 | 
					 | 
				
			||||||
                name='hal_bot',
 | 
					 | 
				
			||||||
            ),
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        hal_bot_sender_id = 3
 | 
					 | 
				
			||||||
        for i in range(3):
 | 
					 | 
				
			||||||
            sender_id = get_hipchat_sender_id(
 | 
					 | 
				
			||||||
                realm_id=realm_id,
 | 
					 | 
				
			||||||
                slim_mode=False,
 | 
					 | 
				
			||||||
                message_dict=id_zero_message,
 | 
					 | 
				
			||||||
                user_id_mapper=user_id_mapper,
 | 
					 | 
				
			||||||
                user_handler=user_handler,
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            self.assertEqual(sender_id, hal_bot_sender_id)
 | 
					 | 
				
			||||||
@@ -51,7 +51,7 @@ def api_teamcity_webhook(request: HttpRequest, user_profile: UserProfile,
 | 
				
			|||||||
                         payload: Dict[str, Any]=REQ(argument_type='body')) -> HttpResponse:
 | 
					                         payload: Dict[str, Any]=REQ(argument_type='body')) -> HttpResponse:
 | 
				
			||||||
    message = payload.get('build')
 | 
					    message = payload.get('build')
 | 
				
			||||||
    if message is None:
 | 
					    if message is None:
 | 
				
			||||||
        # Ignore third-party specific (e.g. Slack/HipChat) payload formats
 | 
					        # Ignore third-party specific (e.g. Slack) payload formats
 | 
				
			||||||
        # and notify the bot owner
 | 
					        # and notify the bot owner
 | 
				
			||||||
        message = MISCONFIGURED_PAYLOAD_TYPE_ERROR_MESSAGE.format(
 | 
					        message = MISCONFIGURED_PAYLOAD_TYPE_ERROR_MESSAGE.format(
 | 
				
			||||||
            bot_name=user_profile.full_name,
 | 
					            bot_name=user_profile.full_name,
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user