hipchat import: Break messages into smaller batches.

Even individual "room" files from HipChat can be large, so we process only 1000 messages at a time within each file, which produces smaller JSON files.
2025-10-31 20:13:46 +00:00 · 2018-10-15 12:15:04 +00:00
parent 6650bb2240
commit b1dd9a251b
1 changed files with 31 additions and 0 deletions
--- a/zerver/data_import/hipchat.py
+++ b/zerver/data_import/hipchat.py
@@ -13,6 +13,10 @@ from django.conf import settings
 from django.forms.models import model_to_dict
 from django.utils.timezone import now as timezone_now

+from zerver.lib.utils import (
+    process_list_in_batches,
+)
+
 from zerver.models import (
    RealmEmoji,
    Recipient,
@@ -451,6 +455,33 @@ def process_message_file(realm_id: int,

    raw_messages = get_raw_messages(fn)

+    def process_batch(lst: List[Any]) -> None:
+        process_raw_message_batch(
+            realm_id=realm_id,
+            raw_messages=lst,
+            zerver_subscription=zerver_subscription,
+            user_handler=user_handler,
+            attachment_handler=attachment_handler,
+            get_recipient_id=get_recipient_id,
+            output_dir=output_dir,
+        )
+
+    chunk_size = 1000
+
+    process_list_in_batches(
+        lst=raw_messages,
+        chunk_size=chunk_size,
+        process_batch=process_batch,
+    )
+
+def process_raw_message_batch(realm_id: int,
+                              raw_messages: List[Dict[str, Any]],
+                              zerver_subscription: List[ZerverFieldsT],
+                              user_handler: UserHandler,
+                              attachment_handler: AttachmentHandler,
+                              get_recipient_id: Callable[[ZerverFieldsT], int],
+                              output_dir: str) -> None:
+
    def fix_mentions(content: str,
                     mention_user_ids: List[int]) -> str:
        for user_id in mention_user_ids: