mirror of
https://github.com/zulip/zulip.git
synced 2025-11-16 20:02:15 +00:00
export: Stop calling processes threads.
This commit is contained in:
committed by
Tim Abbott
parent
131580f23c
commit
a815261abf
@@ -590,7 +590,7 @@ def process_avatars(
|
||||
avatar_list: list[ZerverFieldsT],
|
||||
avatar_dir: str,
|
||||
realm_id: int,
|
||||
threads: int,
|
||||
processes: int,
|
||||
size_url_suffix: str = "",
|
||||
) -> list[ZerverFieldsT]:
|
||||
"""
|
||||
@@ -637,7 +637,7 @@ def process_avatars(
|
||||
run_parallel(
|
||||
partial(get_avatar, avatar_dir, size_url_suffix),
|
||||
avatar_upload_list,
|
||||
processes=threads,
|
||||
processes=processes,
|
||||
catch=True,
|
||||
report=lambda count: logging.info("Finished %s items", count),
|
||||
)
|
||||
@@ -658,7 +658,7 @@ def get_uploads(upload_dir: str, upload: list[str]) -> None:
|
||||
|
||||
|
||||
def process_uploads(
|
||||
upload_list: list[ZerverFieldsT], upload_dir: str, threads: int
|
||||
upload_list: list[ZerverFieldsT], upload_dir: str, processes: int
|
||||
) -> list[ZerverFieldsT]:
|
||||
"""
|
||||
This function downloads the uploads and saves it in the realm's upload directory.
|
||||
@@ -680,7 +680,7 @@ def process_uploads(
|
||||
run_parallel(
|
||||
partial(get_uploads, upload_dir),
|
||||
upload_url_list,
|
||||
processes=threads,
|
||||
processes=processes,
|
||||
catch=True,
|
||||
report=lambda count: logging.info("Finished %s items", count),
|
||||
)
|
||||
@@ -715,7 +715,7 @@ def process_emojis(
|
||||
zerver_realmemoji: list[ZerverFieldsT],
|
||||
emoji_dir: str,
|
||||
emoji_url_map: ZerverFieldsT,
|
||||
threads: int,
|
||||
processes: int,
|
||||
) -> list[ZerverFieldsT]:
|
||||
"""
|
||||
This function downloads the custom emojis and saves in the output emoji folder.
|
||||
|
||||
@@ -1618,7 +1618,7 @@ def do_convert_zipfile(
|
||||
original_path: str,
|
||||
output_dir: str,
|
||||
token: str,
|
||||
threads: int = 6,
|
||||
processes: int = 6,
|
||||
convert_slack_threads: bool = False,
|
||||
) -> None:
|
||||
assert original_path.endswith(".zip")
|
||||
@@ -1661,7 +1661,7 @@ def do_convert_zipfile(
|
||||
|
||||
zipObj.extractall(slack_data_dir)
|
||||
|
||||
do_convert_directory(slack_data_dir, output_dir, token, threads, convert_slack_threads)
|
||||
do_convert_directory(slack_data_dir, output_dir, token, processes, convert_slack_threads)
|
||||
finally:
|
||||
# Always clean up the uncompressed directory
|
||||
rm_tree(slack_data_dir)
|
||||
@@ -1686,7 +1686,7 @@ def do_convert_directory(
|
||||
slack_data_dir: str,
|
||||
output_dir: str,
|
||||
token: str,
|
||||
threads: int = 6,
|
||||
processes: int = 6,
|
||||
convert_slack_threads: bool = False,
|
||||
) -> None:
|
||||
check_slack_token_access(token, SLACK_IMPORT_TOKEN_SCOPES)
|
||||
@@ -1752,18 +1752,20 @@ def do_convert_directory(
|
||||
|
||||
emoji_folder = os.path.join(output_dir, "emoji")
|
||||
os.makedirs(emoji_folder, exist_ok=True)
|
||||
emoji_records = process_emojis(realm["zerver_realmemoji"], emoji_folder, emoji_url_map, threads)
|
||||
emoji_records = process_emojis(
|
||||
realm["zerver_realmemoji"], emoji_folder, emoji_url_map, processes
|
||||
)
|
||||
|
||||
avatar_folder = os.path.join(output_dir, "avatars")
|
||||
avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
|
||||
os.makedirs(avatar_realm_folder, exist_ok=True)
|
||||
avatar_records = process_avatars(
|
||||
avatar_list, avatar_folder, realm_id, threads, size_url_suffix="-512"
|
||||
avatar_list, avatar_folder, realm_id, processes, size_url_suffix="-512"
|
||||
)
|
||||
|
||||
uploads_folder = os.path.join(output_dir, "uploads")
|
||||
os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
|
||||
uploads_records = process_uploads(uploads_list, uploads_folder, threads)
|
||||
uploads_records = process_uploads(uploads_list, uploads_folder, processes)
|
||||
attachment = {"zerver_attachment": zerver_attachment}
|
||||
|
||||
team_info_dict = get_slack_api_data("https://slack.com/api/team.info", "team", token=token)
|
||||
|
||||
@@ -2451,7 +2451,7 @@ def get_exportable_scheduled_message_ids(
|
||||
def do_export_realm(
|
||||
realm: Realm,
|
||||
output_dir: Path,
|
||||
threads: int,
|
||||
processes: int,
|
||||
export_type: int,
|
||||
exportable_user_ids: set[int] | None = None,
|
||||
export_as_active: bool | None = None,
|
||||
@@ -2462,11 +2462,11 @@ def do_export_realm(
|
||||
# indicates a bug.
|
||||
assert export_type == RealmExport.EXPORT_FULL_WITH_CONSENT
|
||||
|
||||
# We need at least one thread running to export
|
||||
# We need at least one process running to export
|
||||
# UserMessage rows. The management command should
|
||||
# enforce this for us.
|
||||
if not settings.TEST_SUITE:
|
||||
assert threads >= 1
|
||||
assert processes >= 1
|
||||
|
||||
realm_config = get_realm_config()
|
||||
|
||||
@@ -2544,7 +2544,7 @@ def do_export_realm(
|
||||
|
||||
# Start parallel jobs to export the UserMessage objects.
|
||||
launch_user_message_subprocesses(
|
||||
threads=threads,
|
||||
processes=processes,
|
||||
output_dir=output_dir,
|
||||
export_full_with_consent=export_type == RealmExport.EXPORT_FULL_WITH_CONSENT,
|
||||
exportable_user_ids=exportable_user_ids,
|
||||
@@ -2585,12 +2585,12 @@ def export_attachment_table(
|
||||
|
||||
|
||||
def launch_user_message_subprocesses(
|
||||
threads: int,
|
||||
processes: int,
|
||||
output_dir: Path,
|
||||
export_full_with_consent: bool,
|
||||
exportable_user_ids: set[int] | None,
|
||||
) -> None:
|
||||
logging.info("Launching %d PARALLEL subprocesses to export UserMessage rows", threads)
|
||||
logging.info("Launching %d PARALLEL subprocesses to export UserMessage rows", processes)
|
||||
pids = {}
|
||||
|
||||
if export_full_with_consent:
|
||||
@@ -2600,12 +2600,12 @@ def launch_user_message_subprocesses(
|
||||
f.write(orjson.dumps(list(exportable_user_ids)))
|
||||
logging.info("Created consented_user_ids.json file.")
|
||||
|
||||
for shard_id in range(threads):
|
||||
for shard_id in range(processes):
|
||||
arguments = [
|
||||
os.path.join(settings.DEPLOY_ROOT, "manage.py"),
|
||||
"export_usermessage_batch",
|
||||
f"--path={output_dir}",
|
||||
f"--thread={shard_id}",
|
||||
f"--process={shard_id}",
|
||||
]
|
||||
if export_full_with_consent:
|
||||
arguments.append("--export-full-with-consent")
|
||||
@@ -2955,7 +2955,7 @@ def get_consented_user_ids(realm: Realm) -> set[int]:
|
||||
def export_realm_wrapper(
|
||||
export_row: RealmExport,
|
||||
output_dir: str,
|
||||
threads: int,
|
||||
processes: int,
|
||||
upload: bool,
|
||||
percent_callback: Callable[[Any], None] | None = None,
|
||||
export_as_active: bool | None = None,
|
||||
@@ -2972,7 +2972,7 @@ def export_realm_wrapper(
|
||||
tarball_path, stats = do_export_realm(
|
||||
realm=export_row.realm,
|
||||
output_dir=output_dir,
|
||||
threads=threads,
|
||||
processes=processes,
|
||||
export_type=export_row.type,
|
||||
export_as_active=export_as_active,
|
||||
exportable_user_ids=exportable_user_ids,
|
||||
|
||||
@@ -32,9 +32,9 @@ class Command(ZulipBaseCommand):
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--threads",
|
||||
"--processes",
|
||||
default=settings.DEFAULT_DATA_EXPORT_IMPORT_PARALLELISM,
|
||||
help="Threads to use in exporting UserMessage objects in parallel",
|
||||
help="Processes to use in exporting UserMessage objects in parallel",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@@ -57,9 +57,9 @@ class Command(ZulipBaseCommand):
|
||||
if token is None:
|
||||
raise CommandError("Enter Slack legacy token!")
|
||||
|
||||
num_threads = int(options["threads"])
|
||||
if num_threads < 1:
|
||||
raise CommandError("You must have at least one thread.")
|
||||
num_processes = int(options["processes"])
|
||||
if num_processes < 1:
|
||||
raise CommandError("You must have at least one process.")
|
||||
|
||||
for path in options["slack_data_path"]:
|
||||
if not os.path.exists(path):
|
||||
@@ -72,7 +72,7 @@ class Command(ZulipBaseCommand):
|
||||
path,
|
||||
output_dir,
|
||||
token,
|
||||
threads=num_threads,
|
||||
processes=num_processes,
|
||||
convert_slack_threads=convert_slack_threads,
|
||||
)
|
||||
elif os.path.isfile(path) and path.endswith(".zip"):
|
||||
@@ -80,7 +80,7 @@ class Command(ZulipBaseCommand):
|
||||
path,
|
||||
output_dir,
|
||||
token,
|
||||
threads=num_threads,
|
||||
processes=num_processes,
|
||||
convert_slack_threads=convert_slack_threads,
|
||||
)
|
||||
else:
|
||||
|
||||
@@ -68,8 +68,8 @@ class Command(ZulipBaseCommand):
|
||||
make sure you have the procedure right and minimize downtime.
|
||||
|
||||
Performance: In one test, the tool exported a realm with hundreds
|
||||
of users and ~1M messages of history with --threads=1 in about 3
|
||||
hours of serial runtime (goes down to ~50m with --threads=6 on a
|
||||
of users and ~1M messages of history with --parallel=1 in about 3
|
||||
hours of serial runtime (goes down to ~50m with --parallel=6 on a
|
||||
machine with 8 CPUs). Importing that same data set took about 30
|
||||
minutes. But this will vary a lot depending on the average number
|
||||
of recipients of messages in the realm, hardware, etc."""
|
||||
@@ -80,9 +80,9 @@ class Command(ZulipBaseCommand):
|
||||
"--output", dest="output_dir", help="Directory to write exported data to."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--threads",
|
||||
"--parallel",
|
||||
default=settings.DEFAULT_DATA_EXPORT_IMPORT_PARALLELISM,
|
||||
help="Threads to use in exporting UserMessage objects in parallel",
|
||||
help="Processes to use in exporting UserMessage objects in parallel",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--public-only",
|
||||
@@ -121,9 +121,9 @@ class Command(ZulipBaseCommand):
|
||||
|
||||
print(f"\033[94mExporting realm\033[0m: {realm.string_id}")
|
||||
|
||||
num_threads = int(options["threads"])
|
||||
if num_threads < 1:
|
||||
raise CommandError("You must have at least one thread.")
|
||||
processes = int(options["parallel"])
|
||||
if processes < 1:
|
||||
raise CommandError("You must have at least one process.")
|
||||
|
||||
if public_only and export_full_with_consent:
|
||||
raise CommandError("Please pass either --public-only or --export-full-with-consent")
|
||||
@@ -183,7 +183,7 @@ class Command(ZulipBaseCommand):
|
||||
export_realm_wrapper(
|
||||
export_row=export_row,
|
||||
output_dir=output_dir,
|
||||
threads=num_threads,
|
||||
processes=processes,
|
||||
upload=options["upload"],
|
||||
percent_callback=percent_callback,
|
||||
export_as_active=True if options["deactivate_realm"] else None,
|
||||
|
||||
@@ -17,7 +17,7 @@ class Command(ZulipBaseCommand):
|
||||
@override
|
||||
def add_arguments(self, parser: ArgumentParser) -> None:
|
||||
parser.add_argument("--path", help="Path to find messages.json archives")
|
||||
parser.add_argument("--thread", help="Thread ID")
|
||||
parser.add_argument("--process", help="Process identifier (used only for debug output)")
|
||||
parser.add_argument(
|
||||
"--export-full-with-consent",
|
||||
action="store_true",
|
||||
@@ -26,7 +26,7 @@ class Command(ZulipBaseCommand):
|
||||
|
||||
@override
|
||||
def handle(self, *args: Any, **options: Any) -> None:
|
||||
logging.info("Starting UserMessage batch thread %s", options["thread"])
|
||||
logging.info("Starting UserMessage batch process %s", options["process"])
|
||||
path = options["path"]
|
||||
files = set(glob.glob(os.path.join(path, "messages-*.json.partial")))
|
||||
|
||||
@@ -47,7 +47,7 @@ class Command(ZulipBaseCommand):
|
||||
except FileNotFoundError:
|
||||
# Already claimed by another process
|
||||
continue
|
||||
logging.info("Thread %s processing %s", options["thread"], output_path)
|
||||
logging.info("Process %s processing %s", options["process"], output_path)
|
||||
try:
|
||||
export_usermessages_batch(
|
||||
locked_path,
|
||||
|
||||
@@ -416,7 +416,7 @@ class RealmImportExportTest(ExportFile):
|
||||
do_export_realm(
|
||||
realm=realm,
|
||||
output_dir=output_dir,
|
||||
threads=0,
|
||||
processes=0,
|
||||
export_type=export_type,
|
||||
exportable_user_ids=exportable_user_ids,
|
||||
)
|
||||
|
||||
@@ -537,7 +537,7 @@ class TestExport(ZulipTestCase):
|
||||
call_command(self.COMMAND_NAME, "-r=zulip", "--export-full-with-consent")
|
||||
m.assert_called_once_with(
|
||||
export_row=mock.ANY,
|
||||
threads=mock.ANY,
|
||||
processes=mock.ANY,
|
||||
output_dir=mock.ANY,
|
||||
percent_callback=mock.ANY,
|
||||
upload=False,
|
||||
|
||||
@@ -63,7 +63,7 @@ class RealmExportTest(ZulipTestCase):
|
||||
self.assertEqual(args["realm"], admin.realm)
|
||||
self.assertEqual(args["export_type"], RealmExport.EXPORT_PUBLIC)
|
||||
self.assertTrue(os.path.basename(args["output_dir"]).startswith("zulip-export-"))
|
||||
self.assertEqual(args["threads"], 6)
|
||||
self.assertEqual(args["processes"], 6)
|
||||
|
||||
# Get the entry and test that iago initiated it.
|
||||
export_row = RealmExport.objects.first()
|
||||
@@ -121,7 +121,7 @@ class RealmExportTest(ZulipTestCase):
|
||||
def fake_export_realm(
|
||||
realm: Realm,
|
||||
output_dir: str,
|
||||
threads: int,
|
||||
processes: int,
|
||||
export_type: int,
|
||||
exportable_user_ids: set[int] | None = None,
|
||||
export_as_active: bool | None = None,
|
||||
@@ -129,7 +129,7 @@ class RealmExportTest(ZulipTestCase):
|
||||
self.assertEqual(realm, admin.realm)
|
||||
self.assertEqual(export_type, RealmExport.EXPORT_PUBLIC)
|
||||
self.assertTrue(os.path.basename(output_dir).startswith("zulip-export-"))
|
||||
self.assertEqual(threads, 6)
|
||||
self.assertEqual(processes, 6)
|
||||
|
||||
# Check that the export shows up as in progress
|
||||
result = self.client_get("/json/export/realm")
|
||||
|
||||
@@ -1929,7 +1929,7 @@ by Pieter
|
||||
with self.assertLogs(level="INFO"), self.settings(EXTERNAL_HOST="zulip.example.com"):
|
||||
# We need to mock EXTERNAL_HOST to be a valid domain because Slack's importer
|
||||
# uses it to generate email addresses for users without an email specified.
|
||||
do_convert_zipfile(test_slack_zip_file, output_dir, token, threads=1)
|
||||
do_convert_zipfile(test_slack_zip_file, output_dir, token, processes=1)
|
||||
|
||||
self.assertTrue(os.path.exists(output_dir))
|
||||
self.assertTrue(os.path.exists(output_dir + "/realm.json"))
|
||||
@@ -2138,7 +2138,7 @@ by Pieter
|
||||
with self.assertLogs(level="INFO"), self.settings(EXTERNAL_HOST="zulip.example.com"):
|
||||
# We need to mock EXTERNAL_HOST to be a valid domain because Slack's importer
|
||||
# uses it to generate email addresses for users without an email specified.
|
||||
do_convert_zipfile(test_slack_zip_file, output_dir, token, threads=1)
|
||||
do_convert_zipfile(test_slack_zip_file, output_dir, token, processes=1)
|
||||
|
||||
@mock.patch("zerver.data_import.slack.check_slack_token_access")
|
||||
@responses.activate
|
||||
|
||||
@@ -173,7 +173,7 @@ class DeferredWorker(QueueProcessingWorker):
|
||||
export_realm_wrapper(
|
||||
export_row=export_row,
|
||||
output_dir=output_dir,
|
||||
threads=1 if self.threaded else 6,
|
||||
processes=1 if self.threaded else 6,
|
||||
upload=True,
|
||||
)
|
||||
except Exception:
|
||||
|
||||
Reference in New Issue
Block a user