mirror of
https://github.com/zulip/zulip.git
synced 2025-11-05 22:43:42 +00:00
export: Switch chunkify() to the itertools.batched definition.
We only require Python 3.10, so we cannot use `itertools.batched` directly, which came in 3.12. Include and use a short version of it.
This commit is contained in:
committed by
Tim Abbott
parent
cf33119348
commit
78bc17ecbe
@@ -19,7 +19,8 @@ from collections.abc import Callable, Iterable, Mapping
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from email.headerregistry import Address
|
from email.headerregistry import Address
|
||||||
from functools import cache
|
from functools import cache
|
||||||
from typing import TYPE_CHECKING, Any, Optional, TypeAlias, TypedDict, cast
|
from itertools import islice
|
||||||
|
from typing import TYPE_CHECKING, Any, Optional, TypeAlias, TypedDict, TypeVar, cast
|
||||||
from urllib.parse import urlsplit
|
from urllib.parse import urlsplit
|
||||||
|
|
||||||
import orjson
|
import orjson
|
||||||
@@ -1837,7 +1838,7 @@ def export_partial_message_files(
|
|||||||
|
|
||||||
all_message_ids |= message_ids
|
all_message_ids |= message_ids
|
||||||
|
|
||||||
message_id_chunks = chunkify(sorted(all_message_ids), chunk_size=MESSAGE_BATCH_CHUNK_SIZE)
|
message_id_chunks = batched(sorted(all_message_ids), MESSAGE_BATCH_CHUNK_SIZE)
|
||||||
|
|
||||||
write_message_partials(
|
write_message_partials(
|
||||||
realm=realm,
|
realm=realm,
|
||||||
@@ -1853,7 +1854,7 @@ def export_partial_message_files(
|
|||||||
def write_message_partials(
|
def write_message_partials(
|
||||||
*,
|
*,
|
||||||
realm: Realm,
|
realm: Realm,
|
||||||
message_id_chunks: list[list[int]],
|
message_id_chunks: Iterable[tuple[int, ...]],
|
||||||
output_dir: Path,
|
output_dir: Path,
|
||||||
user_profile_ids: set[int],
|
user_profile_ids: set[int],
|
||||||
collected_client_ids: set[int],
|
collected_client_ids: set[int],
|
||||||
@@ -2707,19 +2708,16 @@ def get_id_list_gently_from_database(*, base_query: Any, id_field: str) -> list[
|
|||||||
return all_ids
|
return all_ids
|
||||||
|
|
||||||
|
|
||||||
# We only require Python 3.10, which does not include
# itertools.batched; include our own equivalent.
T = TypeVar("T")


def batched(iterable: Iterable[T], n: int) -> Iterable[tuple[T, ...]]:
    """Yield successive tuples of at most n items from iterable.

    Backport of itertools.batched (added in Python 3.12).  The final
    batch is shorter than n when the input length is not a multiple
    of n:

        batched([1, 2, 3, 4, 5], 2) -> (1, 2), (3, 4), (5,)

    Raises ValueError if n < 1, matching itertools.batched.
    """
    if n < 1:
        # itertools.batched rejects n < 1; without this guard, n == 0
        # would silently yield nothing rather than signal the error.
        raise ValueError("n must be at least one")
    iterator = iter(iterable)
    batch: tuple[T, ...]
    # tuple(islice(...)) consumes up to n items; an empty tuple means
    # the iterator is exhausted, which ends the loop via the walrus.
    while batch := tuple(islice(iterator, n)):
        yield batch
|
|
||||||
|
|
||||||
def export_messages_single_user(
|
def export_messages_single_user(
|
||||||
@@ -2769,7 +2767,7 @@ def export_messages_single_user(
|
|||||||
all_message_ids |= reaction_message_ids
|
all_message_ids |= reaction_message_ids
|
||||||
|
|
||||||
dump_file_id = 1
|
dump_file_id = 1
|
||||||
for message_id_chunk in chunkify(sorted(all_message_ids), MESSAGE_BATCH_CHUNK_SIZE):
|
for message_id_chunk in batched(sorted(all_message_ids), MESSAGE_BATCH_CHUNK_SIZE):
|
||||||
fat_query = (
|
fat_query = (
|
||||||
UserMessage.objects.select_related("message", "message__sending_client")
|
UserMessage.objects.select_related("message", "message__sending_client")
|
||||||
.filter(user_profile=user_profile, message_id__in=message_id_chunk)
|
.filter(user_profile=user_profile, message_id__in=message_id_chunk)
|
||||||
|
|||||||
Reference in New Issue
Block a user