export: Switch chunkify() to the itertools.batched definition.

We only require Python 3.10, so we cannot use `itertools.batched`
directly, because it was only added in 3.12.  Include and use a short
equivalent of it instead.
This commit is contained in:
Alex Vandiver
2025-08-22 18:59:32 +00:00
committed by Tim Abbott
parent cf33119348
commit 78bc17ecbe

View File

@@ -19,7 +19,8 @@ from collections.abc import Callable, Iterable, Mapping
from datetime import datetime from datetime import datetime
from email.headerregistry import Address from email.headerregistry import Address
from functools import cache from functools import cache
from typing import TYPE_CHECKING, Any, Optional, TypeAlias, TypedDict, cast from itertools import islice
from typing import TYPE_CHECKING, Any, Optional, TypeAlias, TypedDict, TypeVar, cast
from urllib.parse import urlsplit from urllib.parse import urlsplit
import orjson import orjson
@@ -1837,7 +1838,7 @@ def export_partial_message_files(
all_message_ids |= message_ids all_message_ids |= message_ids
message_id_chunks = chunkify(sorted(all_message_ids), chunk_size=MESSAGE_BATCH_CHUNK_SIZE) message_id_chunks = batched(sorted(all_message_ids), MESSAGE_BATCH_CHUNK_SIZE)
write_message_partials( write_message_partials(
realm=realm, realm=realm,
@@ -1853,7 +1854,7 @@ def export_partial_message_files(
def write_message_partials( def write_message_partials(
*, *,
realm: Realm, realm: Realm,
message_id_chunks: list[list[int]], message_id_chunks: Iterable[tuple[int, ...]],
output_dir: Path, output_dir: Path,
user_profile_ids: set[int], user_profile_ids: set[int],
collected_client_ids: set[int], collected_client_ids: set[int],
@@ -2707,19 +2708,16 @@ def get_id_list_gently_from_database(*, base_query: Any, id_field: str) -> list[
return all_ids return all_ids
def chunkify(lst: list[int], chunk_size: int) -> list[list[int]]: # We only require Python 3.10, which does not include
# chunkify([1,2,3,4,5], 2) == [[1,2], [3,4], [5]] # itertools.batched; include our own equivalent
result = [] T = TypeVar("T")
i = 0
while True:
chunk = lst[i : i + chunk_size]
if len(chunk) == 0:
break
else:
result.append(chunk)
i += chunk_size
return result
def batched(iterable: Iterable[T], n: int) -> Iterable[tuple[T, ...]]:
    """Yield the items of ``iterable`` in consecutive tuples of length ``n``.

    Backport of ``itertools.batched`` (added in Python 3.12).  The final
    tuple is shorter than ``n`` when the input does not divide evenly,
    and an empty input yields nothing:

        batched([1, 2, 3, 4, 5], 2) -> (1, 2), (3, 4), (5,)

    The input is consumed lazily, one batch at a time.

    Raises:
        ValueError: if ``n`` is less than 1.  Because this is a
            generator, the error surfaces when iteration starts, not
            when ``batched`` is called.
    """
    # Match itertools.batched: without this check, n == 0 would produce
    # an empty first batch and silently end the loop, dropping all data.
    if n < 1:
        raise ValueError("n must be at least one")
    iterator = iter(iterable)
    # The walrus target cannot carry an annotation, so declare it first.
    batch: tuple[T, ...]
    # islice() returns an empty slice once the iterator is exhausted;
    # the resulting empty tuple is falsy and terminates the loop.
    while batch := tuple(islice(iterator, n)):
        yield batch
def export_messages_single_user( def export_messages_single_user(
@@ -2769,7 +2767,7 @@ def export_messages_single_user(
all_message_ids |= reaction_message_ids all_message_ids |= reaction_message_ids
dump_file_id = 1 dump_file_id = 1
for message_id_chunk in chunkify(sorted(all_message_ids), MESSAGE_BATCH_CHUNK_SIZE): for message_id_chunk in batched(sorted(all_message_ids), MESSAGE_BATCH_CHUNK_SIZE):
fat_query = ( fat_query = (
UserMessage.objects.select_related("message", "message__sending_client") UserMessage.objects.select_related("message", "message__sending_client")
.filter(user_profile=user_profile, message_id__in=message_id_chunk) .filter(user_profile=user_profile, message_id__in=message_id_chunk)