Mirror of https://github.com/zulip/zulip.git, synced 2025-10-27 10:03:56 +00:00
utils: Remove unused query_chunker function.

Signed-off-by: Anders Kaseorg <anders@zulip.com>

committed by Tim Abbott
parent 271333301d
commit 58b7a4eb44
@@ -1,10 +1,8 @@
 import hashlib
-import heapq
-import itertools
 import re
 import secrets
 from itertools import zip_longest
-from typing import Any, Callable, Iterator, List, Optional, Set, Tuple, TypeVar
+from typing import Any, Callable, List, Optional, TypeVar
 
 from django.conf import settings
 
@@ -99,66 +97,6 @@ def assert_is_not_none(value: Optional[T]) -> T:
     return value
 
 
-def query_chunker(
-    queries: List[Any],
-    id_collector: Optional[Set[int]] = None,
-    chunk_size: int = 1000,
-    db_chunk_size: Optional[int] = None,
-) -> Iterator[Any]:
-    """
-    This merges one or more Django ascending-id queries into
-    a generator that returns chunks of chunk_size row objects
-    during each yield, preserving id order across all results.
-
-    Queries should satisfy these conditions:
-        - They should be Django filters.
-        - They should return Django objects with "id" attributes.
-        - They should be disjoint.
-
-    The generator also populates id_collector, which we use
-    internally to enforce unique ids, but which the caller
-    can pass in to us if they want the side effect of collecting
-    all ids.
-    """
-    if db_chunk_size is None:
-        db_chunk_size = chunk_size // len(queries)
-
-    assert db_chunk_size >= 2
-    assert chunk_size >= 2
-
-    if id_collector is not None:
-        assert len(id_collector) == 0
-    else:
-        id_collector = set()
-
-    def chunkify(q: Any, i: int) -> Iterator[Tuple[int, int, Any]]:
-        q = q.order_by("id")
-        min_id = -1
-        while True:
-            rows = list(q.filter(id__gt=min_id)[0:db_chunk_size])
-            if len(rows) == 0:
-                break
-            for row in rows:
-                yield (row.id, i, row)
-            min_id = rows[-1].id
-
-    iterators = [chunkify(q, i) for i, q in enumerate(queries)]
-    merged_query = heapq.merge(*iterators)
-
-    while True:
-        tup_chunk = list(itertools.islice(merged_query, 0, chunk_size))
-        if len(tup_chunk) == 0:
-            break
-
-        # Do duplicate-id management here.
-        tup_ids = {tup[0] for tup in tup_chunk}
-        assert len(tup_ids) == len(tup_chunk)
-        assert len(tup_ids.intersection(id_collector)) == 0
-        id_collector.update(tup_ids)
-
-        yield [row for row_id, i, row in tup_chunk]
-
-
 def process_list_in_batches(
     lst: List[Any], chunk_size: int, process_batch: Callable[[List[Any]], None]
 ) -> None:
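
For context on why the heapq and itertools imports became unused: query_chunker's core trick was to merge several already-ascending (id, row) iterators with heapq.merge and slice the merged stream into fixed-size batches with itertools.islice. Below is a minimal, self-contained sketch of that pattern, with plain lists standing in for Django querysets; merged_chunks and the sample data are illustrative names, not Zulip code.

import heapq
import itertools
from typing import Any, Iterator, List, Tuple


def merged_chunks(
    sources: List[List[Tuple[int, Any]]], chunk_size: int
) -> Iterator[List[Any]]:
    # Each source must already be sorted ascending by id and be disjoint
    # from the others, matching the preconditions in query_chunker's docstring.
    merged = heapq.merge(*sources)
    while True:
        # islice consumes at most chunk_size (id, row) tuples per batch.
        chunk = list(itertools.islice(merged, chunk_size))
        if not chunk:
            break
        yield [row for _row_id, row in chunk]


# Two disjoint, ascending-id "queries" merged in global id order:
evens = [(2, "even-2"), (4, "even-4"), (6, "even-6")]
odds = [(1, "odd-1"), (3, "odd-3"), (5, "odd-5")]
for batch in merged_chunks([evens, odds], chunk_size=4):
    print(batch)
# ['odd-1', 'even-2', 'odd-3', 'even-4']
# ['odd-5', 'even-6']

Because heapq.merge is lazy, nothing beyond the current batch is ever materialized; the removed function additionally paged each queryset through the database db_chunk_size rows at a time, keeping memory bounded regardless of table size.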