mirror of
https://github.com/zulip/zulip.git
synced 2025-11-04 22:13:26 +00:00
utils: Remove unused query_chunker function.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
committed by
Tim Abbott
parent
271333301d
commit
58b7a4eb44
@@ -1,10 +1,8 @@
|
|||||||
import hashlib
|
import hashlib
|
||||||
import heapq
|
|
||||||
import itertools
|
|
||||||
import re
|
import re
|
||||||
import secrets
|
import secrets
|
||||||
from itertools import zip_longest
|
from itertools import zip_longest
|
||||||
from typing import Any, Callable, Iterator, List, Optional, Set, Tuple, TypeVar
|
from typing import Any, Callable, List, Optional, TypeVar
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
@@ -99,66 +97,6 @@ def assert_is_not_none(value: Optional[T]) -> T:
|
|||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
def query_chunker(
|
|
||||||
queries: List[Any],
|
|
||||||
id_collector: Optional[Set[int]] = None,
|
|
||||||
chunk_size: int = 1000,
|
|
||||||
db_chunk_size: Optional[int] = None,
|
|
||||||
) -> Iterator[Any]:
|
|
||||||
"""
|
|
||||||
This merges one or more Django ascending-id queries into
|
|
||||||
a generator that returns chunks of chunk_size row objects
|
|
||||||
during each yield, preserving id order across all results..
|
|
||||||
|
|
||||||
Queries should satisfy these conditions:
|
|
||||||
- They should be Django filters.
|
|
||||||
- They should return Django objects with "id" attributes.
|
|
||||||
- They should be disjoint.
|
|
||||||
|
|
||||||
The generator also populates id_collector, which we use
|
|
||||||
internally to enforce unique ids, but which the caller
|
|
||||||
can pass in to us if they want the side effect of collecting
|
|
||||||
all ids.
|
|
||||||
"""
|
|
||||||
if db_chunk_size is None:
|
|
||||||
db_chunk_size = chunk_size // len(queries)
|
|
||||||
|
|
||||||
assert db_chunk_size >= 2
|
|
||||||
assert chunk_size >= 2
|
|
||||||
|
|
||||||
if id_collector is not None:
|
|
||||||
assert len(id_collector) == 0
|
|
||||||
else:
|
|
||||||
id_collector = set()
|
|
||||||
|
|
||||||
def chunkify(q: Any, i: int) -> Iterator[Tuple[int, int, Any]]:
|
|
||||||
q = q.order_by("id")
|
|
||||||
min_id = -1
|
|
||||||
while True:
|
|
||||||
rows = list(q.filter(id__gt=min_id)[0:db_chunk_size])
|
|
||||||
if len(rows) == 0:
|
|
||||||
break
|
|
||||||
for row in rows:
|
|
||||||
yield (row.id, i, row)
|
|
||||||
min_id = rows[-1].id
|
|
||||||
|
|
||||||
iterators = [chunkify(q, i) for i, q in enumerate(queries)]
|
|
||||||
merged_query = heapq.merge(*iterators)
|
|
||||||
|
|
||||||
while True:
|
|
||||||
tup_chunk = list(itertools.islice(merged_query, 0, chunk_size))
|
|
||||||
if len(tup_chunk) == 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Do duplicate-id management here.
|
|
||||||
tup_ids = {tup[0] for tup in tup_chunk}
|
|
||||||
assert len(tup_ids) == len(tup_chunk)
|
|
||||||
assert len(tup_ids.intersection(id_collector)) == 0
|
|
||||||
id_collector.update(tup_ids)
|
|
||||||
|
|
||||||
yield [row for row_id, i, row in tup_chunk]
|
|
||||||
|
|
||||||
|
|
||||||
def process_list_in_batches(
|
def process_list_in_batches(
|
||||||
lst: List[Any], chunk_size: int, process_batch: Callable[[List[Any]], None]
|
lst: List[Any], chunk_size: int, process_batch: Callable[[List[Any]], None]
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|||||||
@@ -33,7 +33,6 @@ from zerver.lib.upload import (
|
|||||||
upload_emoji_image,
|
upload_emoji_image,
|
||||||
upload_message_file,
|
upload_message_file,
|
||||||
)
|
)
|
||||||
from zerver.lib.utils import query_chunker
|
|
||||||
from zerver.models import (
|
from zerver.models import (
|
||||||
AlertWord,
|
AlertWord,
|
||||||
Attachment,
|
Attachment,
|
||||||
@@ -66,132 +65,6 @@ from zerver.models import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class QueryUtilTest(ZulipTestCase):
|
|
||||||
def _create_messages(self) -> None:
|
|
||||||
for name in ["cordelia", "hamlet", "iago"]:
|
|
||||||
user = self.example_user(name)
|
|
||||||
for _ in range(5):
|
|
||||||
self.send_personal_message(user, self.example_user("othello"))
|
|
||||||
|
|
||||||
def test_query_chunker(self) -> None:
|
|
||||||
self._create_messages()
|
|
||||||
|
|
||||||
cordelia = self.example_user("cordelia")
|
|
||||||
hamlet = self.example_user("hamlet")
|
|
||||||
|
|
||||||
def get_queries() -> List[Any]:
|
|
||||||
queries = [
|
|
||||||
Message.objects.filter(sender_id=cordelia.id),
|
|
||||||
Message.objects.filter(sender_id=hamlet.id),
|
|
||||||
Message.objects.exclude(sender_id__in=[cordelia.id, hamlet.id]),
|
|
||||||
]
|
|
||||||
return queries
|
|
||||||
|
|
||||||
for query in get_queries():
|
|
||||||
# For our test to be meaningful, we want non-empty queries
|
|
||||||
# at first
|
|
||||||
self.assertGreater(len(list(query)), 0)
|
|
||||||
|
|
||||||
queries = get_queries()
|
|
||||||
|
|
||||||
all_msg_ids: Set[int] = set()
|
|
||||||
chunker = query_chunker(
|
|
||||||
queries=queries,
|
|
||||||
id_collector=all_msg_ids,
|
|
||||||
chunk_size=20,
|
|
||||||
)
|
|
||||||
|
|
||||||
all_row_ids = []
|
|
||||||
for chunk in chunker:
|
|
||||||
for row in chunk:
|
|
||||||
all_row_ids.append(row.id)
|
|
||||||
|
|
||||||
self.assertEqual(all_row_ids, sorted(all_row_ids))
|
|
||||||
self.assert_length(all_msg_ids, len(Message.objects.all()))
|
|
||||||
|
|
||||||
# Now just search for cordelia/hamlet. Note that we don't really
|
|
||||||
# need the order_by here, but it should be harmless.
|
|
||||||
queries = [
|
|
||||||
Message.objects.filter(sender_id=cordelia.id).order_by("id"),
|
|
||||||
Message.objects.filter(sender_id=hamlet.id),
|
|
||||||
]
|
|
||||||
all_msg_ids = set()
|
|
||||||
chunker = query_chunker(
|
|
||||||
queries=queries,
|
|
||||||
id_collector=all_msg_ids,
|
|
||||||
chunk_size=7, # use a different size
|
|
||||||
)
|
|
||||||
list(chunker) # exhaust the iterator
|
|
||||||
self.assertEqual(
|
|
||||||
len(all_msg_ids),
|
|
||||||
len(Message.objects.filter(sender_id__in=[cordelia.id, hamlet.id])),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Try just a single query to validate chunking.
|
|
||||||
queries = [
|
|
||||||
Message.objects.exclude(sender_id=cordelia.id),
|
|
||||||
]
|
|
||||||
all_msg_ids = set()
|
|
||||||
chunker = query_chunker(
|
|
||||||
queries=queries,
|
|
||||||
id_collector=all_msg_ids,
|
|
||||||
chunk_size=11, # use a different size each time
|
|
||||||
)
|
|
||||||
list(chunker) # exhaust the iterator
|
|
||||||
self.assertEqual(
|
|
||||||
len(all_msg_ids),
|
|
||||||
len(Message.objects.exclude(sender_id=cordelia.id)),
|
|
||||||
)
|
|
||||||
self.assertGreater(len(all_msg_ids), 15)
|
|
||||||
|
|
||||||
# Verify assertions about disjoint-ness.
|
|
||||||
queries = [
|
|
||||||
Message.objects.exclude(sender_id=cordelia.id),
|
|
||||||
Message.objects.filter(sender_id=hamlet.id),
|
|
||||||
]
|
|
||||||
all_msg_ids = set()
|
|
||||||
chunker = query_chunker(
|
|
||||||
queries=queries,
|
|
||||||
id_collector=all_msg_ids,
|
|
||||||
chunk_size=13, # use a different size each time
|
|
||||||
)
|
|
||||||
with self.assertRaises(AssertionError):
|
|
||||||
list(chunker) # exercise the iterator
|
|
||||||
|
|
||||||
# Try to confuse things with ids part of the query...
|
|
||||||
queries = [
|
|
||||||
Message.objects.filter(id__lte=10),
|
|
||||||
Message.objects.filter(id__gt=10),
|
|
||||||
]
|
|
||||||
all_msg_ids = set()
|
|
||||||
chunker = query_chunker(
|
|
||||||
queries=queries,
|
|
||||||
id_collector=all_msg_ids,
|
|
||||||
chunk_size=11, # use a different size each time
|
|
||||||
)
|
|
||||||
self.assert_length(all_msg_ids, 0) # until we actually use the iterator
|
|
||||||
list(chunker) # exhaust the iterator
|
|
||||||
self.assert_length(all_msg_ids, len(Message.objects.all()))
|
|
||||||
|
|
||||||
# Verify that we can just get the first chunk with a next() call.
|
|
||||||
queries = [
|
|
||||||
Message.objects.all(),
|
|
||||||
]
|
|
||||||
all_msg_ids = set()
|
|
||||||
chunker = query_chunker(
|
|
||||||
queries=queries,
|
|
||||||
id_collector=all_msg_ids,
|
|
||||||
chunk_size=10, # use a different size each time
|
|
||||||
)
|
|
||||||
first_chunk = next(chunker)
|
|
||||||
self.assert_length(first_chunk, 10)
|
|
||||||
self.assert_length(all_msg_ids, 10)
|
|
||||||
expected_msg = Message.objects.all()[0:10][5]
|
|
||||||
actual_msg = first_chunk[5]
|
|
||||||
self.assertEqual(actual_msg.content, expected_msg.content)
|
|
||||||
self.assertEqual(actual_msg.sender_id, expected_msg.sender_id)
|
|
||||||
|
|
||||||
|
|
||||||
class ImportExportTest(ZulipTestCase):
|
class ImportExportTest(ZulipTestCase):
|
||||||
def setUp(self) -> None:
|
def setUp(self) -> None:
|
||||||
super().setUp()
|
super().setUp()
|
||||||
|
|||||||
Reference in New Issue
Block a user