utils: Remove unused query_chunker function.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
2025-10-31 12:03:46 +00:00 · 2021-08-13 18:32:16 -07:00
parent 271333301d
commit 58b7a4eb44
2 changed files with 1 addition and 190 deletions
--- a/zerver/lib/utils.py
+++ b/zerver/lib/utils.py
@@ -1,10 +1,8 @@
 import hashlib
-import heapq
-import itertools
 import re
 import secrets
 from itertools import zip_longest
-from typing import Any, Callable, Iterator, List, Optional, Set, Tuple, TypeVar
+from typing import Any, Callable, List, Optional, TypeVar

 from django.conf import settings

@@ -99,66 +97,6 @@ def assert_is_not_none(value: Optional[T]) -> T:
    return value


-def query_chunker(
-    queries: List[Any],
-    id_collector: Optional[Set[int]] = None,
-    chunk_size: int = 1000,
-    db_chunk_size: Optional[int] = None,
-) -> Iterator[Any]:
-    """
-    This merges one or more Django ascending-id queries into
-    a generator that returns chunks of chunk_size row objects
-    during each yield, preserving id order across all results..
-
-    Queries should satisfy these conditions:
-        - They should be Django filters.
-        - They should return Django objects with "id" attributes.
-        - They should be disjoint.
-
-    The generator also populates id_collector, which we use
-    internally to enforce unique ids, but which the caller
-    can pass in to us if they want the side effect of collecting
-    all ids.
-    """
-    if db_chunk_size is None:
-        db_chunk_size = chunk_size // len(queries)
-
-    assert db_chunk_size >= 2
-    assert chunk_size >= 2
-
-    if id_collector is not None:
-        assert len(id_collector) == 0
-    else:
-        id_collector = set()
-
-    def chunkify(q: Any, i: int) -> Iterator[Tuple[int, int, Any]]:
-        q = q.order_by("id")
-        min_id = -1
-        while True:
-            rows = list(q.filter(id__gt=min_id)[0:db_chunk_size])
-            if len(rows) == 0:
-                break
-            for row in rows:
-                yield (row.id, i, row)
-            min_id = rows[-1].id
-
-    iterators = [chunkify(q, i) for i, q in enumerate(queries)]
-    merged_query = heapq.merge(*iterators)
-
-    while True:
-        tup_chunk = list(itertools.islice(merged_query, 0, chunk_size))
-        if len(tup_chunk) == 0:
-            break
-
-        # Do duplicate-id management here.
-        tup_ids = {tup[0] for tup in tup_chunk}
-        assert len(tup_ids) == len(tup_chunk)
-        assert len(tup_ids.intersection(id_collector)) == 0
-        id_collector.update(tup_ids)
-
-        yield [row for row_id, i, row in tup_chunk]
-
-
 def process_list_in_batches(
    lst: List[Any], chunk_size: int, process_batch: Callable[[List[Any]], None]
 ) -> None:
--- a/zerver/tests/test_import_export.py
+++ b/zerver/tests/test_import_export.py
@@ -33,7 +33,6 @@ from zerver.lib.upload import (
    upload_emoji_image,
    upload_message_file,
 )
-from zerver.lib.utils import query_chunker
 from zerver.models import (
    AlertWord,
    Attachment,
@@ -66,132 +65,6 @@ from zerver.models import (
 )


-class QueryUtilTest(ZulipTestCase):
-    def _create_messages(self) -> None:
-        for name in ["cordelia", "hamlet", "iago"]:
-            user = self.example_user(name)
-            for _ in range(5):
-                self.send_personal_message(user, self.example_user("othello"))
-
-    def test_query_chunker(self) -> None:
-        self._create_messages()
-
-        cordelia = self.example_user("cordelia")
-        hamlet = self.example_user("hamlet")
-
-        def get_queries() -> List[Any]:
-            queries = [
-                Message.objects.filter(sender_id=cordelia.id),
-                Message.objects.filter(sender_id=hamlet.id),
-                Message.objects.exclude(sender_id__in=[cordelia.id, hamlet.id]),
-            ]
-            return queries
-
-        for query in get_queries():
-            # For our test to be meaningful, we want non-empty queries
-            # at first
-            self.assertGreater(len(list(query)), 0)
-
-        queries = get_queries()
-
-        all_msg_ids: Set[int] = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=20,
-        )
-
-        all_row_ids = []
-        for chunk in chunker:
-            for row in chunk:
-                all_row_ids.append(row.id)
-
-        self.assertEqual(all_row_ids, sorted(all_row_ids))
-        self.assert_length(all_msg_ids, len(Message.objects.all()))
-
-        # Now just search for cordelia/hamlet.  Note that we don't really
-        # need the order_by here, but it should be harmless.
-        queries = [
-            Message.objects.filter(sender_id=cordelia.id).order_by("id"),
-            Message.objects.filter(sender_id=hamlet.id),
-        ]
-        all_msg_ids = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=7,  # use a different size
-        )
-        list(chunker)  # exhaust the iterator
-        self.assertEqual(
-            len(all_msg_ids),
-            len(Message.objects.filter(sender_id__in=[cordelia.id, hamlet.id])),
-        )
-
-        # Try just a single query to validate chunking.
-        queries = [
-            Message.objects.exclude(sender_id=cordelia.id),
-        ]
-        all_msg_ids = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=11,  # use a different size each time
-        )
-        list(chunker)  # exhaust the iterator
-        self.assertEqual(
-            len(all_msg_ids),
-            len(Message.objects.exclude(sender_id=cordelia.id)),
-        )
-        self.assertGreater(len(all_msg_ids), 15)
-
-        # Verify assertions about disjoint-ness.
-        queries = [
-            Message.objects.exclude(sender_id=cordelia.id),
-            Message.objects.filter(sender_id=hamlet.id),
-        ]
-        all_msg_ids = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=13,  # use a different size each time
-        )
-        with self.assertRaises(AssertionError):
-            list(chunker)  # exercise the iterator
-
-        # Try to confuse things with ids part of the query...
-        queries = [
-            Message.objects.filter(id__lte=10),
-            Message.objects.filter(id__gt=10),
-        ]
-        all_msg_ids = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=11,  # use a different size each time
-        )
-        self.assert_length(all_msg_ids, 0)  # until we actually use the iterator
-        list(chunker)  # exhaust the iterator
-        self.assert_length(all_msg_ids, len(Message.objects.all()))
-
-        # Verify that we can just get the first chunk with a next() call.
-        queries = [
-            Message.objects.all(),
-        ]
-        all_msg_ids = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=10,  # use a different size each time
-        )
-        first_chunk = next(chunker)
-        self.assert_length(first_chunk, 10)
-        self.assert_length(all_msg_ids, 10)
-        expected_msg = Message.objects.all()[0:10][5]
-        actual_msg = first_chunk[5]
-        self.assertEqual(actual_msg.content, expected_msg.content)
-        self.assertEqual(actual_msg.sender_id, expected_msg.sender_id)
-
-
 class ImportExportTest(ZulipTestCase):
    def setUp(self) -> None:
        super().setUp()