mirror of
				https://github.com/zulip/zulip.git
				synced 2025-10-31 12:03:46 +00:00 
			
		
		
		
	utils: Remove unused query_chunker function.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
		
				
					committed by
					
						 Tim Abbott
						Tim Abbott
					
				
			
			
				
	
			
			
			
						parent
						
							271333301d
						
					
				
				
					commit
					58b7a4eb44
				
			| @@ -1,10 +1,8 @@ | ||||
| import hashlib | ||||
| import heapq | ||||
| import itertools | ||||
| import re | ||||
| import secrets | ||||
| from itertools import zip_longest | ||||
| from typing import Any, Callable, Iterator, List, Optional, Set, Tuple, TypeVar | ||||
| from typing import Any, Callable, List, Optional, TypeVar | ||||
|  | ||||
| from django.conf import settings | ||||
|  | ||||
| @@ -99,66 +97,6 @@ def assert_is_not_none(value: Optional[T]) -> T: | ||||
|     return value | ||||
|  | ||||
|  | ||||
| def query_chunker( | ||||
|     queries: List[Any], | ||||
|     id_collector: Optional[Set[int]] = None, | ||||
|     chunk_size: int = 1000, | ||||
|     db_chunk_size: Optional[int] = None, | ||||
| ) -> Iterator[Any]: | ||||
|     """ | ||||
|     This merges one or more Django ascending-id queries into | ||||
|     a generator that returns chunks of chunk_size row objects | ||||
|     during each yield, preserving id order across all results. | ||||
|  | ||||
|     Queries should satisfy these conditions: | ||||
|         - They should be Django filters. | ||||
|         - They should return Django objects with "id" attributes. | ||||
|         - They should be disjoint. | ||||
|  | ||||
|     The generator also populates id_collector, which we use | ||||
|     internally to enforce unique ids, but which the caller | ||||
|     can pass in to us if they want the side effect of collecting | ||||
|     all ids. | ||||
|     """ | ||||
|     if db_chunk_size is None: | ||||
|         db_chunk_size = chunk_size // len(queries) | ||||
|  | ||||
|     assert db_chunk_size >= 2 | ||||
|     assert chunk_size >= 2 | ||||
|  | ||||
|     if id_collector is not None: | ||||
|         assert len(id_collector) == 0 | ||||
|     else: | ||||
|         id_collector = set() | ||||
|  | ||||
|     def chunkify(q: Any, i: int) -> Iterator[Tuple[int, int, Any]]: | ||||
|         q = q.order_by("id") | ||||
|         min_id = -1 | ||||
|         while True: | ||||
|             rows = list(q.filter(id__gt=min_id)[0:db_chunk_size]) | ||||
|             if len(rows) == 0: | ||||
|                 break | ||||
|             for row in rows: | ||||
|                 yield (row.id, i, row) | ||||
|             min_id = rows[-1].id | ||||
|  | ||||
|     iterators = [chunkify(q, i) for i, q in enumerate(queries)] | ||||
|     merged_query = heapq.merge(*iterators) | ||||
|  | ||||
|     while True: | ||||
|         tup_chunk = list(itertools.islice(merged_query, 0, chunk_size)) | ||||
|         if len(tup_chunk) == 0: | ||||
|             break | ||||
|  | ||||
|         # Do duplicate-id management here. | ||||
|         tup_ids = {tup[0] for tup in tup_chunk} | ||||
|         assert len(tup_ids) == len(tup_chunk) | ||||
|         assert len(tup_ids.intersection(id_collector)) == 0 | ||||
|         id_collector.update(tup_ids) | ||||
|  | ||||
|         yield [row for row_id, i, row in tup_chunk] | ||||
|  | ||||
|  | ||||
| def process_list_in_batches( | ||||
|     lst: List[Any], chunk_size: int, process_batch: Callable[[List[Any]], None] | ||||
| ) -> None: | ||||
|   | ||||
| @@ -33,7 +33,6 @@ from zerver.lib.upload import ( | ||||
|     upload_emoji_image, | ||||
|     upload_message_file, | ||||
| ) | ||||
| from zerver.lib.utils import query_chunker | ||||
| from zerver.models import ( | ||||
|     AlertWord, | ||||
|     Attachment, | ||||
| @@ -66,132 +65,6 @@ from zerver.models import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class QueryUtilTest(ZulipTestCase): | ||||
|     def _create_messages(self) -> None: | ||||
|         for name in ["cordelia", "hamlet", "iago"]: | ||||
|             user = self.example_user(name) | ||||
|             for _ in range(5): | ||||
|                 self.send_personal_message(user, self.example_user("othello")) | ||||
|  | ||||
|     def test_query_chunker(self) -> None: | ||||
|         self._create_messages() | ||||
|  | ||||
|         cordelia = self.example_user("cordelia") | ||||
|         hamlet = self.example_user("hamlet") | ||||
|  | ||||
|         def get_queries() -> List[Any]: | ||||
|             queries = [ | ||||
|                 Message.objects.filter(sender_id=cordelia.id), | ||||
|                 Message.objects.filter(sender_id=hamlet.id), | ||||
|                 Message.objects.exclude(sender_id__in=[cordelia.id, hamlet.id]), | ||||
|             ] | ||||
|             return queries | ||||
|  | ||||
|         for query in get_queries(): | ||||
|             # For our test to be meaningful, we want non-empty queries | ||||
|             # at first | ||||
|             self.assertGreater(len(list(query)), 0) | ||||
|  | ||||
|         queries = get_queries() | ||||
|  | ||||
|         all_msg_ids: Set[int] = set() | ||||
|         chunker = query_chunker( | ||||
|             queries=queries, | ||||
|             id_collector=all_msg_ids, | ||||
|             chunk_size=20, | ||||
|         ) | ||||
|  | ||||
|         all_row_ids = [] | ||||
|         for chunk in chunker: | ||||
|             for row in chunk: | ||||
|                 all_row_ids.append(row.id) | ||||
|  | ||||
|         self.assertEqual(all_row_ids, sorted(all_row_ids)) | ||||
|         self.assert_length(all_msg_ids, len(Message.objects.all())) | ||||
|  | ||||
|         # Now just search for cordelia/hamlet.  Note that we don't really | ||||
|         # need the order_by here, but it should be harmless. | ||||
|         queries = [ | ||||
|             Message.objects.filter(sender_id=cordelia.id).order_by("id"), | ||||
|             Message.objects.filter(sender_id=hamlet.id), | ||||
|         ] | ||||
|         all_msg_ids = set() | ||||
|         chunker = query_chunker( | ||||
|             queries=queries, | ||||
|             id_collector=all_msg_ids, | ||||
|             chunk_size=7,  # use a different size | ||||
|         ) | ||||
|         list(chunker)  # exhaust the iterator | ||||
|         self.assertEqual( | ||||
|             len(all_msg_ids), | ||||
|             len(Message.objects.filter(sender_id__in=[cordelia.id, hamlet.id])), | ||||
|         ) | ||||
|  | ||||
|         # Try just a single query to validate chunking. | ||||
|         queries = [ | ||||
|             Message.objects.exclude(sender_id=cordelia.id), | ||||
|         ] | ||||
|         all_msg_ids = set() | ||||
|         chunker = query_chunker( | ||||
|             queries=queries, | ||||
|             id_collector=all_msg_ids, | ||||
|             chunk_size=11,  # use a different size each time | ||||
|         ) | ||||
|         list(chunker)  # exhaust the iterator | ||||
|         self.assertEqual( | ||||
|             len(all_msg_ids), | ||||
|             len(Message.objects.exclude(sender_id=cordelia.id)), | ||||
|         ) | ||||
|         self.assertGreater(len(all_msg_ids), 15) | ||||
|  | ||||
|         # Verify assertions about disjoint-ness. | ||||
|         queries = [ | ||||
|             Message.objects.exclude(sender_id=cordelia.id), | ||||
|             Message.objects.filter(sender_id=hamlet.id), | ||||
|         ] | ||||
|         all_msg_ids = set() | ||||
|         chunker = query_chunker( | ||||
|             queries=queries, | ||||
|             id_collector=all_msg_ids, | ||||
|             chunk_size=13,  # use a different size each time | ||||
|         ) | ||||
|         with self.assertRaises(AssertionError): | ||||
|             list(chunker)  # exercise the iterator | ||||
|  | ||||
|         # Try to confuse things with ids part of the query... | ||||
|         queries = [ | ||||
|             Message.objects.filter(id__lte=10), | ||||
|             Message.objects.filter(id__gt=10), | ||||
|         ] | ||||
|         all_msg_ids = set() | ||||
|         chunker = query_chunker( | ||||
|             queries=queries, | ||||
|             id_collector=all_msg_ids, | ||||
|             chunk_size=11,  # use a different size each time | ||||
|         ) | ||||
|         self.assert_length(all_msg_ids, 0)  # until we actually use the iterator | ||||
|         list(chunker)  # exhaust the iterator | ||||
|         self.assert_length(all_msg_ids, len(Message.objects.all())) | ||||
|  | ||||
|         # Verify that we can just get the first chunk with a next() call. | ||||
|         queries = [ | ||||
|             Message.objects.all(), | ||||
|         ] | ||||
|         all_msg_ids = set() | ||||
|         chunker = query_chunker( | ||||
|             queries=queries, | ||||
|             id_collector=all_msg_ids, | ||||
|             chunk_size=10,  # use a different size each time | ||||
|         ) | ||||
|         first_chunk = next(chunker) | ||||
|         self.assert_length(first_chunk, 10) | ||||
|         self.assert_length(all_msg_ids, 10) | ||||
|         expected_msg = Message.objects.all()[0:10][5] | ||||
|         actual_msg = first_chunk[5] | ||||
|         self.assertEqual(actual_msg.content, expected_msg.content) | ||||
|         self.assertEqual(actual_msg.sender_id, expected_msg.sender_id) | ||||
|  | ||||
|  | ||||
| class ImportExportTest(ZulipTestCase): | ||||
|     def setUp(self) -> None: | ||||
|         super().setUp() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user