Mirror of https://github.com/zulip/zulip.git
utils: Remove unused query_chunker function.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
Committed by: Tim Abbott

parent 271333301d
commit 58b7a4eb44
diff --git a/zerver/lib/utils.py b/zerver/lib/utils.py
@@ -1,10 +1,8 @@
 import hashlib
-import heapq
-import itertools
 import re
 import secrets
 from itertools import zip_longest
-from typing import Any, Callable, Iterator, List, Optional, Set, Tuple, TypeVar
+from typing import Any, Callable, List, Optional, TypeVar
 
 from django.conf import settings
 
@@ -99,66 +97,6 @@ def assert_is_not_none(value: Optional[T]) -> T:
     return value
 
 
-def query_chunker(
-    queries: List[Any],
-    id_collector: Optional[Set[int]] = None,
-    chunk_size: int = 1000,
-    db_chunk_size: Optional[int] = None,
-) -> Iterator[Any]:
-    """
-    This merges one or more Django ascending-id queries into
-    a generator that returns chunks of chunk_size row objects
-    during each yield, preserving id order across all results.
-
-    Queries should satisfy these conditions:
-        - They should be Django filters.
-        - They should return Django objects with "id" attributes.
-        - They should be disjoint.
-
-    The generator also populates id_collector, which we use
-    internally to enforce unique ids, but which the caller
-    can pass in to us if they want the side effect of collecting
-    all ids.
-    """
-    if db_chunk_size is None:
-        db_chunk_size = chunk_size // len(queries)
-
-    assert db_chunk_size >= 2
-    assert chunk_size >= 2
-
-    if id_collector is not None:
-        assert len(id_collector) == 0
-    else:
-        id_collector = set()
-
-    def chunkify(q: Any, i: int) -> Iterator[Tuple[int, int, Any]]:
-        q = q.order_by("id")
-        min_id = -1
-        while True:
-            rows = list(q.filter(id__gt=min_id)[0:db_chunk_size])
-            if len(rows) == 0:
-                break
-            for row in rows:
-                yield (row.id, i, row)
-            min_id = rows[-1].id
-
-    iterators = [chunkify(q, i) for i, q in enumerate(queries)]
-    merged_query = heapq.merge(*iterators)
-
-    while True:
-        tup_chunk = list(itertools.islice(merged_query, 0, chunk_size))
-        if len(tup_chunk) == 0:
-            break
-
-        # Do duplicate-id management here.
-        tup_ids = {tup[0] for tup in tup_chunk}
-        assert len(tup_ids) == len(tup_chunk)
-        assert len(tup_ids.intersection(id_collector)) == 0
-        id_collector.update(tup_ids)
-
-        yield [row for row_id, i, row in tup_chunk]
-
-
 def process_list_in_batches(
     lst: List[Any], chunk_size: int, process_batch: Callable[[List[Any]], None]
 ) -> None:
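Context for reviewers: the deleted helper merged several ascending-id Django queries with heapq.merge and re-chunked the merged stream with itertools.islice. A minimal stdlib-only sketch of that technique, using plain lists in place of querysets (merge_chunks and the sample streams are illustrative, not part of the codebase):

import heapq
import itertools
from typing import Iterator, List

def merge_chunks(sorted_streams: List[List[int]], chunk_size: int) -> Iterator[List[int]]:
    # heapq.merge preserves global ascending order across the
    # already-sorted input streams, just as query_chunker did for
    # its ascending-id querysets.
    merged = heapq.merge(*sorted_streams)
    while True:
        chunk = list(itertools.islice(merged, chunk_size))
        if not chunk:
            break
        yield chunk

streams = [[1, 4, 7], [2, 5, 8], [3, 6, 9]]
print(list(merge_chunks(streams, chunk_size=4)))
# [[1, 2, 3, 4], [5, 6, 7, 8], [9]]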
diff --git a/zerver/tests/test_import_export.py b/zerver/tests/test_import_export.py
@@ -33,7 +33,6 @@ from zerver.lib.upload import (
     upload_emoji_image,
     upload_message_file,
 )
-from zerver.lib.utils import query_chunker
 from zerver.models import (
     AlertWord,
     Attachment,
@@ -66,132 +65,6 @@ from zerver.models import (
 )
 
 
-class QueryUtilTest(ZulipTestCase):
-    def _create_messages(self) -> None:
-        for name in ["cordelia", "hamlet", "iago"]:
-            user = self.example_user(name)
-            for _ in range(5):
-                self.send_personal_message(user, self.example_user("othello"))
-
-    def test_query_chunker(self) -> None:
-        self._create_messages()
-
-        cordelia = self.example_user("cordelia")
-        hamlet = self.example_user("hamlet")
-
-        def get_queries() -> List[Any]:
-            queries = [
-                Message.objects.filter(sender_id=cordelia.id),
-                Message.objects.filter(sender_id=hamlet.id),
-                Message.objects.exclude(sender_id__in=[cordelia.id, hamlet.id]),
-            ]
-            return queries
-
-        for query in get_queries():
-            # For our test to be meaningful, we want non-empty queries
-            # at first
-            self.assertGreater(len(list(query)), 0)
-
-        queries = get_queries()
-
-        all_msg_ids: Set[int] = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=20,
-        )
-
-        all_row_ids = []
-        for chunk in chunker:
-            for row in chunk:
-                all_row_ids.append(row.id)
-
-        self.assertEqual(all_row_ids, sorted(all_row_ids))
-        self.assert_length(all_msg_ids, len(Message.objects.all()))
-
-        # Now just search for cordelia/hamlet.  Note that we don't really
-        # need the order_by here, but it should be harmless.
-        queries = [
-            Message.objects.filter(sender_id=cordelia.id).order_by("id"),
-            Message.objects.filter(sender_id=hamlet.id),
-        ]
-        all_msg_ids = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=7,  # use a different size
-        )
-        list(chunker)  # exhaust the iterator
-        self.assertEqual(
-            len(all_msg_ids),
-            len(Message.objects.filter(sender_id__in=[cordelia.id, hamlet.id])),
-        )
-
-        # Try just a single query to validate chunking.
-        queries = [
-            Message.objects.exclude(sender_id=cordelia.id),
-        ]
-        all_msg_ids = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=11,  # use a different size each time
-        )
-        list(chunker)  # exhaust the iterator
-        self.assertEqual(
-            len(all_msg_ids),
-            len(Message.objects.exclude(sender_id=cordelia.id)),
-        )
-        self.assertGreater(len(all_msg_ids), 15)
-
-        # Verify assertions about disjoint-ness.
-        queries = [
-            Message.objects.exclude(sender_id=cordelia.id),
-            Message.objects.filter(sender_id=hamlet.id),
-        ]
-        all_msg_ids = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=13,  # use a different size each time
-        )
-        with self.assertRaises(AssertionError):
-            list(chunker)  # exercise the iterator
-
-        # Try to confuse things with ids part of the query...
-        queries = [
-            Message.objects.filter(id__lte=10),
-            Message.objects.filter(id__gt=10),
-        ]
-        all_msg_ids = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=11,  # use a different size each time
-        )
-        self.assert_length(all_msg_ids, 0)  # until we actually use the iterator
-        list(chunker)  # exhaust the iterator
-        self.assert_length(all_msg_ids, len(Message.objects.all()))
-
-        # Verify that we can just get the first chunk with a next() call.
-        queries = [
-            Message.objects.all(),
-        ]
-        all_msg_ids = set()
-        chunker = query_chunker(
-            queries=queries,
-            id_collector=all_msg_ids,
-            chunk_size=10,  # use a different size each time
-        )
-        first_chunk = next(chunker)
-        self.assert_length(first_chunk, 10)
-        self.assert_length(all_msg_ids, 10)
-        expected_msg = Message.objects.all()[0:10][5]
-        actual_msg = first_chunk[5]
-        self.assertEqual(actual_msg.content, expected_msg.content)
-        self.assertEqual(actual_msg.sender_id, expected_msg.sender_id)
-
-
 class ImportExportTest(ZulipTestCase):
     def setUp(self) -> None:
         super().setUp()
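Note: the commit message only states that the function was unused; it names no replacement. For the common single-query case, Django's built-in QuerySet.iterator(chunk_size=...) provides similar server-side chunking. A hedged sketch, not taken from this commit (process is a stand-in for caller logic):

# Streams rows in id order, fetching chunk_size rows per database round trip.
for message in Message.objects.order_by("id").iterator(chunk_size=1000):
    process(message)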