mirror of
https://github.com/zulip/zulip.git
synced 2025-11-04 14:03:30 +00:00
queue: Only ACK drain_queue once it has completed work on the list.
Currently, drain_queue and json_drain_queue ack every message as it is pulled off of the queue, until the queue is empty. This means that if the consumer crashes between pulling a batch of messages off the queue, and actually processing them, those messages will be permanently lost. Sending an ACK on every message also results in a significant amount of traffic to rabbitmq, with notable performance implications. Send a singular ACK after the processing has completed, by making `drain_queue` into a contextmanager. Additionally, use the `multiple` flag to ACK all of the messages at once -- or explicitly NACK the messages if processing failed. Sending a NACK will re-queue them at the front of the queue. Performance of a no-op dequeue before this change: ``` $ ./manage.py queue_rate --count 50000 --batch Purging queue... Enqueue rate: 10847 / sec Dequeue rate: 2479 / sec ``` Performance of a no-op dequeue after this change (a 25% increase): ``` $ ./manage.py queue_rate --count 50000 --batch Purging queue... Enqueue rate: 10752 / sec Dequeue rate: 3079 / sec ```
This commit is contained in:
committed by
Tim Abbott
parent
df86a564dc
commit
baf882a133
@@ -3,7 +3,8 @@ import random
|
|||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from typing import Any, Callable, Dict, List, Mapping, Optional, Set
|
from contextlib import contextmanager
|
||||||
|
from typing import Any, Callable, Dict, Iterator, List, Mapping, Optional, Set
|
||||||
|
|
||||||
import orjson
|
import orjson
|
||||||
import pika
|
import pika
|
||||||
@@ -167,25 +168,40 @@ class SimpleQueueClient:
|
|||||||
callback(orjson.loads(body))
|
callback(orjson.loads(body))
|
||||||
self.register_consumer(queue_name, wrapped_callback)
|
self.register_consumer(queue_name, wrapped_callback)
|
||||||
|
|
||||||
def drain_queue(self, queue_name: str) -> List[bytes]:
|
@contextmanager
|
||||||
"Returns all messages in the desired queue"
|
def drain_queue(self, queue_name: str) -> Iterator[List[bytes]]:
|
||||||
|
"""As a contextmanger, yields all messages in the desired queue.
|
||||||
|
|
||||||
|
NACKs all of the messages if the block throws an exception,
|
||||||
|
ACKs them otherwise.
|
||||||
|
"""
|
||||||
messages = []
|
messages = []
|
||||||
|
max_tag: Optional[int] = None
|
||||||
|
|
||||||
def opened(channel: BlockingChannel) -> None:
|
def opened(channel: BlockingChannel) -> None:
|
||||||
|
nonlocal max_tag
|
||||||
while True:
|
while True:
|
||||||
(meta, _, message) = channel.basic_get(queue_name)
|
(meta, _, message) = channel.basic_get(queue_name)
|
||||||
|
|
||||||
if message is None:
|
if message is None:
|
||||||
break
|
break
|
||||||
|
max_tag = meta.delivery_tag
|
||||||
channel.basic_ack(meta.delivery_tag)
|
|
||||||
messages.append(message)
|
messages.append(message)
|
||||||
|
|
||||||
self.ensure_queue(queue_name, opened)
|
self.ensure_queue(queue_name, opened)
|
||||||
return messages
|
assert self.channel is not None
|
||||||
|
try:
|
||||||
|
yield messages
|
||||||
|
if max_tag:
|
||||||
|
self.channel.basic_ack(max_tag, multiple=True)
|
||||||
|
except Exception:
|
||||||
|
if max_tag:
|
||||||
|
self.channel.basic_nack(max_tag, multiple=True)
|
||||||
|
raise
|
||||||
|
|
||||||
def json_drain_queue(self, queue_name: str) -> List[Dict[str, Any]]:
|
@contextmanager
|
||||||
return list(map(orjson.loads, self.drain_queue(queue_name)))
|
def json_drain_queue(self, queue_name: str) -> Iterator[List[Dict[str, Any]]]:
|
||||||
|
with self.drain_queue(queue_name) as binary_messages:
|
||||||
|
yield list(map(orjson.loads, binary_messages))
|
||||||
|
|
||||||
def queue_size(self) -> int:
|
def queue_size(self) -> int:
|
||||||
assert self.channel is not None
|
assert self.channel is not None
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ class TestQueueImplementation(ZulipTestCase):
|
|||||||
queue_client = get_queue_client()
|
queue_client = get_queue_client()
|
||||||
queue_client.publish("test_suite", b"test_event\x00\xff")
|
queue_client.publish("test_suite", b"test_event\x00\xff")
|
||||||
|
|
||||||
result = queue_client.drain_queue("test_suite")
|
with queue_client.drain_queue("test_suite") as result:
|
||||||
self.assertEqual(result, [b"test_event\x00\xff"])
|
self.assertEqual(result, [b"test_event\x00\xff"])
|
||||||
|
|
||||||
@override_settings(USING_RABBITMQ=True)
|
@override_settings(USING_RABBITMQ=True)
|
||||||
@@ -32,7 +32,27 @@ class TestQueueImplementation(ZulipTestCase):
|
|||||||
queue_json_publish("test_suite", {"event": "my_event"})
|
queue_json_publish("test_suite", {"event": "my_event"})
|
||||||
|
|
||||||
queue_client = get_queue_client()
|
queue_client = get_queue_client()
|
||||||
result = queue_client.json_drain_queue("test_suite")
|
with queue_client.json_drain_queue("test_suite") as result:
|
||||||
|
self.assertEqual(len(result), 1)
|
||||||
|
self.assertEqual(result[0]['event'], 'my_event')
|
||||||
|
|
||||||
|
@override_settings(USING_RABBITMQ=True)
|
||||||
|
def test_queue_basics_json_error(self) -> None:
|
||||||
|
queue_json_publish("test_suite", {"event": "my_event"})
|
||||||
|
|
||||||
|
queue_client = get_queue_client()
|
||||||
|
raised = False
|
||||||
|
try:
|
||||||
|
with queue_client.json_drain_queue("test_suite") as result:
|
||||||
|
self.assertEqual(len(result), 1)
|
||||||
|
self.assertEqual(result[0]['event'], 'my_event')
|
||||||
|
raise ValueError()
|
||||||
|
except ValueError:
|
||||||
|
raised = True
|
||||||
|
assert raised
|
||||||
|
|
||||||
|
# Still in the queue to be fetched
|
||||||
|
with queue_client.json_drain_queue("test_suite") as result:
|
||||||
self.assertEqual(len(result), 1)
|
self.assertEqual(len(result), 1)
|
||||||
self.assertEqual(result[0]['event'], 'my_event')
|
self.assertEqual(result[0]['event'], 'my_event')
|
||||||
|
|
||||||
@@ -104,12 +124,13 @@ class TestQueueImplementation(ZulipTestCase):
|
|||||||
'WARNING:zulip.queue:Failed to send to rabbitmq, trying to reconnect and send again'
|
'WARNING:zulip.queue:Failed to send to rabbitmq, trying to reconnect and send again'
|
||||||
])
|
])
|
||||||
|
|
||||||
result = queue_client.json_drain_queue("test_suite")
|
with queue_client.json_drain_queue("test_suite") as result:
|
||||||
self.assertEqual(len(result), 1)
|
self.assertEqual(len(result), 1)
|
||||||
self.assertEqual(result[0]['event'], 'my_event')
|
self.assertEqual(result[0]['event'], 'my_event')
|
||||||
|
|
||||||
@override_settings(USING_RABBITMQ=True)
|
@override_settings(USING_RABBITMQ=True)
|
||||||
def tearDown(self) -> None:
|
def tearDown(self) -> None:
|
||||||
queue_client = get_queue_client()
|
queue_client = get_queue_client()
|
||||||
queue_client.drain_queue("test_suite")
|
with queue_client.drain_queue("test_suite"):
|
||||||
|
pass
|
||||||
super().tearDown()
|
super().tearDown()
|
||||||
|
|||||||
@@ -2,7 +2,8 @@ import base64
|
|||||||
import os
|
import os
|
||||||
import smtplib
|
import smtplib
|
||||||
import time
|
import time
|
||||||
from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple
|
from contextlib import contextmanager
|
||||||
|
from typing import Any, Callable, Dict, Iterator, List, Mapping, Optional, Tuple
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
import orjson
|
import orjson
|
||||||
@@ -58,7 +59,8 @@ class WorkerTest(ZulipTestCase):
|
|||||||
callback(data)
|
callback(data)
|
||||||
self.queue = []
|
self.queue = []
|
||||||
|
|
||||||
def json_drain_queue(self, queue_name: str) -> List[Event]:
|
@contextmanager
|
||||||
|
def json_drain_queue(self, queue_name: str) -> Iterator[List[Event]]:
|
||||||
events = [
|
events = [
|
||||||
dct
|
dct
|
||||||
for (queue_name, dct)
|
for (queue_name, dct)
|
||||||
@@ -70,7 +72,7 @@ class WorkerTest(ZulipTestCase):
|
|||||||
# queues, which was a bug at one point.
|
# queues, which was a bug at one point.
|
||||||
self.queue = []
|
self.queue = []
|
||||||
|
|
||||||
return events
|
yield events
|
||||||
|
|
||||||
def queue_size(self) -> int:
|
def queue_size(self) -> int:
|
||||||
return len(self.queue)
|
return len(self.queue)
|
||||||
|
|||||||
@@ -324,7 +324,7 @@ class LoopQueueProcessingWorker(QueueProcessingWorker):
|
|||||||
self.initialize_statistics()
|
self.initialize_statistics()
|
||||||
self.is_consuming = True
|
self.is_consuming = True
|
||||||
while self.is_consuming:
|
while self.is_consuming:
|
||||||
events = self.q.json_drain_queue(self.queue_name)
|
with self.q.json_drain_queue(self.queue_name) as events:
|
||||||
self.do_consume(self.consume_batch, events)
|
self.do_consume(self.consume_batch, events)
|
||||||
# To avoid spinning the CPU, we go to sleep if there's
|
# To avoid spinning the CPU, we go to sleep if there's
|
||||||
# nothing in the queue, or for certain queues with
|
# nothing in the queue, or for certain queues with
|
||||||
|
|||||||
Reference in New Issue
Block a user