dependencies: Remove WebSockets system for sending messages.

Zulip has had a small use of WebSockets (specifically, for the code path of sending messages, via the webapp only) since ~2013. We originally added this use of WebSockets in the hope that the latency benefits of doing so would allow us to avoid implementing a markdown local echo; they were not. Further, HTTP/2 may have eliminated the latency difference we hoped to exploit by using WebSockets in any case. While we’d originally imagined using WebSockets for other endpoints, there was never a good justification for moving more components to the WebSockets system. This WebSockets code path had a lot of downsides/complexity, including: * The messy hack involving constructing an emulated request object to hook into doing Django requests. * The `message_senders` queue processor system, which increases RAM needs and must be provisioned independently from the rest of the server). * A duplicate check_send_receive_time Nagios test specific to WebSockets. * The requirement for users to have their firewalls/NATs allow WebSocket connections, and a setting to disable them for networks where WebSockets don’t work. * Dependencies on the SockJS family of libraries, which has at times been poorly maintained, and periodically throws random JavaScript exceptions in our production environments without a deep enough traceback to effectively investigate. * A total of about 1600 lines of our code related to the feature. * Increased load on the Tornado system, especially around a Zulip server restart, and especially for large installations like zulipchat.com, resulting in extra delay before messages can be sent again. As detailed in https://github.com/zulip/zulip/pull/12862#issuecomment-536152397, it appears that removing WebSockets moderately increases the time it takes for the `send_message` API query to return from the server, but does not significantly change the time between when a message is sent and when it is received by clients. We don’t understand the reason for that change (suggesting the possibility of a measurement error), and even if it is a real change, we consider that potential small latency regression to be acceptable. If we later want WebSockets, we’ll likely want to just use Django Channels. Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2025-11-04 05:53:43 +00:00 · 2019-07-22 16:43:40 -07:00
parent 6fc2a317e9
commit ea6934c26d
50 changed files with 48 additions and 4081 deletions
--- a/zerver/worker/queue_processors.py
+++ b/zerver/worker/queue_processors.py
@@ -13,8 +13,6 @@ import socket

 from django.conf import settings
 from django.db import connection
-from django.core.handlers.wsgi import WSGIRequest
-from django.core.handlers.base import BaseHandler
 from zerver.models import \
    get_client, get_system_bot, PreregistrationUser, \
    get_user_profile_by_id, Message, Realm, UserMessage, UserProfile, \
@@ -22,7 +20,7 @@ from zerver.models import \
 from zerver.lib.context_managers import lockfile
 from zerver.lib.error_notify import do_report_error
 from zerver.lib.feedback import handle_feedback
-from zerver.lib.queue import SimpleQueueClient, queue_json_publish, retry_event
+from zerver.lib.queue import SimpleQueueClient, retry_event
 from zerver.lib.timestamp import timestamp_to_datetime
 from zerver.lib.email_notifications import handle_missedmessage_emails
 from zerver.lib.push_notifications import handle_push_notification, handle_remove_push_notification, \
@@ -38,9 +36,7 @@ from zerver.lib.send_email import send_future_email, send_email_from_dict, \
 from zerver.lib.email_mirror import process_message as mirror_email, rate_limit_mirror_by_realm, \
    is_missed_message_address, decode_stream_email_address
 from zerver.lib.streams import access_stream_by_id
-from zerver.tornado.socket import req_redis_key, respond_send_message
 from zerver.lib.db import reset_queries
-from zerver.lib.redis_utils import get_redis_client
 from zerver.context_processors import common_context
 from zerver.lib.outgoing_webhook import do_rest_call, get_outgoing_webhook_service_handler
 from zerver.models import get_bot_services, RealmAuditLog
@@ -51,7 +47,6 @@ from zerver.lib.export import export_realm_wrapper
 from zerver.lib.remote_server import PushNotificationBouncerRetryLaterError

 import os
-import sys
 import ujson
 from collections import defaultdict
 import email
@@ -59,7 +54,6 @@ import time
 import datetime
 import logging
 import requests
-from io import StringIO
 import urllib

 logger = logging.getLogger(__name__)
@@ -484,66 +478,6 @@ class SlowQueryWorker(LoopQueueProcessingWorker):
            internal_send_message(error_bot_realm, settings.ERROR_BOT,
                                  "stream", settings.SLOW_QUERY_LOGS_STREAM, topic, content)

-@assign_queue("message_sender")
-class MessageSenderWorker(QueueProcessingWorker):
-    def __init__(self) -> None:
-        super().__init__()
-        self.redis_client = get_redis_client()
-        self.handler = BaseHandler()
-        self.handler.load_middleware()
-
-    def consume(self, event: Mapping[str, Any]) -> None:
-        server_meta = event['server_meta']
-
-        environ = {
-            'REQUEST_METHOD': 'SOCKET',
-            'SCRIPT_NAME': '',
-            'PATH_INFO': '/json/messages',
-            'SERVER_NAME': '127.0.0.1',
-            'SERVER_PORT': 9993,
-            'SERVER_PROTOCOL': 'ZULIP_SOCKET/1.0',
-            'wsgi.version': (1, 0),
-            'wsgi.input': StringIO(),
-            'wsgi.errors': sys.stderr,
-            'wsgi.multithread': False,
-            'wsgi.multiprocess': True,
-            'wsgi.run_once': False,
-            'zulip.emulated_method': 'POST'
-        }
-
-        if 'socket_user_agent' in event['request']:
-            environ['HTTP_USER_AGENT'] = event['request']['socket_user_agent']
-            del event['request']['socket_user_agent']
-
-        # We're mostly using a WSGIRequest for convenience
-        environ.update(server_meta['request_environ'])
-        request = WSGIRequest(environ)
-        # Note: If we ever support non-POST methods, we'll need to change this.
-        request._post = event['request']
-        request.csrf_processing_done = True
-
-        user_profile = get_user_profile_by_id(server_meta['user_id'])
-        request._cached_user = user_profile
-
-        resp = self.handler.get_response(request)
-        server_meta['time_request_finished'] = time.time()
-        server_meta['worker_log_data'] = request._log_data
-
-        resp_content = resp.content.decode('utf-8')
-        response_data = ujson.loads(resp_content)
-        if response_data['result'] == 'error':
-            check_and_send_restart_signal()
-
-        result = {'response': response_data, 'req_id': event['req_id'],
-                  'server_meta': server_meta}
-
-        redis_key = req_redis_key(event['req_id'])
-        self.redis_client.hmset(redis_key, {'status': 'complete',
-                                            'response': resp_content})
-
-        queue_json_publish(server_meta['return_queue'], result,
-                           respond_send_message)
-
@assign_queue('digest_emails')
 class DigestWorker(QueueProcessingWorker):  # nocoverage
    # Who gets a digest is entirely determined by the enqueue_digest_emails