Files
zulip/zerver/management/commands/runtornado.py
Alex Vandiver d587252ddb tornado: Move SIGTERM shutdown handler into a callback.
A SIGTERM can show up at any point in the ioloop, even in places which
are not prepared to handle it.  This results in the process ignoring
the `sys.exit` which the SIGTERM handler calls, with an uncaught
SystemExit exception:

```
2021-11-09 15:37:49.368 ERR  [tornado.application:9803] Uncaught exception
Traceback (most recent call last):
  File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/http1connection.py", line 238, in _read_message
    delegate.finish()
  File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/httpserver.py", line 314, in finish
    self.delegate.finish()
  File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/routing.py", line 251, in finish
    self.delegate.finish()
  File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/web.py", line 2097, in finish
    self.execute()
  File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/web.py", line 2130, in execute
    **self.path_kwargs)
  File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/gen.py", line 307, in wrapper
    yielded = next(result)
  File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/web.py", line 1510, in _execute
    result = method(*self.path_args, **self.path_kwargs)
  File "/home/zulip/deployments/2021-11-08-05-10-23/zerver/tornado/handlers.py", line 150, in get
    request = self.convert_tornado_request_to_django_request()
  File "/home/zulip/deployments/2021-11-08-05-10-23/zerver/tornado/handlers.py", line 113, in convert_tornado_request_to_django_request
    request = WSGIRequest(environ)
  File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/django/core/handlers/wsgi.py", line 66, in __init__
    script_name = get_script_name(environ)
  File "/home/zulip/deployments/2021-11-08-05-10-23/zerver/tornado/event_queue.py", line 611, in <lambda>
    signal.signal(signal.SIGTERM, lambda signum, stack: sys.exit(1))
SystemExit: 1
```

Supervisor then terminates the process with a SIGKILL, which results
in dropping data held in the tornado process, as it does not dump its
queue.

The only command which is safe to run in the signal handler is
`ioloop.add_callback_from_signal`, which schedules the callback to run
during the course of the normal ioloop.  This callback does an
orderly shutdown of the server and the ioloop before exiting.

(cherry picked from commit bc5539d871)
2021-11-12 09:59:58 -08:00

121 lines
4.3 KiB
Python

import logging
import sys
from typing import Any, Callable
from urllib.parse import SplitResult
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError, CommandParser
from tornado import ioloop
from tornado.log import app_log
# We must call zerver.tornado.ioloop_logging.instrument_tornado_ioloop
# before we import anything else from our project in order for our
# Tornado load logging to work; otherwise we might accidentally import
# zerver.lib.queue (which will instantiate the Tornado ioloop) before
# this.
from zerver.tornado.ioloop_logging import instrument_tornado_ioloop
settings.RUNNING_INSIDE_TORNADO = True
instrument_tornado_ioloop()
from zerver.lib.debug import interactive_debug_listen
from zerver.tornado.application import create_tornado_application, setup_tornado_rabbitmq
from zerver.tornado.autoreload import start as zulip_autoreload_start
from zerver.tornado.event_queue import (
add_client_gc_hook,
get_wrapped_process_notification,
missedmessage_hook,
setup_event_queue,
)
from zerver.tornado.sharding import notify_tornado_queue_name
if settings.USING_RABBITMQ:
from zerver.lib.queue import TornadoQueueClient, get_queue_client
def handle_callback_exception(callback: Callable[..., Any]) -> None:
    """Log the exception currently being handled for a failed ioloop callback.

    Two records are emitted, both at ERROR level and both carrying the
    active exception's traceback: one through the root logger (which also
    includes the current call stack), and one through Tornado's
    ``app_log`` that names the offending ``callback``.
    """
    # Root logger: traceback plus the stack that led to this handler.
    logging.error("Exception in callback", exc_info=True, stack_info=True)
    # Tornado application logger: traceback plus the callback's repr.
    app_log.exception("Exception in callback %r", callback)
class Command(BaseCommand):
    """Management command that runs a Tornado HTTP server wrapping Django."""

    help = "Starts a Tornado Web server wrapping Django."

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the positional ``addrport`` argument on ``parser``."""
        parser.add_argument(
            "addrport",
            nargs="?",
            help="[optional port number or ipaddr:port]\n "
            "(use multiple ports to start multiple servers)",
        )

    def handle(self, addrport: str, **options: bool) -> None:
        """Parse ``addrport`` and run the Tornado server until interrupted.

        ``addrport`` is either a bare port number or ``ipaddr:port``; when
        no address is given the server binds to ``127.0.0.1``.

        Raises:
            CommandError: if ``addrport`` is missing or does not contain a
                valid port number.
        """
        interactive_debug_listen()

        import django
        from tornado import httpserver

        # The argument is declared with nargs="?" and no default, so argparse
        # hands us None when it is omitted.  Report that as a usage error
        # rather than failing with an AttributeError on None.
        if addrport is None:
            raise CommandError("A port number or ipaddr:port is required.")

        if addrport.isdigit():
            addr, port = "", int(addrport)
        else:
            r = SplitResult("", addrport, "", "", "")
            # Reading .port raises ValueError for a non-numeric or
            # out-of-range port; treat that the same as a missing port.
            try:
                parsed_port = r.port
            except ValueError:
                parsed_port = None
            if parsed_port is None:
                raise CommandError(f"{addrport!r} does not have a valid port number.")
            addr, port = r.hostname or "", parsed_port

        if not addr:
            addr = "127.0.0.1"

        if settings.DEBUG:
            logging.basicConfig(
                level=logging.INFO, format="%(asctime)s %(levelname)-8s %(message)s"
            )

        def inner_run() -> None:
            """Set up the application, queues and HTTP server, then start the ioloop."""
            from django.conf import settings
            from django.utils import translation

            translation.activate(settings.LANGUAGE_CODE)

            # We pass display_num_errors=False, since Django will
            # likely display similar output anyway.
            self.check(display_num_errors=False)
            print(f"Tornado server (re)started on port {port}")

            if settings.USING_RABBITMQ:
                queue_client = get_queue_client()
                assert isinstance(queue_client, TornadoQueueClient)
                # Process notifications received via RabbitMQ
                queue_name = notify_tornado_queue_name(port)
                queue_client.start_json_consumer(
                    queue_name, get_wrapped_process_notification(queue_name)
                )

            try:
                # Application is an instance of Django's standard wsgi handler.
                application = create_tornado_application()
                if settings.AUTORELOAD:
                    zulip_autoreload_start()

                # start tornado web server in single-threaded mode
                http_server = httpserver.HTTPServer(application, xheaders=True)
                http_server.listen(port, address=addr)

                from zerver.tornado.ioloop_logging import logging_data

                logging_data["port"] = str(port)
                setup_event_queue(http_server, port)
                add_client_gc_hook(missedmessage_hook)
                setup_tornado_rabbitmq()

                instance = ioloop.IOLoop.instance()

                if django.conf.settings.DEBUG:
                    instance.set_blocking_log_threshold(5)
                    instance.handle_callback_exception = handle_callback_exception
                instance.start()
            except KeyboardInterrupt:
                # Ctrl-C is a normal way to stop the server; exit cleanly.
                sys.exit(0)

        inner_run()