Files
zulip/tools/run-dev.py
Anders Kaseorg ea6934c26d dependencies: Remove WebSockets system for sending messages.
Zulip has had a small use of WebSockets (specifically, for the code
path of sending messages, via the webapp only) since ~2013.  We
originally added this use of WebSockets in the hope that the latency
benefits of doing so would allow us to avoid implementing a markdown
local echo; they were not.  Further, HTTP/2 may have eliminated the
latency difference we hoped to exploit by using WebSockets in any
case.

While we’d originally imagined using WebSockets for other endpoints,
there was never a good justification for moving more components to the
WebSockets system.

This WebSockets code path had a lot of downsides/complexity,
including:

* The messy hack involving constructing an emulated request object to
  hook into doing Django requests.
* The `message_senders` queue processor system, which increases RAM
  needs and must be provisioned independently from the rest of the
  server).
* A duplicate check_send_receive_time Nagios test specific to
  WebSockets.
* The requirement for users to have their firewalls/NATs allow
  WebSocket connections, and a setting to disable them for networks
  where WebSockets don’t work.
* Dependencies on the SockJS family of libraries, which has at times
  been poorly maintained, and periodically throws random JavaScript
  exceptions in our production environments without a deep enough
  traceback to effectively investigate.
* A total of about 1600 lines of our code related to the feature.
* Increased load on the Tornado system, especially around a Zulip
  server restart, and especially for large installations like
  zulipchat.com, resulting in extra delay before messages can be sent
  again.

As detailed in
https://github.com/zulip/zulip/pull/12862#issuecomment-536152397, it
appears that removing WebSockets moderately increases the time it
takes for the `send_message` API query to return from the server, but
does not significantly change the time between when a message is sent
and when it is received by clients.  We don’t understand the reason
for that change (suggesting the possibility of a measurement error),
and even if it is a real change, we consider that potential small
latency regression to be acceptable.

If we later want WebSockets, we’ll likely want to just use Django
Channels.

Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-01-14 22:34:00 -08:00

375 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import os
import pwd
import signal
import subprocess
import sys
import traceback
from urllib.parse import urlunparse
# check for the venv
from lib import sanity_check
sanity_check.check_venv(__file__)
from tornado import httpclient
from tornado import httputil
from tornado import gen
from tornado import web
from tornado.ioloop import IOLoop
from typing import Any, Callable, Generator, List, Optional
if 'posix' in os.name and os.geteuid() == 0:
raise RuntimeError("run-dev.py should not be run as root.")
parser = argparse.ArgumentParser(description=r"""
Starts the app listening on localhost, for local development.
This script launches the Django and Tornado servers, then runs a reverse proxy
which serves to both of them. After it's all up and running, browse to
http://localhost:9991/
Note that, while runserver and runtornado have the usual auto-restarting
behavior, the reverse proxy itself does *not* automatically restart on changes
to this file.
""",
formatter_class=argparse.RawTextHelpFormatter)
TOOLS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(TOOLS_DIR))
from tools.lib.test_script import (
assert_provisioning_status_ok,
)
parser.add_argument('--test',
action='store_true',
help='Use the testing database and ports')
parser.add_argument('--minify',
action='store_true',
help='Minifies assets for testing in dev')
parser.add_argument('--interface',
action='store',
default=None, help='Set the IP or hostname for the proxy to listen on')
parser.add_argument('--no-clear-memcached',
action='store_false', dest='clear_memcached',
default=True, help='Do not clear memcached')
parser.add_argument('--force',
action="store_true",
default=False, help='Run command despite possible problems.')
parser.add_argument('--enable-tornado-logging',
action="store_true",
default=False, help='Enable access logs from tornado proxy server.')
options = parser.parse_args()
assert_provisioning_status_ok(options.force)
if options.interface is None:
user_id = os.getuid()
user_name = pwd.getpwuid(user_id).pw_name
if user_name in ["vagrant", "zulipdev"]:
# In the Vagrant development environment, we need to listen on
# all ports, and it's safe to do so, because Vagrant is only
# exposing certain guest ports (by default just 9991) to the
# host. The same argument applies to the remote development
# servers using username "zulipdev".
options.interface = None
else:
# Otherwise, only listen to requests on localhost for security.
options.interface = "127.0.0.1"
elif options.interface == "":
options.interface = None
runserver_args = [] # type: List[str]
base_port = 9991
if options.test:
base_port = 9981
settings_module = "zproject.test_settings"
# Don't auto-reload when running casper tests
runserver_args = ['--noreload']
else:
settings_module = "zproject.settings"
manage_args = ['--settings=%s' % (settings_module,)]
os.environ['DJANGO_SETTINGS_MODULE'] = settings_module
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from scripts.lib.zulip_tools import WARNING, ENDC
proxy_port = base_port
django_port = base_port + 1
tornado_port = base_port + 2
webpack_port = base_port + 3
thumbor_port = base_port + 4
os.chdir(os.path.join(os.path.dirname(__file__), '..'))
# Clean up stale .pyc files etc.
subprocess.check_call('./tools/clean-repo')
if options.clear_memcached:
print("Clearing memcached ...")
subprocess.check_call('./scripts/setup/flush-memcached')
# Set up a new process group, so that we can later kill run{server,tornado}
# and all of the processes they spawn.
os.setpgrp()
# Save pid of parent process to the pid file. It can be used later by
# tools/stop-run-dev to kill the server without having to find the
# terminal in question.
if options.test:
pid_file_path = os.path.join(os.path.join(os.getcwd(), 'var/casper/run_dev.pid'))
else:
pid_file_path = os.path.join(os.path.join(os.getcwd(), 'var/run/run_dev.pid'))
# Required for compatibility python versions.
if not os.path.exists(os.path.dirname(pid_file_path)):
os.makedirs(os.path.dirname(pid_file_path))
with open(pid_file_path, 'w+') as f:
f.write(str(os.getpgrp()) + "\n")
# Pass --nostatic because we configure static serving ourselves in
# zulip/urls.py.
cmds = [['./manage.py', 'runserver'] +
manage_args + runserver_args + ['127.0.0.1:%d' % (django_port,)],
['env', 'PYTHONUNBUFFERED=1', './manage.py', 'runtornado'] +
manage_args + ['127.0.0.1:%d' % (tornado_port,)],
['./manage.py', 'process_queue', '--all'] + manage_args,
['env', 'PGHOST=127.0.0.1', # Force password authentication using .pgpass
'./puppet/zulip/files/postgresql/process_fts_updates'],
['./manage.py', 'deliver_scheduled_messages'],
['/srv/zulip-thumbor-venv/bin/thumbor', '-c', './zthumbor/thumbor.conf',
'-p', '%s' % (thumbor_port,)]]
if options.test:
# We just need to compile webpack assets once at startup, not run a daemon,
# in test mode. Additionally, webpack-dev-server doesn't support running 2
# copies on the same system, so this model lets us run the casper tests
# with a running development server.
subprocess.check_call(['./tools/webpack', '--quiet', '--test'])
else:
webpack_cmd = ['./tools/webpack', '--watch', '--port', str(webpack_port)]
if options.minify:
webpack_cmd.append('--minify')
if options.interface is None:
# If interface is None and we're listening on all ports, we also need
# to disable the webpack host check so that webpack will serve assets.
webpack_cmd.append('--disable-host-check')
if options.interface:
webpack_cmd += ["--host", options.interface]
else:
webpack_cmd += ["--host", "0.0.0.0"]
cmds.append(webpack_cmd)
for cmd in cmds:
subprocess.Popen(cmd)
def transform_url(protocol, path, query, target_port, target_host):
# type: (str, str, str, int, str) -> str
# generate url with target host
host = ":".join((target_host, str(target_port)))
# Here we are going to rewrite the path a bit so that it is in parity with
# what we will have for production
if path.startswith('/thumbor'):
path = path[len('/thumbor'):]
newpath = urlunparse((protocol, host, path, '', query, ''))
return newpath
@gen.engine
def fetch_request(url, callback, **kwargs):
# type: (str, Any, **Any) -> Generator[Callable[..., Any], Any, None]
# use large timeouts to handle polling requests
req = httpclient.HTTPRequest(
url,
connect_timeout=240.0,
request_timeout=240.0,
decompress_response=False,
**kwargs
)
client = httpclient.AsyncHTTPClient()
# wait for response
response = yield gen.Task(client.fetch, req)
callback(response)
class BaseHandler(web.RequestHandler):
# target server ip
target_host = '127.0.0.1' # type: str
# target server port
target_port = None # type: int
def _add_request_headers(self, exclude_lower_headers_list=None):
# type: (Optional[List[str]]) -> httputil.HTTPHeaders
exclude_lower_headers_list = exclude_lower_headers_list or []
headers = httputil.HTTPHeaders()
for header, v in self.request.headers.get_all():
if header.lower() not in exclude_lower_headers_list:
headers.add(header, v)
return headers
def get(self):
# type: () -> None
pass
def head(self):
# type: () -> None
pass
def post(self):
# type: () -> None
pass
def put(self):
# type: () -> None
pass
def patch(self):
# type: () -> None
pass
def options(self):
# type: () -> None
pass
def delete(self):
# type: () -> None
pass
def handle_response(self, response):
# type: (Any) -> None
if response.error and not isinstance(response.error, httpclient.HTTPError):
self.set_status(500)
self.write('Internal server error:\n' + str(response.error))
else:
self.set_status(response.code, response.reason)
self._headers = httputil.HTTPHeaders() # clear tornado default header
for header, v in response.headers.get_all():
# some header appear multiple times, eg 'Set-Cookie'
self.add_header(header, v)
if response.body:
self.write(response.body)
self.finish()
@web.asynchronous
def prepare(self):
# type: () -> None
if 'X-REAL-IP' not in self.request.headers:
self.request.headers['X-REAL-IP'] = self.request.remote_ip
if 'X-FORWARDED_PORT' not in self.request.headers:
self.request.headers['X-FORWARDED-PORT'] = str(proxy_port)
url = transform_url(
self.request.protocol,
self.request.path,
self.request.query,
self.target_port,
self.target_host,
)
try:
fetch_request(
url=url,
callback=self.handle_response,
method=self.request.method,
headers=self._add_request_headers(["upgrade-insecure-requests"]),
follow_redirects=False,
body=getattr(self.request, 'body'),
allow_nonstandard_methods=True
)
except httpclient.HTTPError as e:
if hasattr(e, 'response') and e.response:
self.handle_response(e.response)
else:
self.set_status(500)
self.write('Internal server error:\n' + str(e))
self.finish()
class WebPackHandler(BaseHandler):
target_port = webpack_port
class DjangoHandler(BaseHandler):
target_port = django_port
class TornadoHandler(BaseHandler):
target_port = tornado_port
class ThumborHandler(BaseHandler):
target_port = thumbor_port
class Application(web.Application):
def __init__(self, enable_logging=False):
# type: (bool) -> None
handlers = [
(r"/json/events.*", TornadoHandler),
(r"/api/v1/events.*", TornadoHandler),
(r"/webpack.*", WebPackHandler),
(r"/thumbor.*", ThumborHandler),
(r"/.*", DjangoHandler)
]
super().__init__(handlers, enable_logging=enable_logging)
def log_request(self, handler):
# type: (BaseHandler) -> None
if self.settings['enable_logging']:
super().log_request(handler)
def on_shutdown():
# type: () -> None
IOLoop.instance().stop()
def shutdown_handler(*args, **kwargs):
# type: (*Any, **Any) -> None
io_loop = IOLoop.instance()
if io_loop._callbacks:
io_loop.call_later(1, shutdown_handler)
else:
io_loop.stop()
# log which services/ports will be started
print("Starting Zulip services on ports: web proxy: {},".format(proxy_port),
"Django: {}, Tornado: {}, Thumbor: {}".format(django_port, tornado_port, thumbor_port),
end='')
if options.test:
print("") # no webpack for --test
else:
print(", webpack: {}".format(webpack_port))
print("".join((WARNING,
"Note: only port {} is exposed to the host in a Vagrant environment.".format(
proxy_port), ENDC)))
try:
app = Application(enable_logging=options.enable_tornado_logging)
try:
app.listen(proxy_port, address=options.interface)
except OSError as e:
if e.errno == 98:
print('\n\nERROR: You probably have another server running!!!\n\n')
raise
ioloop = IOLoop.instance()
for s in (signal.SIGINT, signal.SIGTERM):
signal.signal(s, shutdown_handler)
ioloop.start()
except Exception:
# Print the traceback before we get SIGTERM and die.
traceback.print_exc()
raise
finally:
# Kill everything in our process group.
os.killpg(0, signal.SIGTERM)
# Remove pid file when development server closed correctly.
os.remove(pid_file_path)