Files
zulip/scripts/restart-server
Tim Abbott 0f1bdcc46f restart-server: Restart Tornado processes individually.
After some testing, I've confirmed that this seems to behave
significantly better in terms of the number of failed requests due to
Tornado being the process of restarting compared with the previous
version, as each individual process is only down for a short time,
rather than all of them being down at once.
2020-03-27 06:23:34 -07:00

110 lines
4.3 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import configparser
import os
import sys
import pwd
import subprocess
import logging
import time
import shlex
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from scripts.lib.zulip_tools import ENDC, OKGREEN, WARNING, DEPLOYMENTS_DIR, overwrite_symlink
logging.Formatter.converter = time.gmtime
logging.basicConfig(format="%(asctime)s restart-server: %(message)s",
level=logging.INFO)
parser = argparse.ArgumentParser()
parser.add_argument('--fill-cache', action='store_true', dest='fill_cache', default=False,
help='Fill the memcached caches')
args = parser.parse_args()
deploy_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
os.chdir(deploy_path)
if pwd.getpwuid(os.getuid()).pw_name != "zulip":
logging.error("Must be run as user 'zulip'.")
sys.exit(1)
# Send a statsd event on restarting the server
subprocess.check_call(["./manage.py", "send_stats", "incr", "events.server_restart", str(int(time.time()))])
if args.fill_cache:
logging.info("Filling memcached caches")
subprocess.check_call(["./manage.py", "fill_memcached_caches"])
core_server_services = ["zulip-django"]
if os.path.exists("/etc/supervisor/conf.d/thumbor.conf"):
core_server_services.append("zulip-thumbor")
current_symlink = os.path.join(DEPLOYMENTS_DIR, "current")
last_symlink = os.path.join(DEPLOYMENTS_DIR, "last")
change_symlink = os.readlink(current_symlink) != deploy_path
if change_symlink:
overwrite_symlink(os.readlink(current_symlink), last_symlink)
overwrite_symlink(deploy_path, current_symlink)
config_file = configparser.RawConfigParser()
config_file.read("/etc/zulip/zulip.conf")
try:
tornado_processes = int(config_file.get('application_server', 'tornado_processes'))
except (configparser.NoSectionError, configparser.NoOptionError):
tornado_processes = 1
# We restart just the zulip-tornado service early, in order to
# minimize downtime of the tornado service caused by too many Python
# processes restarting at the same time resulting in it receiving
# insufficient priority. This is important, because Tornado is the
# main source of user-visible downtime when we restart a Zulip server.
if tornado_processes > 1:
for p in range(9800, 9800+tornado_processes):
# Restart Tornado processes individually for a better rate of
# restarts. This also avoids behavior with restarting a whole
# supervisord group where if any individual process is slow to
# stop, the whole bundle stays stopped for an extended time.
logging.info("Restarting Tornado process on port %s" % (p,))
subprocess.check_call(["supervisorctl", "restart", "zulip-tornado:port-%s" % (p,)])
else:
logging.info("Restarting Tornado process")
subprocess.check_call(["supervisorctl", "restart", "zulip-tornado", "zulip-tornado:*"])
# Restart the uWSGI and related processes via supervisorctl.
logging.info("Stopping workers")
subprocess.check_call(["supervisorctl", "stop", "zulip-workers:*"])
logging.info("Stopping server core")
subprocess.check_call(["supervisorctl", "stop"] + core_server_services)
logging.info("Starting server core")
subprocess.check_call(["supervisorctl", "start"] + core_server_services)
logging.info("Starting workers")
subprocess.check_call(["supervisorctl", "start", "zulip-workers:*"])
using_sso = subprocess.check_output(['./scripts/get-django-setting', 'USING_APACHE_SSO'])
if using_sso.strip() == b'True':
logging.info("Restarting Apache WSGI process...")
subprocess.check_call(["pkill", "-f", "apache2", "-u", "zulip"])
if os.path.exists("/etc/supervisor/conf.d/zulip_db.conf"):
subprocess.check_call(["supervisorctl", "restart", "process-fts-updates"])
logging.info("Done!")
print(OKGREEN + "Application restarted successfully!" + ENDC)
if change_symlink and "PWD" in os.environ:
for symlink in [last_symlink, current_symlink]:
if os.path.commonprefix([os.environ["PWD"], symlink]) == symlink:
print(
"""
%sYour shell entered its current directory through a symlink:
%s
which has now changed. Your shell will not see this change until you run:
cd %s
to traverse the symlink again.%s
"""
% (WARNING, symlink, shlex.quote(os.environ["PWD"]), ENDC),
file=sys.stderr,
)