mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 04:52:12 +00:00
After some testing, I've confirmed that this seems to behave significantly better in terms of the number of failed requests due to Tornado being in the process of restarting, compared with the previous version, as each individual process is only down for a short time, rather than all of them being down at once.
110 lines
4.3 KiB
Python
Executable File
110 lines
4.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import argparse
|
|
import configparser
|
|
import os
|
|
import sys
|
|
import pwd
|
|
import subprocess
|
|
import logging
|
|
import time
|
|
import shlex
|
|
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
|
|
from scripts.lib.zulip_tools import ENDC, OKGREEN, WARNING, DEPLOYMENTS_DIR, overwrite_symlink
|
|
|
|
# Configure root logging for this script: INFO level, every line tagged
# with "restart-server".  Timestamps are rendered via time.gmtime, i.e. in
# UTC rather than local time.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s restart-server: %(message)s",
)
logging.Formatter.converter = time.gmtime
|
|
|
|
# Command-line interface: the only option is --fill-cache, a boolean flag
# (off by default) stored as args.fill_cache.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--fill-cache',
    dest='fill_cache',
    action='store_true',
    default=False,
    help='Fill the memcached caches',
)
args = parser.parse_args()
|
|
|
|
# The deployment root is the parent of the directory holding this script,
# with symlinks resolved.  All relative paths used below (./manage.py,
# ./scripts/...) are resolved against it.
deploy_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir))
os.chdir(deploy_path)

# Refuse to run under any account other than "zulip".
running_user = pwd.getpwuid(os.getuid()).pw_name
if running_user != "zulip":
    logging.error("Must be run as user 'zulip'.")
    sys.exit(1)
|
|
|
|
# Report the restart to statsd: increment "events.server_restart",
# passing the current Unix time (whole seconds) as the final argument.
restart_timestamp = str(int(time.time()))
subprocess.check_call(
    ["./manage.py", "send_stats", "incr", "events.server_restart", restart_timestamp]
)

# Optionally populate memcached before the services are restarted.
if args.fill_cache:
    logging.info("Filling memcached caches")
    subprocess.check_call(["./manage.py", "fill_memcached_caches"])
|
|
|
|
# Supervisor programs that make up the "server core".  zulip-thumbor is
# included only when its supervisor configuration file is present.
core_server_services = ["zulip-django"]
if os.path.exists("/etc/supervisor/conf.d/thumbor.conf"):
    core_server_services += ["zulip-thumbor"]
|
|
|
|
# Rotate the deployment symlinks when "current" does not already point at
# this deployment: "last" takes over the old target of "current", and
# "current" is repointed at deploy_path.
current_symlink = os.path.join(DEPLOYMENTS_DIR, "current")
last_symlink = os.path.join(DEPLOYMENTS_DIR, "last")

previous_deploy = os.readlink(current_symlink)
change_symlink = previous_deploy != deploy_path
if change_symlink:
    overwrite_symlink(previous_deploy, last_symlink)
    overwrite_symlink(deploy_path, current_symlink)
|
|
|
|
# Read the number of Tornado processes from /etc/zulip/zulip.conf
# ([application_server] tornado_processes).  A missing section or option
# means a single process; a non-integer value still raises ValueError.
config_file = configparser.RawConfigParser()
config_file.read("/etc/zulip/zulip.conf")

tornado_processes = config_file.getint('application_server', 'tornado_processes', fallback=1)
|
|
|
|
# Tornado is restarted on its own, ahead of everything else.  When too
# many Python processes restart at the same time, Tornado can receive
# insufficient priority and stay down longer; since Tornado is the main
# source of user-visible downtime during a Zulip server restart, it goes
# first.
if tornado_processes <= 1:
    logging.info("Restarting Tornado process")
    subprocess.check_call(["supervisorctl", "restart", "zulip-tornado", "zulip-tornado:*"])
else:
    # Restart the processes one at a time (ports 9800, 9801, ...) for a
    # better rate of restarts.  Restarting the whole supervisord group
    # instead would keep the entire bundle stopped for an extended time
    # whenever any single process is slow to stop.
    for offset in range(tornado_processes):
        port = 9800 + offset
        logging.info("Restarting Tornado process on port %s", port)
        subprocess.check_call(
            ["supervisorctl", "restart", "zulip-tornado:port-{}".format(port)]
        )
|
|
|
|
# Restart the uWSGI and related processes via supervisorctl.  The order is
# significant: workers are stopped first and started last, with the
# server core stopped and started in between.  check_call aborts the
# sequence on the first failing command.
restart_steps = [
    ("Stopping workers", ["supervisorctl", "stop", "zulip-workers:*"]),
    ("Stopping server core", ["supervisorctl", "stop"] + core_server_services),
    ("Starting server core", ["supervisorctl", "start"] + core_server_services),
    ("Starting workers", ["supervisorctl", "start", "zulip-workers:*"]),
]
for step_message, step_command in restart_steps:
    logging.info(step_message)
    subprocess.check_call(step_command)
|
|
|
|
# Ask Django for the USING_APACHE_SSO setting; the helper prints the value
# on stdout, which arrives here as bytes.
using_sso = subprocess.check_output(['./scripts/get-django-setting', 'USING_APACHE_SSO'])
sso_enabled = using_sso.strip() == b'True'
if sso_enabled:
    logging.info("Restarting Apache WSGI process...")
    # Signal every process owned by user "zulip" whose command line
    # matches "apache2".
    # NOTE(review): presumably Apache then respawns them - confirm.
    subprocess.check_call(["pkill", "-f", "apache2", "-u", "zulip"])
|
|
|
|
# process-fts-updates is restarted only where its supervisor configuration
# (zulip_db.conf) is installed.
fts_supervisor_conf = "/etc/supervisor/conf.d/zulip_db.conf"
if os.path.exists(fts_supervisor_conf):
    subprocess.check_call(["supervisorctl", "restart", "process-fts-updates"])

logging.info("Done!")
# Success banner on stdout, wrapped in the OKGREEN/ENDC markers.
print("".join([OKGREEN, "Application restarted successfully!", ENDC]))
|
|
|
|
# If the deployment symlinks were just repointed and the invoking shell's
# working directory (as reported by $PWD) lies at or below one of them,
# warn on stderr that the shell must `cd` again to follow the new target.
if change_symlink and "PWD" in os.environ:
    shell_pwd = os.environ["PWD"]
    for symlink in [last_symlink, current_symlink]:
        # Compare on path-component boundaries.  os.path.commonprefix()
        # works character by character, so a sibling directory such as
        # "<symlink>-old" would wrongly be treated as inside the symlink
        # and trigger a spurious warning.
        if shell_pwd == symlink or shell_pwd.startswith(symlink + os.sep):
            print(
                """
%sYour shell entered its current directory through a symlink:
%s
which has now changed. Your shell will not see this change until you run:
cd %s
to traverse the symlink again.%s
"""
                % (WARNING, symlink, shlex.quote(shell_pwd), ENDC),
                file=sys.stderr,
            )
|