#!/usr/bin/env python3
#
# This script contains the actual logic for upgrading from an old
# version of Zulip to the new version. upgrade-zulip-stage-2 is
# always run from the new version of Zulip, so any bug fixes take
# effect on the very next upgrade.
import argparse
import glob
import hashlib
import logging
import os
import re
import subprocess
import sys
import time
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    from typing import NoReturn

os.environ["PYTHONUNBUFFERED"] = "y"
# Force a known locale. Some packages on PyPI fail to install in some locales.
os.environ["LC_ALL"] = "C.UTF-8"
os.environ["LANG"] = "C.UTF-8"
os.environ["LANGUAGE"] = "C.UTF-8"
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from scripts.lib.zulip_tools import (
    assert_running_as_root,
    get_config,
    get_config_file,
    listening_publicly,
    parse_os_release,
    run_psql_as_postgres,
    start_arg_parser,
    su_to_zulip,
)

assert_running_as_root()
# Set a known, reliable PATH
os.environ["PATH"] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
logging.Formatter.converter = time.gmtime
logging.basicConfig(format="%(asctime)s upgrade-zulip-stage-2: %(message)s", level=logging.INFO)
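# (With the gmtime converter above, log timestamps are rendered in UTC.)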


def error_desupported_os(vendor: str, os_version: str) -> "NoReturn":
    # Link to documentation for how to correctly upgrade the OS.
    logging.critical("Unsupported platform: %s %s", vendor, os_version)
    logging.info(
        "Sorry! The support for your OS has been discontinued.\n"
        "Please upgrade your OS to a supported release first.\n"
        "See https://zulip.readthedocs.io/en/latest/production/"
        "upgrade-or-modify.html#upgrading-the-operating-system"
    )
    sys.exit(1)


# Do not upgrade on unsupported OS versions.
UNSUPPORTED_DISTROS = [
    ("ubuntu", "14.04"),
    ("ubuntu", "16.04"),
    ("ubuntu", "18.04"),
    ("debian", "9"),
]
distro_info = parse_os_release()
vendor = distro_info["ID"]
os_version = distro_info["VERSION_ID"]
if (vendor, os_version) in UNSUPPORTED_DISTROS:
    error_desupported_os(vendor, os_version)

# make sure we have appropriate file permissions
os.umask(0o22)
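
# This script is normally invoked by scripts/upgrade-zulip (tarball upgrades)
# or scripts/upgrade-zulip-from-git, which pass the new deployment's directory
# as deploy_path -- e.g. (illustrative path):
#   .../upgrade-zulip-stage-2 /home/zulip/deployments/2022-02-25-12-00-00 --from-git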
restart_parser = start_arg_parser(action="restart", add_help=False)
parser = argparse.ArgumentParser(parents=[restart_parser])
parser.add_argument("deploy_path", metavar="deploy_path", help="Path to deployment directory")
parser.add_argument("--skip-puppet", action="store_true", help="Skip doing puppet/apt upgrades.")
parser.add_argument("--skip-migrations", action="store_true", help="Skip doing migrations.")
parser.add_argument(
    "--skip-downgrade-check",
    action="store_true",
    help="Skip the safety check to prevent database downgrades.",
)
parser.add_argument(
    "--from-git", action="store_true", help="Upgrading from git, so run update-prod-static."
)
parser.add_argument(
    "--ignore-static-assets",
    action="store_true",
    help="Do not attempt to copy/manage static assets.",
)
parser.add_argument(
    "--skip-purge-old-deployments", action="store_true", help="Skip purging old deployments."
)
parser.add_argument(
    "--audit-fts-indexes", action="store_true", help="Audit and fix full text search indexes."
)
args = parser.parse_args()
if not args.skip_puppet and args.less_graceful:
    logging.warning("Ignored --less-graceful; all upgrades without --skip-puppet are ungraceful.")
if not args.skip_puppet and args.skip_tornado:
    logging.error(
        "Cannot skip tornado restart unless we are skipping puppet! Omit --skip-tornado, or add --skip-puppet."
    )
    sys.exit(1)
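# (Applying Puppet may itself restart services, including Tornado, so a
# --skip-tornado request can only be honored when Puppet is skipped.)
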
deploy_path = args.deploy_path
os.chdir(deploy_path)
config_file = get_config_file()
IS_SERVER_UP = True
# Check if rabbitmq port 25672 is listening on anything except 127.0.0.1
rabbitmq_dist_listen = listening_publicly(25672)
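# (Port 25672 is RabbitMQ's Erlang distribution/CLI port; it should only be
# reachable from localhost.)
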
# Check the erlang magic cookie size
cookie_size: Optional[int] = None
if os.path.exists("/var/lib/rabbitmq/.erlang.cookie"):
with open("/var/lib/rabbitmq/.erlang.cookie", "r") as cookie_fh:
cookie_size = len(cookie_fh.readline())
else:
logging.info("No RabbitMQ erlang cookie found, not auditing RabbitMQ security.")
if args.skip_puppet and rabbitmq_dist_listen:
    logging.error(
        "RabbitMQ is publicly-accessible on %s; this is a security vulnerability!",
        ", ".join(rabbitmq_dist_listen),
    )
    issue = "issue"
    if cookie_size is not None and cookie_size == 20:
        # See the below comment -- this is used as a lightweight
        # signal for a cookie made with Erlang's bad randomizer.
        logging.error(
            "RabbitMQ erlang cookie is insecure; this is a critical security vulnerability!"
        )
        issue = "issues"
    logging.error(
        "To fix the above security %s, re-run the upgrade without --skip-puppet "
        "(which may be set in /etc/zulip/zulip.conf), in order to restart the "
        "necessary services. Running zulip-puppet-apply by itself is not sufficient.",
        issue,
    )
    sys.exit(1)


def shutdown_server() -> None:
    global IS_SERVER_UP

    if IS_SERVER_UP:
        logging.info("Stopping Zulip...")
        subprocess.check_call(["./scripts/stop-server"], preexec_fn=su_to_zulip)
        IS_SERVER_UP = False


# postgresql.version is required for database servers, but wasn't
# previously; fill it in based on what the OS provides.
if os.path.exists("/etc/init.d/postgresql"):
postgresql_version = get_config(config_file, "postgresql", "version")
if not postgresql_version:
default_postgresql_version = {
("debian", "10"): "11",
("debian", "11"): "13",
("ubuntu", "20.04"): "12",
("ubuntu", "22.04"): "14",
("centos", "7"): "11",
}
if (vendor, os_version) in default_postgresql_version:
postgresql_version = default_postgresql_version[(vendor, os_version)]
else:
error_desupported_os(vendor, os_version)
subprocess.check_call(
[
"crudini",
"--set",
"/etc/zulip/zulip.conf",
"postgresql",
"version",
postgresql_version,
]
)
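        # crudini --set writes the value into /etc/zulip/zulip.conf, e.g.:
        #   [postgresql]
        #   version = 14
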
if glob.glob("/usr/share/postgresql/*/extension/tsearch_extras.control"):
    # Remove legacy tsearch_extras package references
    run_psql_as_postgres(
        config_file=config_file,
        sql_query="DROP EXTENSION IF EXISTS tsearch_extras;",
    )
    subprocess.check_call(["apt-get", "remove", "-y", "postgresql-*-tsearch-extras"])

if not args.skip_puppet:
    logging.info("Upgrading system packages...")
    subprocess.check_call(["apt-get", "update"])
    subprocess.check_call(["apt-get", "-y", "--allow-downgrades", "upgrade"])

# To bootstrap zulip-puppet-apply, we need to install the system yaml
# package; new installs get this, but old installs may not have it.
if not os.path.exists("/usr/share/doc/python3-yaml"):
logging.info("Installing system YAML package, for puppet...")
subprocess.check_call(["apt-get", "install", "python3-yaml"])
if not os.path.exists(os.path.join(deploy_path, "zproject/prod_settings.py")):
    # This is normally done in unpack-zulip, but for upgrading from
    # zulip<1.4.0, we need to do it. See discussion in commit 586b23637.
    os.symlink("/etc/zulip/settings.py", os.path.join(deploy_path, "zproject/prod_settings.py"))

# Now we should have an environment set up where we can run our tools;
# first, creating the production venv.
subprocess.check_call(
    [os.path.join(deploy_path, "scripts", "lib", "create-production-venv"), deploy_path]
)

# Check to make sure that this upgrade is not actually a database
# downgrade.
if not args.skip_downgrade_check:
    subprocess.check_call(
        [os.path.join(deploy_path, "scripts", "lib", "check-database-compatibility.py")],
        preexec_fn=su_to_zulip,
    )

# Make sure the right version of node is installed
subprocess.check_call([os.path.join(deploy_path, "scripts", "lib", "install-node")])
subprocess.check_call([os.path.join(deploy_path, "scripts", "lib", "install-yarn")])

# Generate any new secrets that the new version requires.
# TODO: Do caching to only run this when it has changed.
subprocess.check_call(
    [os.path.join(deploy_path, "scripts", "setup", "generate_secrets.py"), "--production"]
)

# Unpleasant migration: Remove any legacy deployed copies of
# images-google-64 from before we renamed that emojiset to
# "googleblob":
emoji_path = "/home/zulip/prod-static/generated/emoji/images-google-64/1f32d.png"
if os.path.exists(emoji_path):
    with open(emoji_path, "rb") as f:
        emoji_data = f.read()
    emoji_sha = hashlib.sha1(emoji_data).hexdigest()
    if emoji_sha == "47033121dc20b376e0f86f4916969872ad22a293":
        import shutil

        shutil.rmtree("/home/zulip/prod-static/generated/emoji/images-google-64")

# And then, building/installing the static assets.
if args.ignore_static_assets:
    # For the OS version upgrade use case, the static assets are
    # already in place, and we don't need to do anything. Further,
    # neither of the options below will work for all installations,
    # because if we installed from Git, `prod-static/serve` may be
    # empty, so we can't do the non-Git thing, whereas if we installed
    # from a tarball, we won't have a `tools/` directory and thus
    # cannot run `tools/update-prod-static`.
    pass
elif args.from_git:
    # Because `upgrade-zulip-from-git` needs to build static assets, it
    # is at risk of being OOM killed on systems with limited free RAM.
    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
    mem_gib = mem_bytes / (1024.0**3)  # e.g. 3.74
    # Ideally, we'd have 2 thresholds here, depending on whether the
    # system is running queue workers multithreaded or multiprocess.
    # See puppet/zulip/manifests/app_frontend_base.pp for background.
    if mem_gib < 4.2:
        logging.info("Shutting down server to ensure sufficient free RAM for webpack.")
        shutdown_server()

    # Note: The fact that this is before we apply Puppet changes means
    # that we don't support adding new Puppet dependencies of
    # update-prod-static with the Git upgrade process. But it'll fail
    # safely; this seems like a worthwhile tradeoff to minimize downtime.
    logging.info("Building static assets...")
    try:
        subprocess.check_call(["./tools/update-prod-static"], preexec_fn=su_to_zulip)
    except subprocess.CalledProcessError:
        logging.error("Failed to build static assets.")
        if IS_SERVER_UP:
            logging.error("Usually the cause is insufficient free RAM to run webpack.")
            logging.error("Try stopping the Zulip server (scripts/stop-server) and trying again.")
        sys.exit(1)
    logging.info("Caching Zulip Git version...")
    subprocess.check_call(["./tools/cache-zulip-git-version"], preexec_fn=su_to_zulip)
else:
    # Since this doesn't do any actual work, it's likely safe to have
    # this run before we apply Puppet changes (saving a bit of downtime).
    logging.info("Installing static assets...")
    subprocess.check_call(
        ["cp", "-rT", os.path.join(deploy_path, "prod-static/serve"), "/home/zulip/prod-static"],
        preexec_fn=su_to_zulip,
    )

# Our next optimization is to check whether any migrations are needed
# before we start the critical section of the restart. This saves
# about 1s of downtime in a no-op upgrade.
migrations_needed = False
if not args.skip_migrations:
    logging.info("Checking for needed migrations")
    migrations_output = subprocess.check_output(
        ["./manage.py", "showmigrations"], preexec_fn=su_to_zulip, text=True
    )
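    # `manage.py showmigrations` prints one line per migration, e.g.:
    #   zerver
    #    [X] 0001_initial
    #    [ ] 0123_some_new_migration   (illustrative name)
    # so a "[ ]" prefix, after stripping, marks an unapplied migration.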
    for ln in migrations_output.split("\n"):
        line_str = ln.strip()
        if line_str.startswith("[ ]"):
            migrations_needed = True

if (not args.skip_puppet or migrations_needed) and IS_SERVER_UP:
    # By default, we shut down the service to apply migrations and
    # Puppet changes, to minimize risk of issues due to inconsistent
    # state.
    shutdown_server()

if rabbitmq_dist_listen:
    shutdown_server()
    logging.info("Shutting down rabbitmq to adjust its ports...")
    subprocess.check_call(["/usr/sbin/service", "rabbitmq-server", "stop"])

if cookie_size is not None and cookie_size == 20:
    # Checking for a 20-character cookie is used as a signal that it
    # was generated by Erlang's insecure randomizer, which only
    # provides between 20 and 36 bits of entropy; were it 20
    # characters long by a good randomizer, it would be 96 bits and
    # more than sufficient. We generate, using good randomness, a
    # 255-character cookie, the max allowed length.
    logging.info("Generating a secure erlang cookie...")
    subprocess.check_call(["./scripts/setup/generate-rabbitmq-cookie"])

# Adjust Puppet class names for the manifest renames in the 4.0 release
class_renames = {
    "zulip::app_frontend": "zulip::profile::app_frontend",
    "zulip::dockervoyager": "zulip::profile::docker",
    "zulip::memcached": "zulip::profile::memcached",
    "zulip::postgres_appdb_tuned": "zulip::profile::postgresql",
    "zulip::postgres_backups": "zulip::postgresql_backups",
    "zulip::rabbit": "zulip::profile::rabbitmq",
    "zulip::voyager": "zulip::profile::standalone",
}
classes = re.split(r"\s*,\s*", get_config(config_file, "machine", "puppet_classes"))
new_classes = [class_renames.get(c, c) for c in classes if c != "zulip::base"]
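# For example, a legacy "puppet_classes = zulip::base, zulip::voyager" becomes
# "zulip::profile::standalone"; the obsolete zulip::base entry is dropped.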
if classes != new_classes:
    logging.info("Adjusting Puppet classes for renames...")
    subprocess.check_call(
        [
            "crudini",
            "--set",
            "/etc/zulip/zulip.conf",
            "machine",
            "puppet_classes",
            ", ".join(new_classes),
        ]
    )

if not args.skip_puppet:
    logging.info("Applying Puppet changes...")
    subprocess.check_call(["./scripts/zulip-puppet-apply", "--force"])
    subprocess.check_call(["apt-get", "-y", "--allow-downgrades", "upgrade"])

if migrations_needed:
    logging.info("Applying database migrations...")
    subprocess.check_call(["./manage.py", "migrate", "--noinput"], preexec_fn=su_to_zulip)

subprocess.check_call(["./manage.py", "create_realm_internal_bots"], preexec_fn=su_to_zulip)

logging.info("Restarting Zulip...")
if IS_SERVER_UP or not args.skip_puppet:
    # Even if the server wasn't up previously, puppet might have
    # started it if there were supervisord configuration changes, so
    # we need to use restart-server if puppet ran.
    restart_args = ["--fill-cache"]
    if args.skip_tornado:
        restart_args.append("--skip-tornado")
    if args.less_graceful:
        restart_args.append("--less-graceful")
    subprocess.check_call(["./scripts/restart-server", *restart_args], preexec_fn=su_to_zulip)
else:
    subprocess.check_call(["./scripts/start-server", "--fill-cache"], preexec_fn=su_to_zulip)

logging.info("Upgrade complete!")

if args.audit_fts_indexes:
    logging.info("Correcting full-text search indexes for updated dictionary files")
    logging.info("This may take a while but the server should work while it runs.")
    subprocess.check_call(["./manage.py", "audit_fts_indexes"], preexec_fn=su_to_zulip)

if not args.skip_purge_old_deployments:
    logging.info("Purging old deployments...")
    subprocess.check_call(["./scripts/purge-old-deployments"])
else:
    logging.info("Skipping purging old deployments.")

if args.skip_puppet:
    logging.info("Showing un-applied Puppet changes:")
    subprocess.check_call(["./scripts/zulip-puppet-apply", "--noop", "--show_diff"])