Decouple the sending of client restart events from the restarting of the servers. Restarts use the new Tornado restart-clients endpoint to inject "restart" events into the queues of clients that were loaded from the previous Tornado process. The rate is controlled by `application_server.client_restart_rate`, in clients per minute, or by a flag to `restart-clients` which overrides it. Note that a web client will also spread its restart over 5 minutes, so artificially slow client restarts are generally unnecessary. Restarts of clients are deferred until after post-deploy hooks are run, so that the pre- and post-deploy hooks bracket the actual server restarts, even if pushing restart events to clients takes significant time.
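As a concrete sketch of the knobs involved (the key and flag names below are taken from the script that follows; the script path is an assumption based on its log prefix), the sustained reload rate can be set in /etc/zulip/zulip.conf:

    [application_server]
    client_reload_rate = 50

or overridden for a single run:

    ./scripts/reload-clients --rate 100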
76 lines
2.6 KiB
Python
Executable File
#!/usr/bin/env python3
import argparse
import configparser
import logging
import os
import sys
import time

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from scripts.lib.setup_path import setup_path

setup_path()

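# With the Zulip environment on sys.path, third-party imports such as
# requests resolve from the correct place.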
import requests
from requests.adapters import HTTPAdapter
from urllib3.util import Retry

from scripts.lib.zulip_tools import get_config, get_config_file, get_tornado_ports

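# Read the default reload rate (clients per second) from zulip.conf,
# falling back to 50.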
config_file = get_config_file()
reload_rate = int(
    get_config(
        config_file,
        "application_server",
        "client_reload_rate",
        "50",
    )
)

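# A --rate flag overrides the configured rate for this run.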
parser = argparse.ArgumentParser()
parser.add_argument(
    "--rate", type=int, help="Number of clients to reload per second", default=reload_rate
)
args = parser.parse_args()
reload_rate = args.rate

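# Requests to Tornado's internal API endpoints are authenticated with the
# shared_secret from /etc/zulip/zulip-secrets.conf.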
secret_config_file = configparser.RawConfigParser()
secret_config_file.read("/etc/zulip/zulip-secrets.conf")
shared_secret = get_config(secret_config_file, "secrets", "shared_secret")
assert shared_secret

# Perform relatively slow retries (2s, 4s, 8s) with backoff, including
# on POST requests. Failure to send this request successfully means
# that clients may fail to reload, so we want to be somewhat resilient
# to failures. Since we are on localhost, we do not expect network
# failures, only Tornado restarts, to cause failures here.
retry = Retry(total=3, backoff_factor=1, allowed_methods=Retry.DEFAULT_ALLOWED_METHODS | {"POST"})
c = requests.Session()
c.mount("http://", HTTPAdapter(max_retries=retry))

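# Log with UTC timestamps.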
logging.Formatter.converter = time.gmtime
logging.basicConfig(format="%(asctime)s reload-clients: %(message)s", level=logging.INFO)

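# Drain each Tornado shard in turn, requesting batches of reloads until
# the shard reports that all of its clients have been told to reload.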
for port in get_tornado_ports(config_file):
    logging.info("Starting to send client reload events to Tornado port %d", port)
    try:
        complete = False

        # Rather than make one request per second, we batch into
        # 5-second chunks of 5 times the client_reload_rate.
        SECONDS_PER_BATCH = 5
        while not complete:
            logging.info("Sending reload events to %d clients", reload_rate * SECONDS_PER_BATCH)
            resp = c.post(
                f"http://127.0.0.1:{port}/api/internal/web_reload_clients",
                data={"client_count": reload_rate * SECONDS_PER_BATCH, "secret": shared_secret},
                timeout=5,
            )
            resp.raise_for_status()
            complete = resp.json()["complete"]
            time.sleep(SECONDS_PER_BATCH)
    except requests.exceptions.HTTPError:
        # Failures in one shard likely won't affect other shards --
        # give up on this shard, and try the next one.
        logging.exception("Failed to send web_reload_clients request to Tornado port %d", port)