Files
zulip/scripts/nagios/check-rabbitmq-consumers
Alex Vandiver 2a12fedcf1 tornado: Remove explicit tornado_processes setting; compute it.
We can compute the intended number of processes from the sharding
configuration.  In doing so, also validate that all of the ports are
contiguous.

This removes a discrepancy between `scripts/lib/sharding.py` and other
parts of the codebase about if merely having a `[tornado_sharding]`
section is sufficient to enable sharding.  Having behaviour which
changes merely based on if an empty section exists is surprising.

This does require that a (presumably empty) `9800` configuration line
exist, but making that default explicit is useful.

After this commit, configuring sharding can be done by adding to
`zulip.conf`:

```
[tornado_sharding]
9800 =              # default
9801 = other_realm
```

Followed by running `./scripts/refresh-sharding-and-restart`.
2020-09-18 15:13:40 -07:00

79 lines
2.3 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import os
import subprocess
import sys
import time
from collections import defaultdict
from typing import Dict
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(ZULIP_PATH)
from scripts.lib.check_rabbitmq_queue import normal_queues
from scripts.lib.zulip_tools import get_config_file, get_tornado_ports
states = {
0: "OK",
1: "WARNING",
2: "CRITICAL",
3: "UNKNOWN",
}
if 'USER' in os.environ and not os.environ['USER'] in ['root', 'rabbitmq']:
print("This script must be run as the root or rabbitmq user")
usage = """Usage: check-rabbitmq-consumers --queue=[queue-name] --min-threshold=[min-threshold]"""
parser = argparse.ArgumentParser(usage=usage)
parser.add_argument('--min-threshold',
dest='min_count',
type=int,
default=1)
options = parser.parse_args()
config_file = get_config_file()
TORNADO_PROCESSES = len(get_tornado_ports(config_file))
output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'],
universal_newlines=True)
consumers: Dict[str, int] = defaultdict(int)
queues = {
*normal_queues,
# These queues may not be present if settings.TORNADO_PROCESSES > 1
'notify_tornado',
}
for queue_name in queues:
queue_name = queue_name.strip()
consumers[queue_name] = 0
for line in output.split('\n'):
parts = line.split('\t')
if len(parts) >= 2:
queue_name = parts[0]
if queue_name.startswith("notify_tornado_"):
queue_name = "notify_tornado"
consumers[queue_name] += 1
now = int(time.time())
for queue_name in consumers.keys():
state_file_path = "/var/lib/nagios_state/check-rabbitmq-consumers-" + queue_name
state_file_tmp = state_file_path + "-tmp"
target_count = options.min_count
if queue_name == "notify_tornado":
target_count = TORNADO_PROCESSES
if consumers[queue_name] < target_count:
status = 2
else:
status = 0
with open(state_file_tmp, "w") as f:
f.write(f"{now}|{status}|{states[status]}|queue {queue_name} has {consumers[queue_name]} consumers, needs {target_count}\n")
os.rename(state_file_tmp, state_file_path)