Files
zulip/scripts/nagios/check-rabbitmq-consumers
Tim Abbott 3f03dcdf5e nagios: Support multiple tornado processes.
This allows our Tornado monitoring to correctly report whether
multiple configured Tornado processes are running.

This setup isn't ideal, in that it can't detect cases where the wrong
set of Tornado processes are running, but it's nice and simple and
should catch most actual problems.
2018-11-06 16:50:03 -08:00

110 lines
3.1 KiB
Python
Executable File

#!/usr/bin/env python3
import sys
import time
import argparse
import configparser
from collections import defaultdict
import os
import subprocess
if False:
from typing import Any, Dict, Optional, Union
states = {
0: "OK",
1: "WARNING",
2: "CRITICAL",
3: "UNKNOWN"
}
if 'USER' in os.environ and not os.environ['USER'] in ['root', 'rabbitmq']:
print("This script must be run as the root or rabbitmq user")
usage = """Usage: check-rabbitmq-consumers --queue=[queue-name] --min-threshold=[min-threshold]"""
parser = argparse.ArgumentParser(usage=usage)
parser.add_argument('--min-threshold',
dest='min_count',
type=int,
default=1,
action='store')
options = parser.parse_args()
config_file = configparser.RawConfigParser()
config_file.read("/etc/zulip/zulip.conf")
def get_config(section, key, default_value):
# type: (str, str, str) -> str
if config_file.has_option(section, key):
return config_file.get(section, key)
return default_value
TORNADO_PROCESSES = int(get_config('application_server', 'tornado_processes', '1'))
output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'],
universal_newlines=True)
consumers = defaultdict(int) # type: Dict[str, int]
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
from scripts.lib.zulip_tools import su_to_zulip
queues = {
'deferred_work'
'digest_emails',
'email_mirror',
'embed_links',
'embedded_bots',
'error_reports',
'feedback_messages',
'invites',
'message_sender',
'missedmessage_emails',
'missedmessage_email_senders',
'email_senders',
'missedmessage_mobile_notifications',
'outgoing_webhooks',
'signups',
'slow_queries',
'user_activity'
'user_activity_interval',
'user_presence',
# These queues may not be present if settings.TORNADO_PROCESSES > 1
'notify_tornado',
'tornado_return',
}
for queue_name in queues:
queue_name = queue_name.strip()
consumers[queue_name] = 0
for line in output.split('\n'):
parts = line.split('\t')
if len(parts) >= 2:
queue_name = parts[0]
if queue_name.startswith("tornado_return_"):
queue_name = "tornado_return"
if queue_name.startswith("notify_tornado_"):
queue_name = "notify_tornado"
consumers[queue_name] += 1
now = int(time.time())
for queue_name in consumers.keys():
state_file_path = "/var/lib/nagios_state/check-rabbitmq-consumers-" + queue_name
state_file_tmp = state_file_path + "-tmp"
target_count = options.min_count
if queue_name in ["tornado_return", "notify_tornado"]:
target_count = TORNADO_PROCESSES
if consumers[queue_name] < target_count:
status = 2
else:
status = 0
with open(state_file_tmp, "w") as f:
f.write("%s|%s|%s|queue %s has %s consumers, needs %s\n" % (
now, status, states[status], queue_name,
consumers[queue_name], target_count))
os.rename(state_file_tmp, state_file_path)