Files
zulip/scripts/nagios/check-rabbitmq-consumers
Tom Daff f1f937e4ea monitoring: Fix check-rabbitmq-consumers.
Missing commas in the definition of all the queues to check meant that it would be looking for queues with concatenated names, rather than the correct ones. Added the commas.
2020-03-25 17:19:55 -07:00

106 lines
3.0 KiB
Python
Executable File

#!/usr/bin/env python3
import sys
import time
import argparse
import configparser
from collections import defaultdict
import os
import subprocess
from typing import Dict
states = {
0: "OK",
1: "WARNING",
2: "CRITICAL",
3: "UNKNOWN"
}
if 'USER' in os.environ and not os.environ['USER'] in ['root', 'rabbitmq']:
print("This script must be run as the root or rabbitmq user")
usage = """Usage: check-rabbitmq-consumers --queue=[queue-name] --min-threshold=[min-threshold]"""
parser = argparse.ArgumentParser(usage=usage)
parser.add_argument('--min-threshold',
dest='min_count',
type=int,
default=1,
action='store')
options = parser.parse_args()
config_file = configparser.RawConfigParser()
config_file.read("/etc/zulip/zulip.conf")
def get_config(section, key, default_value):
# type: (str, str, str) -> str
if config_file.has_option(section, key):
return config_file.get(section, key)
return default_value
TORNADO_PROCESSES = int(get_config('application_server', 'tornado_processes', '1'))
output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'],
universal_newlines=True)
consumers = defaultdict(int) # type: Dict[str, int]
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
queues = {
'deferred_work',
'digest_emails',
'email_mirror',
'embed_links',
'embedded_bots',
'error_reports',
'invites',
'message_sender',
'missedmessage_emails',
'missedmessage_email_senders',
'email_senders',
'missedmessage_mobile_notifications',
'outgoing_webhooks',
'signups',
'slow_queries',
'user_activity',
'user_activity_interval',
'user_presence',
# These queues may not be present if settings.TORNADO_PROCESSES > 1
'notify_tornado',
'tornado_return',
}
for queue_name in queues:
queue_name = queue_name.strip()
consumers[queue_name] = 0
for line in output.split('\n'):
parts = line.split('\t')
if len(parts) >= 2:
queue_name = parts[0]
if queue_name.startswith("tornado_return_"):
queue_name = "tornado_return"
if queue_name.startswith("notify_tornado_"):
queue_name = "notify_tornado"
consumers[queue_name] += 1
now = int(time.time())
for queue_name in consumers.keys():
state_file_path = "/var/lib/nagios_state/check-rabbitmq-consumers-" + queue_name
state_file_tmp = state_file_path + "-tmp"
target_count = options.min_count
if queue_name in ["tornado_return", "notify_tornado"]:
target_count = TORNADO_PROCESSES
if consumers[queue_name] < target_count:
status = 2
else:
status = 0
with open(state_file_tmp, "w") as f:
f.write("%s|%s|%s|queue %s has %s consumers, needs %s\n" % (
now, status, states[status], queue_name,
consumers[queue_name], target_count))
os.rename(state_file_tmp, state_file_path)