Files
zulip/puppet/zulip-internal/files/nagios_plugins/check_send_receive_time
Leo Franchi 9b4491db6a Don't use hardcoded bots in nagios' check_send_receive
(imported from commit 82add135bf5b819bcc992af8420eec14cf829ccc)
2013-11-01 14:13:05 -04:00

157 lines
4.5 KiB
Python
Executable File

#!/usr/bin/env python
"""
Script to provide information about send-receive times.
It supports both munin and nagios outputs.
It must be run on a machine that is using the live database for the
Django ORM.
"""
import datetime
import sys
import optparse
import random
import traceback
import os
def total_seconds(timedelta):
    """Return the length of ``timedelta`` as a float number of seconds.

    The days, seconds and microseconds fields are folded into a single
    integer microsecond count, which is then scaled down to seconds.
    """
    whole_seconds = timedelta.days * 24 * 3600 + timedelta.seconds
    as_microseconds = whole_seconds * 10**6 + timedelta.microseconds
    return as_microseconds / 10.**6
# Help text handed to optparse and echoed when too many positional
# arguments are supplied.
usage = """Usage: send-receive.py [options] [config]
'config' is optional, if present will return config info.
Otherwise, returns the output data."""

# Exit status used for invalid invocations. 3 is the value this script
# associates with the "UNKNOWN" state; exiting with 0 here would make a
# misconfigured check look like a success to whatever is monitoring it.
_USAGE_ERROR_STATUS = 3

parser = optparse.OptionParser(usage=usage)
parser.add_option('--site',
                  dest='site',
                  default="https://api.zulip.com",
                  action='store')
parser.add_option('--nagios',
                  dest='nagios',
                  action='store_true')
parser.add_option('--munin',
                  dest='munin',
                  action='store_true')

(options, args) = parser.parse_args()

# At least one output format has to be requested.
if not options.nagios and not options.munin:
    print('No output options specified! Please provide --munin or --nagios')
    sys.exit(_USAGE_ERROR_STATUS)

# The usage text documents a single optional positional argument.
if len(args) > 1:
    print(usage)
    sys.exit(_USAGE_ERROR_STATUS)

if options.munin:
    # "config" asks for the graph description instead of a measurement;
    # print it and stop before any client or database setup happens.
    if args and args[0] == 'config':
        print(
"""graph_title Send-Receive times
graph_info The number of seconds it takes to send and receive a message from the server
graph_args -u 5 -l 0
graph_vlabel RTT (seconds)
sendreceive.label Send-receive round trip time
sendreceive.warning 3
sendreceive.critical 5""")
        sys.exit(0)
# Put the deployment's api directory on the import path so that the
# `zulip` package (used below to build the clients) can be imported.
sys.path.append('/home/zulip/deployments/current/api')
import zulip
# Put the deployment root on the import path and select the Django
# settings module before importing anything that relies on Django.
sys.path.append('/home/zulip/deployments/current')
os.environ['DJANGO_SETTINGS_MODULE'] = "zproject.settings"
from zerver.models import get_user_profile_by_email
from django.conf import settings
# Maps a state name to the process exit status used to signal it.
states = {
    "OK": 0,
    "WARNING": 1,
    "CRITICAL": 2,
    "UNKNOWN": 3
}


def report(state, time=None, msg=None):
    """Print a one-line status for ``state`` and terminate the process.

    Args:
        state: State name; looked up in ``states`` to pick the exit status.
            A name that is not in ``states`` exits with the "UNKNOWN"
            status instead of raising ``KeyError``.
        time: Measured send time, printed when no ``msg`` is given.
        msg: Optional free-form text; when truthy it is printed instead of
            the send time.

    Raises:
        SystemExit: always, carrying the exit status for ``state``.
    """
    if msg:
        print("%s: %s" % (state, msg))
    else:
        print("%s: send time was %s" % (state, time))
    # sys.exit is used rather than the bare ``exit`` helper, which is only
    # installed by the ``site`` module and is meant for interactive use.
    sys.exit(states.get(state, states["UNKNOWN"]))
def send_zulip(sender, message):
    """Send ``message`` through ``sender``; in Nagios mode, abort on failure.

    Args:
        sender: Client object exposing ``send_message(message)``, which is
            expected to return a dict with a "result" key.
        message: The message request passed straight to ``send_message``.

    When the result is not "success" and ``--nagios`` was given, a CRITICAL
    status is reported, which terminates the process. In any other case the
    function returns ``None``.
    """
    result = sender.send_message(message)
    if result["result"] != "success" and options.nagios:
        # Pass the text as ``msg``: handing it over as the positional
        # ``time`` argument would print it as "send time was <error text>".
        report("CRITICAL", None,
               msg="Error sending Zulip, args were: %s, %s" % (message, result))
def get_zulips():
    """Fetch pending events for the recipient and return their messages.

    Polls ``zulip_recipient.get_events`` with the module-level ``queue_id``
    and ``last_event_id``, advances ``last_event_id`` past every event that
    was returned, and returns the ``'message'`` payload of each event that
    has one.

    An error result is reported as CRITICAL, which terminates the process.
    """
    global queue_id, last_event_id
    res = zulip_recipient.get_events(queue_id=queue_id, last_event_id=last_event_id)
    # ``or ''`` keeps the membership test valid when "result" is missing.
    if 'error' in (res.get('result') or ''):
        # Pass the text as ``msg`` so it is not printed as a send time, and
        # wrap the value in a tuple so %-formatting treats it as one item.
        report("CRITICAL", None,
               msg="Error receiving Zulips, error was: %s" % (res["msg"],))
    events = res['events']
    for event in events:
        last_event_id = max(last_event_id, int(event['id']))
    # Skip events that carry no 'message' payload instead of failing with
    # a KeyError on them.
    return [event['message'] for event in events if 'message' in event]
# Choose the bot pair and API host. The staging bots are used only when the
# requested site mentions "staging" and both staging bots are configured;
# every other case uses the regular bots and the configured API host.
if "staging" not in options.site or \
        settings.NAGIOS_STAGING_SEND_BOT is None or \
        settings.NAGIOS_STAGING_RECEIVE_BOT is None:
    sender = get_user_profile_by_email(settings.NAGIOS_SEND_BOT)
    recipient = get_user_profile_by_email(settings.NAGIOS_RECEIVE_BOT)
    site = settings.EXTERNAL_API_HOST
else:
    sender = get_user_profile_by_email(settings.NAGIOS_STAGING_SEND_BOT)
    recipient = get_user_profile_by_email(settings.NAGIOS_STAGING_RECEIVE_BOT)
    site = "https://staging.zulip.com"

# Both clients are configured identically apart from their credentials.
_shared_client_kwargs = {
    "verbose": True,
    "client": "test: Zulip monitoring",
    "site": site,
}

zulip_sender = zulip.Client(
    email=sender.email,
    api_key=sender.api_key,
    **_shared_client_kwargs)

zulip_recipient = zulip.Client(
    email=recipient.email,
    api_key=recipient.api_key,
    **_shared_client_kwargs)
# Register an event queue for the recipient so that incoming messages can be
# polled for. Any failure is reported as CRITICAL, which ends the process.
try:
    res = zulip_recipient.register(event_types=["message"])
    # ``or ''`` keeps the membership test valid when "result" is missing.
    if 'error' in (res.get('result') or ''):
        # Pass the text as ``msg`` so it is not printed as a send time.
        report("CRITICAL", None,
               msg="Error subscribing to Zulips: %s" % (res['msg'],))
    queue_id, last_event_id = (res['queue_id'], res['last_event_id'])
except Exception:
    # The SystemExit raised by report() above is not an Exception subclass,
    # so it passes through this handler untouched.
    report("CRITICAL", None,
           msg="Error subscribing to Zulips:\n%s" % (traceback.format_exc(),))
# Use a random token as the message body so this run's message can be
# recognised among whatever else the recipient receives.
msg_to_send = str(random.getrandbits(64))
time_start = datetime.datetime.now()

send_zulip(zulip_sender, {
    "type": 'private',
    "content": msg_to_send,
    "subject": "time to send",
    "to": recipient.email,
})

msg_content = []

# Poll until the token shows up, tracking the elapsed time after each poll.
while msg_to_send not in msg_content:
    messages = get_zulips()
    time_diff = datetime.datetime.now() - time_start
    msg_content = [m['content'] for m in messages]

    if options.nagios:
        # report() terminates the process, so the more severe threshold has
        # to be tested first; with WARNING tested first the CRITICAL branch
        # could never be reached.
        if time_diff.seconds > 6:
            report('CRITICAL', time_diff)
        if time_diff.seconds > 3:
            report('WARNING', time_diff)

if options.munin:
    print("sendreceive.value %s" % total_seconds(time_diff))
elif options.nagios:
    report('OK', time_diff)