#!/usr/bin/env python """ Script to provide information about send-receive times. It supports both munin and nagios outputs It must be run on a machine that is using the live database for the Django ORM. """ import datetime import sys import optparse import random import traceback import os def total_seconds(timedelta): return (timedelta.microseconds + (timedelta.seconds + timedelta.days * 24 * 3600) * 10**6) / 10.**6 usage = """Usage: send-receive.py [options] [config] 'config' is optional, if present will return config info. Otherwise, returns the output data.""" parser = optparse.OptionParser(usage=usage) parser.add_option('--site', dest='site', default="https://api.zulip.com", action='store') parser.add_option('--nagios', dest='nagios', action='store_true') parser.add_option('--munin', dest='munin', action='store_true') (options, args) = parser.parse_args() if not options.nagios and not options.munin: print 'No output options specified! Please provide --munin or --nagios' sys.exit(0) if len(args) > 2: print usage sys.exit(0) if options.munin: if len(args) and args[0] == 'config': print \ """graph_title Send-Receive times graph_info The number of seconds it takes to send and receive a message from the server graph_args -u 5 -l 0 graph_vlabel RTT (seconds) sendreceive.label Send-receive round trip time sendreceive.warning 3 sendreceive.critical 5""" sys.exit(0) sys.path.append('/home/zulip/deployments/current/api') import zulip sys.path.append('/home/zulip/deployments/current') os.environ['DJANGO_SETTINGS_MODULE'] = "zproject.settings" from zerver.models import get_user_profile_by_email from django.conf import settings states = { "OK": 0, "WARNING": 1, "CRITICAL": 2, "UNKNOWN": 3 } def report(state, time, msg=None): if msg: print "%s: %s" % (state, msg) else: print "%s: send time was %s" % (state, time) exit(states[state]) def send_zulip(sender, message): result = sender.send_message(message) if result["result"] != "success" and options.nagios: report("CRITICAL", "Error sending Zulip, args were: %s, %s" % (message, result)) def get_zulips(): global queue_id, last_event_id res = zulip_recipient.get_events(queue_id=queue_id, last_event_id=last_event_id) if 'error' in res.get('result'): report("CRITICAL", "Error receiving Zulips, error was: %s" % (res["msg"])) for event in res['events']: last_event_id = max(last_event_id, int(event['id'])) return [event['message'] for event in res['events']] if "staging" in options.site and settings.NAGIOS_STAGING_SEND_BOT is not None and \ settings.NAGIOS_STAGING_RECEIVE_BOT is not None: sender = get_user_profile_by_email(settings.NAGIOS_STAGING_SEND_BOT) recipient = get_user_profile_by_email(settings.NAGIOS_STAGING_RECEIVE_BOT) site = "https://staging.zulip.com" else: sender = get_user_profile_by_email(settings.NAGIOS_SEND_BOT) recipient = get_user_profile_by_email(settings.NAGIOS_RECEIVE_BOT) site = settings.EXTERNAL_API_HOST zulip_sender = zulip.Client( email=sender.email, api_key=sender.api_key, verbose=True, client="test: Zulip monitoring", site=site) zulip_recipient = zulip.Client( email=recipient.email, api_key=recipient.api_key, verbose=True, client="test: Zulip monitoring", site=site) try: res = zulip_recipient.register(event_types=["message"]) if 'error' in res.get('result'): report("CRITICAL", "Error subscribing to Zulips: %s" % (res['msg'])) queue_id, last_event_id = (res['queue_id'], res['last_event_id']) except Exception: report("CRITICAL", "Error subscribing to Zulips:\n%s" % (traceback.format_exc())) msg_to_send = str(random.getrandbits(64)) time_start = datetime.datetime.now() send_zulip(zulip_sender, { "type": 'private', "content": msg_to_send, "subject": "time to send", "to": recipient.email, }) msg_content = [] while msg_to_send not in msg_content: messages = get_zulips() time_diff = datetime.datetime.now() - time_start msg_content = [m['content'] for m in messages] if options.nagios: if time_diff.seconds > 3: report('WARNING', time_diff) if time_diff.seconds > 6: report('CRITICAL', time_diff) if options.munin: print "sendreceive.value %s" % total_seconds(time_diff) elif options.nagios: report('OK', time_diff)