Files
zulip/puppet/zulip-internal/files/nagios_plugins/check_send_receive_time
Zev Benjamin dd678465ae [manual] Move puppet modules to the top level
The new puppet.conf file has to be moved into place manually.

(imported from commit 253d9a95386dae8c803a998ce2dc7e8be40c880a)
2013-10-30 15:42:26 -04:00

157 lines
4.3 KiB
Python
Executable File

#!/usr/bin/env python
"""
Script to provide information about send-receive times.
It supports both munin and nagios outputs.
It must be run on a machine that is using the live database for the
Django ORM.
"""
import datetime
import sys
import optparse
import random
import traceback
def total_seconds(timedelta):
    """Return the duration held in `timedelta` as a float number of seconds.

    The days, seconds and microseconds fields are folded into one integer
    count of microseconds first; the single division by a float at the end
    is the only place where floating point is involved.
    """
    whole_seconds = timedelta.days * 24 * 3600 + timedelta.seconds
    microsecond_count = whole_seconds * 10**6 + timedelta.microseconds
    return microsecond_count / 10.**6
# Text handed to optparse as the usage banner; it is also printed verbatim
# by the argument-count check that follows option parsing.
usage = """Usage: send-receive.py [options] [config]
'config' is optional, if present will return config info.
Otherwise, returns the output data."""

parser = optparse.OptionParser(usage=usage)

# --site takes a value and falls back to the production API address.
parser.add_option('--site', action='store', dest='site',
                  default="https://api.zulip.com")

# --nagios / --munin are plain switches selecting the output format.  With
# no explicit default, optparse leaves an unset switch as None.
parser.add_option('--nagios', action='store_true', dest='nagios')
parser.add_option('--munin', action='store_true', dest='munin')
# Parse the command line: `options` carries the flags declared on the parser,
# `args` the positional arguments that remain.
(options, args) = parser.parse_args()

# Invocation errors leave with status 3 rather than 0.  A monitoring system
# reads exit status 0 as "check passed", so a mis-configured check must not
# look like a healthy one; 3 is the "UNKNOWN" status in the Nagios plugin
# convention.
if not options.nagios and not options.munin:
    print('No output options specified! Please provide --munin or --nagios')
    sys.exit(3)

# NOTE(review): the usage text documents a single optional positional
# argument, yet two are tolerated here -- confirm whether "> 1" was intended.
if len(args) > 2:
    print(usage)
    sys.exit(3)
# In munin mode, a first positional argument of 'config' asks for the graph
# description instead of a measurement: print it and stop successfully.
if options.munin and len(args) and args[0] == 'config':
    print("""graph_title Send-Receive times
graph_info The number of seconds it takes to send and receive a message from the server
graph_args -u 5 -l 0
graph_vlabel RTT (seconds)
sendreceive.label Send-receive round trip time
sendreceive.warning 3
sendreceive.critical 5""")
    sys.exit(0)
# Append the deployment's api directory to the module search path so that the
# `zulip` import below can be found there.  Because the directory is appended
# (not inserted at the front), a `zulip` package located earlier on sys.path
# would still be picked up first.
sys.path.append('/home/zulip/deployments/current/api')
import zulip
# Process exit status used for each report state.
states = {
    "OK": 0,
    "WARNING": 1,
    "CRITICAL": 2,
    "UNKNOWN": 3,
}


def report(state, time=None, msg=None):
    """Print a one-line status and terminate the process.

    state -- key of `states`; printed as the line prefix and mapped to the
             exit status (an unknown key raises KeyError).
    time  -- value shown as the send time when no `msg` is given.  Optional
             so that a message-only report can be written as
             report(state, msg=...).
    msg   -- free-form text; when truthy it is printed instead of the
             "send time was" line.

    This function never returns normally: it always ends in sys.exit().
    """
    if msg:
        print("%s: %s" % (state, msg))
    else:
        print("%s: send time was %s" % (state, time))
    # sys.exit() rather than the bare exit(): the latter is a convenience the
    # `site` module adds for interactive sessions and is not guaranteed to
    # exist in every interpreter start-up mode.
    sys.exit(states[state])
def send_zulip(sender, message):
    """Send `message` through `sender`; in nagios mode a failed send is fatal.

    sender  -- client object providing send_message().
    message -- the request dict handed to send_message() unchanged.

    When the reply's "result" field is anything other than "success" and
    --nagios was given, a CRITICAL report is issued, which ends the script.
    Without --nagios a failed send is not reported here.
    """
    result = sender.send_message(message)
    if result["result"] != "success" and options.nagios:
        # The text goes in report()'s third slot (`msg`).  Passed second, it
        # lands in the `time` slot and is printed as
        # "CRITICAL: send time was Error sending Zulip, ...".
        report("CRITICAL", None,
               "Error sending Zulip, args were: %s, %s" % (message, result))
def get_zulips():
    """Fetch the next batch of events for the recipient; return their messages.

    Reads the module-level `queue_id` and `last_event_id`, and advances
    `last_event_id` to the highest event id seen so that the following call
    asks only for newer events.  If the reply's "result" contains the text
    "error", a CRITICAL report is issued, which ends the script.

    Returns the list of each event's 'message' entry, in reply order.
    """
    global queue_id, last_event_id
    res = zulip_recipient.get_events(queue_id=queue_id, last_event_id=last_event_id)
    # Substring test: any result value containing "error" counts as a failure.
    if 'error' in res.get('result'):
        # The text goes in report()'s third slot (`msg`); passed second it
        # would be printed as though it were the measured send time.
        report("CRITICAL", None,
               "Error receiving Zulips, error was: %s" % (res["msg"]))
    for event in res['events']:
        last_event_id = max(last_event_id, int(event['id']))
    return [event['message'] for event in res['events']]
# Choose the test accounts and API endpoint.  The --site value is only
# examined for the substring "staging"; the endpoint itself is fixed per
# branch below.
# NOTE(review): the API keys are embedded in the source -- confirm that this
# is acceptable for these default test users.
if "staging" not in options.site:
    # cordelia and iago are default users on prod
    site = "https://api.zulip.com"
    sender, sender_key = "iago@zulip.com", "d43b53c27a8106195b46781abc67901a"
    recipient, recipient_key = "cordelia@zulip.com", "24cf18de98d5c31da9c6c79f0cbec195"
else:
    # hamlet and othello are default users on staging
    site = "https://staging.zulip.com/api"
    sender, sender_key = "hamlet@zulip.com", "dfe1c934d555f4b9538d0d4cfd3069c2"
    recipient, recipient_key = "othello@zulip.com", "4e5d97591bec64bf57d2698ffbb563e3"
def _monitoring_client(email, api_key):
    """Build a verbose API client for one test account on the chosen `site`."""
    return zulip.Client(
        email=email,
        api_key=api_key,
        verbose=True,
        client="test: Zulip monitoring",
        site=site)


# One client per end of the round trip; the two differ only in credentials.
zulip_sender = _monitoring_client(sender, sender_key)
zulip_recipient = _monitoring_client(recipient, recipient_key)
# Register an event queue for the recipient, limited to "message" events, and
# remember where the queue starts.  Every failure path ends in a CRITICAL
# report, which terminates the script.  report() leaves by raising
# SystemExit, which is not an Exception subclass, so the report made inside
# the try body is not re-caught by the handler below.
try:
    res = zulip_recipient.register(event_types=["message"])
    if 'error' in res.get('result'):
        # Text is passed as report()'s `msg` (third slot); in the second
        # slot it would be printed as "send time was ...".
        report("CRITICAL", None,
               "Error subscribing to Zulips: %s" % (res['msg']))
    queue_id, last_event_id = (res['queue_id'], res['last_event_id'])
except Exception:
    report("CRITICAL", None,
           "Error subscribing to Zulips:\n%s" % (traceback.format_exc()))
# The probe body is a random 64-bit number rendered as text, so that this
# run's message can be recognised among whatever else the recipient receives.
msg_to_send = str(random.getrandbits(64))

# The clock starts immediately before the send is issued.
time_start = datetime.datetime.now()
send_zulip(
    zulip_sender,
    dict(type='private', content=msg_to_send, subject="time to send", to=recipient),
)
# Poll the recipient's event queue until the probe message shows up.
# `time_diff` is refreshed after every poll, so once the loop ends it holds
# the elapsed time at the moment the probe was seen.
msg_content = []
while msg_to_send not in msg_content:
    messages = get_zulips()
    time_diff = datetime.datetime.now() - time_start
    msg_content = [m['content'] for m in messages]

if options.nagios:
    # report() terminates the script, so the most severe threshold has to be
    # tested first; with the WARNING test in front, anything slower than 6s
    # would already have exited as WARNING and CRITICAL could never fire.
    # `.seconds` is the whole-second part, so "> 6" means at least 7 seconds.
    if time_diff.seconds > 6:
        report('CRITICAL', time_diff)
    if time_diff.seconds > 3:
        report('WARNING', time_diff)

# Munin output takes precedence when both --munin and --nagios are given.
if options.munin:
    print("sendreceive.value %s" % total_seconds(time_diff))
elif options.nagios:
    report('OK', time_diff)