mirror of
https://github.com/zulip/zulip.git
synced 2025-11-02 21:13:36 +00:00
python: Reformat with Black, except quotes.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
committed by
Tim Abbott
parent
5028c081cb
commit
11741543da
@@ -7,7 +7,7 @@ import time
|
||||
from typing import Tuple
|
||||
|
||||
|
||||
def nagios_from_file(results_file: str, max_time_diff: int=60 * 2) -> 'Tuple[int, str]':
|
||||
def nagios_from_file(results_file: str, max_time_diff: int = 60 * 2) -> 'Tuple[int, str]':
|
||||
"""Returns a nagios-appropriate string and return code obtained by
|
||||
parsing the desired file on disk. The file on disk should be of format
|
||||
|
||||
@@ -39,6 +39,7 @@ def nagios_from_file(results_file: str, max_time_diff: int=60 * 2) -> 'Tuple[int
|
||||
|
||||
return (ret, f"{state}: {data}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
RESULTS_FILE = sys.argv[1]
|
||||
extra_args = {}
|
||||
|
||||
@@ -32,17 +32,13 @@ usage = """Usage: send-receive.py [options] [config]
|
||||
Otherwise, returns the output data."""
|
||||
|
||||
parser = argparse.ArgumentParser(usage=usage)
|
||||
parser.add_argument('--site',
|
||||
default="https://api.zulip.com")
|
||||
parser.add_argument('--site', default="https://api.zulip.com")
|
||||
|
||||
parser.add_argument('--nagios',
|
||||
action='store_true')
|
||||
parser.add_argument('--nagios', action='store_true')
|
||||
|
||||
parser.add_argument('--insecure',
|
||||
action='store_true')
|
||||
parser.add_argument('--insecure', action='store_true')
|
||||
|
||||
parser.add_argument('--munin',
|
||||
action='store_true')
|
||||
parser.add_argument('--munin', action='store_true')
|
||||
|
||||
parser.add_argument('config', nargs='?')
|
||||
|
||||
@@ -54,13 +50,15 @@ if not options.nagios and not options.munin:
|
||||
|
||||
if options.munin:
|
||||
if options.config == 'config':
|
||||
print("""graph_title Send-Receive times
|
||||
print(
|
||||
"""graph_title Send-Receive times
|
||||
graph_info The number of seconds it takes to send and receive a message from the server
|
||||
graph_args -u 5 -l 0
|
||||
graph_vlabel RTT (seconds)
|
||||
sendreceive.label Send-receive round trip time
|
||||
sendreceive.warning 3
|
||||
sendreceive.critical 5""")
|
||||
sendreceive.critical 5"""
|
||||
)
|
||||
sys.exit(0)
|
||||
|
||||
sys.path.append('/home/zulip/deployments/current')
|
||||
@@ -79,6 +77,7 @@ states = {
|
||||
"UNKNOWN": 3,
|
||||
}
|
||||
|
||||
|
||||
def report(state: str, timestamp: Any = None, msg: Optional[str] = None) -> None:
|
||||
now = int(time.time())
|
||||
if msg is None:
|
||||
@@ -90,11 +89,13 @@ def report(state: str, timestamp: Any = None, msg: Optional[str] = None) -> None
|
||||
print(f"{state}: {msg}")
|
||||
exit(states[state])
|
||||
|
||||
|
||||
def send_zulip(sender: zulip.Client, message: Dict[str, Any]) -> None:
|
||||
result = sender.send_message(message)
|
||||
if result["result"] != "success" and options.nagios:
|
||||
report("CRITICAL", msg=f"Error sending Zulip, args were: {message}, {result}")
|
||||
|
||||
|
||||
def get_zulips() -> List[Dict[str, Any]]:
|
||||
global queue_id, last_event_id
|
||||
res = zulip_recipient.get_events(queue_id=queue_id, last_event_id=last_event_id)
|
||||
@@ -108,8 +109,12 @@ def get_zulips() -> List[Dict[str, Any]]:
|
||||
report("CRITICAL", msg="Got heartbeat waiting for Zulip, which means get_events is hanging")
|
||||
return [event['message'] for event in res['events']]
|
||||
|
||||
if "staging" in options.site and settings.NAGIOS_STAGING_SEND_BOT is not None and \
|
||||
settings.NAGIOS_STAGING_RECEIVE_BOT is not None:
|
||||
|
||||
if (
|
||||
"staging" in options.site
|
||||
and settings.NAGIOS_STAGING_SEND_BOT is not None
|
||||
and settings.NAGIOS_STAGING_RECEIVE_BOT is not None
|
||||
):
|
||||
sender = get_system_bot(settings.NAGIOS_STAGING_SEND_BOT)
|
||||
recipient = get_system_bot(settings.NAGIOS_STAGING_RECEIVE_BOT)
|
||||
else:
|
||||
@@ -122,7 +127,8 @@ zulip_sender = zulip.Client(
|
||||
verbose=True,
|
||||
insecure=options.insecure,
|
||||
client="ZulipMonitoring/0.1",
|
||||
site=options.site)
|
||||
site=options.site,
|
||||
)
|
||||
|
||||
zulip_recipient = zulip.Client(
|
||||
email=recipient.email,
|
||||
@@ -130,7 +136,8 @@ zulip_recipient = zulip.Client(
|
||||
verbose=True,
|
||||
insecure=options.insecure,
|
||||
client="ZulipMonitoring/0.1",
|
||||
site=options.site)
|
||||
site=options.site,
|
||||
)
|
||||
|
||||
try:
|
||||
res = zulip_recipient.register(event_types=["message"])
|
||||
@@ -142,12 +149,15 @@ except Exception:
|
||||
msg_to_send = str(random.getrandbits(64))
|
||||
time_start = time.time()
|
||||
|
||||
send_zulip(zulip_sender, {
|
||||
"type": 'private',
|
||||
"content": msg_to_send,
|
||||
"subject": "time to send",
|
||||
"to": recipient.email,
|
||||
})
|
||||
send_zulip(
|
||||
zulip_sender,
|
||||
{
|
||||
"type": 'private',
|
||||
"content": msg_to_send,
|
||||
"subject": "time to send",
|
||||
"to": recipient.email,
|
||||
},
|
||||
)
|
||||
|
||||
msg_content: List[str] = []
|
||||
|
||||
|
||||
@@ -22,10 +22,12 @@ states = {
|
||||
"UNKNOWN": 3,
|
||||
}
|
||||
|
||||
|
||||
def report(state: str, num: str) -> None:
|
||||
print(f"{state}: {num} rows in fts_update_log table")
|
||||
exit(states[state])
|
||||
|
||||
|
||||
conn = psycopg2.connect(database="zulip")
|
||||
cursor = conn.cursor()
|
||||
|
||||
|
||||
@@ -31,15 +31,15 @@ def run_sql_query(query: str) -> List[List[str]]:
|
||||
'-t', # Omit header line
|
||||
'-A', # Don't pad with spaces
|
||||
'-z', # Separate columns with nulls
|
||||
'-v', 'ON_ERROR_STOP=1',
|
||||
'-v',
|
||||
'ON_ERROR_STOP=1',
|
||||
'zulip',
|
||||
'-c', f'SELECT {query}',
|
||||
'-c',
|
||||
f'SELECT {query}',
|
||||
]
|
||||
try:
|
||||
output = subprocess.check_output(
|
||||
command,
|
||||
stderr=subprocess.STDOUT,
|
||||
universal_newlines=True
|
||||
command, stderr=subprocess.STDOUT, universal_newlines=True
|
||||
).strip()
|
||||
if not output:
|
||||
return []
|
||||
@@ -84,9 +84,9 @@ if replication_info:
|
||||
report('CRITICAL', f'replica is in state {state}, not streaming')
|
||||
|
||||
msg = f'replica is {replay_lag} bytes behind in replay of WAL logs from {primary_server}'
|
||||
if replay_lag > 5 * 16 * 1024**2:
|
||||
if replay_lag > 5 * 16 * 1024 ** 2:
|
||||
report('CRITICAL', msg)
|
||||
elif replay_lag > 16 * 1024**2:
|
||||
elif replay_lag > 16 * 1024 ** 2:
|
||||
report('WARNING', msg)
|
||||
else:
|
||||
report('OK', msg)
|
||||
@@ -115,9 +115,9 @@ else:
|
||||
for lag_type in ('write', 'flush', 'replay'):
|
||||
lag_bytes = lag[lag_type]
|
||||
msg = f'replica {client_addr} is {lag_bytes} bytes behind in {lag_type} of WAL logs'
|
||||
if lag_bytes > 5 * 16 * 1024**2:
|
||||
if lag_bytes > 5 * 16 * 1024 ** 2:
|
||||
report('CRITICAL', msg)
|
||||
elif lag_bytes > 16 * 1024**2:
|
||||
elif lag_bytes > 16 * 1024 ** 2:
|
||||
report('WARNING', msg)
|
||||
else:
|
||||
report('OK', msg)
|
||||
|
||||
@@ -12,13 +12,18 @@ states = {
|
||||
"UNKNOWN": 3,
|
||||
}
|
||||
|
||||
|
||||
def report(state: str, msg: str) -> None:
|
||||
print(f"{state}: {msg}")
|
||||
exit(states[state])
|
||||
|
||||
if subprocess.check_output(['psql', '-v', 'ON_ERROR_STOP=1',
|
||||
'postgres', '-t', '-c',
|
||||
'SELECT pg_is_in_recovery()']).strip() != b'f':
|
||||
|
||||
if (
|
||||
subprocess.check_output(
|
||||
['psql', '-v', 'ON_ERROR_STOP=1', 'postgres', '-t', '-c', 'SELECT pg_is_in_recovery()']
|
||||
).strip()
|
||||
!= b'f'
|
||||
):
|
||||
report('OK', 'this is not the primary')
|
||||
|
||||
try:
|
||||
|
||||
@@ -15,6 +15,7 @@ logging.Formatter.converter = time.gmtime
|
||||
logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run(args: List[str], dry_run: bool = False) -> str:
|
||||
if dry_run:
|
||||
print("Would have run: " + " ".join(map(shlex.quote, args)))
|
||||
@@ -22,7 +23,10 @@ def run(args: List[str], dry_run: bool = False) -> str:
|
||||
|
||||
return subprocess.check_output(args, stdin=subprocess.DEVNULL, universal_newlines=True)
|
||||
|
||||
recovery_val = run(['psql', '-v', 'ON_ERROR_STOP=1', '-t', '-c', 'SELECT pg_is_in_recovery()']).strip()
|
||||
|
||||
recovery_val = run(
|
||||
['psql', '-v', 'ON_ERROR_STOP=1', '-t', '-c', 'SELECT pg_is_in_recovery()']
|
||||
).strip()
|
||||
# Assertion to check that we're extracting the value correctly.
|
||||
assert recovery_val in ['t', 'f']
|
||||
if recovery_val == 't':
|
||||
|
||||
@@ -36,8 +36,7 @@ import psycopg2.extensions
|
||||
BATCH_SIZE = 1000
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--quiet',
|
||||
action='store_true')
|
||||
parser.add_argument('--quiet', action='store_true')
|
||||
options = parser.parse_args()
|
||||
|
||||
logging.Formatter.converter = time.gmtime
|
||||
@@ -48,6 +47,7 @@ if options.quiet:
|
||||
else:
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
def update_fts_columns(cursor: psycopg2.extensions.cursor) -> int:
|
||||
cursor.execute(
|
||||
"SELECT id, message_id FROM fts_update_log LIMIT %s;",
|
||||
@@ -56,22 +56,30 @@ def update_fts_columns(cursor: psycopg2.extensions.cursor) -> int:
|
||||
ids = []
|
||||
for (id, message_id) in cursor.fetchall():
|
||||
if USING_PGROONGA:
|
||||
cursor.execute("UPDATE zerver_message SET "
|
||||
"search_pgroonga = "
|
||||
"escape_html(subject) || ' ' || rendered_content "
|
||||
"WHERE id = %s", (message_id,))
|
||||
cursor.execute("UPDATE zerver_message SET "
|
||||
"search_tsvector = to_tsvector('zulip.english_us_search', "
|
||||
"subject || rendered_content) "
|
||||
"WHERE id = %s", (message_id,))
|
||||
cursor.execute(
|
||||
"UPDATE zerver_message SET "
|
||||
"search_pgroonga = "
|
||||
"escape_html(subject) || ' ' || rendered_content "
|
||||
"WHERE id = %s",
|
||||
(message_id,),
|
||||
)
|
||||
cursor.execute(
|
||||
"UPDATE zerver_message SET "
|
||||
"search_tsvector = to_tsvector('zulip.english_us_search', "
|
||||
"subject || rendered_content) "
|
||||
"WHERE id = %s",
|
||||
(message_id,),
|
||||
)
|
||||
ids.append(id)
|
||||
cursor.execute("DELETE FROM fts_update_log WHERE id = ANY(%s)", (ids,))
|
||||
return len(ids)
|
||||
|
||||
|
||||
def am_master(cursor: psycopg2.extensions.cursor) -> bool:
|
||||
cursor.execute("SELECT pg_is_in_recovery()")
|
||||
return not cursor.fetchall()[0][0]
|
||||
|
||||
|
||||
pg_args = {}
|
||||
|
||||
# Path to the root of the Zulip codebase in production
|
||||
@@ -81,6 +89,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.
|
||||
try:
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
|
||||
from django.conf import settings
|
||||
|
||||
if settings.REMOTE_POSTGRES_HOST != '':
|
||||
pg_args['host'] = settings.REMOTE_POSTGRES_HOST
|
||||
if settings.REMOTE_POSTGRES_PORT != '':
|
||||
|
||||
@@ -21,10 +21,12 @@ states: Dict[str, int] = {
|
||||
"UNKNOWN": 3,
|
||||
}
|
||||
|
||||
|
||||
def report(state: str, output: str) -> None:
|
||||
print(f"{state}\n{output}")
|
||||
exit(states[state])
|
||||
|
||||
|
||||
output = ""
|
||||
down_count = 0
|
||||
for results_file_name in os.listdir(RESULTS_DIR):
|
||||
|
||||
@@ -35,29 +35,36 @@ states: Dict[str, int] = {
|
||||
"UNKNOWN": 3,
|
||||
}
|
||||
|
||||
|
||||
def report(state: str, short_msg: str, too_old: AbstractSet[Any] = set()) -> None:
|
||||
too_old_data = ""
|
||||
if too_old:
|
||||
too_old_data = "\nLast call to get_message for recently out of date mirrors:\n" + "\n".join(
|
||||
"{:>16}: {}".format(user.user_profile.email,
|
||||
user.last_visit.strftime("%Y-%m-%d %H:%M %Z"),
|
||||
) for user in too_old
|
||||
"{:>16}: {}".format(
|
||||
user.user_profile.email,
|
||||
user.last_visit.strftime("%Y-%m-%d %H:%M %Z"),
|
||||
)
|
||||
for user in too_old
|
||||
)
|
||||
print(f"{state}: {short_msg}{too_old_data}")
|
||||
|
||||
exit(states[state])
|
||||
|
||||
|
||||
now = timezone_now()
|
||||
|
||||
all_users = UserActivity.objects.filter(query__in=["get_events", "/api/v1/events"],
|
||||
client__name="zephyr_mirror")
|
||||
new_inactive_users = [user for user in all_users if user.last_visit <
|
||||
now - datetime.timedelta(minutes=10)]
|
||||
old_inactive_users = [user for user in new_inactive_users if user.last_visit <
|
||||
now - datetime.timedelta(minutes=60)]
|
||||
all_users = UserActivity.objects.filter(
|
||||
query__in=["get_events", "/api/v1/events"], client__name="zephyr_mirror"
|
||||
)
|
||||
new_inactive_users = [
|
||||
user for user in all_users if user.last_visit < now - datetime.timedelta(minutes=10)
|
||||
]
|
||||
old_inactive_users = [
|
||||
user for user in new_inactive_users if user.last_visit < now - datetime.timedelta(minutes=60)
|
||||
]
|
||||
recently_inactive_users = set(new_inactive_users) - set(old_inactive_users)
|
||||
|
||||
if (len(recently_inactive_users) / float(len(old_inactive_users))) > .25:
|
||||
if (len(recently_inactive_users) / float(len(old_inactive_users))) > 0.25:
|
||||
report("CRITICAL", "Many mirrors recently became inactive", recently_inactive_users)
|
||||
else:
|
||||
report("OK", "Most mirrors that were recently active continue to be active")
|
||||
|
||||
@@ -22,12 +22,16 @@ states: Dict[str, int] = {
|
||||
"UNKNOWN": 3,
|
||||
}
|
||||
|
||||
|
||||
def report(state: str, data: str, last_check: float) -> None:
|
||||
print("{}: Last test run completed at {}\n{}".format(
|
||||
state, time.strftime("%Y-%m-%d %H:%M %Z", time.gmtime(last_check)),
|
||||
data))
|
||||
print(
|
||||
"{}: Last test run completed at {}\n{}".format(
|
||||
state, time.strftime("%Y-%m-%d %H:%M %Z", time.gmtime(last_check)), data
|
||||
)
|
||||
)
|
||||
exit(states[state])
|
||||
|
||||
|
||||
with open(RESULTS_FILE) as f:
|
||||
data = f.read().strip()
|
||||
if data.split("\n")[-1].strip() == "0":
|
||||
|
||||
@@ -56,6 +56,7 @@ def address_of(device_id: int) -> Optional[str]:
|
||||
except KeyError:
|
||||
return None
|
||||
|
||||
|
||||
def guess_gateway(device_id: int) -> Optional[str]:
|
||||
# This will not work if the default gateway isn't n.n.n.1.
|
||||
address = address_of(device_id)
|
||||
@@ -65,6 +66,7 @@ def guess_gateway(device_id: int) -> Optional[str]:
|
||||
gateway[3] = '1'
|
||||
return '.'.join(gateway)
|
||||
|
||||
|
||||
log = logging.getLogger('configure-cloud-interfaces')
|
||||
log.setLevel(logging.DEBUG)
|
||||
|
||||
@@ -107,19 +109,43 @@ for device in macs.values():
|
||||
dev_num = str(device_number)
|
||||
address = address_of(device_number)
|
||||
gateway = guess_gateway(device_number)
|
||||
assert(address is not None)
|
||||
assert(gateway is not None)
|
||||
assert address is not None
|
||||
assert gateway is not None
|
||||
|
||||
# Horrible hack to route return packets on the correct interface
|
||||
# See https://unix.stackexchange.com/a/4421/933
|
||||
subprocess.check_call(['/sbin/ip', 'rule', 'add', 'fwmark', dev_num, 'table', dev_num])
|
||||
subprocess.check_call(
|
||||
['/sbin/ip', 'rule', 'add', 'fwmark', dev_num, 'table', dev_num])
|
||||
[
|
||||
'/sbin/ip',
|
||||
'route',
|
||||
'add',
|
||||
'0.0.0.0/0',
|
||||
'table',
|
||||
dev_num,
|
||||
'dev',
|
||||
f'ens{device_number}',
|
||||
'via',
|
||||
gateway,
|
||||
]
|
||||
)
|
||||
subprocess.check_call(
|
||||
['/sbin/ip', 'route', 'add', '0.0.0.0/0', 'table', dev_num, 'dev',
|
||||
f'ens{device_number}', 'via', gateway])
|
||||
subprocess.check_call(
|
||||
['/sbin/iptables', '-t', 'mangle', '-A', 'OUTPUT', '-m', 'conntrack', '--ctorigdst',
|
||||
address, '-j', 'MARK', '--set-mark', dev_num])
|
||||
[
|
||||
'/sbin/iptables',
|
||||
'-t',
|
||||
'mangle',
|
||||
'-A',
|
||||
'OUTPUT',
|
||||
'-m',
|
||||
'conntrack',
|
||||
'--ctorigdst',
|
||||
address,
|
||||
'-j',
|
||||
'MARK',
|
||||
'--set-mark',
|
||||
dev_num,
|
||||
]
|
||||
)
|
||||
|
||||
to_configure.remove(address)
|
||||
|
||||
@@ -129,12 +155,25 @@ for device in macs.values():
|
||||
log.info(f"Configuring {device} with IP {ip}")
|
||||
subprocess.check_call(['/sbin/ifconfig', device, ip])
|
||||
subprocess.check_call(
|
||||
['/sbin/iptables', '-t', 'mangle', '-A', 'OUTPUT', '-m', 'conntrack', '--ctorigdst',
|
||||
ip, '-j', 'MARK', '--set-mark', str(device_number)])
|
||||
[
|
||||
'/sbin/iptables',
|
||||
'-t',
|
||||
'mangle',
|
||||
'-A',
|
||||
'OUTPUT',
|
||||
'-m',
|
||||
'conntrack',
|
||||
'--ctorigdst',
|
||||
ip,
|
||||
'-j',
|
||||
'MARK',
|
||||
'--set-mark',
|
||||
str(device_number),
|
||||
]
|
||||
)
|
||||
|
||||
for throwaway in range(2):
|
||||
# Don't freak out if this doesn't work.
|
||||
subprocess.call(
|
||||
['/sbin/ip', 'route', 'del', '10.0.0.0/8'])
|
||||
subprocess.call(['/sbin/ip', 'route', 'del', '10.0.0.0/8'])
|
||||
|
||||
log.info("Finished.")
|
||||
|
||||
Reference in New Issue
Block a user