puppet: Remove zephyr configuration and hosts.

This commit is contained in:
Alex Vandiver
2025-06-13 14:58:21 -04:00
committed by Tim Abbott
parent 4dff3dfba6
commit dfadc9fa39
18 changed files with 0 additions and 562 deletions

View File

@@ -1,4 +0,0 @@
# We don't actually need a valid Kerberos cache, since these messages are
# sent unauthenticated anyway -- but zwrite requires a cache in order to run.
* * * * * zulip env KRB5CCNAME=/home/zulip/ccache/zmirror-tabbott zwrite -c zulip-mirror-nagios -i nagios-test -m test -Szulip-nagios@mit.edu -d -q >/dev/null 2>/dev/null

View File

@@ -1,26 +0,0 @@
SHELL=/bin/bash
# Edit this file to introduce tasks to be run by cron.
#
# Each task to run has to be defined through a single line
# indicating with different fields when the task will be run
# and what command to run for the task
#
# To define the time you can provide concrete values for
# minute (m), hour (h), day of month (dom), month (mon),
# and day of week (dow) or use '*' in these fields (for 'any').
#
# Notice that tasks will be started based on the cron's system
# daemon's notion of time and timezones.
#
# Output of the crontab jobs (including errors) is sent through
# email to the user the crontab file belongs to (unless redirected).
#
# For example, you can run a backup of all your user accounts
# at 5 a.m every week with:
# 0 5 * * 1 tar -zcf /var/backups/home.tgz /home/
#
# For more information see the manual pages of crontab(5) and cron(8)
#
# m h dom mon dow command
# Hourly, at minute 35, run zmirror-renew-kerberos as the zulip user.
35 * * * * zulip /home/zulip/python-zulip-api/zulip/integrations/zephyr/zmirror-renew-kerberos
# Every 2 minutes, run check-mirroring as the zulip user.  Both stdout and
# stderr go to a temporary file, which is then renamed over
# check-mirroring-results, so the final path is only ever replaced by a
# complete file.  The rename happens whether or not check-mirroring succeeded.
# `&>` is a bash-only redirect; it relies on the SHELL=/bin/bash setting at
# the top of this file.
*/2 * * * * zulip /home/zulip/python-zulip-api/zulip/integrations/zephyr/check-mirroring --sharded &> /var/lib/nagios_state/check-mirroring-results-tmp; mv /var/lib/nagios_state/check-mirroring-results-tmp /var/lib/nagios_state/check-mirroring-results

View File

@@ -1,9 +0,0 @@
# Accept incoming traffic on UDP port 2104 (zhm)
-A INPUT -p udp --dport 2104 -j ACCEPT
# It's hard to know what ephemeral ports the zephyr clients are listening on.
# Apparently they do not send outgoing traffic sufficient for the
# ESTABLISHED,RELATED rule above. So for now we allow all UDP traffic.
#
# FIXME: do something better here.
-A INPUT -p udp -j ACCEPT

View File

@@ -1,9 +0,0 @@
# Accept incoming traffic on UDP port 2104 (zhm)
-A INPUT -p udp --dport 2104 -j ACCEPT
# It's hard to know what ephemeral ports the zephyr clients are listening on.
# Apparently they do not send outgoing traffic sufficient for the
# ESTABLISHED,RELATED rule above. So for now we allow all UDP traffic.
#
# FIXME: do something better here.
-A INPUT -p udp -j ACCEPT

View File

@@ -66,21 +66,6 @@ define command{
command_line /usr/lib/nagios/plugins/check_disk -W$ARG1$ -K$ARG2$ -w $ARG1$ -c $ARG2$ -A -l -X cgroup -X tmpfs -X devtmpfs -X squashfs -X configfs -X tracefs
}
# Run the zulip_zephyr_mirror/check_zephyr_mirror plugin on the target host
# via check_by_ssh (login nagios, SSH port $ARG1$, 30 second timeout).
define command{
command_name check_zephyr_mirror_forwarding
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l nagios -t 30 -i /var/lib/nagios/.ssh/id_ed25519 -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/zulip_zephyr_mirror/check_zephyr_mirror'
}
# Run the zulip_zephyr_mirror/check_personal_zephyr_mirrors plugin on the
# target host via check_by_ssh (login nagios, SSH port $ARG1$).
define command{
command_name check_personal_zephyr_mirrors
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l nagios -t 30 -i /var/lib/nagios/.ssh/id_ed25519 -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/zulip_zephyr_mirror/check_personal_zephyr_mirrors'
}
# Over SSH, run check_cron_file against the state file
# /var/lib/nagios_state/check_user_zephyr_mirror_liveness; the file itself is
# produced elsewhere (NOTE(review): presumably by the cron-run
# check_user_zephyr_mirror_liveness script -- confirm).
define command{
command_name check_user_zephyr_mirror_liveness
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l nagios -t 30 -i /var/lib/nagios/.ssh/id_ed25519 -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/zulip_app_frontend/check_cron_file /var/lib/nagios_state/check_user_zephyr_mirror_liveness'
}
define command{
command_name check_debian_packages
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l nagios -t 30 -i /var/lib/nagios/.ssh/id_ed25519 -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/zulip_base/check_debian_packages'
@@ -106,11 +91,6 @@ define command{
command_line /usr/lib/nagios/plugins/check_by_ssh -p 22 -l nagios -t 30 -i /var/lib/nagios/.ssh/id_ed25519 -H $HOSTADDRESS$ -C '/usr/bin/check_postgres --dbname=$ARG1$ --dbuser=$ARG2$ --action $ARG3$ --warning="$ARG4$" --critical="$ARG5$"'
}
# Over SSH (port $ARG1$, login nagios), use check_procs to require exactly
# one process (-c 1:1) owned by user zulip whose argument list matches the
# sync-public-streams script path; any other count is CRITICAL.
define command{
command_name check_sync_public_streams
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l nagios -t 30 -i /var/lib/nagios/.ssh/id_ed25519 -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_procs -u zulip -c 1:1 -a "/home/zulip/python-zulip-api/zulip/integrations/zephyr/sync-public-streams"'
}
define command{
command_name check_redis_ssh
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l nagios -t 30 -i /var/lib/nagios/.ssh/id_ed25519 -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_redis -H 127.0.0.1 -C /var/lib/nagios/redis_password'

View File

@@ -55,16 +55,6 @@ define hostgroup {
alias Redis servers
}
# Host group for the hosts running the Zephyr personals mirrors.
define hostgroup {
hostgroup_name zmirrorp
alias Zephyr mirror personals servers
}
# Host group for the hosts running the Zephyr class mirror.
define hostgroup {
hostgroup_name zmirror
alias Zephyr mirror classes servers
}
define hostgroup {
hostgroup_name postgresql
alias PostgreSQL app servers

View File

@@ -95,41 +95,6 @@ define service {
### Service groups
#### zmirror / zmirrorp
# Checks class-mirror forwarding on every host in the zmirror host group,
# connecting over SSH port 22; notifies the admins contact group.
define service {
use generic-service
service_description zephyr mirror forwarding
hostgroup_name zmirror
check_command check_zephyr_mirror_forwarding!22
normal_check_interval 2
# Note: the actual check is run via cron, so retry_check_interval
# should always equal normal_check_interval.
retry_check_interval 2
max_check_attempts 5
contact_groups admins
}
# Checks that the sync-public-streams process is running on every host in
# the zmirror host group (SSH port 22); notifies the admins contact group.
define service {
use generic-service
service_description zmirror subscriptions syncing
hostgroup_name zmirror
check_command check_sync_public_streams!22
normal_check_interval 2
retry_check_interval 2
max_check_attempts 5
contact_groups admins
}
# Checks the personal mirrors on every host in the zmirrorp host group
# (SSH port 22).  Check intervals and attempts are not set here, so they come
# from the generic-service template.
define service {
use generic-service
service_description Check personal zephyr mirrors
hostgroup_name zmirrorp
check_command check_personal_zephyr_mirrors!22
contact_groups admins
}
#### Application frontends
define service {
@@ -166,14 +131,6 @@ define service {
contact_groups ops_message
}
# Runs check_user_zephyr_mirror_liveness (SSH port 22) against every host in
# the prod_frontends host group; notifies the admins contact group.
define service {
use generic-service
service_description user zephyr mirror liveness
hostgroup_name prod_frontends
check_command check_user_zephyr_mirror_liveness!22
contact_groups admins
}
define service {
use generic-service
service_description Check analytics state

View File

@@ -1,57 +0,0 @@
#!/usr/bin/env python3
"""
Nagios plugin to check that Zephyr personals mirrors are forwarding.

This script works by just monitoring the files under
/home/zulip/mirror_status, which are updated by the Zephyr personals
mirrors when they receive the messages sent every minute by
/etc/cron.d/test_zephyr_personal_mirrors
"""

import os
import sys
import time
from typing import NoReturn, Optional

RESULTS_DIR: str = "/home/zulip/mirror_status"

# A mirror counts as down once its status file is at least this old.
# The threshold needs to be strictly greater than 1 minute, since with
# cron we expect intervals of at least 1 minute without any update.
MAX_AGE_SECONDS = 90

# Nagios plugin exit codes, keyed by state name.
states: dict[str, int] = {
    "OK": 0,
    "WARNING": 1,
    "CRITICAL": 2,
    "UNKNOWN": 3,
}


def report(state: str, output: str) -> NoReturn:
    """Print the state on the first line, then the details, and exit
    with the Nagios exit code for that state."""
    print(f"{state}\n{output}")
    sys.exit(states[state])


def check_mirrors(results_dir: str = RESULTS_DIR, now: Optional[float] = None) -> tuple[str, str]:
    """Inspect every status file in results_dir.

    A file is DOWN if its last line is not "0" or if it was last
    modified MAX_AGE_SECONDS or more before `now` (default: the
    current time).

    Returns (state, output): state is "OK" with no DOWN files,
    "WARNING" with 1-4, and "CRITICAL" with 5 or more; output has one
    line per status file.

    Raises OSError if the directory or one of its files cannot be read.
    """
    if now is None:
        now = time.time()

    output = ""
    down_count = 0
    # Sorted so that the report is stable from one run to the next.
    for results_file_name in sorted(os.listdir(results_dir)):
        this_state = "OK"
        results_file = os.path.join(results_dir, results_file_name)
        with open(results_file) as f:
            data = f.read().strip()
            # Stat the handle we just read, not the path, so the
            # contents and the mtime always describe the same file.
            last_check = os.fstat(f.fileno()).st_mtime
        time_since_last_check = now - last_check
        if data.split("\n")[-1].strip() != "0" or time_since_last_check >= MAX_AGE_SECONDS:
            down_count += 1
            this_state = "DOWN"
        last_check_ts = time.strftime("%Y-%m-%d %H:%M %Z", time.gmtime(last_check))
        output += f"{results_file}: {this_state} ({last_check_ts})\n"

    if down_count == 0:
        state = "OK"
    elif down_count < 5:
        state = "WARNING"
    else:
        state = "CRITICAL"
    return state, output


def main() -> NoReturn:
    try:
        state, output = check_mirrors()
    except OSError as e:
        # Without this, the traceback would exit with status 1, which
        # Nagios interprets as WARNING; not being able to run the check
        # at all is UNKNOWN.
        report("UNKNOWN", f"Could not read mirror status from {RESULTS_DIR}: {e}")
    report(state, output)


if __name__ == "__main__":
    main()

View File

@@ -1,95 +0,0 @@
#!/usr/bin/env python3
"""
Nagios plugin to check that our MIT users' Zephyr mirrors are running.

It must be run on a machine that is using the live database for the
Django ORM.

The result is recorded with atomic_nagios_write under the name
"check_user_zephyr_mirror_liveness": "critical" when the number of
mirrors that went quiet between 10 and 60 minutes ago is more than 25%
of the number that have been quiet for over 60 minutes, "ok" otherwise.
"""

import os
import sys
from datetime import timedelta

sys.path.append("/home/zulip/deployments/current")
from scripts.lib.setup_path import setup_path
from scripts.lib.zulip_tools import atomic_nagios_write

setup_path()

import django
from django.utils.timezone import now as timezone_now

os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
sys.path.append("/home/zulip/deployments/current")
sys.path.append("/home/zulip/deployments/current/zerver")

django.setup()

from zerver.models import UserActivity
from zerver.models.clients import get_client

now = timezone_now()

zephyr_client = get_client("zephyr_mirror")
all_users = UserActivity.objects.filter(
    # We need to use the client_id so we can use the partial index we
    # have created, which builds in both the query and the client_id.
    # The partial index is:
    #
    #     CREATE INDEX CONCURRENTLY zerver_useractivity_zehpyr_liveness
    #         ON zerver_useractivity(last_visit)
    #         WHERE client_id = 1005
    #         AND query IN ('get_events', '/api/v1/events');
    query__in=["get_events", "/api/v1/events"],
    client_id=zephyr_client.id,
)

# Users whose last visit was more than 10 minutes ago.
new_inactive_user_count = (
    all_users.filter(last_visit__lt=now - timedelta(minutes=10))
    .values("user_profile_id")
    .distinct("user_profile_id")
    .count()
)

# Users whose last visit was more than 60 minutes ago.
old_inactive_user_count = (
    all_users.filter(last_visit__lt=now - timedelta(minutes=60))
    .values("user_profile_id")
    .distinct("user_profile_id")
    .count()
)

recently_inactive_user_count = new_inactive_user_count - old_inactive_user_count

# This is "recently_inactive / old_inactive > 0.25" written without a
# division, so that old_inactive_user_count == 0 cannot raise
# ZeroDivisionError.  With no long-inactive mirrors, any recently
# inactive mirror is reported as critical, and none at all is ok.
if recently_inactive_user_count > 0.25 * old_inactive_user_count:
    recently_inactive_users = (
        all_users.filter(last_visit__lt=now - timedelta(minutes=10))
        .distinct("user_profile_id")
        .difference(
            all_users.filter(last_visit__lt=now - timedelta(minutes=60)).distinct("user_profile_id")
        )
    )

    too_old_data = (
        "Many mirrors recently became inactive\n"
        "Last call to get_message for recently out of date mirrors:\n"
        + "\n".join(
            "{:>16}: {}".format(
                user.user_profile.email,
                user.last_visit.strftime("%Y-%m-%d %H:%M %Z"),
            )
            for user in recently_inactive_users
        )
    )

    sys.exit(
        atomic_nagios_write(
            "check_user_zephyr_mirror_liveness", "critical", too_old_data, int(now.timestamp())
        )
    )
else:
    atomic_nagios_write(
        "check_user_zephyr_mirror_liveness",
        "ok",
        "Most mirrors that were recently active continue to be active",
        int(now.timestamp()),
    )

View File

@@ -1,50 +0,0 @@
#!/usr/bin/env python3
"""
Nagios plugin to check that Zephyr mirror forwarding is running.

This script just checks the contents of a file.  The forwarding test
itself lives in api/integrations/zephyr/check-mirroring and should be
run out of cron.

See puppet/kandra/files/cron.d/zephyr-mirror for the crontab details.
"""

import os
import sys
import time
from typing import NoReturn, Optional

RESULTS_FILE = "/var/lib/nagios_state/check-mirroring-results"

# Results older than this many seconds are reported as stale.
STALE_AFTER_SECONDS = 60 * 5

# Nagios plugin exit codes, keyed by state name.
states: dict[str, int] = {
    "OK": 0,
    "WARNING": 1,
    "CRITICAL": 2,
    "UNKNOWN": 3,
}


def report(state: str, data: str, last_check: float) -> NoReturn:
    """Print the state and the time of the last test run (last_check,
    seconds since the epoch), followed by data, then exit with the
    Nagios exit code for that state."""
    print(
        "{}: Last test run completed at {}\n{}".format(
            state, time.strftime("%Y-%m-%d %H:%M %Z", time.gmtime(last_check)), data
        )
    )
    sys.exit(states[state])


def check_results(
    results_file: str = RESULTS_FILE, now: Optional[float] = None
) -> tuple[str, str, float]:
    """Evaluate the results file written by the cron job.

    Returns (state, data, last_check).  state is "OK" when the last
    line of the file is "0" and "CRITICAL" otherwise; if the file was
    modified more than STALE_AFTER_SECONDS before `now` (default: the
    current time), state is "UNKNOWN" and data is replaced by a
    staleness message.  last_check is the file's modification time.

    Raises OSError if the file cannot be read.
    """
    with open(results_file) as f:
        data = f.read().strip()
        # The results file is replaced by a rename every few minutes;
        # stat the handle we read from so the contents and the mtime
        # always belong to the same file.
        last_check = os.fstat(f.fileno()).st_mtime

    if data.split("\n")[-1].strip() == "0":
        state = "OK"
    else:
        state = "CRITICAL"

    if now is None:
        now = time.time()
    time_since_last_check = now - last_check
    if time_since_last_check > STALE_AFTER_SECONDS:
        state = "UNKNOWN"
        data = "Results file is stale"

    return state, data, last_check


def main() -> NoReturn:
    try:
        state, data, last_check = check_results()
    except OSError as e:
        # Without this, the traceback would exit with status 1, which
        # Nagios interprets as WARNING; not being able to run the check
        # at all is UNKNOWN.
        print(f"UNKNOWN: Could not read {RESULTS_FILE}\n{e}")
        sys.exit(states["UNKNOWN"])
    report(state, data, last_check)


if __name__ == "__main__":
    main()

View File

@@ -1,23 +0,0 @@
; Zephyr class mirror: runs zephyr_mirror.py with --forward-class-messages
; against https://zephyr.zulipchat.com as the zulip user, and passes
; zmirror-renew-kerberos as its --on-startup-command.
[program:zmirror]
command=/home/zulip/python-zulip-api/zulip/integrations/zephyr/zephyr_mirror.py --site=https://zephyr.zulipchat.com --stamp-path=/home/zulip/ --user=tabbott/extra --forward-class-messages --log-path=/var/log/zulip/mirror-log --on-startup-command="/home/zulip/python-zulip-api/zulip/integrations/zephyr/zmirror-renew-kerberos"
priority=200 ; the relative start priority (default 999)
autostart=true ; start at supervisord start (default: true)
autorestart=true ; whether/when to restart (default: unexpected)
stopsignal=TERM ; signal used to kill process (default TERM)
stopwaitsecs=30 ; max num secs to wait b4 SIGKILL (default 10)
user=zulip ; setuid to this UNIX account to run the program
redirect_stderr=true ; redirect proc stderr to stdout (default false)
stdout_logfile=/var/log/zulip/zmirror.log ; stdout log path, NONE for none; default AUTO
environment=HOME="/home/zulip",USER="zulip"
; Runs sync-public-streams against https://zephyr.zulipchat.com as the zulip
; user.  Unlike zmirror above, only HOME is set in its environment.
[program:sync-public-streams]
command=/home/zulip/python-zulip-api/zulip/integrations/zephyr/sync-public-streams --site=https://zephyr.zulipchat.com
priority=200 ; the relative start priority (default 999)
autostart=true ; start at supervisord start (default: true)
autorestart=true ; whether/when to restart (default: unexpected)
stopsignal=TERM ; signal used to kill process (default TERM)
stopwaitsecs=30 ; max num secs to wait b4 SIGKILL (default 10)
user=zulip ; setuid to this UNIX account to run the program
redirect_stderr=true ; redirect proc stderr to stdout (default false)
stdout_logfile=/var/log/zulip/sync-public-streams.log ; stdout log path, NONE for none; default AUTO
environment=HOME="/home/zulip"

View File

@@ -1,3 +0,0 @@
# This file is automatically rewritten by the zephyr-clients post-install
# script. So treat it right.
zhm_args="-f z1.mit.edu z3.mit.edu"

View File

@@ -25,9 +25,4 @@ class kandra::prod_app_frontend_once {
command => '/usr/lib/nagios/plugins/zulip_app_frontend/check_send_receive_time',
use_proxy => false,
}
# Run the check_user_zephyr_mirror_liveness plugin from cron every minute
# (hour and minute are both '*').
zulip::cron { 'check_user_zephyr_mirror_liveness':
hour => '*',
minute => '*',
command => '/usr/lib/nagios/plugins/zulip_zephyr_mirror/check_user_zephyr_mirror_liveness',
}
}

View File

@@ -16,8 +16,6 @@ class kandra::profile::nagios inherits kandra::profile::base {
}
$default_host_domain = zulipconf('nagios', 'default_host_domain', undef)
# Host lists for the Zephyr mirror Nagios hosts: the comma-separated
# 'hosts_zmirror' / 'hosts_zmirrorp' values of the 'nagios' section, split
# into arrays.  NOTE(review): zulipconf presumably reads the Zulip config
# file; with a default of undef, confirm split() copes with an unset key.
$hosts_zmirror = split(zulipconf('nagios', 'hosts_zmirror', undef), ',')
$hosts_zmirrorp = split(zulipconf('nagios', 'hosts_zmirrorp', undef), ',')
$hosts_app_prod = split(zulipconf('nagios', 'hosts_app_prod', undef), ',')
$hosts_app_staging = split(zulipconf('nagios', 'hosts_app_staging', undef), ',')
$hosts_postgresql_primary = split(zulipconf('nagios', 'hosts_postgresql_primary', undef), ',')

View File

@@ -32,16 +32,6 @@ class kandra::profile::prod_app_frontend inherits kandra::profile::base {
notify => Service['nginx'],
}
# Sync the zulip_zephyr_mirror Nagios plugins from the kandra module.
# recurse + purge keep the directory an exact copy of the source: files not
# present in the module are removed.  Requires the Nagios plugins package.
file { '/usr/lib/nagios/plugins/zulip_zephyr_mirror':
require => Package[$zulip::common::nagios_plugins],
recurse => true,
purge => true,
owner => 'root',
group => 'root',
mode => '0755',
source => 'puppet:///modules/kandra/nagios_plugins/zulip_zephyr_mirror',
}
# Prod has our Apple Push Notifications Service private key at
# /etc/ssl/django-private/apns-dist.pem
}

View File

@@ -1,76 +0,0 @@
# Profile for a Zephyr class mirror host.  On top of kandra::profile::base it
# installs the Zephyr and Kerberos packages and ships, from the kandra module,
# the supervisor program definitions, the zephyr-mirror crontab, krb5.conf,
# the zephyr-clients defaults, the Nagios plugins and the iptables fragments.
class kandra::profile::zmirror inherits kandra::profile::base {
include zulip::supervisor
$zmirror_packages = [# Packages needed to run the mirror
'libzephyr4-krb5',
'zephyr-clients',
'krb5-config',
'krb5-user',
# Packages needed for ctypes access to Zephyr
'python3-dev',
'python3-typing-extensions',
]
package { $zmirror_packages:
ensure => installed,
}
# Supervisor program definitions; a change to the file notifies the
# supervisor service.
file { "${zulip::common::supervisor_conf_dir}/zmirror.conf":
ensure => file,
require => Package[supervisor],
owner => 'root',
group => 'root',
mode => '0644',
source => 'puppet:///modules/kandra/supervisor/conf.d/zmirror.conf',
notify => Service['supervisor'],
}
# Crontab for the mirror host.
file { '/etc/cron.d/zephyr-mirror':
ensure => file,
owner => 'root',
group => 'root',
mode => '0644',
source => 'puppet:///modules/kandra/cron.d/zephyr-mirror',
}
# Kerberos client configuration.
file { '/etc/krb5.conf':
ensure => file,
owner => 'root',
group => 'root',
mode => '0644',
source => 'puppet:///modules/kandra/krb5.conf',
}
# Defaults file for the zephyr-clients package.
file { '/etc/default/zephyr-clients':
ensure => file,
owner => 'root',
group => 'root',
mode => '0644',
source => 'puppet:///modules/kandra/zephyr-clients',
}
# Nagios plugins for the mirror; recurse + purge keep the directory an exact
# copy of the module's directory.
file { '/usr/lib/nagios/plugins/zulip_zephyr_mirror':
require => Package[$zulip::common::nagios_plugins],
recurse => true,
purge => true,
owner => 'root',
group => 'root',
mode => '0755',
source => 'puppet:///modules/kandra/nagios_plugins/zulip_zephyr_mirror',
}
# Allow the relevant UDP ports
concat::fragment { 'iptables-zmirror.v4':
target => '/etc/iptables/rules.v4',
source => 'puppet:///modules/kandra/iptables/zmirror.v4',
order => '20',
}
concat::fragment { 'iptables-zmirror.v6':
target => '/etc/iptables/rules.v6',
source => 'puppet:///modules/kandra/iptables/zmirror.v6',
order => '20',
}
# TODO: Do the rest of our setup, which includes at least:
# Putting tabbott/extra's keytab on the system at /home/zulip/tabbott.extra.keytab
}

View File

@@ -1,100 +0,0 @@
# Profile for a Zephyr personals mirror host.  On top of kandra::profile::base
# it installs the Zephyr and Kerberos packages plus restricted-ssh-commands,
# restricts what the production-write-ccache SSH key may run, creates the
# per-user mirror state directories, and ships the test crontab, krb5.conf,
# the Nagios plugins and the iptables fragments from the kandra module.
class kandra::profile::zmirror_personals inherits kandra::profile::base {
include zulip::supervisor
# Override the authorized_keys of the zulip user's dotfiles resource
# (NOTE(review): presumably declared in kandra::profile::base -- confirm).
Kandra::User_Dotfiles['zulip'] {
authorized_keys => [
'common',
'production-write-ccache',
],
}
$zmirror_packages = [ # Packages needed to run the mirror
'libzephyr4-krb5',
'zephyr-clients',
'krb5-config',
'krb5-user',
# Packages needed for ctypes access to Zephyr
'python3-dev',
'python3-typing-extensions',
'restricted-ssh-commands',
]
package { $zmirror_packages:
ensure => installed,
}
# The production-write-ccache key uses
# `command="/usr/lib/restricted-ssh-commands"` which allows us to
# limit the commands it can run.
file { '/etc/restricted-ssh-commands':
ensure => directory,
owner => 'root',
group => 'root',
mode => '0755',
}
# The allowed-command pattern for the zulip user.  The pieces are joined
# into a single regular expression line that only matches process_ccache
# invoked with exactly three arguments: a name made of [a-z0-9_.-], a
# 32-character alphanumeric string, and a string of base64-alphabet
# characters with up to three trailing '='.
file { '/etc/restricted-ssh-commands/zulip':
ensure => file,
owner => 'root',
group => 'root',
mode => '0644',
content => join([
'^/home/zulip/python-zulip-api/zulip/integrations/zephyr/process_ccache ',
'[a-z0-9_.-]+ ',
'[A-Za-z0-9]{32} ',
'[-A-Za-z0-9+/]*={0,3}$',
"\n",
], ''),
}
# Kerberos client configuration.
file { '/etc/krb5.conf':
ensure => file,
owner => 'root',
group => 'root',
mode => '0644',
source => 'puppet:///modules/kandra/krb5.conf',
}
# Adds the zmirror/*.conf glob to the supervisor config file
# (NOTE(review): presumably extending its include list -- confirm against
# the other fragments of that concat target).
concat::fragment { '01-supervisor-zmirror':
order => '10',
target => $zulip::common::supervisor_conf_file,
content => " ${zulip::common::supervisor_system_conf_dir}/zmirror/*.conf",
}
# Working directories for the mirrors, all owned by zulip, plus the
# supervisor zmirror directory matched by the glob above.
file { ['/home/zulip/api-keys', '/home/zulip/zephyr_sessions', '/home/zulip/ccache',
'/home/zulip/mirror_status', "${zulip::common::supervisor_system_conf_dir}/zmirror"]:
ensure => directory,
mode => '0755',
owner => 'zulip',
group => 'zulip',
}
# Crontab that sends the periodic test messages.
file { '/etc/cron.d/test_zephyr_personal_mirrors':
ensure => file,
owner => 'root',
group => 'root',
mode => '0644',
source => 'puppet:///modules/kandra/cron.d/test_zephyr_personal_mirrors',
}
# Nagios plugins for the mirror; recurse + purge keep the directory an exact
# copy of the module's directory.
file { '/usr/lib/nagios/plugins/zulip_zephyr_mirror':
require => Package[$zulip::common::nagios_plugins],
recurse => true,
purge => true,
owner => 'root',
group => 'root',
mode => '0755',
source => 'puppet:///modules/kandra/nagios_plugins/zulip_zephyr_mirror',
}
# Allow the relevant UDP ports
concat::fragment { 'iptables-zmirror.v4':
target => '/etc/iptables/rules.v4',
source => 'puppet:///modules/kandra/iptables/zmirror.v4',
order => '20',
}
concat::fragment { 'iptables-zmirror.v6':
target => '/etc/iptables/rules.v6',
source => 'puppet:///modules/kandra/iptables/zmirror.v6',
order => '20',
}
}

View File

@@ -28,26 +28,6 @@ define host{
}
<% end -%>
<%# One Nagios host per entry of @hosts_zmirror, in the zmirror host group. -%>
<%# Names without a "." get @default_host_domain appended to form the address. -%>
<% @hosts_zmirror.each do |host| -%>
define host{
use generic-host
host_name <%= host %>
alias <%= host %>
address <%= host %><% unless host.include?(".") %>.<%= @default_host_domain %><% end %>
hostgroups all,non_aws_host,zmirror,flaky_servers
}
<% end -%>
<%# One Nagios host per entry of @hosts_zmirrorp, in the zmirrorp host group; -%>
<%# the address is built the same way as for the zmirror hosts. -%>
<% @hosts_zmirrorp.each do |host| -%>
define host{
use generic-host
host_name <%= host %>
alias <%= host %>
address <%= host %><% unless host.include?(".") %>.<%= @default_host_domain %><% end %>
hostgroups all,non_aws_host,zmirrorp,flaky_servers
}
<% end -%>
<% @hosts_postgresql_primary.each do |host| -%>
define host{
use generic-host