diff --git a/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg b/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg index c040037547..aad5c71df9 100644 --- a/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg +++ b/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg @@ -256,6 +256,9 @@ define service { use generic-service service_description Check rabbitmq queue sizes check_command check_rabbitmq_queues!22 + # Work around the weird checks that run 40s after the first error and cause alerts + # from a single failure because cron hasn't run again yet + max_check_attempts 3 hostgroup_name frontends contact_groups page_admins } @@ -264,6 +267,9 @@ define service { use generic-service service_description Check rabbitmq notify_tornado consumers check_command check_rabbitmq_tornado_consumers!22 + # Work around the weird checks that run 40s after the first error and cause alerts + # from a single failure because cron hasn't run again yet + max_check_attempts 3 hostgroup_name frontends contact_groups page_admins } @@ -272,6 +278,9 @@ define service { use generic-service service_description Check rabbitmq useractivity consumers check_command check_rabbitmq_useractivity_consumers!22 + # Work around the weird checks that run 40s after the first error and cause alerts + # from a single failure because cron hasn't run again yet + max_check_attempts 3 hostgroup_name frontends contact_groups admins } @@ -280,6 +289,9 @@ define service { use generic-service service_description Check rabbitmq invites consumers check_command check_rabbitmq_invites_consumers!22 + # Work around the weird checks that run 40s after the first error and cause alerts + # from a single failure because cron hasn't run again yet + max_check_attempts 3 hostgroup_name frontends contact_groups admins } @@ -288,6 +300,9 @@ define service { use generic-service service_description Check rabbitmq signups consumers check_command check_rabbitmq_signups_consumers!22 + # Work around the weird checks that run 40s after the first error and cause alerts + # from a single 
failure because cron hasn't run again yet + max_check_attempts 3 hostgroup_name frontends contact_groups admins }