mirror of
https://github.com/zulip/zulip.git
synced 2025-11-18 21:06:16 +00:00
nagios: Set max_check_attempts to 3 for rabbitmq consumers.
This works around the fact that we seem to have a mysterious extra checkup 40s after an error first occurs with these checks, which always fails because the data is updated by a cron job that runs every minute. (imported from commit e7fe9c85e8399115443269287e695b140b4443ff)
This commit is contained in:
@@ -256,6 +256,9 @@ define service {
|
|||||||
use generic-service
|
use generic-service
|
||||||
service_description Check rabbitmq queue sizes
|
service_description Check rabbitmq queue sizes
|
||||||
check_command check_rabbitmq_queues!22
|
check_command check_rabbitmq_queues!22
|
||||||
|
# Workaround weird checks 40s after first error causing alerts
|
||||||
|
# from a single failure because cron hasn't run again yet
|
||||||
|
max_check_attempts 3
|
||||||
hostgroup_name frontends
|
hostgroup_name frontends
|
||||||
contact_groups page_admins
|
contact_groups page_admins
|
||||||
}
|
}
|
||||||
@@ -264,6 +267,9 @@ define service {
|
|||||||
use generic-service
|
use generic-service
|
||||||
service_description Check rabbitmq notify_tornado consumers
|
service_description Check rabbitmq notify_tornado consumers
|
||||||
check_command check_rabbitmq_tornado_consumers!22
|
check_command check_rabbitmq_tornado_consumers!22
|
||||||
|
# Workaround weird checks 40s after first error causing alerts
|
||||||
|
# from a single failure because cron hasn't run again yet
|
||||||
|
max_check_attempts 3
|
||||||
hostgroup_name frontends
|
hostgroup_name frontends
|
||||||
contact_groups page_admins
|
contact_groups page_admins
|
||||||
}
|
}
|
||||||
@@ -272,6 +278,9 @@ define service {
|
|||||||
use generic-service
|
use generic-service
|
||||||
service_description Check rabbitmq useractivity consumers
|
service_description Check rabbitmq useractivity consumers
|
||||||
check_command check_rabbitmq_useractivity_consumers!22
|
check_command check_rabbitmq_useractivity_consumers!22
|
||||||
|
# Workaround weird checks 40s after first error causing alerts
|
||||||
|
# from a single failure because cron hasn't run again yet
|
||||||
|
max_check_attempts 3
|
||||||
hostgroup_name frontends
|
hostgroup_name frontends
|
||||||
contact_groups admins
|
contact_groups admins
|
||||||
}
|
}
|
||||||
@@ -280,6 +289,9 @@ define service {
|
|||||||
use generic-service
|
use generic-service
|
||||||
service_description Check rabbitmq invites consumers
|
service_description Check rabbitmq invites consumers
|
||||||
check_command check_rabbitmq_invites_consumers!22
|
check_command check_rabbitmq_invites_consumers!22
|
||||||
|
# Workaround weird checks 40s after first error causing alerts
|
||||||
|
# from a single failure because cron hasn't run again yet
|
||||||
|
max_check_attempts 3
|
||||||
hostgroup_name frontends
|
hostgroup_name frontends
|
||||||
contact_groups admins
|
contact_groups admins
|
||||||
}
|
}
|
||||||
@@ -288,6 +300,9 @@ define service {
|
|||||||
use generic-service
|
use generic-service
|
||||||
service_description Check rabbitmq signups consumers
|
service_description Check rabbitmq signups consumers
|
||||||
check_command check_rabbitmq_signups_consumers!22
|
check_command check_rabbitmq_signups_consumers!22
|
||||||
|
# Workaround weird checks 40s after first error causing alerts
|
||||||
|
# from a single failure because cron hasn't run again yet
|
||||||
|
max_check_attempts 3
|
||||||
hostgroup_name frontends
|
hostgroup_name frontends
|
||||||
contact_groups admins
|
contact_groups admins
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user