From f3bafe6b2cb55305bba588fdca4f4e77e3ef2026 Mon Sep 17 00:00:00 2001 From: Varac Date: Thu, 22 Jun 2017 11:23:43 +0200 Subject: Delay hard state of the nagios APT check Delay a hard state of the APT check for 1 day so unattended_upgrades has time to upgrade packages. Resolves: #8748 --- puppet/modules/site_check_mk/files/extra_service_conf.mk | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'puppet/modules/site_check_mk/files') diff --git a/puppet/modules/site_check_mk/files/extra_service_conf.mk b/puppet/modules/site_check_mk/files/extra_service_conf.mk index c7120a96..9212af95 100644 --- a/puppet/modules/site_check_mk/files/extra_service_conf.mk +++ b/puppet/modules/site_check_mk/files/extra_service_conf.mk @@ -1,6 +1,9 @@ # retry 3 times before setting a service into a hard state -# and send out notification +# Delay a hard state of the APT check for 1 day +# so unattended_upgrades has time to upgrade packages. +# extra_service_conf["max_check_attempts"] = [ + ("360", ALL_HOSTS , ["APT"] ), ("4", ALL_HOSTS , ALL_SERVICES ) ] @@ -11,4 +14,3 @@ extra_service_conf["max_check_attempts"] = [ extra_service_conf["normal_check_interval"] = [ ("4", ALL_HOSTS , "Check_MK" ) ] - -- cgit v1.2.3 From 57faf66d7b82cc1ce67cf2e39ba7293c5a9d4bfa Mon Sep 17 00:00:00 2001 From: Azul Date: Wed, 8 Nov 2017 09:13:46 +0100 Subject: webapp: alert on 409 responses They might be meaningful response codes for some scenarios. But so far we are not consciously sending them out. If they occur, that is because we handed them down from couch. So we might want to fix the underlying issue. Couch 409s should be caught by the webapp and handled there. 
--- puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'puppet/modules/site_check_mk/files') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg index 337d9ec6..a5375cc8 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg @@ -1,6 +1,10 @@ /var/log/leap/webapp.log # check for webapp errors C Completed 500 +# also alert conflicts. They might be meaningful response codes +# but so far we were just handing them on from couch and they +# indicated some actual problem. + C Completed 409 # couch connection issues C webapp.*Could not connect to couch database messages due to 401 Unauthorized: {"error":"unauthorized","reason":"You are not a server admin."} # ignore RoutingErrors that rails throw when it can't handle a url -- cgit v1.2.3