From 21af45fd2156e16786e3476c779115662ecb72a7 Mon Sep 17 00:00:00 2001 From: Micah Anderson Date: Tue, 21 Jul 2015 15:47:44 -0400 Subject: Increase tapicero heatbeat nagios checks (#7275) Increase warning/critical thresholds for time between tapicero heartbeat checks so it will emit less false positives Change-Id: I0f97373d88658b7f17b2c4e8c1963198dc3f66ed --- puppet/modules/site_check_mk/manifests/agent/tapicero.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'puppet/modules/site_check_mk') diff --git a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp index ad9962d4..8505b34a 100644 --- a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp +++ b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp @@ -20,7 +20,7 @@ class site_check_mk::agent::tapicero { 'Tapicero_Heartbeat': incl => '/etc/check_mk/mrpe.cfg', lens => 'Spacevars.lns', - changes => 'set Tapicero_Heartbeat \'/usr/local/lib/nagios/plugins/check_last_regex_in_log -f /var/log/leap/tapicero.log -r "tapicero" -w 300 -c 600\'', + changes => 'set Tapicero_Heartbeat \'/usr/local/lib/nagios/plugins/check_last_regex_in_log -f /var/log/leap/tapicero.log -r "tapicero" -w 1200 -c 2400\'', require => File['/etc/check_mk/mrpe.cfg']; } } -- cgit v1.2.3 From 5e21bb0d2415de0a40adfaa3b149313c459e7947 Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 11 Aug 2015 14:57:58 +0200 Subject: Don't use check_mk logwatch to watch bigcouch logs anymore (#7375) The rationale here is: - bigcouch/its included erlang version is incredibly noisy and spits out warnings/error msgs all the time - it uses the worst logging format i ever saw, multiple lines directly to a file (couch 2.0 uses lager as logging backend which can log to syslog) - trying to sort out the false positives will take too much time, and who knows which of them will be resolved in couch 1.6/2.0 Change-Id: Idbe6b37a19cd65ce31a50d4c28eedb4cf15ba3b5 --- puppet/modules/site_check_mk/manifests/agent/couchdb.pp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'puppet/modules/site_check_mk') diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index abfc7ad0..8de5121b 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -1,9 +1,17 @@ +# configure logwatch and nagios checks for couchdb class site_check_mk::agent::couchdb { - # watch logs - file { '/etc/check_mk/logwatch.d/bigcouch.cfg': - source => 'puppet:///modules/site_check_mk/agent/logwatch/bigcouch.cfg', - } + # watch bigcouch logs + # currently disabled because bigcouch is too noisy + # see https://leap.se/code/issues/7375 for more details + # and site_config::remove_files for removing leftovers + #file { '/etc/check_mk/logwatch.d/bigcouch.cfg': + # source => 'puppet:///modules/site_check_mk/agent/logwatch/bigcouch.cfg', + #} + + # check syslog msg from: + # - empd + # - /usr/local/bin/couch-doc-update concat::fragment { 'syslog_couchdb': source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/couchdb.cfg', target => '/etc/check_mk/logwatch.d/syslog.cfg', -- cgit v1.2.3 From c4ddd197a1ca6a3fac70a86a3ed3dc3d4920e3ca Mon Sep 17 00:00:00 2001 From: varac Date: Thu, 13 Aug 2015 07:28:26 +0200 Subject: Increase readability of nagios notification mail subjects (#6847) Change-Id: Ic9af9ef3602abbb51edf1c9d71d4d264b4ace714 --- puppet/modules/site_check_mk/files/extra_host_conf.mk | 6 ------ puppet/modules/site_check_mk/manifests/server.pp | 2 +- puppet/modules/site_check_mk/templates/extra_host_conf.mk | 13 +++++++++++++ 3 files changed, 14 insertions(+), 7 deletions(-) delete mode 100644 puppet/modules/site_check_mk/files/extra_host_conf.mk create mode 100644 puppet/modules/site_check_mk/templates/extra_host_conf.mk (limited to 'puppet/modules/site_check_mk') diff --git a/puppet/modules/site_check_mk/files/extra_host_conf.mk b/puppet/modules/site_check_mk/files/extra_host_conf.mk deleted file mode 100644 index 2c96f97a..00000000 --- a/puppet/modules/site_check_mk/files/extra_host_conf.mk +++ /dev/null @@ -1,6 +0,0 @@ -# retry 3 times before setting a host into a hard state -# and send out notification -extra_host_conf["max_check_attempts"] = [ - ("4", ALL_HOSTS ) -] - diff --git a/puppet/modules/site_check_mk/manifests/server.pp b/puppet/modules/site_check_mk/manifests/server.pp index 67519513..57f68d3e 100644 --- a/puppet/modules/site_check_mk/manifests/server.pp +++ b/puppet/modules/site_check_mk/manifests/server.pp @@ -54,7 +54,7 @@ class site_check_mk::server { notify => Exec['check_mk-refresh'], require => Package['check-mk-server']; '/etc/check_mk/conf.d/extra_host_conf.mk': - source => 'puppet:///modules/site_check_mk/extra_host_conf.mk', + content => template('site_check_mk/extra_host_conf.mk'), notify => Exec['check_mk-refresh'], require => Package['check-mk-server']; diff --git a/puppet/modules/site_check_mk/templates/extra_host_conf.mk b/puppet/modules/site_check_mk/templates/extra_host_conf.mk new file mode 100644 index 00000000..bc27b514 --- /dev/null +++ b/puppet/modules/site_check_mk/templates/extra_host_conf.mk @@ -0,0 +1,13 @@ +# retry 3 times before setting a host into a hard state +# and send out notification +extra_host_conf["max_check_attempts"] = [ + ("4", ALL_HOSTS ) +] + +# Use hostnames as alias so notification mail subjects +# are more readable and not so long. Alias defaults to +# the fqdn of a host is not changed. +extra_host_conf["alias"] = [ +<% @hosts.keys.sort.each do |key| -%> ( "<%= key.strip %>", ["<%= @hosts[key]['domain_internal']%>"]), +<% end -%> +] -- cgit v1.2.3 From 276b77cdcc0d169b84e046afe8763e2c52ff76fb Mon Sep 17 00:00:00 2001 From: varac Date: Mon, 5 Oct 2015 15:22:25 +0200 Subject: [feat] remove tapicero leftovers Soledad now creates user-dbs, which has been done by tapicero in the past. we need to remove any leftovers from tapicero. --- .../files/agent/logwatch/bigcouch.cfg | 2 +- .../files/agent/logwatch/tapicero.cfg | 11 --------- .../site_check_mk/manifests/agent/tapicero.pp | 26 ---------------------- 3 files changed, 1 insertion(+), 38 deletions(-) delete mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg delete mode 100644 puppet/modules/site_check_mk/manifests/agent/tapicero.pp (limited to 'puppet/modules/site_check_mk') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg index 95ddd2ca..0f378a5a 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg @@ -6,7 +6,7 @@ I 127.0.0.1 localhost:5984 .* ok # https://leap.se/code/issues/5246 I Shutting down group server - # ignore bigcouch conflict errors, mainly coming from tapicero creating new users + # ignore bigcouch conflict errors I Error in process.*{{nocatch,conflict} # ignore "Uncaught error in HTTP request: {exit, normal}" error # it's suppressed in later versions of bigcouch anhow diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg deleted file mode 100644 index d98f5094..00000000 --- a/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg +++ /dev/null @@ -1,11 +0,0 @@ -/var/log/leap/tapicero.log -# Ignore transient Tapicero errors when creating a db (#6511) - I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::ResourceNotFound|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error) - C tapicero.*RestClient::InternalServerError: -# possible race condition between multiple tapicero -# instances, so we ignore it -# see https://leap.se/code/issues/5168 - I tapicero.*RestClient::PreconditionFailed: - C tapicero.*Creating database.*failed due to: - C tapicero.*failed - W tapicero.*Couch stream ended unexpectedly. diff --git a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp deleted file mode 100644 index 8505b34a..00000000 --- a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp +++ /dev/null @@ -1,26 +0,0 @@ -# sets up tapicero monitoring -class site_check_mk::agent::tapicero { - - include ::site_nagios::plugins - - # watch logs - file { '/etc/check_mk/logwatch.d/tapicero.cfg': - source => 'puppet:///modules/site_check_mk/agent/logwatch/tapicero.cfg', - } - - # local nagios plugin checks via mrpe - augeas { - 'Tapicero_Procs': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => [ - 'rm /files/etc/check_mk/mrpe.cfg/Tapicero_Procs', - "set Tapicero_Procs \"/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 --ereg-argument-array='^tapicero$'\"" ], - require => File['/etc/check_mk/mrpe.cfg']; - 'Tapicero_Heartbeat': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => 'set Tapicero_Heartbeat \'/usr/local/lib/nagios/plugins/check_last_regex_in_log -f /var/log/leap/tapicero.log -r "tapicero" -w 1200 -c 2400\'', - require => File['/etc/check_mk/mrpe.cfg']; - } -} -- cgit v1.2.3 From cfbe272d17a21c4bff088a87865cbcbefc837e39 Mon Sep 17 00:00:00 2001 From: varac Date: Fri, 30 Oct 2015 11:23:40 +0100 Subject: [feat] Remove bigcouch nagios leftovers When migrating from bigcouch to couchdb, we need to remove leftover nagios tests for bigcouch. - Added new classes: site_check_mk::agent::couchdb::bigcouch and site_check_mk::agent::couchdb::master - Tested: unstable.pixelated-project.org - Resolves: https://github.com/pixelated/pixelated-platform/issues/126 --- .../site_check_mk/manifests/agent/couchdb.pp | 49 ++++--------------- .../manifests/agent/couchdb/bigcouch.pp | 56 ++++++++++++++++++++++ .../manifests/agent/couchdb/master.pp | 23 +++++++++ 3 files changed, 89 insertions(+), 39 deletions(-) create mode 100644 puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp create mode 100644 puppet/modules/site_check_mk/manifests/agent/couchdb/master.pp (limited to 'puppet/modules/site_check_mk') diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index 8de5121b..1554fd3c 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -1,40 +1,18 @@ -# configure logwatch and nagios checks for couchdb +# configure logwatch and nagios checks for couchdb (both bigcouch and plain +# couchdb installations) class site_check_mk::agent::couchdb { - # watch bigcouch logs - # currently disabled because bigcouch is too noisy - # see https://leap.se/code/issues/7375 for more details - # and site_config::remove_files for removing leftovers - #file { '/etc/check_mk/logwatch.d/bigcouch.cfg': - # source => 'puppet:///modules/site_check_mk/agent/logwatch/bigcouch.cfg', - #} - - # check syslog msg from: - # - empd - # - /usr/local/bin/couch-doc-update concat::fragment { 'syslog_couchdb': source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/couchdb.cfg', target => '/etc/check_mk/logwatch.d/syslog.cfg', order => '02'; } - - # check bigcouch processes - augeas { - 'Bigcouch_epmd_procs': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => [ - 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_epmd_procs', - 'set Bigcouch_epmd_procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/epmd\'' ], - require => File['/etc/check_mk/mrpe.cfg']; - 'Bigcouch_beam_procs': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => [ - 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_beam_procs', - 'set Bigcouch_beam_procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/beam\'' ], - require => File['/etc/check_mk/mrpe.cfg']; + # check different couchdb stats + file { '/usr/lib/check_mk_agent/local/leap_couch_stats.sh': + source => 'puppet:///modules/site_check_mk/agent/local_checks/couchdb/leap_couch_stats.sh', + mode => '0755', + require => Package['check_mk-agent'] } # check open files for bigcouch proc @@ -44,20 +22,13 @@ class site_check_mk::agent::couchdb { mode => '0755' } augeas { - 'Bigcouch_open_files': + 'Couchdb_open_files': incl => '/etc/check_mk/mrpe.cfg', lens => 'Spacevars.lns', changes => [ - 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_open_files', - 'set Bigcouch_open_files \'/srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720\'' ], + 'rm /files/etc/check_mk/mrpe.cfg/Couchdb_open_files', + 'set Couchdb_open_files \'/srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720\'' ], require => File['/etc/check_mk/mrpe.cfg']; } - - # check different couchdb stats - file { '/usr/lib/check_mk_agent/local/leap_couch_stats.sh': - source => 'puppet:///modules/site_check_mk/agent/local_checks/couchdb/leap_couch_stats.sh', - mode => '0755', - require => Package['check_mk-agent'] - } } diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp new file mode 100644 index 00000000..073d07a9 --- /dev/null +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp @@ -0,0 +1,56 @@ +# configure logwatch and nagios checks for bigcouch +class site_check_mk::agent::couchdb::bigcouch { + + # watch bigcouch logs + # currently disabled because bigcouch is too noisy + # see https://leap.se/code/issues/7375 for more details + # and site_config::remove_files for removing leftovers + #file { '/etc/check_mk/logwatch.d/bigcouch.cfg': + # source => 'puppet:///modules/site_check_mk/agent/logwatch/bigcouch.cfg', + #} + + # check syslog msg from: + # - empd + # - /usr/local/bin/couch-doc-update + concat::fragment { 'syslog_bigcouch': + source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/bigcouch.cfg', + target => '/etc/check_mk/logwatch.d/syslog.cfg', + order => '02'; + } + + # check bigcouch processes + augeas { + 'Bigcouch_epmd_procs': + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_epmd_procs', + 'set Bigcouch_epmd_procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/epmd\'' ], + require => File['/etc/check_mk/mrpe.cfg']; + 'Bigcouch_beam_procs': + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_beam_procs', + 'set Bigcouch_beam_procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/beam\'' ], + require => File['/etc/check_mk/mrpe.cfg']; + } + + # check open files for bigcouch proc + include site_check_mk::agent::package::perl_plugin + file { '/srv/leap/nagios/plugins/check_unix_open_fds.pl': + source => 'puppet:///modules/site_check_mk/agent/nagios_plugins/check_unix_open_fds.pl', + mode => '0755' + } + + augeas { + 'Bigcouch_open_files': + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_open_files', + 'set Bigcouch_open_files \'/srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720\'' ], + require => File['/etc/check_mk/mrpe.cfg']; + } + +} diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb/master.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb/master.pp new file mode 100644 index 00000000..291b87d1 --- /dev/null +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb/master.pp @@ -0,0 +1,23 @@ +# configure logwatch and nagios checks for plain single couchdb master +class site_check_mk::agent::couchdb::master { + + # remove bigcouch leftovers + augeas { + 'Bigcouch_epmd_procs': + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_epmd_procs', + require => File['/etc/check_mk/mrpe.cfg']; + 'Bigcouch_beam_procs': + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_beam_procs', + require => File['/etc/check_mk/mrpe.cfg']; + 'Bigcouch_open_files': + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_open_files', + require => File['/etc/check_mk/mrpe.cfg']; + } + +} -- cgit v1.2.3 From 9b135ab96f1e419698e3e638ea871097fe4956e4 Mon Sep 17 00:00:00 2001 From: varac Date: Fri, 30 Oct 2015 18:00:51 +0100 Subject: [bug] Remove duplicte declaration Duplicate declaration: File[/srv/leap/nagios/plugins/check_unix_open_fds.pl] is already declared in file /srv/leap/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp at line 44; cannot redeclare at /srv/leap/puppet/modules/site_check_mk/manifests/agent/couchdb.pp:23 on node rewdevcouch1.rewire.org --- puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp | 7 ------- 1 file changed, 7 deletions(-) (limited to 'puppet/modules/site_check_mk') diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp index 073d07a9..82c3ac72 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp @@ -36,13 +36,6 @@ class site_check_mk::agent::couchdb::bigcouch { require => File['/etc/check_mk/mrpe.cfg']; } - # check open files for bigcouch proc - include site_check_mk::agent::package::perl_plugin - file { '/srv/leap/nagios/plugins/check_unix_open_fds.pl': - source => 'puppet:///modules/site_check_mk/agent/nagios_plugins/check_unix_open_fds.pl', - mode => '0755' - } - augeas { 'Bigcouch_open_files': incl => '/etc/check_mk/mrpe.cfg', -- cgit v1.2.3 From 87ddb4d6505229f36b096188c3e43a19281b540c Mon Sep 17 00:00:00 2001 From: varac Date: Sat, 31 Oct 2015 20:03:28 +0100 Subject: [bug] Add bigcouch syslog snippet for logwatch --- .../modules/site_check_mk/files/agent/logwatch/syslog/bigcouch.cfg | 5 +++++ puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/syslog/bigcouch.cfg (limited to 'puppet/modules/site_check_mk') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/bigcouch.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/bigcouch.cfg new file mode 100644 index 00000000..f53f0780 --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/bigcouch.cfg @@ -0,0 +1,5 @@ +# on one-node bigcouch setups, we'll get this msg +# a lot, so we ignore it here until we fix +# https://leap.se/code/issues/5244 + I epmd: got partial packet only on file descriptor + diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg index f546135a..5f8d5b95 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg @@ -1,7 +1,2 @@ C /usr/local/bin/couch-doc-update.*failed C /usr/local/bin/couch-doc-update.*ERROR -# on one-node bigcouch setups, we'll get this msg -# a lot, so we ignore it here until we fix -# https://leap.se/code/issues/5244 - I epmd: got partial packet only on file descriptor - -- cgit v1.2.3