From 2b875d23a3d2a53c9276dfee1a71085967dec4c0 Mon Sep 17 00:00:00 2001 From: varac Date: Mon, 24 Feb 2014 13:45:50 +0100 Subject: One monitor node for non-local environments and one for local environment (Feature #2981), wip also, use the configured ssh port for every node --- puppet/modules/site_check_mk/manifests/server.pp | 5 +++-- puppet/modules/site_check_mk/templates/use_ssh.mk | 5 +++-- puppet/modules/site_nagios/manifests/add_host_services.pp | 2 ++ puppet/modules/site_nagios/manifests/server.pp | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/manifests/server.pp b/puppet/modules/site_check_mk/manifests/server.pp index e77862e5..4bbeb850 100644 --- a/puppet/modules/site_check_mk/manifests/server.pp +++ b/puppet/modules/site_check_mk/manifests/server.pp @@ -4,10 +4,11 @@ class site_check_mk::server { $pubkey = $ssh_hash['authorized_keys']['monitor']['key'] $type = $ssh_hash['authorized_keys']['monitor']['type'] $seckey = $ssh_hash['monitor']['private_key'] - $ssh_port = $ssh_hash['port'] $nagios_hiera = hiera_hash('nagios') - $hosts = $nagios_hiera['hosts'] + $nagios_hosts = $nagios_hiera['hosts'] + + $hosts = hiera_hash('hosts') $all_hosts = inline_template ('<% @hosts.keys.sort.each do |key| -%>"<%= @hosts[key]["domain_internal"] %>", <% end -%>') package { 'check-mk-server': diff --git a/puppet/modules/site_check_mk/templates/use_ssh.mk b/puppet/modules/site_check_mk/templates/use_ssh.mk index 4c5523db..0bebebcf 100644 --- a/puppet/modules/site_check_mk/templates/use_ssh.mk +++ b/puppet/modules/site_check_mk/templates/use_ssh.mk @@ -1,5 +1,6 @@ # http://mathias-kettner.de/checkmk_datasource_programs.html datasource_programs = [ - ( "ssh -l root -i /etc/check_mk/.ssh/id_rsa -p <%= @ssh_port %> check_mk_agent", ALL_HOSTS ), -] +<% nagios_hosts.sort.each do |name,config| %> + ( "ssh -l root -i /etc/check_mk/.ssh/id_rsa -p <%=config['ssh_port']%> <%=config['domain_internal']%> 
check_mk_agent", [ "<%=config['domain_internal']%>" ], ),<%- end -%> +] diff --git a/puppet/modules/site_nagios/manifests/add_host_services.pp b/puppet/modules/site_nagios/manifests/add_host_services.pp index 2d615ff1..279809d1 100644 --- a/puppet/modules/site_nagios/manifests/add_host_services.pp +++ b/puppet/modules/site_nagios/manifests/add_host_services.pp @@ -1,7 +1,9 @@ define site_nagios::add_host_services ( + $domain_full_suffix, $domain_internal, $ip_address, $services, + $ssh_port, $openvpn_gateway_address='' ) { $nagios_hostname = $domain_internal diff --git a/puppet/modules/site_nagios/manifests/server.pp b/puppet/modules/site_nagios/manifests/server.pp index ca38d7fc..9aae8ae8 100644 --- a/puppet/modules/site_nagios/manifests/server.pp +++ b/puppet/modules/site_nagios/manifests/server.pp @@ -7,7 +7,7 @@ class site_nagios::server inherits nagios::base { $nagios_hiera = hiera('nagios') $nagiosadmin_pw = htpasswd_sha1($nagios_hiera['nagiosadmin_pw']) - $hosts = $nagios_hiera['hosts'] + $nagios_hosts = $nagios_hiera['hosts'] include nagios::defaults include nagios::base @@ -43,7 +43,7 @@ class site_nagios::server inherits nagios::base { group => 'nagios', } - create_resources ( site_nagios::add_host_services, $hosts ) + create_resources ( site_nagios::add_host_services, $nagios_hosts ) include site_nagios::server::apache include site_check_mk::server -- cgit v1.2.3 From 5372bba5dd503cb4fe9620bc342992c94863c8e6 Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 12:35:51 +0100 Subject: monitor connection attempts from disallowed bigcouch node --- puppet/modules/site_check_mk/files/agent/logwatch/couchdb.cfg | 1 + 1 file changed, 1 insertion(+) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/couchdb.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/couchdb.cfg index 0911d09a..ee64b3c0 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/couchdb.cfg +++ 
b/puppet/modules/site_check_mk/files/agent/logwatch/couchdb.cfg @@ -6,4 +6,5 @@ C rexi_DOWN,noproc C rexi_DOWN,noconnection C error + C Connection attempt from disallowed node W Shutting down group server -- cgit v1.2.3 From a81ce0750dbd5b000739e28279639e382ce347a2 Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 12:36:36 +0100 Subject: don't use syslog for nagios log, cause it will clutter logwatch with false-positive warnings --- puppet/modules/site_nagios/files/configs/Debian/nagios.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'puppet') diff --git a/puppet/modules/site_nagios/files/configs/Debian/nagios.cfg b/puppet/modules/site_nagios/files/configs/Debian/nagios.cfg index 61d9f2da..9bd3da28 100644 --- a/puppet/modules/site_nagios/files/configs/Debian/nagios.cfg +++ b/puppet/modules/site_nagios/files/configs/Debian/nagios.cfg @@ -251,7 +251,7 @@ log_archive_path=/var/log/nagios3/archives # If you want messages logged to the syslog facility, as well as the # Nagios log file set this option to 1. If not, set it to 0. 
-use_syslog=1 +use_syslog=0 -- cgit v1.2.3 From 59b09ce45333778619baca2bd4e34e5beba63584 Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 13:55:15 +0100 Subject: we use logwatch now to check for bigcouch errors --- puppet/modules/site_check_mk/manifests/agent/couchdb.pp | 9 --------- 1 file changed, 9 deletions(-) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index c954292a..cc166d42 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -3,13 +3,4 @@ class site_check_mk::agent::couchdb { file { '/etc/check_mk/logwatch.d/couchdb.cfg': source => 'puppet:///modules/site_check_mk/agent/logwatch/couchdb.cfg', } - - - # local custom checks - file { '/usr/lib/check_mk_agent/local/check_bigcouch_errors.sh': - ensure => link, - target => '/srv/leap/couchdb/scripts/tests/check_bigcouch_errors.sh', - require => Vcsrepo['/srv/leap/couchdb/scripts'] - } - } -- cgit v1.2.3 From 299c059c239936901c4b234f78e89d99ce94e19c Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 13:58:52 +0100 Subject: renamed logwatch/couchdb.cfg to logwatch/bigcouch.cfg --- puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg | 10 ++++++++++ puppet/modules/site_check_mk/files/agent/logwatch/couchdb.cfg | 10 ---------- puppet/modules/site_check_mk/manifests/agent/couchdb.pp | 5 +++-- 3 files changed, 13 insertions(+), 12 deletions(-) create mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg delete mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/couchdb.cfg (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg new file mode 100644 index 00000000..ee64b3c0 --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg @@ -0,0 +1,10 @@ 
+/opt/bigcouch/var/log/bigcouch.log + C Uncaught error in HTTP request: {exit, + C Uncaught error in HTTP request: {exit,normal} + C Uncaught error in HTTP request: {error, + C Response abnormally terminated: {nodedown, + C rexi_DOWN,noproc + C rexi_DOWN,noconnection + C error + C Connection attempt from disallowed node + W Shutting down group server diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/couchdb.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/couchdb.cfg deleted file mode 100644 index ee64b3c0..00000000 --- a/puppet/modules/site_check_mk/files/agent/logwatch/couchdb.cfg +++ /dev/null @@ -1,10 +0,0 @@ -/opt/bigcouch/var/log/bigcouch.log - C Uncaught error in HTTP request: {exit, - C Uncaught error in HTTP request: {exit,normal} - C Uncaught error in HTTP request: {error, - C Response abnormally terminated: {nodedown, - C rexi_DOWN,noproc - C rexi_DOWN,noconnection - C error - C Connection attempt from disallowed node - W Shutting down group server diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index cc166d42..f2d286ca 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -1,6 +1,7 @@ class site_check_mk::agent::couchdb { - file { '/etc/check_mk/logwatch.d/couchdb.cfg': - source => 'puppet:///modules/site_check_mk/agent/logwatch/couchdb.cfg', + file { '/etc/check_mk/logwatch.d/bigcouch.cfg': + source => 'puppet:///modules/site_check_mk/agent/logwatch/bigcouch.cfg', } + } -- cgit v1.2.3 From 3df2e8761ce6c54f6914d24e7acd24634d43c44d Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 14:00:47 +0100 Subject: added some error checking to logwatch/syslog.cfg --- puppet/modules/site_check_mk/files/agent/logwatch/syslog.cfg | 3 +++ 1 file changed, 3 insertions(+) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog.cfg 
b/puppet/modules/site_check_mk/files/agent/logwatch/syslog.cfg index 3703b5e1..52c479ef 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog.cfg @@ -1,5 +1,8 @@ /var/log/syslog C panic C Oops + I Error: Driver 'pcspkr' is already registered, aborting... + C Error + C error W generic protection rip W .*Unrecovered read error - auto reallocate failed -- cgit v1.2.3 From ee83c7227f64bab9a36ecbfd86049bc3153b3760 Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 14:03:28 +0100 Subject: moved logwatch/tapicero.cfg to logwatch/syslog/tapicero.cfg --- puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg | 4 ++++ puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg | 4 ---- puppet/modules/site_check_mk/manifests/agent/tapicero.pp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg delete mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg new file mode 100644 index 00000000..4e3808eb --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg @@ -0,0 +1,4 @@ + C tapicero.*RestClient::InternalServerError: + C tapicero.*RestClient::PreconditionFailed: + C tapicero.*failed + W tapicero.*Couch stream ended unexpectedly. 
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg deleted file mode 100644 index 4e3808eb..00000000 --- a/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg +++ /dev/null @@ -1,4 +0,0 @@ - C tapicero.*RestClient::InternalServerError: - C tapicero.*RestClient::PreconditionFailed: - C tapicero.*failed - W tapicero.*Couch stream ended unexpectedly. diff --git a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp index dc785e52..369ed00b 100644 --- a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp +++ b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp @@ -1,7 +1,7 @@ class site_check_mk::agent::tapicero { concat::fragment { 'syslog_tapicero': - source => 'puppet:///modules/site_check_mk/agent/logwatch/tapicero.cfg', + source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/tapicero.cfg', target => '/etc/check_mk/logwatch.d/syslog.cfg', order => '02'; } -- cgit v1.2.3 From 91365fb3b9b455253484fca6ffbc3dac8361e5b9 Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 15:11:56 +0100 Subject: check syslog for /usr/local/bin/couch-doc-update failures --- .../modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg | 2 ++ puppet/modules/site_check_mk/manifests/agent/couchdb.pp | 6 ++++++ 2 files changed, 8 insertions(+) create mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg new file mode 100644 index 00000000..5f8d5b95 --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg @@ -0,0 +1,2 @@ + C /usr/local/bin/couch-doc-update.*failed + C /usr/local/bin/couch-doc-update.*ERROR diff --git 
a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index f2d286ca..9456efcc 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -4,4 +4,10 @@ class site_check_mk::agent::couchdb { source => 'puppet:///modules/site_check_mk/agent/logwatch/bigcouch.cfg', } + concat::fragment { 'syslog_couchdb': + source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/couchdb.cfg', + target => '/etc/check_mk/logwatch.d/syslog.cfg', + order => '02'; + } + } -- cgit v1.2.3 From 42512cc9428afcd7d949e373e75da4f0d9fc8086 Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 16:02:53 +0100 Subject: check syslog for stunnel failures --- .../site_check_mk/files/agent/logwatch/syslog/stunnel.cfg | 3 +++ puppet/modules/site_check_mk/manifests/agent/stunnel.pp | 9 +++++++++ puppet/modules/site_stunnel/manifests/clients.pp | 2 ++ 3 files changed, 14 insertions(+) create mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg create mode 100644 puppet/modules/site_check_mk/manifests/agent/stunnel.pp (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg new file mode 100644 index 00000000..31c229b7 --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg @@ -0,0 +1,3 @@ +# check for stunnel failures + C stunnel:.*Connection refused + C stunnel:.*Connection reset by peer diff --git a/puppet/modules/site_check_mk/manifests/agent/stunnel.pp b/puppet/modules/site_check_mk/manifests/agent/stunnel.pp new file mode 100644 index 00000000..64022824 --- /dev/null +++ b/puppet/modules/site_check_mk/manifests/agent/stunnel.pp @@ -0,0 +1,9 @@ +class site_check_mk::agent::stunnel { + + concat::fragment { 'syslog_stunnel': + source => 
'puppet:///modules/site_check_mk/agent/logwatch/syslog/stunnel.cfg', + target => '/etc/check_mk/logwatch.d/syslog.cfg', + order => '02'; + } + +} diff --git a/puppet/modules/site_stunnel/manifests/clients.pp b/puppet/modules/site_stunnel/manifests/clients.pp index 791fdbc5..837665a3 100644 --- a/puppet/modules/site_stunnel/manifests/clients.pp +++ b/puppet/modules/site_stunnel/manifests/clients.pp @@ -28,4 +28,6 @@ define site_stunnel::clients ( Class['Site_config::X509::Ca'] ]; } + + include site_check_mk::agent::stunnel } -- cgit v1.2.3 From 40b13221967368060773af008f791ca7a053d18d Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 16:27:57 +0100 Subject: set /usr/lib/nagios/plugins/check_icmp to setuid root (Feature #5171) --- puppet/modules/site_check_mk/manifests/server.pp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/manifests/server.pp b/puppet/modules/site_check_mk/manifests/server.pp index 4bbeb850..0c98cd5a 100644 --- a/puppet/modules/site_check_mk/manifests/server.pp +++ b/puppet/modules/site_check_mk/manifests/server.pp @@ -44,6 +44,13 @@ class site_check_mk::server { content => "${type} ${pubkey} monitor", owner => 'nagios', mode => '0644'; + # check_icmp must be suid root or called by sudo + # see https://leap.se/code/issues/5171 + '/usr/lib/nagios/plugins/check_icmp': + mode => '4755', + require => Package['nagios-plugins-basic']; } + + include check_mk::agent::local_checks } -- cgit v1.2.3 From 0a6fbced6f2177464c000fc79a90c03171a3bcf6 Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 16:45:33 +0100 Subject: complete nagios monitoring for couchdb (Feature #2175) --- puppet/modules/site_check_mk/manifests/agent/couchdb.pp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index 9456efcc..97e4a777 100644 --- 
a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -1,13 +1,25 @@ class site_check_mk::agent::couchdb { + # watch logs file { '/etc/check_mk/logwatch.d/bigcouch.cfg': source => 'puppet:///modules/site_check_mk/agent/logwatch/bigcouch.cfg', } - concat::fragment { 'syslog_couchdb': source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/couchdb.cfg', target => '/etc/check_mk/logwatch.d/syslog.cfg', order => '02'; } + + # check bigcouch processes + file_line { + 'Bigcouch_epmd_procs': + line => 'Bigcouch_epmd_procs /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/epmd', + path => '/etc/check_mk/mrpe.cfg'; + 'Bigcouch_beam_procs': + line => 'Bigcouch_beam_procs /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/beam', + path => '/etc/check_mk/mrpe.cfg'; + } + + } -- cgit v1.2.3 From 5cc2e0d7a10c088d024bf0e5e4538b31a7d2844c Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 17:19:36 +0100 Subject: nagios monitoring of haproxy nodes (Feature #2657) --- puppet/modules/site_check_mk/manifests/agent/haproxy.pp | 12 ++++++++++++ .../manifests/agent/package/nagios_plugins_contrib.pp | 5 +++++ puppet/modules/site_haproxy/files/haproxy-stats.cfg | 6 ++++++ puppet/modules/site_haproxy/manifests/init.pp | 7 +++++++ 4 files changed, 30 insertions(+) create mode 100644 puppet/modules/site_check_mk/manifests/agent/haproxy.pp create mode 100644 puppet/modules/site_check_mk/manifests/agent/package/nagios_plugins_contrib.pp create mode 100644 puppet/modules/site_haproxy/files/haproxy-stats.cfg (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/manifests/agent/haproxy.pp b/puppet/modules/site_check_mk/manifests/agent/haproxy.pp new file mode 100644 index 00000000..e7986db1 --- /dev/null +++ b/puppet/modules/site_check_mk/manifests/agent/haproxy.pp @@ -0,0 +1,12 @@ +class site_check_mk::agent::haproxy { + + include 
site_check_mk::agent::package::nagios_plugins_contrib + + # local nagios plugin checks via mrpe + file_line { + 'haproxy': + line => 'Haproxy /usr/lib/nagios/plugins/check_haproxy -u "http://localhost:8000/haproxy;csv"', + path => '/etc/check_mk/mrpe.cfg'; + } + +} diff --git a/puppet/modules/site_check_mk/manifests/agent/package/nagios_plugins_contrib.pp b/puppet/modules/site_check_mk/manifests/agent/package/nagios_plugins_contrib.pp new file mode 100644 index 00000000..95a60d17 --- /dev/null +++ b/puppet/modules/site_check_mk/manifests/agent/package/nagios_plugins_contrib.pp @@ -0,0 +1,5 @@ +class site_check_mk::agent::package::nagios_plugins_contrib { + package { 'nagios-plugins-contrib': + ensure => installed, + } +} diff --git a/puppet/modules/site_haproxy/files/haproxy-stats.cfg b/puppet/modules/site_haproxy/files/haproxy-stats.cfg new file mode 100644 index 00000000..e6335ba2 --- /dev/null +++ b/puppet/modules/site_haproxy/files/haproxy-stats.cfg @@ -0,0 +1,6 @@ +# provide access to stats for the nagios plugin +listen stats 127.0.0.1:8000 + mode http + stats enable + stats uri /haproxy + diff --git a/puppet/modules/site_haproxy/manifests/init.pp b/puppet/modules/site_haproxy/manifests/init.pp index 602e26be..1a681373 100644 --- a/puppet/modules/site_haproxy/manifests/init.pp +++ b/puppet/modules/site_haproxy/manifests/init.pp @@ -22,4 +22,11 @@ class site_haproxy { } } + # monitor haproxy + concat::fragment { 'stats': + target => '/etc/haproxy/haproxy.cfg', + order => '90', + source => 'puppet:///modules/site_haproxy/haproxy-stats.cfg'; + } + include site_check_mk::agent::haproxy } -- cgit v1.2.3 From 835d1f9699507e9e40cae32ffc90940e26bed3ee Mon Sep 17 00:00:00 2001 From: varac Date: Tue, 25 Feb 2014 17:58:48 +0100 Subject: nagios: check open files for bigcouch process (Feature #4965) --- .../agent/nagios_plugins/check_unix_open_fds.pl | 322 +++++++++++++++++++++ puppet/modules/site_check_mk/manifests/agent.pp | 4 + 
.../site_check_mk/manifests/agent/couchdb.pp | 11 + .../manifests/agent/package/perl_plugin.pp | 5 + 4 files changed, 342 insertions(+) create mode 100755 puppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl create mode 100644 puppet/modules/site_check_mk/manifests/agent/package/perl_plugin.pp (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl b/puppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl new file mode 100755 index 00000000..06163d49 --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl @@ -0,0 +1,322 @@ +#!/usr/bin/perl -w + +# check_unix_open_fds Nagios Plugin +# +# TComm - Carlos Peris Pla +# +# This nagios plugin is free software, and comes with ABSOLUTELY +# NO WARRANTY. It may be used, redistributed and/or modified under +# the terms of the GNU General Public Licence (see +# http://www.fsf.org/licensing/licenses/gpl.txt). + + +# MODULE DECLARATION + +use strict; +use Nagios::Plugin; + + +# FUNCTION DECLARATION + +sub CreateNagiosManager (); +sub CheckArguments (); +sub PerformCheck (); + + +# CONSTANT DEFINITION + +use constant NAME => 'check_unix_open_fds'; +use constant VERSION => '0.1b'; +use constant USAGE => "Usage:\ncheck_unix_open_fds -w -c \n". + "\t\t[-V ]\n"; +use constant BLURB => "This plugin checks, in UNIX systems with the command lsof installed and with its SUID bit activated, the number\n". + "of file descriptors opened by an application and its processes.\n"; +use constant LICENSE => "This nagios plugin is free software, and comes with ABSOLUTELY\n". + "no WARRANTY. It may be used, redistributed and/or modified under\n". + "the terms of the GNU General Public Licence\n". + "(see http://www.fsf.org/licensing/licenses/gpl.txt).\n"; +use constant EXAMPLE => "\n\n". + "Example:\n". + "\n". + "check_unix_open_fds -a /usr/local/nagios/bin/ndo2db -w 20,75 -c 25,85\n". + "\n". 
+ "It returns CRITICAL if number of file descriptors opened by ndo2db is higher than 85,\n". + "if not it returns WARNING if number of file descriptors opened by ndo2db is higher \n". + "than 75, if not it returns CRITICAL if number of file descriptors opened by any process\n". + "of ndo2db is higher than 25, if not it returns WARNING if number of file descriptors \n". + "opened by any process of ndo2db is higher than 20.\n". + "In other cases it returns OK if check has been performed succesfully.\n\n"; + + +# VARIABLE DEFINITION + +my $Nagios; +my $Error; +my $PluginResult; +my $PluginOutput; +my @WVRange; +my @CVRange; + + +# MAIN FUNCTION + +# Get command line arguments +$Nagios = &CreateNagiosManager(USAGE, VERSION, BLURB, LICENSE, NAME, EXAMPLE); +eval {$Nagios->getopts}; + +if (!$@) { + # Command line parsed + if (&CheckArguments($Nagios, \$Error, \@WVRange, \@CVRange)) { + # Argument checking passed + $PluginResult = &PerformCheck($Nagios, \$PluginOutput, \@WVRange, \@CVRange) + } + else { + # Error checking arguments + $PluginOutput = $Error; + $PluginResult = UNKNOWN; + } + $Nagios->nagios_exit($PluginResult,$PluginOutput); +} +else { + # Error parsing command line + $Nagios->nagios_exit(UNKNOWN,$@); +} + + + +# FUNCTION DEFINITIONS + +# Creates and configures a Nagios plugin object +# Input: strings (usage, version, blurb, license, name and example) to configure argument parsing functionality +# Return value: reference to a Nagios plugin object + +sub CreateNagiosManager() { + # Create GetOpt object + my $Nagios = Nagios::Plugin->new(usage => $_[0], version => $_[1], blurb => $_[2], license => $_[3], plugin => $_[4], extra => $_[5]); + + # Add argument units + $Nagios->add_arg(spec => 'application|a=s', + help => 'Application path for which you want to check the number of open file descriptors', + required => 1); + + # Add argument warning + $Nagios->add_arg(spec => 'warning|w=s', + help => "Warning thresholds. 
Format: ", + required => 1); + # Add argument critical + $Nagios->add_arg(spec => 'critical|c=s', + help => "Critical thresholds. Format: ", + required => 1); + + # Return value + return $Nagios; +} + + +# Checks argument values and sets some default values +# Input: Nagios Plugin object +# Output: reference to Error description string, Memory Unit, Swap Unit, reference to WVRange ($_[4]), reference to CVRange ($_[5]) +# Return value: True if arguments ok, false if not + +sub CheckArguments() { + my ($Nagios, $Error, $WVRange, $CVRange) = @_; + my $commas; + my $units; + my $i; + my $firstpos; + my $secondpos; + + # Check Warning thresholds list + $commas = $Nagios->opts->warning =~ tr/,//; + if ($commas !=1){ + ${$Error} = "Invalid Warning list format. One comma is expected."; + return 0; + } + else{ + $i=0; + $firstpos=0; + my $warning=$Nagios->opts->warning; + while ($warning =~ /[,]/g) { + $secondpos=pos $warning; + if ($secondpos - $firstpos==1){ + @{$WVRange}[$i] = "~:"; + } + else{ + @{$WVRange}[$i] = substr $Nagios->opts->warning, $firstpos, ($secondpos-$firstpos-1); + } + $firstpos=$secondpos; + $i++ + } + if (length($Nagios->opts->warning) - $firstpos==0){#La coma es el ultimo elemento del string + @{$WVRange}[$i] = "~:"; + } + else{ + @{$WVRange}[$i] = substr $Nagios->opts->warning, $firstpos, (length($Nagios->opts->warning)-$firstpos); + } + + if (@{$WVRange}[0] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/){ + ${$Error} = "Invalid Process Warning threshold in ${$WVRange[0]}"; + return 0; + }if (@{$WVRange}[1] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/){ + ${$Error} = "Invalid Application Warning threshold in ${$WVRange[1]}"; + return 0; + } + } + + # Check Critical thresholds list + $commas = $Nagios->opts->critical =~ tr/,//; + if ($commas !=1){ + ${$Error} = "Invalid Critical list format. 
One comma is expected."; + return 0; + } + else{ + $i=0; + $firstpos=0; + my $critical=$Nagios->opts->critical; + while ($critical =~ /[,]/g) { + $secondpos=pos $critical ; + if ($secondpos - $firstpos==1){ + @{$CVRange}[$i] = "~:"; + } + else{ + @{$CVRange}[$i] =substr $Nagios->opts->critical, $firstpos, ($secondpos-$firstpos-1); + } + $firstpos=$secondpos; + $i++ + } + if (length($Nagios->opts->critical) - $firstpos==0){#La coma es el ultimo elemento del string + @{$CVRange}[$i] = "~:"; + } + else{ + @{$CVRange}[$i] = substr $Nagios->opts->critical, $firstpos, (length($Nagios->opts->critical)-$firstpos); + } + + if (@{$CVRange}[0] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/) { + ${$Error} = "Invalid Process Critical threshold in @{$CVRange}[0]"; + return 0; + } + if (@{$CVRange}[1] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/) { + ${$Error} = "Invalid Application Critical threshold in @{$CVRange}[1]"; + return 0; + } + } + + return 1; +}
+
+
+# Performs the whole check.
+# Input: Nagios Plugin object, reference to the plugin output string,
+#        reference to WVRange, reference to CVRange.
+#        Index 0 of each range list is checked against every single
+#        process, index 1 against the sum over all matching processes.
+# Output: plugin output string (appended to through the reference)
+# Return value: plugin return value (stays UNKNOWN when nothing matched)
+
+# Declared without a prototype: the sub reads four arguments from @_, so
+# an empty "()" prototype would be wrong.
+sub PerformCheck {
+    my ($Nagios, $PluginOutput, $WVRange, $CVRange) = @_;
+    my $ProcCount = 0;
+    my $FDCount = 0;
+    my $ProcFDAvg = 0;
+    my $PerProcMaxFD = 0;
+    my $ProcOKFlag = 0;
+    my $ProcWarningFlag = 0;
+    my $ProcCriticalFlag = 0;
+    my $LastWarningProcFDs = 0;
+    my $LastWarningProc = -1;
+    my $LastCriticalProcFDs = 0;
+    my $LastCriticalProc = -1;
+    my $PluginReturnValue = UNKNOWN;
+    my $PerfdataUnit = "FDs";
+
+    my $Application = $Nagios->opts->application;
+    # Only the last path component is shown in the plugin output.
+    my $ApplicationName = (split(/\//, $Application))[-1];
+
+    # The pattern reaches grep through a shell: single-quote it so shell
+    # metacharacters in the -a argument cannot inject commands, and use
+    # "--" so a pattern starting with "-" is not parsed as a grep option.
+    (my $QuotedApplication = $Application) =~ s/'/'\\''/g;
+    my $PsCommand = "ps -eaf | grep -- '$QuotedApplication'";
+    my $PsResult = `$PsCommand`;
+
+    foreach my $Proc (split(/\n/, $PsResult)) {
+        # Skip this plugin itself and the grep/shell helpers of the pipeline.
+        next if $Proc =~ /check_unix_open_fds/ || $Proc =~ / grep /;
+
+        # The second ps column is used as the PID; lines where it is not a
+        # plain number are ignored instead of being handed to lsof.
+        my $ProcPid = (split(' ', $Proc))[1];
+        next unless defined($ProcPid) && $ProcPid =~ /^\d+$/;
+
+        $ProcCount += 1;
+        my $LsofResult = `lsof -p $ProcPid | wc -l`;
+        # One line is subtracted, presumably to discount lsof's header line.
+        $LsofResult = ($LsofResult > 0) ? ($LsofResult - 1) : 0;
+        $FDCount += $LsofResult;
+        if ($LsofResult >= $PerProcMaxFD) { $PerProcMaxFD = $LsofResult; }
+
+        my $ProcPluginReturnValue = $Nagios->check_threshold(check => $LsofResult, warning => $WVRange->[0], critical => $CVRange->[0]);
+        if ($ProcPluginReturnValue == OK) {
+            $ProcOKFlag = 1;
+        }
+        elsif ($ProcPluginReturnValue == WARNING) {
+            $ProcWarningFlag = 1;
+            # Remember the process with the highest FD count for the output.
+            if ($LsofResult >= $LastWarningProcFDs) {
+                $LastWarningProcFDs = $LsofResult;
+                $LastWarningProc = $ProcPid;
+            }
+        }
+        elsif ($ProcPluginReturnValue == CRITICAL) {
+            $ProcCriticalFlag = 1;
+            if ($LsofResult >= $LastCriticalProcFDs) {
+                $LastCriticalProcFDs = $LsofResult;
+                $LastCriticalProc = $ProcPid;
+            }
+        }
+    }
+
+    # Decide on the processes that survived the filter above, not on the raw
+    # number of ps lines (which also contains the plugin and grep itself).
+    if ($ProcCount) {
+        $ProcFDAvg = int($FDCount / $ProcCount);
+        my $AppPluginReturnValue = $Nagios->check_threshold(check => $FDCount, warning => $WVRange->[1], critical => $CVRange->[1]);
+
+        # PluginReturnValue and PluginOutput. The worst state wins: a
+        # CRITICAL process must not be masked by a WARNING application total.
+        if ($AppPluginReturnValue == CRITICAL) {
+            $PluginReturnValue = CRITICAL;
+            ${$PluginOutput} .= "$ApplicationName handling $FDCount files (critical threshold set to $CVRange->[1])";
+        }
+        elsif ($ProcCriticalFlag) {
+            $PluginReturnValue = CRITICAL;
+            ${$PluginOutput} .= "Process ID $LastCriticalProc handling $LastCriticalProcFDs files (critical threshold set to $CVRange->[0])";
+        }
+        elsif ($AppPluginReturnValue == WARNING) {
+            $PluginReturnValue = WARNING;
+            ${$PluginOutput} .= "$ApplicationName handling $FDCount files (warning threshold set to $WVRange->[1])";
+        }
+        elsif ($ProcWarningFlag) {
+            $PluginReturnValue = WARNING;
+            ${$PluginOutput} .= "Process ID $LastWarningProc handling $LastWarningProcFDs files (warning threshold set to $WVRange->[0])";
+        }
+        elsif ($AppPluginReturnValue == OK && $ProcOKFlag) {
+            $PluginReturnValue = OK;
+            ${$PluginOutput} .= "$ApplicationName handling $FDCount files";
+        }
+    }
+    else {
+        # Runtime message kept as is (Spanish: "the application does not exist").
+        ${$PluginOutput} .= "No existe la aplicacion $ApplicationName";
+    }
+
+    my $PerformanceData = "ProcCount=$ProcCount$PerfdataUnit FDCount=$FDCount$PerfdataUnit ProcFDAvg=$ProcFDAvg$PerfdataUnit PerProcMaxFD=$PerProcMaxFD$PerfdataUnit";
+
+    # Output with performance data:
+    ${$PluginOutput} .= " | $PerformanceData";
+
+    return $PluginReturnValue;
+}
diff --git a/puppet/modules/site_check_mk/manifests/agent.pp b/puppet/modules/site_check_mk/manifests/agent.pp index a29923c1..efb05b37 100644 --- a/puppet/modules/site_check_mk/manifests/agent.pp +++ b/puppet/modules/site_check_mk/manifests/agent.pp @@ -14,6 +14,10 @@ class site_check_mk::agent { register_agent => false } + file { [ '/srv/leap/nagios', '/srv/leap/nagios/plugins' ]: + ensure => directory, + } + include site_check_mk::agent::mrpe include site_check_mk::agent::logwatch } diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index 97e4a777..01e2b886 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -21,5 +21,16 @@ class site_check_mk::agent::couchdb { path => '/etc/check_mk/mrpe.cfg'; } + # check open files for bigcouch proc + include site_check_mk::agent::package::perl_plugin + file { '/srv/leap/nagios/plugins/check_unix_open_fds.pl': + source =>
'puppet:///modules/site_check_mk/agent/nagios_plugins/check_unix_open_fds.pl', + mode => '0755' + } + file_line { + 'Bigcouch_open_files': + line => 'Bigcouch_open_files /srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 750,750 -c 1000,1000', + path => '/etc/check_mk/mrpe.cfg'; + } } diff --git a/puppet/modules/site_check_mk/manifests/agent/package/perl_plugin.pp b/puppet/modules/site_check_mk/manifests/agent/package/perl_plugin.pp new file mode 100644 index 00000000..4feda375 --- /dev/null +++ b/puppet/modules/site_check_mk/manifests/agent/package/perl_plugin.pp @@ -0,0 +1,5 @@
+# Makes sure the libnagios-plugin-perl package is installed.
+class site_check_mk::agent::package::perl_plugin {
+  # single-attribute resource, declared on one line
+  package { 'libnagios-plugin-perl': ensure => installed }
+}
-- cgit v1.2.3 From 0daa46300cddc6c56e07b42c131852e839235a7f Mon Sep 17 00:00:00 2001 From: varac Date: Wed, 26 Feb 2014 14:02:39 +0100 Subject: ignore logwatch pattern for tapicero: 412 Precondition Failed while creating user db (Bug #5168) --- .../modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg index 4e3808eb..9983d27c 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg @@ -1,4 +1,7 @@ C tapicero.*RestClient::InternalServerError: - C tapicero.*RestClient::PreconditionFailed: +# possible race condition between multiple tapicero +# instances, so we ignore it +# see https://leap.se/code/issues/5168 + I tapicero.*RestClient::PreconditionFailed: C tapicero.*failed W tapicero.*Couch stream ended unexpectedly.
-- cgit v1.2.3 From 877e6daa7e281c27114759482879e6f8c6903283 Mon Sep 17 00:00:00 2001 From: varac Date: Wed, 26 Feb 2014 14:08:48 +0100 Subject: ignore logwatch pattern 'stunnel: SSL_shutdown: Connection reset by peer' --- puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg index 31c229b7..cf7ebca8 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg @@ -1,3 +1,5 @@ # check for stunnel failures C stunnel:.*Connection refused - C stunnel:.*Connection reset by peer +# this is a temporary failure and happens very often, so we +# ignore it + I stunnel:.*Connection reset by peer -- cgit v1.2.3 From 5b15447055de66f30bc7f036a588dec4638b9a7d Mon Sep 17 00:00:00 2001 From: varac Date: Wed, 26 Feb 2014 14:52:04 +0100 Subject: check syslog for webapp errors --- .../modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg | 2 ++ puppet/modules/site_check_mk/manifests/agent/webapp.pp | 9 +++++++++ puppet/modules/site_webapp/manifests/init.pp | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg create mode 100644 puppet/modules/site_check_mk/manifests/agent/webapp.pp (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg new file mode 100644 index 00000000..14fcf34a --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg @@ -0,0 +1,2 @@ +# check for webapp errors + C webapp.*Could not connect to couch database messages due to 401 Unauthorized: {"error":"unauthorized","reason":"You are not a
server admin."} diff --git a/puppet/modules/site_check_mk/manifests/agent/webapp.pp b/puppet/modules/site_check_mk/manifests/agent/webapp.pp new file mode 100644 index 00000000..dc2baf19 --- /dev/null +++ b/puppet/modules/site_check_mk/manifests/agent/webapp.pp @@ -0,0 +1,9 @@
+# Adds the webapp logwatch patterns (webapp.cfg) as a concat fragment
+# of /etc/check_mk/logwatch.d/syslog.cfg.
+class site_check_mk::agent::webapp {
+  concat::fragment { 'syslog_webapp':
+    target => '/etc/check_mk/logwatch.d/syslog.cfg',
+    order  => '02',
+    source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/webapp.cfg',
+  }
+}
diff --git a/puppet/modules/site_webapp/manifests/init.pp b/puppet/modules/site_webapp/manifests/init.pp index f8216aa4..d02a7261 100644 --- a/puppet/modules/site_webapp/manifests/init.pp +++ b/puppet/modules/site_webapp/manifests/init.pp @@ -157,5 +157,5 @@ class site_webapp { } include site_shorewall::webapp - + include site_check_mk::agent::webapp } -- cgit v1.2.3 From fa75c9406b1c4cfeccca046ba01d108b681e53fe Mon Sep 17 00:00:00 2001 From: varac Date: Wed, 26 Feb 2014 15:20:34 +0100 Subject: ignore RoutingErrors that rails throw when it can't handle a url (#5173) --- puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg | 3 +++ 1 file changed, 3 insertions(+) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg index 14fcf34a..00f9c7fd 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg @@ -1,2 +1,5 @@ # check for webapp errors C webapp.*Could not connect to couch database messages due to 401 Unauthorized: {"error":"unauthorized","reason":"You are not a server admin."} +# ignore RoutingErrors that rails throw when it can't handle a url +# see https://leap.se/code/issues/5173 + I webapp.*ActionController::RoutingError -- cgit v1.2.3 From fdb0e27d6df35b511e4883becf3bc2afb945550b Mon Sep 17 00:00:00 2001 From: varac Date: Wed, 26
Feb 2014 15:30:54 +0100 Subject: check syslog for 'Undefined' logpattern --- puppet/modules/site_check_mk/files/agent/logwatch/syslog.cfg | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog.cfg index 52c479ef..f3505c1c 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog.cfg @@ -1,4 +1,5 @@ /var/log/syslog +# some general patterns C panic C Oops I Error: Driver 'pcspkr' is already registered, aborting... @@ -6,3 +7,6 @@ C error W generic protection rip W .*Unrecovered read error - auto reallocate failed +# 401 Unauthorized error logged by webapp and possibly other +# applications + C Unauthorized -- cgit v1.2.3 From 66cc1345c3af4e814d98c8e4b90d90158ac9d399 Mon Sep 17 00:00:00 2001 From: varac Date: Wed, 26 Feb 2014 15:53:12 +0100 Subject: ignore valid log patterns from bigcouch.log --- puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'puppet') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg index ee64b3c0..a1eb1312 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg @@ -8,3 +8,8 @@ C error C Connection attempt from disallowed node W Shutting down group server +# ignore requests that are fine + I undefined - -.*200$ + I undefined - -.*201$ + I 127.0.0.1 undefined.* ok + I 127.0.0.1 localhost:5984 .* ok -- cgit v1.2.3