diff options
Diffstat (limited to 'puppet/modules/site_check_mk')
17 files changed, 223 insertions, 49 deletions
diff --git a/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh b/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh new file mode 100755 index 00000000..95474ccb --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# +# todo: +# - thresholds +# - couch response time +# - make CURL/URL/DBLIST_EXCLUDE vars configurable +# - move load_nagios_utils() to helper library so we can use it from multiple scripts + +start_time=$(date +%s.%N) + +CURL='curl -s --netrc-file /etc/couchdb/couchdb.netrc' +URL='http://127.0.0.1:5984' +TMPFILE=$(mktemp) +DBLIST_EXCLUDE='(user-|sessions_|tokens_)' +PREFIX='Couchdb_' + + +load_nagios_utils () { + # load the nagios utils + # in debian, the package nagios-plugins-common installs utils.sh to /usr/lib/nagios/plugins/utils.sh + utilsfn= + for d in $PROGPATH /usr/lib/nagios/plugins /usr/lib64/nagios/plugins /usr/local/nagios/libexec /opt/nagios-plugins/libexec . ; do + if [ -f "$d/utils.sh" ]; then + utilsfn=$d/utils.sh; + fi + done + if [ "$utilsfn" = "" ]; then + echo "UNKNOWN - cannot find utils.sh (part of nagios plugins)"; + exit 3; + fi + . "$utilsfn"; + STATE[$STATE_OK]='OK' + STATE[$STATE_WARNING]='Warning' + STATE[$STATE_CRITICAL]='Critical' + STATE[$STATE_UNKNOWN]='Unknown' + STATE[$STATE_DEPENDENT]='Dependend' +} + +get_global_stats_perf () { + trap "localexit=3" ERR + local localexit db_count + localexit=0 + + # get a list of all dbs + $CURL -X GET $URL/_all_dbs | json_pp | egrep -v '(\[|\])' > $TMPFILE + + db_count=$( wc -l < $TMPFILE) + excluded_db_count=$( egrep -c "$DBLIST_EXCLUDE" $TMPFILE ) + + echo "db_count=$db_count|excluded_db_count=$excluded_db_count" + return ${localexit} +} + +db_stats () { + trap "localexit=3" ERR + local db db_stats doc_count del_doc_count localexit + localexit=0 + + db="$1" + name="$2" + + if [ -z "$name" ] + then + name="$db" + fi + + perf="$perf|${db}_docs=$( $CURL -s -X GET ${URL}/$db | json_pp |grep 'doc_count' | sed 's/[^0-9]//g' )" + db_stats=$( $CURL -s -X GET ${URL}/$db | json_pp ) + + doc_count=$( echo "$db_stats" | grep 'doc_count' | grep -v 'deleted_doc_count' | sed 's/[^0-9]//g' ) + del_doc_count=$( echo "$db_stats" | grep 'doc_del_count' | sed 's/[^0-9]//g' ) + + # don't divide by zero + if [ $del_doc_count -eq 0 ] + then + del_doc_perc=0 + else + del_doc_perc=$(( del_doc_count * 100 / doc_count )) + fi + + bytes=$( echo "$db_stats" | grep disk_size | sed 's/[^0-9]//g' ) + disk_size=$( echo "scale = 2; $bytes / 1024 / 1024" | bc -l ) + + echo -n "${localexit} ${PREFIX}${name}_database ${name}_docs=$doc_count|${name}_deleted_docs=$del_doc_count|${name}_deleted_docs_percentage=${del_doc_perc}%" + printf "|${name}_disksize_mb=%02.2fmb ${STATE[localexit]}: database $name\n" "$disk_size" + + return ${localexit} +} + +# main + +load_nagios_utils + +# per-db stats +# get a list of all dbs +$CURL -X GET $URL/_all_dbs | json_pp | egrep -v '(\[|\])' > $TMPFILE + +# get list of dbs to check +dbs=$( egrep -v "${DBLIST_EXCLUDE}" $TMPFILE | tr -d '\n"' | sed 's/,/ /g' ) + +for db in $dbs +do + db_stats "$db" +done + +# special handling for rotated dbs +suffix=$(($(date +'%s') / (60*60*24*30) + 1)) +db_stats "sessions_${suffix}" "sessions" +db_stats "tokens_${suffix}" "tokens" + + +# show global couchdb stats +global_stats_perf=$(get_global_stats_perf) +exitcode=$? + +end_time=$(date +%s.%N) +duration=$( echo "scale = 2; $end_time - $start_time" | bc -l ) + +printf "${exitcode} ${PREFIX}global_stats ${global_stats_perf}|script_duration=%02.2fs ${STATE[exitcode]}: global couchdb status\n" "$duration" diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg index c71c5392..166d0230 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg @@ -1,4 +1,4 @@ -/var/log/leap_mx.log +/var/log/leap/mx.log W Don't know how to deliver mail W No public key, stopping the processing chain diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg index ac17c0ca..ed50f420 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg @@ -1,3 +1,4 @@ +/var/log/leap/openvpn.log # ignore openvpn TLS initialization errors when clients # suddenly hangup before properly establishing # a tls connection diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg index eb3131f2..b1e6cf2f 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg @@ -1,3 +1,4 @@ +/var/log/leap/stunnel.log # check for stunnel failures # # these are temporary failures and happen very often, so we diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg index e5721eea..d98f5094 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg @@ -1,5 +1,6 @@ +/var/log/leap/tapicero.log # Ignore transient Tapicero errors when creating a db (#6511) - I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::Resource Not Found|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error) + I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::ResourceNotFound|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error) C tapicero.*RestClient::InternalServerError: # possible race condition between multiple tapicero # instances, so we ignore it diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg index 00f9c7fd..008e9e09 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg @@ -1,3 +1,4 @@ +/var/log/leap/webapp.log # check for webapp errors C webapp.*Could not connect to couch database messages due to 401 Unauthorized: {"error":"unauthorized","reason":"You are not a server admin."} # ignore RoutingErrors that rails throw when it can't handle a url diff --git a/puppet/modules/site_check_mk/files/extra_service_conf.mk b/puppet/modules/site_check_mk/files/extra_service_conf.mk index 03d1ea76..c7120a96 100644 --- a/puppet/modules/site_check_mk/files/extra_service_conf.mk +++ b/puppet/modules/site_check_mk/files/extra_service_conf.mk @@ -1,13 +1,14 @@ # retry 3 times before setting a service into a hard state # and send out notification -extra_service_conf["max_check_attempts"] = [ - ("4", ALL_HOSTS , ALL_SERVICES ) +extra_service_conf["max_check_attempts"] = [ + ("4", ALL_HOSTS , ALL_SERVICES ) ] -# run check_mk_agent every 2 minutes if it terminates -# successfully. +# +# run check_mk_agent every 4 minutes if it terminates successfully. # see https://leap.se/code/issues/6539 for the rationale +# extra_service_conf["normal_check_interval"] = [ - ("2", ALL_HOSTS , "Check_MK" ) + ("4", ALL_HOSTS , "Check_MK" ) ] diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index ee0268a3..abfc7ad0 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -12,13 +12,21 @@ class site_check_mk::agent::couchdb { # check bigcouch processes - file_line { + augeas { 'Bigcouch_epmd_procs': - line => 'Bigcouch_epmd_procs /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/epmd', - path => '/etc/check_mk/mrpe.cfg'; + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_epmd_procs', + 'set Bigcouch_epmd_procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/epmd\'' ], + require => File['/etc/check_mk/mrpe.cfg']; 'Bigcouch_beam_procs': - line => 'Bigcouch_beam_procs /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/beam', - path => '/etc/check_mk/mrpe.cfg'; + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_beam_procs', + 'set Bigcouch_beam_procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/beam\'' ], + require => File['/etc/check_mk/mrpe.cfg']; } # check open files for bigcouch proc @@ -27,10 +35,21 @@ class site_check_mk::agent::couchdb { source => 'puppet:///modules/site_check_mk/agent/nagios_plugins/check_unix_open_fds.pl', mode => '0755' } - file_line { + augeas { 'Bigcouch_open_files': - line => 'Bigcouch_open_files /srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720', - path => '/etc/check_mk/mrpe.cfg'; + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_open_files', + 'set Bigcouch_open_files \'/srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720\'' ], + require => File['/etc/check_mk/mrpe.cfg']; } + + # check different couchdb stats + file { '/usr/lib/check_mk_agent/local/leap_couch_stats.sh': + source => 'puppet:///modules/site_check_mk/agent/local_checks/couchdb/leap_couch_stats.sh', + mode => '0755', + require => Package['check_mk-agent'] + } } diff --git a/puppet/modules/site_check_mk/manifests/agent/haproxy.pp b/puppet/modules/site_check_mk/manifests/agent/haproxy.pp index e7986db1..6d52efba 100644 --- a/puppet/modules/site_check_mk/manifests/agent/haproxy.pp +++ b/puppet/modules/site_check_mk/manifests/agent/haproxy.pp @@ -3,10 +3,13 @@ class site_check_mk::agent::haproxy { include site_check_mk::agent::package::nagios_plugins_contrib # local nagios plugin checks via mrpe - file_line { - 'haproxy': - line => 'Haproxy /usr/lib/nagios/plugins/check_haproxy -u "http://localhost:8000/haproxy;csv"', - path => '/etc/check_mk/mrpe.cfg'; + augeas { 'haproxy': + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/Haproxy', + 'set Haproxy \'/usr/lib/nagios/plugins/check_haproxy -u "http://localhost:8000/haproxy;csv"\'' ], + require => File['/etc/check_mk/mrpe.cfg']; } } diff --git a/puppet/modules/site_check_mk/manifests/agent/haveged.pp b/puppet/modules/site_check_mk/manifests/agent/haveged.pp new file mode 100644 index 00000000..cacbea8c --- /dev/null +++ b/puppet/modules/site_check_mk/manifests/agent/haveged.pp @@ -0,0 +1,15 @@ +class site_check_mk::agent::haveged { + +# check haveged process + augeas { + 'haveged_proc': + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/haveged_proc', + 'set haveged_proc \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /usr/sbin/haveged\'' ], + require => File['/etc/check_mk/mrpe.cfg']; + + } + +} diff --git a/puppet/modules/site_check_mk/manifests/agent/mrpe.pp b/puppet/modules/site_check_mk/manifests/agent/mrpe.pp index 6921574f..5e1f087a 100644 --- a/puppet/modules/site_check_mk/manifests/agent/mrpe.pp +++ b/puppet/modules/site_check_mk/manifests/agent/mrpe.pp @@ -11,8 +11,14 @@ class site_check_mk::agent::mrpe { ensure => present, require => Package['check-mk-agent'] } -> - file_line { 'Apt': - line => 'APT /usr/lib/nagios/plugins/check_apt', - path => '/etc/check_mk/mrpe.cfg', + + augeas { + 'Apt': + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/APT', + 'set APT \'/usr/lib/nagios/plugins/check_apt\'' ]; } + } diff --git a/puppet/modules/site_check_mk/manifests/agent/mx.pp b/puppet/modules/site_check_mk/manifests/agent/mx.pp index 1e370125..98757b59 100644 --- a/puppet/modules/site_check_mk/manifests/agent/mx.pp +++ b/puppet/modules/site_check_mk/manifests/agent/mx.pp @@ -6,13 +6,16 @@ class site_check_mk::agent::mx { } # local nagios plugin checks via mrpe - file_line { + augeas { 'Leap_MX_Procs': - line => 'Leap_MX_Procs /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a \'/usr/bin/python /usr/bin/twistd --pidfile=/var/run/leap_mx.pid --rundir=/var/lib/leap_mx/ --python=/usr/share/app/leap_mx.tac --logfile=/var/log/leap_mx.log\'', - path => '/etc/check_mk/mrpe.cfg'; + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/Leap_MX_Procs', + 'set Leap_MX_Procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a "/usr/bin/python /usr/bin/twistd --pidfile=/var/run/leap_mx.pid --rundir=/var/lib/leap_mx/ --python=/usr/share/app/leap_mx.tac --logfile=/var/log/leap/mx.log"\'' ], + require => File['/etc/check_mk/mrpe.cfg']; } - # check stale files in queue dir file { '/usr/lib/check_mk_agent/local/check_leap_mx.sh': source => 'puppet:///modules/site_check_mk/agent/local_checks/mx/check_leap_mx.sh', diff --git a/puppet/modules/site_check_mk/manifests/agent/openvpn.pp b/puppet/modules/site_check_mk/manifests/agent/openvpn.pp index 919a408d..0596a497 100644 --- a/puppet/modules/site_check_mk/manifests/agent/openvpn.pp +++ b/puppet/modules/site_check_mk/manifests/agent/openvpn.pp @@ -2,7 +2,7 @@ class site_check_mk::agent::openvpn { # check syslog concat::fragment { 'syslog_openpvn': - source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/openvpn.cfg', + source => 'puppet:///modules/site_check_mk/agent/logwatch/openvpn.cfg', target => '/etc/check_mk/logwatch.d/syslog.cfg', order => '02'; } diff --git a/puppet/modules/site_check_mk/manifests/agent/soledad.pp b/puppet/modules/site_check_mk/manifests/agent/soledad.pp index 512d1a3d..f4a3f3a6 100644 --- a/puppet/modules/site_check_mk/manifests/agent/soledad.pp +++ b/puppet/modules/site_check_mk/manifests/agent/soledad.pp @@ -5,10 +5,13 @@ class site_check_mk::agent::soledad { } # local nagios plugin checks via mrpe - file_line { - 'Soledad_Procs': - line => 'Soledad_Procs /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a \'/usr/bin/python /usr/bin/twistd --pidfile=/var/run/soledad.pid --logfile=/var/log/soledad.log web --wsgi=leap.soledad.server.application\'', - path => '/etc/check_mk/mrpe.cfg'; - } + augeas { 'Soledad_Procs': + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/Soledad_Procs', + 'set Soledad_Procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a "/usr/bin/python /usr/bin/twistd --uid=soledad --gid=soledad --pidfile=/var/run/soledad.pid --logfile=/var/log/soledad.log web --wsgi=leap.soledad.server.application --port=ssl:2323:privateKey=/etc/x509/keys/leap.key:certKey=/etc/x509/certs/leap.crt:sslmethod=SSLv23_METHOD"\'' ], + require => File['/etc/check_mk/mrpe.cfg']; + } } diff --git a/puppet/modules/site_check_mk/manifests/agent/stunnel.pp b/puppet/modules/site_check_mk/manifests/agent/stunnel.pp index 64022824..7f765771 100644 --- a/puppet/modules/site_check_mk/manifests/agent/stunnel.pp +++ b/puppet/modules/site_check_mk/manifests/agent/stunnel.pp @@ -1,7 +1,7 @@ class site_check_mk::agent::stunnel { concat::fragment { 'syslog_stunnel': - source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/stunnel.cfg', + source => 'puppet:///modules/site_check_mk/agent/logwatch/stunnel.cfg', target => '/etc/check_mk/logwatch.d/syslog.cfg', order => '02'; } diff --git a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp index ffd11100..4a5ec68e 100644 --- a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp +++ b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp @@ -2,21 +2,24 @@ class site_check_mk::agent::tapicero { include ::site_nagios::plugins - concat::fragment { 'syslog_tapicero': - source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/tapicero.cfg', - target => '/etc/check_mk/logwatch.d/syslog.cfg', - order => '02'; + # watch logs + file { '/etc/check_mk/logwatch.d/tapicero.cfg': + source => 'puppet:///modules/site_check_mk/agent/logwatch/tapicero.cfg', } # local nagios plugin checks via mrpe - file_line { + augeas { 'Tapicero_Procs': - line => 'Tapicero_Procs /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a tapicero', - path => '/etc/check_mk/mrpe.cfg'; - + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => [ + 'rm /files/etc/check_mk/mrpe.cfg/Tapicero_Procs', + 'set Tapicero_Procs "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a tapicero"' ], + require => File['/etc/check_mk/mrpe.cfg']; 'Tapicero_Heartbeat': - line => 'Tapicero_Heartbeat /usr/local/lib/nagios/plugins/check_last_regex_in_log -f /var/log/syslog -r "tapicero" -w 300 -c 600', - path => '/etc/check_mk/mrpe.cfg'; + incl => '/etc/check_mk/mrpe.cfg', + lens => 'Spacevars.lns', + changes => 'set Tapicero_Heartbeat \'/usr/local/lib/nagios/plugins/check_last_regex_in_log -f /var/log/leap/tapicero.log -r "tapicero" -w 300 -c 600\'', + require => File['/etc/check_mk/mrpe.cfg']; } - } diff --git a/puppet/modules/site_check_mk/manifests/agent/webapp.pp b/puppet/modules/site_check_mk/manifests/agent/webapp.pp index 88c3da30..9bf3b197 100644 --- a/puppet/modules/site_check_mk/manifests/agent/webapp.pp +++ b/puppet/modules/site_check_mk/manifests/agent/webapp.pp @@ -7,11 +7,9 @@ class site_check_mk::agent::webapp { ensure => absent } - # check syslog - concat::fragment { 'syslog_webapp': - source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/webapp.cfg', - target => '/etc/check_mk/logwatch.d/syslog.cfg', - order => '02'; + # watch logs + file { '/etc/check_mk/logwatch.d/webapp.cfg': + source => 'puppet:///modules/site_check_mk/agent/logwatch/webapp.cfg', } } |