diff options
Diffstat (limited to 'puppet/modules/site_check_mk')
17 files changed, 223 insertions, 49 deletions
diff --git a/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh b/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh
new file mode 100755
index 00000000..95474ccb
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+#
+# check_mk local check: prints one status line per couchdb database plus
+# one line of global stats ("<state> <item> <perfdata> <text>").
+#
+# todo:
+#  - thresholds
+#  - couch response time
+#  - make CURL/URL/DBLIST_EXCLUDE vars configurable
+#  - move load_nagios_utils() to helper library so we can use it from multiple scripts
+
+start_time=$(date +%s.%N)
+
+CURL='curl -s --netrc-file /etc/couchdb/couchdb.netrc'
+URL='http://127.0.0.1:5984'
+TMPFILE=$(mktemp)
+DBLIST_EXCLUDE='(user-|sessions_|tokens_)'
+PREFIX='Couchdb_'
+
+# remove the temp file on every exit path instead of leaking one per run
+trap 'rm -f "$TMPFILE"' EXIT
+
+load_nagios_utils () {
+  # Source utils.sh (nagios plugins) and fill STATE[] with a label per
+  # exit code; exits 3 (UNKNOWN) when utils.sh cannot be found.
+  # in debian, the package nagios-plugins-common installs utils.sh to /usr/lib/nagios/plugins/utils.sh
+  local d utilsfn=
+  for d in $PROGPATH /usr/lib/nagios/plugins /usr/lib64/nagios/plugins /usr/local/nagios/libexec /opt/nagios-plugins/libexec . ; do
+    # no break: the last directory containing utils.sh wins
+    if [ -f "$d/utils.sh" ]; then
+      utilsfn=$d/utils.sh
+    fi
+  done
+  if [ -z "$utilsfn" ]; then
+    echo "UNKNOWN - cannot find utils.sh (part of nagios plugins)"
+    exit 3
+  fi
+  . "$utilsfn"
+  STATE[$STATE_OK]='OK'
+  STATE[$STATE_WARNING]='Warning'
+  STATE[$STATE_CRITICAL]='Critical'
+  STATE[$STATE_UNKNOWN]='Unknown'
+  STATE[$STATE_DEPENDENT]='Dependent'
+}
+
+get_global_stats_perf () {
+  # Print "db_count=N|excluded_db_count=M" for the dbs listed by couch.
+  # Returns 0, or 3 once the ERR trap has recorded a failed command.
+  trap "localexit=3" ERR
+  local localexit db_count excluded_db_count
+  localexit=0
+
+  # get a list of all dbs
+  $CURL -X GET "$URL/_all_dbs" | json_pp | grep -Ev '(\[|\])' > "$TMPFILE"
+
+  db_count=$( wc -l < "$TMPFILE" )
+  # grep -c exits 1 when it counts zero matches; that is not an error here
+  excluded_db_count=$( grep -Ec "$DBLIST_EXCLUDE" "$TMPFILE" || [ $? -eq 1 ] )
+
+  echo "db_count=$db_count|excluded_db_count=$excluded_db_count"
+  return ${localexit}
+}
+
+db_stats () {
+  # Print one check_mk line with doc, deleted-doc and disk-size perfdata.
+  #   $1  database to query
+  #   $2  item name used in the output (optional, defaults to $1)
+  # Returns 0, or 3 if a command failed or couch sent no doc_count.
+  trap "localexit=3" ERR
+  local db name db_stats doc_count del_doc_count del_doc_perc bytes disk_size localexit
+  localexit=0
+
+  db="$1"
+  name="${2:-$db}"
+
+  db_stats=$( $CURL -s -X GET "${URL}/$db" | json_pp )
+
+  doc_count=$( echo "$db_stats" | grep 'doc_count' | grep -v 'deleted_doc_count' | sed 's/[^0-9]//g' )
+  del_doc_count=$( echo "$db_stats" | grep 'doc_del_count' | sed 's/[^0-9]//g' )
+  bytes=$( echo "$db_stats" | grep disk_size | sed 's/[^0-9]//g' )
+
+  # no doc_count means couch sent no stats for this db: report Unknown
+  # and fall back to zeros so the arithmetic and bc below stay valid
+  [ -n "$doc_count" ] || localexit=3
+  doc_count=${doc_count:-0}
+  del_doc_count=${del_doc_count:-0}
+  bytes=${bytes:-0}
+
+  # don't divide by zero: doc_count is the divisor, so guard on it
+  if [ "$doc_count" -eq 0 ]
+  then
+    del_doc_perc=0
+  else
+    del_doc_perc=$(( del_doc_count * 100 / doc_count ))
+  fi
+
+  disk_size=$( echo "scale = 2; $bytes / 1024 / 1024" | bc -l )
+
+  echo -n "${localexit} ${PREFIX}${name}_database ${name}_docs=$doc_count|${name}_deleted_docs=$del_doc_count|${name}_deleted_docs_percentage=${del_doc_perc}%"
+  printf "|${name}_disksize_mb=%02.2fmb ${STATE[localexit]}: database $name\n" "$disk_size"
+
+  return ${localexit}
+}
+
+# main
+
+load_nagios_utils
+
+# per-db stats
+# get a list of all dbs
+$CURL -X GET "$URL/_all_dbs" | json_pp | grep -Ev '(\[|\])' > "$TMPFILE"
+
+# get list of dbs to check
+dbs=$( grep -Ev "${DBLIST_EXCLUDE}" "$TMPFILE" | tr -d '\n"' | sed 's/,/ /g' )
+
+for db in $dbs
+do
+  db_stats "$db"
+done
+
+# special handling for rotated dbs: only the current 30-day bucket is
+# checked and it is reported under the fixed names "sessions"/"tokens"
+suffix=$(($(date +'%s') / (60*60*24*30) + 1))
+db_stats "sessions_${suffix}" "sessions"
+db_stats "tokens_${suffix}" "tokens"
+
+# show global couchdb stats
+global_stats_perf=$(get_global_stats_perf)
+exitcode=$?
+
+end_time=$(date +%s.%N)
+duration=$( echo "scale = 2; $end_time - $start_time" | bc -l )
+
+printf "${exitcode} ${PREFIX}global_stats ${global_stats_perf}|script_duration=%02.2fs ${STATE[exitcode]}: global couchdb status\n" "$duration"
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg
index c71c5392..166d0230 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg
@@ -1,4 +1,4 @@
-/var/log/leap_mx.log
+/var/log/leap/mx.log
  W Don't know how to deliver mail
  W No public key, stopping the processing chain
 
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg
index ac17c0ca..ed50f420 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg
@@ -1,3 +1,4 @@
+/var/log/leap/openvpn.log
 # ignore openvpn TLS initialization errors when clients
 # suddenly hangup before properly establishing
 # a tls connection
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg
index eb3131f2..b1e6cf2f 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg
@@ -1,3 +1,4 @@
+/var/log/leap/stunnel.log
 # check for stunnel failures
 #
 # these are temporary failures and happen very often, so we
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg
b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg index e5721eea..d98f5094 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg @@ -1,5 +1,6 @@ +/var/log/leap/tapicero.log  # Ignore transient Tapicero errors when creating a db (#6511) - I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::Resource Not Found|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error) + I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::ResourceNotFound|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error)   C tapicero.*RestClient::InternalServerError:  # possible race condition between multiple tapicero  # instances, so we ignore it diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg index 00f9c7fd..008e9e09 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg @@ -1,3 +1,4 @@ +/var/log/leap/webapp.log  # check for webapp errors   C webapp.*Could not connect to couch database messages due to 401 Unauthorized: {"error":"unauthorized","reason":"You are not a server admin."}  # ignore RoutingErrors that rails throw when it can't handle a url diff --git a/puppet/modules/site_check_mk/files/extra_service_conf.mk b/puppet/modules/site_check_mk/files/extra_service_conf.mk index 03d1ea76..c7120a96 100644 --- a/puppet/modules/site_check_mk/files/extra_service_conf.mk +++ b/puppet/modules/site_check_mk/files/extra_service_conf.mk @@ -1,13 +1,14 @@  # retry 3 times before setting a service into a hard state  # and send 
out notification -extra_service_conf["max_check_attempts"] = [  -  ("4", ALL_HOSTS , ALL_SERVICES )  +extra_service_conf["max_check_attempts"] = [ +  ("4", ALL_HOSTS , ALL_SERVICES )  ] -# run check_mk_agent every 2 minutes if it terminates -# successfully. +# +# run check_mk_agent every 4 minutes if it terminates successfully.  # see https://leap.se/code/issues/6539 for the rationale +#  extra_service_conf["normal_check_interval"] = [ -  ("2", ALL_HOSTS , "Check_MK" ) +  ("4", ALL_HOSTS , "Check_MK" )  ] diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index ee0268a3..abfc7ad0 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -12,13 +12,21 @@ class site_check_mk::agent::couchdb {    # check bigcouch processes -  file_line { +  augeas {      'Bigcouch_epmd_procs': -      line => 'Bigcouch_epmd_procs  /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/epmd', -      path => '/etc/check_mk/mrpe.cfg'; +      incl    => '/etc/check_mk/mrpe.cfg', +      lens    => 'Spacevars.lns', +      changes => [ +        'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_epmd_procs', +        'set Bigcouch_epmd_procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/epmd\'' ], +      require => File['/etc/check_mk/mrpe.cfg'];      'Bigcouch_beam_procs': -      line => 'Bigcouch_beam_procs  /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/beam', -      path => '/etc/check_mk/mrpe.cfg'; +      incl    => '/etc/check_mk/mrpe.cfg', +      lens    => 'Spacevars.lns', +      changes => [ +        'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_beam_procs', +        'set Bigcouch_beam_procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/beam\'' ], +      require => File['/etc/check_mk/mrpe.cfg'];    }    # check 
open files for bigcouch proc @@ -27,10 +35,21 @@ class site_check_mk::agent::couchdb {      source => 'puppet:///modules/site_check_mk/agent/nagios_plugins/check_unix_open_fds.pl',      mode   => '0755'    } -  file_line { +  augeas {      'Bigcouch_open_files': -      line => 'Bigcouch_open_files /srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720', -      path => '/etc/check_mk/mrpe.cfg'; +      incl    => '/etc/check_mk/mrpe.cfg', +      lens    => 'Spacevars.lns', +      changes => [ +        'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_open_files', +        'set Bigcouch_open_files \'/srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720\'' ], +      require => File['/etc/check_mk/mrpe.cfg'];    } + +  # check different couchdb stats +  file { '/usr/lib/check_mk_agent/local/leap_couch_stats.sh': +    source  => 'puppet:///modules/site_check_mk/agent/local_checks/couchdb/leap_couch_stats.sh', +    mode    => '0755', +    require => Package['check_mk-agent'] +  }  } diff --git a/puppet/modules/site_check_mk/manifests/agent/haproxy.pp b/puppet/modules/site_check_mk/manifests/agent/haproxy.pp index e7986db1..6d52efba 100644 --- a/puppet/modules/site_check_mk/manifests/agent/haproxy.pp +++ b/puppet/modules/site_check_mk/manifests/agent/haproxy.pp @@ -3,10 +3,13 @@ class site_check_mk::agent::haproxy {    include site_check_mk::agent::package::nagios_plugins_contrib    # local nagios plugin checks via mrpe -  file_line { -    'haproxy': -      line => 'Haproxy  /usr/lib/nagios/plugins/check_haproxy -u "http://localhost:8000/haproxy;csv"', -      path => '/etc/check_mk/mrpe.cfg'; +  augeas { 'haproxy': +    incl    => '/etc/check_mk/mrpe.cfg', +    lens    => 'Spacevars.lns', +    changes => [ +      'rm /files/etc/check_mk/mrpe.cfg/Haproxy', +      'set Haproxy \'/usr/lib/nagios/plugins/check_haproxy -u "http://localhost:8000/haproxy;csv"\'' ], +    require => File['/etc/check_mk/mrpe.cfg'];    }  } diff 
--git a/puppet/modules/site_check_mk/manifests/agent/haveged.pp b/puppet/modules/site_check_mk/manifests/agent/haveged.pp new file mode 100644 index 00000000..cacbea8c --- /dev/null +++ b/puppet/modules/site_check_mk/manifests/agent/haveged.pp @@ -0,0 +1,15 @@ +class site_check_mk::agent::haveged { + +# check haveged process +  augeas { +    'haveged_proc': +      incl    => '/etc/check_mk/mrpe.cfg', +      lens    => 'Spacevars.lns', +      changes => [ +        'rm /files/etc/check_mk/mrpe.cfg/haveged_proc', +        'set haveged_proc \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /usr/sbin/haveged\'' ], +      require => File['/etc/check_mk/mrpe.cfg']; + +  } + +} diff --git a/puppet/modules/site_check_mk/manifests/agent/mrpe.pp b/puppet/modules/site_check_mk/manifests/agent/mrpe.pp index 6921574f..5e1f087a 100644 --- a/puppet/modules/site_check_mk/manifests/agent/mrpe.pp +++ b/puppet/modules/site_check_mk/manifests/agent/mrpe.pp @@ -11,8 +11,14 @@ class site_check_mk::agent::mrpe {      ensure  => present,      require => Package['check-mk-agent']    } -> -  file_line { 'Apt': -    line => 'APT    /usr/lib/nagios/plugins/check_apt', -    path => '/etc/check_mk/mrpe.cfg', + +  augeas { +    'Apt': +      incl    => '/etc/check_mk/mrpe.cfg', +      lens    => 'Spacevars.lns', +      changes => [ +        'rm /files/etc/check_mk/mrpe.cfg/APT', +        'set APT \'/usr/lib/nagios/plugins/check_apt\'' ];    } +  } diff --git a/puppet/modules/site_check_mk/manifests/agent/mx.pp b/puppet/modules/site_check_mk/manifests/agent/mx.pp index 1e370125..98757b59 100644 --- a/puppet/modules/site_check_mk/manifests/agent/mx.pp +++ b/puppet/modules/site_check_mk/manifests/agent/mx.pp @@ -6,13 +6,16 @@ class site_check_mk::agent::mx {    }    # local nagios plugin checks via mrpe -  file_line { +  augeas {      'Leap_MX_Procs': -      line => 'Leap_MX_Procs  /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a \'/usr/bin/python /usr/bin/twistd 
--pidfile=/var/run/leap_mx.pid --rundir=/var/lib/leap_mx/ --python=/usr/share/app/leap_mx.tac --logfile=/var/log/leap_mx.log\'', -      path => '/etc/check_mk/mrpe.cfg'; +      incl    => '/etc/check_mk/mrpe.cfg', +      lens    => 'Spacevars.lns', +      changes => [ +        'rm /files/etc/check_mk/mrpe.cfg/Leap_MX_Procs', +        'set Leap_MX_Procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a "/usr/bin/python /usr/bin/twistd --pidfile=/var/run/leap_mx.pid --rundir=/var/lib/leap_mx/ --python=/usr/share/app/leap_mx.tac --logfile=/var/log/leap/mx.log"\'' ], +      require => File['/etc/check_mk/mrpe.cfg'];    } -    # check stale files in queue dir    file { '/usr/lib/check_mk_agent/local/check_leap_mx.sh':      source  => 'puppet:///modules/site_check_mk/agent/local_checks/mx/check_leap_mx.sh', diff --git a/puppet/modules/site_check_mk/manifests/agent/openvpn.pp b/puppet/modules/site_check_mk/manifests/agent/openvpn.pp index 919a408d..0596a497 100644 --- a/puppet/modules/site_check_mk/manifests/agent/openvpn.pp +++ b/puppet/modules/site_check_mk/manifests/agent/openvpn.pp @@ -2,7 +2,7 @@ class site_check_mk::agent::openvpn {    # check syslog    concat::fragment { 'syslog_openpvn': -    source  => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/openvpn.cfg', +    source  => 'puppet:///modules/site_check_mk/agent/logwatch/openvpn.cfg',      target  => '/etc/check_mk/logwatch.d/syslog.cfg',      order   => '02';    } diff --git a/puppet/modules/site_check_mk/manifests/agent/soledad.pp b/puppet/modules/site_check_mk/manifests/agent/soledad.pp index 512d1a3d..f4a3f3a6 100644 --- a/puppet/modules/site_check_mk/manifests/agent/soledad.pp +++ b/puppet/modules/site_check_mk/manifests/agent/soledad.pp @@ -5,10 +5,13 @@ class site_check_mk::agent::soledad {    }    # local nagios plugin checks via mrpe -  file_line { -    'Soledad_Procs': -      line => 'Soledad_Procs  /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a \'/usr/bin/python /usr/bin/twistd 
--pidfile=/var/run/soledad.pid --logfile=/var/log/soledad.log web --wsgi=leap.soledad.server.application\'', -      path => '/etc/check_mk/mrpe.cfg'; -  } +  augeas { 'Soledad_Procs': +    incl    => '/etc/check_mk/mrpe.cfg', +    lens    => 'Spacevars.lns', +    changes => [ +      'rm /files/etc/check_mk/mrpe.cfg/Soledad_Procs', +      'set Soledad_Procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a "/usr/bin/python /usr/bin/twistd --uid=soledad --gid=soledad --pidfile=/var/run/soledad.pid --logfile=/var/log/soledad.log web --wsgi=leap.soledad.server.application --port=ssl:2323:privateKey=/etc/x509/keys/leap.key:certKey=/etc/x509/certs/leap.crt:sslmethod=SSLv23_METHOD"\'' ], +    require => File['/etc/check_mk/mrpe.cfg']; +  }  } diff --git a/puppet/modules/site_check_mk/manifests/agent/stunnel.pp b/puppet/modules/site_check_mk/manifests/agent/stunnel.pp index 64022824..7f765771 100644 --- a/puppet/modules/site_check_mk/manifests/agent/stunnel.pp +++ b/puppet/modules/site_check_mk/manifests/agent/stunnel.pp @@ -1,7 +1,7 @@  class site_check_mk::agent::stunnel {    concat::fragment { 'syslog_stunnel': -    source  => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/stunnel.cfg', +    source  => 'puppet:///modules/site_check_mk/agent/logwatch/stunnel.cfg',      target  => '/etc/check_mk/logwatch.d/syslog.cfg',      order   => '02';    } diff --git a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp index ffd11100..4a5ec68e 100644 --- a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp +++ b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp @@ -2,21 +2,24 @@ class site_check_mk::agent::tapicero {    include ::site_nagios::plugins -  concat::fragment { 'syslog_tapicero': -    source  => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/tapicero.cfg', -    target  => '/etc/check_mk/logwatch.d/syslog.cfg', -    order   => '02'; +  # watch logs +  file { 
'/etc/check_mk/logwatch.d/tapicero.cfg': +    source => 'puppet:///modules/site_check_mk/agent/logwatch/tapicero.cfg',    }    # local nagios plugin checks via mrpe -  file_line { +  augeas {      'Tapicero_Procs': -      line => 'Tapicero_Procs  /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a tapicero', -      path => '/etc/check_mk/mrpe.cfg'; - +      incl    => '/etc/check_mk/mrpe.cfg', +      lens    => 'Spacevars.lns', +      changes => [ +        'rm /files/etc/check_mk/mrpe.cfg/Tapicero_Procs', +        'set Tapicero_Procs "/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a tapicero"' ], +      require => File['/etc/check_mk/mrpe.cfg'];      'Tapicero_Heartbeat': -      line => 'Tapicero_Heartbeat  /usr/local/lib/nagios/plugins/check_last_regex_in_log -f /var/log/syslog -r "tapicero" -w 300 -c 600', -      path => '/etc/check_mk/mrpe.cfg'; +      incl    => '/etc/check_mk/mrpe.cfg', +      lens    => 'Spacevars.lns', +      changes => 'set Tapicero_Heartbeat \'/usr/local/lib/nagios/plugins/check_last_regex_in_log -f /var/log/leap/tapicero.log -r "tapicero" -w 300 -c 600\'', +      require => File['/etc/check_mk/mrpe.cfg'];    } -  } diff --git a/puppet/modules/site_check_mk/manifests/agent/webapp.pp b/puppet/modules/site_check_mk/manifests/agent/webapp.pp index 88c3da30..9bf3b197 100644 --- a/puppet/modules/site_check_mk/manifests/agent/webapp.pp +++ b/puppet/modules/site_check_mk/manifests/agent/webapp.pp @@ -7,11 +7,9 @@ class site_check_mk::agent::webapp {      ensure  => absent    } -  # check syslog -  concat::fragment { 'syslog_webapp': -    source  => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/webapp.cfg', -    target  => '/etc/check_mk/logwatch.d/syslog.cfg', -    order   => '02'; +  # watch logs +  file { '/etc/check_mk/logwatch.d/webapp.cfg': +    source => 'puppet:///modules/site_check_mk/agent/logwatch/webapp.cfg',    }  }  | 
