summaryrefslogtreecommitdiff
path: root/puppet/modules/site_check_mk/files/agent
diff options
context:
space:
mode:
Diffstat (limited to 'puppet/modules/site_check_mk/files/agent')
-rwxr-xr-xpuppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh119
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg2
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg (renamed from puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg)1
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg (renamed from puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg)1
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg (renamed from puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg)3
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg (renamed from puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg)1
6 files changed, 125 insertions, 2 deletions
diff --git a/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh b/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh
new file mode 100755
index 00000000..95474ccb
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+#
+# todo:
+# - thresholds
+# - couch response time
+# - make CURL/URL/DBLIST_EXCLUDE vars configurable
+# - move load_nagios_utils() to helper library so we can use it from multiple scripts
+
+start_time=$(date +%s.%N)
+
+CURL='curl -s --netrc-file /etc/couchdb/couchdb.netrc'
+URL='http://127.0.0.1:5984'
+TMPFILE=$(mktemp)
+DBLIST_EXCLUDE='(user-|sessions_|tokens_)'
+PREFIX='Couchdb_'
+
+
+load_nagios_utils () {
+ # load the nagios utils
+ # in debian, the package nagios-plugins-common installs utils.sh to /usr/lib/nagios/plugins/utils.sh
+ utilsfn=
+ for d in $PROGPATH /usr/lib/nagios/plugins /usr/lib64/nagios/plugins /usr/local/nagios/libexec /opt/nagios-plugins/libexec . ; do
+ if [ -f "$d/utils.sh" ]; then
+ utilsfn=$d/utils.sh;
+ fi
+ done
+ if [ "$utilsfn" = "" ]; then
+ echo "UNKNOWN - cannot find utils.sh (part of nagios plugins)";
+ exit 3;
+ fi
+ . "$utilsfn";
+ STATE[$STATE_OK]='OK'
+ STATE[$STATE_WARNING]='Warning'
+ STATE[$STATE_CRITICAL]='Critical'
+ STATE[$STATE_UNKNOWN]='Unknown'
+ STATE[$STATE_DEPENDENT]='Dependend'
+}
+
+get_global_stats_perf () {
+ trap "localexit=3" ERR
+ local localexit db_count
+ localexit=0
+
+ # get a list of all dbs
+ $CURL -X GET $URL/_all_dbs | json_pp | egrep -v '(\[|\])' > $TMPFILE
+
+ db_count=$( wc -l < $TMPFILE)
+ excluded_db_count=$( egrep -c "$DBLIST_EXCLUDE" $TMPFILE )
+
+ echo "db_count=$db_count|excluded_db_count=$excluded_db_count"
+ return ${localexit}
+}
+
+db_stats () {
+ trap "localexit=3" ERR
+ local db db_stats doc_count del_doc_count localexit
+ localexit=0
+
+ db="$1"
+ name="$2"
+
+ if [ -z "$name" ]
+ then
+ name="$db"
+ fi
+
+ perf="$perf|${db}_docs=$( $CURL -s -X GET ${URL}/$db | json_pp |grep 'doc_count' | sed 's/[^0-9]//g' )"
+ db_stats=$( $CURL -s -X GET ${URL}/$db | json_pp )
+
+ doc_count=$( echo "$db_stats" | grep 'doc_count' | grep -v 'deleted_doc_count' | sed 's/[^0-9]//g' )
+ del_doc_count=$( echo "$db_stats" | grep 'doc_del_count' | sed 's/[^0-9]//g' )
+
+ # don't divide by zero
+ if [ $del_doc_count -eq 0 ]
+ then
+ del_doc_perc=0
+ else
+ del_doc_perc=$(( del_doc_count * 100 / doc_count ))
+ fi
+
+ bytes=$( echo "$db_stats" | grep disk_size | sed 's/[^0-9]//g' )
+ disk_size=$( echo "scale = 2; $bytes / 1024 / 1024" | bc -l )
+
+ echo -n "${localexit} ${PREFIX}${name}_database ${name}_docs=$doc_count|${name}_deleted_docs=$del_doc_count|${name}_deleted_docs_percentage=${del_doc_perc}%"
+ printf "|${name}_disksize_mb=%02.2fmb ${STATE[localexit]}: database $name\n" "$disk_size"
+
+ return ${localexit}
+}
+
+# main
+
+load_nagios_utils
+
+# per-db stats
+# get a list of all dbs
+$CURL -X GET $URL/_all_dbs | json_pp | egrep -v '(\[|\])' > $TMPFILE
+
+# get list of dbs to check
+dbs=$( egrep -v "${DBLIST_EXCLUDE}" $TMPFILE | tr -d '\n"' | sed 's/,/ /g' )
+
+for db in $dbs
+do
+ db_stats "$db"
+done
+
+# special handling for rotated dbs
+suffix=$(($(date +'%s') / (60*60*24*30) + 1))
+db_stats "sessions_${suffix}" "sessions"
+db_stats "tokens_${suffix}" "tokens"
+
+
+# show global couchdb stats
+global_stats_perf=$(get_global_stats_perf)
+exitcode=$?
+
+end_time=$(date +%s.%N)
+duration=$( echo "scale = 2; $end_time - $start_time" | bc -l )
+
+printf "${exitcode} ${PREFIX}global_stats ${global_stats_perf}|script_duration=%02.2fs ${STATE[exitcode]}: global couchdb status\n" "$duration"
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg
index c71c5392..166d0230 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg
@@ -1,4 +1,4 @@
-/var/log/leap_mx.log
+/var/log/leap/mx.log
W Don't know how to deliver mail
W No public key, stopping the processing chain
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg
index ac17c0ca..ed50f420 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg
@@ -1,3 +1,4 @@
+/var/log/leap/openvpn.log
# ignore openvpn TLS initialization errors when clients
# suddenly hangup before properly establishing
# a tls connection
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg
index eb3131f2..b1e6cf2f 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg
@@ -1,3 +1,4 @@
+/var/log/leap/stunnel.log
# check for stunnel failures
#
# these are temporary failures and happen very often, so we
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg
index e5721eea..d98f5094 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg
@@ -1,5 +1,6 @@
+/var/log/leap/tapicero.log
# Ignore transient Tapicero errors when creating a db (#6511)
- I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::Resource Not Found|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error)
+ I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::ResourceNotFound|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error)
C tapicero.*RestClient::InternalServerError:
# possible race condition between multiple tapicero
# instances, so we ignore it
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg
index 00f9c7fd..008e9e09 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg
@@ -1,3 +1,4 @@
+/var/log/leap/webapp.log
# check for webapp errors
C webapp.*Could not connect to couch database messages due to 401 Unauthorized: {"error":"unauthorized","reason":"You are not a server admin."}
# ignore RoutingErrors that rails throw when it can't handle a url