From c9a9132d107d99a2f303fa766dae87397ac18274 Mon Sep 17 00:00:00 2001 From: elijah Date: Wed, 4 Mar 2015 10:58:24 -0800 Subject: temporarily increase the delay between soledad / web api tests to 60 minutes, until we are able to fix the issue with the test users creating db bloat. --- puppet/modules/site_check_mk/files/extra_service_conf.mk | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'puppet/modules/site_check_mk/files') diff --git a/puppet/modules/site_check_mk/files/extra_service_conf.mk b/puppet/modules/site_check_mk/files/extra_service_conf.mk index 03d1ea76..61b0dd39 100644 --- a/puppet/modules/site_check_mk/files/extra_service_conf.mk +++ b/puppet/modules/site_check_mk/files/extra_service_conf.mk @@ -1,13 +1,18 @@ # retry 3 times before setting a service into a hard state # and send out notification -extra_service_conf["max_check_attempts"] = [ - ("4", ALL_HOSTS , ALL_SERVICES ) +extra_service_conf["max_check_attempts"] = [ + ("4", ALL_HOSTS , ALL_SERVICES ) ] -# run check_mk_agent every 2 minutes if it terminates -# successfully. +# +# run check_mk_agent every 2 minutes if it terminates successfully. # see https://leap.se/code/issues/6539 for the rationale +# +# update: temporarily set interval to 60 minutes until we solve the +# issue with the users db getting bloated with deleted +# test users. +# extra_service_conf["normal_check_interval"] = [ - ("2", ALL_HOSTS , "Check_MK" ) + ("60", ALL_HOSTS , "Check_MK" ) ] -- cgit v1.2.3 From eeb5fd763d7f78d6dcff252981cc9e38dd446fa6 Mon Sep 17 00:00:00 2001 From: elijah Date: Wed, 1 Apr 2015 22:26:56 -0700 Subject: run check_mk tests every 10 minutes --- puppet/modules/site_check_mk/files/extra_service_conf.mk | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'puppet/modules/site_check_mk/files') diff --git a/puppet/modules/site_check_mk/files/extra_service_conf.mk b/puppet/modules/site_check_mk/files/extra_service_conf.mk index 61b0dd39..a4c6e769 100644 --- a/puppet/modules/site_check_mk/files/extra_service_conf.mk +++ b/puppet/modules/site_check_mk/files/extra_service_conf.mk @@ -5,14 +5,10 @@ extra_service_conf["max_check_attempts"] = [ ] # -# run check_mk_agent every 2 minutes if it terminates successfully. +# run check_mk_agent every 10 minutes if it terminates successfully. # see https://leap.se/code/issues/6539 for the rationale # -# update: temporarily set interval to 60 minutes until we solve the -# issue with the users db getting bloated with deleted -# test users. -# extra_service_conf["normal_check_interval"] = [ - ("60", ALL_HOSTS , "Check_MK" ) + ("10", ALL_HOSTS , "Check_MK" ) ] -- cgit v1.2.3 From 6019eff30ef839e5c6a603fde7fe3fc98d4d4f47 Mon Sep 17 00:00:00 2001 From: varac Date: Sun, 1 Mar 2015 10:16:44 +0100 Subject: added local check_mk couchdb script (#6749) leap_couch_stats.sh is a local check_mk agent script which provides per-db stats as well as global stats. Change-Id: I1eba19a3a0210d3127acbad119dfd2918414ff4a --- .../agent/local_checks/couchdb/leap_couch_stats.sh | 106 +++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100755 puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh (limited to 'puppet/modules/site_check_mk/files') diff --git a/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh b/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh new file mode 100755 index 00000000..51a8ac52 --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh @@ -0,0 +1,106 @@ +#!/bin/bash +# +# todo: +# - thresholds +# - couch response time +# - make CURL/URL/DBLIST_EXCLUDE vars configurable +# - move load_nagios_utils() to helper library so we can use it from multiple scripts + +start_time=$(date +%s.%N) + +CURL='curl -s --netrc-file /etc/couchdb/couchdb.netrc' +URL='http://127.0.0.1:5984' +TMPFILE=$(mktemp) +DBLIST_EXCLUDE='user-' +PREFIX='Couchdb_' + + +load_nagios_utils () { + # load the nagios utils + # in debian, the package nagios-plugins-common installs utils.sh to /usr/lib/nagios/plugins/utils.sh + utilsfn= + for d in $PROGPATH /usr/lib/nagios/plugins /usr/lib64/nagios/plugins /usr/local/nagios/libexec /opt/nagios-plugins/libexec . ; do + if [ -f "$d/utils.sh" ]; then + utilsfn=$d/utils.sh; + fi + done + if [ "$utilsfn" = "" ]; then + echo "UNKNOWN - cannot find utils.sh (part of nagios plugins)"; + exit 3; + fi + . "$utilsfn"; + STATE[$STATE_OK]='OK' + STATE[$STATE_WARNING]='Warning' + STATE[$STATE_CRITICAL]='Critical' + STATE[$STATE_UNKNOWN]='Unknown' + STATE[$STATE_DEPENDENT]='Dependend' +} + +get_global_stats_perf () { + trap "localexit=3" ERR + local localexit db_count + localexit=0 + + # get a list of all dbs + $CURL -X GET $URL/_all_dbs | json_pp | egrep -v '(\[|\])' > $TMPFILE + + db_count=$( wc -l < $TMPFILE) + excluded_db_count=$( grep -c "$DBLIST_EXCLUDE" $TMPFILE ) + + echo "db_count=$db_count|excluded_db_count=$excluded_db_count" + return ${localexit} +} + +db_stats () { + trap "localexit=3" ERR + local db db_stats doc_count del_doc_count localexit + localexit=0 + + db=$1 + perf="$perf|${db}_docs=$( $CURL -s -X GET ${URL}/$db | json_pp |grep 'doc_count' | sed 's/[^0-9]//g' )" + db_stats=$( $CURL -s -X GET ${URL}/$db | json_pp ) + + doc_count=$( echo "$db_stats" | grep 'doc_count' | grep -v 'deleted_doc_count' | sed 's/[^0-9]//g' ) + del_doc_count=$( echo "$db_stats" | grep 'doc_del_count' | sed 's/[^0-9]//g' ) + + # don't divide by zero + if [ $del_doc_count -eq 0 ] + then + del_doc_perc=0 + else + del_doc_perc=$(( del_doc_count * 100 / doc_count )) + fi + + bytes=$( echo "$db_stats" | grep disk_size | sed 's/[^0-9]//g' ) + disk_size=$( echo "scale = 2; $bytes / 1024 / 1024" | bc -l ) + + echo -n "${localexit} ${PREFIX}${db}_database ${db}_docs=$doc_count|${db}_deleted_docs=$del_doc_count|${db}_deleted_docs_percentage=${del_doc_perc}%" + printf "|${db}_disksize_mb=%02.2fmb ${STATE[localexit]}: database $db\n" "$disk_size" + + return ${localexit} +} + +# main + +load_nagios_utils + +# per-db stats +# get a list of all dbs +$CURL -X GET $URL/_all_dbs | json_pp | egrep -v '(\[|\])' > $TMPFILE + +# get list of dbs to check +dbs=$( grep -v "${DBLIST_EXCLUDE}" $TMPFILE | tr -d '\n"' | sed 's/,/ /g' ) + +for db in $dbs +do + db_stats "$db" +done + +# show global couchdb stats +global_stats_perf=$(get_global_stats_perf) +exitcode=$? + +end_time=$(date +%s.%N) +duration=$( echo "scale = 2; $end_time - $start_time" | bc -l ) + +printf "${exitcode} ${PREFIX}global_stats ${global_stats_perf}|script_duration=%02.2fs ${STATE[exitcode]}: global couchdb status\n" "$duration" -- cgit v1.2.3 From 7f069dc47c318e9047a3ae7a29a90f3471610e34 Mon Sep 17 00:00:00 2001 From: elijah Date: Wed, 15 Apr 2015 09:36:30 -0700 Subject: fix tapicero & webapp logs: remove heartbeat log check, move to /var/log/tapicero, fix webapp logwatch location. --- .../site_check_mk/files/agent/logwatch/syslog/tapicero.cfg | 10 ---------- .../site_check_mk/files/agent/logwatch/syslog/webapp.cfg | 5 ----- .../modules/site_check_mk/files/agent/logwatch/tapicero.cfg | 11 +++++++++++ puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg | 6 ++++++ 4 files changed, 17 insertions(+), 15 deletions(-) delete mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg delete mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg create mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg create mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg (limited to 'puppet/modules/site_check_mk/files') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg deleted file mode 100644 index e5721eea..00000000 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg +++ /dev/null @@ -1,10 +0,0 @@ -# Ignore transient Tapicero errors when creating a db (#6511) - I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::Resource Not Found|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error) - C tapicero.*RestClient::InternalServerError: -# possible race condition between multiple tapicero -# instances, so we ignore it -# see https://leap.se/code/issues/5168 - I tapicero.*RestClient::PreconditionFailed: - C tapicero.*Creating database.*failed due to: - C tapicero.*failed - W tapicero.*Couch stream ended unexpectedly. diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg deleted file mode 100644 index 00f9c7fd..00000000 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg +++ /dev/null @@ -1,5 +0,0 @@ -# check for webapp errors - C webapp.*Could not connect to couch database messages due to 401 Unauthorized: {"error":"unauthorized","reason":"You are not a server admin."} -# ignore RoutingErrors that rails throw when it can't handle a url -# see https://leap.se/code/issues/5173 - I webapp.*ActionController::RoutingError diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg new file mode 100644 index 00000000..f527f120 --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg @@ -0,0 +1,11 @@ +/var/log/leap/tapicero.log +# Ignore transient Tapicero errors when creating a db (#6511) + I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::Resource Not Found|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error) + C tapicero.*RestClient::InternalServerError: +# possible race condition between multiple tapicero +# instances, so we ignore it +# see https://leap.se/code/issues/5168 + I tapicero.*RestClient::PreconditionFailed: + C tapicero.*Creating database.*failed due to: + C tapicero.*failed + W tapicero.*Couch stream ended unexpectedly. diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg new file mode 100644 index 00000000..008e9e09 --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/logwatch/webapp.cfg @@ -0,0 +1,6 @@ +/var/log/leap/webapp.log +# check for webapp errors + C webapp.*Could not connect to couch database messages due to 401 Unauthorized: {"error":"unauthorized","reason":"You are not a server admin."} +# ignore RoutingErrors that rails throw when it can't handle a url +# see https://leap.se/code/issues/5173 + I webapp.*ActionController::RoutingError -- cgit v1.2.3 From bb07407485ed1626221a1190cc2fb2789f95ed22 Mon Sep 17 00:00:00 2001 From: elijah Date: Wed, 15 Apr 2015 16:12:11 -0700 Subject: clean up logging mess: add 'logfile' define, mv openvpn and stunnel logs to their own files, fix mx logwatch path. --- .../modules/site_check_mk/files/agent/logwatch/leap_mx.cfg | 2 +- .../modules/site_check_mk/files/agent/logwatch/openvpn.cfg | 14 ++++++++++++++ .../modules/site_check_mk/files/agent/logwatch/stunnel.cfg | 10 ++++++++++ .../site_check_mk/files/agent/logwatch/syslog/openvpn.cfg | 13 ------------- .../site_check_mk/files/agent/logwatch/syslog/stunnel.cfg | 9 --------- 5 files changed, 25 insertions(+), 23 deletions(-) create mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg create mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg delete mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg delete mode 100644 puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg (limited to 'puppet/modules/site_check_mk/files') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg index c71c5392..166d0230 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg @@ -1,4 +1,4 @@ -/var/log/leap_mx.log +/var/log/leap/mx.log W Don't know how to deliver mail W No public key, stopping the processing chain diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg new file mode 100644 index 00000000..ed50f420 --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/logwatch/openvpn.cfg @@ -0,0 +1,14 @@ +/var/log/leap/openvpn.log +# ignore openvpn TLS initialization errors when clients +# suddenly hangup before properly establishing +# a tls connection + I ovpn-.*TLS Error: Unroutable control packet received from + I ovpn-.*TLS Error: TLS key negotiation failed to occur within 60 seconds \(check your network connectivity\) + I ovpn-.*TLS Error: TLS handshake failed + I ovpn-.*TLS Error: TLS object -> incoming plaintext read error + I ovpn-.*Fatal TLS error \(check_tls_errors_co\), restarting + I ovpn-.*TLS_ERROR: BIO read tls_read_plaintext error: error:140890B2:SSL routines:SSL3_GET_CLIENT_CERTIFICATE:no certificate + + I ovpn-.*SIGUSR1\[soft,tls-error\] received, client-instance restarting + I ovpn-.*VERIFY ERROR: depth=0, error=certificate has expired + diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg new file mode 100644 index 00000000..b1e6cf2f --- /dev/null +++ b/puppet/modules/site_check_mk/files/agent/logwatch/stunnel.cfg @@ -0,0 +1,10 @@ +/var/log/leap/stunnel.log +# check for stunnel failures +# +# these are temporary failures and happen very often, so we +# ignore them until we tuned stunnel timeouts/logging, +# see https://leap.se/code/issues/5218 + I stunnel:.*Connection reset by peer + I stunnel:.*Peer suddenly disconnected + I stunnel:.*Connection refused + diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg deleted file mode 100644 index ac17c0ca..00000000 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg +++ /dev/null @@ -1,13 +0,0 @@ -# ignore openvpn TLS initialization errors when clients -# suddenly hangup before properly establishing -# a tls connection - I ovpn-.*TLS Error: Unroutable control packet received from - I ovpn-.*TLS Error: TLS key negotiation failed to occur within 60 seconds \(check your network connectivity\) - I ovpn-.*TLS Error: TLS handshake failed - I ovpn-.*TLS Error: TLS object -> incoming plaintext read error - I ovpn-.*Fatal TLS error \(check_tls_errors_co\), restarting - I ovpn-.*TLS_ERROR: BIO read tls_read_plaintext error: error:140890B2:SSL routines:SSL3_GET_CLIENT_CERTIFICATE:no certificate - - I ovpn-.*SIGUSR1\[soft,tls-error\] received, client-instance restarting - I ovpn-.*VERIFY ERROR: depth=0, error=certificate has expired - diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg deleted file mode 100644 index eb3131f2..00000000 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg +++ /dev/null @@ -1,9 +0,0 @@ -# check for stunnel failures -# -# these are temporary failures and happen very often, so we -# ignore them until we tuned stunnel timeouts/logging, -# see https://leap.se/code/issues/5218 - I stunnel:.*Connection reset by peer - I stunnel:.*Peer suddenly disconnected - I stunnel:.*Connection refused - -- cgit v1.2.3 From e65460dcefda6660431d937e9a66d17a1d80e104 Mon Sep 17 00:00:00 2001 From: varac Date: Sun, 26 Apr 2015 21:19:33 +0200 Subject: Tapicero changed it's error message when uploading design doc fails in race condition with another tapicero instance #6534 Change-Id: Ie194a2983210601bd24aef5e74f8b7fa2b7c433f --- puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'puppet/modules/site_check_mk/files') diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg index f527f120..d98f5094 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/tapicero.cfg @@ -1,6 +1,6 @@ /var/log/leap/tapicero.log # Ignore transient Tapicero errors when creating a db (#6511) - I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::Resource Not Found|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error) + I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::ResourceNotFound|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error) C tapicero.*RestClient::InternalServerError: # possible race condition between multiple tapicero # instances, so we ignore it -- cgit v1.2.3 From eaddcfca6761bac329a57a65453958b6741f2420 Mon Sep 17 00:00:00 2001 From: varac Date: Sun, 26 Apr 2015 22:41:29 +0200 Subject: run check_mk_agent every 4 instead of 10 minutes, useful for better graphs Change-Id: Ibefc6ce08cf714cf79a460a8b6eb32e2851ce22c --- puppet/modules/site_check_mk/files/extra_service_conf.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'puppet/modules/site_check_mk/files') diff --git a/puppet/modules/site_check_mk/files/extra_service_conf.mk b/puppet/modules/site_check_mk/files/extra_service_conf.mk index a4c6e769..c7120a96 100644 --- a/puppet/modules/site_check_mk/files/extra_service_conf.mk +++ b/puppet/modules/site_check_mk/files/extra_service_conf.mk @@ -5,10 +5,10 @@ extra_service_conf["max_check_attempts"] = [ ] # -# run check_mk_agent every 10 minutes if it terminates successfully. +# run check_mk_agent every 4 minutes if it terminates successfully. # see https://leap.se/code/issues/6539 for the rationale # extra_service_conf["normal_check_interval"] = [ - ("10", ALL_HOSTS , "Check_MK" ) + ("4", ALL_HOSTS , "Check_MK" ) ] -- cgit v1.2.3 From ccf63cf34fedbcff2923b11f2e49c3a58b6c1180 Mon Sep 17 00:00:00 2001 From: varac Date: Wed, 27 May 2015 11:17:06 +0200 Subject: leap_couch_stats.sh handles rotated dbs (#6987) Change-Id: I115ebdefd7365bf15a30c4a3ce7a4543ad757cec --- .../agent/local_checks/couchdb/leap_couch_stats.sh | 25 ++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'puppet/modules/site_check_mk/files') diff --git a/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh b/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh index 51a8ac52..95474ccb 100755 --- a/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh +++ b/puppet/modules/site_check_mk/files/agent/local_checks/couchdb/leap_couch_stats.sh @@ -11,7 +11,7 @@ start_time=$(date +%s.%N) CURL='curl -s --netrc-file /etc/couchdb/couchdb.netrc' URL='http://127.0.0.1:5984' TMPFILE=$(mktemp) -DBLIST_EXCLUDE='user-' +DBLIST_EXCLUDE='(user-|sessions_|tokens_)' PREFIX='Couchdb_' @@ -45,7 +45,7 @@ get_global_stats_perf () { $CURL -X GET $URL/_all_dbs | json_pp | egrep -v '(\[|\])' > $TMPFILE db_count=$( wc -l < $TMPFILE) - excluded_db_count=$( grep -c "$DBLIST_EXCLUDE" $TMPFILE ) + excluded_db_count=$( egrep -c "$DBLIST_EXCLUDE" $TMPFILE ) echo "db_count=$db_count|excluded_db_count=$excluded_db_count" return ${localexit} @@ -56,7 +56,14 @@ db_stats () { local db db_stats doc_count del_doc_count localexit localexit=0 - db=$1 + db="$1" + name="$2" + + if [ -z "$name" ] + then + name="$db" + fi + perf="$perf|${db}_docs=$( $CURL -s -X GET ${URL}/$db | json_pp |grep 'doc_count' | sed 's/[^0-9]//g' )" db_stats=$( $CURL -s -X GET ${URL}/$db | json_pp ) @@ -74,8 +81,8 @@ db_stats () { bytes=$( echo "$db_stats" | grep disk_size | sed 's/[^0-9]//g' ) disk_size=$( echo "scale = 2; $bytes / 1024 / 1024" | bc -l ) - echo -n "${localexit} ${PREFIX}${db}_database ${db}_docs=$doc_count|${db}_deleted_docs=$del_doc_count|${db}_deleted_docs_percentage=${del_doc_perc}%" - printf "|${db}_disksize_mb=%02.2fmb ${STATE[localexit]}: database $db\n" "$disk_size" + echo -n "${localexit} ${PREFIX}${name}_database ${name}_docs=$doc_count|${name}_deleted_docs=$del_doc_count|${name}_deleted_docs_percentage=${del_doc_perc}%" + printf "|${name}_disksize_mb=%02.2fmb ${STATE[localexit]}: database $name\n" "$disk_size" return ${localexit} } @@ -89,13 +96,19 @@ load_nagios_utils $CURL -X GET $URL/_all_dbs | json_pp | egrep -v '(\[|\])' > $TMPFILE # get list of dbs to check -dbs=$( grep -v "${DBLIST_EXCLUDE}" $TMPFILE | tr -d '\n"' | sed 's/,/ /g' ) +dbs=$( egrep -v "${DBLIST_EXCLUDE}" $TMPFILE | tr -d '\n"' | sed 's/,/ /g' ) for db in $dbs do db_stats "$db" done +# special handling for rotated dbs +suffix=$(($(date +'%s') / (60*60*24*30) + 1)) +db_stats "sessions_${suffix}" "sessions" +db_stats "tokens_${suffix}" "tokens" + + # show global couchdb stats global_stats_perf=$(get_global_stats_perf) exitcode=$? -- cgit v1.2.3