summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGabriel Filion <gabriel@koumbit.org>2011-04-27 15:46:58 -0400
committerGabriel Filion <lelutin@gmail.com>2012-04-10 00:09:15 -0400
commit7fb80fbecfa5a186846a2909bd1d6ada32ba21e1 (patch)
tree9ed1d2bf91f5914d2fad1c321c3247c3061f7757
parentdddff2316ee0ce0dab3540538de44a3829cc5c07 (diff)
Define check_load as a function of the nb of cpus
The "check_load" Nagios check should not be hardcoded to some arbitrary value. The thresholds should rather be adjusted as a function of how many CPUs are present in the server. The factors were chosen from prior experience but can be adjusted if needed.

Factors for warning thresholds (where N is the number of CPUs):
  - 1 min:   5 × N
  - 5 mins:  4 × N
  - 15 mins: 3 × N

Factors for critical thresholds:
  - 1 min:   10 × N
  - 5 mins:  9 × N
  - 15 mins: 8 × N
-rw-r--r--files/nrpe/nrpe_commands.cfg1
-rw-r--r--manifests/nrpe/base.pp14
2 files changed, 13 insertions, 2 deletions
diff --git a/files/nrpe/nrpe_commands.cfg b/files/nrpe/nrpe_commands.cfg
index 1a26cc3..b725a36 100644
--- a/files/nrpe/nrpe_commands.cfg
+++ b/files/nrpe/nrpe_commands.cfg
@@ -1,6 +1,5 @@
# default plugins
command[check_users]=/usr/lib/nagios/plugins/check_users $ARG1$
-command[check_load]=/usr/lib/nagios/plugins/check_load $ARG1$
command[check_disk]=/usr/lib/nagios/plugins/check_disk $ARG1$ -x "/dev" -x "/dev/shm" -x "/lib/init/rw"
command[check_apt]=sudo /usr/lib/nagios/plugins/check_apt -u $ARG1$
command[check_swap]=/usr/lib/nagios/plugins/check_swap $ARG1$
diff --git a/manifests/nrpe/base.pp b/manifests/nrpe/base.pp
index 8636760..1d26849 100644
--- a/manifests/nrpe/base.pp
+++ b/manifests/nrpe/base.pp
@@ -23,9 +23,21 @@ class nagios::nrpe::base {
# default commands
nagios::nrpe::command { "basic_nrpe":
- source => [ "puppet:///modules/site-nagios/configs/nrpe/nrpe_commands.cfg",
+ source => [ "puppet:///modules/site-nagios/configs/nrpe/nrpe_commands.${fqdn}.cfg",
+ "puppet:///modules/site-nagios/configs/nrpe/nrpe_commands.cfg",
"puppet:///modules/nagios/nrpe/nrpe_commands.cfg" ],
}
+ # the check for load should be customized for each server based on number
+ # of CPUs and the type of activity.
+ $warning_1_threshold = 5 * $processorcount
+ $warning_5_threshold = 4 * $processorcount
+ $warning_15_threshold = 3 * $processorcount
+ $critical_1_threshold = 10 * $processorcount
+ $critical_5_threshold = 9 * $processorcount
+ $critical_15_threshold = 8 * $processorcount
+ nagios::nrpe::command { "check_load":
+ command_line => "/usr/lib/nagios/plugins/check_load -w ${warning_1_threshold},${warning_5_threshold},${warning_15_threshold} -c ${critical_1_threshold},${critical_5_threshold},${critical_15_threshold}",
+ }
service { "nagios-nrpe-server":
ensure => running,