From 7fb80fbecfa5a186846a2909bd1d6ada32ba21e1 Mon Sep 17 00:00:00 2001 From: Gabriel Filion Date: Wed, 27 Apr 2011 15:46:58 -0400 Subject: Define check_load as a function of the nb of cpus The "check_load" Nagios check should not be hardcoded to some arbitrary value. The thresholds should rather be adjusted as a function of how many CPUs are present in the server. The factors were chosen based on prior experience but could be adjusted if needed. Factors for warning thresholds (where N is the number of CPUs): * 1min : 5 * N * 5mins : 4 * N * 15mins : 3 * N Factors for critical thresholds: * 1min : 10 * N * 5mins : 9 * N * 15mins : 8 * N --- manifests/nrpe/base.pp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'manifests/nrpe/base.pp') diff --git a/manifests/nrpe/base.pp b/manifests/nrpe/base.pp index 8636760..1d26849 100644 --- a/manifests/nrpe/base.pp +++ b/manifests/nrpe/base.pp @@ -23,9 +23,21 @@ class nagios::nrpe::base { # default commands nagios::nrpe::command { "basic_nrpe": - source => [ "puppet:///modules/site-nagios/configs/nrpe/nrpe_commands.cfg", + source => [ "puppet:///modules/site-nagios/configs/nrpe/nrpe_commands.${fqdn}.cfg", + "puppet:///modules/site-nagios/configs/nrpe/nrpe_commands.cfg", "puppet:///modules/nagios/nrpe/nrpe_commands.cfg" ], } + # the check for load should be customized for each server based on number + # of CPUs and the type of activity. 
+ $warning_1_threshold = 5 * $processorcount + $warning_5_threshold = 4 * $processorcount + $warning_15_threshold = 3 * $processorcount + $critical_1_threshold = 10 * $processorcount + $critical_5_threshold = 9 * $processorcount + $critical_15_threshold = 8 * $processorcount + nagios::nrpe::command { "check_load": + command_line => "/usr/lib/nagios/plugins/check_load -w ${warning_1_threshold},${warning_5_threshold},${warning_15_threshold} -c ${critical_1_threshold},${critical_5_threshold},${critical_15_threshold}", + } service { "nagios-nrpe-server": ensure => running, -- cgit v1.2.3