diff options
author | Micah Anderson <micah@riseup.net> | 2016-11-04 10:54:28 -0400 |
---|---|---|
committer | Micah Anderson <micah@riseup.net> | 2016-11-04 10:54:28 -0400 |
commit | 34a381efa8f6295080c843f86bfa07d4e41056af (patch) | |
tree | 9282cf5d4c876688602705a7fa0002bc4a810bde /puppet/modules/site_check_mk | |
parent | 0a72bc6fd292bf9367b314fcb0347c4d35042f16 (diff) | |
parent | 5821964ff7e16ca7aa9141bd09a77d355db492a9 (diff) |
Merge branch 'develop'
Diffstat (limited to 'puppet/modules/site_check_mk')
9 files changed, 12 insertions, 452 deletions
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg deleted file mode 100644 index 0f378a5a..00000000 --- a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg +++ /dev/null @@ -1,28 +0,0 @@ -/opt/bigcouch/var/log/bigcouch.log nocontext=1 -# ignore requests that are fine - I undefined - -.*200$ - I undefined - -.*201$ - I 127.0.0.1 undefined.* ok - I 127.0.0.1 localhost:5984 .* ok - # https://leap.se/code/issues/5246 - I Shutting down group server - # ignore bigcouch conflict errors - I Error in process.*{{nocatch,conflict} - # ignore "Uncaught error in HTTP request: {exit, normal}" error - # it's suppressed in later versions of bigcouch anhow - # see https://leap.se/code/issues/5226 - I Uncaught error in HTTP request: {exit,normal} - I Uncaught error in HTTP request: {exit, - # Ignore rexi_EXIT bigcouch error (Bug #6512) - I Error in process <[0-9.]+> on node .* with exit value: {{rexi_EXIT,{(killed|noproc|shutdown),\[{couch_db,collect_results - # Ignore "Generic server terminating" bigcouch message (Feature #6544) - I Generic server <.*> terminating - I {error_report,<.*>, - I {error_info, - C Uncaught error in HTTP request: {error, - C Response abnormally terminated: {nodedown, - C rexi_DOWN,noproc - C rexi_DOWN,noconnection - C error - C Connection attempt from disallowed node - W Apache CouchDB has started diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg index 3af5045b..11ad3a54 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg @@ -1,4 +1,7 @@ /var/log/soledad.log +# Ignore 401 errors because they are quite noisy due to scanners giving us many false +# positives, and we do not need to see those + I \".*401 [0-9]+ C WSGI application error C Error C error diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg index 71395c50..7daf0cac 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg @@ -15,3 +15,7 @@ # 401 Unauthorized error logged by webapp and possible other # applications C Unauthorized +# catch abnormal termination of processes (due to segfault/fpe +# signals etc). +# see https://github.com/pixelated/pixelated-user-agent/issues/683 + C systemd.*: main process exited, code=killed, status= diff --git a/puppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl b/puppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl deleted file mode 100755 index 06163d49..00000000 --- a/puppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl +++ /dev/null @@ -1,322 +0,0 @@ -#!/usr/bin/perl -w - -# check_unix_open_fds Nagios Plugin -# -# TComm - Carlos Peris Pla -# -# This nagios plugin is free software, and comes with ABSOLUTELY -# NO WARRANTY. It may be used, redistributed and/or modified under -# the terms of the GNU General Public Licence (see -# http://www.fsf.org/licensing/licenses/gpl.txt). - - -# MODULE DECLARATION - -use strict; -use Nagios::Plugin; - - -# FUNCTION DECLARATION - -sub CreateNagiosManager (); -sub CheckArguments (); -sub PerformCheck (); - - -# CONSTANT DEFINITION - -use constant NAME => 'check_unix_open_fds'; -use constant VERSION => '0.1b'; -use constant USAGE => "Usage:\ncheck_unix_open_fds -w <process_threshold,application_threshold> -c <process_threshold,application_threshold>\n". - "\t\t[-V <version>]\n"; -use constant BLURB => "This plugin checks, in UNIX systems with the command lsof installed and with its SUID bit activated, the number\n". - "of file descriptors opened by an application and its processes.\n"; -use constant LICENSE => "This nagios plugin is free software, and comes with ABSOLUTELY\n". - "no WARRANTY. It may be used, redistributed and/or modified under\n". - "the terms of the GNU General Public Licence\n". - "(see http://www.fsf.org/licensing/licenses/gpl.txt).\n"; -use constant EXAMPLE => "\n\n". - "Example:\n". - "\n". - "check_unix_open_fds -a /usr/local/nagios/bin/ndo2db -w 20,75 -c 25,85\n". - "\n". - "It returns CRITICAL if number of file descriptors opened by ndo2db is higher than 85,\n". - "if not it returns WARNING if number of file descriptors opened by ndo2db is higher \n". - "than 75, if not it returns CRITICAL if number of file descriptors opened by any process\n". - "of ndo2db is higher than 25, if not it returns WARNING if number of file descriptors \n". - "opened by any process of ndo2db is higher than 20.\n". - "In other cases it returns OK if check has been performed succesfully.\n\n"; - - -# VARIABLE DEFINITION - -my $Nagios; -my $Error; -my $PluginResult; -my $PluginOutput; -my @WVRange; -my @CVRange; - - -# MAIN FUNCTION - -# Get command line arguments -$Nagios = &CreateNagiosManager(USAGE, VERSION, BLURB, LICENSE, NAME, EXAMPLE); -eval {$Nagios->getopts}; - -if (!$@) { - # Command line parsed - if (&CheckArguments($Nagios, \$Error, \@WVRange, \@CVRange)) { - # Argument checking passed - $PluginResult = &PerformCheck($Nagios, \$PluginOutput, \@WVRange, \@CVRange) - } - else { - # Error checking arguments - $PluginOutput = $Error; - $PluginResult = UNKNOWN; - } - $Nagios->nagios_exit($PluginResult,$PluginOutput); -} -else { - # Error parsing command line - $Nagios->nagios_exit(UNKNOWN,$@); -} - - - -# FUNCTION DEFINITIONS - -# Creates and configures a Nagios plugin object -# Input: strings (usage, version, blurb, license, name and example) to configure argument parsing functionality -# Return value: reference to a Nagios plugin object - -sub CreateNagiosManager() { - # Create GetOpt object - my $Nagios = Nagios::Plugin->new(usage => $_[0], version => $_[1], blurb => $_[2], license => $_[3], plugin => $_[4], extra => $_[5]); - - # Add argument units - $Nagios->add_arg(spec => 'application|a=s', - help => 'Application path for which you want to check the number of open file descriptors', - required => 1); - - # Add argument warning - $Nagios->add_arg(spec => 'warning|w=s', - help => "Warning thresholds. Format: <process_threshold,application_threshold>", - required => 1); - # Add argument critical - $Nagios->add_arg(spec => 'critical|c=s', - help => "Critical thresholds. Format: <process_threshold,application_threshold>", - required => 1); - - # Return value - return $Nagios; -} - - -# Checks argument values and sets some default values -# Input: Nagios Plugin object -# Output: reference to Error description string, Memory Unit, Swap Unit, reference to WVRange ($_[4]), reference to CVRange ($_[5]) -# Return value: True if arguments ok, false if not - -sub CheckArguments() { - my ($Nagios, $Error, $WVRange, $CVRange) = @_; - my $commas; - my $units; - my $i; - my $firstpos; - my $secondpos; - - # Check Warning thresholds list - $commas = $Nagios->opts->warning =~ tr/,//; - if ($commas !=1){ - ${$Error} = "Invalid Warning list format. One comma is expected."; - return 0; - } - else{ - $i=0; - $firstpos=0; - my $warning=$Nagios->opts->warning; - while ($warning =~ /[,]/g) { - $secondpos=pos $warning; - if ($secondpos - $firstpos==1){ - @{$WVRange}[$i] = "~:"; - } - else{ - @{$WVRange}[$i] = substr $Nagios->opts->warning, $firstpos, ($secondpos-$firstpos-1); - } - $firstpos=$secondpos; - $i++ - } - if (length($Nagios->opts->warning) - $firstpos==0){#La coma es el ultimo elemento del string - @{$WVRange}[$i] = "~:"; - } - else{ - @{$WVRange}[$i] = substr $Nagios->opts->warning, $firstpos, (length($Nagios->opts->warning)-$firstpos); - } - - if (@{$WVRange}[0] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/){ - ${$Error} = "Invalid Process Warning threshold in ${$WVRange[0]}"; - return 0; - }if (@{$WVRange}[1] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/){ - ${$Error} = "Invalid Application Warning threshold in ${$WVRange[1]}"; - return 0; - } - } - - # Check Critical thresholds list - $commas = $Nagios->opts->critical =~ tr/,//; - if ($commas !=1){ - ${$Error} = "Invalid Critical list format. One comma is expected."; - return 0; - } - else{ - $i=0; - $firstpos=0; - my $critical=$Nagios->opts->critical; - while ($critical =~ /[,]/g) { - $secondpos=pos $critical ; - if ($secondpos - $firstpos==1){ - @{$CVRange}[$i] = "~:"; - } - else{ - @{$CVRange}[$i] =substr $Nagios->opts->critical, $firstpos, ($secondpos-$firstpos-1); - } - $firstpos=$secondpos; - $i++ - } - if (length($Nagios->opts->critical) - $firstpos==0){#La coma es el ultimo elemento del string - @{$CVRange}[$i] = "~:"; - } - else{ - @{$CVRange}[$i] = substr $Nagios->opts->critical, $firstpos, (length($Nagios->opts->critical)-$firstpos); - } - - if (@{$CVRange}[0] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/) { - ${$Error} = "Invalid Process Critical threshold in @{$CVRange}[0]"; - return 0; - } - if (@{$CVRange}[1] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/) { - ${$Error} = "Invalid Application Critical threshold in @{$CVRange}[1]"; - return 0; - } - } - - return 1; -} - - -# Performs whole check: -# Input: Nagios Plugin object, reference to Plugin output string, Application, referece to WVRange, reference to CVRange -# Output: Plugin output string -# Return value: Plugin return value - -sub PerformCheck() { - my ($Nagios, $PluginOutput, $WVRange, $CVRange) = @_; - my $Application; - my @AppNameSplitted; - my $ApplicationName; - my $PsCommand; - my $PsResult; - my @PsResultLines; - my $ProcLine; - my $ProcPid; - my $LsofCommand; - my $LsofResult; - my $ProcCount = 0; - my $FDCount = 0; - my $ProcFDAvg = 0; - my $PerProcMaxFD = 0; - my $ProcOKFlag = 0; - my $ProcWarningFlag = 0; - my $ProcCriticalFlag = 0; - my $OKFlag = 0; - my $WarningFlag = 0; - my $CriticalFlag = 0; - my $LastWarningProcFDs = 0; - my $LastWarningProc = -1; - my $LastCriticalProcFDs = 0; - my $LastCriticalProc = -1; - my $ProcPluginReturnValue = UNKNOWN; - my $AppPluginReturnValue = UNKNOWN; - my $PluginReturnValue = UNKNOWN; - my $PerformanceData = ""; - my $PerfdataUnit = "FDs"; - - $Application = $Nagios->opts->application; - $PsCommand = "ps -eaf | grep $Application"; - $PsResult = `$PsCommand`; - @AppNameSplitted = split(/\//, $Application); - $ApplicationName = $AppNameSplitted[$#AppNameSplitted]; - @PsResultLines = split(/\n/, $PsResult); - if ( $#PsResultLines > 1 ) { - foreach my $Proc (split(/\n/, $PsResult)) { - if ($Proc !~ /check_unix_open_fds/ && $Proc !~ / grep /) { - $ProcCount += 1; - $ProcPid = (split(/\s+/, $Proc))[1]; - $LsofCommand = "lsof -p $ProcPid | wc -l"; - $LsofResult = `$LsofCommand`; - $LsofResult = ($LsofResult > 0 ) ? ($LsofResult - 1) : 0; - $FDCount += $LsofResult; - if ($LsofResult >= $PerProcMaxFD) { $PerProcMaxFD = $LsofResult; } - $ProcPluginReturnValue = $Nagios->check_threshold(check => $LsofResult,warning => @{$WVRange}[0],critical => @{$CVRange}[0]); - if ($ProcPluginReturnValue eq OK) { - $ProcOKFlag = 1; - } - elsif ($ProcPluginReturnValue eq WARNING) { - $ProcWarningFlag = 1; - if ($LsofResult >= $LastWarningProcFDs) { - $LastWarningProcFDs = $LsofResult; - $LastWarningProc = $ProcPid; - } - } - #if ($LsofResult >= $PCT) { - elsif ($ProcPluginReturnValue eq CRITICAL) { - $ProcCriticalFlag = 1; - if ($LsofResult >= $LastCriticalProcFDs) { - $LastCriticalProcFDs = $LsofResult; - $LastCriticalProc = $ProcPid; - } - } - } - } - if ($ProcCount) { $ProcFDAvg = int($FDCount / $ProcCount); } - $AppPluginReturnValue = $Nagios->check_threshold(check => $FDCount,warning => @{$WVRange}[1],critical => @{$CVRange}[1]); - #if ($FDCount >= $TWT) { - if ($AppPluginReturnValue eq OK) { $OKFlag = 1; } - elsif ($AppPluginReturnValue eq WARNING) { $WarningFlag = 1; } - elsif ($AppPluginReturnValue eq CRITICAL) { $CriticalFlag = 1; } - - # PluginReturnValue and PluginOutput - if ($CriticalFlag) { - $PluginReturnValue = CRITICAL; - ${$PluginOutput} .= "$ApplicationName handling $FDCount files (critical threshold set to @{$CVRange}[1])"; - } - elsif ($WarningFlag) { - $PluginReturnValue = WARNING; - ${$PluginOutput} .= "$ApplicationName handling $FDCount files (warning threshold set to @{$WVRange}[1])"; - } - elsif ($ProcCriticalFlag) { - $PluginReturnValue = CRITICAL; - ${$PluginOutput} .= "Process ID $LastCriticalProc handling $LastCriticalProcFDs files (critical threshold set to @{$CVRange}[0])"; - } - elsif ($ProcWarningFlag) { - $PluginReturnValue = WARNING; - ${$PluginOutput} .= "Process ID $LastWarningProc handling $LastWarningProcFDs files (warning threshold set to @{$WVRange}[0])"; - } - elsif ($OKFlag && $ProcOKFlag) { - $PluginReturnValue = OK; - ${$PluginOutput} .= "$ApplicationName handling $FDCount files"; - } - } - else { - ${$PluginOutput} .= "No existe la aplicacion $ApplicationName"; - } - - - $PerformanceData .= "ProcCount=$ProcCount$PerfdataUnit FDCount=$FDCount$PerfdataUnit ProcFDAvg=$ProcFDAvg$PerfdataUnit PerProcMaxFD=$PerProcMaxFD$PerfdataUnit"; - - # Output with performance data: - ${$PluginOutput} .= " | $PerformanceData"; - - return $PluginReturnValue; -} diff --git a/puppet/modules/site_check_mk/files/ignored_services.mk b/puppet/modules/site_check_mk/files/ignored_services.mk index 35dc4433..8a6705ac 100644 --- a/puppet/modules/site_check_mk/files/ignored_services.mk +++ b/puppet/modules/site_check_mk/files/ignored_services.mk @@ -1,3 +1,5 @@ -ignored_services = [ +# ignore NTP Time because this check was +# very flaky in the past (see https://leap.se/code/issues/6407) +ignored_services += [ ( ALL_HOSTS, [ "NTP Time" ] ) ] diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index 1554fd3c..9fc771e0 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -1,5 +1,4 @@ -# configure logwatch and nagios checks for couchdb (both bigcouch and plain -# couchdb installations) +# configure logwatch and nagios checks for couchdb class site_check_mk::agent::couchdb { concat::fragment { 'syslog_couchdb': @@ -14,21 +13,4 @@ class site_check_mk::agent::couchdb { mode => '0755', require => Package['check_mk-agent'] } - - # check open files for bigcouch proc - include site_check_mk::agent::package::perl_plugin - file { '/srv/leap/nagios/plugins/check_unix_open_fds.pl': - source => 'puppet:///modules/site_check_mk/agent/nagios_plugins/check_unix_open_fds.pl', - mode => '0755' - } - augeas { - 'Couchdb_open_files': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => [ - 'rm /files/etc/check_mk/mrpe.cfg/Couchdb_open_files', - 'set Couchdb_open_files \'/srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720\'' ], - require => File['/etc/check_mk/mrpe.cfg']; - } - } diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp deleted file mode 100644 index 82c3ac72..00000000 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb/bigcouch.pp +++ /dev/null @@ -1,49 +0,0 @@ -# configure logwatch and nagios checks for bigcouch -class site_check_mk::agent::couchdb::bigcouch { - - # watch bigcouch logs - # currently disabled because bigcouch is too noisy - # see https://leap.se/code/issues/7375 for more details - # and site_config::remove_files for removing leftovers - #file { '/etc/check_mk/logwatch.d/bigcouch.cfg': - # source => 'puppet:///modules/site_check_mk/agent/logwatch/bigcouch.cfg', - #} - - # check syslog msg from: - # - empd - # - /usr/local/bin/couch-doc-update - concat::fragment { 'syslog_bigcouch': - source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/bigcouch.cfg', - target => '/etc/check_mk/logwatch.d/syslog.cfg', - order => '02'; - } - - # check bigcouch processes - augeas { - 'Bigcouch_epmd_procs': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => [ - 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_epmd_procs', - 'set Bigcouch_epmd_procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/epmd\'' ], - require => File['/etc/check_mk/mrpe.cfg']; - 'Bigcouch_beam_procs': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => [ - 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_beam_procs', - 'set Bigcouch_beam_procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a /opt/bigcouch/erts-5.9.1/bin/beam\'' ], - require => File['/etc/check_mk/mrpe.cfg']; - } - - augeas { - 'Bigcouch_open_files': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => [ - 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_open_files', - 'set Bigcouch_open_files \'/srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720\'' ], - require => File['/etc/check_mk/mrpe.cfg']; - } - -} diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb/plain.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb/plain.pp deleted file mode 100644 index 3ec2267b..00000000 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb/plain.pp +++ /dev/null @@ -1,23 +0,0 @@ -# configure logwatch and nagios checks for plain single couchdb master -class site_check_mk::agent::couchdb::plain { - - # remove bigcouch leftovers - augeas { - 'Bigcouch_epmd_procs': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_epmd_procs', - require => File['/etc/check_mk/mrpe.cfg']; - 'Bigcouch_beam_procs': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_beam_procs', - require => File['/etc/check_mk/mrpe.cfg']; - 'Bigcouch_open_files': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => 'rm /files/etc/check_mk/mrpe.cfg/Bigcouch_open_files', - require => File['/etc/check_mk/mrpe.cfg']; - } - -} diff --git a/puppet/modules/site_check_mk/manifests/agent/soledad.pp b/puppet/modules/site_check_mk/manifests/agent/soledad.pp index f4a3f3a6..a8febaae 100644 --- a/puppet/modules/site_check_mk/manifests/agent/soledad.pp +++ b/puppet/modules/site_check_mk/manifests/agent/soledad.pp @@ -1,17 +1,8 @@ +# Configure soledad check_mk checks class site_check_mk::agent::soledad { file { '/etc/check_mk/logwatch.d/soledad.cfg': source => 'puppet:///modules/site_check_mk/agent/logwatch/soledad.cfg', } - # local nagios plugin checks via mrpe - - augeas { 'Soledad_Procs': - incl => '/etc/check_mk/mrpe.cfg', - lens => 'Spacevars.lns', - changes => [ - 'rm /files/etc/check_mk/mrpe.cfg/Soledad_Procs', - 'set Soledad_Procs \'/usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a "/usr/bin/python /usr/bin/twistd --uid=soledad --gid=soledad --pidfile=/var/run/soledad.pid --logfile=/var/log/soledad.log web --wsgi=leap.soledad.server.application --port=ssl:2323:privateKey=/etc/x509/keys/leap.key:certKey=/etc/x509/certs/leap.crt:sslmethod=SSLv23_METHOD"\'' ], - require => File['/etc/check_mk/mrpe.cfg']; - } } |