summaryrefslogtreecommitdiff
path: root/puppet/modules/site_check_mk/files/agent
diff options
context:
space:
mode:
authorMicah Anderson <micah@leap.se>2014-04-22 14:13:46 -0400
committerMicah Anderson <micah@leap.se>2014-04-22 14:13:46 -0400
commit327d5c934e408f90011d7949b89ab01fed88998e (patch)
tree77cfefffc8f9ffe160c4413b26dd5ca5cdd6f1e8 /puppet/modules/site_check_mk/files/agent
parentca11482dd7cd4ea8ffa69407ee2fd5b5e1b7981b (diff)
parent4295f334ea4f92d7fb47f7121a42633630c368d1 (diff)
Merge branch 'develop' (0.5.0)
Conflicts: .gitignore Change-Id: I778f3e1f1f4832f5894bc149ead67e9a4becf304
Diffstat (limited to 'puppet/modules/site_check_mk/files/agent')
-rw-r--r--puppet/modules/site_check_mk/files/agent/local_checks/all_hosts/run_node_tests.sh5
-rwxr-xr-xpuppet/modules/site_check_mk/files/agent/local_checks/mx/check_leap_mx.sh33
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg20
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg4
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/logwatch.cfg31
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg5
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg7
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg7
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg9
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg8
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg5
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/syslog_header.cfg1
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg11
-rwxr-xr-xpuppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl322
-rwxr-xr-xpuppet/modules/site_check_mk/files/agent/plugins/mk_logwatch.1.2.4374
15 files changed, 842 insertions, 0 deletions
diff --git a/puppet/modules/site_check_mk/files/agent/local_checks/all_hosts/run_node_tests.sh b/puppet/modules/site_check_mk/files/agent/local_checks/all_hosts/run_node_tests.sh
new file mode 100644
index 00000000..1dd0afc9
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/local_checks/all_hosts/run_node_tests.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+#
+# runs node tests
+
+/srv/leap/bin/run_tests --checkmk
diff --git a/puppet/modules/site_check_mk/files/agent/local_checks/mx/check_leap_mx.sh b/puppet/modules/site_check_mk/files/agent/local_checks/mx/check_leap_mx.sh
new file mode 100755
index 00000000..b8687c9a
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/local_checks/mx/check_leap_mx.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+
+WARN=1
+CRIT=5
+
+# in minutes
+MAXAGE=10
+
+STATUS[0]='OK'
+STATUS[1]='Warning'
+STATUS[2]='Critical'
+CHECKNAME='Leap_MX_Queue'
+
+WATCHDIR='/var/mail/vmail/Maildir/new/'
+
+
+total=`find $WATCHDIR -type f -mmin +$MAXAGE | wc -l`
+
+if [ $total -lt $WARN ]
+then
+ exitcode=0
+else
+ if [ $total -le $CRIT ]
+ then
+ exitcode=1
+ else
+ exitcode=2
+ fi
+fi
+
+echo "${exitcode} ${CHECKNAME} stale_files=${total} ${STATUS[exitcode]}: ${total} stale files (>=${MAXAGE} min) in ${WATCHDIR}."
+
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg
new file mode 100644
index 00000000..28f333b0
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg
@@ -0,0 +1,20 @@
+/opt/bigcouch/var/log/bigcouch.log nocontext=1
+# ignore requests that are fine
+ I undefined - -.*200$
+ I undefined - -.*201$
+ I 127.0.0.1 undefined.* ok
+ I 127.0.0.1 localhost:5984 .* ok
+ # https://leap.se/code/issues/5246
+ I Shutting down group server
+ # ignore "Uncaught error in HTTP request: {exit, normal}" error
+ # it's suppressed in later versions of bigcouch anhow
+ # see https://leap.se/code/issues/5226
+ I Uncaught error in HTTP request: {exit,normal}
+ I Uncaught error in HTTP request: {exit,
+ C Uncaught error in HTTP request: {error,
+ C Response abnormally terminated: {nodedown,
+ C rexi_DOWN,noproc
+ C rexi_DOWN,noconnection
+ C error
+ C Connection attempt from disallowed node
+ W Apache CouchDB has started
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg
new file mode 100644
index 00000000..c71c5392
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/leap_mx.cfg
@@ -0,0 +1,4 @@
+/var/log/leap_mx.log
+ W Don't know how to deliver mail
+ W No public key, stopping the processing chain
+
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/logwatch.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/logwatch.cfg
new file mode 100644
index 00000000..4f16d1bd
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/logwatch.cfg
@@ -0,0 +1,31 @@
+# This file is managed by Puppet. DO NOT EDIT.
+
+# logwatch.cfg
+# This file configures mk_logwatch. Define your logfiles
+# and patterns to be looked for here.
+
+# Name one or more logfiles
+/var/log/messages
+# Patterns are indented with one space are prefixed with:
+# C: Critical messages
+# W: Warning messages
+# I: ignore these lines (OK)
+# The first match decided. Lines that do not match any pattern
+# are ignored
+ C Fail event detected on md device
+ I mdadm.*: Rebuild.*event detected
+ W mdadm\[
+ W ata.*hard resetting link
+ W ata.*soft reset failed (.*FIS failed)
+ W device-mapper: thin:.*reached low water mark
+ C device-mapper: thin:.*no free space
+
+/var/log/auth.log
+ W sshd.*Corrupted MAC on input
+
+/var/log/kern.log
+ C panic
+ C Oops
+ W generic protection rip
+ W .*Unrecovered read error - auto reallocate failed
+
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg
new file mode 100644
index 00000000..623d1e46
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg
@@ -0,0 +1,5 @@
+/var/log/soledad.log
+ C WSGI application error
+ C Error
+ C error
+ W Timing out client:
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg
new file mode 100644
index 00000000..f546135a
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/couchdb.cfg
@@ -0,0 +1,7 @@
+ C /usr/local/bin/couch-doc-update.*failed
+ C /usr/local/bin/couch-doc-update.*ERROR
+# on one-node bigcouch setups, we'll get this msg
+# a lot, so we ignore it here until we fix
+# https://leap.se/code/issues/5244
+ I epmd: got partial packet only on file descriptor
+
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg
new file mode 100644
index 00000000..d58e876d
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg
@@ -0,0 +1,7 @@
+# ignore openvpn TLS initialization errors when clients
+# suddenly hangup before properly establishing
+# a tls connection
+ I ovpn-.*TLS Error: Unroutable control packet received from
+ I ovpn-.*TLS Error: TLS key negotiation failed to occur within 60 seconds (check your network connectivity)
+ I ovpn-.*TLS Error: TLS handshake failed
+
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg
new file mode 100644
index 00000000..eb3131f2
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/stunnel.cfg
@@ -0,0 +1,9 @@
+# check for stunnel failures
+#
+# these are temporary failures and happen very often, so we
+# ignore them until we tuned stunnel timeouts/logging,
+# see https://leap.se/code/issues/5218
+ I stunnel:.*Connection reset by peer
+ I stunnel:.*Peer suddenly disconnected
+ I stunnel:.*Connection refused
+
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg
new file mode 100644
index 00000000..93ce0311
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg
@@ -0,0 +1,8 @@
+ C tapicero.*RestClient::InternalServerError:
+# possible race condition between multiple tapicero
+# instances, so we ignore it
+# see https://leap.se/code/issues/5168
+ I tapicero.*RestClient::PreconditionFailed:
+ C tapicero.*Creating database.*failed due to:
+ C tapicero.*failed
+ W tapicero.*Couch stream ended unexpectedly.
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg
new file mode 100644
index 00000000..00f9c7fd
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/webapp.cfg
@@ -0,0 +1,5 @@
+# check for webapp errors
+ C webapp.*Could not connect to couch database messages due to 401 Unauthorized: {"error":"unauthorized","reason":"You are not a server admin."}
+# ignore RoutingErrors that rails throw when it can't handle a url
+# see https://leap.se/code/issues/5173
+ I webapp.*ActionController::RoutingError
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog_header.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog_header.cfg
new file mode 100644
index 00000000..f60d752b
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog_header.cfg
@@ -0,0 +1 @@
+/var/log/syslog
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg
new file mode 100644
index 00000000..450b9e90
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg
@@ -0,0 +1,11 @@
+# some general patterns
+ C panic
+ C Oops
+ I Error: Driver 'pcspkr' is already registered, aborting...
+ C Error
+ C error
+ W generic protection rip
+ W .*Unrecovered read error - auto reallocate failed
+# 401 Unauthorized error logged by webapp and possible other
+# applications
+ C Unauthorized
diff --git a/puppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl b/puppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl
new file mode 100755
index 00000000..06163d49
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/nagios_plugins/check_unix_open_fds.pl
@@ -0,0 +1,322 @@
+#!/usr/bin/perl -w
+
+# check_unix_open_fds Nagios Plugin
+#
+# TComm - Carlos Peris Pla
+#
+# This nagios plugin is free software, and comes with ABSOLUTELY
+# NO WARRANTY. It may be used, redistributed and/or modified under
+# the terms of the GNU General Public Licence (see
+# http://www.fsf.org/licensing/licenses/gpl.txt).
+
+
+# MODULE DECLARATION
+
+use strict;
+use Nagios::Plugin;
+
+
+# FUNCTION DECLARATION
+
+sub CreateNagiosManager ();
+sub CheckArguments ();
+sub PerformCheck ();
+
+
+# CONSTANT DEFINITION
+
+use constant NAME => 'check_unix_open_fds';
+use constant VERSION => '0.1b';
+use constant USAGE => "Usage:\ncheck_unix_open_fds -w <process_threshold,application_threshold> -c <process_threshold,application_threshold>\n".
+ "\t\t[-V <version>]\n";
+use constant BLURB => "This plugin checks, in UNIX systems with the command lsof installed and with its SUID bit activated, the number\n".
+ "of file descriptors opened by an application and its processes.\n";
+use constant LICENSE => "This nagios plugin is free software, and comes with ABSOLUTELY\n".
+ "no WARRANTY. It may be used, redistributed and/or modified under\n".
+ "the terms of the GNU General Public Licence\n".
+ "(see http://www.fsf.org/licensing/licenses/gpl.txt).\n";
+use constant EXAMPLE => "\n\n".
+ "Example:\n".
+ "\n".
+ "check_unix_open_fds -a /usr/local/nagios/bin/ndo2db -w 20,75 -c 25,85\n".
+ "\n".
+ "It returns CRITICAL if number of file descriptors opened by ndo2db is higher than 85,\n".
+ "if not it returns WARNING if number of file descriptors opened by ndo2db is higher \n".
+ "than 75, if not it returns CRITICAL if number of file descriptors opened by any process\n".
+ "of ndo2db is higher than 25, if not it returns WARNING if number of file descriptors \n".
+ "opened by any process of ndo2db is higher than 20.\n".
+ "In other cases it returns OK if check has been performed succesfully.\n\n";
+
+
+# VARIABLE DEFINITION
+
+my $Nagios;
+my $Error;
+my $PluginResult;
+my $PluginOutput;
+my @WVRange;
+my @CVRange;
+
+
+# MAIN FUNCTION
+
+# Get command line arguments
+$Nagios = &CreateNagiosManager(USAGE, VERSION, BLURB, LICENSE, NAME, EXAMPLE);
+eval {$Nagios->getopts};
+
+if (!$@) {
+ # Command line parsed
+ if (&CheckArguments($Nagios, \$Error, \@WVRange, \@CVRange)) {
+ # Argument checking passed
+ $PluginResult = &PerformCheck($Nagios, \$PluginOutput, \@WVRange, \@CVRange)
+ }
+ else {
+ # Error checking arguments
+ $PluginOutput = $Error;
+ $PluginResult = UNKNOWN;
+ }
+ $Nagios->nagios_exit($PluginResult,$PluginOutput);
+}
+else {
+ # Error parsing command line
+ $Nagios->nagios_exit(UNKNOWN,$@);
+}
+
+
+
+# FUNCTION DEFINITIONS
+
+# Creates and configures a Nagios plugin object
+# Input: strings (usage, version, blurb, license, name and example) to configure argument parsing functionality
+# Return value: reference to a Nagios plugin object
+
+sub CreateNagiosManager() {
+ # Create GetOpt object
+ my $Nagios = Nagios::Plugin->new(usage => $_[0], version => $_[1], blurb => $_[2], license => $_[3], plugin => $_[4], extra => $_[5]);
+
+ # Add argument units
+ $Nagios->add_arg(spec => 'application|a=s',
+ help => 'Application path for which you want to check the number of open file descriptors',
+ required => 1);
+
+ # Add argument warning
+ $Nagios->add_arg(spec => 'warning|w=s',
+ help => "Warning thresholds. Format: <process_threshold,application_threshold>",
+ required => 1);
+ # Add argument critical
+ $Nagios->add_arg(spec => 'critical|c=s',
+ help => "Critical thresholds. Format: <process_threshold,application_threshold>",
+ required => 1);
+
+ # Return value
+ return $Nagios;
+}
+
+
+# Checks argument values and sets some default values
+# Input: Nagios Plugin object
+# Output: reference to Error description string, Memory Unit, Swap Unit, reference to WVRange ($_[4]), reference to CVRange ($_[5])
+# Return value: True if arguments ok, false if not
+
+sub CheckArguments() {
+ my ($Nagios, $Error, $WVRange, $CVRange) = @_;
+ my $commas;
+ my $units;
+ my $i;
+ my $firstpos;
+ my $secondpos;
+
+ # Check Warning thresholds list
+ $commas = $Nagios->opts->warning =~ tr/,//;
+ if ($commas !=1){
+ ${$Error} = "Invalid Warning list format. One comma is expected.";
+ return 0;
+ }
+ else{
+ $i=0;
+ $firstpos=0;
+ my $warning=$Nagios->opts->warning;
+ while ($warning =~ /[,]/g) {
+ $secondpos=pos $warning;
+ if ($secondpos - $firstpos==1){
+ @{$WVRange}[$i] = "~:";
+ }
+ else{
+ @{$WVRange}[$i] = substr $Nagios->opts->warning, $firstpos, ($secondpos-$firstpos-1);
+ }
+ $firstpos=$secondpos;
+ $i++
+ }
+ if (length($Nagios->opts->warning) - $firstpos==0){#La coma es el ultimo elemento del string
+ @{$WVRange}[$i] = "~:";
+ }
+ else{
+ @{$WVRange}[$i] = substr $Nagios->opts->warning, $firstpos, (length($Nagios->opts->warning)-$firstpos);
+ }
+
+ if (@{$WVRange}[0] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/){
+ ${$Error} = "Invalid Process Warning threshold in ${$WVRange[0]}";
+ return 0;
+ }if (@{$WVRange}[1] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/){
+ ${$Error} = "Invalid Application Warning threshold in ${$WVRange[1]}";
+ return 0;
+ }
+ }
+
+ # Check Critical thresholds list
+ $commas = $Nagios->opts->critical =~ tr/,//;
+ if ($commas !=1){
+ ${$Error} = "Invalid Critical list format. One comma is expected.";
+ return 0;
+ }
+ else{
+ $i=0;
+ $firstpos=0;
+ my $critical=$Nagios->opts->critical;
+ while ($critical =~ /[,]/g) {
+ $secondpos=pos $critical ;
+ if ($secondpos - $firstpos==1){
+ @{$CVRange}[$i] = "~:";
+ }
+ else{
+ @{$CVRange}[$i] =substr $Nagios->opts->critical, $firstpos, ($secondpos-$firstpos-1);
+ }
+ $firstpos=$secondpos;
+ $i++
+ }
+ if (length($Nagios->opts->critical) - $firstpos==0){#La coma es el ultimo elemento del string
+ @{$CVRange}[$i] = "~:";
+ }
+ else{
+ @{$CVRange}[$i] = substr $Nagios->opts->critical, $firstpos, (length($Nagios->opts->critical)-$firstpos);
+ }
+
+ if (@{$CVRange}[0] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/) {
+ ${$Error} = "Invalid Process Critical threshold in @{$CVRange}[0]";
+ return 0;
+ }
+ if (@{$CVRange}[1] !~/^(@?(\d+|(\d+|~):(\d+)?))?$/) {
+ ${$Error} = "Invalid Application Critical threshold in @{$CVRange}[1]";
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+
+# Performs whole check:
+# Input: Nagios Plugin object, reference to Plugin output string, Application, referece to WVRange, reference to CVRange
+# Output: Plugin output string
+# Return value: Plugin return value
+
+sub PerformCheck() {
+ my ($Nagios, $PluginOutput, $WVRange, $CVRange) = @_;
+ my $Application;
+ my @AppNameSplitted;
+ my $ApplicationName;
+ my $PsCommand;
+ my $PsResult;
+ my @PsResultLines;
+ my $ProcLine;
+ my $ProcPid;
+ my $LsofCommand;
+ my $LsofResult;
+ my $ProcCount = 0;
+ my $FDCount = 0;
+ my $ProcFDAvg = 0;
+ my $PerProcMaxFD = 0;
+ my $ProcOKFlag = 0;
+ my $ProcWarningFlag = 0;
+ my $ProcCriticalFlag = 0;
+ my $OKFlag = 0;
+ my $WarningFlag = 0;
+ my $CriticalFlag = 0;
+ my $LastWarningProcFDs = 0;
+ my $LastWarningProc = -1;
+ my $LastCriticalProcFDs = 0;
+ my $LastCriticalProc = -1;
+ my $ProcPluginReturnValue = UNKNOWN;
+ my $AppPluginReturnValue = UNKNOWN;
+ my $PluginReturnValue = UNKNOWN;
+ my $PerformanceData = "";
+ my $PerfdataUnit = "FDs";
+
+ $Application = $Nagios->opts->application;
+ $PsCommand = "ps -eaf | grep $Application";
+ $PsResult = `$PsCommand`;
+ @AppNameSplitted = split(/\//, $Application);
+ $ApplicationName = $AppNameSplitted[$#AppNameSplitted];
+ @PsResultLines = split(/\n/, $PsResult);
+ if ( $#PsResultLines > 1 ) {
+ foreach my $Proc (split(/\n/, $PsResult)) {
+ if ($Proc !~ /check_unix_open_fds/ && $Proc !~ / grep /) {
+ $ProcCount += 1;
+ $ProcPid = (split(/\s+/, $Proc))[1];
+ $LsofCommand = "lsof -p $ProcPid | wc -l";
+ $LsofResult = `$LsofCommand`;
+ $LsofResult = ($LsofResult > 0 ) ? ($LsofResult - 1) : 0;
+ $FDCount += $LsofResult;
+ if ($LsofResult >= $PerProcMaxFD) { $PerProcMaxFD = $LsofResult; }
+ $ProcPluginReturnValue = $Nagios->check_threshold(check => $LsofResult,warning => @{$WVRange}[0],critical => @{$CVRange}[0]);
+ if ($ProcPluginReturnValue eq OK) {
+ $ProcOKFlag = 1;
+ }
+ elsif ($ProcPluginReturnValue eq WARNING) {
+ $ProcWarningFlag = 1;
+ if ($LsofResult >= $LastWarningProcFDs) {
+ $LastWarningProcFDs = $LsofResult;
+ $LastWarningProc = $ProcPid;
+ }
+ }
+ #if ($LsofResult >= $PCT) {
+ elsif ($ProcPluginReturnValue eq CRITICAL) {
+ $ProcCriticalFlag = 1;
+ if ($LsofResult >= $LastCriticalProcFDs) {
+ $LastCriticalProcFDs = $LsofResult;
+ $LastCriticalProc = $ProcPid;
+ }
+ }
+ }
+ }
+ if ($ProcCount) { $ProcFDAvg = int($FDCount / $ProcCount); }
+ $AppPluginReturnValue = $Nagios->check_threshold(check => $FDCount,warning => @{$WVRange}[1],critical => @{$CVRange}[1]);
+ #if ($FDCount >= $TWT) {
+ if ($AppPluginReturnValue eq OK) { $OKFlag = 1; }
+ elsif ($AppPluginReturnValue eq WARNING) { $WarningFlag = 1; }
+ elsif ($AppPluginReturnValue eq CRITICAL) { $CriticalFlag = 1; }
+
+ # PluginReturnValue and PluginOutput
+ if ($CriticalFlag) {
+ $PluginReturnValue = CRITICAL;
+ ${$PluginOutput} .= "$ApplicationName handling $FDCount files (critical threshold set to @{$CVRange}[1])";
+ }
+ elsif ($WarningFlag) {
+ $PluginReturnValue = WARNING;
+ ${$PluginOutput} .= "$ApplicationName handling $FDCount files (warning threshold set to @{$WVRange}[1])";
+ }
+ elsif ($ProcCriticalFlag) {
+ $PluginReturnValue = CRITICAL;
+ ${$PluginOutput} .= "Process ID $LastCriticalProc handling $LastCriticalProcFDs files (critical threshold set to @{$CVRange}[0])";
+ }
+ elsif ($ProcWarningFlag) {
+ $PluginReturnValue = WARNING;
+ ${$PluginOutput} .= "Process ID $LastWarningProc handling $LastWarningProcFDs files (warning threshold set to @{$WVRange}[0])";
+ }
+ elsif ($OKFlag && $ProcOKFlag) {
+ $PluginReturnValue = OK;
+ ${$PluginOutput} .= "$ApplicationName handling $FDCount files";
+ }
+ }
+ else {
+ ${$PluginOutput} .= "No existe la aplicacion $ApplicationName";
+ }
+
+
+ $PerformanceData .= "ProcCount=$ProcCount$PerfdataUnit FDCount=$FDCount$PerfdataUnit ProcFDAvg=$ProcFDAvg$PerfdataUnit PerProcMaxFD=$PerProcMaxFD$PerfdataUnit";
+
+ # Output with performance data:
+ ${$PluginOutput} .= " | $PerformanceData";
+
+ return $PluginReturnValue;
+}
diff --git a/puppet/modules/site_check_mk/files/agent/plugins/mk_logwatch.1.2.4 b/puppet/modules/site_check_mk/files/agent/plugins/mk_logwatch.1.2.4
new file mode 100755
index 00000000..3dbca322
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/agent/plugins/mk_logwatch.1.2.4
@@ -0,0 +1,374 @@
+#!/usr/bin/python
+# -*- encoding: utf-8; py-indent-offset: 4 -*-
+# +------------------------------------------------------------------+
+# | ____ _ _ __ __ _ __ |
+# | / ___| |__ ___ ___| | __ | \/ | |/ / |
+# | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
+# | | |___| | | | __/ (__| < | | | | . \ |
+# | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
+# | |
+# | Copyright Mathias Kettner 2010 mk@mathias-kettner.de |
+# +------------------------------------------------------------------+
+#
+# This file is part of Check_MK.
+# The official homepage is at http://mathias-kettner.de/check_mk.
+#
+# check_mk is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation in version 2. check_mk is distributed
+# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
+# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU General Public License for more de-
+# ails. You should have received a copy of the GNU General Public
+# License along with GNU Make; see the file COPYING. If not, write
+# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+# Boston, MA 02110-1301 USA.
+
+# Call with -d for debug mode: colored output, no saving of status
+
+import sys, os, re, time
+import glob
+
+if '-d' in sys.argv[1:] or '--debug' in sys.argv[1:]:
+ tty_red = '\033[1;31m'
+ tty_green = '\033[1;32m'
+ tty_yellow = '\033[1;33m'
+ tty_blue = '\033[1;34m'
+ tty_normal = '\033[0m'
+ debug = True
+else:
+ tty_red = ''
+ tty_green = ''
+ tty_yellow = ''
+ tty_blue = ''
+ tty_normal = ''
+ debug = False
+
+# The configuration file and status file are searched
+# in the directory named by the environment variable
+# LOGWATCH_DIR. If that is not set, MK_CONFDIR is used.
+# If that is not set either, the current directory ist
+# used.
+logwatch_dir = os.getenv("LOGWATCH_DIR")
+if not logwatch_dir:
+ logwatch_dir = os.getenv("MK_CONFDIR")
+ if not logwatch_dir:
+ logwatch_dir = "."
+
+print "<<<logwatch>>>"
+
+config_filename = logwatch_dir + "/logwatch.cfg"
+status_filename = logwatch_dir + "/logwatch.state"
+config_dir = logwatch_dir + "/logwatch.d/*.cfg"
+
+def is_not_comment(line):
+ if line.lstrip().startswith('#') or \
+ line.strip() == '':
+ return False
+ return True
+
+def parse_filenames(line):
+ return line.split()
+
+def parse_pattern(level, pattern):
+ if level not in [ 'C', 'W', 'I', 'O' ]:
+ raise(Exception("Invalid pattern line '%s'" % line))
+ try:
+ compiled = re.compile(pattern)
+ except:
+ raise(Exception("Invalid regular expression in line '%s'" % line))
+ return (level, compiled)
+
+def read_config():
+ config_lines = [ line.rstrip() for line in filter(is_not_comment, file(config_filename).readlines()) ]
+ # Add config from a logwatch.d folder
+ for config_file in glob.glob(config_dir):
+ config_lines += [ line.rstrip() for line in filter(is_not_comment, file(config_file).readlines()) ]
+
+ have_filenames = False
+ config = []
+
+ for line in config_lines:
+ rewrite = False
+ if line[0].isspace(): # pattern line
+ if not have_filenames:
+ raise Exception("Missing logfile names")
+ level, pattern = line.split(None, 1)
+ if level == 'A':
+ cont_list.append(parse_cont_pattern(pattern))
+ elif level == 'R':
+ rewrite_list.append(pattern)
+ else:
+ level, compiled = parse_pattern(level, pattern)
+ cont_list = [] # List of continuation patterns
+ rewrite_list = [] # List of rewrite patterns
+ patterns.append((level, compiled, cont_list, rewrite_list))
+ else: # filename line
+ patterns = []
+ config.append((parse_filenames(line), patterns))
+ have_filenames = True
+ return config
+
+def parse_cont_pattern(pattern):
+ try:
+ return int(pattern)
+ except:
+ try:
+ return re.compile(pattern)
+ except:
+ if debug:
+ raise
+ raise Exception("Invalid regular expression in line '%s'" % pattern)
+
+# structure of statusfile
+# # LOGFILE OFFSET INODE
+# /var/log/messages|7767698|32455445
+# /var/test/x12134.log|12345|32444355
+def read_status():
+ if debug:
+ return {}
+
+ status = {}
+ for line in file(status_filename):
+ # TODO: Remove variants with spaces. rsplit is
+ # not portable. split fails if logfilename contains
+ # spaces
+ inode = -1
+ try:
+ parts = line.split('|')
+ filename = parts[0]
+ offset = parts[1]
+ if len(parts) >= 3:
+ inode = parts[2]
+
+ except:
+ try:
+ filename, offset = line.rsplit(None, 1)
+ except:
+ filename, offset = line.split(None, 1)
+ status[filename] = int(offset), int(inode)
+ return status
+
+def save_status(status):
+ f = file(status_filename, "w")
+ for filename, (offset, inode) in status.items():
+ f.write("%s|%d|%d\n" % (filename, offset, inode))
+
+pushed_back_line = None
+def next_line(f):
+ global pushed_back_line
+ if pushed_back_line != None:
+ line = pushed_back_line
+ pushed_back_line = None
+ return line
+ else:
+ try:
+ line = f.next()
+ return line
+ except:
+ return None
+
+
+def process_logfile(logfile, patterns):
+ global pushed_back_line
+
+ # Look at which file offset we have finished scanning
+ # the logfile last time. If we have never seen this file
+ # before, we set the offset to -1
+ offset, prev_inode = status.get(logfile, (-1, -1))
+ try:
+ fl = os.open(logfile, os.O_RDONLY)
+ inode = os.fstat(fl)[1] # 1 = st_ino
+ except:
+ if debug:
+ raise
+ print "[[[%s:cannotopen]]]" % logfile
+ return
+
+ print "[[[%s]]]" % logfile
+
+ # Seek to the current end in order to determine file size
+ current_end = os.lseek(fl, 0, 2) # os.SEEK_END not available in Python 2.4
+ status[logfile] = current_end, inode
+
+ # If we have never seen this file before, we just set the
+ # current pointer to the file end. We do not want to make
+ # a fuss about ancient log messages...
+ if offset == -1:
+ if not debug:
+ return
+ else:
+ offset = 0
+
+
+ # If the inode of the logfile has changed it has appearently
+ # been started from new (logfile rotation). At least we must
+ # assume that. In some rare cases (restore of a backup, etc)
+ # we are wrong and resend old log messages
+ if prev_inode >= 0 and inode != prev_inode:
+ offset = 0
+
+ # Our previously stored offset is the current end ->
+ # no new lines in this file
+ if offset == current_end:
+ return # nothing new
+
+ # If our offset is beyond the current end, the logfile has been
+ # truncated or wrapped while keeping the same inode. We assume
+ # that it contains all new data in that case and restart from
+ # offset 0.
+ if offset > current_end:
+ offset = 0
+
+ # now seek to offset where interesting data begins
+ os.lseek(fl, offset, 0) # os.SEEK_SET not available in Python 2.4
+ f = os.fdopen(fl)
+ worst = -1
+ outputtxt = ""
+ lines_parsed = 0
+ start_time = time.time()
+
+ while True:
+ line = next_line(f)
+ if line == None:
+ break # End of file
+
+ lines_parsed += 1
+ # Check if maximum number of new log messages is exceeded
+ if opt_maxlines != None and lines_parsed > opt_maxlines:
+ outputtxt += "%s Maximum number (%d) of new log messages exceeded.\n" % (
+ opt_overflow, opt_maxlines)
+ worst = max(worst, opt_overflow_level)
+ os.lseek(fl, 0, 2) # Seek to end of file, skip all other messages
+ break
+
+ # Check if maximum processing time (per file) is exceeded. Check only
+ # every 100'th line in order to save system calls
+ if opt_maxtime != None and lines_parsed % 100 == 10 \
+ and time.time() - start_time > opt_maxtime:
+ outputtxt += "%s Maximum parsing time (%.1f sec) of this log file exceeded.\n" % (
+ opt_overflow, opt_maxtime)
+ worst = max(worst, opt_overflow_level)
+ os.lseek(fl, 0, 2) # Seek to end of file, skip all other messages
+ break
+
+ level = "."
+ for lev, pattern, cont_patterns, replacements in patterns:
+ matches = pattern.search(line[:-1])
+ if matches:
+ level = lev
+ levelint = {'C': 2, 'W': 1, 'O': 0, 'I': -1, '.': -1}[lev]
+ worst = max(levelint, worst)
+
+ # Check for continuation lines
+ for cont_pattern in cont_patterns:
+ if type(cont_pattern) == int: # add that many lines
+ for x in range(cont_pattern):
+ cont_line = next_line(f)
+ if cont_line == None: # end of file
+ break
+ line = line[:-1] + "\1" + cont_line
+
+ else: # pattern is regex
+ while True:
+ cont_line = next_line(f)
+ if cont_line == None: # end of file
+ break
+ elif cont_pattern.search(cont_line[:-1]):
+ line = line[:-1] + "\1" + cont_line
+ else:
+ pushed_back_line = cont_line # sorry for stealing this line
+ break
+
+ # Replacement
+ for replace in replacements:
+ line = replace.replace('\\0', line) + "\n"
+ for nr, group in enumerate(matches.groups()):
+ line = line.replace('\\%d' % (nr+1), group)
+
+ break # matching rule found and executed
+
+ color = {'C': tty_red, 'W': tty_yellow, 'O': tty_green, 'I': tty_blue, '.': ''}[level]
+ if debug:
+ line = line.replace("\1", "\nCONT:")
+ if level == "I":
+ level = "."
+ if opt_nocontext and level == '.':
+ continue
+ outputtxt += "%s%s %s%s\n" % (color, level, line[:-1], tty_normal)
+
+ new_offset = os.lseek(fl, 0, 1) # os.SEEK_CUR not available in Python 2.4
+ status[logfile] = new_offset, inode
+
+ # output all lines if at least one warning, error or ok has been found
+ if worst > -1:
+ sys.stdout.write(outputtxt)
+ sys.stdout.flush()
+
+try:
+ config = read_config()
+except Exception, e:
+ if debug:
+ raise
+ print "CANNOT READ CONFIG FILE: %s" % e
+ sys.exit(1)
+
+# Simply ignore errors in the status file. In case of a corrupted status file we simply begin
+# with an empty status. That keeps the monitoring up and running - even if we might loose a
+# message in the extreme case of a corrupted status file.
+try:
+ status = read_status()
+except Exception, e:
+ status = {}
+
+
+# The filename line may contain options like 'maxlines=100' or 'maxtime=10'
+for filenames, patterns in config:
+ # Initialize options with default values
+ opt_maxlines = None
+ opt_maxtime = None
+ opt_regex = None
+ opt_overflow = 'C'
+ opt_overflow_level = 2
+ opt_nocontext = False
+ try:
+ options = [ o.split('=', 1) for o in filenames if '=' in o ]
+ for key, value in options:
+ if key == 'maxlines':
+ opt_maxlines = int(value)
+ elif key == 'maxtime':
+ opt_maxtime = float(value)
+ elif key == 'overflow':
+ if value not in [ 'C', 'I', 'W', 'O' ]:
+ raise Exception("Invalid value %s for overflow. Allowed are C, I, O and W" % value)
+ opt_overflow = value
+ opt_overflow_level = {'C':2, 'W':1, 'O':0, 'I':0}[value]
+ elif key == 'regex':
+ opt_regex = re.compile(value)
+ elif key == 'iregex':
+ opt_regex = re.compile(value, re.I)
+ elif key == 'nocontext':
+ opt_nocontext = True
+ else:
+ raise Exception("Invalid option %s" % key)
+ except Exception, e:
+ if debug:
+ raise
+ print "INVALID CONFIGURATION: %s" % e
+ sys.exit(1)
+
+
+ for glob in filenames:
+ if '=' in glob:
+ continue
+ logfiles = [ l.strip() for l in os.popen("ls %s 2>/dev/null" % glob).readlines() ]
+ if opt_regex:
+ logfiles = [ f for f in logfiles if opt_regex.search(f) ]
+ if len(logfiles) == 0:
+ print '[[[%s:missing]]]' % glob
+ else:
+ for logfile in logfiles:
+ process_logfile(logfile, patterns)
+
+if not debug:
+ save_status(status)