summaryrefslogtreecommitdiff
path: root/files/master/lastruncheck
diff options
context:
space:
mode:
authorvarac <varacanero@zeromail.org>2012-10-02 22:34:08 +0200
committervarac <varacanero@zeromail.org>2012-10-02 22:34:08 +0200
commit91c8846a43002d1747b969593b61916f05012ccc (patch)
tree09c5bda8579b8751b182173ebdd05c52a34520e2 /files/master/lastruncheck
parentfaa36737e9fbd79425d99af6d48fcc07850b03e8 (diff)
modified lastruncheck to run under v2.7.x, see github.com/cafuego/check_puppetmaster
Diffstat (limited to 'files/master/lastruncheck')
-rw-r--r--files/master/lastruncheck323
1 files changed, 100 insertions, 223 deletions
diff --git a/files/master/lastruncheck b/files/master/lastruncheck
index d59e489..72c0eb5 100644
--- a/files/master/lastruncheck
+++ b/files/master/lastruncheck
@@ -1,224 +1,101 @@
-#!/usr/bin/env ruby
-require 'puppet/application'
-
-module Puppet::Lastcheck
- module Puppet::Lastcheck::Tests
- def self.included(klass)
- klass.extend ClassMethods
- end
- def self.tests
- @tests ||= {}
- end
- module ClassMethods
- def add_test(name, options={})
- include Puppet::Lastcheck::Tests.const_get(name.to_s.split('_').collect{|s| s.capitalize }.join(''))
- Puppet::Lastcheck::Tests.tests[name] = options
- attr_accessor "ignore_#{name}".to_sym
- option("--ignore-#{name.to_s.gsub(/_/,'-')}") do
- self.send("ignore_#{name}=", true)
- end
- end
- end
- module Util
- def facts_hosts
- return @facts_hosts if @facts_hosts
- require 'puppet/indirector/facts/yaml'
- @facts_hosts = Puppet::Node::Facts.indirection.search("*").collect do |fqdn|
- if node = Puppet::Node::Facts.indirection.find(fqdn)
- { :hostname => node.name, :expired => node.expired?, :timestamp => node.values[:_timestamp], :expiration => node.expiration }
- end
- end.compact
- end
- end
- end
- module Puppet::Lastcheck::Reports
- def self.included(klass)
- klass.extend ClassMethods
- end
- def ordered_reports
- @ordered_reports ||= Puppet::Lastcheck::Reports.reports.keys.sort{|a,b| Puppet::Lastcheck::Reports.reports[a][:priority] <=> Puppet::Lastcheck::Reports.reports[b][:priority] }
- end
-
- def self.reports
- @reports ||= {}
- end
- module ClassMethods
- def add_report(name, options={})
- include Puppet::Lastcheck::Reports.const_get(name.to_s.split('_').collect{|s| s.capitalize }.join(''))
- Puppet::Lastcheck::Reports.reports[name] = options
- Puppet::Lastcheck::Reports.reports[name][:priority] ||= 100
- attr_accessor "report_to_#{name}".to_sym
- option("--report-to-#{name.to_s.gsub(/_/,'-')}") do
- self.send("report_to_#{name}=", true)
- end
- end
- end
- end
-end
-
-module Puppet::Lastcheck::Tests::NoFacts
- def analyze_no_facts
- signed_hosts.each{|host| add_failed_host(host,"No facts available") unless facts_hosts.any?{|fhost| fhost[:hostname] == host } }
- end
- def setup_no_facts
- Puppet::SSL::Host.ca_location = :only
- end
-
- private
- def signed_hosts
- ca.list
- end
-
- def ca
- @ca ||= Puppet::SSL::CertificateAuthority.new
- end
-end
-
-module Puppet::Lastcheck::Tests::ExpiredFacts
- include Puppet::Lastcheck::Tests::Util
- def analyze_expired_facts
- facts_hosts.each{|host| add_failed_host(host[:hostname],"Expired at #{host[:expiration]}") if host[:expired] }
- end
-end
-module Puppet::Lastcheck::Tests::TimedOutFacts
- include Puppet::Lastcheck::Tests::Util
- def analyze_timed_out_facts
- require 'time'
- facts_hosts.each{|host| add_failed_host(host[:hostname], "Last facts save at #{host[:timestamp]}") if Time.parse(host[:timestamp].to_s) < (Time.now - @timeout) }
- end
-
- def setup_timed_out_facts
- if @timeout
- ignore_expired_facts ||= true
- end
- end
-end
-module Puppet::Lastcheck::Tests::Storedconfigs
- def analyze_storedconfigs
- storedconfigs_hosts.each do |host|
- if !facts_hosts.any?{|fact_host| fact_host[:hostname] == host.name }
- add_failed_host(host.name, "In storedconfigs but no facts available!")
- elsif host.last_compile.nil?
- add_failed_host(host.name, "No entry in storedconfigs")
- elsif host.last_compile < (Time.now - @timeout)
- add_failed_host(host.name, "Last compile time in storedconfigs at #{host.last_compile}")
- end
- end
- end
-
- private
- def storedconfigs_hosts
- return @storedconfigs_hosts if @storedconfigs_hosts
- Puppet::Rails.connect
- @storedconfigs_hosts = Puppet::Rails::Host.all
- end
-end
-module Puppet::Lastcheck::Reports::Console
- def deliver_report_to_console(failing_hosts)
- unless failing_hosts.empty?
- puts 'The following hosts are out of date:'
- puts '------------------------------------'
- host_length = 0
- failing_hosts.keys.each{|host| host_length = host.length if host.length > host_length }
- failing_hosts.keys.each{ |host| puts "#{pretty_puts(host,host_length)} - Reason: #{failing_hosts[host][:reason]}" }
- 1
- else
- 0
- end
- end
-end
-module Puppet::Lastcheck::Reports::Nagios
- def deliver_report_to_nagios(failing_hosts)
- unless failing_hosts.empty?
- puts "PUPPETLAST CRITICAL: #{failing_hosts.size} outdated hosts: #{failing_hosts.keys.join(',')}"
- 2
- else
- puts "PUPPETLAST OK: No outdated hosts"
- 0
- end
- end
-end
-#
-# = Synopsis
-#
-# Verifiying your puppet runs. Check different places to verify
-# whether your clients actually still runs successfully.
-# Also checks for left overs of legacy hosts.
+#!/bin/bash
#
-# = Usage
-#
-# puppet lastcheck [-h|--help]
-class Puppet::Application::Lastcheck < Puppet::Application
-
- should_parse_config
- run_mode :master
-
- include Puppet::Lastcheck::Tests
- add_test :no_facts
- add_test :expired_facts, :ignore_by_default => true
- add_test :timed_out_facts
- add_test :storedconfigs
-
- include Puppet::Lastcheck::Reports
- add_report :console, :priority => 50
- add_report :nagios
-
- option("--timeout TIMEOUT") do |v|
- @timeout = v.to_i
- end
-
- option("--ignore-hosts HOSTS") do |v|
- @ignore_hosts = v.split(',')
- end
-
- def main
-
- Puppet::Lastcheck::Tests.tests.keys.each do |test|
- self.send("analyze_#{test}") unless self.send("ignore_#{test}")
- end
- exitcode = 0
- ordered_reports.each do |report|
- if self.send("report_to_#{report}")
- tmpexitcode = self.send("deliver_report_to_#{report}",@failing_hosts)
- exitcode = tmpexitcode unless exitcode > 0
- end
- end
- exit(exitcode)
- end
-
- def setup
- exit(Puppet.settings.print_configs ? 0 : 1) if Puppet.settings.print_configs?
-
- #Puppet::Util::Log.newdestination :console
- Puppet::Node::Facts.indirection.terminus_class = :yaml
-
- Puppet::Lastcheck::Tests.tests.keys.each do |test|
- self.send("ignore_#{test}=", Puppet::Lastcheck::Tests.tests[test][:ignore_by_default]||false) unless self.send("ignore_#{test}")
- self.send("setup_#{test}") if self.respond_to?("setup_#{test}") and !self.send("ignore_#{test}")
- end
- report = nil
- report_activated = false
- ordered_reports.each do |report|
- report_activated ||= self.send("report_to_#{report}")
- end
- self.report_to_console = true unless report_activated
-
- @ignore_hosts = [] unless @ignore_hosts
- @failing_hosts = {}
- unless @timeout
- @timeout = Puppet[:runinterval]
- end
- end
-
- private
-
- def add_failed_host(hostname,reason)
- @failing_hosts[hostname] = { :reason => reason } unless (@failing_hosts[hostname] || @ignore_hosts.include?(hostname))
- end
-
- def pretty_puts(str,length)
- sprintf("%0-#{length}s",str)
- end
-end
-
-Puppet::Application.find('lastcheck').new.run
+# Nagios checking script that whines if a node hasn't checked in with the
+# master for two hours or more and sets a critical status if the node hasn't
+# checked in for more than a day. You can of course tweak those thresholds
+# (INTERVAL_WARNING and INTERVAL_CRITICAL below) if you want.
+
+# Spot of configuration. Basically just specify where the yaml files live.
+YAMLPATH=/var/lib/puppet/yaml/node
+STATUS_OK=0
+STATUS_WARNING=1
+STATUS_CRITICAL=2
+STATUS_UNKNOWN=3
+INTERVAL_WARNING=$((60 * 60 * 2))
+INTERVAL_CRITICAL=$((60 * 60 * 24))
+PARAMS="-la"
+
+# A space-separated list of hostnames to ignore, e.g. laptops that don't sync
+# every day. NOTE: matched by regex, so substrings of listed names match too.
+IGNORE_HOSTS=""
+
+# Override settings from a config file if one exists.
+if [ -f /etc/default/check_puppetmaster ]; then
+ . /etc/default/check_puppetmaster
+fi
+
+# Early exit if no read access to the yaml files.
+if [ ! -r ${YAMLPATH} ]; then
+ echo "UNKNOWN: Cannot access ${YAMLPATH}"
+ exit ${STATUS_UNKNOWN}
+fi
+
+# Bunch of internal vars used for status info output.
+status="OK"
+ret=${STATUS_OK}
+i_count=0
+o_count=0
+w_count=0
+w_string=""
+e_count=0
+e_string=""
+
+# Current time.
+NOW=$(date +"%s")
+
+# The meat.
+
+# Get all hostnames associated with active certificates, and check the time
+# each of these last checked in with the server. Do this by converting the
+# yaml file expiration datestamp to epoch format and subtracting it from now.
+for node in $(/usr/sbin/puppetca ${PARAMS} | awk '/^\+/ {print $2}' | tr -d '"'); do
+
+ EXPIRATION=$(grep expiration ${YAMLPATH}/$node.yaml | awk '{printf("%s %s", $2, $3);}')
+ typeset -i CHECKIN=$(date +"%s" -d "${EXPIRATION}")
+ DIFFERENCE=$((${NOW} - ${CHECKIN}))
+
+ # Count hosts and generate some output strings based on the status.
+ if [ ${DIFFERENCE} -lt ${INTERVAL_WARNING} ]; then
+ o_count=$((${o_count} + 1));
+ else
+ # If there is an issue, first check if we can ignore this host.
+ if [ -n "${IGNORE_HOSTS}" ]; then
+ if [[ ${IGNORE_HOSTS} =~ ${node} ]]; then
+ i_count=$((${i_count} + 1))
+ continue
+ fi
+ fi
+ if [ ${DIFFERENCE} -gt ${INTERVAL_CRITICAL} ]; then
+ e_count=$((${e_count} + 1))
+ e_string="${e_string} ${node}"
+ else
+ w_count=$((${w_count} + 1))
+ w_string="${w_string} ${node}"
+ fi
+ fi
+done
+
+# Generate a status string for user display.
+if [ -n "${e_string}" ]; then
+ s_string="${s_string} ${e_count} critical (${e_string## });"
+fi
+if [ -n "${w_string}" ]; then
+ s_string="${s_string} ${w_count} warning (${w_string## });"
+fi
+if [ ${i_count} -gt 0 ]; then
+ s_string="${s_string} ${i_count} ignored;"
+fi
+s_string="${s_string} ${o_count} ok."
+
+# Create a return value and status string.
+if [ ${e_count} -gt 0 ]; then
+ status="CRITICAL"
+ ret=${STATUS_CRITICAL}
+elif [ ${w_count} -gt 0 ]; then
+ status="WARNING"
+ ret=${STATUS_WARNING}
+fi
+
+# Output the status and inform the user about which hosts are lagging.
+echo -n "${status}:${s_string}"
+exit $ret