diff options
Diffstat (limited to 'files/master/lastruncheck')
-rw-r--r-- | files/master/lastruncheck | 323 |
1 files changed, 100 insertions, 223 deletions
diff --git a/files/master/lastruncheck b/files/master/lastruncheck index d59e489..72c0eb5 100644 --- a/files/master/lastruncheck +++ b/files/master/lastruncheck @@ -1,224 +1,101 @@ -#!/usr/bin/env ruby -require 'puppet/application' - -module Puppet::Lastcheck - module Puppet::Lastcheck::Tests - def self.included(klass) - klass.extend ClassMethods - end - def self.tests - @tests ||= {} - end - module ClassMethods - def add_test(name, options={}) - include Puppet::Lastcheck::Tests.const_get(name.to_s.split('_').collect{|s| s.capitalize }.join('')) - Puppet::Lastcheck::Tests.tests[name] = options - attr_accessor "ignore_#{name}".to_sym - option("--ignore-#{name.to_s.gsub(/_/,'-')}") do - self.send("ignore_#{name}=", true) - end - end - end - module Util - def facts_hosts - return @facts_hosts if @facts_hosts - require 'puppet/indirector/facts/yaml' - @facts_hosts = Puppet::Node::Facts.indirection.search("*").collect do |fqdn| - if node = Puppet::Node::Facts.indirection.find(fqdn) - { :hostname => node.name, :expired => node.expired?, :timestamp => node.values[:_timestamp], :expiration => node.expiration } - end - end.compact - end - end - end - module Puppet::Lastcheck::Reports - def self.included(klass) - klass.extend ClassMethods - end - def ordered_reports - @ordered_reports ||= Puppet::Lastcheck::Reports.reports.keys.sort{|a,b| Puppet::Lastcheck::Reports.reports[a][:priority] <=> Puppet::Lastcheck::Reports.reports[b][:priority] } - end - - def self.reports - @reports ||= {} - end - module ClassMethods - def add_report(name, options={}) - include Puppet::Lastcheck::Reports.const_get(name.to_s.split('_').collect{|s| s.capitalize }.join('')) - Puppet::Lastcheck::Reports.reports[name] = options - Puppet::Lastcheck::Reports.reports[name][:priority] ||= 100 - attr_accessor "report_to_#{name}".to_sym - option("--report-to-#{name.to_s.gsub(/_/,'-')}") do - self.send("report_to_#{name}=", true) - end - end - end - end -end - -module Puppet::Lastcheck::Tests::NoFacts - def analyze_no_facts - signed_hosts.each{|host| add_failed_host(host,"No facts available") unless facts_hosts.any?{|fhost| fhost[:hostname] == host } } - end - def setup_no_facts - Puppet::SSL::Host.ca_location = :only - end - - private - def signed_hosts - ca.list - end - - def ca - @ca ||= Puppet::SSL::CertificateAuthority.new - end -end - -module Puppet::Lastcheck::Tests::ExpiredFacts - include Puppet::Lastcheck::Tests::Util - def analyze_expired_facts - facts_hosts.each{|host| add_failed_host(host[:hostname],"Expired at #{host[:expiration]}") if host[:expired] } - end -end -module Puppet::Lastcheck::Tests::TimedOutFacts - include Puppet::Lastcheck::Tests::Util - def analyze_timed_out_facts - require 'time' - facts_hosts.each{|host| add_failed_host(host[:hostname], "Last facts save at #{host[:timestamp]}") if Time.parse(host[:timestamp].to_s) < (Time.now - @timeout) } - end - - def setup_timed_out_facts - if @timeout - ignore_expired_facts ||= true - end - end -end -module Puppet::Lastcheck::Tests::Storedconfigs - def analyze_storedconfigs - storedconfigs_hosts.each do |host| - if !facts_hosts.any?{|fact_host| fact_host[:hostname] == host.name } - add_failed_host(host.name, "In storedconfigs but no facts available!") - elsif host.last_compile.nil? - add_failed_host(host.name, "No entry in storedconfigs") - elsif host.last_compile < (Time.now - @timeout) - add_failed_host(host.name, "Last compile time in storedconfigs at #{host.last_compile}") - end - end - end - - private - def storedconfigs_hosts - return @storedconfigs_hosts if @storedconfigs_hosts - Puppet::Rails.connect - @storedconfigs_hosts = Puppet::Rails::Host.all - end -end -module Puppet::Lastcheck::Reports::Console - def deliver_report_to_console(failing_hosts) - unless failing_hosts.empty? - puts 'The following hosts are out of date:' - puts '------------------------------------' - host_length = 0 - failing_hosts.keys.each{|host| host_length = host.length if host.length > host_length } - failing_hosts.keys.each{ |host| puts "#{pretty_puts(host,host_length)} - Reason: #{failing_hosts[host][:reason]}" } - 1 - else - 0 - end - end -end -module Puppet::Lastcheck::Reports::Nagios - def deliver_report_to_nagios(failing_hosts) - unless failing_hosts.empty? - puts "PUPPETLAST CRITICAL: #{failing_hosts.size} outdated hosts: #{failing_hosts.keys.join(',')}" - 2 - else - puts "PUPPETLAST OK: No outdated hosts" - 0 - end - end -end -# -# = Synopsis -# -# Verifiying your puppet runs. Check different places to verify -# whether your clients actually still runs successfully. -# Also checks for left overs of legacy hosts. +#!/bin/bash # -# = Usage -# -# puppet lastcheck [-h|--help] -class Puppet::Application::Lastcheck < Puppet::Application - - should_parse_config - run_mode :master - - include Puppet::Lastcheck::Tests - add_test :no_facts - add_test :expired_facts, :ignore_by_default => true - add_test :timed_out_facts - add_test :storedconfigs - - include Puppet::Lastcheck::Reports - add_report :console, :priority => 50 - add_report :nagios - - option("--timeout TIMEOUT") do |v| - @timeout = v.to_i - end - - option("--ignore-hosts HOSTS") do |v| - @ignore_hosts = v.split(',') - end - - def main - - Puppet::Lastcheck::Tests.tests.keys.each do |test| - self.send("analyze_#{test}") unless self.send("ignore_#{test}") - end - exitcode = 0 - ordered_reports.each do |report| - if self.send("report_to_#{report}") - tmpexitcode = self.send("deliver_report_to_#{report}",@failing_hosts) - exitcode = tmpexitcode unless exitcode > 0 - end - end - exit(exitcode) - end - - def setup - exit(Puppet.settings.print_configs ? 0 : 1) if Puppet.settings.print_configs? - - #Puppet::Util::Log.newdestination :console - Puppet::Node::Facts.indirection.terminus_class = :yaml - - Puppet::Lastcheck::Tests.tests.keys.each do |test| - self.send("ignore_#{test}=", Puppet::Lastcheck::Tests.tests[test][:ignore_by_default]||false) unless self.send("ignore_#{test}") - self.send("setup_#{test}") if self.respond_to?("setup_#{test}") and !self.send("ignore_#{test}") - end - report = nil - report_activated = false - ordered_reports.each do |report| - report_activated ||= self.send("report_to_#{report}") - end - self.report_to_console = true unless report_activated - - @ignore_hosts = [] unless @ignore_hosts - @failing_hosts = {} - unless @timeout - @timeout = Puppet[:runinterval] - end - end - - private - - def add_failed_host(hostname,reason) - @failing_hosts[hostname] = { :reason => reason } unless (@failing_hosts[hostname] || @ignore_hosts.include?(hostname)) - end - - def pretty_puts(str,length) - sprintf("%0-#{length}s",str) - end -end - -Puppet::Application.find('lastcheck').new.run +# Nagios checking script that whines if a node hasn't checked in with the +# master for a day or more and sets a critical status if the node hasn't +# checked in for a week or longer. You can of course tweak those thresholds +# if you want. + +# Spot of configuration. Basically just specify where the yaml files live. +YAMLPATH=/var/lib/puppet/yaml/node +STATUS_OK=0 +STATUS_WARNING=1 +STATUS_CRITICAL=2 +STATUS_UNKNOWN=3 +INTERVAL_WARNING=$((60 * 60 * 2)) +INTERVAL_CRITICAL=$((60 * 60 * 24)) +PARAMS="-la" + +# A space separated list of hostnames to ignore. These might for instance be +# laptops that just don't get used every day and thus don't sync. +IGNORE_HOSTS="" + +# Override settings from a config file if one exists. +if [ -f /etc/default/check_puppetmaster ]; then + . /etc/default/check_puppetmaster +fi + +# Early exit if no read access to the yaml files. +if [ ! -r ${YAMLPATH} ]; then + echo "UNKNOWN: Cannot access ${YAMLPATH}" + exit ${STATUS_UNKNOWN} +fi + +# Bunch of internal vars used for status info output. +status="OK" +ret=${STATUS_OK} +i_count=0 +o_count=0 +w_count=0 +w_string="" +e_count=0 +e_string="" + +# Current time. +NOW=$(date +"%s") + +# The meat. + +# Get all hostnames associated with active certificates, and check the time +# each of these last checked in with the server. Do this by converting the +# yaml file expiration datestamp to epoch format and subtracting it from now. +for node in $(/usr/sbin/puppetca ${PARAMS} | awk '/^\+/ {print $2}' | tr -d '"'); do + + EXPIRATION=$(grep expiration ${YAMLPATH}/$node.yaml | awk '{printf("%s %s", $2, $3);}') + typeset -i CHECKIN=$(date +"%s" -d "${EXPIRATION}") + DIFFERENCE=$((${NOW} - ${CHECKIN})) + + # Count hosts and generate some output strings based on the status. + if [ ${DIFFERENCE} -lt ${INTERVAL_WARNING} ]; then + o_count=$((${o_count} + 1)); + else + # If there is an issue, first check if we can ignore this host. + if [ -n "${IGNORE_HOSTS}" ]; then + if [[ ${IGNORE_HOSTS} =~ ${node} ]]; then + i_count=$((${i_count} + 1)) + continue + fi + fi + if [ ${DIFFERENCE} -gt ${INTERVAL_CRITICAL} ]; then + e_count=$((${e_count} + 1)) + e_string="${e_string} ${node}" + else + w_count=$((${w_count} + 1)) + w_string="${w_string} ${node}" + fi + fi +done + +# Generate a status string for user display. +if [ -n "${e_string}" ]; then + s_string="${s_string} ${e_count} critical (${e_string## });" +fi +if [ -n "${w_string}" ]; then + s_string="${s_string} ${w_count} warning (${w_string## });" +fi +if [ ${i_count} -gt 0 ]; then + s_string="${s_string} ${i_count} ignored;" +fi +s_string="${s_string} ${o_count} ok." + +# Create a return value and status string. +if [ ${e_count} -gt 0 ]; then + status="CRITICAL" + ret=${STATUS_CRITICAL} +elif [ ${w_count} -gt 0 ]; then + status="WARNING" + ret=${STATUS_WARNING} +fi + +# Output the status and inform the user about which hosts are lagging. +echo -n "${status}:${s_string}" +exit $ret |