summary | refs | log | tree | commit | diff
path: root/jobs/nagios.rb
diff options
context:
space:
mode:
Diffstat (limited to 'jobs/nagios.rb')
-rw-r--r-- jobs/nagios.rb 90
1 files changed, 90 insertions, 0 deletions
diff --git a/jobs/nagios.rb b/jobs/nagios.rb
new file mode 100644
index 0000000..4186d22
--- /dev/null
+++ b/jobs/nagios.rb
@@ -0,0 +1,90 @@
+# Scheduled dashboard job: every 10 seconds, ask Nagios for unacknowledged
+# service problems and push one 'nagios-<environment>' event per environment.
+#
+# Loaded once when the job file is read instead of on every tick.
+require 'bundler/setup'
+require 'nagiosharder'
+require 'pp'
+
+SCHEDULER.every '10s' do
+  # An alert belongs to an environment when its host name contains :domain.
+  # All environments below point at the same Nagios instance and credentials.
+  environments = {
+    cdev: {
+      domain: 'cdev.bitmask.i',
+      query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/',
+      home_url: 'https://unstable.bitmask.net/nagios3/',
+      username: 'nagiosadmin',
+      password: ENV['UNSTABLE_PASS']
+    },
+    dev: {
+      domain: 'dev.bitmask.i',
+      query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/',
+      home_url: 'https://unstable.bitmask.net/nagios3/',
+      username: 'nagiosadmin',
+      password: ENV['UNSTABLE_PASS']
+    },
+    unstable: {
+      domain: 'unstable.bitmask.i',
+      query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/',
+      home_url: 'https://unstable.bitmask.net/nagios3/',
+      username: 'nagiosadmin',
+      password: ENV['UNSTABLE_PASS']
+    },
+  }
+
+  # Domains whose problems are reported as "gray" instead of "red"/"yellow".
+  muted_domains = ['cdev.bitmask.i', 'dev.bitmask.i', 'unstable.bitmask.i']
+
+  # Only unacknowledged problems that are not inside a scheduled downtime.
+  problem_filter = {
+    :host_status_types => [:all],
+    :service_status_types => [:warning, :critical, :unknown],
+    :service_props => [:no_scheduled_downtime, :state_unacknowledged]
+  }
+
+  # Per-tick caches keyed by connection settings, so environments that share
+  # a Nagios instance trigger one query per tick instead of one each.
+  sites = {}
+  unacked_by_site = {}
+  service_totals = {}
+
+  environments.each do |key, env|
+    # Service names bucketed by the Nagios status string of their alert.
+    names = { "CRITICAL" => [], "WARNING" => [], "UNKNOWN" => [] }
+
+    begin
+      connection = env.values_at(:query_url, :username, :password)
+      nag = (sites[connection] ||= NagiosHarder::Site.new(*connection, '3', 'iso8601'))
+      unacked = (unacked_by_site[connection] ||= nag.service_status(problem_filter))
+
+      unacked.each do |alert|
+        # to_s guards against an alert without a host name.
+        next unless alert["host"].to_s.include?(env[:domain])
+        next unless tried_at_maximum(alert["attempts"])
+
+        bucket = names[alert["status"]]
+        bucket << alert["service"] if bucket
+      end
+
+      problem_count = names.values.inject(0) { |sum, list| sum + list.size }
+
+      status =
+        if muted_domains.include?(env[:domain])
+          problem_count > 0 ? "gray" : "green"
+        elsif !names["CRITICAL"].empty?
+          "red"
+        elsif problem_count > 0
+          "yellow"
+        else
+          "green"
+        end
+
+      # nagiosharder may not alert us to a problem querying nagios.
+      # If no problems were found, check that an unfiltered service status
+      # query returns more than 0 entries; otherwise report an error.
+      if problem_count == 0 && (service_totals[connection] ||= nag.service_status.length) == 0
+        status = "error"
+      end
+
+      # Debug output on the job's stdout.
+      puts "#{key}: #{names["CRITICAL"].size}"
+      puts names["CRITICAL"].join(", ")
+      puts
+    rescue StandardError => e
+      # A failed query must not abort the remaining environments; surface it
+      # on the widget through the same "error" status used above.
+      warn "nagios job: #{key} failed: #{e.class}: #{e.message}"
+      names.each_value(&:clear)
+      status = "error"
+    end
+
+    send_event("nagios-#{key}", {
+      criticals: names["CRITICAL"].size, critical_services: names["CRITICAL"],
+      warnings: names["WARNING"].size, warning_services: names["WARNING"],
+      unknown: names["UNKNOWN"].size, unknown_services: names["UNKNOWN"],
+      status: status, nagios_url: env[:home_url]})
+  end
+end
+
+# Reports whether an "attempts" value of the form "current/max" (for example
+# "3/3") shows that the current attempt count equals the maximum.
+#
+# @param attempts [String, nil] attempt counter as found in an alert's
+#   "attempts" field; surrounding whitespace around either number is ignored
+# @return [Boolean] true only when both parts are present and equal; false
+#   for nil, blank, or malformed values such as "3" or "3/"
+def tried_at_maximum(attempts)
+  current, maximum, *extra = attempts.to_s.split("/").map(&:strip)
+
+  # Require exactly "current/max"; a lone number must not count as a match.
+  return false if current.nil? || maximum.nil? || !extra.empty?
+
+  !current.empty? && current == maximum
+end