From e7d8080b14a32788f120c1abde69ed0420d760b0 Mon Sep 17 00:00:00 2001 From: Varac Date: Mon, 17 Jul 2017 18:36:56 +0200 Subject: Add Nagios --- .gitignore | 1 + README.md | 2 +- dashboards/dashboard.erb | 3 ++ jobs/gitlab_build_status.rb | 5 +- jobs/nagios.rb | 123 +++++++++++++++++++++++++++----------------- 5 files changed, 84 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index bc62fd0..7ad5658 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ assets/images/piwik.png credentials .bundle/ vendor/ +nagiosharder.yml diff --git a/README.md b/README.md index 8717cfa..faa31e9 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ gem install bundler Install app dependencies using bundler: ``` -bundle install --path=vendor/bundle +bundle install --path=vendor/bundle --deployment ``` If `bundle install` fails along the way, you're probably missing Xcode command line utilities: `xcode-select --install` diff --git a/dashboards/dashboard.erb b/dashboards/dashboard.erb index babc0f2..1c97924 100644 --- a/dashboards/dashboard.erb +++ b/dashboards/dashboard.erb @@ -12,6 +12,9 @@
  • +
  • +
    +
  • diff --git a/jobs/gitlab_build_status.rb b/jobs/gitlab_build_status.rb index addfaed..be52f98 100644 --- a/jobs/gitlab_build_status.rb +++ b/jobs/gitlab_build_status.rb @@ -1,6 +1,5 @@ require 'gitlab' require 'date' -require 'pp' Gitlab.configure do |config| # API endpoint URL, default @@ -10,7 +9,7 @@ Gitlab.configure do |config| config.private_token = ENV['GITLAB_TOKEN'] end -SCHEDULER.every '300s', :first_in => 0 do +SCHEDULER.every '3000s', :first_in => 0 do broken_builds = [] # get a list of all projects @@ -35,7 +34,7 @@ SCHEDULER.every '300s', :first_in => 0 do unless proj[:status] =~ /^success|running|No builds configured$/ broken_builds << proj end - puts proj + #puts proj end failed = broken_builds.size > 0 diff --git a/jobs/nagios.rb b/jobs/nagios.rb index bda1b59..56c6f88 100644 --- a/jobs/nagios.rb +++ b/jobs/nagios.rb @@ -8,88 +8,119 @@ SCHEDULER.every '10s' do query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/', home_url: 'https://unstable.bitmask.net/nagios3/', username: 'nagiosadmin', - password: ENV['UNSTABLE_PASS'] + password: ENV['NAGIOS_UNSTABLE_PASS'] }, demo: { domain: 'demo.bitmask.i', query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/', home_url: 'https://unstable.bitmask.net/nagios3/', username: 'nagiosadmin', - password: ENV['UNSTABLE_PASS'] + password: ENV['NAGIOS_UNSTABLE_PASS'] }, dev: { domain: 'dev.bitmask.i', query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/', home_url: 'https://unstable.bitmask.net/nagios3/', username: 'nagiosadmin', - password: ENV['UNSTABLE_PASS'] + password: ENV['NAGIOS_UNSTABLE_PASS'] }, mail: { domain: 'mail.bitmask.i', query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/', home_url: 'https://unstable.bitmask.net/nagios3/', username: 'nagiosadmin', - password: ENV['UNSTABLE_PASS'] + password: ENV['NAGIOS_UNSTABLE_PASS'] }, unstable: { domain: 'unstable.bitmask.i', query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/', home_url: 'https://unstable.bitmask.net/nagios3/', username: 'nagiosadmin', - password: ENV['UNSTABLE_PASS'] + password: ENV['NAGIOS_UNSTABLE_PASS'] + }, + leap: { + domain: 'leap.se', + query_url: 'https://hare.leap.se/cgi-bin/nagios3/', + home_url: 'https://hare.leap.se/nagios3/', + username: 'nagiosadmin', + password: ENV['NAGIOS_LEAP_PASS'] }, } - environments.each do |key, env| - nag = NagiosHarder::Site.new(env[:query_url], env[:username], env[:password],'3','iso8601') - unacked = nag.service_status( - :host_status_types => [:all], - :service_status_types => [:warning, :critical, :unknown], - :service_props => [:no_scheduled_downtime, :state_unacknowledged] - ) - critical_count = 0 - critical_services = Array.new - warning_count = 0 - warning_services = Array.new - unknown_count = 0 - unknown_services = Array.new - unacked.each do |alert| - next if ! alert["host"].include? env[:domain] - next if ! tried_at_maximum(alert["attempts"]) + environments = { + leap: { + domain: 'leap.se', + query_url: 'https://hare.leap.se/cgi-bin/nagios3/', + home_url: 'https://hare.leap.se/nagios3/', + username: 'nagiosadmin', + password: ENV['NAGIOS_LEAP_PASS'] + } + } + - if alert["status"].eql? "CRITICAL" - critical_count += 1 - critical_services << alert["service"] - elsif alert["status"].eql? "WARNING" - warning_count += 1 - warning_services << alert["service"] - elsif alert["status"].eql? "UNKNOWN" - unknown_count += 1 - unknown_services << alert["service"] + environments.each do |key, env| + begin + nag = NagiosHarder::Site.new(env[:query_url], env[:username], env[:password],'3','iso8601') + #puts nag + unacked = nag.service_status( + :host_status_types => [:all], + :service_status_types => [:warning, :critical, :unknown], + :service_props => [:no_scheduled_downtime, :state_unacknowledged] + ) + #puts unacked + critical_count = 0 + critical_services = Array.new + warning_count = 0 + warning_services = Array.new + unknown_count = 0 + unknown_services = Array.new + + unacked.each do |alert| + puts alert + next if ! alert["host"].include? env[:domain] + next if ! tried_at_maximum(alert["attempts"]) + puts '=============' + if alert["status"].eql? "CRITICAL" + critical_count += 1 + critical_services << alert["service"] + elsif alert["status"].eql? "WARNING" + warning_count += 1 + warning_services << alert["service"] + elsif alert["status"].eql? "UNKNOWN" + unknown_count += 1 + unknown_services << alert["service"] + end end - end - if ['cdev.bitmask.i', 'dev.bitmask.i', 'unstable.bitmask.i'].include? env[:domain] - status = critical_count + warning_count + unknown_count > 0 ? "gray" : "green" - else - status = critical_count > 0 ? "red" : (warning_count + unknown_count > 0 ? "yellow" : "green") - end + if ['cdev.bitmask.i', 'dev.bitmask.i', 'unstable.bitmask.i'].include? env[:domain] + status = critical_count + warning_count + unknown_count > 0 ? "gray" : "green" + else + status = critical_count > 0 ? "red" : (warning_count + unknown_count > 0 ? "yellow" : "green") + end - # nagiosharder may not alert us to a problem querying nagios. - # If no problems found check that we fetch service status and - # expect to find more than 0 entries. - if critical_count == 0 and warning_count == 0 and unknown_count == 0 - if nag.service_status.length == 0 - status = "error" + # nagiosharder may not alert us to a problem querying nagios. + # If no problems found check that we fetch service status and + # expect to find more than 0 entries. + if critical_count == 0 and warning_count == 0 and unknown_count == 0 + if nag.service_status.length == 0 + status = "error" + end end - end - #puts key.to_s + ": " + critical_count.to_s - #puts critical_services.join(", ") - #puts + puts key.to_s + ": " + critical_count.to_s + puts critical_services.join(", ") + puts + + rescue => error + error.backtrace + puts 'Could not query nagios at '+env[:query_url] + status = "red" + critical_count = 1 + critical_services = ['Could not query nagios'] + end send_event('nagios-' + key.to_s, { criticals: critical_count, critical_services: critical_services, warnings: warning_count, warning_services: warning_services, -- cgit v1.2.3