# Source: jobs/nagios.rb (blob 2542f20493140d8890e8b5bea04453a8ccc783b1)
# Every 60 seconds, query each configured Nagios instance for unacknowledged
# service problems and push one 'nagios-<environment>' event per environment.
SCHEDULER.every '60s' do
  # Kept inside the job as in the original; once loaded, later calls to
  # require return immediately.
  require 'bundler/setup'
  require 'nagiosharder'

  # Connection settings shared by every environment that is monitored by the
  # Nagios instance at unstable.bitmask.net.
  unstable_nagios = {
    query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/',
    home_url: 'https://unstable.bitmask.net/nagios3/',
    username: 'nagiosadmin',
    password: ENV['NAGIOS_UNSTABLE_PASS']
  }

  # Environment key => connection settings plus the domain used to pick out
  # that environment's hosts from the Nagios results.
  environments = {
    cdev: unstable_nagios.merge(domain: 'cdev.bitmask.i'),
    demo: unstable_nagios.merge(domain: 'demo.bitmask.i'),
    dev: unstable_nagios.merge(domain: 'dev.bitmask.i'),
    mail: unstable_nagios.merge(domain: 'mail.bitmask.i'),
    unstable: unstable_nagios.merge(domain: 'unstable.bitmask.i'),
    infrastructure: {
      domain: 'leap.se',
      query_url: 'https://hare.leap.se/cgi-bin/nagios3/',
      home_url: 'https://hare.leap.se/nagios3/',
      username: 'nagiosadmin',
      password: ENV['NAGIOS_INFRASTRUCTURE_PASS']
    },
    user_leap_se: {
      domain: 'leap.i',
      query_url: 'https://user.leap.se/cgi-bin/nagios3/',
      home_url: 'https://user.leap.se/nagios3/',
      username: 'nagiosadmin',
      password: ENV['NAGIOS_USER_LEAP_SE_PASS']
    }
  }

  # Domains whose problems are reported as "gray" instead of "red"/"yellow".
  gray_domains = ['cdev.bitmask.i', 'dev.bitmask.i', 'unstable.bitmask.i']

  environments.each do |key, env|
    # Initialise every counter and list before talking to Nagios, so that the
    # event sent after a failed query carries 0 / [] for warnings and unknowns
    # rather than nil.
    critical_count = 0
    critical_services = []
    warning_count = 0
    warning_services = []
    unknown_count = 0
    unknown_services = []

    begin
      nag = NagiosHarder::Site.new(env[:query_url], env[:username], env[:password], '3', 'iso8601')
      puts nag
      unacked = nag.service_status(
        :host_status_types => [:all],
        :service_status_types => [:warning, :critical, :unknown],
        :service_props => [:no_scheduled_downtime, :state_unacknowledged]
      )

      unacked.each do |alert|
        # Only count alerts for hosts belonging to this environment's domain.
        next unless alert["host"].include?('.' + env[:domain])
        # Skip alerts for which tried_at_maximum reports false.
        next unless tried_at_maximum(alert["attempts"])

        case alert["status"]
        when "CRITICAL"
          critical_count += 1
          critical_services << alert["service"]
        when "WARNING"
          warning_count += 1
          warning_services << alert["service"]
        when "UNKNOWN"
          unknown_count += 1
          unknown_services << alert["service"]
        end
      end

      problem_count = critical_count + warning_count + unknown_count

      if gray_domains.include?(env[:domain])
        status = problem_count > 0 ? "gray" : "green"
      elsif critical_count > 0
        status = "red"
      elsif warning_count + unknown_count > 0
        status = "yellow"
      else
        status = "green"
      end

      # nagiosharder may not alert us to a problem querying nagios.
      # If no problems found check that we fetch service status and
      # expect to find more than 0 entries.
      if critical_count == 0 && warning_count == 0 && unknown_count == 0
        status = "error" if nag.service_status.length == 0
      end

      puts "#{key}: #{critical_count} (#{critical_services.join(', ')})"
    rescue => error
      puts "Could not query nagios at #{env[:query_url]}"
      puts error
      # Report the failed query as a single critical problem. Warning and
      # unknown values keep whatever was gathered before the failure
      # (0 / [] when the query itself failed).
      status = "red"
      critical_count = 1
      critical_services = ["Could not query nagios #{error}"]
    end

    send_event('nagios-' + key.to_s, {
      criticals: critical_count, critical_services: critical_services,
      warnings: warning_count, warning_services: warning_services,
      unknown: unknown_count, unknown_services: unknown_services,
      status: status, nagios_url: env[:home_url]})
  end
end

# Tells whether every "/"-separated part of +attempts+ is the same value.
#
# NOTE(review): presumably +attempts+ is the Nagios "current/max" attempt
# string (e.g. "3/3"), so true would mean the check has used all of its
# retries. Confirm against the nagiosharder output.
#
# @param attempts [String, nil] the attempts string, or nil when absent
# @return [Boolean] true when splitting on "/" leaves exactly one distinct
#   part; false for nil, false, or a string that yields no parts
def tried_at_maximum(attempts)
  return false unless attempts

  distinct_parts = attempts.split("/").uniq
  distinct_parts.size == 1
end