summaryrefslogtreecommitdiff
path: root/jobs/nagios.rb
blob: 4186d22fe2ec674bf6b2f58722bd39a263964347 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Every 10 seconds, query Nagios for each configured environment and publish
# a summary of its unacknowledged service problems as a
# 'nagios-<environment>' event (criticals / warnings / unknowns, the affected
# service names, an overall status and a link to the Nagios home page).
SCHEDULER.every '10s' do
  require 'bundler/setup'
  require 'nagiosharder'
  require 'pp'

  # All environments are read from the same Nagios instance; the :domain
  # substring is what selects the hosts that belong to each environment.
  environments = {
    cdev: {
      domain: 'cdev.bitmask.i',
      query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/',
      home_url: 'https://unstable.bitmask.net/nagios3/',
      username: 'nagiosadmin',
      password: ENV['UNSTABLE_PASS']
    },
    dev: {
      domain: 'dev.bitmask.i',
      query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/',
      home_url: 'https://unstable.bitmask.net/nagios3/',
      username: 'nagiosadmin',
      password: ENV['UNSTABLE_PASS']
    },
    unstable: {
      domain: 'unstable.bitmask.i',
      query_url: 'https://unstable.bitmask.net/cgi-bin/nagios3/',
      home_url: 'https://unstable.bitmask.net/nagios3/',
      username: 'nagiosadmin',
      password: ENV['UNSTABLE_PASS']
    },
  }

  # Domains whose problems are reported as "gray" instead of "red"/"yellow".
  muted_domains = ['cdev.bitmask.i', 'dev.bitmask.i', 'unstable.bitmask.i']

  environments.each do |key, env|
    event_id = "nagios-#{key}"

    # Each environment is handled in isolation: a failure while querying or
    # processing one of them must not prevent the others from being updated,
    # and the affected widget should show "error" rather than stale data.
    begin
      nag = NagiosHarder::Site.new(env[:query_url], env[:username], env[:password], '3', 'iso8601')
      unacked = nag.service_status(
        :host_status_types => [:all],
        :service_status_types => [:warning, :critical, :unknown],
        :service_props => [:no_scheduled_downtime, :state_unacknowledged]
      )

      # Service names grouped by Nagios status; alerts carrying any other
      # status are ignored.
      services = { "CRITICAL" => [], "WARNING" => [], "UNKNOWN" => [] }

      unacked.each do |alert|
        next unless alert["host"].include?(env[:domain])
        # Skip alerts whose "attempts" parts are not all equal
        # (presumably "current/max" check attempts -- confirm against Nagios).
        next unless tried_at_maximum(alert["attempts"])

        bucket = services[alert["status"]]
        bucket << alert["service"] if bucket
      end

      critical_services = services["CRITICAL"]
      warning_services = services["WARNING"]
      unknown_services = services["UNKNOWN"]
      problem_count = critical_services.size + warning_services.size + unknown_services.size

      status =
        if muted_domains.include?(env[:domain])
          problem_count > 0 ? "gray" : "green"
        elsif !critical_services.empty?
          "red"
        elsif warning_services.size + unknown_services.size > 0
          "yellow"
        else
          "green"
        end

      # nagiosharder may not alert us to a problem querying nagios.
      # If no problems were found, fetch the unfiltered service status and
      # treat an empty result as a query error.
      if problem_count.zero? && nag.service_status.length == 0
        status = "error"
      end

      puts "#{key}: #{critical_services.size}"
      puts critical_services.join(", ")
      puts

      send_event(event_id, {
        criticals: critical_services.size, critical_services: critical_services,
        warnings: warning_services.size, warning_services: warning_services,
        unknown: unknown_services.size, unknown_services: unknown_services,
        status: status, nagios_url: env[:home_url]})
    rescue StandardError => e
      # Log the failure and flag the widget instead of aborting the loop.
      warn "#{event_id}: #{e.class}: #{e.message}"
      send_event(event_id, {
        criticals: 0, critical_services: [],
        warnings: 0, warning_services: [],
        unknown: 0, unknown_services: [],
        status: "error", nagios_url: env[:home_url]})
    end
  end
end

# Tells whether every "/"-separated part of +attempts+ is identical,
# e.g. "3/3" => true, "1/3" => false.
#
# @param attempts [String, nil] attempts field of an alert
# @return [Boolean] false when +attempts+ is nil (or false), otherwise
#   whether the split parts collapse to exactly one distinct value
def tried_at_maximum(attempts)
  return false unless attempts

  distinct_parts = attempts.split("/").uniq
  distinct_parts.length == 1
end