diff options
Diffstat (limited to 'puppet/modules/site_check_mk')
14 files changed, 123 insertions, 23 deletions
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg index 28f333b0..95ddd2ca 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg @@ -6,11 +6,19 @@ I 127.0.0.1 localhost:5984 .* ok # https://leap.se/code/issues/5246 I Shutting down group server + # ignore bigcouch conflict errors, mainly coming from tapicero creating new users + I Error in process.*{{nocatch,conflict} # ignore "Uncaught error in HTTP request: {exit, normal}" error # it's suppressed in later versions of bigcouch anhow # see https://leap.se/code/issues/5226 I Uncaught error in HTTP request: {exit,normal} I Uncaught error in HTTP request: {exit, + # Ignore rexi_EXIT bigcouch error (Bug #6512) + I Error in process <[0-9.]+> on node .* with exit value: {{rexi_EXIT,{(killed|noproc|shutdown),\[{couch_db,collect_results + # Ignore "Generic server terminating" bigcouch message (Feature #6544) + I Generic server <.*> terminating + I {error_report,<.*>, + I {error_info, C Uncaught error in HTTP request: {error, C Response abnormally terminated: {nodedown, C rexi_DOWN,noproc diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg index 623d1e46..3af5045b 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg @@ -2,4 +2,5 @@ C WSGI application error C Error C error - W Timing out client: +# Removed this line because we determined it was better to ignore it (#6566) +# W Timing out client: diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg index d58e876d..ac17c0ca 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg @@ -2,6 +2,12 @@ # suddenly hangup before properly establishing # a tls connection I ovpn-.*TLS Error: Unroutable control packet received from - I ovpn-.*TLS Error: TLS key negotiation failed to occur within 60 seconds (check your network connectivity) + I ovpn-.*TLS Error: TLS key negotiation failed to occur within 60 seconds \(check your network connectivity\) I ovpn-.*TLS Error: TLS handshake failed + I ovpn-.*TLS Error: TLS object -> incoming plaintext read error + I ovpn-.*Fatal TLS error \(check_tls_errors_co\), restarting + I ovpn-.*TLS_ERROR: BIO read tls_read_plaintext error: error:140890B2:SSL routines:SSL3_GET_CLIENT_CERTIFICATE:no certificate + + I ovpn-.*SIGUSR1\[soft,tls-error\] received, client-instance restarting + I ovpn-.*VERIFY ERROR: depth=0, error=certificate has expired diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg index 93ce0311..e5721eea 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg @@ -1,3 +1,5 @@ +# Ignore transient Tapicero errors when creating a db (#6511) + I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::Resource Not Found|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error) C tapicero.*RestClient::InternalServerError: # possible race condition between multiple tapicero # instances, so we ignore it diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg index 450b9e90..71395c50 100644 --- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg +++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg @@ -1,8 +1,14 @@ # some general patterns + I Error: Driver 'pcspkr' is already registered, aborting... +# ignore postfix errors on lost connection (Bug #6476) + I postfix/smtpd.*SSL_accept error from.*lost connection +# ignore postfix too many errors after DATA (#6545) + I postfix/smtpd.*too many errors after DATA from C panic C Oops - I Error: Driver 'pcspkr' is already registered, aborting... C Error +# ignore ipv6 icmp errors for now (Bug #6540) + I kernel: .*icmpv6_send: no reply to icmp error C error W generic protection rip W .*Unrecovered read error - auto reallocate failed diff --git a/puppet/modules/site_check_mk/files/extra_host_conf.mk b/puppet/modules/site_check_mk/files/extra_host_conf.mk new file mode 100644 index 00000000..2c96f97a --- /dev/null +++ b/puppet/modules/site_check_mk/files/extra_host_conf.mk @@ -0,0 +1,6 @@ +# retry 3 times before setting a host into a hard state +# and send out notification +extra_host_conf["max_check_attempts"] = [ + ("4", ALL_HOSTS ) +] + diff --git a/puppet/modules/site_check_mk/files/extra_service_conf.mk b/puppet/modules/site_check_mk/files/extra_service_conf.mk new file mode 100644 index 00000000..03d1ea76 --- /dev/null +++ b/puppet/modules/site_check_mk/files/extra_service_conf.mk @@ -0,0 +1,13 @@ +# retry 3 times before setting a service into a hard state +# and send out notification +extra_service_conf["max_check_attempts"] = [ + ("4", ALL_HOSTS , ALL_SERVICES ) +] + +# run check_mk_agent every 2 minutes if it terminates +# successfully. +# see https://leap.se/code/issues/6539 for the rationale +extra_service_conf["normal_check_interval"] = [ + ("2", ALL_HOSTS , "Check_MK" ) +] + diff --git a/puppet/modules/site_check_mk/files/ignored_services.mk b/puppet/modules/site_check_mk/files/ignored_services.mk new file mode 100644 index 00000000..35dc4433 --- /dev/null +++ b/puppet/modules/site_check_mk/files/ignored_services.mk @@ -0,0 +1,3 @@ +ignored_services = [ + ( ALL_HOSTS, [ "NTP Time" ] ) +] diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp index 01e2b886..ee0268a3 100644 --- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp +++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp @@ -29,7 +29,7 @@ class site_check_mk::agent::couchdb { } file_line { 'Bigcouch_open_files': - line => 'Bigcouch_open_files /srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 750,750 -c 1000,1000', + line => 'Bigcouch_open_files /srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720', path => '/etc/check_mk/mrpe.cfg'; } diff --git a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp index 369ed00b..ffd11100 100644 --- a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp +++ b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp @@ -1,5 +1,7 @@ class site_check_mk::agent::tapicero { + include ::site_nagios::plugins + concat::fragment { 'syslog_tapicero': source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/tapicero.cfg', target => '/etc/check_mk/logwatch.d/syslog.cfg', @@ -11,6 +13,10 @@ class site_check_mk::agent::tapicero { 'Tapicero_Procs': line => 'Tapicero_Procs /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a tapicero', path => '/etc/check_mk/mrpe.cfg'; + + 'Tapicero_Heartbeat': + line => 'Tapicero_Heartbeat /usr/local/lib/nagios/plugins/check_last_regex_in_log -f /var/log/syslog -r "tapicero" -w 300 -c 600', + path => '/etc/check_mk/mrpe.cfg'; } } diff --git a/puppet/modules/site_check_mk/manifests/agent/webapp.pp b/puppet/modules/site_check_mk/manifests/agent/webapp.pp index 64f5ea6d..88c3da30 100644 --- a/puppet/modules/site_check_mk/manifests/agent/webapp.pp +++ b/puppet/modules/site_check_mk/manifests/agent/webapp.pp @@ -1,20 +1,11 @@ class site_check_mk::agent::webapp { - # check webapp login + soledad sync - package { [ 'python-srp', 'python-requests', 'python-yaml', 'python-u1db' ]: - ensure => installed + # remove leftovers of webapp python checks + file { + [ '/usr/lib/check_mk_agent/local/nagios-webapp_login.py', + '/usr/lib/check_mk_agent/local/soledad_sync.py' ]: + ensure => absent } - file { '/usr/lib/check_mk_agent/local/nagios-webapp_login.py': - ensure => link, - target => '/srv/leap/webapp/test/nagios/webapp_login.py', - require => Package['check_mk-agent'] - } - file { '/usr/lib/check_mk_agent/local/soledad_sync.py': - ensure => link, - target => '/srv/leap/webapp/test/nagios/soledad_sync.py', - require => Package['check_mk-agent'] - } - # check syslog concat::fragment { 'syslog_webapp': diff --git a/puppet/modules/site_check_mk/manifests/server.pp b/puppet/modules/site_check_mk/manifests/server.pp index e544ef0d..171f1576 100644 --- a/puppet/modules/site_check_mk/manifests/server.pp +++ b/puppet/modules/site_check_mk/manifests/server.pp @@ -5,11 +5,13 @@ class site_check_mk::server { $type = $ssh_hash['authorized_keys']['monitor']['type'] $seckey = $ssh_hash['monitor']['private_key'] - $nagios_hiera = hiera_hash('nagios') - $nagios_hosts = $nagios_hiera['hosts'] + $nagios_hiera = hiera_hash('nagios') + $nagios_hosts = $nagios_hiera['hosts'] - $hosts = hiera_hash('hosts') - $all_hosts = inline_template ('<% @hosts.keys.sort.each do |key| -%>"<%= @hosts[key]["domain_internal"] %>", <% end -%>') + $hosts = hiera_hash('hosts') + $all_hosts = inline_template ('<% @hosts.keys.sort.each do |key| -%>"<%= @hosts[key]["domain_internal"] %>", <% end -%>') + $domains_internal = $nagios_hiera['domains_internal'] + $environments = $nagios_hiera['environments'] package { 'check-mk-server': ensure => installed, @@ -35,10 +37,32 @@ class site_check_mk::server { content => template('site_check_mk/use_ssh.mk'), notify => Exec['check_mk-refresh'], require => Package['check-mk-server']; + '/etc/check_mk/conf.d/hostgroups.mk': + content => template('site_check_mk/hostgroups.mk'), + notify => Exec['check_mk-refresh'], + require => Package['check-mk-server']; + '/etc/check_mk/conf.d/host_contactgroups.mk': + content => template('site_check_mk/host_contactgroups.mk'), + notify => Exec['check_mk-refresh'], + require => Package['check-mk-server']; + '/etc/check_mk/conf.d/ignored_services.mk': + source => 'puppet:///modules/site_check_mk/ignored_services.mk', + notify => Exec['check_mk-refresh'], + require => Package['check-mk-server']; + '/etc/check_mk/conf.d/extra_service_conf.mk': + source => 'puppet:///modules/site_check_mk/extra_service_conf.mk', + notify => Exec['check_mk-refresh'], + require => Package['check-mk-server']; + '/etc/check_mk/conf.d/extra_host_conf.mk': + source => 'puppet:///modules/site_check_mk/extra_host_conf.mk', + notify => Exec['check_mk-refresh'], + require => Package['check-mk-server']; + '/etc/check_mk/all_hosts_static': content => $all_hosts, notify => Exec['check_mk-refresh'], require => Package['check-mk-server']; + '/etc/check_mk/.ssh': ensure => directory, require => Package['check-mk-server']; @@ -52,6 +76,7 @@ class site_check_mk::server { owner => 'nagios', mode => '0644', require => Package['check-mk-server']; + # check_icmp must be suid root or called by sudo # see https://leap.se/code/issues/5171 '/usr/lib/nagios/plugins/check_icmp': @@ -59,6 +84,5 @@ class site_check_mk::server { require => Package['nagios-plugins-basic']; } - include check_mk::agent::local_checks } diff --git a/puppet/modules/site_check_mk/templates/host_contactgroups.mk b/puppet/modules/site_check_mk/templates/host_contactgroups.mk new file mode 100644 index 00000000..6a534967 --- /dev/null +++ b/puppet/modules/site_check_mk/templates/host_contactgroups.mk @@ -0,0 +1,17 @@ +<% + contact_groups = [] + @environments.keys.sort.each do |env_name| + hosts = "" + @nagios_hosts.keys.sort.each do |hostname| + hostdata = @nagios_hosts[hostname] + domain_internal = hostdata['domain_internal'] + if hostdata['environment'] == env_name + hosts << '"' + domain_internal + '", ' + end + end + contact_groups << ' ( "%s", [%s] )' % [env_name, hosts] + end +%> +host_contactgroups = [ +<%= contact_groups.join(",\n") %> +] diff --git a/puppet/modules/site_check_mk/templates/hostgroups.mk b/puppet/modules/site_check_mk/templates/hostgroups.mk new file mode 100644 index 00000000..7158dcd1 --- /dev/null +++ b/puppet/modules/site_check_mk/templates/hostgroups.mk @@ -0,0 +1,17 @@ +<% + host_groups = [] + @environments.keys.sort.each do |env_name| + hosts = "" + @nagios_hosts.keys.sort.each do |hostname| + hostdata = @nagios_hosts[hostname] + domain_internal = hostdata['domain_internal'] + if hostdata['environment'] == env_name + hosts << '"' + domain_internal + '", ' + end + end + host_groups << ' ( "%s", [%s] )' % [env_name, hosts] + end +%> +host_groups = [ +<%= host_groups.join(",\n") %> +] |