summaryrefslogtreecommitdiff
path: root/puppet/modules/site_check_mk
diff options
context:
space:
mode:
Diffstat (limited to 'puppet/modules/site_check_mk')
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg8
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg3
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg8
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg2
-rw-r--r--puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg8
-rw-r--r--puppet/modules/site_check_mk/files/extra_host_conf.mk6
-rw-r--r--puppet/modules/site_check_mk/files/extra_service_conf.mk13
-rw-r--r--puppet/modules/site_check_mk/files/ignored_services.mk3
-rw-r--r--puppet/modules/site_check_mk/manifests/agent/couchdb.pp2
-rw-r--r--puppet/modules/site_check_mk/manifests/agent/tapicero.pp6
-rw-r--r--puppet/modules/site_check_mk/manifests/agent/webapp.pp19
-rw-r--r--puppet/modules/site_check_mk/manifests/server.pp34
-rw-r--r--puppet/modules/site_check_mk/templates/host_contactgroups.mk17
-rw-r--r--puppet/modules/site_check_mk/templates/hostgroups.mk17
14 files changed, 123 insertions, 23 deletions
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg
index 28f333b0..95ddd2ca 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/bigcouch.cfg
@@ -6,11 +6,19 @@
I 127.0.0.1 localhost:5984 .* ok
# https://leap.se/code/issues/5246
I Shutting down group server
+ # ignore bigcouch conflict errors, mainly coming from tapicero creating new users
+ I Error in process.*{{nocatch,conflict}
# ignore "Uncaught error in HTTP request: {exit, normal}" error
# it's suppressed in later versions of bigcouch anhow
# see https://leap.se/code/issues/5226
I Uncaught error in HTTP request: {exit,normal}
I Uncaught error in HTTP request: {exit,
+ # Ignore rexi_EXIT bigcouch error (Bug #6512)
+ I Error in process <[0-9.]+> on node .* with exit value: {{rexi_EXIT,{(killed|noproc|shutdown),\[{couch_db,collect_results
+ # Ignore "Generic server terminating" bigcouch message (Feature #6544)
+ I Generic server <.*> terminating
+ I {error_report,<.*>,
+ I {error_info,
C Uncaught error in HTTP request: {error,
C Response abnormally terminated: {nodedown,
C rexi_DOWN,noproc
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg
index 623d1e46..3af5045b 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/soledad.cfg
@@ -2,4 +2,5 @@
C WSGI application error
C Error
C error
- W Timing out client:
+# Removed this line because we determined it was better to ignore it (#6566)
+# W Timing out client:
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg
index d58e876d..ac17c0ca 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/openvpn.cfg
@@ -2,6 +2,12 @@
# suddenly hangup before properly establishing
# a tls connection
I ovpn-.*TLS Error: Unroutable control packet received from
- I ovpn-.*TLS Error: TLS key negotiation failed to occur within 60 seconds (check your network connectivity)
+ I ovpn-.*TLS Error: TLS key negotiation failed to occur within 60 seconds \(check your network connectivity\)
I ovpn-.*TLS Error: TLS handshake failed
+ I ovpn-.*TLS Error: TLS object -> incoming plaintext read error
+ I ovpn-.*Fatal TLS error \(check_tls_errors_co\), restarting
+ I ovpn-.*TLS_ERROR: BIO read tls_read_plaintext error: error:140890B2:SSL routines:SSL3_GET_CLIENT_CERTIFICATE:no certificate
+
+ I ovpn-.*SIGUSR1\[soft,tls-error\] received, client-instance restarting
+ I ovpn-.*VERIFY ERROR: depth=0, error=certificate has expired
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg
index 93ce0311..e5721eea 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog/tapicero.cfg
@@ -1,3 +1,5 @@
+# Ignore transient Tapicero errors when creating a db (#6511)
+ I tapicero.*(Creating database|Checking security of|Writing security to|Uploading design doc to) user-.* failed (\(trying again soon\)|(twice )?due to): (RestClient::Resource Not Found|RestClient::InternalServerError): (404 Resource Not Found|500 Internal Server Error)
C tapicero.*RestClient::InternalServerError:
# possible race condition between multiple tapicero
# instances, so we ignore it
diff --git a/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg b/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg
index 450b9e90..71395c50 100644
--- a/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg
+++ b/puppet/modules/site_check_mk/files/agent/logwatch/syslog_tail.cfg
@@ -1,8 +1,14 @@
# some general patterns
+ I Error: Driver 'pcspkr' is already registered, aborting...
+# ignore postfix errors on lost connection (Bug #6476)
+ I postfix/smtpd.*SSL_accept error from.*lost connection
+# ignore postfix too many errors after DATA (#6545)
+ I postfix/smtpd.*too many errors after DATA from
C panic
C Oops
- I Error: Driver 'pcspkr' is already registered, aborting...
C Error
+# ignore ipv6 icmp errors for now (Bug #6540)
+ I kernel: .*icmpv6_send: no reply to icmp error
C error
W generic protection rip
W .*Unrecovered read error - auto reallocate failed
diff --git a/puppet/modules/site_check_mk/files/extra_host_conf.mk b/puppet/modules/site_check_mk/files/extra_host_conf.mk
new file mode 100644
index 00000000..2c96f97a
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/extra_host_conf.mk
@@ -0,0 +1,6 @@
+# retry 3 times before setting a host into a hard state
+# and send out notification
+extra_host_conf["max_check_attempts"] = [
+ ("4", ALL_HOSTS )
+]
+
diff --git a/puppet/modules/site_check_mk/files/extra_service_conf.mk b/puppet/modules/site_check_mk/files/extra_service_conf.mk
new file mode 100644
index 00000000..03d1ea76
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/extra_service_conf.mk
@@ -0,0 +1,13 @@
+# retry 3 times before setting a service into a hard state
+# and send out notification
+extra_service_conf["max_check_attempts"] = [
+ ("4", ALL_HOSTS , ALL_SERVICES )
+]
+
+# run check_mk_agent every 2 minutes if it terminates
+# successfully.
+# see https://leap.se/code/issues/6539 for the rationale
+extra_service_conf["normal_check_interval"] = [
+ ("2", ALL_HOSTS , "Check_MK" )
+]
+
diff --git a/puppet/modules/site_check_mk/files/ignored_services.mk b/puppet/modules/site_check_mk/files/ignored_services.mk
new file mode 100644
index 00000000..35dc4433
--- /dev/null
+++ b/puppet/modules/site_check_mk/files/ignored_services.mk
@@ -0,0 +1,3 @@
+ignored_services = [
+ ( ALL_HOSTS, [ "NTP Time" ] )
+]
diff --git a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp
index 01e2b886..ee0268a3 100644
--- a/puppet/modules/site_check_mk/manifests/agent/couchdb.pp
+++ b/puppet/modules/site_check_mk/manifests/agent/couchdb.pp
@@ -29,7 +29,7 @@ class site_check_mk::agent::couchdb {
}
file_line {
'Bigcouch_open_files':
- line => 'Bigcouch_open_files /srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 750,750 -c 1000,1000',
+ line => 'Bigcouch_open_files /srv/leap/nagios/plugins/check_unix_open_fds.pl -a beam -w 28672,28672 -c 30720,30720',
path => '/etc/check_mk/mrpe.cfg';
}
diff --git a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp
index 369ed00b..ffd11100 100644
--- a/puppet/modules/site_check_mk/manifests/agent/tapicero.pp
+++ b/puppet/modules/site_check_mk/manifests/agent/tapicero.pp
@@ -1,5 +1,7 @@
class site_check_mk::agent::tapicero {
+ include ::site_nagios::plugins
+
concat::fragment { 'syslog_tapicero':
source => 'puppet:///modules/site_check_mk/agent/logwatch/syslog/tapicero.cfg',
target => '/etc/check_mk/logwatch.d/syslog.cfg',
@@ -11,6 +13,10 @@ class site_check_mk::agent::tapicero {
'Tapicero_Procs':
line => 'Tapicero_Procs /usr/lib/nagios/plugins/check_procs -w 1:1 -c 1:1 -a tapicero',
path => '/etc/check_mk/mrpe.cfg';
+
+ 'Tapicero_Heartbeat':
+ line => 'Tapicero_Heartbeat /usr/local/lib/nagios/plugins/check_last_regex_in_log -f /var/log/syslog -r "tapicero" -w 300 -c 600',
+ path => '/etc/check_mk/mrpe.cfg';
}
}
diff --git a/puppet/modules/site_check_mk/manifests/agent/webapp.pp b/puppet/modules/site_check_mk/manifests/agent/webapp.pp
index 64f5ea6d..88c3da30 100644
--- a/puppet/modules/site_check_mk/manifests/agent/webapp.pp
+++ b/puppet/modules/site_check_mk/manifests/agent/webapp.pp
@@ -1,20 +1,11 @@
class site_check_mk::agent::webapp {
- # check webapp login + soledad sync
- package { [ 'python-srp', 'python-requests', 'python-yaml', 'python-u1db' ]:
- ensure => installed
+ # remove leftovers of webapp python checks
+ file {
+ [ '/usr/lib/check_mk_agent/local/nagios-webapp_login.py',
+ '/usr/lib/check_mk_agent/local/soledad_sync.py' ]:
+ ensure => absent
}
- file { '/usr/lib/check_mk_agent/local/nagios-webapp_login.py':
- ensure => link,
- target => '/srv/leap/webapp/test/nagios/webapp_login.py',
- require => Package['check_mk-agent']
- }
- file { '/usr/lib/check_mk_agent/local/soledad_sync.py':
- ensure => link,
- target => '/srv/leap/webapp/test/nagios/soledad_sync.py',
- require => Package['check_mk-agent']
- }
-
# check syslog
concat::fragment { 'syslog_webapp':
diff --git a/puppet/modules/site_check_mk/manifests/server.pp b/puppet/modules/site_check_mk/manifests/server.pp
index e544ef0d..171f1576 100644
--- a/puppet/modules/site_check_mk/manifests/server.pp
+++ b/puppet/modules/site_check_mk/manifests/server.pp
@@ -5,11 +5,13 @@ class site_check_mk::server {
$type = $ssh_hash['authorized_keys']['monitor']['type']
$seckey = $ssh_hash['monitor']['private_key']
- $nagios_hiera = hiera_hash('nagios')
- $nagios_hosts = $nagios_hiera['hosts']
+ $nagios_hiera = hiera_hash('nagios')
+ $nagios_hosts = $nagios_hiera['hosts']
- $hosts = hiera_hash('hosts')
- $all_hosts = inline_template ('<% @hosts.keys.sort.each do |key| -%>"<%= @hosts[key]["domain_internal"] %>", <% end -%>')
+ $hosts = hiera_hash('hosts')
+ $all_hosts = inline_template ('<% @hosts.keys.sort.each do |key| -%>"<%= @hosts[key]["domain_internal"] %>", <% end -%>')
+ $domains_internal = $nagios_hiera['domains_internal']
+ $environments = $nagios_hiera['environments']
package { 'check-mk-server':
ensure => installed,
@@ -35,10 +37,32 @@ class site_check_mk::server {
content => template('site_check_mk/use_ssh.mk'),
notify => Exec['check_mk-refresh'],
require => Package['check-mk-server'];
+ '/etc/check_mk/conf.d/hostgroups.mk':
+ content => template('site_check_mk/hostgroups.mk'),
+ notify => Exec['check_mk-refresh'],
+ require => Package['check-mk-server'];
+ '/etc/check_mk/conf.d/host_contactgroups.mk':
+ content => template('site_check_mk/host_contactgroups.mk'),
+ notify => Exec['check_mk-refresh'],
+ require => Package['check-mk-server'];
+ '/etc/check_mk/conf.d/ignored_services.mk':
+ source => 'puppet:///modules/site_check_mk/ignored_services.mk',
+ notify => Exec['check_mk-refresh'],
+ require => Package['check-mk-server'];
+ '/etc/check_mk/conf.d/extra_service_conf.mk':
+ source => 'puppet:///modules/site_check_mk/extra_service_conf.mk',
+ notify => Exec['check_mk-refresh'],
+ require => Package['check-mk-server'];
+ '/etc/check_mk/conf.d/extra_host_conf.mk':
+ source => 'puppet:///modules/site_check_mk/extra_host_conf.mk',
+ notify => Exec['check_mk-refresh'],
+ require => Package['check-mk-server'];
+
'/etc/check_mk/all_hosts_static':
content => $all_hosts,
notify => Exec['check_mk-refresh'],
require => Package['check-mk-server'];
+
'/etc/check_mk/.ssh':
ensure => directory,
require => Package['check-mk-server'];
@@ -52,6 +76,7 @@ class site_check_mk::server {
owner => 'nagios',
mode => '0644',
require => Package['check-mk-server'];
+
# check_icmp must be suid root or called by sudo
# see https://leap.se/code/issues/5171
'/usr/lib/nagios/plugins/check_icmp':
@@ -59,6 +84,5 @@ class site_check_mk::server {
require => Package['nagios-plugins-basic'];
}
-
include check_mk::agent::local_checks
}
diff --git a/puppet/modules/site_check_mk/templates/host_contactgroups.mk b/puppet/modules/site_check_mk/templates/host_contactgroups.mk
new file mode 100644
index 00000000..6a534967
--- /dev/null
+++ b/puppet/modules/site_check_mk/templates/host_contactgroups.mk
@@ -0,0 +1,17 @@
+<%
+ contact_groups = []
+ @environments.keys.sort.each do |env_name|
+ hosts = ""
+ @nagios_hosts.keys.sort.each do |hostname|
+ hostdata = @nagios_hosts[hostname]
+ domain_internal = hostdata['domain_internal']
+ if hostdata['environment'] == env_name
+ hosts << '"' + domain_internal + '", '
+ end
+ end
+ contact_groups << ' ( "%s", [%s] )' % [env_name, hosts]
+ end
+%>
+host_contactgroups = [
+<%= contact_groups.join(",\n") %>
+]
diff --git a/puppet/modules/site_check_mk/templates/hostgroups.mk b/puppet/modules/site_check_mk/templates/hostgroups.mk
new file mode 100644
index 00000000..7158dcd1
--- /dev/null
+++ b/puppet/modules/site_check_mk/templates/hostgroups.mk
@@ -0,0 +1,17 @@
+<%
+ host_groups = []
+ @environments.keys.sort.each do |env_name|
+ hosts = ""
+ @nagios_hosts.keys.sort.each do |hostname|
+ hostdata = @nagios_hosts[hostname]
+ domain_internal = hostdata['domain_internal']
+ if hostdata['environment'] == env_name
+ hosts << '"' + domain_internal + '", '
+ end
+ end
+ host_groups << ' ( "%s", [%s] )' % [env_name, hosts]
+ end
+%>
+host_groups = [
+<%= host_groups.join(",\n") %>
+]