summaryrefslogtreecommitdiff
path: root/files/checkbackups.pl
diff options
context:
space:
mode:
Diffstat (limited to 'files/checkbackups.pl')
-rwxr-xr-xfiles/checkbackups.pl194
1 files changed, 194 insertions, 0 deletions
diff --git a/files/checkbackups.pl b/files/checkbackups.pl
new file mode 100755
index 00000000..39914469
--- /dev/null
+++ b/files/checkbackups.pl
@@ -0,0 +1,194 @@
+#!/usr/bin/perl -w
+
+# This script is designed to check a backup directory populated with
+# subdirectories named after hosts, within which there are backups of various
+# types.
+#
+# Example:
+# /home/backup:
+# foo.example.com
+#
+# foo.example.com:
+# rdiff-backup .ssh
+#
+# rdiff-backup:
+# root home rdiff-backup-data usr var
+#
+# There are heuristics to determine the backup type. Currently, the following
+# types are supported:
+#
+# rdiff-backup: assumes there is a rdiff-backup/rdiff-backup-data/backup.log file
+# duplicity: assumes there is a dup subdirectory, checks the latest file
+# dump files: assumes there is a dump subdirectory, checks the latest file
+# rsync: assumes there is a readable rsync.log file, checks its modification time
+#
+# This script returns output suitable for send_nsca to send the results to
+# nagios and should therefore be used like this:
+#
+# checkbackups.pl | send_nsca -H nagios.example.com
+
+use Getopt::Std;
+
+# Nagios plugin state codes.
+# XXX: taken from utils.sh from nagios-plugins-basic
+my $STATE_OK=0;
+my $STATE_WARNING=1;
+my $STATE_CRITICAL=2;
+my $STATE_UNKNOWN=3;
+my $STATE_DEPENDENT=4;
+# Maps a state code to the label printed in front of each status message.
+my %ERRORS=(0=>'OK',1=>'WARNING',2=>'CRITICAL',3=>'UNKNOWN',4=>'DEPENDENT');
+
+# gross hack: we look into subdirs to find vservers
+my @vserver_dirs = qw{/var/lib/vservers /vservers};
+
+# Option defaults. getopts() stores each parsed option in the matching
+# $opt_X package variable, overriding the value set here.
+our $opt_d = "/backup";       # -d: directory holding the backups
+our $opt_c = 48 * 60 * 60;    # -c: critical age threshold, in seconds
+our $opt_w = 24 * 60 * 60;    # -w: warning age threshold, in seconds
+our $opt_v = 0;               # -v: print diagnostics on STDERR
+our $opt_o;                   # -o: check only this host; the backup dir itself is its directory
+our $opt_s;                   # -s: host to which results are piped through send_nsca
+
+# On invalid options, print the usage on STDERR (STDOUT may be piped into
+# send_nsca) and exit with a non-zero status so the failure is noticed.
+if (!getopts('d:c:w:s:vo')) {
+    print STDERR "Usage: $0 [ -d <backupdir> ] [ -c <threshold> ] [ -w <threshold> ] [ -o ] [ -s <host> ] [ -v ]\n";
+    exit($STATE_UNKNOWN);
+}
+
+# Check one rdiff-backup repository and report how old its last backup is.
+#
+# Arguments:
+#   $host - host name used in the emitted status lines
+#   $dir  - repository directory, i.e. the parent of rdiff-backup-data
+#   $optv - accepted for compatibility with existing callers but not used;
+#           verbosity is read from the global $opt_v
+#
+# The last "EndTime <epoch>" line of $dir/rdiff-backup-data/backup.log is used
+# as the backup time. The result is reported under the service
+# "backups-<last component of $dir>". The same state is then reported for
+# every vserver directory found inside the repository, using
+# "<vserver>.<domain of $host>" as host name.
+#
+# If the log cannot be opened or contains no timestamp, a single UNKNOWN
+# status is reported and nothing else is checked.
+sub check_rdiff {
+    my ($host, $dir, $optv) = @_;
+    my $flag = "$dir/rdiff-backup-data/backup.log";
+    my $extra_msg = '';
+    # Must be lexical: a package global would keep the timestamp found for a
+    # previously checked repository.
+    my $last_bak;
+
+    my $log;
+    if (!open($log, '<', $flag)) {
+        my $err = $!;
+        $opt_v && print STDERR "cannot open $flag\n";
+        print_status($host, $STATE_UNKNOWN, "cannot open $flag: $err");
+        return;
+    }
+    while (my $line = <$log>) {
+        # The fractional part of the timestamp is optional and ignored.
+        if ($line =~ /EndTime ([0-9]+)(?:\.[0-9]*)? \((.*)\)/) {
+            $last_bak = $1;
+            $extra_msg = ' [backup.log]';
+            $opt_v && print STDERR "found timestamp $1 ($2) in $flag\n";
+        }
+    }
+    close($log);
+    if (!$last_bak) {
+        print_status($host, $STATE_UNKNOWN, "cannot parse $flag for a valid timestamp");
+        return;
+    }
+
+    my ($state, $delta) = check_age($last_bak);
+    # The service name is derived from the last path component of $dir.
+    my ($name) = $dir =~ m{([^/]+)/?$};
+    my $service = defined($name) ? "backups-$name" : 'backups';
+    print_status($host, $state, "$delta hours old$extra_msg", $service);
+
+    # Collect the vservers from every known location, without duplicates.
+    my (@vservers, %seen);
+    foreach my $vserver_dir (@vserver_dirs) {
+        my $vsdir = "$dir/$vserver_dir";
+        if (opendir(my $dh, $vsdir)) {
+            my @found = grep { /^[^\.]/ && -d "$vsdir/$_" } readdir($dh);
+            closedir($dh);
+            $opt_v && print STDERR "found vservers $vsdir: @found\n";
+            push @vservers, grep { !$seen{$_}++ } @found;
+        } else {
+            $opt_v && print STDERR "no vserver in $vsdir\n";
+        }
+    }
+
+    # Domain suffix: every label of $host except the first one.
+    my @labels = split(/\./, $host);
+    my $dom_sufx = join('.', @labels[1 .. $#labels]);
+    foreach my $vserver (@vservers) {
+        my $vhost = length($dom_sufx) ? "$vserver.$dom_sufx" : $vserver;
+        print_status($vhost, $state, "$delta hours old$extra_msg, same as parent: $host");
+    }
+    return;
+}
+
+# Classify the age of a backup against the -w and -c thresholds.
+#
+# Argument:
+#   $last_bak - time of the last backup, in seconds since the epoch
+#
+# Returns the list ($state, $delta):
+#   $state - $STATE_CRITICAL when older than $opt_c seconds, $STATE_WARNING
+#            when older than $opt_w seconds, $STATE_OK otherwise;
+#            $STATE_UNKNOWN when the timestamp lies in the future
+#   $delta - the age in hours, formatted with two decimals
+sub check_age {
+    my ($last_bak) = @_;
+    my $delta = time() - $last_bak;
+    # Lexical with an explicit default, so that a negative age cannot return
+    # the state left over from a previous call.
+    my $state = $STATE_UNKNOWN;
+    if ($delta > $opt_c) {
+        $state = $STATE_CRITICAL;
+    } elsif ($delta > $opt_w) {
+        $state = $STATE_WARNING;
+    } elsif ($delta >= 0) {
+        $state = $STATE_OK;
+    }
+    return ($state, sprintf('%.2f', $delta / 3600.0));
+}
+
+# Emit one passive check result in send_nsca input format:
+#   <host> TAB <service> TAB <state code> TAB <state label> <message>
+#
+# Arguments:
+#   $host    - host name the result applies to
+#   $state   - numeric state code; an undefined or unknown code is reported
+#              as $STATE_UNKNOWN
+#   $message - free text appended after the state label
+#   $service - service name; defaults to 'backups'
+#
+# When -s is given the line is piped to send_nsca, and echoed on STDOUT only
+# in verbose mode; without -s the line is printed on STDOUT.
+# Dies if send_nsca cannot be started.
+sub print_status {
+    my ($host, $state, $message, $service) = @_;
+    if (!defined($state) || !exists($ERRORS{$state})) {
+        $state = $STATE_UNKNOWN;
+    }
+    my $state_msg = $ERRORS{$state};
+    if (!$service) {
+        $service = 'backups';
+    }
+    my $line = "$host\t$service\t$state\t$state_msg $message\n";
+    if ($opt_s) {
+        $opt_v && print STDERR "sending results to nagios...\n";
+        # List form of the pipe open: the -s value is passed as a single
+        # argument and never interpreted by a shell.
+        open(my $nsca, '|-', '/usr/sbin/send_nsca', '-H', $opt_s)
+            or die("cannot start send_nsca: $!\n");
+        print {$nsca} $line;
+        close($nsca) or warn("could not close send_nsca pipe correctly: $!\n");
+    }
+    if (!$opt_s || $opt_v) {
+        # print, not printf: the line is data and may contain '%'.
+        print $line;
+    }
+    return;
+}
+
+# Report the age of a backup based on the modification time of a flag file.
+#
+# Arguments:
+#   $host - host name used in the emitted status line
+#   $flag - path of the file whose modification time is checked
+#
+# Reports UNKNOWN when the file cannot be stat()ed.
+sub check_flag {
+    my ($host, $flag) = @_;
+    my @stats = stat($flag);
+    if (!@stats) {
+        print_status($host, $STATE_UNKNOWN, "cannot stat flag $flag");
+        return;
+    }
+    # Element 9 of the stat() list is the modification time.
+    my ($state, $delta) = check_age($stats[9]);
+    print_status($host, $state, "$delta hours old");
+    return;
+}
+
+# Return the name of the most recently modified entry of directory $path,
+# ignoring entries whose name starts with a dot. Returns undef when the
+# directory cannot be read or has no such entry.
+sub newest_entry {
+    my ($path) = @_;
+    opendir(my $dh, $path) or return;
+    my ($newest, $newest_mtime);
+    foreach my $entry (grep { !/^\./ } readdir($dh)) {
+        my $mtime = (lstat("$path/$entry"))[9];
+        next unless defined($mtime);
+        if (!defined($newest_mtime) || $mtime > $newest_mtime) {
+            ($newest, $newest_mtime) = ($entry, $mtime);
+        }
+    }
+    closedir($dh);
+    return $newest;
+}
+
+# Main program: build the list of hosts, then guess the backup type of each
+# host directory and report the age of its latest backup.
+my $backupdir = $opt_d;
+
+my @hosts;
+if (defined($opt_o)) {
+    # -o: only this machine, whose backups live directly in $backupdir.
+    @hosts = qx{hostname -f};
+} else {
+    # XXX: this should be a complete backup registry instead
+    # Every visible entry of the backup directory is taken as a host name,
+    # except those containing "lost+found".
+    opendir(my $dh, $backupdir)
+        or die("cannot read backup directory $backupdir: $!\n");
+    @hosts = sort grep { !/^\./ && !/lost\+found/ } readdir($dh);
+    closedir($dh);
+}
+
+# Host directories are looked up relative to the backup directory.
+chdir($backupdir) or die("cannot chdir to $backupdir: $!\n");
+foreach my $host (@hosts) {
+    chomp($host);
+    my $dir = $opt_o ? $backupdir : $host;
+    if (!-d $dir) {
+        print_status($host, $STATE_UNKNOWN, 'no directory');
+        next;
+    }
+
+    # Set as soon as one backup type has been recognized for this host.
+    my $flag;
+
+    # guess the backup type and find a proper stamp file to compare
+    my @rdiffs = glob("$dir/*/rdiff-backup-data");
+    foreach my $subdir (@rdiffs) {
+        $subdir =~ s/rdiff-backup-data$//;
+        $opt_v && print STDERR "inspecting dir $subdir\n";
+        check_rdiff($host, $subdir, $opt_v);
+        $flag = 1;
+    }
+
+    if (-d "$dir/dump" || -d "$dir/dup") {
+        # XXX: this doesn't check backup consistency
+        my $typedir = -d "$dir/dump" ? "$dir/dump" : "$dir/dup";
+        my $latest = newest_entry($typedir);
+        if (defined($latest)) {
+            $flag = "$typedir/$latest";
+            check_flag($host, $flag);
+        } else {
+            # An empty directory holds no backup; do not report the age of
+            # the directory itself.
+            $flag = "$typedir/";
+            print_status($host, $STATE_UNKNOWN, "no backup found in $flag");
+        }
+    } elsif (-r "$dir/rsync.log") {
+        # XXX: this doesn't check backup consistency
+        $flag = "$dir/rsync.log";
+        check_flag($host, $flag);
+    }
+
+    if (!$flag) {
+        print_status($host, $STATE_UNKNOWN, 'unknown system');
+    }
+}