#!/usr/bin/perl -w

# This script is designed to check a backup directory populated with
# subdirectories named after hosts, within which there are backups of various
# types.
#
# Example:
# /home/backup:
# foo.example.com
# 
# foo.example.com:
# rdiff-backup .ssh
#
# rdiff-backup:
# root home rdiff-backup-data usr var
#
# There are heuristics to determine the backup type. Currently, the following
# types are supported:
#
# rdiff-backup: assumes there is a rdiff-backup/rdiff-backup-data/backup.log file
# duplicity: assumes there is a dup subdirectory, checks the latest file
# dump files: assumes there is a dump subdirectory, checks the latest file
#
# This script returns output suitable for send_nsca to send the results to
# nagios and should therefore be used like this:
#
# checkbackups.sh | send_nsca -H nagios.example.com

use Getopt::Std;

# XXX: taken from utils.sh from nagios-plugins-basic
my $STATE_OK=0;
my $STATE_WARNING=1;
my $STATE_CRITICAL=2;
my $STATE_UNKNOWN=3;
my $STATE_DEPENDENT=4;
my %ERRORS=(0=>'OK',1=>'WARNING',2=>'CRITICAL',3=>'UNKNOWN',4=>'DEPENDENT');

# gross hack: we look into subdirs to find vservers
my @vserver_dirs = qw{/var/lib/vservers /vservers};

our $opt_d = "/backup";
our $opt_c = 48 * 60 * 60;
our $opt_w = 24 * 60 * 60;
our $opt_v = 0;
our $opt_o;
our $opt_s;

if (!getopts('d:c:w:s:vo')) {
	print <<EOF
Usage: $0 [ -d <backupdir> ] [ -c <threshold> ] [ -w <threshold> ] [ -o ] [ -s <host> ] [ -v ]
EOF
	;
	exit();
}

sub check_rdiff {
    my ($host, $dir, $optv) = @_;
    my $flag="$dir/rdiff-backup-data/backup.log";
    my $extra_msg = '';
    my @vservers;
    if (open(FLAG, $flag)) {
        while (<FLAG>) {
            if (/EndTime ([0-9]*).[0-9]* \((.*)\)/) {
                $last_bak = $1;
                $extra_msg = ' [backup.log]';
                $opt_v && print STDERR "found timestamp $1 ($2) in $flag\n";
            }
        }
        if (!$last_bak) {
            print_status($host, $STATE_UNKNOWN, "cannot parse $flag for a valid timestamp");
            next;
        }
    } else {
        $opt_v && print STDERR "cannot open $flag\n";
    }
    close(FLAG);
    ($state, $delta) = check_age($last_bak);
    $dir =~ /([^\/]+)\/?$/;
    $service = "backups-$1";
    print_status($host, $state, "$delta hours old$extra_msg", $service);
    foreach my $vserver_dir (@vserver_dirs) {
        $vsdir = "$dir/$vserver_dir";
        if (opendir(DIR, $vsdir)) {
            @vservers = grep { /^[^\.]/ && -d "$vsdir/$_" } readdir(DIR);
            $opt_v && print STDERR "found vservers $vsdir: @vservers\n";
            closedir DIR;
        } else {
            $opt_v && print STDERR "no vserver in $vsdir\n";
        }
    }
    my @dom_sufx = split(/\./, $host);
    my $dom_sufx = join('.', @dom_sufx[1,-1]);
    foreach my $vserver (@vservers) {
        print_status("$vserver.$dom_sufx", $state, "$delta hours old$extra_msg, same as parent: $host");
    }
}

sub check_age {
    my ($last_bak) = @_;
    my $t = time();
    my $delta = $t - $last_bak;
    if ($delta > $opt_c) {
        $state = $STATE_CRITICAL;
    } elsif ($delta > $opt_w) {
        $state = $STATE_WARNING;
    } elsif ($delta >= 0) {
        $state = $STATE_OK;
    }
    $delta = sprintf '%.2f', $delta/3600.0;
    return ($state, $delta);
}

sub print_status {
    my ($host, $state, $message, $service) = @_;
    my $state_msg = $ERRORS{$state};
    if (!$service) {
        $service = 'backups';
    }
    $line = "$host\t$service\t$state\t$state_msg $message\n";
    if ($opt_s) {
	$opt_v && print STDERR "sending results to nagios...\n";
        open(NSCA, "|/usr/sbin/send_nsca -H $opt_s") or die("cannot start send_nsca: $!\n");
        print NSCA $line;
        close(NSCA) or warn("could not close send_nsca pipe correctly: $!\n");
    }
    if (!$opt_s || $opt_v) {
        printf $line;
    }
}

sub check_flag {
    my ($host, $flag) = @_;
    my @stats = stat($flag);
    if (not @stats) {
        print_status($host, $STATE_UNKNOWN, "cannot stat flag $flag");
    }
    else {
        ($state, $delta) = check_age($stats[9]);
        print_status($host, $state, "$delta hours old");
    }
}

my $backupdir= $opt_d;

my @hosts;
if (defined($opt_o)) {
	@hosts=qx{hostname -f};
} else {
	# XXX: this should be a complete backup registry instead
	@hosts=qx{ls $backupdir | grep -v lost+found};
}

chdir($backupdir);
my ($delta, $state, $host);
foreach $host (@hosts) {
	chomp($host);
	if ($opt_o) {
		$dir = $backupdir;
	} else {
		$dir = $host;
	}
	my $flag;
	if (-d $dir) {
                # guess the backup type and find a proper stamp file to compare
                @rdiffs = glob("$dir/*/rdiff-backup-data");
                foreach $subdir (@rdiffs) {
                    $subdir =~ s/rdiff-backup-data$//;
                    $opt_v && print STDERR "inspecting dir $subdir\n";
                    check_rdiff($host, $subdir, $opt_v);
                    $flag = 1;
                }
		if (-d "$dir/dump") {
			# XXX: this doesn't check backup consistency
			$flag="$dir/dump/" . `ls -tr $dir/dump | tail -1`;
			chomp($flag);
			check_flag($host, $flag);
		} elsif (-d "$dir/dup") {
			# XXX: this doesn't check backup consistency
			$flag="$dir/dup/" . `ls -tr $dir/dup | tail -1`;
			chomp($flag);
			check_flag($host, $flag);
		} elsif (-r "$dir/rsync.log") {
			# XXX: this doesn't check backup consistency
			$flag="$dir/rsync.log";
			check_flag($host, $flag);
		}
                if (!$flag) {
                        print_status($host, $STATE_UNKNOWN, 'unknown system');
		}
	} else {
            print_status($host, $STATE_UNKNOWN, 'no directory');
	}
}