#!/usr/bin/perl -w

# check_zfs Nagios plugin for monitoring Sun ZFS zpools
# Copyright (c) 2007 
# Written by Nathan Butcher

# Released under the GNU Public License
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# Version: 0.9.2
# Date : 24th July 2007
# This plugin has been tested on FreeBSD 7.0-CURRENT and Solaris 10
# With a bit of fondling, it could be expanded to recognize other OSes in
# future (e.g. if FUSE Linux gets off the ground)

# Verbose levels:-
# 1 - Only alert us of zpool health and size stats
# 2 - ...also alert us of failed devices when things go bad
# 3 - ...alert us of the status of all devices regardless of health
#
# Usage:   check_zfs <zpool> <verbose level 1-3>
# Example: check_zfs zeepool 1
#	OK ZPOOL zeepool : ONLINE {Size:3.97G Used:183K Avail:3.97G Cap:0%}

use strict;

## Nagios exit codes keyed by state name. Every early exit below uses the
## initial "UNKNOWN" state because nothing about the pool is known yet.
my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2);
my $state="UNKNOWN";
my $msg="FAILURE";

## exactly two arguments are required: pool name and verbose level
if (@ARGV != 2) {
	print "Usage: $0 <zpool name> <verbose level 1-3>\n";
	exit $ERRORS{$state};
}

if ($^O ne 'solaris' && $^O ne 'freebsd') {
	print "This plugin currently only works on Solaris 10, OpenSolaris distributions, and FreeBSD 7 and later.\n";
	exit $ERRORS{$state};
}

my $pool=$ARGV[0];
my $verbose=$ARGV[1];

## fields filled in later from the zpool output
my $size="";
my $used="";
my $avail="";
my $cap="";
my $health="";
my $dmge="";

## The level is compared numerically throughout the script, so insist on a
## single digit 1-3 rather than letting values such as "2abc" slip through
## with "isn't numeric" warnings.
if ($verbose !~ /\A[1-3]\z/) {
	print "Verbose levels range from 1-3\n";
	exit $ERRORS{$state};
}

## The pool name ends up on a zpool command line and inside regexes, so
## reject anything outside the ZFS pool naming rules (leading letter, then
## alphanumerics plus "_", "-", ":", "." and space). This keeps shell
## metacharacters from ever reaching the command.
if ($pool !~ /\A[A-Za-z][A-Za-z0-9_.: -]*\z/) {
	print "Invalid zpool name '$pool'\n";
	exit $ERRORS{$state};
}

## Ask zpool for the one-line size/health summary of the pool.
## $statcommand is only a display string for error messages; the command is
## executed in list form so the pool name is never interpreted by a shell.
my $statcommand="zpool list $pool";

my $list_fh;
if (! open $list_fh, '-|', 'zpool', 'list', $pool) {
	print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n");
	exit $ERRORS{$state};
}

while (my $line = <$list_fh>) {
	chomp $line;
	## skip the column header
	next if ($line =~ /^NAME\s+SIZE\s+USED\s+AVAIL\s+CAP\s+HEALTH\s+ALTROOT/);
	## \Q..\E makes the pool name match literally ("." etc. are legal in names)
	if ($line =~ /^\Q$pool\E\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) {
		($size, $used, $avail, $cap, $health) = ($1, $2, $3, $4, $5);
	}
}

close($list_fh);

## no health column parsed means zpool gave no usable line for this pool
if (! $health ) {
	$state = "CRITICAL";
	$msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool;
	print $state, " ", $msg;
	exit ($ERRORS{$state});
}

## Translate the pool health reported by zpool into a Nagios state:
## ONLINE is OK, DEGRADED is a WARNING, anything else is CRITICAL.
my %state_for_health = (
	"ONLINE"   => "OK",
	"DEGRADED" => "WARNING",
);
$state = exists $state_for_health{$health}
	? $state_for_health{$health}
	: "CRITICAL";

## Collect per-device detail from "zpool status" so that failed devices can
## be reported next to the pool summary (appended to $dmge).
##
## $poolfind tracks the position within the status output:
##   0 - outside the device table
##   1 - the "NAME STATE READ WRITE CKSUM" header was the previous line, so
##       this line should be the pool itself
##   2 - inside our pool's device table (it ends at the first blank line)
my $poolfind=0;

## Display string for error messages only; the command is executed in list
## form below so the pool name is never interpreted by a shell.
$statcommand="zpool status $pool";
my $status_fh;
if (! open $status_fh, '-|', 'zpool', 'status', $pool) {
	$state = 'CRITICAL';
	print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n");
	exit $ERRORS{$state};
}

## go through zfs status output to find zpool fses and devices
while (my $line = <$status_fh>) {
	chomp $line;

	## the line right after the table header must be our pool, matched
	## literally (\Q..\E) and as a whole word
	if ($poolfind == 1) {
		if ($line =~ /^\s\Q$pool\E(?:\s|$)/) {
			$poolfind = 2;
			next;
		}
		$poolfind = 0;
	}

	if ($line =~ /NAME\s+STATE\s+READ\s+WRITE\s+CKSUM/) {
		$poolfind = 1;
	}

	## a blank line terminates the device table
	if ($line =~ /^$/) {
		$poolfind = 0;
	}

	next unless $poolfind == 2;

	## Section headers ("spares", and on newer ZFS "logs", "cache", ...)
	## are a lone word with no state column; only shown at full verbosity.
	if ($line =~ /^\s(\S+)\s*$/) {
		$dmge = $dmge . "[" . uc($1) . "]:- " if $verbose == 3;
		next;
	}

	## a hot spare that has been swapped in; full verbosity only
	if ($line =~ /^\s{5}spare\s+(\S+)/) {
		my $spare_state = $1;
		next unless $verbose == 3;
		$dmge = $dmge . "[SPARE:${spare_state}]:- ";
		next;
	}

	## a device replacement in progress; full verbosity only
	if ($line =~ /^\s{5}replacing\s+(\S+)/) {
		my $repl_state = $1;
		next unless $verbose == 3;
		## show the progress percentage when zpool prints one
		my ($perc) = $line =~ /([0-9]+%)/;
		$perc = "working" unless defined $perc;
		$dmge = $dmge . "[REPLACING:${repl_state} (${perc})]:- ";
		next;
	}

	## other cases: an ordinary "<device> <state> ..." row
	my ($dev, $sta) = $line =~ /^\s+(\S+)\s+(\S+)/;

	## anything without both columns is not a device row; skipping it
	## avoids comparing and printing undefined values
	next unless defined $sta;

	## pool online, not degraded thanks to dead/corrupted disk
	if ($state eq "OK" && $sta eq "UNAVAIL") {
		$state = "WARNING";

		## switching to verbose level 2 to explain weirdness
		if ($verbose == 1) {
			$verbose = 2;
		}
	}

	## no display for verbose level 1
	next if ($verbose == 1);
	## don't display working devices for verbose level 2
	next if ($verbose == 2 && $state eq "OK");
	next if ($verbose == 2 && ($sta eq "ONLINE" || $sta eq "AVAIL" || $sta eq "INUSE"));

	## show everything else; the bracket style reflects the indent depth
	## of the row in the zpool status table
	if ($line =~ /^\s{3}(\S+)/) {
		$dmge = $dmge . "<" . $dev . ":" . $sta . "> ";
	} elsif ($line =~ /^\s{7}(\S+)/) {
		$dmge = $dmge . "(" . $dev . ":" . $sta . ") ";
	} else {
		$dmge = $dmge . $dev . ":" . $sta . " ";
	}
}

## reap the zpool child before reporting
close($status_fh);

## Emit the single-line plugin result ("<STATE> ZPOOL <pool> : ...") and
## exit with the numeric code that corresponds to the final state.

$msg = "ZPOOL " . $pool . " : " . $health
	. " {Size:" . $size . " Used:" . $used
	. " Avail:" . $avail . " Cap:" . $cap . "} "
	. $dmge . "\n";
print $state . " " . $msg;
exit $ERRORS{$state};
