parent
bf617f451e
commit
bcc92a6193
2 changed files with 239 additions and 0 deletions
@ -0,0 +1,12 @@ |
|||||||
|
# Description: MegaRaid Status
# Type: Agent or Agent (active)
# Key: raid.mega.status
# Type of Information: Character
# Show Value: As is

# The value reported is like:
# State: OK: 0:0:RAID-1:2 drives:68GB:Optimal 0:1:RAID-5:4 drives:837GB:Optimal Drives:7

# You can add a simple trigger on this check like:
# { hostname:raid.mega.status.str( OK ) }=0

# The check script is started through sudo.
# NOTE(review): presumably the agent user needs a matching password-less
# sudoers entry for this exact path -- confirm on the target host.
UserParameter=raid.mega.status,/usr/bin/sudo /var/lib/zabbix/bin/check_raid_megaraid_sudo
@ -0,0 +1,227 @@ |
|||||||
|
#!/usr/bin/perl -w |
||||||
|
|
||||||
|
# check_megaraid_sas Nagios plugin |
||||||
|
# Copyright (C) 2007 Jonathan Delgado, delgado@molbio.mgh.harvard.edu |
||||||
|
# |
||||||
|
# This program is free software; you can redistribute it and/or |
||||||
|
# modify it under the terms of the GNU General Public License |
||||||
|
# as published by the Free Software Foundation; either version 2 |
||||||
|
# of the License, or (at your option) any later version. |
||||||
|
# |
||||||
|
# This program is distributed in the hope that it will be useful, |
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
|
# GNU General Public License for more details. |
||||||
|
# |
||||||
|
# You should have received a copy of the GNU General Public License |
||||||
|
# along with this program; if not, write to the Free Software |
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||||||
|
# |
||||||
|
# |
||||||
|
# Nagios plugin to monitor the status of volumes attached to a LSI Megaraid SAS |
||||||
|
# controller, such as the Dell PERC5/i and PERC5/e. If you have any hotspares |
||||||
|
# attached to the controller, you can specify the number you should expect to |
||||||
|
# find with the '-s' flag. |
||||||
|
# |
||||||
|
# The paths for the Nagios plugins lib and MegaCli may need to be changed. |
||||||
|
# |
||||||
|
# $Author: delgado $ |
||||||
|
# $Revision: #3 $ $Date: 2007/06/07 $ |
||||||
|
|
||||||
|
# Slightly modified by Daniel B. for zabbix |
||||||
|
# 23 Apr 2009 |
||||||
|
|
||||||
|
use strict; |
||||||
|
use Getopt::Std; |
||||||
|
|
||||||
|
# Command-line switches. Getopt::Std stores the value of each -x switch in
# the package variable $opt_x, hence `our` rather than `my`.
our ($opt_h, $opt_s, $opt_o, $opt_m, $opt_p);

# -h is a plain flag; -s, -o, -p and -m each take a value.
getopts('hs:o:p:m:');

# -h: print the usage summary and stop without running any check.
if ( $opt_h ) {
    print "Usage: $0 [-s number] [-m number] [-p number] [-o number]\n";
    print " -s is how many hotspares are attached to the controller\n";
    print " -m is the number of media errors to ignore\n";
    print " -p is the predictive error count to ignore\n";
    print " -o is the number of other disk errors to ignore\n";
    exit;
}

# Use MegaCli64 when it exists and is executable, otherwise fall back to
# MegaCli in the same directory.
my $megacli = ( -x '/opt/MegaRAID/MegaCli/MegaCli64' )
    ? '/opt/MegaRAID/MegaCli/MegaCli64'
    : '/opt/MegaRAID/MegaCli/MegaCli';

## Return codes for Nagios
my %ERRORS = (
    'OK'        => 0,
    'WARNING'   => 1,
    'CRITICAL'  => 2,
    'UNKNOWN'   => 3,
    'DEPENDENT' => 4,
);

my ($adapters);            # controller count, parsed from MegaCli -adpCount
my $hotspares     = 0;     # hotspares expected (-s)
my $hotsparecount = 0;     # hotspares actually seen in the physical drive list
my $pdbad         = 0;     # physical drives in a bad firmware state
my $pdcount       = 0;     # physical drives seen
my $mediaerrors   = 0;     # sum of "Media Error Count" values
my $mediaallow    = 0;     # media errors tolerated (-m)
my $prederrors    = 0;     # sum of "Predictive Failure Count" values
my $predallow     = 0;     # predictive failures tolerated (-p)
my $othererrors   = 0;     # sum of all other "... Error Count" values
my $otherallow    = 0;     # other errors tolerated (-o)
my $result        = '';    # human-readable detail printed after the status
my $status        = 'OK';  # overall state; a key of %ERRORS
||||||
|
|
||||||
|
# Combine the state accumulated so far with a newly observed state and
# return the name of the state that should be kept.
#
#   $current - state accumulated so far
#   $compare - newly observed state
#
# Rules, applied in this order:
#   * CRITICAL on either side yields CRITICAL;
#   * a WARNING observation replaces any non-critical current state;
#   * an UNKNOWN observation replaces only OK;
#   * anything else (including an OK observation) leaves $current as is.
sub max_state ($$) {
    my ($current, $compare) = @_;

    return 'CRITICAL' if $compare eq 'CRITICAL' || $current eq 'CRITICAL';
    return 'WARNING'  if $compare eq 'WARNING';
    return 'UNKNOWN'  if $compare eq 'UNKNOWN' && $current eq 'OK';

    # $compare is OK, or a state that does not outrank $current.
    return $current;
}
||||||
|
|
||||||
|
|
||||||
|
# Apply thresholds given on the command line. A switch that is absent, or
# whose value is false (e.g. 0), leaves the corresponding default untouched.
$hotspares  = $opt_s if $opt_s;    # expected number of hotspares
$mediaallow = $opt_m if $opt_m;    # media errors to ignore
$predallow  = $opt_p if $opt_p;    # predictive failures to ignore
$otherallow = $opt_o if $opt_o;    # other disk errors to ignore
||||||
|
|
||||||
|
# Get the number of RAID controllers we have.
# List-form pipe open: MegaCli is started directly with these arguments
# instead of through a shell command string.
open( my $adpcount_fh, '-|', $megacli, '-adpCount', '-NoLog' )
    or die "error: Could not execute MegaCli -adpCount: $!";

while ( my $line = <$adpcount_fh> ) {
    if ( $line =~ m/Controller Count:\s*(\d+)/ ) {
        $adapters = $1;
        last;
    }
}

# The close status is deliberately not checked: reading may stop before the
# command has finished writing its output.
close $adpcount_fh;

# Without a controller count nothing below can be checked. Say so instead of
# falling through to an "OK" result with zero drives.
unless ( defined $adapters ) {
    print STDOUT "UNKNOWN: could not read the controller count from $megacli -adpCount\n";
    exit $ERRORS{'UNKNOWN'};
}
||||||
|
|
||||||
|
# Walk every controller: first its logical drives (appending one summary
# entry per drive to $result and raising $status on a non-optimal state),
# then its physical drives (counting drives, hotspares, bad drives and the
# various error counters).
ADAPTER: for ( my $adp = 0; $adp < $adapters; $adp++ ) {
    # Get the number of logical drives on this adapter
    open( my $ldgetnum_fh, '-|', $megacli, '-LdGetNum', "-a$adp", '-NoLog' )
        or die "error: Could not execute $megacli -LdGetNum -a$adp";

    my $ldnum;
    while ( my $line = <$ldgetnum_fh> ) {
        # \d+ so that adapter indexes of 10 and above match as well
        if ( $line =~ m/Number of Virtual drives configured on adapter \d+:\s*(\d+)/i ) {
            $ldnum = $1;
            last;
        }
    }
    close $ldgetnum_fh;

    unless ( defined $ldnum ) {
        # The logical drives of this adapter cannot be inspected, so the
        # overall result must not remain a plain OK.
        $status = max_state($status, 'UNKNOWN');
        $ldnum  = 0;
    }

    LDISK: for ( my $ld = 0; $ld < $ldnum; $ld++ ) {
        # Get info on this particular logical drive
        open( my $ldinfo_fh, '-|', $megacli, '-LdInfo', "-L$ld", "-a$adp", '-NoLog' )
            or die "error: Could not execute $megacli -LdInfo -L$ld -a$adp -NoLog";

        my ($size, $unit, $raidlevel, $ldpdcount, $spandepth, $state);
        while ( my $line = <$ldinfo_fh> ) {
            if ( $line =~ m/^Size\s*:\s*(\d+(\.\d+)?)\s*(MB|GB|TB)/ ) {
                $size = $1;
                $unit = $3;
                # Adjust MB to GB if that's what we got
                if ( $unit eq 'MB' ) {
                    $size = sprintf( "%.0f", ($size / 1024) );
                    $unit = 'GB';
                }
            } elsif ( $line =~ m/^State\s*:\s*(\w+)/ ) {
                $state = $1;
                if ( $state ne 'Optimal' ) {
                    $status = 'CRITICAL';
                }
            } elsif ( $line =~ m/^Number Of Drives( per span)?\s*:\s*(\d+)/ ) {
                $ldpdcount = $2;
            } elsif ( $line =~ m/^Span Depth\s*:\s*(\d+)/ ) {
                $spandepth = $1;
                # Total drives = drives per span * number of spans. Only
                # possible once the drive count line has been seen.
                $ldpdcount = $ldpdcount * $spandepth if defined $ldpdcount;
            } elsif ( $line =~ m/^RAID Level\s*:\s*Primary-(\d)/ ) {
                $raidlevel = $1;
            }
        }
        close $ldinfo_fh;

        # A drive whose state line was never seen has not been verified.
        unless ( defined $state ) {
            $status = max_state($status, 'UNKNOWN');
            $state  = 'Unknown';
        }

        # Placeholders for anything MegaCli did not report, so the summary
        # stays readable and no undefined value is interpolated.
        $raidlevel = '?' unless defined $raidlevel;
        $ldpdcount = '?' unless defined $ldpdcount;
        $size      = '?' unless defined $size;
        $unit      = ''  unless defined $unit;

        $result .= "$adp:$ld:RAID-$raidlevel:$ldpdcount drives:$size$unit:$state ";
    } #LDISK

    # Get info on physical disks for this adapter
    open( my $pdlist_fh, '-|', $megacli, '-PdList', "-a$adp", '-NoLog' )
        or die "error: Could not execute $megacli -PdList -a$adp -NoLog";

    my ($slotnumber, $fwstate);
    my $is_backplane = 0;    # true while reading the entry for slot 255
    PDISKS: while ( my $line = <$pdlist_fh> ) {
        if ( $line =~ m/Slot Number:\s*(\d+)/ ) {
            $slotnumber   = $1;
            $is_backplane = ( $slotnumber == 255 );
            # Slot 255 is the backplane, not a drive: do not count it
            next PDISKS if $is_backplane;
            $pdcount++;
        } elsif ( $line =~ m/(\w+) Error Count:\s*(\d+)/ ) {
            # Don't care about backplane error counts
            next PDISKS if $is_backplane;
            if ( $1 eq 'Media' ) {
                $mediaerrors += $2;
            } else {
                $othererrors += $2;
            }
        } elsif ( $line =~ m/Predictive Failure Count:\s*(\d+)/ ) {
            next PDISKS if $is_backplane;
            $prederrors += $1;
        } elsif ( $line =~ m/Firmware state:\s*(\w+)/ ) {
            $fwstate = $1;
            if ( $fwstate =~ m/Hotspare/ ) {
                $hotsparecount++;
            } elsif ( $fwstate =~ m/^Online/ ) {
                # Do nothing
            } elsif ( !$is_backplane ) {
                # Any other firmware state on a real drive counts as bad
                $pdbad++;
                $status = 'CRITICAL';
            }
        }
    } #PDISKS
    close $pdlist_fh;
}
||||||
|
|
||||||
|
# Finish the detail text, fold error counters and the hotspare expectation
# into the overall status, print "<status>: <details>" and exit with the
# matching code from %ERRORS.
$result .= "Drives:$pdcount ";

# Any bad disks?
$result .= "$pdbad Bad Drives " if $pdbad;

my $errorcount = $mediaerrors + $prederrors + $othererrors;

# Were there any errors?
if ( $errorcount ) {
    $result .= "($errorcount Errors) ";

    # The total is always shown; the status is raised only when one of the
    # counters exceeds its own allowance.
    my $over_allowance = ( $mediaerrors > $mediaallow )
                      || ( $prederrors  > $predallow )
                      || ( $othererrors > $otherallow );
    $status = max_state($status, 'WARNING') if $over_allowance;
}

# Do we have as many hotspares as expected (if any)
if ( $hotspares ) {
    $result .= "Hotspare(s):$hotsparecount";
    if ( $hotsparecount < $hotspares ) {
        $status = max_state($status, 'WARNING');
        $result .= " (of $hotspares)";
    }
}

print STDOUT "$status: $result\n";
exit $ERRORS{$status};
Loading…
Reference in new issue