commit
f922de9d81
2 changed files with 152 additions and 0 deletions
@ -0,0 +1,12 @@ |
|||||||
|
# Description: Software Raid Status |
||||||
|
# Type: Agent or Agent (active) |
||||||
|
# Key: raid.sw.status |
||||||
|
# Type of Information: Character |
||||||
|
# Show Value: As is |
||||||
|
|
||||||
|
# The value reported is like: |
||||||
|
# OK: md3:raid1:2 drives:931GB:Optimal md2:raid1:2 drives:931GB:Optimal md1:raid1:2 drives:101MB:Optimal |
||||||
|
|
||||||
|
# You can add a simple trigger on this check like: |
||||||
|
# { hostname:raid.sw.status.str( OK ) }=0 |
||||||
|
UserParameter=raid.sw.status,/var/lib/zabbix/bin/check_raid_mdadm |
@ -0,0 +1,140 @@ |
|||||||
|
#!/usr/bin/env perl |
||||||
|
|
||||||
|
# Get status of Linux software RAID for SNMP / Nagios |
||||||
|
# Author: Michal Ludvig <michal@logix.cz> |
||||||
|
# http://www.logix.cz/michal/devel/nagios |
||||||
|
# Slightly modified by Daniel B. for integration on SME Server / Zabbix |
||||||
|
|
||||||
|
# |
||||||
|
# Simple parser for /proc/mdstat that outputs status of all |
||||||
|
# or some RAID devices. Possible results are |
||||||
|
# - OK: all arrays are optimal |
||||||
|
# - WARNING: Array rebuilding |
||||||
|
# - CRITICAL: Array degraded |
||||||
|
|
||||||
|
use strict; |
||||||
|
use Getopt::Long; |
||||||
|
|
||||||
|
# Sample /proc/mdstat output: |
||||||
|
# |
||||||
|
# Personalities : [raid1] [raid5] |
||||||
|
# md0 : active (read-only) raid1 sdc1[1] |
||||||
|
# 2096384 blocks [2/1] [_U] |
||||||
|
# |
||||||
|
# md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S) |
||||||
|
# 995712 blocks level 5, 64k chunk, algorithm 2 [3/2] [U_U] |
||||||
|
# [=================>...] recovery = 86.0% (429796/497856) finish=0.0min speed=23877K/sec |
||||||
|
# |
||||||
|
# unused devices: <none> |
||||||
|
|
||||||
|
my $file = "/proc/mdstat"; |
||||||
|
my $device = "all"; |
||||||
|
|
||||||
|
# Get command line options. |
||||||
|
GetOptions ('file=s' => \$file, |
||||||
|
'device=s' => \$device, |
||||||
|
'help' => sub { &usage() } ); |
||||||
|
|
||||||
|
## Strip leading "/dev/" from --device in case it has been given |
||||||
|
$device =~ s/^\/dev\///; |
||||||
|
|
||||||
|
## Return codes for Nagios |
||||||
|
my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4); |
||||||
|
|
||||||
|
## This is a global return value - set to the worst result we get overall |
||||||
|
my $retval = 0; |
||||||
|
|
||||||
|
my (%active_devs, %failed_devs, %spare_devs, %devs_total, %level, %size, %status); |
||||||
|
my @raids; |
||||||
|
my $result = 'OK'; |
||||||
|
|
||||||
|
open FILE, "< $file" or die "Can't open $file : $!"; |
||||||
|
while (<FILE>) { |
||||||
|
next if ! /^(md\d+)+\s*:/; |
||||||
|
next if $device ne "all" and $device ne $1; |
||||||
|
my $dev = $1; |
||||||
|
push @raids, $dev; |
||||||
|
|
||||||
|
my @array = split(/ /); |
||||||
|
$devs_total{$dev} = 0; |
||||||
|
my $devs_up = 0; |
||||||
|
my $missing = 0; |
||||||
|
for $_ (@array) { |
||||||
|
$level{$dev} = $1 if /^(raid\d+)$/; |
||||||
|
next if ! /(\w+)\[\d+\](\(.\))*/; |
||||||
|
$devs_total{$dev}++; |
||||||
|
if ($2 eq "(F)") { |
||||||
|
$failed_devs{$dev} .= "$1,"; |
||||||
|
} |
||||||
|
elsif ($2 eq "(S)") { |
||||||
|
$spare_devs{$dev} .= "$1,"; |
||||||
|
} |
||||||
|
else { |
||||||
|
$active_devs{$dev} .= "$1,"; |
||||||
|
$devs_up++; |
||||||
|
} |
||||||
|
} |
||||||
|
if (! defined($active_devs{$dev})) { $active_devs{$dev} = "none"; } |
||||||
|
else { $active_devs{$dev} =~ s/,$//; } |
||||||
|
if (! defined($spare_devs{$dev})) { $spare_devs{$dev} = "none"; } |
||||||
|
else { $spare_devs{$dev} =~ s/,$//; } |
||||||
|
if (! defined($failed_devs{$dev})) { $failed_devs{$dev} = "none"; } |
||||||
|
else { $failed_devs{$dev} =~ s/,$//; } |
||||||
|
|
||||||
|
$_ = <FILE>; |
||||||
|
/(\d+)\ blocks\ (.*)(\[.*\])\s?$/; |
||||||
|
$size{$dev} = int($1/1024); |
||||||
|
#print "$3\n"; |
||||||
|
$missing = 1 if ($3 =~ m/_/); |
||||||
|
if ($size{$dev} > 1024){ |
||||||
|
$size{$dev} = int($size{$dev}/1024)."GB"; |
||||||
|
} |
||||||
|
else{ |
||||||
|
$size{$dev} .= "MB"; |
||||||
|
} |
||||||
|
$_ = <FILE>; |
||||||
|
if (($devs_total{$dev} > $devs_up) || ($failed_devs{$dev} ne "none") || (($missing) && (!/recovery/))) { |
||||||
|
$status{$dev} = "Degraded"; |
||||||
|
$result = "CRITICAL"; |
||||||
|
$retval = $ERRORS{"CRITICAL"}; |
||||||
|
} |
||||||
|
else { |
||||||
|
$status{$dev} = "Optimal"; |
||||||
|
} |
||||||
|
if (/recovery/){ |
||||||
|
$status{$dev} = "Rebuilding"; |
||||||
|
if ($result eq "OK"){ |
||||||
|
$result = "WARNING"; |
||||||
|
$retval = $ERRORS{"WARNING"}; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
} |
||||||
|
print "$result: "; |
||||||
|
foreach my $raid (@raids){ |
||||||
|
print "$raid:$level{$raid}:$devs_total{$raid} drives:$size{$raid}:$status{$raid} "; |
||||||
|
} |
||||||
|
print "\n"; |
||||||
|
close FILE; |
||||||
|
exit $retval; |
||||||
|
|
||||||
|
# ===== |
||||||
|
sub usage() |
||||||
|
{ |
||||||
|
printf(" |
||||||
|
Check status of Linux SW RAID |
||||||
|
|
||||||
|
Author: Michal Ludvig <michal\@logix.cz> (c) 2006 |
||||||
|
http://www.logix.cz/michal/devel/nagios |
||||||
|
Modified by Daniel B. <daniel\@firewall-services.com>: |
||||||
|
|
||||||
|
Usage: $0 [options] |
||||||
|
|
||||||
|
--file=<filename> Name of file to parse. Default is /proc/mdstat |
||||||
|
--device=<device> Name of MD device, e.g. md0. Default is \"all\" |
||||||
|
|
||||||
|
"); |
||||||
|
exit(1); |
||||||
|
} |
||||||
|
|
Loading…
Reference in new issue