#!/usr/bin/env perl

# Get status of Linux software RAID for SNMP / Nagios
# Author: Michal Ludvig <michal@logix.cz>
#         http://www.logix.cz/michal/devel/nagios
# Slightly modified by Daniel B. for integration on SME Server / Zabbix

# 
# Simple parser for /proc/mdstat that outputs status of all
# or some RAID devices. Possible results are
# - OK: all arrays are optimal
# - WARNING: Array rebuilding
# - CRITICAL: Array degraded

use strict;
use Getopt::Long;

# Sample /proc/mdstat output:
# 
# Personalities : [raid1] [raid5]
# md0 : active (read-only) raid1 sdc1[1]
#       2096384 blocks [2/1] [_U]
# 
# md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S)
#       995712 blocks level 5, 64k chunk, algorithm 2 [3/2] [U_U]
#       [=================>...]  recovery = 86.0% (429796/497856) finish=0.0min speed=23877K/sec
# 
# unused devices: <none>

# Defaults, overridable from the command line.
my $file   = "/proc/mdstat";
my $device = "all";

# Get command line options. --help prints the usage text and exits.
GetOptions (
  'file=s'   => \$file,
  'device=s' => \$device,
  'help'     => \&usage
);

## Strip leading "/dev/" from --device in case it has been given
$device =~ s{^/dev/}{};

# Per-array data, keyed by array name (e.g. "md0"). The *_devs hashes hold a
# comma separated list of member names, or "none" when the list is empty.
my (%active_devs, %failed_devs, %spare_devs, %devs_total, %level, %size, %status);
# Array names in the order they appear in the file.
my @raids;
# Worst result seen so far: OK < WARNING (rebuilding) < CRITICAL (degraded).
my $result = 'OK';

# Three-argument open on a lexical handle, so the file name is never
# interpreted as part of the open mode. "-" still means standard input, as it
# did with the former two-argument form.
my $fh;
if ($file eq '-') {
  $fh = \*STDIN;
}
else {
  open($fh, '<', $file) or die "Can't open $file : $!";
}

while (my $line = <$fh>) {
  # Only array header lines are of interest here, e.g.
  #   md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S)
  # \w+ (rather than \d+) also accepts names such as md_d0.
  next if $line !~ /^(md\w+)\s*:/;
  my $dev = $1;
  next if $device ne "all" and $device ne $dev;
  push @raids, $dev;

  # Stays empty for personalities not named raidN (e.g. linear).
  $level{$dev} = "";

  # Sort every member device of the header line into one of three groups.
  my (@active, @spare, @failed);
  for my $token (split(/ /, $line)) {
    $level{$dev} = $1 if $token =~ /^(raid\d+)$/;
    # A member looks like name[index] followed by zero or more one-letter
    # flags in parentheses, e.g. sdb2[4](F).
    next if $token !~ /(\w+)\[\d+\]((?:\(.\))*)/;
    my ($member, $flags) = ($1, $2);
    if ($flags =~ /\(F\)/) {
      push @failed, $member;
    }
    elsif ($flags =~ /\(S\)/) {
      push @spare, $member;
    }
    else {
      push @active, $member;
    }
  }
  $devs_total{$dev}  = @active + @spare + @failed;
  $active_devs{$dev} = @active ? join(',', @active) : "none";
  $spare_devs{$dev}  = @spare  ? join(',', @spare)  : "none";
  $failed_devs{$dev} = @failed ? join(',', @failed) : "none";

  # Second line of the array: size in 1K blocks and the member map, e.g.
  #   995712 blocks level 5, 64k chunk, algorithm 2 [3/2] [U_U]
  # The file may end early, so never match against undef.
  my $detail = <$fh>;
  $detail = "" if !defined $detail;

  # Size and member map are matched separately and each match is checked, so
  # a line without a trailing [..] map (or without a size) cannot leave stale
  # capture values behind. An unparsable size is reported as 0MB.
  my $blocks = ($detail =~ /(\d+)\s+blocks/) ? $1 : 0;
  my $megabytes = int($blocks / 1024);
  $size{$dev} = $megabytes > 1024
    ? int($megabytes / 1024) . "GB"
    : $megabytes . "MB";

  # An underscore in the last [..] group marks a missing member.
  my $missing = ($detail =~ /\[([^\[\]]*)\]\s*$/ && $1 =~ /_/) ? 1 : 0;

  # Third line: either a progress line such as
  #   [=================>...]  recovery = 86.0% (429796/497856) ...
  # or something else (blank line, bitmap line, end of file).
  my $progress = <$fh>;
  $progress = "" if !defined $progress;

  # A failed member always means degraded. A missing member means degraded
  # unless a recovery is already running.
  if (@failed || ($missing && $progress !~ /recovery/)) {
    $status{$dev} = "Degraded";
    $result = "CRITICAL";
  }
  else {
    $status{$dev} = "Optimal";
  }

  # A running recovery/resync overrides the per-array status, but it only
  # raises the overall result from OK to WARNING, never lowers a CRITICAL.
  # Up to three digits are accepted so that 100.0% is recognised as well.
  if ($progress =~ /(?:recovery|resync)\s*=\s*\d{1,3}(?:\.\d)?%/) {
    $status{$dev} = "Rebuilding";
    $result = "WARNING" if $result eq "OK";
  }
}

# One line of output: the overall result followed by one entry per array.
# NOTE(review): when --device names an array that is not in the file, the
# output is "OK: " with no entries; confirm that this is what callers expect.
print "$result: ";
foreach my $raid (@raids) {
  print join(':', $raid, $level{$raid}, "$devs_total{$raid} drives",
             $size{$raid}, $status{$raid}), " ";
}
print "\n";
close $fh;

# The severity is carried in the printed text only; the exit status is
# always 0.
exit 0;

# =====
# Print the help text to standard output and terminate the script with exit
# status 1. Takes no arguments; any arguments passed in are ignored.
#
# The text is emitted with print rather than printf: it interpolates $0, and
# a "%" in the script path would otherwise be read as a format conversion.
sub usage {
	print <<"END_USAGE";

Check status of Linux SW RAID

Author: Michal Ludvig <michal\@logix.cz> (c) 2006
        http://www.logix.cz/michal/devel/nagios
Modified by Daniel B. <daniel\@firewall-services.com>:

Usage: $0 [options]

  --file=<filename>    Name of file to parse. Default is /proc/mdstat
  --device=<device>    Name of MD device, e.g. md0. Default is "all"

END_USAGE
	exit(1);
}