#!/usr/bin/perl -w

# GlusterFS health check.
#
# Runs the `gluster` command line tool and prints a one-line result on
# STDOUT:
#   --what=volume : prints "OK", or "CRITICAL: <reason>" and exits 1 on the
#                   first problem found (brick offline, self-heal daemon
#                   offline, brick count mismatch, pending/failed self-heal,
#                   split-brain, volume not started).
#   --what=peer   : prints the state column reported by `gluster pool list`
#                   for the given peer, or "unknown" if the peer is not listed.

use strict;
use warnings;
use File::Which;
use Getopt::Long;

my $what    = 'volume';
my $volume  = undef;
my $peer    = undef;
my $bricks  = undef;
my $gluster = which('gluster');

unless ($gluster) {
    # Gluster is not installed, exit with an error
    die "gluster command not found";
}

GetOptions(
    "what=s"   => \$what,
    "volume=s" => \$volume,
    "bricks=i" => \$bricks,
    "peer=s"   => \$peer
);

# Print the command line help on STDOUT. Does not exit by itself.
sub usage {
    print <<"EOF";

usage: $0 --what=[peer|volume]

  If --what=volume you need to pass --volume=<volname>. The optional --bricks arg can be used to pass the number of expected bricks
  If --what=peer you need to pass --peer=<host>

EOF
    return;
}

# Run a command and return its STDOUT as a list of lines.
#
# Arguments: the program followed by its arguments, as a list. The command is
# executed directly (no shell), so values taken from the command line cannot
# be interpreted as shell syntax. STDERR of the command is discarded.
#
# The command is attempted up to 3 times, one second apart, as long as it
# exits non-zero: gluster can fail if two instances run at the same time.
# The output of the last attempt is returned, whether it succeeded or not.
# Dies if the child process cannot be forked.
sub gluster {
    my @cmd    = @_;
    my $code   = 256;
    my @result = ();

    for (my $i = 0; $code != 0 && $i < 3; $i++) {
        # Fork with the child's STDOUT connected to $res.
        my $pid = open(my $res, '-|');
        die "error: Could not execute @cmd" unless defined $pid;

        if ($pid == 0) {
            # Child: silence STDERR (best effort), then replace ourselves
            # with the command. exit is only reached if exec fails.
            open(STDERR, '>', '/dev/null');
            exec { $cmd[0] } @cmd;
            exit 127;
        }

        @result = <$res>;
        close $res;    # waits for the child and sets $?
        $code = $?;
        sleep(1) unless $code == 0;
    }
    return @result;
}

# Run `gluster <args>` and return its output lines; die with a message
# mentioning $label and the volume name when no output at all was produced.
sub gluster_or_die {
    my ($label, @args) = @_;
    my @lines = gluster($gluster, @args);
    unless (scalar @lines) {
        die "Error occurred while trying to get $label for $volume";
    }
    return @lines;
}

# Return the first non-zero "Number of entries: N" count found in the given
# lines, or undef when every reported count is zero (or none is reported).
sub pending_entries {
    my @lines = @_;
    foreach my $line (@lines) {
        if ($line =~ m/^Number of entries:\s+(\d+)$/ && $1 > 0) {
            return $1;
        }
    }
    return;
}

# Print a CRITICAL message (no trailing newline) and exit with status 1.
sub critical {
    my ($message) = @_;
    print "CRITICAL: $message";
    exit 1;
}

if (   ($what eq 'volume' && !$volume)
    || ($what eq 'peer' && !$peer)
    || ($what ne 'volume' && $what ne 'peer'))
{
    usage();
    # Do not go on with missing/invalid arguments.
    exit 1;
}

if ($what eq 'volume') {
    my $bricksfound = 0;
    my @volinfo = gluster_or_die('volume status', 'vol', 'status', $volume);

    foreach my $line (@volinfo) {
        # Check that all bricks are online. The status line carries either
        # one or two port columns before the Online flag, and a port may be
        # reported as N/A instead of a number.
        if ($line =~ m{^Brick ([\w.-]+:/[\w./-]+)\s+(?:\d+|N/A)(?:\s+(?:\d+|N/A))?\s+([A-Z])}) {
            my ($brick, $online) = ($1, $2);
            $bricksfound++;
            if ($online ne 'Y') {
                critical("brick status (reported $online on $brick)");
            }
        }
        # Check the Self-Heal daemons are up and running (one or two N/A
        # port columns precede the Online flag).
        elsif ($line =~ m{^Self-heal Daemon on ([\w.-]+)\s+N/A(?:\s+N/A)?\s+([A-Z])}
            && $2 ne 'Y')
        {
            critical("self-heal daemon (reported $2 on $1)");
        }
    }

    # Check the number of bricks is the one we expect
    if ($bricks && $bricks != $bricksfound) {
        critical("bricks count mismatch (found $bricksfound while expecting $bricks)");
    }

    # Pending self-heal entries
    @volinfo = gluster_or_die('volume heal info', 'vol', 'heal', $volume, 'info');
    if (defined pending_entries(@volinfo)) {
        # Lets check a second time to limit false positives
        sleep 1;
        @volinfo = gluster_or_die('volume heal info', 'vol', 'heal', $volume, 'info');
        my $pending = pending_entries(@volinfo);
        if (defined $pending) {
            critical("self-heal in progress ($pending)");
        }
    }

    # Check we don't have any file which the Self-Heal daemon couldn't sync.
    # The heal-failed command isn't supported on all versions of GlusterFS,
    # so an empty output is not treated as an error here.
    @volinfo = gluster($gluster, 'vol', 'heal', $volume, 'info', 'heal-failed');
    if (scalar @volinfo) {
        my $failed = pending_entries(@volinfo);
        if (defined $failed) {
            critical("self-heal error ($failed)");
        }
    }

    # Check we don't have any file in a split-brain situation
    @volinfo = gluster_or_die('split-brain info', 'vol', 'heal', $volume, 'info', 'split-brain');
    my $split = pending_entries(@volinfo);
    if (defined $split) {
        critical("split-brain ($split)");
    }

    # Check the volume is started
    @volinfo = gluster_or_die('volume info', 'vol', 'info', $volume);
    foreach my $line (@volinfo) {
        if ($line =~ m/^Status:\s+(\w+)$/ && $1 ne 'Started') {
            critical('The volume is not started');
        }
    }

    print 'OK';
}
elsif ($what eq 'peer') {
    my @peers  = gluster($gluster, 'pool', 'list');
    my $status = 'unknown';

    foreach my $line (@peers) {
        # A line matches when it starts with the peer itself, or with a UUID
        # followed by the peer. \Q...\E keeps dots (and any other
        # metacharacter) in the peer name literal.
        if (   ($line =~ m/^\Q$peer\E\s+/)
            || ($line =~ m/^[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}\s+\Q$peer\E\s+/))
        {
            # The state is the third whitespace-separated column.
            (undef, undef, $status) = split(/\s+/, $line);
        }
    }
    print $status;
}