|
|
|
#!/usr/bin/perl -w
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use File::Which;
|
|
|
|
use Getopt::Long;
|
|
|
|
|
|
|
|
# Command-line option values. Only --what has a default; the others stay
# undefined until the matching switch is given on the command line.
my $what = 'volume';  # which check to run: 'volume' or 'peer'
my $volume;           # volume name, used when $what is 'volume'
my $peer;             # peer host name, used when $what is 'peer'
my $bricks;           # optional expected number of bricks for the volume check
|
|
|
|
|
|
|
|
# Locate the gluster command line tool through File::Which.
# Gluster is not installed if nothing is found, so exit with an error.
my $gluster = which('gluster')
  or die "gluster command not found";
|
|
|
|
|
|
|
|
|
|
|
|
# Parse the command line. Each switch stores its value in the matching
# variable: "=s" requires a string value, "=i" requires an integer value.
GetOptions(
  'what=s'   => \$what,
  'volume=s' => \$volume,
  'bricks=i' => \$bricks,
  'peer=s'   => \$peer,
);
|
|
|
|
|
|
|
|
# Print the command-line help on the currently selected output handle.
# Takes no arguments. The text starts and ends with an empty line and
# names the script through $0.
sub usage(){
  my @help_lines = (
    '',
    "usage: $0 --what=[peer|volume]",
    '',
    'If --what=volume you need to pass --volume=<volname>. The optional --bricks arg can be used to pass the number of expected bricks',
    'If --what=peer you need to pass --peer=<host>',
    '',
  );
  # Emit the whole text as one string, each line terminated by a newline
  print join('', map { "$_\n" } @help_lines);
}
|
|
|
|
|
|
|
|
# Run a command and return the lines it wrote on standard output.
#
# Parameter:
#   $cmd - complete command line, as a single string. It is handed to the
#          shell with standard error redirected to /dev/null.
# Returns:
#   The list of output lines (newlines kept) of the last attempt. The list
#   is empty when the command printed nothing, so callers must treat an
#   empty list as a possible failure.
# Dies:
#   When the pipe to the command cannot be opened.
#
# NOTE(review): $cmd goes through the shell unescaped, so anything callers
# interpolate into it (volume names, ...) must come from a trusted source.
sub gluster($){
  my $cmd = shift;
  my $max_tries = 3;
  my @result = ();
  # Loop to run gluster cmd as it can fail if two run at the same time
  for my $try (1 .. $max_tries){
    open(my $res, '-|', "$cmd 2>/dev/null")
      or die "error: Could not execute $cmd: $!";
    @result = <$res>;
    # Closing a pipe waits for the child and stores its exit status in $?
    close $res;
    last if $? == 0;
    # Give a concurrent gluster command time to finish, but do not waste
    # a second once no further attempt will follow
    sleep(1) if $try < $max_tries;
  }
  return @result;
}
|
|
|
|
|
|
|
|
# Validate the option combination before running any check:
#  - --what=volume needs --volume
#  - --what=peer needs --peer
#  - any other --what value is not supported
# On invalid input, print the usage text and stop. Continuing would run the
# checks with an undefined volume or peer, or silently do nothing at all.
if (($what eq 'volume' && !$volume) ||
    ($what eq 'peer' && !$peer) ||
    ($what ne 'volume' && $what ne 'peer')){
  usage();
  exit 1;
}
|
|
|
|
|
|
|
|
# Run the check selected by --what and print the result on standard output.
#
# --what=volume: prints 'OK' when every check passes. The first failing check
#                prints a 'CRITICAL: ...' message and exits with status 1.
#                Dies when a required gluster command returns no output.
# --what=peer:   prints the state column reported by "gluster pool list" for
#                the requested peer, or 'unknown' when the peer is not listed.
if ($what eq 'volume'){
  my $bricksfound = 0;
  my @volinfo = gluster("$gluster vol status $volume");
  unless (scalar @volinfo){
    die "Error occurred while trying to get volume status for $volume";
  }
  foreach my $line (@volinfo){
    # Check that all bricks are online
    # $1 is the brick (host:/path), $2 the online flag following the port
    if ($line =~ m/^Brick\ ([\w\.]+:\/[\w\.\/]+)\s+\d+\s+([A-Z])/){
      $bricksfound++;
      if ($2 ne 'Y') {
        print "CRITICAL: brick status (reported $2 on $1)";
        exit 1;
      }
    }
    # Check the Self-Heal daemons are up and running
    # $1 is the host, $2 the online flag following the N/A port column
    elsif ($line =~ m/^Self-heal\ Daemon\ on\ ([\w\.]+)\s+N\/A\s+([A-Z])/ && $2 ne 'Y'){
      print "CRITICAL: self-heal daemon (reported $2 on $1)";
      exit 1;
    }
  }
  # Check the number of bricks is the one we expect
  # (skipped when --bricks was not given)
  if ($bricks && $bricks != $bricksfound){
    print "CRITICAL: bricks count mismatch (found $bricksfound while expecting $bricks)";
    exit 1;
  }
  @volinfo = gluster("$gluster vol heal $volume info");
  unless (scalar @volinfo){
    die "Error occurred while trying to get volume heal info for $volume";
  }
  foreach my $line (@volinfo){
    # Any non-zero entry count means files are still waiting to be healed
    if ($line =~ m/^Number\ of\ entries:\s+(\d+)$/ && $1 > 0){
      print "CRITICAL: self-heal in progress ($1)";
      exit 1;
    }
  }
  @volinfo = gluster("$gluster vol heal $volume info heal-failed");
  # the heal-failed command isn't supported on all version of GlusterFS,
  # so an empty answer is not treated as an error here
  if (scalar @volinfo){
    foreach my $line (@volinfo){
      # Now, check we don't have any file which the Self-Heal daemon couldn't sync
      if ($line =~ m/^Number\ of\ entries:\s+(\d+)$/ && $1 > 0){
        print "CRITICAL: self-heal error ($1)";
        exit 1;
      }
    }
  }
  @volinfo = gluster("$gluster vol heal $volume info split-brain");
  unless (scalar @volinfo){
    die "Error occurred while trying to get split-brain info for $volume";
  }
  foreach my $line (@volinfo){
    # Now, check we don't have any file in a split-brain situation
    if ($line =~ m/^Number\ of\ entries:\s+(\d+)$/ && $1 > 0){
      print "CRITICAL: split-brain ($1)";
      exit 1;
    }
  }
  @volinfo = gluster("$gluster vol info $volume");
  unless (scalar @volinfo){
    die "Error occurred while trying to get volume info for $volume";
  }
  foreach my $line (@volinfo){
    # Check the volume is started
    if ($line =~ m/^Status:\s+(\w+)$/ && $1 ne 'Started'){
      print 'CRITICAL: The volume is not started';
      exit 1;
    }
  }
  print 'OK';
}
elsif ($what eq 'peer'){
  my @peers = gluster("$gluster pool list");
  my $status = 'unknown';
  foreach my $line (@peers){
    # The peer name is matched literally (\Q...\E) so that the dots of a
    # host name or IP address do not act as regex wildcards. A line matches
    # when it starts with the peer name, or with a UUID followed by it.
    if (($line =~ m/^\Q$peer\E\s+/) ||
        ($line =~ m/^[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}\s+\Q$peer\E\s+/)){
      # The state is the third whitespace-separated column of the line
      (undef,undef,$status) = split(/\s+/, $line);
    }
  }
  print $status;
}
|
|
|
|
|