Additional scripts for Zabbix agent on Linux to discover and monitor several services
 
 

158 lines
4.4 KiB

#!/usr/bin/perl -w
use strict;
use Fcntl qw(:flock);
use File::Which;
use Getopt::Long;
# Command-line options and their defaults.
my $what = 'volume';    # what to check: 'volume' or 'peer'
my $volume = undef;     # volume name (--volume)
my $peer = undef;       # peer host (--peer)
my $bricks = undef;     # optional expected number of bricks (--bricks)

# Path of the gluster command, as located by File::Which (false if not found).
my $gluster = which('gluster');
my $lock = '/var/lock/gluster-zabbix.lock';

unless ($gluster){
    # Gluster is not installed, exit with an error
    die "gluster command not found";
}

# Get an exclusive lock so that concurrent runs of this script wait for each
# other. The LOCK handle stays open, and the lock held, until the handle is
# closed or the process exits.
open(LOCK, '>', $lock) || die "Can't open $lock: $!";
flock(LOCK, LOCK_EX) || die "Can't lock $lock: $!";

GetOptions(
    "what=s"   => \$what,
    "volume=s" => \$volume,
    "bricks=i" => \$bricks,
    "peer=s"   => \$peer
);
# Print the help text (one line of synopsis, then one line per --what mode)
# on the currently selected output handle.
sub usage(){
    my @help = (
        "usage: $0 --what=[peer|volume]",
        "If --what=volume you need to pass --volume=<volname>. The optional --bricks arg can be used to pass the number of expected bricks",
        "If --what=peer you need to pass --peer=<host>",
    );
    print join("\n", @help) . "\n";
}
# Run a command line through the shell and return what it printed.
#
# Argument: the complete command line as a single string. The standard
#           error of the command is discarded.
# Returns:  the list of lines (newlines included) printed on standard
#           output by the last attempt.
# Dies if the command cannot be launched.
#
# The command is tried up to three times, one second apart, as gluster can
# fail if two commands run at the same time. The output of the last attempt
# is returned even when that attempt exited with a non-zero status.
sub gluster($){
    my $cmd = shift;
    my $attempts = 3;
    my @result = ();
    for my $try (1 .. $attempts){
        # Lexical handle instead of a global bareword one: it cannot clash
        # with another RES handle elsewhere in the program.
        open(my $res, '-|', "$cmd 2>/dev/null")
            || die "error: Could not execute $cmd: $!";
        @result = <$res>;
        # Closing a pipe waits for the child and stores its status in $?
        close($res);
        last if ($? == 0);
        # Waiting is only useful when another attempt follows
        sleep(1) if ($try < $attempts);
    }
    return @result;
}
# Validate the arguments: --what must be 'volume' or 'peer', and the option
# naming the volume or the peer must have been given. Otherwise print the
# help text and stop here, instead of going on to run the checks with a
# missing volume or peer name.
if (($what eq 'volume' && !$volume) ||
    ($what eq 'peer' && !$peer) ||
    ($what ne 'volume' && $what ne 'peer')){
    usage();
    exit 1;
}
# Return the first non-zero count found on a "Number of entries: <n>" line
# of the given output lines, or 0 when every count is zero or no such line
# is present. Counts are compared numerically.
sub _first_nonzero_entries {
    foreach my $line (@_){
        my ($count) = $line =~ m/^Number\ of\ entries:\s+(\d+)$/;
        return $count if (defined $count && $count > 0);
    }
    return 0;
}

# Volume check: prints 'OK', or a single 'CRITICAL: ...' message and exits
# with status 1 on the first problem found. Dies when a mandatory gluster
# command returns no output.
if ($what eq 'volume'){
    my $bricksfound = 0;
    my @volinfo = gluster("$gluster vol status $volume");
    unless (scalar @volinfo){
        die "Error occurred while trying to get volume status for $volume";
    }
    foreach my $line (@volinfo){
        # Check that all bricks are online. Host names and paths may contain
        # hyphens. One or two numeric columns are accepted before the
        # one-letter status.
        # NOTE(review): a brick line whose port column is not numeric does not
        # match and is only caught through the --bricks count; confirm against
        # the gluster output for offline bricks.
        if ($line =~ m/^Brick\ ([\w\.\-]+:\/[\w\.\-\/]+)\s+\d+(?:\s+\d+)?\s+([A-Z])/){
            my ($brick, $online) = ($1, $2);
            $bricksfound++;
            if ($online ne 'Y'){
                print "CRITICAL: brick status (reported $online on $brick)";
                exit 1;
            }
        }
        # Check the Self-Heal daemons are up and running. One or more N/A
        # columns are accepted before the one-letter status, mirroring the
        # one or two numeric columns accepted for bricks.
        elsif ($line =~ m/^Self-heal\ Daemon\ on\ ([\w\.\-]+)\s+(?:N\/A\s+)+([A-Z])/ && $2 ne 'Y'){
            print "CRITICAL: self-heal daemon (reported $2 on $1)";
            exit 1;
        }
    }
    # Check the number of bricks is the one we expect
    if ($bricks && $bricks != $bricksfound){
        print "CRITICAL: bricks count mismatch (found $bricksfound while expecting $bricks)";
        exit 1;
    }

    @volinfo = gluster("$gluster vol heal $volume info");
    unless (scalar @volinfo){
        die "Error occurred while trying to get volume heal info for $volume";
    }
    if (_first_nonzero_entries(@volinfo)){
        # Let's check a second time to limit false positives
        sleep 1;
        @volinfo = gluster("$gluster vol heal $volume info");
        unless (scalar @volinfo){
            die "Error occurred while trying to get volume heal info for $volume";
        }
        my $healing = _first_nonzero_entries(@volinfo);
        if ($healing){
            print "CRITICAL: self-heal in progress ($healing)";
            exit 1;
        }
    }

    # The heal-failed command isn't supported on all versions of GlusterFS,
    # so an empty output is not treated as an error here.
    @volinfo = gluster("$gluster vol heal $volume info heal-failed");
    # Check we don't have any file which the Self-Heal daemon couldn't sync
    my $failed = _first_nonzero_entries(@volinfo);
    if ($failed){
        print "CRITICAL: self-heal error ($failed)";
        exit 1;
    }

    @volinfo = gluster("$gluster vol heal $volume info split-brain");
    unless (scalar @volinfo){
        die "Error occurred while trying to get split-brain info for $volume";
    }
    # Now, check we don't have any file in a split-brain situation
    my $splitbrain = _first_nonzero_entries(@volinfo);
    if ($splitbrain){
        # NOTE(review): 'split-bran' looks like a typo for 'split-brain'; the
        # text is left untouched in case monitoring triggers match on it.
        print "CRITICAL: split-bran ($splitbrain)";
        exit 1;
    }

    @volinfo = gluster("$gluster vol info $volume");
    unless (scalar @volinfo){
        die "Error occurred while trying to get volume info for $volume";
    }
    foreach my $line (@volinfo){
        # Check the volume is started
        if ($line =~ m/^Status:\s+(\w+)$/ && $1 ne 'Started'){
            print 'CRITICAL: The volume is not started';
            exit 1;
        }
    }
    print 'OK';
}
# Peer check: prints the third whitespace-separated field of the pool list
# line describing the peer, or 'unknown' when no such line is found.
elsif ($what eq 'peer'){
    my @peers = gluster("$gluster pool list");
    my $status = 'unknown';
    foreach my $line (@peers){
        # \Q...\E makes the peer name match literally, so that a dot in a
        # host name does not match any character. The line either starts
        # with the peer name, or with a UUID followed by the peer name.
        if (($line =~ m/^\Q$peer\E\s+/) ||
            ($line =~ m/^[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}\s+\Q$peer\E\s+/)){
            my $state = (split(/\s+/, $line))[2];
            # Keep the previous value when the line has no third field
            $status = $state if (defined $state);
        }
    }
    print $status;
}
# Closing the handle releases the lock
close(LOCK);
exit(0);