From 5e8df466e018046bdcedceeb8917c8e2714a4f11 Mon Sep 17 00:00:00 2001 From: Daniel Berteaud Date: Wed, 16 Jul 2014 10:36:30 +0200 Subject: [PATCH] Better debug info and more robust execution for GlusterFS scripts --- zabbix_scripts/check_gluster_sudo | 58 +++++++++++++++++++++------------------ zabbix_scripts/disco_gluster_sudo | 31 +++++++++++++-------- 2 files changed, 52 insertions(+), 37 deletions(-) diff --git a/zabbix_scripts/check_gluster_sudo b/zabbix_scripts/check_gluster_sudo index 9fb1c74..765078b 100644 --- a/zabbix_scripts/check_gluster_sudo +++ b/zabbix_scripts/check_gluster_sudo @@ -35,6 +35,23 @@ If --what=peer you need to pass --peer= EOF } +sub gluster($){ + my $cmd = shift; + my $code = 256; + my @result = (); + # Loop to run gluster cmd as it can fail if two run at the same time + for (my $i = 0; ($code != 0 && $i < 10); $i++){ + open (RES, "$cmd |") + || die "error: Could not execute $cmd"; + @result = ; + close RES; + $code = $?; + sleep(1) unless ($code == 0); + } + die "error: Could not execute $cmd" unless ($code == 0); + return @result; +} + if (($what eq 'volume' && !$volume) || ($what eq 'peer' && !$peer) || ($what ne 'volume' && $what ne 'peer')){ @@ -42,67 +59,56 @@ if (($what eq 'volume' && !$volume) || } if ($what eq 'volume'){ - open (VOLUMEINFO, "$gluster vol status $volume |") - || die "error: Could not execute gluster vol status $volume"; + my @volinfo = gluster("$gluster vol status $volume"); my $bricksfound = 0; my $status = 'OK'; - foreach my $line (){ + foreach my $line (@volinfo){ # Check that all bricks are online - if ($line =~ m/^Brick\ ([\w\.]+:\/[\w\.\/]+)\s+\d+\s+(Y|N)/){ + if ($line =~ m/^Brick\ ([\w\.]+:\/[\w\.\/]+)\s+\d+\s+([A-Z])/){ $bricksfound++; - $status = "CRITICAL: brick status ($1)" if ($2 ne 'Y'); + $status = "CRITICAL: brick status (reported $2 on $1)" if ($2 ne 'Y'); } # Check the Self-Heal daemons are up and running - elsif ($line =~ m/^Self-heal\ Daemon\ on\ ([\w\.]+)\s+N\/A\\s+(Y|N)/){ - $status = "CRITICAL: self-heal daemon ($1)" if ($2 ne 'Y'); + elsif ($line =~ m/^Self-heal\ Daemon\ on\ ([\w\.]+)\s+N\/A\\s+([A-Z])/){ + $status = "CRITICAL: self-heal daemon (reported $2 on $1)" if ($2 ne 'Y'); } } # Check the number of bricks is the one we expect if ($bricks && $bricks != $bricksfound){ - $status = 'CRITICAL: bricks count mismatch'; + $status = "CRITICAL: bricks count mismatch (found $bricksfound while expecting $bricks)"; } - close VOLUMEINFO; - open (VOLUMEINFO, "$gluster vol heal $volume info heal-failed |") - || die "error: Could not execute gluster vol heal $volume info heal-failed"; - foreach my $line (){ + @volinfo = gluster("$gluster vol heal $volume info heal-failed"); + foreach my $line (@volinfo){ # Now, check we don't have any file which the Self-Heal daemon couldn't sync if ($line =~ m/^Number\ of\ entries:\s+(\d+)$/){ $status = "CRITICAL: self-heal error ($1)" if ($1 gt 0); } } - close VOLUMEINFO; - open (VOLUMEINFO, "$gluster vol heal $volume info split-brain |") - || die "error: Could not execute gluster vol heal $volume info split-brain"; - foreach my $line (){ + @volinfo = gluster("$gluster vol heal $volume info split-brain"); + foreach my $line (@volinfo){ # Now, check we don't have any file in a split-brain situation if ($line =~ m/^Number\ of\ entries:\s+(\d+)$/){ $status = "CRITICAL: split-bran ($1)" if ($1 gt 0); } } - close VOLUMEINFO; - open (VOLUMEINFO, "$gluster vol info $volume |") - || die "error: Could not execute gluster vol info $volume"; - foreach my $line (){ + @volinfo = gluster("$gluster vol info $volume"); + foreach my $line (@volinfo){ # Check the volume is started if ($line =~ m/^Status:\s+(\w+)$/){ $status = 'CRITICAL: The volume is not started' unless ($1 eq 'Started'); } } - close VOLUMEINFO; print $status; } elsif ($what eq 'peer'){ - open (PEERLIST, "$gluster pool list |") - || die "error: Could not execute gluster pool list"; - + my @peers = gluster("$gluster pool list"); my $status = 'unknown'; - foreach my $line (){ + foreach my $line (@peers){ if (($line =~ m/^$peer\s+/) || ($line =~ m/^[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}\s+$peer\s+/)){ (undef,undef,$status) = split(/\s+/, $line); } } - close PEERLIST; print $status; } diff --git a/zabbix_scripts/disco_gluster_sudo b/zabbix_scripts/disco_gluster_sudo index b6ded03..73c3ee0 100644 --- a/zabbix_scripts/disco_gluster_sudo +++ b/zabbix_scripts/disco_gluster_sudo @@ -29,11 +29,25 @@ Usage: $0 --what=[volumes|peers] EOF } -if ($what eq 'volumes'){ - open (VOLUMES, "$gluster vol info all |") - || die "error: Could not execute gluster vol info all"; +sub gluster($){ + my $cmd = shift; + my $code = 256; + my @result = (); + # Loop to run gluster cmd as it can fail if two run at the same time + for (my $i = 0; ($code != 0 && $i < 10); $i++){ + open (RES, "$cmd |") + || die "error: Could not execute $cmd"; + @result = ; + close RES; + $code = $?; + sleep(1) unless ($code == 0); + } + die "error: Could not execute $cmd" unless ($code == 0); + return @result; +} - foreach my $line (){ +if ($what eq 'volumes'){ + foreach my $line (gluster("$gluster vol info all")){ if ($line =~ m/^Volume\ Name:\ (\w+)$/){ my $vol = $1; my ($type,$bricks,$uuid,$status,$transport) = ('unknown'); @@ -56,7 +70,6 @@ if ($what eq 'volumes'){ $bricks = $1; } } - close VOLUMEINFO; push @{$json->{data}}, { "{#GLUSTER_VOL_NAME}" => $vol, "{#GLUSTER_VOL_TYPE}" => $type, @@ -67,15 +80,12 @@ if ($what eq 'volumes'){ }; } } - close VOLUMES; } -elsif ($what eq 'peers'){ - open (PEERS, "$gluster peer status |") - || die "error: Could not execute gluster peer status"; +elsif ($what eq 'peers'){ my $peerno = 0; my ($host,$uuid,$status) = ('unknown'); - foreach my $line (){ + foreach my $line (gluster("$gluster peer status")){ if ($line =~ m/^Number of Peers:\ (\d+)$/){ $peerno = $1; } @@ -94,7 +104,6 @@ elsif ($what eq 'peers'){ }; } } - close PEERS; } else{ usage();