parent 8b456bb08d
commit fac2e4cce0
27 changed files with 11 additions and 748 deletions
@@ -1,11 +1,2 @@
{
    my $runasroot = '/usr/bin/mysqladmin status, /sbin/e-smith/db yum_updates show, /var/lib/zabbix/bin/sensors *, /var/lib/zabbix/bin/check_lvm *, /usr/sbin/smartctl -A /dev/*';
    if ( -x '/opt/MegaRAID/MegaCli/MegaCli' ){
        $runasroot .= ', /var/lib/zabbix/bin/megaraid-parser.pl';
    }
    $runasroot .= "\n";
    $OUT .= 'Cmnd_Alias ZABBIX_AGENT_ROOT = '.$runasroot;
}
Cmnd_Alias ZABBIX_AGENT_MYSQL = /usr/bin/du -s /var/lib/mysql
Cmnd_Alias ZABBIX_AGENT = /var/lib/zabbix/bin/*_sudo
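For reference, a sketch of the sudoers fragment this template would render on a host without the MegaCli utility (the command list is taken verbatim from $runasroot above):

    Cmnd_Alias ZABBIX_AGENT_ROOT = /usr/bin/mysqladmin status, /sbin/e-smith/db yum_updates show, /var/lib/zabbix/bin/sensors *, /var/lib/zabbix/bin/check_lvm *, /usr/sbin/smartctl -A /dev/*
    Cmnd_Alias ZABBIX_AGENT_MYSQL = /usr/bin/du -s /var/lib/mysql
    Cmnd_Alias ZABBIX_AGENT = /var/lib/zabbix/bin/*_sudo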
@@ -1,2 +1,2 @@
zabbix ALL=(root) NOPASSWD: ZABBIX_AGENT_ROOT
zabbix ALL=(mysql) NOPASSWD: ZABBIX_AGENT_MYSQL
zabbix ALL=(root) NOPASSWD: ZABBIX_AGENT
@@ -1,14 +0,0 @@
####### USER-DEFINED MONITORED PARAMETERS #######
# Format: UserParameter=<key>,<shell command>
# Note that the shell command must not return an empty string or EOL only
#UserParameter=system.test,who|wc -l
### Set of parameters for monitoring a MySQL server (v3.23.42 and later)
### Change -u<username> and add -p<password> if required
#UserParameter=mysql.ping,mysqladmin -uroot ping|grep alive|wc -l
#UserParameter=mysql.uptime,mysqladmin -uroot status|cut -f2 -d":"|cut -f1 -d"T"
#UserParameter=mysql.threads,mysqladmin -uroot status|cut -f3 -d":"|cut -f1 -d"Q"
#UserParameter=mysql.questions,mysqladmin -uroot status|cut -f4 -d":"|cut -f1 -d"S"
#UserParameter=mysql.slowqueries,mysqladmin -uroot status|cut -f5 -d":"|cut -f1 -d"O"
#UserParameter=mysql.qps,mysqladmin -uroot status|cut -f9 -d":"
#UserParameter=mysql.version,mysql -V
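If one of the commented MySQL UserParameter lines above is enabled, it can be checked locally before the server polls it; a minimal sketch, assuming zabbix_agentd is installed and mysql.uptime has been uncommented:

    # run the command the agent would execute
    mysqladmin -uroot status | cut -f2 -d":" | cut -f1 -d"T"
    # or let the agent evaluate the key itself
    zabbix_agentd -t mysql.uptime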
@@ -0,0 +1 @@
Include=/etc/zabbix/zabbix_agentd.conf.d/
@@ -1,42 +0,0 @@
# Disk I/O Monitoring

# Description: Read and write operations on hdX
# X can be from 1 to 8
# You'll have to create a custom template if
# you want to support more than 8 drives
# You can also monitor RAID devices (/dev/md1 for example)

# Type: Agent or Agent (active)
# Key: vfs.dev.read.hdX vfs.dev.write.hdX
# Type of information: Numeric (float or integer 64bit)
# Units: bytes/sec
# Use multiplier: 512
# Update interval: 60 (for example)
# Store Value: Delta (speed/sec)
# Show Value: As is

# For these UserParameters to work, you need to configure the drives you want to monitor
# in the DB:
# db configuration setprop zabbix-agent HardDrives /dev/sda,/dev/sdb,/dev/sdc,/dev/sdd
# signal-event zabbix-agent-update

{

    my @hd = split( /[,;]/,( ${'zabbix-agent'}{'HardDrives'} || '' ));

    my $cnt = 1;
    foreach my $drive (@hd){
        if ( -e $drive ){
            $drive =~ s|/dev/||;
            $OUT .= "Alias=vfs.dev.read.hd" . $cnt . ":vfs.dev.read[$drive,sectors]\n";
            $OUT .= "Alias=vfs.dev.write.hd" . $cnt . ":vfs.dev.write[$drive,sectors]\n";
            $cnt++;
        }
    }
    for (;$cnt < 9; $cnt++){
        $OUT .= "UserParameter=vfs.dev.read.hd" . $cnt . ",echo '0'\n";
        $OUT .= "UserParameter=vfs.dev.write.hd" . $cnt . ",echo '0'\n";
    }

}
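As a worked example, a sketch of what the fragment above would emit for HardDrives set to /dev/sda,/dev/sdb, assuming both devices exist:

    Alias=vfs.dev.read.hd1:vfs.dev.read[sda,sectors]
    Alias=vfs.dev.write.hd1:vfs.dev.write[sda,sectors]
    Alias=vfs.dev.read.hd2:vfs.dev.read[sdb,sectors]
    Alias=vfs.dev.write.hd2:vfs.dev.write[sdb,sectors]
    UserParameter=vfs.dev.read.hd3,echo '0'
    UserParameter=vfs.dev.write.hd3,echo '0'
    (and so on, padding hd3 through hd8 with echo '0')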
@@ -1,10 +0,0 @@

# Type: Agent or Agent (active)
# Key: lvm[key] where key can be snapshot_max_alloc, snapshots, lv or vg
# Type of information: Numeric (integer 64bit) or characters (for version)
# Units: depends on the key (snapshot_max_alloc is in %)
# Custom multiplier: Do not use
# Store Value: As is

UserParameter=lvm[*],/usr/bin/sudo /var/lib/zabbix/bin/check_lvm $1
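A quick way to exercise this key from the host itself (a sketch, assuming the check_lvm helper shown later in this commit is installed):

    # what the agent runs for lvm[snapshots]
    sudo /var/lib/zabbix/bin/check_lvm snapshots
    # or query through the agent
    zabbix_agentd -t 'lvm[vg]'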
@@ -1,37 +0,0 @@

{

    my $options = '';
    my $spares = ${'zabbix-agent'}{'MegaRaidSpares'} || '';
    $options .= "-s $spares " if ($spares =~ /\d+/);
    my $media_error = ${'zabbix-agent'}{'MegaRaidMediaError'} || '';
    $options .= "-m $media_error " if ($media_error =~ /\d+/);
    my $other_error = ${'zabbix-agent'}{'MegaRaidOtherError'} || '';
    $options .= "-o $other_error " if ($other_error =~ /\d+/);
    my $predictive_error = ${'zabbix-agent'}{'MegaRaidPredictiveError'} || '';
    $options .= "-p $predictive_error " if ($predictive_error =~ /\d+/);

    # As this check requires the MegaCli utility, first check if it's present:
    if ( -x '/opt/MegaRAID/MegaCli/MegaCli' ){
        $OUT .= <<"HERE";

# Report status of every RAID array using a MegaRAID controller (requires the MegaCli utility)
# This controller is used, for example, on PERC5/6(i) RAID cards

# Description: MegaRaid Status
# Type: Agent or Agent (active)
# Key: raid.mega.status
# Type of Information: Character
# Show Value: As is

# The value reported is like:
# State: OK: 0:0:RAID-1:2 drives:68GB:Optimal 0:1:RAID-5:4 drives:837GB:Optimal Drives:7
#

# Tips: You can add a simple trigger on this check like:
# \{ hostname:raid.mega.status.str( OK ) \}=0
UserParameter=raid.mega.status,/usr/bin/sudo /var/lib/zabbix/bin/megaraid-parser.pl $options

HERE
    }
}
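A sketch of the UserParameter line this fragment would emit when MegaCli is present and, for instance, MegaRaidSpares is set to 1 and MegaRaidMediaError to 10, with the other DB properties left unset:

    UserParameter=raid.mega.status,/usr/bin/sudo /var/lib/zabbix/bin/megaraid-parser.pl -s 1 -m 10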
@@ -1,11 +0,0 @@

# Description: Temperature
# Type: Agent or Agent (active)
# Key: sensors[mb] (for example)
# Type of information: Numeric (float)
# Units: °C
# Custom multiplier: Do not use
# Store Value: As is

UserParameter=sensors[*],/usr/bin/sudo /var/lib/zabbix/bin/sensors $1
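To see what the item would return, the underlying helper can be called directly (a sketch, assuming the sensors wrapper shipped at the end of this commit):

    # what the agent runs for sensors[cpu0]
    sudo /var/lib/zabbix/bin/sensors cpu0
    # or through the agent
    zabbix_agentd -t 'sensors[mb]'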
@@ -1,43 +0,0 @@
# Smart Monitoring

# Description: Smart Value <key>
# Key can be one of: Raw_Read_Error_Rate, Spin_Up_Time, Start_Stop_Count
# Reallocated_Sector_Ct, Seek_Error_Rate, Power_On_Hours, Spin_Retry_Count,
# Power_Cycle_Count, Temperature_Celsius, Hardware_ECC_Recovered,
# Current_Pending_Sector, Offline_Uncorrectable, UDMA_CRC_Error_Count,
# Multi_Zone_Error_Rate, TA_Increase_Count

# Type: Agent or Agent (active)
# Key: system.smart.hdX[<key>] (for example system.smart.hd1[Reallocated_Sector_Ct])
# Type of information: Numeric (integer 64bit)
# Units: (none)
# Use multiplier: No
# Update interval: 120 (for example)
# Store Value: As is
# Show Value: As is

# For Seek_Error_Rate, Raw_Read_Error_Rate, Hardware_ECC_Recovered you can store the value as Delta
# in order to graph the error rate in a readable format

# For these UserParameters to work, you need to configure the drives you want to monitor
# in the DB:
# db configuration setprop zabbix-agent SmartDrives /dev/sda,/dev/sdb,/dev/sdc,/dev/sdd
# signal-event zabbix-agent-update

{

    my @hd = split( /[,;]/,( ${'zabbix-agent'}{'SmartDrives'} || '' ));

    my $cnt = 1;
    foreach my $drive (@hd){
        if ( -e $drive ){
            $OUT .= "UserParameter=system.smartd.hd" . $cnt . "[*],/usr/bin/sudo /usr/sbin/smartctl -A $drive| grep \$1| tail -1| cut -c 88-|cut -f1 -d' '\n";
            $cnt++;
        }
    }
    for (;$cnt < 9; $cnt++){
        $OUT .= "UserParameter=system.smartd.hd" . $cnt . "[*],echo '0'\n";
    }

}
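For illustration, a sketch of what this block would generate for SmartDrives set to a single existing /dev/sda:

    UserParameter=system.smartd.hd1[*],/usr/bin/sudo /usr/sbin/smartctl -A /dev/sda| grep $1| tail -1| cut -c 88-|cut -f1 -d' '
    UserParameter=system.smartd.hd2[*],echo '0'
    (hd3 through hd8 are padded with echo '0' in the same way)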
@@ -1,53 +0,0 @@
# Squid

# Description: Squid Request Hit Ratio
# Type: Agent or Agent (active)
# Key: squid.request_hit_ratio
# Type of information: Numeric (float)
# Units: %
# Custom multiplier: Do not use
# Store Value: As is

UserParameter=squid.request_hit_ratio,squidclient mgr:info|grep 'Request Hit Ratios:'|cut -d':' -f3|cut -d',' -f1|tr -d ' %'

# Description: Squid Byte Hit Ratio
# Type: Agent or Agent (active)
# Key: squid.byte_hit_ratio
# Type of information: Numeric (float)
# Units: %
# Custom multiplier: Do not use
# Store Value: As is

UserParameter=squid.byte_hit_ratio,squidclient mgr:info|grep 'Byte Hit Ratios:'|cut -d':' -f3|cut -d',' -f1|tr -d ' %'

# Description: Squid Average HTTP requests per minute
# Type: Agent or Agent (active)
# Key: squid.avg_http_req_per_min
# Type of information: Numeric (float)
# Units: Req/min
# Custom multiplier: Do not use
# Store Value: As is

UserParameter=squid.avg_http_req_per_min,squidclient mgr:info|grep 'Average HTTP requests per minute since start:'|cut -d':' -f2| tr -d ' \t'

# Description: Squid Disk Cache Size
# Type: Agent or Agent (active)
# Key: squid.cache_size_disk
# Type of information: Numeric (integer 64bit)
# Units: Bytes
# Custom multiplier: 1024
# Store Value: As is

UserParameter=squid.cache_size_disk,squidclient mgr:info|grep 'Storage Swap size:' | awk '\{print $4\}'

# Description: Squid Memory Cache Size
# Type: Agent or Agent (active)
# Key: squid.cache_size_mem
# Type of information: Numeric (integer 64bit)
# Units: Bytes
# Custom multiplier: 1024
# Store Value: As is

UserParameter=squid.cache_size_mem,squidclient mgr:info|grep 'Storage Mem size:' | awk '\{print $4\}'
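These items all parse the output of squidclient's cache-manager report; the underlying fields can be inspected by hand (a sketch, assuming squid and squidclient are installed locally):

    squidclient mgr:info | grep -E 'Request Hit Ratios:|Byte Hit Ratios:|Storage Swap size:|Storage Mem size:'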
@@ -1,16 +0,0 @@

# Report status of every RAID array (parsing /proc/mdstat)

# Description: Software Raid Status
# Type: Agent or Agent (active)
# Key: raid.sw.status
# Type of Information: Character
# Show Value: As is

# The value reported is like:
# OK: md3:raid1:2 drives:931GB:Optimal md2:raid1:2 drives:931GB:Optimal md1:raid1:2 drives:101MB:Optimal

# Tips: You can add a simple trigger on this check like:
# \{ hostname:raid.sw.status.str( OK ) \}=0
UserParameter=raid.sw.status,/var/lib/zabbix/bin/mdstat-parser.pl
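The parser can be run by hand to see the same string the agent would report (a sketch; the script itself appears further down in this commit):

    /var/lib/zabbix/bin/mdstat-parser.pl
    # check a single array only
    /var/lib/zabbix/bin/mdstat-parser.pl --device=md0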
@@ -1,34 +0,0 @@
# Monitor UPS status

# Description: NUT UPS Load
# Type: Agent or Agent (active)
# Key: ups.load
# Type of information: Numeric (float)
# Units: %
# Multiplier: Do not use
# Store Value: As is
UserParameter=ups.load[*],upsc $1@localhost ups.load

# Description: NUT UPS Battery Charge
# Type: Agent or Agent (active)
# Key: ups.battery.charge
# Type of information: Numeric (float)
# Units: %
# Multiplier: Do not use
# Store Value: As is
UserParameter=ups.battery.charge[*],upsc $1@localhost battery.charge

# Description: NUT UPS Status
# Type: Agent or Agent (active)
# Key: ups.status
# Type of information: Character
# Show Value: As is (you can also define a dictionary, e.g. OL => On Line, etc.)
UserParameter=ups.status[*],upsc $1@localhost ups.status

# Description: NUT UPS Model
# Type: Agent or Agent (active)
# Key: ups.model
# Type of information: Text
UserParameter=ups.model[*],upsc $1@localhost ups.model
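The first argument of each key is the NUT UPS name, so ups.status[myups] ends up calling upsc directly; that call can be tried by hand (a sketch, assuming a NUT UPS is configured locally, and the name "myups" is illustrative):

    upsc myups@localhost ups.status
    zabbix_agentd -t 'ups.status[myups]'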
@@ -1,36 +0,0 @@
#!/bin/bash

PATH=$PATH:/usr/sbin:/usr/local/sbin

snapshot_max_alloc(){
    MAX_PERCENT=0

    for PERCENT in $(lvdisplay | grep % | sed -e 's/ Allocated to snapshot //g' -e 's/%//g'); do
        # numeric comparison (values can be fractional; a plain string test would mis-order e.g. 9 and 10)
        if awk -v p="$PERCENT" -v m="$MAX_PERCENT" 'BEGIN { exit !(p > m) }'; then
            MAX_PERCENT=$PERCENT
        fi
    done

    echo "$MAX_PERCENT"
}

snapshots(){
    lvdisplay | grep % | wc -l
}

lv(){
    lvdisplay | grep 'LV Name' | wc -l
}

vg(){
    vgdisplay | grep 'VG Name' | wc -l
}

case $1 in
    snapshot_max_alloc|snapshots|lv|vg)
        $1
        ;;
    *)
        echo 'ZBX_NOTSUPPORTED'
esac
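A quick sanity check from the shell (a sketch; keys other than the four listed above return ZBX_NOTSUPPORTED, and lvdisplay/vgdisplay generally need root):

    sudo /var/lib/zabbix/bin/check_lvm lv
    sudo /var/lib/zabbix/bin/check_lvm snapshot_max_alloc
    /var/lib/zabbix/bin/check_lvm foo   # prints ZBX_NOTSUPPORTED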
@@ -1,155 +0,0 @@
#!/usr/bin/env perl

# Get status of Linux software RAID for SNMP / Nagios
# Author: Michal Ludvig <michal@logix.cz>
# http://www.logix.cz/michal/devel/nagios

# Slightly modified by Daniel B. for integration on SME Server / Zabbix
# 24 Apr 2009
# - One-line report
# - Support RAID 0 arrays
# - Report WARNING if an array is rebuilding

#
# Simple parser for /proc/mdstat that outputs status of all
# or some RAID devices. Possible results are OK and CRITICAL.
# It could eventually be extended to output WARNING result in
# case the array is being rebuilt or if there are still some
# spares remaining, but for now leave it as it is.
#
# To run the script remotely via SNMP daemon (net-snmp) add the
# following line to /etc/snmpd.conf:
#
# extend raid-md0 /root/parse-mdstat.pl --device=md0
#
# The script result will be available e.g. with command:
#
# snmpwalk -v2c -c public localhost .1.3.6.1.4.1.8072.1.3.2

use strict;
use Getopt::Long;

# Sample /proc/mdstat output:
#
# Personalities : [raid1] [raid5]
# md0 : active (read-only) raid1 sdc1[1]
#       2096384 blocks [2/1] [_U]
#
# md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S)
#       995712 blocks level 5, 64k chunk, algorithm 2 [3/2] [U_U]
#       [=================>...] recovery = 86.0% (429796/497856) finish=0.0min speed=23877K/sec
#
# unused devices: <none>

my $file = "/proc/mdstat";
my $device = "all";

# Get command line options.
GetOptions ('file=s' => \$file,
            'device=s' => \$device,
            'help' => sub { &usage() } );

## Strip leading "/dev/" from --device in case it has been given
$device =~ s/^\/dev\///;

## Return codes for Nagios
my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);

## This is a global return value - set to the worst result we get overall
my $retval = 0;

my (%active_devs, %failed_devs, %spare_devs, %devs_total, %level, %size, %status);
my @raids;
my $result = 'OK';

open FILE, "< $file" or die "Can't open $file : $!";
while (<FILE>) {
    next if ! /^(md\d+)+\s*:/;
    next if $device ne "all" and $device ne $1;
    my $dev = $1;
    push @raids, $dev;

    my @array = split(/ /);
    $devs_total{$dev} = 0;
    my $devs_up = 0;
    my $missing = 0;
    for $_ (@array) {
        $level{$dev} = $1 if /^(raid\d+)$/;
        next if ! /(\w+)\[\d+\](\(.\))*/;
        $devs_total{$dev}++;
        if ($2 eq "(F)") {
            $failed_devs{$dev} .= "$1,";
        }
        elsif ($2 eq "(S)") {
            $spare_devs{$dev} .= "$1,";
        }
        else {
            $active_devs{$dev} .= "$1,";
            $devs_up++;
        }
    }
    if (! defined($active_devs{$dev})) { $active_devs{$dev} = "none"; }
    else { $active_devs{$dev} =~ s/,$//; }
    if (! defined($spare_devs{$dev})) { $spare_devs{$dev} = "none"; }
    else { $spare_devs{$dev} =~ s/,$//; }
    if (! defined($failed_devs{$dev})) { $failed_devs{$dev} = "none"; }
    else { $failed_devs{$dev} =~ s/,$//; }

    $_ = <FILE>;
    /(\d+)\ blocks\ (.*)(\[.*\])\s?$/;
    $size{$dev} = int($1/1024);
    #print "$3\n";
    $missing = 1 if ($3 =~ m/_/);
    if ($size{$dev} > 1024){
        $size{$dev} = int($size{$dev}/1024)."GB";
    }
    else{
        $size{$dev} .= "MB";
    }
    $_ = <FILE>;
    if (($devs_total{$dev} > $devs_up) || ($failed_devs{$dev} ne "none") || (($missing) && (!/recovery/))) {
        $status{$dev} = "Degraded";
        $result = "CRITICAL";
        $retval = $ERRORS{"CRITICAL"};
    }
    else {
        $status{$dev} = "Optimal";
    }
    if (/recovery/){
        $status{$dev} = "Rebuilding";
        if ($result eq "OK"){
            $result = "WARNING";
            $retval = $ERRORS{"WARNING"};
        }
    }

}
print "$result: ";
foreach my $raid (@raids){
    print "$raid:$level{$raid}:$devs_total{$raid} drives:$size{$raid}:$status{$raid} ";
}
print "\n";
close FILE;
exit $retval;

# =====
sub usage()
{
    printf("
Check status of Linux SW RAID

Author: Michal Ludvig <michal\@logix.cz> (c) 2006
        http://www.logix.cz/michal/devel/nagios
Modified by Daniel B. <daniel\@firewall-services.com>:

Usage: mdstat-parser.pl [options]

  --file=<filename>   Name of file to parse. Default is /proc/mdstat
  --device=<device>   Name of MD device, e.g. md0. Default is \"all\"

");
    exit(1);
}
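The parser can also be exercised by hand (a sketch; --file lets it run against a saved copy of /proc/mdstat, the sample file name is illustrative, and the exit code follows the Nagios convention defined in %ERRORS):

    /var/lib/zabbix/bin/mdstat-parser.pl
    /var/lib/zabbix/bin/mdstat-parser.pl --device=md1
    /var/lib/zabbix/bin/mdstat-parser.pl --file=/tmp/mdstat.sample; echo "exit code: $?"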
@@ -1,226 +0,0 @@
#!/usr/bin/perl -w

# check_megaraid_sas Nagios plugin
# Copyright (C) 2007 Jonathan Delgado, delgado@molbio.mgh.harvard.edu
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
#
# Nagios plugin to monitor the status of volumes attached to a LSI Megaraid SAS
# controller, such as the Dell PERC5/i and PERC5/e. If you have any hotspares
# attached to the controller, you can specify the number you should expect to
# find with the '-s' flag.
#
# The paths for the Nagios plugins lib and MegaCli may need to be changed.
#
# $Author: delgado $
# $Revision: #3 $ $Date: 2007/06/07 $

# Slightly modified by Daniel B. for SME Server integration with Zabbix
# 23 Apr 2009

use strict;
use Getopt::Std;

our($opt_h, $opt_s, $opt_o, $opt_m, $opt_p);

getopts('hs:o:p:m:');

if ( $opt_h ) {
    print "Usage: $0 [-s number] [-m number] [-p number] [-o number]\n";
    print "    -s is how many hotspares are attached to the controller\n";
    print "    -m is the number of media errors to ignore\n";
    print "    -p is the predictive error count to ignore\n";
    print "    -o is the number of other disk errors to ignore\n";
    exit;
}

my $megacli = '/opt/MegaRAID/MegaCli/MegaCli';

## Return codes for Nagios
my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);

my ($adapters);
my $hotspares = 0;
my $hotsparecount = 0;
my $pdbad = 0;
my $pdcount = 0;
my $mediaerrors = 0;
my $mediaallow = 0;
my $prederrors = 0;
my $predallow = 0;
my $othererrors = 0;
my $otherallow = 0;
my $result = '';
my $status = 'OK';

sub max_state ($$) {
    my ($current, $compare) = @_;

    if (($compare eq 'CRITICAL') || ($current eq 'CRITICAL')) {
        return 'CRITICAL';
    } elsif ($compare eq 'OK') {
        return $current;
    } elsif ($compare eq 'WARNING') {
        return 'WARNING';
    } elsif (($compare eq 'UNKNOWN') && ($current eq 'OK')) {
        return 'UNKNOWN';
    } else {
        return $current;
    }
}

if ( $opt_s ) {
    $hotspares = $opt_s;
}
if ( $opt_m ) {
    $mediaallow = $opt_m;
}
if ( $opt_p ) {
    $predallow = $opt_p;
}
if ( $opt_o ) {
    $otherallow = $opt_o;
}

# Get the number of RAID controllers we have
open (ADPCOUNT, "$megacli -adpCount -NoLog |")
    || die "error: Could not execute MegaCli -adpCount";

while (<ADPCOUNT>) {
    if ( m/Controller Count:\s*(\d+)/ ) {
        $adapters = $1;
        last;
    }
}
close ADPCOUNT;

ADAPTER: for ( my $adp = 0; $adp < $adapters; $adp++ ) {
    # Get the number of logical drives on this adapter
    open (LDGETNUM, "$megacli -LdGetNum -a$adp -NoLog |")
        || die "error: Could not execute $megacli -LdGetNum -a$adp";

    my ($ldnum);
    while (<LDGETNUM>) {
        if ( m/Number of Virtual drives configured on adapter \d:\s*(\d+)/i ) {
            $ldnum = $1;
            last;
        }
    }
    close LDGETNUM;

    LDISK: for ( my $ld = 0; $ld < $ldnum; $ld++ ) {
        # Get info on this particular logical drive
        open (LDINFO, "$megacli -LdInfo -L$ld -a$adp -NoLog |")
            || die "error: Could not execute $megacli -LdInfo -L$ld -a$adp -NoLog";

        my ($size, $unit, $raidlevel, $ldpdcount, $spandepth, $state);
        while (<LDINFO>) {
            if ( m/Size:\s*((\d+)(MB|GB|TB))/ ) {
                $size = $2;
                $unit = $3;
                # Adjust MB to GB if that's what we got
                if ( $unit eq 'MB' ) {
                    $size = sprintf( "%.0f", ($size / 1024) );
                    $unit = 'GB';
                }
            } elsif ( m/State:\s*(\w+)/ ) {
                $state = $1;
                if ( $state ne 'Optimal' ) {
                    $status = 'CRITICAL';
                }
            } elsif ( m/Number Of Drives( per span)?:\s*(\d+)/ ) {
                $ldpdcount = $2;
            } elsif ( m/Span Depth:\s*(\d+)/ ) {
                $spandepth = $1;
                $ldpdcount = $ldpdcount * $spandepth;
            } elsif ( m/RAID Level: Primary-(\d)/ ) {
                $raidlevel = $1;
            }
        }
        close LDINFO;

        $result .= "$adp:$ld:RAID-$raidlevel:$ldpdcount drives:$size$unit:$state ";

    } #LDISK
    close LDINFO;

    # Get info on physical disks for this adapter
    open (PDLIST, "$megacli -PdList -a$adp -NoLog |")
        || die "error: Could not execute $megacli -PdList -a$adp -NoLog";

    my ($slotnumber,$fwstate);
    PDISKS: while (<PDLIST>) {
        if ( m/Slot Number:\s*(\d+)/ ) {
            $slotnumber = $1;
            # Don't care about backplane error counts
            next if ( $slotnumber == 255 );
            $pdcount++;
        } elsif ( m/(\w+) Error Count:\s*(\d+)/ ) {
            if ( $1 eq 'Media' ) {
                $mediaerrors += $2;
            } else {
                $othererrors += $2;
            }
        } elsif ( m/Predictive Failure Count:\s*(\d+)/ ) {
            $prederrors += $1;
        } elsif ( m/Firmware state:\s*(\w+)/ ) {
            $fwstate = $1;
            if ( $fwstate eq 'Hotspare' ) {
                $hotsparecount++;
            } elsif ( $fwstate eq 'Online' ) {
                # Do nothing
            } elsif ( $slotnumber != 255 ) {
                $pdbad++;
                $status = 'CRITICAL';
            }
        }
    } #PDISKS
    close PDLIST;
}

$result .= "Drives:$pdcount ";

# Any bad disks?
if ( $pdbad ) {
    $result .= "$pdbad Bad Drives ";
}

my $errorcount = $mediaerrors + $prederrors + $othererrors;
# Were there any errors?
if ( $errorcount ) {
    $result .= "($errorcount Errors) ";
    if ( ( $mediaerrors > $mediaallow ) ||
         ( $prederrors > $predallow ) ||
         ( $othererrors > $otherallow ) ) {
        $status = max_state($status, 'WARNING');
    }
}

# Do we have as many hotspares as expected (if any)
if ( $hotspares ) {
    if ( $hotsparecount < $hotspares ) {
        $status = max_state($status, 'WARNING');
        $result .= "Hotspare(s):$hotsparecount (of $hotspares)";
    } else {
        $result .= "Hotspare(s):$hotsparecount";
    }
}

print STDOUT "$status: $result\n";
exit $ERRORS{$status};
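Run directly, the script prints a one-line summary and exits with the matching Nagios-style code (a sketch; the thresholds mirror the -s/-m/-p/-o options parsed above):

    sudo /var/lib/zabbix/bin/megaraid-parser.pl -s 1 -m 10
    echo $?   # 0 = OK, 1 = WARNING, 2 = CRITICAL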
@@ -1,57 +0,0 @@
#!/bin/bash

KEY=$1

case $KEY in
    cpu0)
        # Here are some examples on how to retrieve temperatures
        # of your system:
        #
        # If your motherboard supports IPMI and you have the ipmitool package
        # you can use this:
        # Of course, you'll have to adapt the command, as each controller may report different sensor names

        # /usr/bin/ipmitool sdr | grep 'P1 Therm Margin' | cut -d'|' -f 2 | awk '{print $1}'

        # Else, if your motherboard supports lm_sensors, you can use something
        # like this:
        # /usr/bin/sensors | grep temp1 | cut -d':' -f 2 | awk '{print $1}' | sed -e "s/+//g" -e "s/.C//g"

        # You can also try to get your CPU temperature with acpi:
        # cat /proc/acpi/thermal_zone/THRM/temperature | awk '{print $2}'

        # It's important that your commands return only numerical values

        # The default for now is to use IPMI
        /usr/bin/ipmitool sdr type Temperature | grep 'P1 Therm Margin' | cut -d'|' -f 2 | awk '{print $1}'

        ;;
    cpu1)
        # This will be the same as the above, but for the second CPU

        /usr/bin/ipmitool sdr type Temperature | grep 'P2 Therm Margin' | cut -d'|' -f 2 | awk '{print $1}'

        ;;
    mb)
        # AFAIK, motherboard temperature can be retrieved only with lm_sensors or IPMI

        /usr/bin/ipmitool sdr type Temperature | grep 'Baseboard' | cut -d'|' -f 2 | awk '{print $1}'

        ;;
    ambiant)
        # Some IPMI controllers also report the ambient temperature
        /usr/bin/ipmitool sdr type Temperature | grep Ambient | cut -d'|' -f 2 | awk '{print $1}'

        ;;
    hd*|sd*)
        # Here, we want a hard drive temperature, so we'll use smartctl
        # We could also use hddtemp, but it doesn't seem to work for a lot of drives, where smartctl does
        /usr/sbin/smartctl -a /dev/$KEY | grep Temperature_Celsius | awk '{print $10}'

        ;;
    *)
        # Else, we tell the server the item is not supported
        echo 'ZBX_NOTSUPPORTED'
        ;;
esac
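A quick check of the wrapper from the shell (a sketch; the keys match the case arms above, and anything else returns ZBX_NOTSUPPORTED):

    sudo /var/lib/zabbix/bin/sensors cpu0
    sudo /var/lib/zabbix/bin/sensors sda
    /var/lib/zabbix/bin/sensors foo   # prints ZBX_NOTSUPPORTED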