parent
8b456bb08d
commit
fac2e4cce0
27 changed files with 11 additions and 748 deletions
@ -1,11 +1,2 @@ |
|||||||
{ |
Cmnd_Alias ZABBIX_AGENT = /var/lib/zabbix/bin/*_sudo |
||||||
my $runasroot = '/usr/bin/mysqladmin status, /sbin/e-smith/db yum_updates show, /var/lib/zabbix/bin/sensors *, /var/lib/zabbix/bin/check_lvm *, /usr/sbin/smartctl -A /dev/*'; |
|
||||||
if ( -x '/opt/MegaRAID/MegaCli/MegaCli' ){ |
|
||||||
$runasroot .= ', /var/lib/zabbix/bin/megaraid-parser.pl'; |
|
||||||
} |
|
||||||
$runasroot .= "\n"; |
|
||||||
$OUT .= 'Cmnd_Alias ZABBIX_AGENT_ROOT = '.$runasroot; |
|
||||||
|
|
||||||
} |
|
||||||
Cmnd_Alias ZABBIX_AGENT_MYSQL = /usr/bin/du -s /var/lib/mysql |
|
||||||
|
|
||||||
|
@ -1,2 +1,2 @@ |
|||||||
zabbix ALL=(root) NOPASSWD: ZABBIX_AGENT_ROOT |
zabbix ALL=(root) NOPASSWD: ZABBIX_AGENT |
||||||
zabbix ALL=(mysql) NOPASSWD: ZABBIX_AGENT_MYSQL |
|
||||||
|
@ -1,14 +0,0 @@ |
|||||||
####### USER-DEFINED MONITORED PARAMETERS ####### |
|
||||||
# Format: UserParameter=<key>,<shell command> |
|
||||||
# Note that shell command must not return empty string or EOL only |
|
||||||
#UserParameter=system.test,who|wc -l |
|
||||||
### Set of parameter for monitoring MySQL server (v3.23.42 and later) |
|
||||||
### Change -u<username> and add -p<password> if required |
|
||||||
#UserParameter=mysql.ping,mysqladmin -uroot ping|grep alive|wc -l |
|
||||||
#UserParameter=mysql.uptime,mysqladmin -uroot status|cut -f2 -d":"|cut -f1 -d"T" |
|
||||||
#UserParameter=mysql.threads,mysqladmin -uroot status|cut -f3 -d":"|cut -f1 -d"Q" |
|
||||||
#UserParameter=mysql.questions,mysqladmin -uroot status|cut -f4 -d":"|cut -f1 -d"S" |
|
||||||
#UserParameter=mysql.slowqueries,mysqladmin -uroot status|cut -f5 -d":"|cut -f1 -d"O" |
|
||||||
#UserParameter=mysql.qps,mysqladmin -uroot status|cut -f9 -d":" |
|
||||||
#UserParameter=mysql.version,mysql -V |
|
||||||
|
|
@ -0,0 +1 @@ |
|||||||
|
Include=/etc/zabbix/zabbix_agentd.conf.d/ |
@ -1,42 +0,0 @@ |
|||||||
# Disk I/O Monitoring |
|
||||||
|
|
||||||
# Description: Read operations on hdX |
|
||||||
# X can be from 1 to 8 |
|
||||||
# you'll have to create a custom template if |
|
||||||
# you want to support more than 8 drives |
|
||||||
# You can also monitor raid devices (/dev/md1 for example) |
|
||||||
|
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: vfs.dev.read.hdX vfs.dev.write.hdX |
|
||||||
# Type of information: Numeric (Float or integer 64bit) |
|
||||||
# Units: bytes/sec |
|
||||||
# Use multiplier: 512 |
|
||||||
# Update interval: 60 (for example) |
|
||||||
# Store Value: Delta (speed/sec) |
|
||||||
# Show Value: As is |
|
||||||
|
|
||||||
# For these UserParameter to work, you need to configure the drives you want to monitor |
|
||||||
# in the DB: |
|
||||||
# db configuration setprop zabbix-agent HardDrives /dev/sda,/dev/sdb,/dev/sdc,/dev/sdd |
|
||||||
# signal-event zabbix-agent-update |
|
||||||
|
|
||||||
{
    # Template fragment: emit one read/write item pair per configured drive.
    #
    # The drive list comes from the HardDrives property of the zabbix-agent
    # configuration entry (comma or semicolon separated). Whitespace around
    # each entry is stripped and empty entries are dropped, so a value such
    # as "/dev/sda, /dev/sdb" no longer loses its second drive to a failed
    # existence test on " /dev/sdb".
    my @hd = grep { length }
             map  { my $entry = $_; $entry =~ s/^\s+|\s+$//g; $entry }
             split( /[,;]/, ( ${'zabbix-agent'}{'HardDrives'} || '' ) );

    my $cnt = 1;
    foreach my $drive (@hd) {
        # Entries that do not exist on this system do not consume a slot.
        next unless -e $drive;

        # Strip the leading /dev/ before building the item key. Work on a
        # copy so the list element itself is left untouched.
        ( my $name = $drive ) =~ s|^/dev/||;

        $OUT .= "Alias=vfs.dev.read.hd" . $cnt . ":vfs.dev.read[$name,sectors]\n";
        $OUT .= "Alias=vfs.dev.write.hd" . $cnt . ":vfs.dev.write[$name,sectors]\n";
        $cnt++;
    }

    # Fill the remaining slots (up to hd8) with placeholders that return 0.
    for ( ; $cnt < 9; $cnt++ ) {
        $OUT .= "UserParameter=vfs.dev.read.hd" . $cnt . ",echo '0'\n";
        $OUT .= "UserParameter=vfs.dev.write.hd" . $cnt . ",echo '0'\n";
    }
}
|
||||||
|
|
@ -1,10 +0,0 @@ |
|||||||
|
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: lvm[key] where key can be snapshot_max_alloc, snapshots, lv or vg
|
||||||
# Type of information: Numeric (integer 64bit) or characters (for version) |
|
||||||
# Units: depends on the key (snapshot_max_alloc is in %) |
|
||||||
# Custom multiplier: Do not use |
|
||||||
# Store Value: As is |
|
||||||
|
|
||||||
UserParameter=lvm[*],/usr/bin/sudo /var/lib/zabbix/bin/check_lvm $1 |
|
||||||
|
|
@ -1,37 +0,0 @@ |
|||||||
|
|
||||||
{
    # Template fragment: emit the raid.mega.status UserParameter when the
    # MegaCli utility is installed.
    #
    # Each optional threshold is read from the zabbix-agent configuration
    # entry and turned into a command line flag for megaraid-parser.pl.
    # Values are interpolated into a command that runs through sudo, so only
    # strings made exclusively of digits are accepted (the previous
    # unanchored /\d+/ test let through any string containing a digit).
    my @thresholds = (
        [ '-s', 'MegaRaidSpares' ],
        [ '-m', 'MegaRaidMediaError' ],
        [ '-o', 'MegaRaidOtherError' ],
        [ '-p', 'MegaRaidPredictiveError' ],
    );

    my $options = '';
    foreach my $threshold (@thresholds) {
        my ( $flag, $property ) = @$threshold;
        my $value = ${'zabbix-agent'}{$property} || '';

        # Keep the trailing space: it separates consecutive options.
        $options .= "$flag $value " if ( $value =~ /\A\d+\z/ );
    }

    # As this check requires the MegaCli utility, first check if it's present:
    if ( -x '/opt/MegaRAID/MegaCli/MegaCli' ) {
        $OUT .=<<"HERE";

# Report status of every Raid Array using the MegaRaid controler (Requires the MegaCli utility)
# This controler is used for example on perc5/6(i) Raid card

# Description: MegaRaid Status
# Type: Agent or Agent (active)
# Key: raid.mega.status
# Type of Information: Character
# Show Value: As is

# The value reported is like:
# State: OK: 0:0:RAID-1:2 drives:68GB:Optimal 0:1:RAID-5:4 drives:837GB:Optimal Drives:7
#

# Tips: You can add a simple trigger on this check like:
# \{ hostname:raid.mega.status.str( OK ) \}=0
UserParameter=raid.mega.status,/usr/bin/sudo /var/lib/zabbix/bin/megaraid-parser.pl $options

HERE
    }
}
|
@ -1,11 +0,0 @@ |
|||||||
|
|
||||||
# Description: Temperature |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: sensors[mb] (for example) |
|
||||||
# Type of information: Numeric (float) |
|
||||||
# Units: °C |
|
||||||
# Custom multiplier: Do not use |
|
||||||
# Store Value: As is |
|
||||||
|
|
||||||
UserParameter=sensors[*],/usr/bin/sudo /var/lib/zabbix/bin/sensors $1 |
|
||||||
|
|
@ -1,43 +0,0 @@ |
|||||||
# Smart Monitoring |
|
||||||
|
|
||||||
# Description: Smart Value <key> |
|
||||||
# Key can be one of: Raw_Read_Error_Rate, Spin_Up_Time, Start_Stop_Count |
|
||||||
# Reallocated_Sector_Ct, Seek_Error_Rate, Power_On_Hours, Spin_Retry_Count, |
|
||||||
# Power_Cycle_Count, Temperature_Celsius, Hardware_ECC_Recovered, |
|
||||||
# Current_Pending_Sector, Offline_Uncorrectable, UDMA_CRC_Error_Count, |
|
||||||
# Multi_Zone_Error_Rate, TA_Increase_Count |
|
||||||
|
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: system.smartd.hdX[<key>] (for example system.smartd.hd1[Reallocated_Sector_Ct])
|
||||||
# Type of information: Numeric (integer 64bit) |
|
||||||
# Units: (none) |
|
||||||
# Use multiplier: No |
|
||||||
# Update interval: 120 (for example) |
|
||||||
# Store Value: As is |
|
||||||
# Show Value: As is |
|
||||||
|
|
||||||
# For Seek_Error_Rate, Raw_Read_Error_Rate, Hardware_ECC_Recovered you can store value as Delta |
|
||||||
# in order to graph the error rate in a readable format |
|
||||||
|
|
||||||
# For these UserParameter to work, you need to configure the drives you want to monitor |
|
||||||
# in the DB: |
|
||||||
# db configuration setprop zabbix-agent SmartDrives /dev/sda,/dev/sdb,/dev/sdc,/dev/sdd |
|
||||||
# signal-event zabbix-agent-update |
|
||||||
|
|
||||||
{
    # Template fragment: emit one SMART attribute item per configured drive.
    #
    # The drive list comes from the SmartDrives property of the zabbix-agent
    # configuration entry (comma or semicolon separated). Whitespace around
    # each entry is stripped and empty entries are dropped, so a value such
    # as "/dev/sda, /dev/sdb" no longer loses its second drive to a failed
    # existence test on " /dev/sdb".
    my @hd = grep { length }
             map  { my $entry = $_; $entry =~ s/^\s+|\s+$//g; $entry }
             split( /[,;]/, ( ${'zabbix-agent'}{'SmartDrives'} || '' ) );

    my $cnt = 1;
    foreach my $drive (@hd) {
        # Entries that do not exist on this system do not consume a slot.
        next unless -e $drive;

        # \$1 is emitted literally as $1 in the generated line; it is the
        # item's key parameter and is used here as the grep pattern.
        $OUT .= "UserParameter=system.smartd.hd" . $cnt
              . "[*],/usr/bin/sudo /usr/sbin/smartctl -A $drive| grep \$1| tail -1| cut -c 88-|cut -f1 -d' '\n";
        $cnt++;
    }

    # Fill the remaining slots (up to hd8) with placeholders that return 0.
    for ( ; $cnt < 9; $cnt++ ) {
        $OUT .= "UserParameter=system.smartd.hd" . $cnt . "[*],echo '0'\n";
    }
}
|
||||||
|
|
@ -1,53 +0,0 @@ |
|||||||
# Squid |
|
||||||
|
|
||||||
# Description: Squid Request Hit Ratio |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: squid.request_hit_ratio |
|
||||||
# Type of information: Numeric (float) |
|
||||||
# Units: % |
|
||||||
# Custom multiplier: Do not use |
|
||||||
# Store Value: As is |
|
||||||
|
|
||||||
UserParameter=squid.request_hit_ratio,squidclient mgr:info|grep 'Request Hit Ratios:'|cut -d':' -f3|cut -d',' -f1|tr -d ' %' |
|
||||||
|
|
||||||
# Description: Squid Byte Hit Ratio |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: squid.byte_hit_ratio |
|
||||||
# Type of information: Numeric (float) |
|
||||||
# Units: % |
|
||||||
# Custom multiplier: Do not use |
|
||||||
# Store Value: As is |
|
||||||
|
|
||||||
UserParameter=squid.byte_hit_ratio,squidclient mgr:info|grep 'Byte Hit Ratios:'|cut -d':' -f3|cut -d',' -f1|tr -d ' %' |
|
||||||
|
|
||||||
# Description: Squid Average HTTP request per minute |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: squid.avg_http_req_per_min |
|
||||||
# Type of information: Numeric (float) |
|
||||||
# Units: Req/min |
|
||||||
# Custom multiplier: Do not use |
|
||||||
# Store Value: As is |
|
||||||
|
|
||||||
UserParameter=squid.avg_http_req_per_min,squidclient mgr:info|grep 'Average HTTP requests per minute since start:'|cut -d':' -f2| tr -d ' \t' |
|
||||||
|
|
||||||
# Description: Squid Disk Cache Size |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: squid.cache_size_disk |
|
||||||
# Type of information: Numeric (integer 64bits) |
|
||||||
# Units: Bytes |
|
||||||
# Custom multiplier: 1024 |
|
||||||
# Store Value: As is |
|
||||||
|
|
||||||
UserParameter=squid.cache_size_disk,squidclient mgr:info|grep 'Storage Swap size:' | awk '\{print $4\}' |
|
||||||
|
|
||||||
# Description: Squid Memory Cache Size |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: squid.cache_size_mem |
|
||||||
# Type of information: Numeric (integer 64bits) |
|
||||||
# Units: Bytes |
|
||||||
# Custom multiplier: 1024 |
|
||||||
# Store Value: As is |
|
||||||
|
|
||||||
UserParameter=squid.cache_size_mem,squidclient mgr:info|grep 'Storage Mem size:' | awk '\{print $4\}' |
|
||||||
|
|
||||||
|
|
@ -1,16 +0,0 @@ |
|||||||
|
|
||||||
# Report status of every Raid Array (parsing /proc/mdstat)
|
||||||
|
|
||||||
# Description: Software Raid Status |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: raid.sw.status |
|
||||||
# Type of Information: Character |
|
||||||
# Show Value: As is |
|
||||||
|
|
||||||
# The value reported is like: |
|
||||||
# OK: md3:raid1:2 drives:931GB:Optimal md2:raid1:2 drives:931GB:Optimal md1:raid1:2 drives:101MB:Optimal |
|
||||||
|
|
||||||
# Tips: You can add a simple trigger on this check like: |
|
||||||
# \{ hostname:raid.sw.status.str( OK ) \}=0 |
|
||||||
UserParameter=raid.sw.status,/var/lib/zabbix/bin/mdstat-parser.pl |
|
||||||
|
|
@ -1,34 +0,0 @@ |
|||||||
# Monitor UPS status |
|
||||||
|
|
||||||
# Description: Nut UPS load |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: ups.load |
|
||||||
# Type of information: Numeric (float) |
|
||||||
# Units: % |
|
||||||
# Multiplier: Do not use |
|
||||||
# Store Value: As is |
|
||||||
UserParameter=ups.load[*],upsc $1@localhost ups.load |
|
||||||
|
|
||||||
# Description: Nut UPS Battery Charge |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: ups.battery.charge |
|
||||||
# Type of information: Numeric (float) |
|
||||||
# Units: % |
|
||||||
# Multiplier: Do not use |
|
||||||
# Store Value: As is |
|
||||||
UserParameter=ups.battery.charge[*],upsc $1@localhost battery.charge |
|
||||||
|
|
||||||
# Description: Nut UPS Status |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: ups.status |
|
||||||
# Type of information: Character |
|
||||||
# Show Value: As is (you can also define a dictionary OL=>On Line etc...)
|
||||||
UserParameter=ups.status[*],upsc $1@localhost ups.status |
|
||||||
|
|
||||||
# Description: Nut UPS Model |
|
||||||
# Type: Agent or Agent (active) |
|
||||||
# Key: ups.model |
|
||||||
# Type of information: Text |
|
||||||
UserParameter=ups.model[*],upsc $1@localhost ups.model |
|
||||||
|
|
||||||
|
|
@ -1,36 +0,0 @@ |
|||||||
#!/bin/bash

# LVM helper: prints a single value for the key given as first argument
# (snapshot_max_alloc, snapshots, lv or vg), or ZBX_NOTSUPPORTED for any
# other key.

PATH=$PATH:/usr/sbin:/usr/local/sbin

# Print the highest "Allocated to snapshot" percentage reported by
# lvdisplay, or 0 when there is no snapshot.
# The comparison is done numerically in awk: the previous [[ a > b ]] test
# compared strings, so "9.50" was considered greater than "10.20".
# Tokens that are not numbers evaluate to 0 and are therefore ignored.
snapshot_max_alloc(){
    lvdisplay \
        | grep % \
        | sed -e 's/ Allocated to snapshot //g' -e 's/%//g' \
        | awk 'BEGIN { max = 0 }
               { for (i = 1; i <= NF; i++) if ($i + 0 > max + 0) max = $i }
               END { print max }'
}

# Print the number of lvdisplay lines containing a '%' sign.
snapshots(){
    lvdisplay | grep -c %
}

# Print the number of "LV Name" lines reported by lvdisplay.
lv(){
    lvdisplay | grep -c 'LV Name'
}

# Print the number of "VG Name" lines reported by vgdisplay.
vg(){
    vgdisplay | grep -c 'VG Name'
}

# Dispatch: only the known keys are ever executed as a function name.
case "$1" in
    snapshot_max_alloc|snapshots|lv|vg)
        "$1"
        ;;
    *)
        echo 'ZBX_NOTSUPPORTED'
        ;;
esac
|
||||||
|
|
@ -1,155 +0,0 @@ |
|||||||
#!/usr/bin/env perl |
|
||||||
|
|
||||||
# Get status of Linux software RAID for SNMP / Nagios |
|
||||||
# Author: Michal Ludvig <michal@logix.cz> |
|
||||||
# http://www.logix.cz/michal/devel/nagios |
|
||||||
|
|
||||||
# Slightly modified by Daniel B. for integration on SME Server / Zabbix |
|
||||||
# 24 Apr 2009 |
|
||||||
# - One line report
|
||||||
# - Support RAID 0 Array |
|
||||||
# - Report a warning if an array is rebuilding
|
||||||
|
|
||||||
|
|
||||||
# |
|
||||||
# Simple parser for /proc/mdstat that outputs status of all |
|
||||||
# or some RAID devices. Possible results are OK and CRITICAL. |
|
||||||
# It could eventually be extended to output WARNING result in |
|
||||||
# case the array is being rebuilt or if there are still some |
|
||||||
# spares remaining, but for now leave it as it is. |
|
||||||
# |
|
||||||
# To run the script remotely via SNMP daemon (net-snmp) add the |
|
||||||
# following line to /etc/snmpd.conf: |
|
||||||
# |
|
||||||
# extend raid-md0 /root/parse-mdstat.pl --device=md0 |
|
||||||
# |
|
||||||
# The script result will be available e.g. with command: |
|
||||||
# |
|
||||||
# snmpwalk -v2c -c public localhost .1.3.6.1.4.1.8072.1.3.2 |
|
||||||
|
|
||||||
use strict; |
|
||||||
use Getopt::Long; |
|
||||||
|
|
||||||
# Sample /proc/mdstat output: |
|
||||||
# |
|
||||||
# Personalities : [raid1] [raid5] |
|
||||||
# md0 : active (read-only) raid1 sdc1[1] |
|
||||||
# 2096384 blocks [2/1] [_U] |
|
||||||
# |
|
||||||
# md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S) |
|
||||||
# 995712 blocks level 5, 64k chunk, algorithm 2 [3/2] [U_U] |
|
||||||
# [=================>...] recovery = 86.0% (429796/497856) finish=0.0min speed=23877K/sec |
|
||||||
# |
|
||||||
# unused devices: <none> |
|
||||||
|
|
||||||
# Main body of the mdstat parser.
#
# Reads the mdstat file (default /proc/mdstat, overridable with --file),
# collects one record per md device (optionally restricted with --device)
# and prints a single line:
#   <RESULT>: <dev>:<level>:<n> drives:<size>:<status> ...
# The exit status is the Nagios code of the worst result seen.

my $file   = "/proc/mdstat";
my $device = "all";

# Get command line options.
GetOptions(
    'file=s'   => \$file,
    'device=s' => \$device,
    'help'     => sub { usage() },
);

## Strip leading "/dev/" from --device in case it has been given
$device =~ s{^/dev/}{};

## Return codes for Nagios
my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);

## This is a global return value - set to the worst result we get overall
my $retval = 0;

my (%active_devs, %failed_devs, %spare_devs, %devs_total, %level, %size, %status);
my @raids;
my $result = 'OK';

# Three-argument open with a lexical handle: the file name comes from the
# command line and must never be interpreted as an open mode.
open(my $mdstat, '<', $file) or die "Can't open $file : $!";

while (my $line = <$mdstat>) {
    # Only "mdN : ..." lines start a device record.
    next unless $line =~ /^(md\d+)\s*:/;
    my $dev = $1;
    next if $device ne "all" and $device ne $dev;
    push @raids, $dev;
    chomp $line;

    $devs_total{$dev} = 0;
    $level{$dev}      = '';
    my (@active, @failed, @spare);

    for my $field (split(/ /, $line)) {
        if ($field =~ /^(raid\d+)$/) {
            $level{$dev} = $1;
            next;
        }

        # Member devices look like "sdb2[4]" optionally followed by one or
        # more one-letter flags such as "(F)" (failed) or "(S)" (spare).
        next unless $field =~ /(\w+)\[\d+\]((?:\(.\))*)/;
        my ($member, $flags) = ($1, $2);
        $devs_total{$dev}++;

        if    ($flags =~ /\(F\)/) { push @failed, $member; }
        elsif ($flags =~ /\(S\)/) { push @spare,  $member; }
        else                      { push @active, $member; }
    }

    $active_devs{$dev} = @active ? join(',', @active) : "none";
    $spare_devs{$dev}  = @spare  ? join(',', @spare)  : "none";
    $failed_devs{$dev} = @failed ? join(',', @failed) : "none";

    # Second line of the record: "<n> blocks ... [3/2] [U_U]".
    # The size and the member map are matched independently so that arrays
    # without a member map (RAID 0) still report their size, and a failed
    # match can never pick up captures left over from an earlier match.
    my $detail = <$mdstat>;
    $detail = '' unless defined $detail;

    my $blocks  = ($detail =~ /(\d+)\s+blocks/)           ? $1 : 0;
    my $missing = ($detail =~ /\[[U_]*_[U_]*\]\s*$/)      ? 1  : 0;

    my $megabytes = int($blocks / 1024);
    if ($megabytes > 1024) {
        $size{$dev} = int($megabytes / 1024) . "GB";
    }
    else {
        $size{$dev} = $megabytes . "MB";
    }

    # Third line: only present with content while a recovery is running.
    my $progress = <$mdstat>;
    $progress = '' unless defined $progress;
    my $recovering = ($progress =~ /recovery/) ? 1 : 0;

    # An array is degraded when a member has failed, or when a slot is
    # empty and nothing is being rebuilt onto it. Hot spares alone do not
    # degrade an array (they used to be counted as "not up", which turned
    # every healthy array with a spare into a CRITICAL result).
    if (@failed || ($missing && !$recovering)) {
        $status{$dev} = "Degraded";
        $result = "CRITICAL";
        $retval = $ERRORS{"CRITICAL"};
    }
    else {
        $status{$dev} = "Optimal";
    }

    if ($recovering) {
        $status{$dev} = "Rebuilding";
        # Never downgrade a CRITICAL raised by another array.
        if ($result eq "OK") {
            $result = "WARNING";
            $retval = $ERRORS{"WARNING"};
        }
    }
}
close $mdstat;

print "$result: ";
foreach my $raid (@raids) {
    print "$raid:$level{$raid}:$devs_total{$raid} drives:$size{$raid}:$status{$raid} ";
}
print "\n";

exit $retval;
|
||||||
|
|
||||||
# ===== |
|
||||||
# Print the command line help on standard output and terminate the
# program with exit status 1.
# The text is printed verbatim with print: it is not a format string, so
# printf must not be used on it.
sub usage()
{
    print <<'USAGE';

Check status of Linux SW RAID

Author: Michal Ludvig <michal@logix.cz> (c) 2006
http://www.logix.cz/michal/devel/nagios
Modified by Daniel B. <daniel@firewall-services.com>:

Usage: mdstat-parser.pl [options]

--file=<filename> Name of file to parse. Default is /proc/mdstat
--device=<device> Name of MD device, e.g. md0. Default is "all"

USAGE
    exit(1);
}
|
||||||
|
|
@ -1,226 +0,0 @@ |
|||||||
#!/usr/bin/perl -w |
|
||||||
|
|
||||||
# check_megaraid_sas Nagios plugin |
|
||||||
# Copyright (C) 2007 Jonathan Delgado, delgado@molbio.mgh.harvard.edu |
|
||||||
# |
|
||||||
# This program is free software; you can redistribute it and/or |
|
||||||
# modify it under the terms of the GNU General Public License |
|
||||||
# as published by the Free Software Foundation; either version 2 |
|
||||||
# of the License, or (at your option) any later version. |
|
||||||
# |
|
||||||
# This program is distributed in the hope that it will be useful, |
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
||||||
# GNU General Public License for more details. |
|
||||||
# |
|
||||||
# You should have received a copy of the GNU General Public License |
|
||||||
# along with this program; if not, write to the Free Software |
|
||||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
||||||
# |
|
||||||
# |
|
||||||
# Nagios plugin to monitor the status of volumes attached to a LSI Megaraid SAS |
|
||||||
# controller, such as the Dell PERC5/i and PERC5/e. If you have any hotspares |
|
||||||
# attached to the controller, you can specify the number you should expect to |
|
||||||
# find with the '-s' flag. |
|
||||||
# |
|
||||||
# The paths for the Nagios plugins lib and MegaCli may need to be changed.
|
||||||
# |
|
||||||
# $Author: delgado $ |
|
||||||
# $Revision: #3 $ $Date: 2007/06/07 $ |
|
||||||
|
|
||||||
# Slightly modified by Daniel B. for SME Server integration with zabbix |
|
||||||
# 23 Apr 2009 |
|
||||||
|
|
||||||
use strict; |
|
||||||
use Getopt::Std; |
|
||||||
|
|
||||||
# Option variables populated by getopts() below:
#   -h help, -s expected hotspares, -m / -p / -o error counts to ignore.
our($opt_h, $opt_s, $opt_o, $opt_m, $opt_p);

getopts('hs:o:p:m:');

if ( $opt_h ) {
    # The synopsis lists every option that getopts() accepts, including -p.
    print "Usage: $0 [-s number] [-m number] [-p number] [-o number]\n";
    print " -s is how many hotspares are attached to the controller\n";
    print " -m is the number of media errors to ignore\n";
    print " -p is the predictive error count to ignore\n";
    print " -o is the number of other disk errors to ignore\n";
    exit;
}

# Location of the MegaCli binary used for every query.
my $megacli = '/opt/MegaRAID/MegaCli/MegaCli';

## Return codes for Nagios
my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);

# Number of controllers, filled in from the MegaCli -adpCount output.
my ($adapters);

# Counters accumulated over all adapters, and the matching thresholds
# (the *allow values and $hotspares are overridden from the options).
my $hotspares = 0;
my $hotsparecount = 0;
my $pdbad = 0;
my $pdcount = 0;
my $mediaerrors = 0;
my $mediaallow = 0;
my $prederrors = 0;
my $predallow = 0;
my $othererrors = 0;
my $otherallow = 0;

# Text of the report line and overall state.
my $result = '';
my $status = 'OK';
|
||||||
|
|
||||||
# Combine the current overall state with a newly observed one and return
# the state to keep.
#
#   $current - state accumulated so far
#   $compare - state to merge in
#
# CRITICAL on either side wins; otherwise a WARNING in $compare wins;
# an UNKNOWN in $compare only replaces an OK; in every other case the
# current state is kept.
# No prototype is declared: prototypes change how calls are parsed and do
# not validate arguments.
sub max_state {
    my ($current, $compare) = @_;

    return 'CRITICAL' if ($compare eq 'CRITICAL') || ($current eq 'CRITICAL');
    return 'WARNING'  if $compare eq 'WARNING';
    return 'UNKNOWN'  if ($compare eq 'UNKNOWN') && ($current eq 'OK');
    return $current;
}
|
||||||
|
|
||||||
|
|
||||||
# Main body of the MegaRAID check.
#
# Queries MegaCli for every adapter, logical drive and physical disk,
# builds a one-line report in $result and exits with the Nagios code of
# the overall $status.
# MegaCli is started with list-form pipe opens (no shell involved) and
# lexical file handles.

# Command line thresholds override the defaults.
$hotspares  = $opt_s if $opt_s;
$mediaallow = $opt_m if $opt_m;
$predallow  = $opt_p if $opt_p;
$otherallow = $opt_o if $opt_o;

# Get the number of RAID controllers we have
open( my $adpcount_fh, '-|', $megacli, '-adpCount', '-NoLog' )
    or die "error: Could not execute MegaCli -adpCount";

while ( my $line = <$adpcount_fh> ) {
    if ( $line =~ m/Controller Count:\s*(\d+)/ ) {
        $adapters = $1;
        last;
    }
}
close $adpcount_fh;

# Without a controller count nothing can be checked: say so instead of
# printing an "OK" report that covers zero drives.
unless ( defined $adapters ) {
    print STDOUT "UNKNOWN: Could not read the controller count from MegaCli\n";
    exit $ERRORS{'UNKNOWN'};
}

ADAPTER: for ( my $adp = 0; $adp < $adapters; $adp++ ) {
    # Get the number of logical drives on this adapter
    open( my $ldnum_fh, '-|', $megacli, '-LdGetNum', "-a$adp", '-NoLog' )
        or die "error: Could not execute $megacli -LdGetNum -a$adp";

    my $ldnum = 0;
    while ( my $line = <$ldnum_fh> ) {
        # \d+ so that adapter numbers above 9 are matched as well.
        if ( $line =~ m/Number of Virtual drives configured on adapter \d+:\s*(\d+)/i ) {
            $ldnum = $1;
            last;
        }
    }
    close $ldnum_fh;

    LDISK: for ( my $ld = 0; $ld < $ldnum; $ld++ ) {
        # Get info on this particular logical drive
        open( my $ldinfo_fh, '-|', $megacli, '-LdInfo', "-L$ld", "-a$adp", '-NoLog' )
            or die "error: Could not execute $megacli -LdInfo -L$ld -a$adp -NoLog";

        # Defaults keep the report line well formed when a field is absent
        # from the MegaCli output.
        my ( $size, $unit, $raidlevel, $state ) = ( '', '', '', '' );
        my $ldpdcount = 0;

        while ( my $line = <$ldinfo_fh> ) {
            if ( $line =~ m/Size:\s*((\d+)(MB|GB|TB))/ ) {
                $size = $2;
                $unit = $3;
                # Adjust MB to GB if that's what we got
                if ( $unit eq 'MB' ) {
                    $size = sprintf( "%.0f", ( $size / 1024 ) );
                    $unit = 'GB';
                }
            }
            elsif ( $line =~ m/State:\s*(\w+)/ ) {
                $state = $1;
                if ( $state ne 'Optimal' ) {
                    $status = 'CRITICAL';
                }
            }
            elsif ( $line =~ m/Number Of Drives( per span)?:\s*(\d+)/ ) {
                $ldpdcount = $2;
            }
            elsif ( $line =~ m/Span Depth:\s*(\d+)/ ) {
                # The drive count is multiplied by the span depth.
                $ldpdcount = $ldpdcount * $1;
            }
            elsif ( $line =~ m/RAID Level: Primary-(\d)/ ) {
                $raidlevel = $1;
            }
        }
        close $ldinfo_fh;

        $result .= "$adp:$ld:RAID-$raidlevel:$ldpdcount drives:$size$unit:$state ";
    } #LDISK

    # Get info on physical disks for this adapter
    open( my $pdlist_fh, '-|', $megacli, '-PdList', "-a$adp", '-NoLog' )
        or die "error: Could not execute $megacli -PdList -a$adp -NoLog";

    # -1 means "no slot seen yet" and never equals the backplane slot 255.
    my $slotnumber = -1;
    PDISKS: while ( my $line = <$pdlist_fh> ) {
        if ( $line =~ m/Slot Number:\s*(\d+)/ ) {
            $slotnumber = $1;
            # Slot 255 is the backplane, not a disk
            next PDISKS if ( $slotnumber == 255 );
            $pdcount++;
        }
        elsif ( $line =~ m/(\w+) Error Count:\s*(\d+)/ ) {
            # Don't care about backplane error counts: they used to be
            # added to the totals although only the disk count skipped them.
            next PDISKS if ( $slotnumber == 255 );
            if ( $1 eq 'Media' ) {
                $mediaerrors += $2;
            }
            else {
                $othererrors += $2;
            }
        }
        elsif ( $line =~ m/Predictive Failure Count:\s*(\d+)/ ) {
            next PDISKS if ( $slotnumber == 255 );
            $prederrors += $1;
        }
        elsif ( $line =~ m/Firmware state:\s*(\w+)/ ) {
            my $fwstate = $1;
            if ( $fwstate eq 'Hotspare' ) {
                $hotsparecount++;
            }
            elsif ( $fwstate eq 'Online' ) {
                # Do nothing
            }
            elsif ( $slotnumber != 255 ) {
                $pdbad++;
                $status = 'CRITICAL';
            }
        }
    } #PDISKS
    close $pdlist_fh;
}

$result .= "Drives:$pdcount ";

# Any bad disks?
if ( $pdbad ) {
    $result .= "$pdbad Bad Drives ";
}

my $errorcount = $mediaerrors + $prederrors + $othererrors;
# Were there any errors?
if ( $errorcount ) {
    $result .= "($errorcount Errors) ";
    if ( ( $mediaerrors > $mediaallow ) ||
         ( $prederrors > $predallow )   ||
         ( $othererrors > $otherallow ) ) {
        $status = max_state($status, 'WARNING');
    }
}

# Do we have as many hotspares as expected (if any)
if ( $hotspares ) {
    if ( $hotsparecount < $hotspares ) {
        $status = max_state($status, 'WARNING');
        $result .= "Hotspare(s):$hotsparecount (of $hotspares)";
    }
    else {
        $result .= "Hotspare(s):$hotsparecount";
    }
}

print STDOUT "$status: $result\n";
exit $ERRORS{$status};
|
@ -1,57 +0,0 @@ |
|||||||
#!/bin/bash

# Temperature helper: prints one numerical value for the sensor named by
# the first argument (cpu0, cpu1, mb, ambiant, or a hdX/sdX drive name),
# or ZBX_NOTSUPPORTED for anything else.

KEY=$1

case "$KEY" in
    cpu0)
        # Here are some examples on how to retrieve temperatures
        # of your system:
        #
        # If your motherboard supports IPMI and you have the ipmitool package
        # you can use this:
        # Of course, you'll have to adapt the command as each controller may report different sensor names

        # /usr/bin/ipmitool sdr | grep 'P1 Therm Margin' | cut -d'|' -f 2 | awk '{print $1}'

        # Else, if your motherboard supports lm_sensor, you can use something
        # like this:
        # /usr/bin/sensors | grep temp1 | cut -d':' -f 2 | awk '{print $1}' | sed -e "s/+//g" -e "s/.C//g"

        # You can also try to get your CPU temperature with acpi:
        # cat /proc/acpi/thermal_zone/THRM/temperature | awk '{print $2}'

        # It's important that your commands return only numerical values

        # The default for now is to use IPMI
        /usr/bin/ipmitool sdr type Temperature | grep 'P1 Therm Margin' | cut -d'|' -f 2 | awk '{print $1}'
        ;;
    cpu1)
        # This will be the same as the above, but for the second CPU
        /usr/bin/ipmitool sdr type Temperature | grep 'P2 Therm Margin' | cut -d'|' -f 2 | awk '{print $1}'
        ;;
    mb)
        # AFAIK, motherboard temperature can be retrieved only with lm_sensor or IPMI
        /usr/bin/ipmitool sdr type Temperature | grep 'Baseboard' | cut -d'|' -f 2 | awk '{print $1}'
        ;;
    ambiant)
        # Some IPMI controllers also report the ambient temperature
        /usr/bin/ipmitool sdr type Temperature | grep Ambient | cut -d'|' -f 2 | awk '{print $1}'
        ;;
    hd*|sd*)
        # Here, we want a hard drive temperature, so we'll use smartctl
        # We could also use hddtemp but it doesn't seem to work for a lot of drives, where smartctl does
        #
        # The key ends up in a device path, so only plain device names
        # (letters and digits) are accepted and the path is quoted:
        # whitespace or "/" in the key can no longer add arguments to
        # smartctl or point it at another path.
        if [[ "$KEY" =~ ^[hs]d[A-Za-z0-9]*$ ]]; then
            /usr/sbin/smartctl -a "/dev/$KEY" | grep Temperature_Celsius | awk '{print $10}'
        else
            echo 'ZBX_NOTSUPPORTED'
        fi
        ;;
    *)
        # Else, we tell the server the item is not supported
        echo 'ZBX_NOTSUPPORTED'
        ;;
esac
|
||||||
|
|
Loading…
Reference in new issue