Additional scripts for Zabbix agent on Linux to discover and monitor several services

386 lines
13 KiB

#!/usr/bin/perl -w
use strict;
use warnings;
use Config::Simple '-strict';
use Getopt::Long;
use File::Which;
use File::Basename;
use Zabbix::Agent::Addons::Disks;
use Zabbix::Agent::Addons::UPS;
# Path of the INI file to generate (mandatory, set with --output)
my $output = undef;

# When a threshold can be automatically detected,
# you may want to be notified before it's reached, so you can
# set a margin which will be subtracted from the real threshold
my $temp_margin    = '20';
my $temp_hd_margin = '10';
my $pwr_margin     = '200';
my $pwr_rel_margin = '20';

# This value will be subtracted from the higher threshold to define the low one
# so you can have hysteresis to prevent flip-flop
my $temp_hyst     = '10';
my $temp_hd_hyst  = '5';
my $temp_ups_hyst = '5';
my $pwr_hyst      = '200';
my $pwr_rel_hyst  = '10';

# Default threshold if not detected
my $def_temp_thres_high     = '50';
my $def_hd_temp_thres_high  = '50';
my $def_ups_temp_thres_high = '40';
my $def_fan_thres_high      = '1000';
my $def_fan_thres_low       = '700';
my $def_pwr_thres_high      = '1000';
my $def_pwr_rel_thres_high  = '80';

# Command line options override the margins and hysteresis values above
GetOptions(
    "output=s"         => \$output,
    "temp-margin=i"    => \$temp_margin,
    "temp-hd-margin=i" => \$temp_hd_margin,
    "pwr-margin=i"     => \$pwr_margin,
    "pwr-rel-margin=i" => \$pwr_rel_margin,
    "temp-hyst=i"      => \$temp_hyst,
    "temp-hd-hyst=i"   => \$temp_hd_hyst,
    "temp-ups-hyst=i"  => \$temp_ups_hyst,
    "pwr-hyst=i"       => \$pwr_hyst,
    # The option used to be declared as "pwr-rel-hys" (missing "t"); the
    # misspelled name is kept as an alias so existing invocations keep working
    "pwr-rel-hyst|pwr-rel-hys=i" => \$pwr_rel_hyst
);
# Print a one line usage reminder on STDOUT.
# $0 is the name the script was invoked with.
sub usage(){
    my $example = "$0 --output=/etc/zabbix/sensors.ini";
    print "Usage: $example\n";
}
# The output file is mandatory: without it, show the usage and exit with status 1
if (!$output){
    usage();
    exit 1;
}
# Path of the external tools used for detection, as returned by which()
# (each detection section below checks the value before using it)
my $ipmitool = which('ipmitool');
my $smartctl = which('smartctl');
my $lmsensor = which('sensors');
my $upsc     = which('upsc');

# INI writer for the generated file. Use the direct Class->new form instead
# of the ambiguous indirect object syntax ("new Config::Simple(...)")
my $cfg = Config::Simple->new(syntax => 'ini');

# Detected sensors: sensor ID => hashref of the settings written in its INI block
my $sensors = {};
# Try to detect IPMI sensors.
# Each sensor type (Temperature, Fan, Current) is listed with
# "ipmitool sdr type <type>", then every sensor found is queried with
# "ipmitool sdr get <name>" to read its current value and its thresholds.
# Usable sensors are stored in $sensors, keyed by sensor_name($name).
if ($ipmitool && -x $ipmitool){
    # First check for temperature sensors
    my @lines = qx($ipmitool sdr type Temperature);
    if ($? == 0){
        SENSOR: foreach my $line (@lines){
            chomp $line;
            # Looks like
            # Inlet Temp | 04h | ok | 7.1 | 25 degrees C
            if ($line !~ m/^(\w+[\s\w]+?\w+)\s*\|.*\|\s*([\w\.\s]+)\s*\|.*\|\s*([\-\w\.\s]+)$/){
                next SENSOR;
            }
            my $name = $1;
            my $sensor = {};
            my @details = qx($ipmitool sdr get '$name');
            if ($? != 0){
                print "Couldn't get detail for sensor $name\n";
                next SENSOR;
            }
            my $val = undef;
            foreach my $d (@details){
                chomp $d;
                if ($d =~ m/^\s*Sensor\sReading\s*:\s*(\-?\w+)/){
                    $val = $1;
                    print "Sensor $name has value: $val\n";
                    if ($val !~ m/^\-?\d+$/){
                        print "Skipping sensor $name, couldn't parse its value: $val\n";
                        next SENSOR;
                    }
                }
                # The decimal part of a threshold is optional, so integer
                # values are accepted too (same as for the power sensors)
                elsif ($d =~ m/^\s*Upper\scritical\s*:\s*(\-?\d+(\.\d+)?)/){
                    $sensor->{threshold_high} = $1-$temp_margin;
                }
                elsif ($d =~ m/^\s*Upper\snon\-critical\s*:\s*(\-?\d+(\.\d+)?)/){
                    $sensor->{threshold_low} = $1-$temp_margin;
                }
            }
            # Another loop to check for Normal max if Upper critical wasn't found
            if (!$sensor->{threshold_high}){
                foreach my $d (@details){
                    if ($d =~ m/^\s*Normal\sMaximum\s*:\s*(\-?\d+(\.\d+)?)/){
                        $sensor->{threshold_high} = $1-$temp_margin;
                    }
                }
            }
            # No reading found. Note that a reading of 0 is skipped as well
            next SENSOR unless $val;
            # Fall back on the high threshold minus the hysteresis, then on the defaults
            $sensor->{threshold_low} ||= ($sensor->{threshold_high}) ? $sensor->{threshold_high}-$temp_hyst : $def_temp_thres_high-$temp_hyst;
            $sensor->{threshold_high} ||= $def_temp_thres_high;
            # Drop a useless ".000" decimal part
            $sensor->{threshold_high} =~ s/\.0+$//;
            $sensor->{threshold_low} =~ s/\.0+$//;
            $sensor->{description} = $name;
            $sensor->{type} = 'temp';
            $sensor->{unit} = '°C';
            $sensor->{cmd} = "$ipmitool sdr get '$name' | grep 'Sensor Reading' | awk '{print \$4}'";
            my $id = sensor_name($name);
            $sensors->{$id} = $sensor;
            print "Found a temperature sensor using IPMI: $name\n";
        }
    }
    # Now check for Fan, nearly the same as Temp, but
    # * We try to detect the unit
    # * threshold handling is not the same
    @lines = qx($ipmitool sdr type Fan);
    if ($? == 0){
        SENSOR: foreach my $line (@lines){
            chomp $line;
            if ($line !~ m/^(\w+[\s\w]+?\w+)\s*\|.*\|\s*([\w\.\s]+)\s*\|.*\|\s*([\-\w\.\s]+)$/){
                next SENSOR;
            }
            my $name = $1;
            # Last column of the listing, used below to guess the unit
            my $value = $3;
            my $sensor = {};
            my @details = qx($ipmitool sdr get '$name');
            if ($? != 0){
                print "Couldn't get detail for sensor $name\n";
                next SENSOR;
            }
            my $val = undef;
            foreach my $d (@details){
                chomp $d;
                if ($d =~ m/^\s*Sensor\sReading\s*:\s*(\w+)/){
                    $val = $1;
                    if ($val !~ m/^\d+$/){
                        print "Skipping sensor $name, couldn't parse its value: $val\n";
                        next SENSOR;
                    }
                }
                # NOTE(review): the temperature margin is subtracted from the
                # fan thresholds too; kept as is, confirm this is intended
                elsif ($d =~ m/^\s*Lower\scritical\s*:\s*(\d+(\.\d+)?)/){
                    $sensor->{threshold_low} = $1-$temp_margin;
                }
                elsif ($d =~ m/^\s*Lower\snon\-critical\s*:\s*(\d+(\.\d+)?)/){
                    $sensor->{threshold_high} = $1-$temp_margin;
                }
            }
            # No reading found. Note that a reading of 0 is skipped as well
            next SENSOR unless $val;
            $sensor->{threshold_high} ||= $def_fan_thres_high;
            # Use the dedicated default for the low threshold (it was declared
            # but never used, the high default minus $temp_hyst was used instead)
            $sensor->{threshold_low} ||= $def_fan_thres_low;
            $sensor->{threshold_high} =~ s/\.0+$//;
            $sensor->{threshold_low} =~ s/\.0+$//;
            $sensor->{description} = $name;
            $sensor->{type} = 'fan';
            # A reading below 100 is assumed to be a percentage
            $sensor->{unit} = ($value =~ m/percent|%/ || $val < 100) ? '%' : 'rpm';
            $sensor->{cmd} = "$ipmitool sdr get '$name' | grep 'Sensor Reading' | awk '{print \$4}'";
            my $id = sensor_name($name);
            $sensors->{$id} = $sensor;
            print "Found a fan sensor using IPMI: $name\n";
        }
    }
    # Now look for power information
    @lines = qx($ipmitool sdr type 'Current');
    if ($? == 0){
        SENSOR: foreach my $line (@lines){
            chomp $line;
            if ($line !~ m/^(\w+[\s\w]+?\w+(\s%)?)\s*\|.*\|\s*([\w\.\s]+)\s*\|.*\|\s*([\-\w\.\s]+)$/){
                print "Skiping line $line\n";
                next SENSOR;
            }
            my $name = $1;
            my $value = $4;
            my $sensor = {};
            # Only keep sensors whose name or value looks like a power measurement
            if ($name =~ m/(Power)|(Pwr)|(Consumption)|(PS\d+\sCurr\sOut)/i || $value =~ m/W(att)?/i){
                my @details = qx($ipmitool sdr get '$name');
                if ($? != 0){
                    print "Couldn't get detail for sensor $name\n";
                    next SENSOR;
                }
                my $val = undef;
                # A "%" in the sensor name means a relative value, else Watt is assumed
                my $unit = ($name =~ m/%/) ? '%' : 'Watt';
                foreach my $d (@details){
                    chomp $d;
                    if ($d =~ m/^\s*Sensor\sReading\s*:\s*(\w+)/){
                        $val = $1;
                        if ($val !~ m/^\d+$/){
                            print "Skipping sensor $name, couldn't parse its value: $val\n";
                            next SENSOR;
                        }
                    }
                    elsif ($d =~ m/^\s*Upper\scritical\s*:\s*(\d+(\.\d+)?)/){
                        $sensor->{threshold_high} = ($unit eq '%') ? $1-$pwr_rel_margin : $1-$pwr_margin;
                    }
                    elsif ($d =~ m/^\s*Upper\snon\-critical\s*:\s*(\d+(\.\d+)?)/){
                        $sensor->{threshold_low} = ($unit eq '%') ? $1-$pwr_rel_margin : $1-$pwr_margin;
                    }
                }
                # No reading found. Note that a reading of 0 is skipped as well
                next SENSOR unless $val;
                $sensor->{threshold_high} ||= ($unit eq '%') ? $def_pwr_rel_thres_high : $def_pwr_thres_high;
                $sensor->{threshold_low} ||= ($unit eq '%') ? $def_pwr_rel_thres_high-$pwr_rel_hyst : $def_pwr_thres_high-$pwr_hyst;
                $sensor->{threshold_high} =~ s/\.0+$//;
                $sensor->{threshold_low} =~ s/\.0+$//;
                $sensor->{description} = $name;
                $sensor->{type} = 'power';
                $sensor->{unit} = $unit;
                $sensor->{cmd} = "$ipmitool sdr get '$name' | grep 'Sensor Reading' | awk '{print \$4}'";
                my $id = sensor_name($name);
                $sensors->{$id} = $sensor;
                print "Found a power sensor using IPMI: $name\n";
            }
        }
    }
}
# Try to detect lm_sensors, using the sensors command.
# Every temperature line of its output becomes an entry in $sensors,
# keyed by sensor_name($name).
if ($lmsensor && -x $lmsensor){
    my @lines = qx($lmsensor);
    if ($? == 0){
        SENSOR: foreach my $line (@lines){
            chomp $line;
            # Looks like
            # temp1: +27.8°C (crit = +119.0°C)
            # or
            # Core 0: +36.0°C (high = +80.0°C, crit = +100.0°C)
            if ($line !~ m/^(\w+[\s\w]+?):\s*\+?(\d+)(\.\d+)?°C\s*(.*)$/){
                next SENSOR;
            }
            my $name = $1;
            # Integer part of the reading: the regex above only captures
            # digits, so there's no need to validate it again
            my $val = $2;
            # Remaining of the line, eg "(high = +80.0°C, crit = +100.0°C)"
            my $thr = $4;
            my $sensor = {};
            # A reading of 0 is skipped
            next SENSOR unless $val;
            if ($thr =~ m/high\s+=\s+\+(\d+(\.\d+)?)/){
                $sensor->{threshold_high} = $1;
            }
            # Not anchored at the start of $thr: it begins with "(", so an
            # anchored "^crit" could never match and crit was never detected
            elsif ($thr =~ m/\bcrit\s+=\s+\+(\d+(\.\d+)?)/){
                $sensor->{threshold_high} = $1 - $temp_margin;
            }
            # Fall back on the high threshold minus the hysteresis, then on the defaults
            $sensor->{threshold_low} ||= ($sensor->{threshold_high}) ? $sensor->{threshold_high}-$temp_hyst : $def_temp_thres_high-$temp_hyst;
            $sensor->{threshold_high} ||= $def_temp_thres_high;
            # Drop a useless ".0" decimal part
            $sensor->{threshold_high} =~ s/\.0+$//;
            $sensor->{threshold_low} =~ s/\.0+$//;
            $sensor->{description} = $name;
            $sensor->{type} = 'temp';
            $sensor->{unit} = '°C';
            $sensor->{cmd} = "$lmsensor | grep '$name:' | cut -d+ -f2 | cut -d. -f1";
            my $id = sensor_name($name);
            $sensors->{$id} = $sensor;
            print "Found a temperature sensor using lm_sensors: $name\n";
        }
    }
}
# Look for hard drives exposing a temperature through SMART
if ($smartctl && -x $smartctl){
    my @smart_disks = Zabbix::Agent::Addons::Disks::list_smart_hdd({ skip_remouvable => 1 });
    DISK: for my $disk (@smart_disks){
        my @attributes = qx($smartctl -A /dev/$disk);
        next DISK unless $? == 0;
        for my $attr (@attributes){
            # Only the first temperature attribute of a disk is used
            next unless $attr =~ /(Temperature_Celsius|Airflow_Temperature_Cel)/;
            my $attr_name = $1;
            my $low = $def_hd_temp_thres_high-$temp_hd_hyst;
            $sensors->{$disk} = {
                type           => 'temp',
                unit           => '°C',
                description    => "$disk temperature",
                threshold_high => $def_hd_temp_thres_high,
                threshold_low  => $low,
                cmd            => "$smartctl -A /dev/$disk | grep $attr_name | awk '{print \$10}'"
            };
            print "Found a temperature sensor using smartctl: $disk\n";
            last;
        }
    }
    # Drives behind some LSI based hardware RAID controllers can report
    # their temperature too, they are queried through /dev/sda
    if (-e '/dev/megaraid_sas_ioctl_node'){
        # Only the first 26 drives (0 to 25) are probed
        SLOT: for my $slot (0..25){
            my @report = qx($smartctl -d megaraid,$slot -A /dev/sda);
            next SLOT unless $? == 0;
            my $key = 'sda-' . $slot;
            for my $row (@report){
                if ($row =~ m/Drive\sTrip\sTemperature:\s+(\d+)\s/){
                    # The drive advertises its own limit: derive the thresholds from it
                    my $high = $1-$temp_hd_margin;
                    my $low = $1-$temp_hd_margin-$temp_hd_hyst;
                    $sensors->{$key} = {
                        type           => 'temp',
                        unit           => '°C',
                        description    => "Temperature for disk No $slot on sda",
                        threshold_high => $high,
                        threshold_low  => $low,
                        cmd            => "$smartctl -A -d megaraid,$slot /dev/sda | grep 'Current Drive Temperature' | awk '{print \$4}'"
                    };
                    print "Found a temperature sensor using smartctl (megaraid): sda-$slot\n";
                    last;
                }
                elsif ($row =~ /(Temperature_Celsius|Airflow_Temperature_Cel)/){
                    # Regular SMART attribute: use the default thresholds
                    my $attr_name = $1;
                    my $low = $def_hd_temp_thres_high-$temp_hd_hyst;
                    $sensors->{$key} = {
                        type           => 'temp',
                        unit           => '°C',
                        description    => "Temperature for disk No $slot on sda",
                        threshold_high => $def_hd_temp_thres_high,
                        threshold_low  => $low,
                        cmd            => "$smartctl -A -d megaraid,$slot /dev/sda | grep $attr_name | awk '{print \$10}'"
                    };
                    print "Found a temperature sensor using smartctl (megaraid): sda-$slot\n";
                    last;
                }
            }
        }
    }
}
# Now check UPS: each UPS returned by list_ups() is queried with upsc,
# and a sensor is added for its temperature and for its load when reported
if ($upsc && -x $upsc){
    foreach my $ups (Zabbix::Agent::Addons::UPS::list_ups()){
        my @lines = qx($upsc $ups);
        next if ($? != 0);
        # Every line is scanned (no early exit), so both sensors are found
        # whatever the order in which upsc prints its variables
        foreach my $line (@lines){
            if ($line =~ m/^ups\.temperature:\s+(\d+(\.\d+)?)/){
                $sensors->{'ups_' . lc($ups) . '_temp'} = {
                    description => "ups temperature for $ups",
                    type => 'temp',
                    threshold_high => $def_ups_temp_thres_high,
                    threshold_low => $def_ups_temp_thres_high-$temp_ups_hyst,
                    unit => '°C',
                    cmd => "$upsc $ups ups.temperature"
                };
                print "Found a temperature sensor for ups $ups\n";
            }
            elsif ($line =~ m/^ups\.load:\s+(\d+(\.\d+)?)/){
                # The load is a relative value, so the relative power defaults apply
                $sensors->{'ups_' . lc($ups) . '_load'} = {
                    description => "ups load for $ups",
                    type => 'power',
                    threshold_high => $def_pwr_rel_thres_high,
                    threshold_low => $def_pwr_rel_thres_high-$pwr_rel_hyst,
                    unit => '%',
                    cmd => "$upsc $ups ups.load"
                };
                print "Found a load sensor for ups $ups\n";
            }
        }
    }
}
# NOTE(review): a TODO here used to ask for lm sensors support ("its output
# is harder to parse"); the detection based on the sensors command earlier in
# this script appears to cover it - confirm and drop this note
# Add one INI block per detected sensor, in sensor ID order
foreach my $id (sort keys %$sensors){
    $cfg->set_block($id, $sensors->{$id});
}
# Fail loudly if the file can't be written instead of exiting successfully
$cfg->write($output)
    or die "Couldn't write $output: " . $cfg->error() . "\n";
# Build a sensor ID from a sensor description:
# the description is lower cased, every whitespace character becomes "_",
# "%" is spelled out as "percent" and a trailing "_rpm" is removed
sub sensor_name{
    my ($description) = @_;
    my $id = lc $description;
    for ($id){
        s/\s/_/g;
        s/%/percent/g;
        s/_rpm$//;
    }
    return $id;
}