Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package monitoring-plugins-smart for openSUSE:Factory checked in at 2025-02-05 17:23:19 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/monitoring-plugins-smart (Old) and /work/SRC/openSUSE:Factory/.monitoring-plugins-smart.new.2316 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "monitoring-plugins-smart" Wed Feb 5 17:23:19 2025 rev:14 rq:1243403 version:6.15.0 Changes: -------- --- /work/SRC/openSUSE:Factory/monitoring-plugins-smart/monitoring-plugins-smart.changes 2024-09-17 18:20:17.476162883 +0200 +++ /work/SRC/openSUSE:Factory/.monitoring-plugins-smart.new.2316/monitoring-plugins-smart.changes 2025-02-05 17:23:27.194304258 +0100 @@ -1,0 +2,14 @@ +Sun Feb 2 10:34:19 UTC 2025 - Martin Hauke <mar...@gmx.de> + +- Update to 6.15.0 + * Starting with version 6.15.0, check_smart.pl additionally + checks for errors in the so-called SMART Error Log by default. + * To skip checking the SMART Error Log, the new parameter + --skip-error-log can be used. + * Another new parameter is the -O/--oldage parameter. If this + parameter is used, certain attributes of type "Oldage" are + ignored. Right now these attributes are + 202,Percent_Lifetime_Used for ATA drives and when the + Critical_Warning attribute of NVMe drives has the value 0x04. + +------------------------------------------------------------------- Old: ---- check_smart-6.14.3.tar.gz New: ---- check_smart-6.15.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ monitoring-plugins-smart.spec ++++++ --- /var/tmp/diff_new_pack.umEpSH/_old 2025-02-05 17:23:27.846331235 +0100 +++ /var/tmp/diff_new_pack.umEpSH/_new 2025-02-05 17:23:27.846331235 +0100 @@ -17,7 +17,7 @@ Name: monitoring-plugins-smart -Version: 6.14.3 +Version: 6.15.0 Release: 0 Summary: Check SMART status of a given disk License: GPL-3.0-or-later ++++++ check_smart-6.14.3.tar.gz -> check_smart-6.15.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/check_smart-6.14.3/check_smart.pl new/check_smart-6.15.0/check_smart.pl --- old/check_smart-6.14.3/check_smart.pl 2024-09-10 07:50:49.000000000 +0200 +++ new/check_smart-6.15.0/check_smart.pl 2025-01-31 07:21:12.000000000 +0100 @@ -62,13 +62,14 @@ 
# Sep 20, 2023: Claudio Kuenzler - Fix debug output for raw check list, fix --hide-serial in debug output (6.14.1) # Mar 15, 2024: Yannick Martin - Fix nvme check when auto interface is given and device is nvme (6.14.2) # Sep 10, 2024: Claudio Kuenzler - Fix performance data format, missing perfdata in SCSI drives (6.14.3) +# Jan 31, 2025: Tomas Barton - Ignore old age attributes due to their unreliability. Check ATA error logs (6.15.0) use strict; use Getopt::Long; use File::Basename qw(basename); my $basename = basename($0); -my $revision = '6.14.3'; +my $revision = '6.15.0'; # Standard Nagios return codes my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4); @@ -78,7 +79,7 @@ $ENV{'BASH_ENV'}=''; $ENV{'ENV'}=''; -use vars qw($opt_b $opt_d $opt_g $opt_debug $opt_h $opt_i $opt_e $opt_E $opt_r $opt_s $opt_v $opt_w $opt_q $opt_l $opt_skip_sa $opt_skip_temp $opt_skip_load_cycles $opt_hide_sn); +use vars qw($opt_b $opt_d $opt_g $opt_debug $opt_h $opt_i $opt_e $opt_E $opt_o $opt_r $opt_s $opt_v $opt_w $opt_q $opt_l $opt_skip_sa $opt_skip_temp $opt_skip_load_cycles $opt_skip_error_log $opt_hide_sn); Getopt::Long::Configure('bundling'); GetOptions( "debug" => \$opt_debug, @@ -89,6 +90,7 @@ "i=s" => \$opt_i, "interface=s" => \$opt_i, "e=s" => \$opt_e, "exclude=s" => \$opt_e, "E=s" => \$opt_E, "exclude-all=s" => \$opt_E, + "O" => \$opt_o, "oldage" => \$opt_o, "q" => \$opt_q, "quiet" => \$opt_q, "r=s" => \$opt_r, "raw=s" => \$opt_r, "s" => \$opt_s, "selftest" => \$opt_s, @@ -98,6 +100,7 @@ "skip-self-assessment" => \$opt_skip_sa, "skip-temp-check" => \$opt_skip_temp, "skip-load-cycles" => \$opt_skip_load_cycles, + "skip-error-log" => \$opt_skip_error_log, "hide-sn" => \$opt_hide_sn, ); @@ -127,7 +130,7 @@ # normal mode - push opt_d on the list of devices push(@dev,$opt_d); } else { - # glob all devices - try '?' first + # glob all devices - try '?' 
first @dev =glob($opt_g); } @@ -473,7 +476,7 @@ warn "(debug) plus, we can also use the information for perfdata/graphing\n" if $opt_debug; warn "###########################################################\n\n\n" if $opt_debug; - $full_command = "$smart_command -d $interface -A $device"; + $full_command = "$smart_command -d $interface -a $device"; warn "(debug) executing:\n$full_command\n\n" if $opt_debug; @output = `$full_command`; warn "(debug) output:\n@output\n\n" if $opt_debug; @@ -490,6 +493,24 @@ # Yeah - but megaraid is the same output as ata if ($output_mode =~ "ata") { foreach my $line(@output){ + unless ($opt_skip_error_log) { + if ($line =~ /^ATA Error Count:\s(\d+)\s/) { + my ($attribute_name, $raw_value) = ('ata_errors', $1); + if ( ($warn_list{$attribute_name}) && ($raw_value >= $warn_list{$attribute_name}) ) { + warn "(debug) $attribute_name is non-zero ($raw_value)\n\n" if $opt_debug; + push(@warning_messages, "$attribute_name is non-zero ($raw_value)"); + escalate_status('WARNING'); + } elsif ( ($warn_list{$attribute_name}) && ($raw_value < $warn_list{$attribute_name}) ) { + warn "(debug) $attribute_name is non-zero ($raw_value) but less than $warn_list{$attribute_name}\n\n" if $opt_debug; + push(@notice_messages, "$attribute_name is non-zero ($raw_value) (but less than threshold $warn_list{$attribute_name})"); + } else { + warn "(debug) $attribute_name is non-zero ($raw_value)\n\n" if $opt_debug; + push(@warning_messages, "$attribute_name is non-zero ($raw_value)"); + escalate_status('WARNING'); + } + push (@perfdata, "$attribute_name=$raw_value;;;;") + } + } # get lines that look like this: # 9 Power_On_Minutes 0x0032 241 241 000 Old_age Always - 113h+12m next unless $line =~ /^\s*(\d+)\s(\S+)\s+(?:\S+\s+){6}(\S+)\s+(\d+)/; @@ -499,9 +520,14 @@ if (grep {$_ eq $attribute_number || $_ eq $attribute_name || $_ eq $when_failed} @exclude_checks) { warn "SMART Attribute $attribute_name failed at $when_failed but was set to be ignored\n" if 
$opt_debug; } else { - push(@warning_messages, "Attribute $attribute_name failed at $when_failed"); - escalate_status('WARNING'); - warn "(debug) parsed SMART attribute $attribute_name with error condition:\n$when_failed\n\n" if $opt_debug; + if ($opt_o) { + if ($attribute_number == 202) { # Percent_Lifetime_Used might not be reliable health indicator + next; + } + } + push(@warning_messages, "Attribute $attribute_name failed at $when_failed"); + escalate_status('WARNING'); + warn "(debug) parsed SMART attribute $attribute_name with error condition:\n$when_failed\n\n" if $opt_debug; } } # some attributes produce questionable data; no need to graph them @@ -594,8 +620,12 @@ escalate_status('WARNING'); } elsif ($raw_value eq '0x04') { - push(@warning_messages, "NVM subsystem reliability degraded"); - escalate_status('WARNING'); + if ($opt_o) { + warn "(debug) $attribute_name = '0x04' was set to be ignored due to oldage flag\n\n" if $opt_debug; + } else { + push(@warning_messages, "NVM subsystem reliability degraded"); + escalate_status('WARNING'); + } } elsif ($raw_value eq '0x05') { push(@warning_messages, "Available spare below threshold and NVM subsystem reliability degraded"); @@ -742,6 +772,7 @@ } } } + warn "(debug) gathered perfdata:\n@perfdata\n\n" if $opt_debug; $perf_string = join(' ', @perfdata); @@ -867,8 +898,10 @@ print " --skip-self-assessment: Skip SMART self-assessment health status check\n"; print " --skip-temp-check: Skip temperature comparison current vs. 
drive max temperature\n"; print " --skip-load-cycles: Do not alert on high load/unload cycle count (600K considered safe on hard drives)\n"; + print " --skip-error-log: Do not alert on errors found in ATA log (ATA Error Count)\n"; print " --hide-sn: Do not show drive serial number in output\n"; print " -h/--help: this help\n"; + print " -O/--oldage: Ignore old age attributes\n"; print " -q/--quiet: When faults detected, only show faulted drive(s) (only affects output when used with -g parameter)\n"; print " --debug: show debugging information\n"; print " -v/--version: Version number\n";