Package: munin-plugins-core
Version: 2.0.42-5
Severity: normal
Tags: patch upstream
For some reason, NVMe drives report nearly 100% utilization when
utilization is calculated from the ticks data in /proc/diskstats.
$ cat /proc/diskstats | awk '{print $3, $7, $11, $7+$11, $13, $14}' | grep nvme
nvme0n1 22988 457568 480556 2440737408 2441207708
nvme0n1p1 22988 339624 362612 528884 978496
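Here $7 and $11 are the milliseconds spent reading and writing, and $13 is
the milliseconds spent doing I/O. Note that for nvme0n1p1 the $7+$11 column
(362612) isn't that far off from $13 (528884), whereas for the whole device
nvme0n1, $13 (2440737408) is roughly 5000 times larger than $7+$11 (480556).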
I have looked through the kernel sources and cannot figure out what is
going on.
See also
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1790181
Patch that illustrates the issue:
--- diskstats-dist 2018-11-11 04:48:17.000000000 -0800
+++ diskstats 2018-11-12 10:06:05.990785111 -0800
@@ -234,6 +234,9 @@
# a given second, the device is nearly 100% saturated.
my $utilization = $tot_ticks / $interval;
+ # NVME drives tend to show nearly 100% util, so let's use r+w as an alternate
+ my $rw_util = ($rd_ticks + $wr_ticks) / $interval;
+
# Average time an I/O takes on the block device
my $servicetime_in_sec =
$total_ios_per_sec ? $utilization / $total_ios_per_sec / 1000 : 0;
@@ -253,9 +256,11 @@
: 0;
my $util_print = $utilization / 10;
+ my $rw_util_print = $rw_util / 10;
return {
utilization => $util_print,
+ rw_util => $rw_util_print,
servicetime => $servicetime_in_sec,
average_wait => $average_wait_in_sec,
average_rd_wait => $average_rd_wait_in_sec,
@@ -298,6 +303,8 @@
print "${graph_id}_util.value "
. $result->{$device}->{'utilization'} . "\n";
+ print "${graph_id}_rw_util.value "
+ . $result->{$device}->{'rw_util'} . "\n";
}
print "\nmultigraph ${plugin_name}_throughput\n";
@@ -342,6 +349,7 @@
multigraph ${plugin_name}_utilization.$graph_id
util.value $result->{'utilization'}
+rw_util.value $result->{'rw_util'}
EOF
}
@@ -784,6 +792,11 @@
${graph_id}_util.info Utilization of the device
${graph_id}_util.min 0
${graph_id}_util.draw LINE1
+${graph_id}_rw_util.label $cur_diskstats{$device}->{'pretty_device_name'} (r+w)
+${graph_id}_rw_util.type GAUGE
+${graph_id}_rw_util.info Read/write utilization of the device
+${graph_id}_rw_util.min 0
+${graph_id}_rw_util.draw LINE1
EOF
}
-- System Information:
Debian Release: buster/sid
APT prefers unstable-debug
APT policy: (500, 'unstable-debug'), (500, 'testing-debug'), (500,
'unstable'), (500, 'oldstable'), (1, 'experimental-debug')
Architecture: amd64 (x86_64)
Foreign Architectures: i386
Kernel: Linux 4.18.0-2-amd64 (SMP w/4 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8),
LANGUAGE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled
Versions of packages munin-plugins-core depends on:
ii munin-common 2.0.42-5
ii perl 5.26.2-7+b1
Versions of packages munin-plugins-core recommends:
ii libnet-snmp-perl 6.0.1-4
Versions of packages munin-plugins-core suggests:
pn conntrack <none>
pn libcache-cache-perl <none>
pn libdbd-mysql-perl <none>
ii libhttp-date-perl 6.02-1
ii libnet-dns-perl 1.17-1
ii libnet-ip-perl 1.26-2
pn libnet-ldap-perl <none>
ii libnet-netmask-perl 1.9022-1
pn libnet-telnet-perl <none>
ii libxml-parser-perl 2.44-2+b3
ii python3 3.6.6-1
ii ruby 1:2.5.1
-- no debconf information