Hi, someone once posted the idea for the first script below, which zeroes out spurious device errors reported by iostat for SCSI devices. That script doesn't work for NVMe block devices, so (after a couple of months of occasionally staring at kernel code) I finally found the relevant structures and wrote the second script below. The options -h, -s and -t select hard, soft and transport errors respectively; without options, all three error counters are zeroed. Just use the bare device name (e.g., c9t8CE38EE200A8FD01d0) as shown by iostat -exn.
#======================= scsi devices ============================ ro sunth9 ~ # cat /imksun/bin_rootcsh/zero_iostat #!/bin/ksh93 os=`uname -r` if [ $os != '5.10' -a $os != '5.11' ]; then echo "Sorry $os is not supported" exit fi if [ $# -lt 1 -o $# -gt 4 ]; then echo "Usage : zero_iostat [-h|-s|-t] <device> or <sdnn>" exit fi if [ $# -eq 1 ]; then sh=1;ss=1;st=1;dev=$1; else sh=0;ss=0;st=0; for p in $@; do if [ $1 == '-h' ]; then sh=1 elif [ $1 == '-s' ]; then ss=1 elif [ $1 == '-t' ]; then st=1 else dev=$1 fi shift done fiun=`ksh93 -c 'paste -d= <(iostat -e|nawk '\''NR>2{print $1}'\'') <(iostat -en|nawk '\''NR>2{print $5}'\'')|grep '$dev'|cut -f1 -d=|cut -c3-'`
lun=0x`echo "obase=16;$un"|bc` sd=`echo "*sd_state::softstate $lun" | mdb -k` if [ -z "$sd" ] ; then exit 1; fi es=`echo "$sd::print struct sd_lun un_errstats"| mdb -k | cut -d" " -f3` ks=`echo "$es::print kstat_t ks_data" | mdb -k | cut -d" " -f3` if [ $sh -eq 1 ]; then echo Resetting Hard Errorha=`echo "$ks::print -a struct sd_errstats sd_harderrs.value.ui32" | mdb -k | cut -d" " -f1`
echo $ha/W 0 | mdb -kw fi if [ $ss -eq 1 ];then echo Resetting Soft Errorha=`echo "$ks::print -a struct sd_errstats sd_softerrs.value.ui32" | mdb -k | cut -d" " -f1`
echo $ha/W 0 | mdb -kw fi if [ $st -eq 1 ]; then echo Resetting Tran Errorha=`echo "$ks::print -a struct sd_errstats sd_transerrs.value.ui32" | mdb -k | cut -d" " -f1`
echo $ha/W 0 | mdb -kw fi exit 0 #======================= NVME blockdevices ============================ ro sunth9 ~ # cat /imksun/bin_rootcsh/zero_nvme_iostat #!/bin/ksh93 os=`uname -r` if [ $os != '5.11' ]; then echo "Sorry $os is not supported" exit fi if [ $# -lt 1 -o $# -gt 4 ]; then echo "Usage : zero_iostat [-h|-s|-t] <device> or <sdnn>" exit fi if [ $# -eq 1 ]; then sh=1;ss=1;st=1;dev=$1; else sh=0;ss=0;st=0; for p in $@; do if [ $1 == '-h' ]; then sh=1 elif [ $1 == '-s' ]; then ss=1 elif [ $1 == '-t' ]; then st=1 else dev=$1 fi shift done fiun=`ksh93 -c 'paste -d= <(iostat -e|nawk '\''NR>2{print $1}'\'') <(iostat -en|nawk '\''NR>2{print $5}'\'')|grep '$dev'|cut -f1 -d=|cut -c7-'`
lun=0x`echo "obase=16;$un"|bc`sd=`echo "bd_state::print bd_t d_private |::softstate $lun |::print bd_t d_errstats |::print kstat_t ks_data" | mdb -k | cut -d" " -f3`
if [ -z "$sd" ] ; then exit 1; fiks=`echo "$sd::print -a nvme_device_stat_t nds_dma_bind_err.value.ui32" | mdb -k | cut -d" " -f1`
if [ -z "$ks" ] ; then exit 1; fikh=`echo "$sd::print -a nvme_device_stat_t nds_abort_timeout.value.ui32"| mdb -k | cut -d" " -f1`
if [ -z "$kh" ] ; then exit 1; fikt=`echo "$sd::print -a nvme_device_stat_t nds_abort_failed.value.ui32" | mdb -k | cut -d" " -f1`
if [ -z "$kt" ] ; then exit 1; fi if [ $sh -eq 1 ]; then echo Resetting Hard Error echo $kh/W 0 | mdb -kw fi if [ $ss -eq 1 ];then echo Resetting Soft Error echo $ks/W 0 | mdb -kw fi if [ $st -eq 1 ]; then echo Resetting Tran Error echo $kt/W 0 | mdb -kw fi exit 0 -- Dr.Udo Grabowski Inst.of Meteorology & Climate Research IMKASF-SAT https://www.imk-asf.kit.edu/english/sat.php KIT - Karlsruhe Institute of Technology https://www.kit.edu Postfach 3640,76021 Karlsruhe,Germany T:(+49)721 608-26026 F:-926026
smime.p7s
Description: S/MIME Cryptographic Signature
------------------------------------------ illumos: illumos-discuss Permalink: https://illumos.topicbox.com/groups/discuss/T7119852329cedc41-Me0ea5cc222a7d75353a8b52e Delivery options: https://illumos.topicbox.com/groups/discuss/subscription