Gentle reminder - reviews welcome ...

On Tue, 2018-01-30 at 11:25 +0100, Martin Wilck wrote:
> Introduce a new blist flag that indicates the device may return
> certain
> sense code/ASC/ASCQ combinations that indicate different treatment
> than
> normal. In particular, some devices need unconditional retry (aka
> ADD_TO_MLQUEUE) under certain conditions; otherwise path failures may
> be
> falsely detected in the "maybe_retry" case.
> 
> Because different devices use different sense codes to indicate this
> condition, a single bit is not enough. But we also can't use a bit
> for
> every possible status code. Therefore the new flag
> BLIST_ABORTED_CMD_QUIRK
> just indicates that this is a device for which the return code should
> be
> checked. An extra "blacklist" in scsi_aborted_cmd_quirk() then checks
> for
> known ASC/ASCQ combinations, and handles them.
> 
> When such a combination is encountered for the first time, a message
> is
> printed. In systems that have several different peripherals using
> this
> flag, that might lead to a wrong match without a warning. This small
> risk
> is a compromise between exactness and the excessive resources that
> would be
> required to check for matching device vendor and model name every
> time.
> Also, if there were different devices (identified by vendor/model)
> using
> the same ASC/ASCQ, the code might print stray warnings. But the
> additional
> effort to required to handle this 100% correctly is hardly justified
> with
> the current minimal "blacklist".
> 
> I chose to recycle devinfo flag (1<<11) (former BLIST_INQUIRY_58,
> free
> since 496c91bbc910) for this purpose rather than extending
> blist_flags_t to
> 64 bit. This could be easily changed of course.
> 
> This patch replaces the previously submitted patches
>  "scsi: handle ABORTED_COMMAND on Fujitsu ETERNUS" and
>  "scsi: Always retry internal target error" (Hannes Reinecke)
> 
> Changes in v2:
>  - use ARRAY_SIZE (Bart van Assche)
>  - make blist array static const and use separate bitmask for warned
> flag
>    (Bart van Assche)
>  - Fix string comparison for SCSI vendor and model
>  - Print warning at scsi_logging_level 0, and improve message
> formatting
> 
> Signed-off-by: Martin Wilck <mwi...@suse.com>
> ---
>  drivers/scsi/scsi_devinfo.c |   4 +-
>  drivers/scsi/scsi_error.c   | 111
> ++++++++++++++++++++++++++++++++++++++++++++
>  include/scsi/scsi_devinfo.h |   6 +++
>  3 files changed, 120 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/scsi_devinfo.c
> b/drivers/scsi/scsi_devinfo.c
> index dfb8da83fa50..39455734d934 100644
> --- a/drivers/scsi/scsi_devinfo.c
> +++ b/drivers/scsi/scsi_devinfo.c
> @@ -161,12 +161,14 @@ static struct {
>       {"DGC", "RAID", NULL, BLIST_SPARSELUN}, /* Dell PV
> 650F, storage on LUN 0 */
>       {"DGC", "DISK", NULL, BLIST_SPARSELUN}, /* Dell PV
> 650F, no storage on LUN 0 */
>       {"EMC",  "Invista", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
> -     {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN
> | BLIST_REPORTLUN2},
> +     {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN
> +      | BLIST_REPORTLUN2 | BLIST_ABORTED_CMD_QUIRK},
>       {"EMULEX", "MD21/S2     ESDI", NULL, BLIST_SINGLELUN},
>       {"easyRAID", "16P", NULL, BLIST_NOREPORTLUN},
>       {"easyRAID", "X6P", NULL, BLIST_NOREPORTLUN},
>       {"easyRAID", "F8", NULL, BLIST_NOREPORTLUN},
>       {"FSC", "CentricStor", "*", BLIST_SPARSELUN |
> BLIST_LARGELUN},
> +     {"FUJITSU", "ETERNUS_DXM", "*", BLIST_ABORTED_CMD_QUIRK},
>       {"Generic", "USB SD Reader", "1.00", BLIST_FORCELUN |
> BLIST_INQUIRY_36},
>       {"Generic", "USB Storage-SMC", "0180", BLIST_FORCELUN |
> BLIST_INQUIRY_36},
>       {"Generic", "USB Storage-SMC", "0207", BLIST_FORCELUN |
> BLIST_INQUIRY_36},
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 62b56de38ae8..d60568f71047 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -26,6 +26,7 @@
>  #include <linux/blkdev.h>
>  #include <linux/delay.h>
>  #include <linux/jiffies.h>
> +#include <linux/bitmap.h>
>  
>  #include <scsi/scsi.h>
>  #include <scsi/scsi_cmnd.h>
> @@ -39,6 +40,7 @@
>  #include <scsi/scsi_ioctl.h>
>  #include <scsi/scsi_dh.h>
>  #include <scsi/sg.h>
> +#include <scsi/scsi_devinfo.h>
>  
>  #include "scsi_priv.h"
>  #include "scsi_logging.h"
> @@ -432,6 +434,112 @@ static void scsi_report_sense(struct
> scsi_device *sdev,
>       }
>  }
>  
> +struct aborted_cmd_blist {
> +     u8 asc;
> +     u8 ascq;
> +     int retval;
> +     const char *vendor;
> +     const char *model;
> +};
> +
> +/**
> + * scsi_strcmp - Compare space-padded string with reference string
> + * @device_str:      vendor or model field of struct scsi_device,
> + *           possibly space-padded
> + * @ref_str: reference string to compare with
> + * @len:     max size of device_str: 8 for vendor, 16 for model
> + *
> + * Return value:
> + *   -1, 0, or 1, like strcmp().
> + */
> +static int scsi_strcmp(const char *device_str, const char *ref_str,
> int len)
> +{
> +     int ref_len = strlen(ref_str);
> +     int r, i;
> +
> +     WARN_ON(ref_len > len);
> +     r = strncmp(device_str, ref_str, min(ref_len, len));
> +     if (r != 0)
> +             return r;
> +
> +     for (i = ref_len; i < strnlen(device_str, len); i++)
> +             if (device_str[i] != ' ')
> +                     return 1;
> +     return 0;
> +}
> +
> +/**
> + * scsi_aborted_cmd_quirk - Handle special return codes for ABORTED
> COMMAND
> + * @sdev:    SCSI device that returned ABORTED COMMAND.
> + * @sshdr:   Sense data
> + *
> + * Return value:
> + *   SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE
> + *
> + * Notes:
> + *   This is only called for devices that have the blist flag
> + *      BLIST_ABORTED_CMD_QUIRK set.
> + */
> +static int scsi_aborted_cmd_quirk(const struct scsi_device *sdev,
> +                               const struct scsi_sense_hdr
> *sshdr)
> +{
> +     static const struct aborted_cmd_blist blist[] = {
> +             /*
> +              * 44/00: SYMMETRIX uses this code for a variety of
> internal
> +              * issues, all of which can be recovered by retry
> +              */
> +             { 0x44, 0x00, ADD_TO_MLQUEUE, "EMC", "SYMMETRIX" },
> +             /*
> +              * c1/01: This is used by ETERNUS to indicate the
> +              * command should be retried unconditionally
> +              */
> +             { 0xc1, 0x01, ADD_TO_MLQUEUE, "FUJITSU",
> "ETERNUS_DXM" }
> +     };
> +     const struct aborted_cmd_blist *found;
> +     int ret = NEEDS_RETRY, i;
> +     static DECLARE_BITMAP(warned, ARRAY_SIZE(blist));
> +
> +     for (i = 0; i < ARRAY_SIZE(blist); i++) {
> +             if (sshdr->asc == blist[i].asc &&
> +                 sshdr->ascq == blist[i].ascq)
> +                     break;
> +     }
> +
> +     if (i >= ARRAY_SIZE(blist))
> +             return ret;
> +
> +     found = &blist[i];
> +     ret = found->retval;
> +     if (test_and_set_bit(BIT(i), warned))
> +             return ret;
> +
> +     /*
> +      * When we encounter a known ASC/ASCQ combination, it may or
> may not
> +      * match the device for which this combination is known.
> +      * Warn only once for each known ASC/ASCQ combination.
> +      * We can't afford making a string comparison every time in
> the
> +      * SCSI command return path, and a wrong match here is
> expected to be
> +      * non-fatal.
> +      */
> +     if (!scsi_strcmp(sdev->vendor, found->vendor, 8) &&
> +         !scsi_strcmp(sdev->model, found->model, 16)) {
> +             SCSI_LOG_ERROR_RECOVERY(2,
> +                     sdev_printk(KERN_INFO, sdev,
> +                                 "special retcode %s for ABORTED
> COMMAND %02x/%02x (expected)",
> +                                 scsi_mlreturn_string(ret),
> +                                 sshdr->asc, sshdr->ascq));
> +     } else {
> +             sdev_printk(KERN_WARNING, sdev,
> +                         "special retcode %s for ABORTED COMMAND
> %02x/%02x\n",
> +                         scsi_mlreturn_string(ret),
> +                         sshdr->asc, sshdr->ascq);
> +             sdev_printk(KERN_WARNING, sdev,
> +                         "(UNEXPECTED from  \"%.8s:%.16s\",
> please inform linux-scsi@vger.kernel.org)\n",
> +                         sdev->vendor, sdev->model);
> +     }
> +     return ret;
> +}
> +
>  /**
>   * scsi_check_sense - Examine scsi cmd sense
>   * @scmd:    Cmd to have sense checked.
> @@ -503,6 +611,9 @@ int scsi_check_sense(struct scsi_cmnd *scmd)
>               if (sshdr.asc == 0x10) /* DIF */
>                       return SUCCESS;
>  
> +             if (sdev->sdev_bflags & BLIST_ABORTED_CMD_QUIRK)
> +                     return scsi_aborted_cmd_quirk(sdev, &sshdr);
> +
>               return NEEDS_RETRY;
>       case NOT_READY:
>       case UNIT_ATTENTION:
> diff --git a/include/scsi/scsi_devinfo.h
> b/include/scsi/scsi_devinfo.h
> index ea67c32e870e..1f5ed53040ab 100644
> --- a/include/scsi/scsi_devinfo.h
> +++ b/include/scsi/scsi_devinfo.h
> @@ -28,6 +28,12 @@
>  #define BLIST_LARGELUN               ((__force blist_flags_t)(1 <<
> 9))
>  /* override additional length field */
>  #define BLIST_INQUIRY_36     ((__force blist_flags_t)(1 << 10))
> +/*
> + * Device uses special return codes for ABORTED COMMAND
> + * This flag must go together with matching status handling code in
> + * scsi_aborted_cmd_quirk()
> + */
> +#define BLIST_ABORTED_CMD_QUIRK      ((__force blist_flags_t)(1 <<
> 11))
>  /* do not do automatic start on add */
>  #define BLIST_NOSTARTONADD   ((__force blist_flags_t)(1 << 12))
>  /* try REPORT_LUNS even for SCSI-2 devs (if HBA supports more than 8
> LUNs) */

-- 
Dr. Martin Wilck <mwi...@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)

Reply via email to