On Mon, Apr 7, 2025 at 12:00 PM Kevin Wolf <kw...@redhat.com> wrote:
>
> Originally, all failed SG_IO requests called scsi_handle_rw_error() to
> apply the configured error policy. However, commit f3126d65, which was
> supposed to be a mere refactoring for scsi-disk.c, broke this and
> accidentally completed the SCSI request without considering the error
> policy any more if the error was signalled in the host_status field.
>
> Apart from the commit message not describing the chance as intended,
> errors indicated in host_status are also obviously backend errors and
> not something the guest must deal with indepdently of the error policy.
>
> This behaviour means that some recoverable errors (such as a path error
> in multipath configurations) were reported to the guest anyway, which
> might not expect it and might consider its disk broken.
>
> Make sure that we apply the error policy again for host_status errors,
> too. This addresses an existing FIXME comment and allows us to remove
> some comments warning that callbacks weren't always called. With this
> fix, they are called in all cases again.
>
> The return value passed to the request callback doesn't have more free
> values that could be used to indicate host_status errors as well as SAM
> status codes and negative errno. Store the value in the host_status
> field of the SCSIRequest instead and use -ENODEV as the return value (if
> a path hasn't been reachable for a while, blk_aio_ioctl() will return
> -ENODEV instead of just setting host_status, so just reuse it here -
> it's not necessarily entirely accurate, but it's as good as any errno).
>
> Cc: qemu-sta...@nongnu.org
> Fixes: f3126d65b393 ('scsi: move host_status handling into SCSI drivers')
> Signed-off-by: Kevin Wolf <kw...@redhat.com>
> ---
>  hw/scsi/scsi-disk.c | 39 +++++++++++++++++++++++++--------------
>  1 file changed, 25 insertions(+), 14 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefa...@redhat.com>

>
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 8da1d5a77c..e59632e9b1 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -68,10 +68,9 @@ struct SCSIDiskClass {
>      SCSIDeviceClass parent_class;
>      /*
>       * Callbacks receive ret == 0 for success. Errors are represented either 
> as
> -     * negative errno values, or as positive SAM status codes.
> -     *
> -     * Beware: For errors returned in host_status, the function may directly
> -     * complete the request and never call the callback.
> +     * negative errno values, or as positive SAM status codes. For 
> host_status
> +     * errors, the function passes ret == -ENODEV and sets the host_status 
> field
> +     * of the SCSIRequest.
>       */
>      DMAIOFunc       *dma_readv;
>      DMAIOFunc       *dma_writev;
> @@ -225,11 +224,26 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int 
> ret, bool acct_failed)
>      SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
>      SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
>      SCSISense sense = SENSE_CODE(NO_SENSE);
> +    int16_t host_status;
>      int error;
>      bool req_has_sense = false;
>      BlockErrorAction action;
>      int status;
>
> +    /*
> +     * host_status should only be set for SG_IO requests that came back with 
> a
> +     * host_status error in scsi_block_sgio_complete(). This error path 
> passes
> +     * -ENODEV as the return value.
> +     *
> +     * Reset host_status in the request because we may still want to complete
> +     * the request successfully with the 'stop' or 'ignore' error policy.
> +     */
> +    host_status = r->req.host_status;
> +    if (host_status != -1) {
> +        assert(ret == -ENODEV);
> +        r->req.host_status = -1;
> +    }
> +
>      if (ret < 0) {
>          status = scsi_sense_from_errno(-ret, &sense);
>          error = -ret;
> @@ -289,6 +303,10 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int 
> ret, bool acct_failed)
>          if (acct_failed) {
>              block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
>          }
> +        if (host_status != -1) {
> +            scsi_req_complete_failed(&r->req, host_status);
> +            return true;
> +        }
>          if (req_has_sense) {
>              sdc->update_sense(&r->req);
>          } else if (status == CHECK_CONDITION) {
> @@ -409,7 +427,6 @@ done:
>      scsi_req_unref(&r->req);
>  }
>
> -/* May not be called in all error cases, don't rely on cleanup here */
>  static void scsi_dma_complete(void *opaque, int ret)
>  {
>      SCSIDiskReq *r = (SCSIDiskReq *)opaque;
> @@ -448,7 +465,6 @@ done:
>      scsi_req_unref(&r->req);
>  }
>
> -/* May not be called in all error cases, don't rely on cleanup here */
>  static void scsi_read_complete(void *opaque, int ret)
>  {
>      SCSIDiskReq *r = (SCSIDiskReq *)opaque;
> @@ -585,7 +601,6 @@ done:
>      scsi_req_unref(&r->req);
>  }
>
> -/* May not be called in all error cases, don't rely on cleanup here */
>  static void scsi_write_complete(void * opaque, int ret)
>  {
>      SCSIDiskReq *r = (SCSIDiskReq *)opaque;
> @@ -2846,14 +2861,10 @@ static void scsi_block_sgio_complete(void *opaque, 
> int ret)
>      sg_io_hdr_t *io_hdr = &req->io_header;
>
>      if (ret == 0) {
> -        /* FIXME This skips calling req->cb() and any cleanup in it */
>          if (io_hdr->host_status != SCSI_HOST_OK) {
> -            scsi_req_complete_failed(&r->req, io_hdr->host_status);
> -            scsi_req_unref(&r->req);
> -            return;
> -        }
> -
> -        if (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT) {
> +            r->req.host_status = io_hdr->host_status;
> +            ret = -ENODEV;
> +        } else if (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT) {
>              ret = BUSY;
>          } else {
>              ret = io_hdr->status;
> --
> 2.48.1
>


Reply via email to