On Mon, Apr 7, 2025 at 12:00 PM Kevin Wolf <kw...@redhat.com> wrote: > > Originally, all failed SG_IO requests called scsi_handle_rw_error() to > apply the configured error policy. However, commit f3126d65, which was > supposed to be a mere refactoring for scsi-disk.c, broke this and > accidentally completed the SCSI request without considering the error > policy any more if the error was signalled in the host_status field. > > Apart from the commit message not describing the chance as intended, > errors indicated in host_status are also obviously backend errors and > not something the guest must deal with indepdently of the error policy. > > This behaviour means that some recoverable errors (such as a path error > in multipath configurations) were reported to the guest anyway, which > might not expect it and might consider its disk broken. > > Make sure that we apply the error policy again for host_status errors, > too. This addresses an existing FIXME comment and allows us to remove > some comments warning that callbacks weren't always called. With this > fix, they are called in all cases again. > > The return value passed to the request callback doesn't have more free > values that could be used to indicate host_status errors as well as SAM > status codes and negative errno. Store the value in the host_status > field of the SCSIRequest instead and use -ENODEV as the return value (if > a path hasn't been reachable for a while, blk_aio_ioctl() will return > -ENODEV instead of just setting host_status, so just reuse it here - > it's not necessarily entirely accurate, but it's as good as any errno). > > Cc: qemu-sta...@nongnu.org > Fixes: f3126d65b393 ('scsi: move host_status handling into SCSI drivers') > Signed-off-by: Kevin Wolf <kw...@redhat.com> > --- > hw/scsi/scsi-disk.c | 39 +++++++++++++++++++++++++-------------- > 1 file changed, 25 insertions(+), 14 deletions(-)
Reviewed-by: Stefan Hajnoczi <stefa...@redhat.com> > > diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c > index 8da1d5a77c..e59632e9b1 100644 > --- a/hw/scsi/scsi-disk.c > +++ b/hw/scsi/scsi-disk.c > @@ -68,10 +68,9 @@ struct SCSIDiskClass { > SCSIDeviceClass parent_class; > /* > * Callbacks receive ret == 0 for success. Errors are represented either > as > - * negative errno values, or as positive SAM status codes. > - * > - * Beware: For errors returned in host_status, the function may directly > - * complete the request and never call the callback. > + * negative errno values, or as positive SAM status codes. For > host_status > + * errors, the function passes ret == -ENODEV and sets the host_status > field > + * of the SCSIRequest. > */ > DMAIOFunc *dma_readv; > DMAIOFunc *dma_writev; > @@ -225,11 +224,26 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int > ret, bool acct_failed) > SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); > SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); > SCSISense sense = SENSE_CODE(NO_SENSE); > + int16_t host_status; > int error; > bool req_has_sense = false; > BlockErrorAction action; > int status; > > + /* > + * host_status should only be set for SG_IO requests that came back with > a > + * host_status error in scsi_block_sgio_complete(). This error path > passes > + * -ENODEV as the return value. > + * > + * Reset host_status in the request because we may still want to complete > + * the request successfully with the 'stop' or 'ignore' error policy. > + */ > + host_status = r->req.host_status; > + if (host_status != -1) { > + assert(ret == -ENODEV); > + r->req.host_status = -1; > + } > + > if (ret < 0) { > status = scsi_sense_from_errno(-ret, &sense); > error = -ret; > @@ -289,6 +303,10 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int > ret, bool acct_failed) > if (acct_failed) { > block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); > } > + if (host_status != -1) { > + scsi_req_complete_failed(&r->req, host_status); > + return true; > + } > if (req_has_sense) { > sdc->update_sense(&r->req); > } else if (status == CHECK_CONDITION) { > @@ -409,7 +427,6 @@ done: > scsi_req_unref(&r->req); > } > > -/* May not be called in all error cases, don't rely on cleanup here */ > static void scsi_dma_complete(void *opaque, int ret) > { > SCSIDiskReq *r = (SCSIDiskReq *)opaque; > @@ -448,7 +465,6 @@ done: > scsi_req_unref(&r->req); > } > > -/* May not be called in all error cases, don't rely on cleanup here */ > static void scsi_read_complete(void *opaque, int ret) > { > SCSIDiskReq *r = (SCSIDiskReq *)opaque; > @@ -585,7 +601,6 @@ done: > scsi_req_unref(&r->req); > } > > -/* May not be called in all error cases, don't rely on cleanup here */ > static void scsi_write_complete(void * opaque, int ret) > { > SCSIDiskReq *r = (SCSIDiskReq *)opaque; > @@ -2846,14 +2861,10 @@ static void scsi_block_sgio_complete(void *opaque, > int ret) > sg_io_hdr_t *io_hdr = &req->io_header; > > if (ret == 0) { > - /* FIXME This skips calling req->cb() and any cleanup in it */ > if (io_hdr->host_status != SCSI_HOST_OK) { > - scsi_req_complete_failed(&r->req, io_hdr->host_status); > - scsi_req_unref(&r->req); > - return; > - } > - > - if (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT) { > + r->req.host_status = io_hdr->host_status; > + ret = -ENODEV; > + } else if (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT) { > ret = BUSY; > } else { > ret = io_hdr->status; > -- > 2.48.1 >