Re: [PATCH v5 20/26] nvme: handle dma errors

Maxim Levitsky Wed, 25 Mar 2020 03:29:32 -0700

On Mon, 2020-03-16 at 00:53 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 13:52, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> > > Handling DMA errors gracefully is required for the device to pass the
> > > block/011 test ("disable PCI device while doing I/O") in the blktests
> > > suite.
> > > 
> > > With this patch the device passes the test by retrying "critical"
> > > transfers (posting of completion entries and processing of submission
> > > queue entries).
> > > 
> > > If DMA errors occur at any other point in the execution of the command
> > > (say, while mapping the PRPs), the command is aborted with a Data
> > > Transfer Error status code.
> > > 
> > > Signed-off-by: Klaus Jensen <k.jen...@samsung.com>
> > > ---
> > >  hw/block/nvme.c       | 42 +++++++++++++++++++++++++++++++++---------
> > >  hw/block/trace-events |  2 ++
> > >  include/block/nvme.h  |  2 +-
> > >  3 files changed, 36 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index f8c81b9e2202..204ae1d33234 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -73,14 +73,14 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, 
> > > hwaddr addr)
> > >      return addr >= low && addr < hi;
> > >  }
> > >  
> > > -static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
> > > +static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
> > >  {
> > >      if (n->cmbsz && nvme_addr_is_cmb(n, addr)) {
> > >          memcpy(buf, (void *) &n->cmbuf[addr - n->ctrl_mem.addr], size);
> > > -        return;
> > > +        return 0;
> > >      }
> > >  
> > > -    pci_dma_read(&n->parent_obj, addr, buf, size);
> > > +    return pci_dma_read(&n->parent_obj, addr, buf, size);
> > >  }
> > >  
> > >  static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
> > > @@ -168,6 +168,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
> > > *qsg, QEMUIOVector *iov,
> > >      uint16_t status = NVME_SUCCESS;
> > >      bool is_cmb = false;
> > >      bool prp_list_in_cmb = false;
> > > +    int ret;
> > >  
> > >      trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, 
> > > len,
> > >          prp1, prp2, num_prps);
> > > @@ -218,7 +219,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
> > > *qsg, QEMUIOVector *iov,
> > >  
> > >              nents = (len + n->page_size - 1) >> n->page_bits;
> > >              prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
> > > -            nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
> > > +            ret = nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
> > > +            if (ret) {
> > > +                trace_nvme_dev_err_addr_read(prp2);
> > > +                status = NVME_DATA_TRANSFER_ERROR;
> > > +                goto unmap;
> > > +            }
> > >              while (len != 0) {
> > >                  uint64_t prp_ent = le64_to_cpu(prp_list[i]);
> > >  
> > > @@ -237,7 +243,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
> > > *qsg, QEMUIOVector *iov,
> > >                      i = 0;
> > >                      nents = (len + n->page_size - 1) >> n->page_bits;
> > >                      prp_trans = MIN(n->max_prp_ents, nents) * 
> > > sizeof(uint64_t);
> > > -                    nvme_addr_read(n, prp_ent, (void *) prp_list, 
> > > prp_trans);
> > > +                    ret = nvme_addr_read(n, prp_ent, (void *) prp_list,
> > > +                        prp_trans);
> > > +                    if (ret) {
> > > +                        trace_nvme_dev_err_addr_read(prp_ent);
> > > +                        status = NVME_DATA_TRANSFER_ERROR;
> > > +                        goto unmap;
> > > +                    }
> > >                      prp_ent = le64_to_cpu(prp_list[i]);
> > >                  }
> > >  
> > > @@ -443,6 +455,7 @@ static void nvme_post_cqes(void *opaque)
> > >      NvmeCQueue *cq = opaque;
> > >      NvmeCtrl *n = cq->ctrl;
> > >      NvmeRequest *req, *next;
> > > +    int ret;
> > >  
> > >      QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
> > >          NvmeSQueue *sq;
> > > @@ -452,15 +465,21 @@ static void nvme_post_cqes(void *opaque)
> > >              break;
> > >          }
> > >  
> > > -        QTAILQ_REMOVE(&cq->req_list, req, entry);
> > >          sq = req->sq;
> > >          req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
> > >          req->cqe.sq_id = cpu_to_le16(sq->sqid);
> > >          req->cqe.sq_head = cpu_to_le16(sq->head);
> > >          addr = cq->dma_addr + cq->tail * n->cqe_size;
> > > -        nvme_inc_cq_tail(cq);
> > > -        pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
> > > +        ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
> > >              sizeof(req->cqe));
> > > +        if (ret) {
> > > +            trace_nvme_dev_err_addr_write(addr);
> > > +            timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> > > +                100 * SCALE_MS);
> > > +            break;
> > > +        }
> > > +        QTAILQ_REMOVE(&cq->req_list, req, entry);
> > > +        nvme_inc_cq_tail(cq);
> > >          nvme_req_clear(req);
> > >          QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
> > >      }
> > > @@ -1588,7 +1607,12 @@ static void nvme_process_sq(void *opaque)
> > >  
> > >      while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
> > >          addr = sq->dma_addr + sq->head * n->sqe_size;
> > > -        nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
> > > +        if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
> > > +            trace_nvme_dev_err_addr_read(addr);
> > > +            timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> > > +                100 * SCALE_MS);
> > > +            break;
> > > +        }
> > 
> > Note that once the driver is optimized for performance, these timers must 
> > go,
> > since they run on main thread and also add latency to each request.
> > But for now this change is all right.
> > 
> > About user triggering this each 100ms on purpose, I don't think that this 
> > is such a big issue.
> > Maybe up that to 500ms or even one second, since this condition will not
> > happen in real life usage of the device anyway.
> > 
> 
> I bumped it to 500ms.
Great!
> 
> > >          nvme_inc_sq_head(sq);
> > >  
> > >          req = QTAILQ_FIRST(&sq->req_list);
> > > diff --git a/hw/block/trace-events b/hw/block/trace-events
> > > index 90a57fb6099a..09bfb3782dd0 100644
> > > --- a/hw/block/trace-events
> > > +++ b/hw/block/trace-events
> > > @@ -83,6 +83,8 @@ nvme_dev_mmio_shutdown_cleared(void) "shutdown bit 
> > > cleared"
> > >  nvme_dev_err_mdts(uint16_t cid, size_t mdts, size_t len) "cid %"PRIu16" 
> > > mdts %"PRIu64" len %"PRIu64""
> > >  nvme_dev_err_prinfo(uint16_t cid, uint16_t ctrl) "cid %"PRIu16" ctrl 
> > > %"PRIu16""
> > >  nvme_dev_err_aio(uint16_t cid, void *aio, const char *blkname, uint64_t 
> > > offset, const char *opc, void *req, uint16_t status) "cid %"PRIu16" aio 
> > > %p blk \"%s\" offset %"PRIu64" opc \"%s\" req %p
> > > status 0x%"PRIx16""
> > > +nvme_dev_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
> > > +nvme_dev_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
> > >  nvme_dev_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
> > >  nvme_dev_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is 
> > > null or not page aligned: 0x%"PRIx64""
> > >  nvme_dev_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page 
> > > aligned: 0x%"PRIx64""
> > > diff --git a/include/block/nvme.h b/include/block/nvme.h
> > > index c1de92179596..a873776d98b8 100644
> > > --- a/include/block/nvme.h
> > > +++ b/include/block/nvme.h
> > > @@ -418,7 +418,7 @@ enum NvmeStatusCodes {
> > >      NVME_INVALID_OPCODE         = 0x0001,
> > >      NVME_INVALID_FIELD          = 0x0002,
> > >      NVME_CID_CONFLICT           = 0x0003,
> > > -    NVME_DATA_TRAS_ERROR        = 0x0004,
> > > +    NVME_DATA_TRANSFER_ERROR    = 0x0004,
> > >      NVME_POWER_LOSS_ABORT       = 0x0005,
> > >      NVME_INTERNAL_DEV_ERROR     = 0x0006,
> > >      NVME_CMD_ABORT_REQ          = 0x0007,
> > 
> > 
> > Best regards,
> >     Maxim Levitsky
> > 
> 
>



Best regards,
        Maxim Levitsky

Re: [PATCH v5 20/26] nvme: handle dma errors

Reply via email to