Re: [PATCH v5 20/26] nvme: handle dma errors

2020-03-25 Thread Maxim Levitsky
On Mon, 2020-03-16 at 00:53 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 13:52, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> > > [...]

Re: [PATCH v5 20/26] nvme: handle dma errors

2020-03-16 Thread Klaus Birkelund Jensen
On Feb 12 13:52, Maxim Levitsky wrote:
> On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> > [...]

Re: [PATCH v5 20/26] nvme: handle dma errors

2020-02-12 Thread Maxim Levitsky
On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:
> Handling DMA errors gracefully is required for the device to pass the
> block/011 test ("disable PCI device while doing I/O") in the blktests
> suite.
> 
> With this patch the device passes the test by retrying "critical"
> transfers (posting of completion entries and processing of submission
> queue entries).
> 
> If DMA errors occur at any other point in the execution of the command
> (say, while mapping the PRPs), the command is aborted with a Data
> Transfer Error status code.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 42 +-
>  hw/block/trace-events |  2 ++
>  include/block/nvme.h  |  2 +-
>  3 files changed, 36 insertions(+), 10 deletions(-)
> 
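
Note: "Data Transfer Error" is generic command status 0x4 in the NVMe
specification. The two-line include/block/nvme.h change from the diffstat is
not quoted below; assuming it simply introduces the constant used by the new
error paths, it would look roughly like:

    /* sketch only: the actual include/block/nvme.h hunk is not quoted;
     * Data Transfer Error is generic command status 0x4 per the NVMe spec */
    NVME_DATA_TRANSFER_ERROR = 0x0004,
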
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f8c81b9e2202..204ae1d33234 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -73,14 +73,14 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
>  return addr >= low && addr < hi;
>  }
>  
> -static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
> +static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
>  {
>  if (n->cmbsz && nvme_addr_is_cmb(n, addr)) {
>  memcpy(buf, (void *) &n->cmbuf[addr - n->ctrl_mem.addr], size);
> -return;
> +return 0;
>  }
>  
> -pci_dma_read(&n->parent_obj, addr, buf, size);
> +return pci_dma_read(&n->parent_obj, addr, buf, size);
>  }
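
Note: the CMB fast path above copies from the controller's own memory buffer
and cannot fail, hence the unconditional return 0; the PCI path now propagates
whatever pci_dma_read() reports. The assumed contract is the usual QEMU MemTx
one: 0 (MEMTX_OK) on success, non-zero when the transaction fails, e.g. once
the guest disables PCI bus mastering during block/011. An illustrative caller
pattern, not part of the patch:

    NvmeCmd cmd;
    if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
        /* DMA failed (e.g. MEMTX_DECODE_ERROR); abort the command with a
         * status code or schedule a retry, as the hunks below do */
    }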
>  
>  static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
> @@ -168,6 +168,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
>  uint16_t status = NVME_SUCCESS;
>  bool is_cmb = false;
>  bool prp_list_in_cmb = false;
> +int ret;
>  
>  trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, len,
>  prp1, prp2, num_prps);
> @@ -218,7 +219,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
>  
>  nents = (len + n->page_size - 1) >> n->page_bits;
>  prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
> -nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
> +ret = nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
> +if (ret) {
> +trace_nvme_dev_err_addr_read(prp2);
> +status = NVME_DATA_TRANSFER_ERROR;
> +goto unmap;
> +}
>  while (len != 0) {
>  uint64_t prp_ent = le64_to_cpu(prp_list[i]);
>  
> @@ -237,7 +243,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
>  i = 0;
>  nents = (len + n->page_size - 1) >> n->page_bits;
>  prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
> -nvme_addr_read(n, prp_ent, (void *) prp_list, prp_trans);
> +ret = nvme_addr_read(n, prp_ent, (void *) prp_list,
> +prp_trans);
> +if (ret) {
> +trace_nvme_dev_err_addr_read(prp_ent);
> +status = NVME_DATA_TRANSFER_ERROR;
> +goto unmap;
> +}
>  prp_ent = le64_to_cpu(prp_list[i]);
>  }
>  
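
Note: the two hw/block/trace-events additions from the diffstat are not
quoted. Given the calls to trace_nvme_dev_err_addr_read() and
trace_nvme_dev_err_addr_write(), they are presumably along these lines:

    nvme_dev_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
    nvme_dev_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
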
> @@ -443,6 +455,7 @@ static void nvme_post_cqes(void *opaque)
>  NvmeCQueue *cq = opaque;
>  NvmeCtrl *n = cq->ctrl;
>  NvmeRequest *req, *next;
> +int ret;
>  
>  QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
>  NvmeSQueue *sq;
> @@ -452,15 +465,21 @@ static void nvme_post_cqes(void *opaque)
>  break;
>  }
>  
> -QTAILQ_REMOVE(&cq->req_list, req, entry);
>  sq = req->sq;
>  req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
>  req->cqe.sq_id = cpu_to_le16(sq->sqid);
>  req->cqe.sq_head = cpu_to_le16(sq->head);
>  addr = cq->dma_addr + cq->tail * n->cqe_size;
> -nvme_inc_cq_tail(cq);
> -pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
> +ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
>  sizeof(req->cqe));
> +if (ret) {
> +trace_nvme_dev_err_addr_write(addr);
> +timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> +100 * SCALE_MS);
> +break;
> +}
> +QTAILQ_REMOVE(&cq->req_list, req, entry);
> +nvme_inc_cq_tail(cq);
>  nvme_req_clear(req);
>  QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
>  }
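
Note the reordering above: QTAILQ_REMOVE() and nvme_inc_cq_tail() now run only
after the completion entry has been written successfully. On failure the
request stays on cq->req_list and the tail pointer is untouched, so when the
re-armed timer fires 100 ms later the same entry is posted to the same CQ
slot, making the retry idempotent.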
> @@ -1588,7 +1607,12 @@ static void nvme_process_sq(void *opaque)
>  
>  while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
>  addr = sq->dma_addr + sq->head * n->sqe_size;
> -nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
> +if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
> +
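
The quote is truncated at this point. Since the commit message lists
processing of submission queue entries among the retried "critical" transfers,
the remainder of this hunk presumably mirrors the completion-path retry above.
A reconstruction, not the quoted patch:

            /* hypothetical continuation of the truncated hunk */
            trace_nvme_dev_err_addr_read(addr);
            timer_mod(sq->timer,
                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 100 * SCALE_MS);
            break;
        }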