Re: [PATCH v5 20/26] nvme: handle dma errors

2020-03-25 Thread Maxim Levitsky
On Mon, 2020-03-16 at 00:53 -0700, Klaus Birkelund Jensen wrote:

Re: [PATCH v5 20/26] nvme: handle dma errors

2020-03-16 Thread Klaus Birkelund Jensen
On Feb 12 13:52, Maxim Levitsky wrote:

Re: [PATCH v5 20/26] nvme: handle dma errors

2020-02-12 Thread Maxim Levitsky
On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote:

[PATCH v5 20/26] nvme: handle dma errors

2020-02-04 Thread Klaus Jensen
Handling DMA errors gracefully is required for the device to pass the
block/011 test ("disable PCI device while doing I/O") in the blktests
suite.

With this patch the device passes the test by retrying "critical"
transfers (posting of completion entries and processing of submission
queue entries).

If DMA errors occur at any other point in the execution of the command
(say, while mapping the PRPs), the command is aborted with a Data
Transfer Error status code.
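
Condensed from the hunks below, the two strategies look like this (an
illustrative sketch assembled from the diff, using the variable names of
the surrounding nvme.c functions; not a drop-in excerpt):

    /* "Critical" transfer (nvme_post_cqes): if the DMA write of a
     * completion entry fails, keep the request on cq->req_list and
     * re-arm the timer so posting is retried roughly 100 ms later. */
    if (pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
                      sizeof(req->cqe))) {
        trace_nvme_dev_err_addr_write(addr);
        timer_mod(cq->timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 100 * SCALE_MS);
        break;
    }

    /* Any other transfer (nvme_map_prp): a failed PRP list read simply
     * fails the command with Data Transfer Error. */
    if (nvme_addr_read(n, prp_ent, (void *) prp_list, prp_trans)) {
        trace_nvme_dev_err_addr_read(prp_ent);
        status = NVME_DATA_TRANSFER_ERROR;
        goto unmap;
    }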

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c       | 42 +++++++++++++++++++++++++++++++++---------
 hw/block/trace-events |  2 ++
 include/block/nvme.h  |  2 +-
 3 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index f8c81b9e2202..204ae1d33234 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -73,14 +73,14 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
     return addr >= low && addr < hi;
 }
 
-static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
     if (n->cmbsz && nvme_addr_is_cmb(n, addr)) {
         memcpy(buf, (void *) &n->cmbuf[addr - n->ctrl_mem.addr], size);
-        return;
+        return 0;
     }
 
-    pci_dma_read(&n->parent_obj, addr, buf, size);
+    return pci_dma_read(&n->parent_obj, addr, buf, size);
 }
 
 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
@@ -168,6 +168,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
     uint16_t status = NVME_SUCCESS;
     bool is_cmb = false;
     bool prp_list_in_cmb = false;
+    int ret;
 
     trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, len,
                            prp1, prp2, num_prps);
@@ -218,7 +219,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
 
         nents = (len + n->page_size - 1) >> n->page_bits;
         prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-        nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
+        ret = nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
+        if (ret) {
+            trace_nvme_dev_err_addr_read(prp2);
+            status = NVME_DATA_TRANSFER_ERROR;
+            goto unmap;
+        }
         while (len != 0) {
             uint64_t prp_ent = le64_to_cpu(prp_list[i]);
 
@@ -237,7 +243,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
                 i = 0;
                 nents = (len + n->page_size - 1) >> n->page_bits;
                 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-                nvme_addr_read(n, prp_ent, (void *) prp_list, prp_trans);
+                ret = nvme_addr_read(n, prp_ent, (void *) prp_list,
+                                     prp_trans);
+                if (ret) {
+                    trace_nvme_dev_err_addr_read(prp_ent);
+                    status = NVME_DATA_TRANSFER_ERROR;
+                    goto unmap;
+                }
                 prp_ent = le64_to_cpu(prp_list[i]);
             }
 
@@ -443,6 +455,7 @@ static void nvme_post_cqes(void *opaque)
     NvmeCQueue *cq = opaque;
     NvmeCtrl *n = cq->ctrl;
     NvmeRequest *req, *next;
+    int ret;
 
     QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
         NvmeSQueue *sq;
@@ -452,15 +465,21 @@ static void nvme_post_cqes(void *opaque)
             break;
         }
 
-        QTAILQ_REMOVE(&cq->req_list, req, entry);
         sq = req->sq;
         req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
         req->cqe.sq_id = cpu_to_le16(sq->sqid);
         req->cqe.sq_head = cpu_to_le16(sq->head);
         addr = cq->dma_addr + cq->tail * n->cqe_size;
-        nvme_inc_cq_tail(cq);
-        pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
+        ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
             sizeof(req->cqe));
+        if (ret) {
+            trace_nvme_dev_err_addr_write(addr);
+            timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                100 * SCALE_MS);
+            break;
+        }
+        QTAILQ_REMOVE(&cq->req_list, req, entry);
+        nvme_inc_cq_tail(cq);
         nvme_req_clear(req);
         QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
     }
@@ -1588,7 +1607,12 @@ static void nvme_process_sq(void *opaque)
 
     while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
         addr = sq->dma_addr + sq->head * n->sqe_size;
-        nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
+        if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
+            trace_nvme_dev_err_addr_read(addr);
+            timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                100 * SCALE_MS);
+            break;
+        }
         nvme_inc_sq_head(sq);
 
         req = QTAILQ_FIRST(&sq->req_list);
diff --git a/hw/block/trace-events b/hw/block/trace-events
index
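
The quoted diff is truncated at this point. Per the diffstat, the remaining
hunks add the two trace events used above and change one line in
include/block/nvme.h for the NVME_DATA_TRANSFER_ERROR status code (0x4,
"Data Transfer Error", in the NVMe specification). A plausible form of the
trace-events additions (an assumption in the usual trace-events style, not
copied from the patch):

    nvme_dev_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
    nvme_dev_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""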