Re: [PATCH v5 20/26] nvme: handle dma errors
On Mon, 2020-03-16 at 00:53 -0700, Klaus Birkelund Jensen wrote: > On Feb 12 13:52, Maxim Levitsky wrote: > > On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote: > > > Handling DMA errors gracefully is required for the device to pass the > > > block/011 test ("disable PCI device while doing I/O") in the blktests > > > suite. > > > > > > With this patch the device passes the test by retrying "critical" > > > transfers (posting of completion entries and processing of submission > > > queue entries). > > > > > > If DMA errors occur at any other point in the execution of the command > > > (say, while mapping the PRPs), the command is aborted with a Data > > > Transfer Error status code. > > > > > > Signed-off-by: Klaus Jensen > > > --- > > > hw/block/nvme.c | 42 +- > > > hw/block/trace-events | 2 ++ > > > include/block/nvme.h | 2 +- > > > 3 files changed, 36 insertions(+), 10 deletions(-) > > > > > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c > > > index f8c81b9e2202..204ae1d33234 100644 > > > --- a/hw/block/nvme.c > > > +++ b/hw/block/nvme.c > > > @@ -73,14 +73,14 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, > > > hwaddr addr) > > > return addr >= low && addr < hi; > > > } > > > > > > -static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) > > > +static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) > > > { > > > if (n->cmbsz && nvme_addr_is_cmb(n, addr)) { > > > memcpy(buf, (void *) >cmbuf[addr - n->ctrl_mem.addr], size); > > > -return; > > > +return 0; > > > } > > > > > > -pci_dma_read(>parent_obj, addr, buf, size); > > > +return pci_dma_read(>parent_obj, addr, buf, size); > > > } > > > > > > static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) > > > @@ -168,6 +168,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList > > > *qsg, QEMUIOVector *iov, > > > uint16_t status = NVME_SUCCESS; > > > bool is_cmb = false; > > > bool prp_list_in_cmb = false; > > > +int ret; > > > > > > 
trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, > > > len, > > > prp1, prp2, num_prps); > > > @@ -218,7 +219,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList > > > *qsg, QEMUIOVector *iov, > > > > > > nents = (len + n->page_size - 1) >> n->page_bits; > > > prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); > > > -nvme_addr_read(n, prp2, (void *) prp_list, prp_trans); > > > +ret = nvme_addr_read(n, prp2, (void *) prp_list, prp_trans); > > > +if (ret) { > > > +trace_nvme_dev_err_addr_read(prp2); > > > +status = NVME_DATA_TRANSFER_ERROR; > > > +goto unmap; > > > +} > > > while (len != 0) { > > > uint64_t prp_ent = le64_to_cpu(prp_list[i]); > > > > > > @@ -237,7 +243,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList > > > *qsg, QEMUIOVector *iov, > > > i = 0; > > > nents = (len + n->page_size - 1) >> n->page_bits; > > > prp_trans = MIN(n->max_prp_ents, nents) * > > > sizeof(uint64_t); > > > -nvme_addr_read(n, prp_ent, (void *) prp_list, > > > prp_trans); > > > +ret = nvme_addr_read(n, prp_ent, (void *) prp_list, > > > +prp_trans); > > > +if (ret) { > > > +trace_nvme_dev_err_addr_read(prp_ent); > > > +status = NVME_DATA_TRANSFER_ERROR; > > > +goto unmap; > > > +} > > > prp_ent = le64_to_cpu(prp_list[i]); > > > } > > > > > > @@ -443,6 +455,7 @@ static void nvme_post_cqes(void *opaque) > > > NvmeCQueue *cq = opaque; > > > NvmeCtrl *n = cq->ctrl; > > > NvmeRequest *req, *next; > > > +int ret; > > > > > > QTAILQ_FOREACH_SAFE(req, >req_list, entry, next) { > > > NvmeSQueue *sq; > > > @@ -452,15 +465,21 @@ static void nvme_post_cqes(void *opaque) > > > break; > > > } > > > > > > -QTAILQ_REMOVE(>req_list, req, entry); > > > sq = req->sq; > > > req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase); > > > req->cqe.sq_id = cpu_to_le16(sq->sqid); > > > req->cqe.sq_head = cpu_to_le16(sq->head); > > > addr = cq->dma_addr + cq->tail * n->cqe_size; > > > -nvme_inc_cq_tail(cq); > > > -pci_dma_write(>parent_obj, addr, (void *)>cqe, > 
> > +ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe, > > > sizeof(req->cqe)); > > > +if (ret) { > > > +trace_nvme_dev_err_addr_write(addr); > > > +timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + > > > +100 * SCALE_MS); > > >
Re: [PATCH v5 20/26] nvme: handle dma errors
On Feb 12 13:52, Maxim Levitsky wrote: > On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote: > > Handling DMA errors gracefully is required for the device to pass the > > block/011 test ("disable PCI device while doing I/O") in the blktests > > suite. > > > > With this patch the device passes the test by retrying "critical" > > transfers (posting of completion entries and processing of submission > > queue entries). > > > > If DMA errors occur at any other point in the execution of the command > > (say, while mapping the PRPs), the command is aborted with a Data > > Transfer Error status code. > > > > Signed-off-by: Klaus Jensen > > --- > > hw/block/nvme.c | 42 +- > > hw/block/trace-events | 2 ++ > > include/block/nvme.h | 2 +- > > 3 files changed, 36 insertions(+), 10 deletions(-) > > > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c > > index f8c81b9e2202..204ae1d33234 100644 > > --- a/hw/block/nvme.c > > +++ b/hw/block/nvme.c > > @@ -73,14 +73,14 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr > > addr) > > return addr >= low && addr < hi; > > } > > > > -static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) > > +static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) > > { > > if (n->cmbsz && nvme_addr_is_cmb(n, addr)) { > > memcpy(buf, (void *) >cmbuf[addr - n->ctrl_mem.addr], size); > > -return; > > +return 0; > > } > > > > -pci_dma_read(>parent_obj, addr, buf, size); > > +return pci_dma_read(>parent_obj, addr, buf, size); > > } > > > > static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) > > @@ -168,6 +168,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList > > *qsg, QEMUIOVector *iov, > > uint16_t status = NVME_SUCCESS; > > bool is_cmb = false; > > bool prp_list_in_cmb = false; > > +int ret; > > > > trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, len, > > prp1, prp2, num_prps); > > @@ -218,7 +219,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList > > *qsg, QEMUIOVector 
*iov, > > > > nents = (len + n->page_size - 1) >> n->page_bits; > > prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); > > -nvme_addr_read(n, prp2, (void *) prp_list, prp_trans); > > +ret = nvme_addr_read(n, prp2, (void *) prp_list, prp_trans); > > +if (ret) { > > +trace_nvme_dev_err_addr_read(prp2); > > +status = NVME_DATA_TRANSFER_ERROR; > > +goto unmap; > > +} > > while (len != 0) { > > uint64_t prp_ent = le64_to_cpu(prp_list[i]); > > > > @@ -237,7 +243,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList > > *qsg, QEMUIOVector *iov, > > i = 0; > > nents = (len + n->page_size - 1) >> n->page_bits; > > prp_trans = MIN(n->max_prp_ents, nents) * > > sizeof(uint64_t); > > -nvme_addr_read(n, prp_ent, (void *) prp_list, > > prp_trans); > > +ret = nvme_addr_read(n, prp_ent, (void *) prp_list, > > +prp_trans); > > +if (ret) { > > +trace_nvme_dev_err_addr_read(prp_ent); > > +status = NVME_DATA_TRANSFER_ERROR; > > +goto unmap; > > +} > > prp_ent = le64_to_cpu(prp_list[i]); > > } > > > > @@ -443,6 +455,7 @@ static void nvme_post_cqes(void *opaque) > > NvmeCQueue *cq = opaque; > > NvmeCtrl *n = cq->ctrl; > > NvmeRequest *req, *next; > > +int ret; > > > > QTAILQ_FOREACH_SAFE(req, >req_list, entry, next) { > > NvmeSQueue *sq; > > @@ -452,15 +465,21 @@ static void nvme_post_cqes(void *opaque) > > break; > > } > > > > -QTAILQ_REMOVE(>req_list, req, entry); > > sq = req->sq; > > req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase); > > req->cqe.sq_id = cpu_to_le16(sq->sqid); > > req->cqe.sq_head = cpu_to_le16(sq->head); > > addr = cq->dma_addr + cq->tail * n->cqe_size; > > -nvme_inc_cq_tail(cq); > > -pci_dma_write(>parent_obj, addr, (void *)>cqe, > > +ret = pci_dma_write(>parent_obj, addr, (void *)>cqe, > > sizeof(req->cqe)); > > +if (ret) { > > +trace_nvme_dev_err_addr_write(addr); > > +timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + > > +100 * SCALE_MS); > > +break; > > +} > > +QTAILQ_REMOVE(>req_list, req, entry); > > 
+nvme_inc_cq_tail(cq); > > nvme_req_clear(req); > > QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); > > } > > @@ -1588,7 +1607,12 @@ static void nvme_process_sq(void *opaque) > > >
Re: [PATCH v5 20/26] nvme: handle dma errors
On Tue, 2020-02-04 at 10:52 +0100, Klaus Jensen wrote: > Handling DMA errors gracefully is required for the device to pass the > block/011 test ("disable PCI device while doing I/O") in the blktests > suite. > > With this patch the device passes the test by retrying "critical" > transfers (posting of completion entries and processing of submission > queue entries). > > If DMA errors occur at any other point in the execution of the command > (say, while mapping the PRPs), the command is aborted with a Data > Transfer Error status code. > > Signed-off-by: Klaus Jensen > --- > hw/block/nvme.c | 42 +- > hw/block/trace-events | 2 ++ > include/block/nvme.h | 2 +- > 3 files changed, 36 insertions(+), 10 deletions(-) > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c > index f8c81b9e2202..204ae1d33234 100644 > --- a/hw/block/nvme.c > +++ b/hw/block/nvme.c > @@ -73,14 +73,14 @@ static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr > addr) > return addr >= low && addr < hi; > } > > -static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) > +static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) > { > if (n->cmbsz && nvme_addr_is_cmb(n, addr)) { > memcpy(buf, (void *) >cmbuf[addr - n->ctrl_mem.addr], size); > -return; > +return 0; > } > > -pci_dma_read(>parent_obj, addr, buf, size); > +return pci_dma_read(>parent_obj, addr, buf, size); > } > > static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) > @@ -168,6 +168,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList > *qsg, QEMUIOVector *iov, > uint16_t status = NVME_SUCCESS; > bool is_cmb = false; > bool prp_list_in_cmb = false; > +int ret; > > trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, len, > prp1, prp2, num_prps); > @@ -218,7 +219,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList > *qsg, QEMUIOVector *iov, > > nents = (len + n->page_size - 1) >> n->page_bits; > prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); > -nvme_addr_read(n, prp2, 
(void *) prp_list, prp_trans); > +ret = nvme_addr_read(n, prp2, (void *) prp_list, prp_trans); > +if (ret) { > +trace_nvme_dev_err_addr_read(prp2); > +status = NVME_DATA_TRANSFER_ERROR; > +goto unmap; > +} > while (len != 0) { > uint64_t prp_ent = le64_to_cpu(prp_list[i]); > > @@ -237,7 +243,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList > *qsg, QEMUIOVector *iov, > i = 0; > nents = (len + n->page_size - 1) >> n->page_bits; > prp_trans = MIN(n->max_prp_ents, nents) * > sizeof(uint64_t); > -nvme_addr_read(n, prp_ent, (void *) prp_list, prp_trans); > +ret = nvme_addr_read(n, prp_ent, (void *) prp_list, > +prp_trans); > +if (ret) { > +trace_nvme_dev_err_addr_read(prp_ent); > +status = NVME_DATA_TRANSFER_ERROR; > +goto unmap; > +} > prp_ent = le64_to_cpu(prp_list[i]); > } > > @@ -443,6 +455,7 @@ static void nvme_post_cqes(void *opaque) > NvmeCQueue *cq = opaque; > NvmeCtrl *n = cq->ctrl; > NvmeRequest *req, *next; > +int ret; > > QTAILQ_FOREACH_SAFE(req, >req_list, entry, next) { > NvmeSQueue *sq; > @@ -452,15 +465,21 @@ static void nvme_post_cqes(void *opaque) > break; > } > > -QTAILQ_REMOVE(>req_list, req, entry); > sq = req->sq; > req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase); > req->cqe.sq_id = cpu_to_le16(sq->sqid); > req->cqe.sq_head = cpu_to_le16(sq->head); > addr = cq->dma_addr + cq->tail * n->cqe_size; > -nvme_inc_cq_tail(cq); > -pci_dma_write(>parent_obj, addr, (void *)>cqe, > +ret = pci_dma_write(>parent_obj, addr, (void *)>cqe, > sizeof(req->cqe)); > +if (ret) { > +trace_nvme_dev_err_addr_write(addr); > +timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + > +100 * SCALE_MS); > +break; > +} > +QTAILQ_REMOVE(>req_list, req, entry); > +nvme_inc_cq_tail(cq); > nvme_req_clear(req); > QTAILQ_INSERT_TAIL(>req_list, req, entry); > } > @@ -1588,7 +1607,12 @@ static void nvme_process_sq(void *opaque) > > while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(>req_list))) { > addr = sq->dma_addr + sq->head * n->sqe_size; > -nvme_addr_read(n, 
addr, (void *)&cmd, sizeof(cmd)); > +if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) { > +