Add block-level protection information passthrough support to the Compare, Dataset Management, Verify and Copy NVMe commands.
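A possible way to exercise the new code paths (a sketch only: the ms= and pi= nvme-ns properties already exist upstream, while the pip=on property name is an assumption based on the ns->pip flag used below, and the backing image must be sized to hold the extra metadata):

    -drive file=nvm.img,if=none,id=nvm,format=raw \
    -device nvme,id=nvme0,serial=deadbeef \
    -device nvme-ns,bus=nvme0,drive=nvm,ms=8,pi=1,pip=on

With such a namespace, guest-issued Compare, Verify and Copy requests go through the new nvme_dif_pass_* callbacks instead of the existing metadata bounce paths, and Dataset Management no longer takes the separate metadata discard path.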
Signed-off-by: Dmitry Tihov <d.ti...@yadro.com>
---
 hw/nvme/ctrl.c       | 348 +++++++++++++++++++++++++++++++++++++++----
 hw/nvme/trace-events |   2 +
 2 files changed, 325 insertions(+), 25 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index c646345bcc..950d773d59 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -197,6 +197,7 @@
 #include "hw/pci/msix.h"
 #include "hw/pci/pcie_sriov.h"
 #include "migration/vmstate.h"
+#include "qemu/memalign.h"
 #include "nvme.h"
 #include "dif.h"
 
@@ -2168,6 +2169,50 @@ out:
     nvme_verify_cb(ctx, ret);
 }
 
+static void nvme_dif_pass_verify_cb(void *opaque, int ret)
+{
+    NvmeBounceContext *ctx = opaque;
+    NvmeRequest *req = ctx->req;
+    NvmeNamespace *ns = req->ns;
+    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+    uint64_t slba = le64_to_cpu(rw->slba);
+    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
+    uint16_t apptag = le16_to_cpu(rw->apptag);
+    uint16_t appmask = le16_to_cpu(rw->appmask);
+    uint32_t reftag = le32_to_cpu(rw->reftag);
+
+    trace_pci_nvme_dif_pass_verify_cb(nvme_cid(req));
+    if (trace_event_get_state_backends(TRACE_PCI_NVME_DIF_DUMP_PASS_PI)) {
+        nvme_dif_pass_dump(ns, ctx->data.iov.dif.iov_base,
+                           ctx->data.iov.dif.iov_len);
+    }
+
+    if (unlikely(ret == -EILSEQ)) {
+        req->status = nvme_dif_pass_check(ns, ctx->data.bounce,
+                          ctx->data.iov.size, ctx->data.iov.dif.iov_base,
+                          prinfo, slba, reftag);
+        if (req->status) {
+            /* zero out ret to allow req->status passthrough */
+            ret = 0;
+        }
+        goto out;
+    }
+
+    if (ret) {
+        goto out;
+    }
+
+    req->status = nvme_dif_pass_apptag_check(ns, ctx->data.iov.dif.iov_base,
+                      ctx->data.iov.dif.iov_len, prinfo, apptag, appmask);
+
+out:
+    qemu_iovec_destroy_pi(&ctx->data.iov);
+    g_free(ctx->data.bounce);
+    g_free(ctx);
+
+    nvme_rw_complete_cb(req, ret);
+}
+
 struct nvme_compare_ctx {
     struct {
         QEMUIOVector iov;
@@ -2331,6 +2376,83 @@ out:
     nvme_enqueue_req_completion(nvme_cq(req), req);
 }
 
+static void nvme_dif_pass_compare_cb(void *opaque, int ret)
+{
+    NvmeRequest *req = opaque;
+    NvmeCtrl *n = nvme_ctrl(req);
+    NvmeNamespace *ns = req->ns;
+    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+    uint64_t slba = le64_to_cpu(rw->slba);
+    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+    size_t mlen = nvme_m2b(ns, nlb);
+    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
+    uint16_t apptag = le16_to_cpu(rw->apptag);
+    uint16_t appmask = le16_to_cpu(rw->appmask);
+    uint32_t reftag = le32_to_cpu(rw->reftag);
+    struct nvme_compare_ctx *ctx = req->opaque;
+    g_autofree uint8_t *buf = NULL;
+    uint16_t status;
+
+    trace_pci_nvme_dif_pass_compare_cb(nvme_cid(req));
+    if (trace_event_get_state_backends(TRACE_PCI_NVME_DIF_DUMP_PASS_PI)) {
+        nvme_dif_pass_dump(ns, ctx->data.iov.dif.iov_base,
+                           ctx->data.iov.dif.iov_len);
+    }
+
+    if (unlikely(ret == -EILSEQ)) {
+        status = nvme_dif_pass_check(ns, ctx->data.bounce, ctx->data.iov.size,
+                                     ctx->data.iov.dif.iov_base, prinfo, slba,
+                                     reftag);
+        if (status) {
+            /* zero out ret to allow req->status passthrough */
+            ret = 0;
+            req->status = status;
+        }
+        goto out;
+    }
+
+    if (ret) {
+        goto out;
+    }
+
+    status = nvme_dif_pass_apptag_check(ns, ctx->data.iov.dif.iov_base,
+                 ctx->data.iov.dif.iov_len, prinfo, apptag, appmask);
+    if (status) {
+        req->status = status;
+        goto out;
+    }
+
+    buf = g_malloc(ctx->data.iov.size);
+    status = nvme_bounce_data(n, buf, ctx->data.iov.size,
+                              NVME_TX_DIRECTION_TO_DEVICE, req);
+    if (status) {
+        req->status = status;
+        goto out;
+    }
+    if (memcmp(buf, ctx->data.bounce, ctx->data.iov.size)) {
+        req->status = NVME_CMP_FAILURE;
+        goto out;
+    }
+
+    ctx->mdata.bounce = g_malloc(mlen);
+    status = nvme_bounce_mdata(n, ctx->mdata.bounce, mlen,
+                               NVME_TX_DIRECTION_TO_DEVICE, req);
+    if (status) {
+        req->status = status;
+        goto out;
+    }
+    if (memcmp(ctx->mdata.bounce, ctx->data.iov.dif.iov_base, mlen)) {
+        req->status = NVME_CMP_FAILURE;
+    }
+
+out:
+    qemu_iovec_destroy_pi(&ctx->data.iov);
+    g_free(ctx->data.bounce);
+    g_free(ctx);
+
+    nvme_rw_complete_cb(req, ret);
+}
+
 typedef struct NvmeDSMAIOCB {
     BlockAIOCB common;
     BlockAIOCB *aiocb;
@@ -2395,7 +2517,7 @@ static void nvme_dsm_md_cb(void *opaque, int ret)
         goto done;
     }
 
-    if (!ns->lbaf.ms) {
+    if (!ns->lbaf.ms || ns->pip) {
         nvme_dsm_cb(iocb, 0);
         return;
     }
@@ -2556,19 +2678,35 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
         }
     }
 
-    ctx = g_new0(NvmeBounceContext, 1);
-    ctx->req = req;
+    if (ns->pip) {
+        ctx = g_new0(NvmeBounceContext, 1);
+        ctx->req = req;
 
-    ctx->data.bounce = g_malloc(len);
+        ctx->data.bounce = qemu_memalign(qemu_real_host_page_size(), len);
 
-    qemu_iovec_init(&ctx->data.iov, 1);
-    qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);
+        qemu_iovec_init_pi(&ctx->data.iov, 1, nlb);
+        qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);
 
-    block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
-                     BLOCK_ACCT_READ);
+        block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
+                         BLOCK_ACCT_READ);
+
+        req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
+                                    nvme_dif_pass_verify_cb, ctx);
+    } else {
+        ctx = g_new0(NvmeBounceContext, 1);
+        ctx->req = req;
+
+        ctx->data.bounce = g_malloc(len);
+
+        qemu_iovec_init(&ctx->data.iov, 1);
+        qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);
+
+        block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
+                         BLOCK_ACCT_READ);
 
-    req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
-                                nvme_verify_mdata_in_cb, ctx);
+        req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
+                                    nvme_verify_mdata_in_cb, ctx);
+    }
 
     return NVME_NO_COMPLETE;
 }
@@ -2625,7 +2763,11 @@ static void nvme_copy_bh(void *opaque)
         req->cqe.result = cpu_to_le32(iocb->idx);
     }
 
-    qemu_iovec_destroy(&iocb->iov);
+    if (ns->pip) {
+        qemu_iovec_destroy_pi(&iocb->iov);
+    } else {
+        qemu_iovec_destroy(&iocb->iov);
+    }
     g_free(iocb->bounce);
     qemu_bh_delete(iocb->bh);
 
@@ -2737,10 +2879,29 @@ static void nvme_copy_out_completed_cb(void *opaque, int ret)
     NvmeRequest *req = iocb->req;
     NvmeNamespace *ns = req->ns;
     uint32_t nlb;
+    uint16_t status;
 
     nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
                                  &nlb, NULL, NULL, NULL);
 
+    if (ns->pip) {
+        if (iocb->iov.dif.iov_len) {
+            NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+            uint64_t slba = le64_to_cpu(copy->sdlba);
+            uint16_t prinfo = ((copy->control[2] >> 2) & 0xf);
+            size_t len = nvme_l2b(ns, nlb);
+            if (unlikely(ret == -EILSEQ)) {
+                status = nvme_dif_pass_check(ns, iocb->bounce, len,
+                                             iocb->iov.dif.iov_base, prinfo, slba,
+                                             iocb->reftag);
+                if (status) {
+                    goto invalid;
+                }
+            }
+        }
+
+        iocb->reftag += nlb;
+    }
     if (ret < 0) {
         iocb->ret = ret;
         goto out;
@@ -2754,8 +2915,17 @@ static void nvme_copy_out_completed_cb(void *opaque, int ret)
     iocb->idx++;
     iocb->slba += nlb;
+
 out:
     nvme_copy_cb(iocb, iocb->ret);
+    return;
+
+invalid:
+    req->status = status;
+    iocb->aiocb = NULL;
+    if (iocb->bh) {
+        qemu_bh_schedule(iocb->bh);
+    }
 }
 
 static void nvme_copy_out_cb(void *opaque, int ret)
@@ -2900,6 +3070,99 @@ out:
     nvme_copy_cb(iocb, ret);
 }
 
+static void nvme_dif_pass_copy_cb(void *opaque, int ret)
+{
+    NvmeCopyAIOCB *iocb = opaque;
+    NvmeRequest *req = iocb->req;
+    NvmeNamespace *ns = req->ns;
+    NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+    uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
+    uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
+    uint32_t nlb;
+    size_t len;
+    uint16_t status;
+    uint64_t slba;
+    uint16_t apptag;
+    uint16_t appmask;
+    uint64_t reftag;
+
+    nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
+                                 &nlb, &apptag, &appmask, &reftag);
+    len = nvme_l2b(ns, nlb);
+
+    if (unlikely(ret == -EILSEQ)) {
+        status = nvme_dif_pass_check(ns, iocb->bounce, len,
+                                     iocb->iov.dif.iov_base, prinfor, slba,
+                                     reftag);
+        if (status) {
+            goto invalid;
+        }
+    }
+
+    if (ret < 0) {
+        iocb->ret = ret;
+        goto out;
+    } else if (iocb->ret < 0) {
+        goto out;
+    }
+
+    status = nvme_dif_pass_apptag_check(ns, iocb->iov.dif.iov_base,
+                                        nvme_m2b(ns, nlb), prinfor, apptag,
+                                        appmask);
+    if (status) {
+        goto invalid;
+    }
+
+    status = nvme_check_prinfo(ns, prinfow, iocb->slba, iocb->reftag);
+    if (status) {
+        goto invalid;
+    }
+    status = nvme_check_bounds(ns, iocb->slba, nlb);
+    if (status) {
+        goto invalid;
+    }
+
+    if (ns->params.zoned) {
+        status = nvme_check_zone_write(ns, iocb->zone, iocb->slba, nlb);
+        if (status) {
+            goto invalid;
+        }
+
+        iocb->zone->w_ptr += nlb;
+    }
+
+    if (prinfow & NVME_PRINFO_PRACT) {
+        qemu_iovec_reset(&iocb->iov);
+        qemu_iovec_add(&iocb->iov, iocb->bounce, len);
+    } else {
+        appmask = le16_to_cpu(copy->appmask);
+        apptag = le16_to_cpu(copy->apptag);
+        status = nvme_dif_pass_apptag_check(ns, iocb->iov.dif.iov_base,
+                                            nvme_m2b(ns, nlb), prinfow, apptag,
+                                            appmask);
+        if (status) {
+            goto invalid;
+        }
+    }
+    iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, iocb->slba),
+                                  &iocb->iov, 0, nvme_copy_out_completed_cb,
+                                  iocb);
+
+    return;
+
+invalid:
+    req->status = status;
+    iocb->aiocb = NULL;
+    if (iocb->bh) {
+        qemu_bh_schedule(iocb->bh);
+    }
+
+    return;
+
+out:
+    nvme_copy_cb(iocb, ret);
+}
+
 static void nvme_copy_in_cb(void *opaque, int ret)
 {
     NvmeCopyAIOCB *iocb = opaque;
@@ -2943,6 +3206,7 @@ static void nvme_copy_cb(void *opaque, int ret)
     NvmeNamespace *ns = req->ns;
     uint64_t slba;
     uint32_t nlb;
+    uint64_t reftag;
     size_t len;
     uint16_t status;
 
@@ -2958,7 +3222,7 @@
     }
 
     nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
-                                 &nlb, NULL, NULL, NULL);
+                                 &nlb, NULL, NULL, &reftag);
     len = nvme_l2b(ns, nlb);
 
     trace_pci_nvme_copy_source_range(slba, nlb);
@@ -2990,8 +3254,21 @@
     qemu_iovec_reset(&iocb->iov);
     qemu_iovec_add(&iocb->iov, iocb->bounce, len);
 
-    iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba),
-                                 &iocb->iov, 0, nvme_copy_in_cb, iocb);
+    if (ns->pip) {
+        NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+        uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
+        status = nvme_check_prinfo(ns, prinfor, slba, reftag);
+        if (status) {
+            goto invalid;
+        }
+        iocb->iov.dif.iov_len = nvme_m2b(ns, nlb);
+        iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba),
+                                     &iocb->iov, 0, nvme_dif_pass_copy_cb,
+                                     iocb);
+    } else {
+        iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba),
+                                     &iocb->iov, 0, nvme_copy_in_cb, iocb);
+    }
     return;
 
 invalid:
@@ -3078,11 +3355,19 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
     iocb->idx = 0;
     iocb->reftag = le32_to_cpu(copy->reftag);
     iocb->reftag |= (uint64_t)le32_to_cpu(copy->cdw3) << 32;
-    iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl),
-                              ns->lbasz + ns->lbaf.ms);
 
     qemu_iovec_init(&iocb->iov, 1);
+    if (ns->pip) {
+        qemu_iovec_init_pi(&iocb->iov, 1, le16_to_cpu(ns->id_ns.mssrl));
+        iocb->bounce = qemu_memalign(qemu_real_host_page_size(),
+                                     le16_to_cpu(ns->id_ns.mssrl) * ns->lbasz);
+    } else {
+        qemu_iovec_init(&iocb->iov, 1);
+        iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl),
+                                  ns->lbasz + ns->lbaf.ms);
+    }
+
 
     block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.read, 0,
                      BLOCK_ACCT_READ);
     block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.write, 0,
@@ -3145,18 +3430,31 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
         return status;
     }
 
-    ctx = g_new(struct nvme_compare_ctx, 1);
-    ctx->data.bounce = g_malloc(data_len);
+    if (ns->pip) {
+        ctx = g_new0(struct nvme_compare_ctx, 1);
+        ctx->data.bounce = qemu_memalign(qemu_real_host_page_size(), data_len);
+
+        req->opaque = ctx;
 
-    req->opaque = ctx;
+        qemu_iovec_init_pi(&ctx->data.iov, 1, nlb);
+        qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len);
+        block_acct_start(blk_get_stats(blk), &req->acct, data_len,
+                         BLOCK_ACCT_READ);
+        req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0,
+                                    nvme_dif_pass_compare_cb, req);
+    } else {
+        ctx = g_new(struct nvme_compare_ctx, 1);
+        ctx->data.bounce = g_malloc(data_len);
 
-    qemu_iovec_init(&ctx->data.iov, 1);
-    qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len);
+        req->opaque = ctx;
 
-    block_acct_start(blk_get_stats(blk), &req->acct, data_len,
-                     BLOCK_ACCT_READ);
-    req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0,
-                                nvme_compare_data_cb, req);
+        qemu_iovec_init(&ctx->data.iov, 1);
+        qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len);
+        block_acct_start(blk_get_stats(blk), &req->acct, data_len,
+                         BLOCK_ACCT_READ);
+        req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0,
+                                    nvme_compare_data_cb, req);
+    }
 
     return NVME_NO_COMPLETE;
 }
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index 259fa8ffa2..42c171ed72 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -41,12 +41,14 @@ pci_nvme_copy_out(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32""
 pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
 pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
 pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_dif_pass_verify_cb(uint16_t cid) "cid %"PRIu16""
 pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
 pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d"
 pci_nvme_dsm(uint32_t nr, uint32_t attr) "nr %"PRIu32" attr 0x%"PRIx32""
 pci_nvme_dsm_deallocate(uint64_t slba, uint32_t nlb) "slba %"PRIu64" nlb %"PRIu32""
 pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32""
 pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
+pci_nvme_dif_pass_compare_cb(uint16_t cid) "cid %"PRIu16""
 pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16""
 pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16""
 pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
-- 
2.38.1