Author: chuck
Date: Sun Apr 12 19:14:27 2020
New Revision: 359831
URL: https://svnweb.freebsd.org/changeset/base/359831

Log:
  MFC r359364
  
  bhyve: implement NVMe deallocate command

Modified:
  stable/12/usr.sbin/bhyve/pci_nvme.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/usr.sbin/bhyve/pci_nvme.c
==============================================================================
--- stable/12/usr.sbin/bhyve/pci_nvme.c Sun Apr 12 19:02:34 2020        (r359830)
+++ stable/12/usr.sbin/bhyve/pci_nvme.c Sun Apr 12 19:14:27 2020        (r359831)
@@ -180,6 +180,7 @@ struct pci_nvme_blockstore {
        uint32_t        sectsz;
        uint32_t        sectsz_bits;
        uint64_t        eui64;
+       uint32_t        deallocate:1;
 };
 
 struct pci_nvme_ioreq {
@@ -209,6 +210,15 @@ struct pci_nvme_ioreq {
        struct iovec    iovpadding[NVME_MAX_BLOCKIOVS-BLOCKIF_IOV_MAX];
 };
 
+enum nvme_dsm_type {
+       /* Dataset Management bit in ONCS reflects backing storage capability */
+       NVME_DATASET_MANAGEMENT_AUTO,
+       /* Unconditionally set Dataset Management bit in ONCS */
+       NVME_DATASET_MANAGEMENT_ENABLE,
+       /* Unconditionally clear Dataset Management bit in ONCS */
+       NVME_DATASET_MANAGEMENT_DISABLE,
+};
+
 struct pci_nvme_softc {
        struct pci_devinst *nsc_pi;
 
@@ -246,6 +256,8 @@ struct pci_nvme_softc {
        uint32_t        intr_coales_aggr_time;   /* 0x08: uS to delay intr */
        uint32_t        intr_coales_aggr_thresh; /* 0x08: compl-Q entries */
        uint32_t        async_ev_config;         /* 0x0B: async event config */
+
+       enum nvme_dsm_type dataset_management;
 };
 
 
@@ -285,6 +297,9 @@ static void pci_nvme_io_partial(struct blockif_req *br
        ((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\
         (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT))
 
+#define NVME_ONCS_DSM  (NVME_CTRLR_DATA_ONCS_DSM_MASK << \
+       NVME_CTRLR_DATA_ONCS_DSM_SHIFT)
+
 static __inline void
 cpywithpad(char *dst, size_t dst_size, const char *src, char pad)
 {
@@ -363,6 +378,19 @@ pci_nvme_init_ctrldata(struct pci_nvme_softc *sc)
            (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT);
        cd->nn = 1;     /* number of namespaces */
 
+       cd->oncs = 0;
+       switch (sc->dataset_management) {
+       case NVME_DATASET_MANAGEMENT_AUTO:
+               if (sc->nvstore.deallocate)
+                       cd->oncs |= NVME_ONCS_DSM;
+               break;
+       case NVME_DATASET_MANAGEMENT_ENABLE:
+               cd->oncs |= NVME_ONCS_DSM;
+               break;
+       default:
+               break;
+       }
+
        cd->fna = 0x03;
 
        cd->power_state[0].mp = 10;
@@ -429,6 +457,9 @@ pci_nvme_init_nsdata(struct pci_nvme_softc *sc,
        nd->ncap = nd->nsze;
        nd->nuse = nd->nsze;
 
+       if (nvstore->type == NVME_STOR_BLOCKIF)
+               nvstore->deallocate = blockif_candelete(nvstore->ctx);
+
        nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 1 LBA Format) */
        nd->flbas = 0;
 
@@ -1339,7 +1370,7 @@ pci_nvme_io_done(struct blockif_req *br, int err)
        uint16_t code, status;
 
        DPRINTF(("%s error %d %s", __func__, err, strerror(err)));
-       
+
        /* TODO return correct error */
        code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS;
        pci_nvme_status_genc(&status, code);
@@ -1358,7 +1389,128 @@ pci_nvme_io_partial(struct blockif_req *br, int err)
        pthread_cond_signal(&req->cv);
 }
 
+static void
+pci_nvme_dealloc_sm(struct blockif_req *br, int err)
+{
+       struct pci_nvme_ioreq *req = br->br_param;
+       struct pci_nvme_softc *sc = req->sc;
+       bool done = true;
+       uint16_t status;
 
+       if (err) {
+               pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR);
+       } else if ((req->prev_gpaddr + 1) == (req->prev_size)) {
+               pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
+       } else {
+               struct iovec *iov = req->io_req.br_iov;
+
+               req->prev_gpaddr++;
+               iov += req->prev_gpaddr;
+
+               /* The iov_* values already include the sector size */
+               req->io_req.br_offset = (off_t)iov->iov_base;
+               req->io_req.br_resid = iov->iov_len;
+               if (blockif_delete(sc->nvstore.ctx, &req->io_req)) {
+                       pci_nvme_status_genc(&status,
+                           NVME_SC_INTERNAL_DEVICE_ERROR);
+               } else
+                       done = false;
+       }
+
+       if (done) {
+               pci_nvme_set_completion(sc, req->nvme_sq, req->sqid,
+                   req->cid, 0, status, 0);
+               pci_nvme_release_ioreq(sc, req);
+       }
+}
+
+static int
+nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
+    struct nvme_command *cmd,
+    struct pci_nvme_blockstore *nvstore,
+    struct pci_nvme_ioreq *req,
+    uint16_t *status)
+{
+       int err = -1;
+
+       if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) {
+               pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE);
+               goto out;
+       }
+
+       if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) {
+               struct nvme_dsm_range *range;
+               uint32_t nr, r;
+               int sectsz = sc->nvstore.sectsz;
+
+               /*
+                * DSM calls are advisory only, and compliant controllers
+                * may choose to take no actions (i.e. return Success).
+                */
+               if (!nvstore->deallocate) {
+                       pci_nvme_status_genc(status, NVME_SC_SUCCESS);
+                       goto out;
+               }
+
+               if (req == NULL) {
+                       pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+                       goto out;
+               }
+
+               /* copy locally because a range entry could straddle PRPs */
+               range = calloc(1, NVME_MAX_DSM_TRIM);
+               if (range == NULL) {
+                       pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+                       goto out;
+               }
+               nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
+                   (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP);
+
+               req->opc = cmd->opc;
+               req->cid = cmd->cid;
+               req->nsid = cmd->nsid;
+               /*
+                * If the request is for more than a single range, store
+                * the ranges in the br_iov. Optimize for the common case
+                * of a single range.
+                *
+                * Note that NVMe Number of Ranges is a zero based value
+                */
+               nr = cmd->cdw10 & 0xff;
+
+               req->io_req.br_iovcnt = 0;
+               req->io_req.br_offset = range[0].starting_lba * sectsz;
+               req->io_req.br_resid = range[0].length * sectsz;
+
+               if (nr == 0) {
+                       req->io_req.br_callback = pci_nvme_io_done;
+               } else {
+                       struct iovec *iov = req->io_req.br_iov;
+
+                       for (r = 0; r <= nr; r++) {
+                               iov[r].iov_base = (void *)(range[r].starting_lba * sectsz);
+                               iov[r].iov_len = range[r].length * sectsz;
+                       }
+                       req->io_req.br_callback = pci_nvme_dealloc_sm;
+
+                       /*
+                        * Use prev_gpaddr to track the current entry and
+                        * prev_size to track the number of entries
+                        */
+                       req->prev_gpaddr = 0;
+                       req->prev_size = r;
+               }
+
+               err = blockif_delete(nvstore->ctx, &req->io_req);
+               if (err)
+                       pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+
+               free(range);
+       }
+out:
+       return (err);
+}
+
 static void
 pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx)
 {
@@ -1410,16 +1562,27 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint
                        continue;
                }
 
-               nblocks = (cmd->cdw12 & 0xFFFF) + 1;
-
-               bytes = nblocks * sc->nvstore.sectsz;
-
                if (sc->nvstore.type == NVME_STOR_BLOCKIF) {
                        req = pci_nvme_get_ioreq(sc);
                        req->nvme_sq = sq;
                        req->sqid = idx;
                }
 
+               if (cmd->opc == NVME_OPC_DATASET_MANAGEMENT) {
+                       if (nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req,
+                           &status)) {
+                               pci_nvme_set_completion(sc, sq, idx, cmd->cid,
+                                   0, status, 1);
+                               if (req)
+                                       pci_nvme_release_ioreq(sc, req);
+                       }
+                       continue;
+               }
+
+               nblocks = (cmd->cdw12 & 0xFFFF) + 1;
+
+               bytes = nblocks * sc->nvstore.sectsz;
+
                /*
                 * If data starts mid-page and flows into the next page, then
                 * increase page count
@@ -1868,6 +2031,7 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *o
        sc->ioslots = NVME_IOSLOTS;
        sc->num_squeues = sc->max_queues;
        sc->num_cqueues = sc->max_queues;
+       sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
        sectsz = 0;
 
        uopt = strdup(opts);
@@ -1912,6 +2076,13 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *o
                        }
                } else if (!strcmp("eui64", xopts)) {
                        sc->nvstore.eui64 = htobe64(strtoull(config, NULL, 0));
+               } else if (!strcmp("dsm", xopts)) {
+                       if (!strcmp("auto", config))
+                               sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
+                       else if (!strcmp("enable", config))
+                               sc->dataset_management = NVME_DATASET_MANAGEMENT_ENABLE;
+                       else if (!strcmp("disable", config))
+                               sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE;
                } else if (optidx == 0) {
                        snprintf(bident, sizeof(bident), "%d:%d",
                                 sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
@@ -2031,8 +2202,12 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *p
        sem_init(&sc->iosemlock, 0, sc->ioslots);
 
        pci_nvme_reset(sc);
-       pci_nvme_init_ctrldata(sc);
+       /*
+        * Controller data depends on Namespace data so initialize Namespace
+        * data first.
+        */
        pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore);
+       pci_nvme_init_ctrldata(sc);
        pci_nvme_init_logpages(sc);
 
        pci_lintr_request(pi);
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to