Re: [PATCH v2 5/5] hw/nvme: flexible data placement emulation

2023-02-17 Thread Keith Busch
On Fri, Feb 17, 2023 at 01:07:43PM +0100, Jesper Devantier wrote:
> +static void nvme_do_write_fdp(NvmeCtrl *n, NvmeRequest *req, uint64_t slba,
> +  uint32_t nlb)
> +{
> +NvmeNamespace *ns = req->ns;
> +NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
> +uint64_t data_size = nvme_l2b(ns, nlb);
> +uint32_t dw12 = le32_to_cpu(req->cmd.cdw12);
> +uint8_t dtype = (dw12 >> 20) & 0xf;
> +uint16_t pid = le16_to_cpu(rw->dspec);
> +uint16_t ph, rg, ruhid;
> +NvmeReclaimUnit *ru;
> +
> +if (dtype != NVME_DIRECTIVE_DATA_PLACEMENT
> +|| !nvme_parse_pid(ns, pid, &ph, &rg)) {

Style nit, the "||" ought to go in the previous line.

> +ph = 0;
> +rg = 0;
> +}
> +
> +ruhid = ns->fdp.phs[ph];
> +ru = &ns->endgrp->fdp.ruhs[ruhid].rus[rg];
> +
> +nvme_fdp_stat_inc(&ns->endgrp->fdp.hbmw, data_size);
> +nvme_fdp_stat_inc(&ns->endgrp->fdp.mbmw, data_size);
> +
> +//trace_pci_nvme_fdp_ruh_write(ruh->rgid, ruh->ruhid, ruh->nlb_ruamw, nlb);
> +
> +while (nlb) {
> +if (nlb < ru->ruamw) {
> +ru->ruamw -= nlb;
> +break;
> +}
> +
> +nlb -= ru->ruamw;
> +//trace_pci_nvme_fdp_ruh_change(ruh->rgid, ruh->ruhid);

Please use the trace points if you find them useful, otherwise just delete
them instead of committing commented out code.

Beyond that, looks good! For the series:

Reviewed-by: Keith Busch 



[PATCH v2 5/5] hw/nvme: flexible data placement emulation

2023-02-17 Thread Jesper Devantier
From: Jesper Devantier 

Add emulation of TP4146 ("Flexible Data Placement").

Signed-off-by: Jesper Devantier 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 703 ++-
 hw/nvme/ns.c | 142 +
 hw/nvme/nvme.h   |  85 +-
 hw/nvme/subsys.c |  94 +-
 hw/nvme/trace-events |   5 +
 include/block/nvme.h | 173 ++-
 6 files changed, 1187 insertions(+), 15 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 17e6b430e2..b894bda326 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -238,6 +238,8 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
 [NVME_TIMESTAMP]= true,
 [NVME_HOST_BEHAVIOR_SUPPORT]= true,
 [NVME_COMMAND_SET_PROFILE]  = true,
+[NVME_FDP_MODE] = true,
+[NVME_FDP_EVENTS]   = true,
 };
 
 static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
@@ -249,6 +251,8 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
 [NVME_TIMESTAMP]= NVME_FEAT_CAP_CHANGE,
 [NVME_HOST_BEHAVIOR_SUPPORT]= NVME_FEAT_CAP_CHANGE,
 [NVME_COMMAND_SET_PROFILE]  = NVME_FEAT_CAP_CHANGE,
+[NVME_FDP_MODE] = NVME_FEAT_CAP_CHANGE,
+[NVME_FDP_EVENTS]   = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
 };
 
 static const uint32_t nvme_cse_acs[256] = {
@@ -281,6 +285,8 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
 [NVME_CMD_VERIFY]   = NVME_CMD_EFF_CSUPP,
 [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
 [NVME_CMD_COMPARE]  = NVME_CMD_EFF_CSUPP,
+[NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
+[NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
 };
 
 static const uint32_t nvme_cse_iocs_zoned[256] = {
@@ -299,12 +305,66 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
 
 static void nvme_process_sq(void *opaque);
 static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst);
+static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n);
 
 static uint16_t nvme_sqid(NvmeRequest *req)
 {
 return le16_to_cpu(req->sq->sqid);
 }
 
+static inline uint16_t nvme_make_pid(NvmeNamespace *ns, uint16_t rg,
+ uint16_t ph)
+{
+uint16_t rgif = ns->endgrp->fdp.rgif;
+
+if (!rgif) {
+return ph;
+}
+
+return (rg << (16 - rgif)) | ph;
+}
+
+static inline bool nvme_ph_valid(NvmeNamespace *ns, uint16_t ph)
+{
+return ph < ns->fdp.nphs;
+}
+
+static inline bool nvme_rg_valid(NvmeEnduranceGroup *endgrp, uint16_t rg)
+{
+return rg < endgrp->fdp.nrg;
+}
+
+static inline uint16_t nvme_pid2ph(NvmeNamespace *ns, uint16_t pid)
+{
+uint16_t rgif = ns->endgrp->fdp.rgif;
+
+if (!rgif) {
+return pid;
+}
+
+return pid & ((1 << (15 - rgif)) - 1);
+}
+
+static inline uint16_t nvme_pid2rg(NvmeNamespace *ns, uint16_t pid)
+{
+uint16_t rgif = ns->endgrp->fdp.rgif;
+
+if (!rgif) {
+return 0;
+}
+
+return pid >> (16 - rgif);
+}
+
+static inline bool nvme_parse_pid(NvmeNamespace *ns, uint16_t pid,
+  uint16_t *ph, uint16_t *rg)
+{
+*rg = nvme_pid2rg(ns, pid);
+*ph = nvme_pid2ph(ns, pid);
+
+return nvme_ph_valid(ns, *ph) && nvme_rg_valid(ns->endgrp, *rg);
+}
+
 static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState state)
 {
@@ -378,6 +438,69 @@ static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t 
act, uint32_t opn)
 return nvme_zns_check_resources(ns, act, opn, 0);
 }
 
+static NvmeFdpEvent *nvme_fdp_alloc_event(NvmeCtrl *n, NvmeFdpEventBuffer 
*ebuf)
+{
+NvmeFdpEvent *ret = NULL;
+bool is_full = ebuf->next == ebuf->start && ebuf->nelems;
+
+ret = &ebuf->events[ebuf->next++];
+if (unlikely(ebuf->next == NVME_FDP_MAX_EVENTS)) {
+ebuf->next = 0;
+}
+if (is_full) {
+ebuf->start = ebuf->next;
+} else {
+ebuf->nelems++;
+}
+
+memset(ret, 0, sizeof(NvmeFdpEvent));
+ret->timestamp = nvme_get_timestamp(n);
+
+return ret;
+}
+
+static inline int log_event(NvmeRuHandle *ruh, uint8_t event_type)
+{
+return (ruh->event_filter >> nvme_fdp_evf_shifts[event_type]) & 0x1;
+}
+
+static bool nvme_update_ruh(NvmeCtrl *n, NvmeNamespace *ns, uint16_t pid)
+{
+NvmeEnduranceGroup *endgrp = ns->endgrp;
+NvmeRuHandle *ruh;
+NvmeReclaimUnit *ru;
+NvmeFdpEvent *e = NULL;
+uint16_t ph, rg, ruhid;
+
+if (!nvme_parse_pid(ns, pid, &ph, &rg)) {
+return false;
+}
+
+ruhid = ns->fdp.phs[ph];
+
+ruh = &endgrp->fdp.ruhs[ruhid];
+ru = &ruh->rus[rg];
+
+if (ru->ruamw) {
+if (log_event(ruh, FDP_EVT_RU_NOT_FULLY_WRITTEN)) {
+e = nvme_fdp_alloc_event(n, &endgrp->fdp.host_events);
+e->type = FDP_EVT_RU_NOT_FULLY_WRITTEN;
+e->flags = FDPEF_PIV | FDPEF_NSIDV | FDPEF_