On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote: > From: Klaus Jensen <k.jen...@samsung.com> > > Add support for returning a reasonable response to Get/Set Features of > mandatory features. > > Signed-off-by: Klaus Jensen <klaus.jen...@cnexlabs.com> > Acked-by: Keith Busch <kbu...@kernel.org> > --- > hw/block/nvme.c | 60 ++++++++++++++++++++++++++++++++++++++++++- > hw/block/trace-events | 2 ++ > include/block/nvme.h | 6 ++++- > 3 files changed, 66 insertions(+), 2 deletions(-) > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c > index ff8975cd6667..eb9c722df968 100644 > --- a/hw/block/nvme.c > +++ b/hw/block/nvme.c > @@ -1025,7 +1025,15 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd > *cmd, NvmeRequest *req) > uint32_t dw11 = le32_to_cpu(cmd->cdw11); > uint32_t result; > > + trace_nvme_dev_getfeat(nvme_cid(req), dw10); > + > switch (dw10) { > + case NVME_ARBITRATION: > + result = cpu_to_le32(n->features.arbitration); > + break; > + case NVME_POWER_MANAGEMENT: > + result = cpu_to_le32(n->features.power_mgmt); > + break; > case NVME_TEMPERATURE_THRESHOLD: > result = 0; > > @@ -1046,9 +1054,12 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd > *cmd, NvmeRequest *req) > break; > } > > + break; > + case NVME_ERROR_RECOVERY: > + result = cpu_to_le32(n->features.err_rec); > break; > case NVME_VOLATILE_WRITE_CACHE: > - result = blk_enable_write_cache(n->conf.blk); > + result = cpu_to_le32(blk_enable_write_cache(n->conf.blk)); > trace_nvme_dev_getfeat_vwcache(result ? 
"enabled" : "disabled"); > break; > case NVME_NUMBER_OF_QUEUES: > @@ -1058,6 +1069,19 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd > *cmd, NvmeRequest *req) > break; > case NVME_TIMESTAMP: > return nvme_get_feature_timestamp(n, cmd); > + case NVME_INTERRUPT_COALESCING: > + result = cpu_to_le32(n->features.int_coalescing); > + break; > + case NVME_INTERRUPT_VECTOR_CONF: > + if ((dw11 & 0xffff) > n->params.max_ioqpairs + 1) { > + return NVME_INVALID_FIELD | NVME_DNR; > + } I still think that this should be >= since the interrupt vector is zero-based. So if we have, for example, 3 IO queues, then we have 4 queues in total, which translates to IRQ numbers 0..3.
BTW, the user of the device doesn't have to have a 1:1 mapping between qid and MSI interrupt index; in fact, when MSI is not used, all the queues will map to the same vector, which will be interrupt 0 from the point of view of the device. So IMHO it kind of makes sense to have num_irqs or something, even if it technically equals the number of queues. > + > + result = cpu_to_le32(n->features.int_vector_config[dw11 & 0xffff]); > + break; > + case NVME_WRITE_ATOMICITY: > + result = cpu_to_le32(n->features.write_atomicity); > + break; > case NVME_ASYNCHRONOUS_EVENT_CONF: > result = cpu_to_le32(n->features.async_config); > break; > @@ -1093,6 +1117,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd > *cmd, NvmeRequest *req) > uint32_t dw10 = le32_to_cpu(cmd->cdw10); > uint32_t dw11 = le32_to_cpu(cmd->cdw11); > > + trace_nvme_dev_setfeat(nvme_cid(req), dw10, dw11); > + > switch (dw10) { > case NVME_TEMPERATURE_THRESHOLD: > if (NVME_TEMP_TMPSEL(dw11)) { > @@ -1120,6 +1146,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd > *cmd, NvmeRequest *req) > > break; > case NVME_VOLATILE_WRITE_CACHE: > + if (blk_enable_write_cache(n->conf.blk)) { > + blk_flush(n->conf.blk); > + } (Not your fault, but) the blk_enable_write_cache function name is highly misleading, since it doesn't enable anything; it just returns the flag indicating whether the write cache is enabled. It really should be called blk_get_enable_write_cache.
> + > blk_set_enable_write_cache(n->conf.blk, dw11 & 1); > break; > case NVME_NUMBER_OF_QUEUES: > @@ -1135,6 +1165,13 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd > *cmd, NvmeRequest *req) > case NVME_ASYNCHRONOUS_EVENT_CONF: > n->features.async_config = dw11; > break; > + case NVME_ARBITRATION: > + case NVME_POWER_MANAGEMENT: > + case NVME_ERROR_RECOVERY: > + case NVME_INTERRUPT_COALESCING: > + case NVME_INTERRUPT_VECTOR_CONF: > + case NVME_WRITE_ATOMICITY: > + return NVME_FEAT_NOT_CHANGABLE | NVME_DNR; > default: > trace_nvme_dev_err_invalid_setfeat(dw10); > return NVME_INVALID_FIELD | NVME_DNR; > @@ -1716,6 +1753,25 @@ static void nvme_init_state(NvmeCtrl *n) > n->temperature = NVME_TEMPERATURE; > n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING; > n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); > + > + /* > + * There is no limit on the number of commands that the controller may > + * launch at one time from a particular Submission Queue. > + */ > + n->features.arbitration = NVME_ARB_AB_NOLIMIT; > + > + n->features.int_vector_config = g_malloc0_n(n->params.max_ioqpairs + 1, > + sizeof(*n->features.int_vector_config)); > + > + for (int i = 0; i < n->params.max_ioqpairs + 1; i++) { > + n->features.int_vector_config[i] = i; > + > + /* interrupt coalescing is not supported for the admin queue */ > + if (i == 0) { > + n->features.int_vector_config[i] |= NVME_INTVC_NOCOALESCING; > + } > + } > + > n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); > } > > @@ -1804,6 +1860,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) > id->cqes = (0x4 << 4) | 0x4; > id->nn = cpu_to_le32(n->num_namespaces); > id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP); > + Unrelated whitespace change > id->psd[0].mp = cpu_to_le16(0x9c4); > id->psd[0].enlat = cpu_to_le32(0x10); > id->psd[0].exlat = cpu_to_le32(0x4); > @@ -1879,6 +1936,7 @@ static void nvme_exit(PCIDevice *pci_dev) > g_free(n->cq); > g_free(n->sq); > g_free(n->aer_reqs); > + 
g_free(n->features.int_vector_config); > > if (n->params.cmb_size_mb) { > g_free(n->cmbuf); > diff --git a/hw/block/trace-events b/hw/block/trace-events > index 3952c36774cf..4cf39961989d 100644 > --- a/hw/block/trace-events > +++ b/hw/block/trace-events > @@ -41,6 +41,8 @@ nvme_dev_del_cq(uint16_t cqid) "deleted completion queue, > sqid=%"PRIu16"" > nvme_dev_identify_ctrl(void) "identify controller" > nvme_dev_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" > nvme_dev_identify_nslist(uint16_t ns) "identify namespace list, > nsid=%"PRIu16"" > +nvme_dev_getfeat(uint16_t cid, uint32_t fid) "cid %"PRIu16" fid 0x%"PRIx32"" > +nvme_dev_setfeat(uint16_t cid, uint32_t fid, uint32_t val) "cid %"PRIu16" > fid 0x%"PRIx32" val 0x%"PRIx32"" > nvme_dev_getfeat_vwcache(const char* result) "get feature volatile write > cache, result=%s" > nvme_dev_getfeat_numq(int result) "get feature number of queues, result=%d" > nvme_dev_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested > cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" > diff --git a/include/block/nvme.h b/include/block/nvme.h > index f2a8b07c0f2f..ecc02fbe8bb8 100644 > --- a/include/block/nvme.h > +++ b/include/block/nvme.h > @@ -490,7 +490,8 @@ enum NvmeStatusCodes { > NVME_FW_REQ_RESET = 0x010b, > NVME_INVALID_QUEUE_DEL = 0x010c, > NVME_FID_NOT_SAVEABLE = 0x010d, > - NVME_FID_NOT_NSID_SPEC = 0x010f, > + NVME_FEAT_NOT_CHANGABLE = 0x010e, > + NVME_FEAT_NOT_NS_SPEC = 0x010f, > NVME_FW_REQ_SUSYSTEM_RESET = 0x0110, > NVME_CONFLICTING_ATTRS = 0x0180, > NVME_INVALID_PROT_INFO = 0x0181, > @@ -706,6 +707,7 @@ typedef struct NvmeFeatureVal { > } NvmeFeatureVal; > > #define NVME_ARB_AB(arb) (arb & 0x7) > +#define NVME_ARB_AB_NOLIMIT 0x7 > #define NVME_ARB_LPW(arb) ((arb >> 8) & 0xff) > #define NVME_ARB_MPW(arb) ((arb >> 16) & 0xff) > #define NVME_ARB_HPW(arb) ((arb >> 24) & 0xff) > @@ -713,6 +715,8 @@ typedef struct NvmeFeatureVal { > #define NVME_INTC_THR(intc) (intc & 0xff) > #define 
NVME_INTC_TIME(intc) ((intc >> 8) & 0xff) > > +#define NVME_INTVC_NOCOALESCING (0x1 << 16) > + > #define NVME_TEMP_THSEL(temp) ((temp >> 20) & 0x3) > #define NVME_TEMP_TMPSEL(temp) ((temp >> 16) & 0xf) > #define NVME_TEMP_TMPTH(temp) ((temp >> 0) & 0xffff) Best regards, Maxim Levitsky