On Jun 27 13:47, Niklas Cassel wrote: > TP4084 adds a new mode, CC.CRIME, that can be used to mark a namespace > as ready independently from the controller. > > When CC.CRIME is 0 (default), things behave as before, all namespaces > are ready when CSTS.RDY gets set to 1. > > When CC.CRIME is 1, the controller will become ready when CSTS.RDY gets > set to 1, but commands accessing a namespace are allowed to return > "Namespace Not Ready" or "Admin Command Media Not Ready". > After CRTO.CRWMT amount of time, if the namespace has not yet been > marked ready, the status codes also need to have the DNR bit set. > > Add a new "ready_delay" namespace device parameter, in order to emulate > different ready latencies for namespaces. > > Once a namespace is ready, it will set the NRDY bit in the I/O Command > Set Independent Identify Namespace Data Structure, and then send out a > Namespace Attribute Changed event. > > This new "ready_delay" is supported on controllers not part of a NVMe > subsystem. The reasons are many. One problem is that multiple controllers > can have different CC.CRIME modes running. Another problem is the extra > locking needed. The third problem is when to actually clear NRDY. If we > assume that a namespace clears NRDY when it no longer has any controller > online for that namespace. The problem then is that Linux will reset the > controllers one by one during probe time. The reset goes so fast so that > there is no time when all controllers are in reset at the same time, so > NRDY will never get cleared. (The controllers are enabled by SeaBIOS by > default.) We could introduce a reset_time param, but this would only > increase the chances that all controllers are in reset at the same time. > > Signed-off-by: Niklas Cassel <niklas.cas...@wdc.com> > --- > hw/nvme/ctrl.c | 123 +++++++++++++++++++++++++++++++++++++++++-- > hw/nvme/ns.c | 18 +++++++ > hw/nvme/nvme.h | 6 +++ > hw/nvme/trace-events | 1 + > include/block/nvme.h | 60 ++++++++++++++++++++- > 5 files changed, 204 insertions(+), 4 deletions(-) > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > index 8ca824ea14..5404f67480 100644 > --- a/hw/nvme/ctrl.c > +++ b/hw/nvme/ctrl.c > @@ -88,6 +88,12 @@ > * completion when there are no outstanding AERs. When the maximum number > of > * enqueued events are reached, subsequent events will be dropped. > * > + * - `crwmt` > + * This is the Controller Ready With Media Timeout (CRWMT) field that is > + * defined in the CRTO register. This specifies the worst-case time that > host > + * software should wait for the controller and all attached namespaces to > + * become ready. The value is in units of 500 milliseconds. > + * > * - `mdts` > * Indicates the maximum data transfer size for a command that transfers > data > * between host-accessible memory and the controller. The value is > specified > @@ -157,6 +163,11 @@ > * namespace will be available in the subsystem but not attached to any > * controllers. > * > + * - `ready_delay` > + * This parameter specifies the amount of time that the namespace should > wait > + * before being marked ready. Only applicable if CC.CRIME is set by the > user. > + * The value is in units of 500 milliseconds (to be consistent with > `crwmt`). > + * > * Setting `zoned` to true selects Zoned Command Set at the namespace. > * In this case, the following namespace properties are available to > configure > * zoned operation: > @@ -216,6 +227,8 @@ > #define NVME_VF_RES_GRANULARITY 1 > #define NVME_VF_OFFSET 0x1 > #define NVME_VF_STRIDE 1 > +#define NVME_DEFAULT_CRIMT 0xa > +#define NVME_DEFAULT_CRWMT 0xf > > #define NVME_GUEST_ERR(trace, fmt, ...) \ > do { \ > @@ -4188,6 +4201,10 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest > *req) > return NVME_INVALID_OPCODE | NVME_DNR; > } > > + if (!(ns->id_indep_ns.nstat & NVME_NSTAT_NRDY)) { > + return NVME_NS_NOT_READY; > + } > +
I'd add a ns->ready bool instead. See below for my previously posted patch. > if (ns->status) { > return ns->status; > } > @@ -4791,6 +4808,27 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, > NvmeRequest *req, bool active) > return NVME_INVALID_CMD_SET | NVME_DNR; > } > > +static uint16_t nvme_identify_cs_indep_ns(NvmeCtrl *n, NvmeRequest *req) > +{ > + NvmeNamespace *ns; > + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; > + uint32_t nsid = le32_to_cpu(c->nsid); > + > + trace_pci_nvme_identify_cs_indep_ns(nsid); > + > + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { > + return NVME_INVALID_NSID | NVME_DNR; > + } > + > + ns = nvme_ns(n, nsid); > + if (unlikely(!ns)) { > + return nvme_rpt_empty_id_struct(n, req); > + } > + > + return nvme_c2h(n, (uint8_t *)&ns->id_indep_ns, sizeof(NvmeIdNsCsIndep), > + req); > +} > + I posted a patch for this some time ago https://lore.kernel.org/qemu-devel/20220531060342.2556973-1-...@irrelevant.dk/ The structure is so simple that we can just "build" it here instead of adding yet another (mostly empty) 4k member to the NvmeNamespace struct. > static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, > bool attached) > { > @@ -5081,6 +5119,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest > *req) > return nvme_identify_ns(n, req, true); > case NVME_ID_CNS_NS_PRESENT: > return nvme_identify_ns(n, req, false); > + case NVME_ID_CNS_CS_INDEPENDENT_NS: > + return nvme_identify_cs_indep_ns(n, req); > case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST: > return nvme_identify_ctrl_list(n, req, true); > case NVME_ID_CNS_CTRL_LIST: > @@ -5556,6 +5596,44 @@ static void nvme_select_iocs_ns(NvmeCtrl *n, > NvmeNamespace *ns) > } > } > > +void nvme_ns_ready_cb(void *opaque) > +{ > + NvmeNamespace *ns = opaque; > + DeviceState *dev = &ns->parent_obj; > + BusState *s = qdev_get_parent_bus(dev); > + NvmeCtrl *n = NVME(s->parent); > + > + ns->id_indep_ns.nstat |= NVME_NSTAT_NRDY; > + > + if (!test_and_set_bit(ns->params.nsid, n->changed_nsids)) { > + nvme_enqueue_event(n, NVME_AER_TYPE_NOTICE, > + NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED, > + NVME_LOG_CHANGED_NSLIST); > + } > +} Move to hw/nvme/ns.c. > + > +static void nvme_set_ready_or_start_timer(NvmeCtrl *n, NvmeNamespace *ns) > +{ > + int64_t expire_time; > + > + if (!NVME_CC_CRIME(ldl_le_p(&n->bar.cc)) || ns->params.ready_delay == 0) > { > + ns->id_indep_ns.nstat |= NVME_NSTAT_NRDY; > + return; > + } > + > + expire_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); > + expire_time += ns->params.ready_delay * 500; > + timer_mod(ns->ready_delay_timer, expire_time); > +} This can be made independent of NvmeCtrl by passing the CRIME value (and then moved to hw/nvme/ns.c). > + > +static inline void nvme_ns_clear_ready_and_stop_timer(NvmeNamespace *ns) > +{ > + if (!ns->subsys) { > + timer_del(ns->ready_delay_timer); > + ns->id_indep_ns.nstat &= ~NVME_NSTAT_NRDY; > + } > +} > + Move to hw/nvme/ns.c.
signature.asc
Description: PGP signature