Re: [PATCH v2 3/4] hw/nvme: add support for ratified TP4084

2022-06-28 Thread Klaus Jensen
On Jun 27 13:47, Niklas Cassel wrote:
> TP4084 adds a new mode, CC.CRIME, that can be used to mark a namespace
> as ready independently from the controller.
> 
> When CC.CRIME is 0 (default), things behave as before, all namespaces
> are ready when CSTS.RDY gets set to 1.
> 
> When CC.CRIME is 1, the controller will become ready when CSTS.RDY gets
> set to 1, but commands accessing a namespace are allowed to return
> "Namespace Not Ready" or "Admin Command Media Not Ready".
> After CRTO.CRWMT amount of time, if the namespace has not yet been
> marked ready, the status codes also need to have the DNR bit set.
> 
> Add a new "ready_delay" namespace device parameter, in order to emulate
> different ready latencies for namespaces.
> 
> Once a namespace is ready, it will set the NRDY bit in the I/O Command
> Set Independent Identify Namespace Data Structure, and then send out a
> Namespace Attribute Changed event.
> 
> This new "ready_delay" is supported on controllers not part of a NVMe
> subsystem. The reasons are many. One problem is that multiple controllers
> can have different CC.CRIME modes running. Another problem is the extra
> locking needed. The third problem is when to actually clear NRDY. If we
> assume that a namespace clears NRDY when it no longer has any controller
> online for that namespace. The problem then is that Linux will reset the
> controllers one by one during probe time. The reset goes so fast so that
> there is no time when all controllers are in reset at the same time, so
> NRDY will never get cleared. (The controllers are enabled by SeaBIOS by
> default.) We could introduce a reset_time param, but this would only
> increase the chances that all controllers are in reset at the same time.
> 
> Signed-off-by: Niklas Cassel 
> ---
>  hw/nvme/ctrl.c   | 123 +--
>  hw/nvme/ns.c |  18 +++
>  hw/nvme/nvme.h   |   6 +++
>  hw/nvme/trace-events |   1 +
>  include/block/nvme.h |  60 -
>  5 files changed, 204 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 8ca824ea14..5404f67480 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -88,6 +88,12 @@
>   *   completion when there are no outstanding AERs. When the maximum number 
> of
>   *   enqueued events are reached, subsequent events will be dropped.
>   *
> + * - `crwmt`
> + *   This is the Controller Ready With Media Timeout (CRWMT) field that is
> + *   defined in the CRTO register. This specifies the worst-case time that 
> host
> + *   software should wait for the controller and all attached namespaces to
> + *   become ready. The value is in units of 500 milliseconds.
> + *
>   * - `mdts`
>   *   Indicates the maximum data transfer size for a command that transfers 
> data
>   *   between host-accessible memory and the controller. The value is 
> specified
> @@ -157,6 +163,11 @@
>   *   namespace will be available in the subsystem but not attached to any
>   *   controllers.
>   *
> + * - `ready_delay`
> + *   This parameter specifies the amount of time that the namespace should 
> wait
> + *   before being marked ready. Only applicable if CC.CRIME is set by the 
> user.
> + *   The value is in units of 500 milliseconds (to be consistent with 
> `crwmt`).
> + *
>   * Setting `zoned` to true selects Zoned Command Set at the namespace.
>   * In this case, the following namespace properties are available to 
> configure
>   * zoned operation:
> @@ -216,6 +227,8 @@
>  #define NVME_VF_RES_GRANULARITY 1
>  #define NVME_VF_OFFSET 0x1
>  #define NVME_VF_STRIDE 1
> +#define NVME_DEFAULT_CRIMT 0xa
> +#define NVME_DEFAULT_CRWMT 0xf
>  
>  #define NVME_GUEST_ERR(trace, fmt, ...) \
>  do { \
> @@ -4188,6 +4201,10 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest 
> *req)
>  return NVME_INVALID_OPCODE | NVME_DNR;
>  }
>  
> +if (!(ns->id_indep_ns.nstat & NVME_NSTAT_NRDY)) {
> +return NVME_NS_NOT_READY;
> +}
> +

I'd add a ns->ready bool instead. See below for my previously posted
patch.

>  if (ns->status) {
>  return ns->status;
>  }
> @@ -4791,6 +4808,27 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
> NvmeRequest *req, bool active)
>  return NVME_INVALID_CMD_SET | NVME_DNR;
>  }
>  
> +static uint16_t nvme_identify_cs_indep_ns(NvmeCtrl *n, NvmeRequest *req)
> +{
> +NvmeNamespace *ns;
> +NvmeIdentify *c = (NvmeIdentify *)>cmd;
> +uint32_t nsid = le32_to_cpu(c->nsid);
> +
> +trace_pci_nvme_identify_cs_indep_ns(nsid);
> +
> +if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
> +return NVME_INVALID_NSID | NVME_DNR;
> +}
> +
> +ns = nvme_ns(n, nsid);
> +if (unlikely(!ns)) {
> +return nvme_rpt_empty_id_struct(n, req);
> +}
> +
> +return nvme_c2h(n, (uint8_t *)>id_indep_ns, sizeof(NvmeIdNsCsIndep),
> +req);
> +}
> +

I posted a patch for this some time ago


[PATCH v2 3/4] hw/nvme: add support for ratified TP4084

2022-06-27 Thread Niklas Cassel via
TP4084 adds a new mode, CC.CRIME, that can be used to mark a namespace
as ready independently from the controller.

When CC.CRIME is 0 (default), things behave as before, all namespaces
are ready when CSTS.RDY gets set to 1.

When CC.CRIME is 1, the controller will become ready when CSTS.RDY gets
set to 1, but commands accessing a namespace are allowed to return
"Namespace Not Ready" or "Admin Command Media Not Ready".
After CRTO.CRWMT amount of time, if the namespace has not yet been
marked ready, the status codes also need to have the DNR bit set.

Add a new "ready_delay" namespace device parameter, in order to emulate
different ready latencies for namespaces.

Once a namespace is ready, it will set the NRDY bit in the I/O Command
Set Independent Identify Namespace Data Structure, and then send out a
Namespace Attribute Changed event.

This new "ready_delay" is supported on controllers not part of a NVMe
subsystem. The reasons are many. One problem is that multiple controllers
can have different CC.CRIME modes running. Another problem is the extra
locking needed. The third problem is when to actually clear NRDY. If we
assume that a namespace clears NRDY when it no longer has any controller
online for that namespace. The problem then is that Linux will reset the
controllers one by one during probe time. The reset goes so fast so that
there is no time when all controllers are in reset at the same time, so
NRDY will never get cleared. (The controllers are enabled by SeaBIOS by
default.) We could introduce a reset_time param, but this would only
increase the chances that all controllers are in reset at the same time.

Signed-off-by: Niklas Cassel 
---
 hw/nvme/ctrl.c   | 123 +--
 hw/nvme/ns.c |  18 +++
 hw/nvme/nvme.h   |   6 +++
 hw/nvme/trace-events |   1 +
 include/block/nvme.h |  60 -
 5 files changed, 204 insertions(+), 4 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8ca824ea14..5404f67480 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -88,6 +88,12 @@
  *   completion when there are no outstanding AERs. When the maximum number of
  *   enqueued events are reached, subsequent events will be dropped.
  *
+ * - `crwmt`
+ *   This is the Controller Ready With Media Timeout (CRWMT) field that is
+ *   defined in the CRTO register. This specifies the worst-case time that host
+ *   software should wait for the controller and all attached namespaces to
+ *   become ready. The value is in units of 500 milliseconds.
+ *
  * - `mdts`
  *   Indicates the maximum data transfer size for a command that transfers data
  *   between host-accessible memory and the controller. The value is specified
@@ -157,6 +163,11 @@
  *   namespace will be available in the subsystem but not attached to any
  *   controllers.
  *
+ * - `ready_delay`
+ *   This parameter specifies the amount of time that the namespace should wait
+ *   before being marked ready. Only applicable if CC.CRIME is set by the user.
+ *   The value is in units of 500 milliseconds (to be consistent with `crwmt`).
+ *
  * Setting `zoned` to true selects Zoned Command Set at the namespace.
  * In this case, the following namespace properties are available to configure
  * zoned operation:
@@ -216,6 +227,8 @@
 #define NVME_VF_RES_GRANULARITY 1
 #define NVME_VF_OFFSET 0x1
 #define NVME_VF_STRIDE 1
+#define NVME_DEFAULT_CRIMT 0xa
+#define NVME_DEFAULT_CRWMT 0xf
 
 #define NVME_GUEST_ERR(trace, fmt, ...) \
 do { \
@@ -4188,6 +4201,10 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 return NVME_INVALID_OPCODE | NVME_DNR;
 }
 
+if (!(ns->id_indep_ns.nstat & NVME_NSTAT_NRDY)) {
+return NVME_NS_NOT_READY;
+}
+
 if (ns->status) {
 return ns->status;
 }
@@ -4791,6 +4808,27 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
NvmeRequest *req, bool active)
 return NVME_INVALID_CMD_SET | NVME_DNR;
 }
 
+static uint16_t nvme_identify_cs_indep_ns(NvmeCtrl *n, NvmeRequest *req)
+{
+NvmeNamespace *ns;
+NvmeIdentify *c = (NvmeIdentify *)>cmd;
+uint32_t nsid = le32_to_cpu(c->nsid);
+
+trace_pci_nvme_identify_cs_indep_ns(nsid);
+
+if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
+return NVME_INVALID_NSID | NVME_DNR;
+}
+
+ns = nvme_ns(n, nsid);
+if (unlikely(!ns)) {
+return nvme_rpt_empty_id_struct(n, req);
+}
+
+return nvme_c2h(n, (uint8_t *)>id_indep_ns, sizeof(NvmeIdNsCsIndep),
+req);
+}
+
 static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
 bool attached)
 {
@@ -5081,6 +5119,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest 
*req)
 return nvme_identify_ns(n, req, true);
 case NVME_ID_CNS_NS_PRESENT:
 return nvme_identify_ns(n, req, false);
+case NVME_ID_CNS_CS_INDEPENDENT_NS:
+return nvme_identify_cs_indep_ns(n, req);