Re: [PATCH for-7.0 4/4] hw/nvme: add support for zoned random write area

2022-01-26 Thread Keith Busch
On Thu, Nov 25, 2021 at 08:37:35AM +0100, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add support for TP 4076 ("Zoned Random Write Area"), v2021.08.23
> ("Ratified").
> 
> This adds three new namespace parameters: "zoned.numzrwa" (number of
> zrwa resources, i.e. number of zones that can have a zrwa),
> "zoned.zrwas" (zrwa size in LBAs), "zoned.zrwafg" (granularity in LBAs
> for flushes).
> 
> Signed-off-by: Klaus Jensen 

Looks good; it will just need a minor update if you choose to take the
feedback from patch 2 on board.

Reviewed-by: Keith Busch 



[PATCH for-7.0 4/4] hw/nvme: add support for zoned random write area

2021-11-24 Thread Klaus Jensen
From: Klaus Jensen 

Add support for TP 4076 ("Zoned Random Write Area"), v2021.08.23
("Ratified").

This adds three new namespace parameters: "zoned.numzrwa" (number of
ZRWA resources, i.e. the number of zones that can have a ZRWA),
"zoned.zrwas" (ZRWA size in LBAs), and "zoned.zrwafg" (ZRWA flush
granularity in LBAs).

Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 171 ++-
 hw/nvme/ns.c |  58 +++
 hw/nvme/nvme.h   |  10 +++
 hw/nvme/trace-events |   1 +
 include/block/nvme.h |  17 -
 5 files changed, 237 insertions(+), 20 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 7ac6ec50a0d1..4c9b303dfdca 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -299,26 +299,37 @@ static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
 }
 }
 
-/*
- * Check if we can open a zone without exceeding open/active limits.
- * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
- */
-static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
+static uint16_t nvme_zns_check_resources(NvmeNamespace *ns, uint32_t act,
+ uint32_t opn, uint32_t zrwa)
 {
 if (ns->params.max_active_zones != 0 &&
 ns->nr_active_zones + act > ns->params.max_active_zones) {
 trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones);
 return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
 }
+
 if (ns->params.max_open_zones != 0 &&
 ns->nr_open_zones + opn > ns->params.max_open_zones) {
 trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones);
 return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
 }
 
+if (zrwa > ns->zns.numzrwa) {
+return NVME_NOZRWA | NVME_DNR;
+}
+
 return NVME_SUCCESS;
 }
 
+/*
+ * Check if we can open a zone without exceeding open/active limits.
+ * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
+ */
+static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
+{
+return nvme_zns_check_resources(ns, act, opn, 0);
+}
+
 static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 {
 hwaddr hi, lo;
@@ -1605,9 +1616,19 @@ static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone,
 return status;
 }
 
-if (unlikely(slba != zone->w_ptr)) {
-trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr);
-return NVME_ZONE_INVALID_WRITE;
+if (zone->d.za & NVME_ZA_ZRWA_VALID) {
+uint64_t ezrwa = zone->w_ptr + 2 * ns->zns.zrwas;
+
+if (slba < zone->w_ptr || slba + nlb > ezrwa) {
+trace_pci_nvme_err_zone_invalid_write(slba, zone->w_ptr);
+return NVME_ZONE_INVALID_WRITE;
+}
+} else {
+if (unlikely(slba != zone->w_ptr)) {
+trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
+   zone->w_ptr);
+return NVME_ZONE_INVALID_WRITE;
+}
 }
 
 if (unlikely((slba + nlb) > zcap)) {
@@ -1687,6 +1708,14 @@ static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone)
 /* fallthrough */
 case NVME_ZONE_STATE_CLOSED:
 nvme_aor_dec_active(ns);
+
+if (zone->d.za & NVME_ZA_ZRWA_VALID) {
+NVME_ZA_CLEAR(zone->d.za, NVME_ZA_ZRWA_VALID);
+if (ns->params.numzrwa) {
+ns->zns.numzrwa++;
+}
+}
+
 /* fallthrough */
 case NVME_ZONE_STATE_EMPTY:
 nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
@@ -1722,6 +1751,13 @@ static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone)
 /* fallthrough */
 case NVME_ZONE_STATE_CLOSED:
 nvme_aor_dec_active(ns);
+
+if (zone->d.za & NVME_ZA_ZRWA_VALID) {
+if (ns->params.numzrwa) {
+ns->zns.numzrwa++;
+}
+}
+
 /* fallthrough */
 case NVME_ZONE_STATE_FULL:
 zone->w_ptr = zone->d.zslba;
@@ -1755,6 +1791,7 @@ static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns)
 
 enum {
 NVME_ZRM_AUTO = 1 << 0,
+NVME_ZRM_ZRWA = 1 << 1,
 };
 
 static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
@@ -1773,7 +1810,8 @@ static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
 if (n->params.auto_transition_zones) {
 nvme_zrm_auto_transition_zone(ns);
 }
-status = nvme_aor_check(ns, act, 1);
+status = nvme_zns_check_resources(ns, act, 1,
+  (flags & NVME_ZRM_ZRWA) ? 1 : 0);
 if (status) {
 return status;
 }
@@ -1801,6 +1839,12 @@ static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
 /* fallthrough */
 
 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+if (flags & NVME_ZRM_ZRWA) {
+ns->zns.numzrwa--;
+
+NVME_ZA_SET(zone->d.za, NVME_ZA_ZRWA_VALID)