I'm running into a issue with the latest qemu-nvme with v10.0.0-rc2 with regards to multiple controllers behind a subsystem.  When I setup a subsystem with 2 controllers, each with a private/non-shared namespace, the two private/non-shared namespaces all get attached to one of the controllers.

I'm sending out diffs that resolve the problem but would like to get some feedback before sending a formal patch.

See below.

Thanks,

Alan Adamson




[root@localhost qemu-subsys]# git describe
v10.0.0-rc2
[root@localhost qemu-subsys]#


QEMU NVMe Config
================
-device nvme-subsys,id=subsys0 \
-device nvme,serial=deadbeef,id=nvme0,subsys=subsys0,atomic.dn=off,atomic.awun=31,atomic.awupf=15 \ -device nvme,serial=deadbeef,id=nvme1,subsys=subsys0,atomic.dn=off,atomic.awun=127,atomic.awupf=63 \
-drive id=ns1,file=/dev/nullb3,if=none \
-drive id=ns2,file=/dev/nullb2,if=none \
-drive id=ns3,file=/dev/nullb1,if=none \
-device nvme-ns,drive=ns1,bus=nvme0,nsid=1,shared=false \
-device nvme-ns,drive=ns2,bus=nvme1,nsid=2,shared=false \
-device nvme-ns,drive=ns3,bus=nvme1,nsid=3,shared=true  \

1 Subsystem (subsys0)
    2 Controllers (nvme0/nvme1)

nvme0
    private namespace (nsid=1)
    shared namespace (nsid=3)
nvme1
    private namespace (nsid=2)
    shared namespace (nsid=3)


What Linux is seeing
====================
[root@localhost ~]# nvme list -v
Subsystem Subsystem-NQN Controllers
---------------- ------------------------------------------------------------------------------------------------ ----------------
nvme-subsys1 nqn.2019-08.org.qemu:subsys0 nvme0, nvme1

Device           Cntlid SN MN                                       FR       TxPort Address        Slot   Subsystem    Namespaces ---------------- ------ -------------------- ---------------------------------------- -------- ------ -------------- ------ ------------ ---------------- nvme0    0      deadbeef             QEMU NVMe Ctrl                           9.2.92   pcie   0000:00:04.0   4 nvme-subsys1 nvme1n1, nvme1n2, nvme1n3 nvme1    1      deadbeef             QEMU NVMe Ctrl                           9.2.92   pcie   0000:00:05.0   5 nvme-subsys1 nvme1n1

Device            Generic           NSID Usage                      Format           Controllers ----------------- ----------------- ---------- -------------------------- ---------------- ---------------- /dev/nvme1n1 /dev/ng1n1   0x3        268.44  GB / 268.44  GB 512   B +  0 B   nvme0, nvme1 /dev/nvme1n2 /dev/ng1n2   0x2        268.44  GB / 268.44  GB 512   B +  0 B   nvme0 /dev/nvme1n3 /dev/ng1n3   0x1        268.44  GB / 268.44  GB 512   B +  0 B   nvme0

[root@localhost ~]# nvme id-ctrl /dev/nvme1n2 |  grep awupf
awupf     : 15
[root@localhost ~]# nvme id-ctrl /dev/nvme1n3 |  grep awupf
awupf     : 15
[root@localhost ~]#

===================
Both private namspaces are being attached to the same controller (nvme0)


Fix
===
Need to keep track of the controller that registers a namespace with the subsystem
and only allow other controllers to attach to the namespace if it is shared.
Non-shared namespaces can only be attached to a controller than registers it.

Proposal
========
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 518d02dc6670..883d8d4722fd 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5063,7 +5063,8 @@ static uint16_t nvme_endgrp_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
     }

     for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
-        NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+        NvmeNamespace *ns = nvme_subsys_ns(n, i);
+
         if (!ns) {
             continue;
         }
@@ -5700,7 +5701,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
     ns = nvme_ns(n, nsid);
     if (unlikely(!ns)) {
         if (!active) {
-            ns = nvme_subsys_ns(n->subsys, nsid);
+            ns = nvme_subsys_ns(n, nsid);
             if (!ns) {
[root@localhost qemu-subsys]# git log -p f4b9f10a80c30f1d6b9850d476eb8226bda3f553 > /tmp/p
^C
[root@localhost qemu-subsys]# vi /tmp/p
[root@localhost qemu-subsys]# cat /tmp/p
commit f4b9f10a80c30f1d6b9850d476eb8226bda3f553
Author: Alan Adamson <alan.adam...@oracle.com>
Date:   2025-04-03 14:10:30 -0400

    subsys multi controller shared fix

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 518d02dc6670..883d8d4722fd 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5063,7 +5063,8 @@ static uint16_t nvme_endgrp_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
     }

     for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
-        NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+        NvmeNamespace *ns = nvme_subsys_ns(n, i);
+
         if (!ns) {
             continue;
         }
@@ -5700,7 +5701,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
     ns = nvme_ns(n, nsid);
     if (unlikely(!ns)) {
         if (!active) {
-            ns = nvme_subsys_ns(n->subsys, nsid);
+            ns = nvme_subsys_ns(n, nsid);
             if (!ns) {
                 return nvme_rpt_empty_id_struct(n, req);
             }
@@ -5739,7 +5740,7 @@ static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
             return NVME_INVALID_FIELD | NVME_DNR;
         }

-        ns = nvme_subsys_ns(n->subsys, nsid);
+        ns = nvme_subsys_ns(n, nsid);
         if (!ns) {
             return NVME_INVALID_FIELD | NVME_DNR;
         }
@@ -5809,7 +5810,7 @@ static uint16_t nvme_identify_ns_ind(NvmeCtrl *n, NvmeRequest *req, bool alloc)
     ns = nvme_ns(n, nsid);
     if (unlikely(!ns)) {
         if (alloc) {
-            ns = nvme_subsys_ns(n->subsys, nsid);
+            ns = nvme_subsys_ns(n, nsid);
             if (!ns) {
                 return nvme_rpt_empty_id_struct(n, req);
             }
@@ -5837,7 +5838,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
     ns = nvme_ns(n, nsid);
     if (unlikely(!ns)) {
         if (!active) {
-            ns = nvme_subsys_ns(n->subsys, nsid);
+            ns = nvme_subsys_ns(n, nsid);
             if (!ns) {
                 return nvme_rpt_empty_id_struct(n, req);
             }
@@ -5884,7 +5885,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req,
         ns = nvme_ns(n, i);
         if (!ns) {
             if (!active) {
-                ns = nvme_subsys_ns(n->subsys, i);
+                ns = nvme_subsys_ns(n, i);
                 if (!ns) {
                     continue;
                 }
@@ -5932,7 +5933,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req,
         ns = nvme_ns(n, i);
         if (!ns) {
             if (!active) {
-                ns = nvme_subsys_ns(n->subsys, i);
+                ns = nvme_subsys_ns(n, i);
                 if (!ns) {
                     continue;
                 }
@@ -6793,7 +6794,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
         return NVME_INVALID_NSID | NVME_DNR;
     }

-    ns = nvme_subsys_ns(n->subsys, nsid);
+    ns = nvme_subsys_ns(n, nsid);
     if (!ns) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
@@ -7751,17 +7752,23 @@ static int nvme_start_ctrl(NvmeCtrl *n)

     nvme_set_timestamp(n, 0ULL);

-    /* verify that the command sets of attached namespaces are supported */
-    for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
-        NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+    if (n->subsys) {
+        for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+        NvmeNamespace *ns = n->subsys->namespaces[i].namespace;

-        if (ns && nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
-            if (!ns->attached || ns->params.shared) {
-                nvme_attach_ns(n, ns);
+        if (!ns) {
+                continue;
             }
+        if (!(n->subsys->namespaces[i].ctrl == n) && !ns->params.shared) {
+                continue;
+            }
+        if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
+                if (!ns->attached || ns->params.shared) {
+            nvme_attach_ns(n, ns);
+        }
+            }
         }
     }
-
     nvme_update_dsm_limits(n, NULL);

     return 0;
@@ -8993,7 +9000,8 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
             return;
         }

-        n->subsys->namespaces[ns->params.nsid] = ns;
+        n->subsys->namespaces[ns->params.nsid].namespace = ns;
+        n->subsys->namespaces[ns->params.nsid].ctrl = n;
     }
 }

diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 98c1e75a5d29..9cca699b354f 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -742,7 +742,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)

     if (!nsid) {
         for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
-            if (nvme_subsys_ns(subsys, i)) {
+            if (nvme_subsys_ns(n, i)) {
                 continue;
             }

@@ -754,12 +754,13 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
             error_setg(errp, "no free namespace id");
             return;
         }
-    } else if (nvme_subsys_ns(subsys, nsid)) {
+    } else if (nvme_subsys_ns(n, nsid)) {
         error_setg(errp, "namespace id '%d' already allocated", nsid);
         return;
     }

-    subsys->namespaces[nsid] = ns;
+    subsys->namespaces[nsid].namespace = ns;
+    subsys->namespaces[nsid].ctrl = n;

     ns->id_ns.endgid = cpu_to_le16(0x1);
     ns->id_ns_ind.endgrpid = cpu_to_le16(0x1);
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 6f782ba18826..bea3b96a6dfa 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -97,6 +97,11 @@ typedef struct NvmeEnduranceGroup {
     } fdp;
 } NvmeEnduranceGroup;

+typedef struct Namespaces {
+    NvmeCtrl           *ctrl;
+    NvmeNamespace      *namespace;
+} Namespaces;
+
 typedef struct NvmeSubsystem {
     DeviceState parent_obj;
     NvmeBus     bus;
@@ -104,7 +109,7 @@ typedef struct NvmeSubsystem {
     char        *serial;

     NvmeCtrl           *ctrls[NVME_MAX_CONTROLLERS];
-    NvmeNamespace      *namespaces[NVME_MAX_NAMESPACES + 1];
+    Namespaces         namespaces[NVME_MAX_NAMESPACES + 1];
     NvmeEnduranceGroup endgrp;

     struct {
@@ -136,16 +141,6 @@ static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
     return subsys->ctrls[cntlid];
 }

-static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
-                                            uint32_t nsid)
-{
-    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
-        return NULL;
-    }
-
-    return subsys->namespaces[nsid];
-}
-
 #define TYPE_NVME_NS "nvme-ns"
 #define NVME_NS(obj) \
     OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
@@ -711,6 +706,14 @@ static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
     return NULL;
 }

+static inline NvmeNamespace *nvme_subsys_ns(NvmeCtrl *n, uint32_t nsid)
+{
+    if (!n->subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
+        return NULL;
+    }
+    return n->subsys->namespaces[nsid].namespace;
+}
+
 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
 uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 2ae56f12a596..d5751564c05c 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -92,13 +92,19 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)

     subsys->ctrls[cntlid] = n;

-    for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) {
-        NvmeNamespace *ns = subsys->namespaces[nsid];
-        if (ns && ns->params.shared && !ns->params.detached) {
-            nvme_attach_ns(n, ns);
+    for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
+        NvmeNamespace *ns = subsys->namespaces[nsid].namespace;
+
+    if (!ns) {
+            continue;
+        }
+        if (!(subsys->namespaces[nsid].ctrl == n) && !ns->params.shared) {
+            continue;
         }
+    if (ns->params.shared && !ns->params.detached) {
+            nvme_attach_ns(n, ns);
+    }
     }
-
     return cntlid;
 }


With this fix:
==============
[root@localhost ~]# nvme list -v
Subsystem Subsystem-NQN Controllers
---------------- ------------------------------------------------------------------------------------------------ ----------------
nvme-subsys1 nqn.2019-08.org.qemu:subsys0 nvme0, nvme1

Device           Cntlid SN MN                                       FR       TxPort Address        Slot   Subsystem    Namespaces ---------------- ------ -------------------- ---------------------------------------- -------- ------ -------------- ------ ------------ ---------------- nvme0    0      deadbeef             QEMU NVMe Ctrl                           9.2.92   pcie   0000:00:04.0   4 nvme-subsys1 nvme1n2, nvme1n3 nvme1    1      deadbeef             QEMU NVMe Ctrl                           9.2.92   pcie   0000:00:05.0   5 nvme-subsys1 nvme1n1, nvme1n3

Device            Generic           NSID Usage                      Format           Controllers ----------------- ----------------- ---------- -------------------------- ---------------- ---------------- /dev/nvme1n1 /dev/ng1n1   0x2        268.44  GB / 268.44  GB 512   B +  0 B   nvme1 /dev/nvme1n2 /dev/ng1n2   0x1        268.44  GB / 268.44  GB 512   B +  0 B   nvme0 /dev/nvme1n3 /dev/ng1n3   0x3        268.44  GB / 268.44  GB 512   B +  0 B   nvme0, nvme1

[root@localhost ~]# nvme id-ctrl /dev/nvme1n1 |  grep awupf
awupf     : 63
[root@localhost ~]# nvme id-ctrl /dev/nvme1n2 |  grep awupf
awupf     : 15
[root@localhost ~]#

Each private namespace is attached to its own controller and that is verified with the AWUPF values.




Reply via email to