I'm running into an issue with the latest qemu-nvme (v10.0.0-rc2) regarding
multiple controllers behind a subsystem. When I set up a subsystem with two
controllers, each with a private/non-shared namespace, both private/non-shared
namespaces end up attached to the same controller.
I'm sending out diffs that resolve the problem but would like to get
some feedback before sending a formal patch.
See below.
Thanks,
Alan Adamson
[root@localhost qemu-subsys]# git describe
v10.0.0-rc2
[root@localhost qemu-subsys]#
QEMU NVMe Config
================
-device nvme-subsys,id=subsys0 \
-device nvme,serial=deadbeef,id=nvme0,subsys=subsys0,atomic.dn=off,atomic.awun=31,atomic.awupf=15 \
-device nvme,serial=deadbeef,id=nvme1,subsys=subsys0,atomic.dn=off,atomic.awun=127,atomic.awupf=63 \
-drive id=ns1,file=/dev/nullb3,if=none \
-drive id=ns2,file=/dev/nullb2,if=none \
-drive id=ns3,file=/dev/nullb1,if=none \
-device nvme-ns,drive=ns1,bus=nvme0,nsid=1,shared=false \
-device nvme-ns,drive=ns2,bus=nvme1,nsid=2,shared=false \
-device nvme-ns,drive=ns3,bus=nvme1,nsid=3,shared=true \
1 Subsystem (subsys0)
2 Controllers (nvme0/nvme1)
  nvme0
    private namespace (nsid=1)
    shared namespace (nsid=3)
  nvme1
    private namespace (nsid=2)
    shared namespace (nsid=3)
What Linux is seeing
====================
[root@localhost ~]# nvme list -v
Subsystem        Subsystem-NQN                  Controllers
---------------- ------------------------------ ----------------
nvme-subsys1     nqn.2019-08.org.qemu:subsys0   nvme0, nvme1

Device   Cntlid SN       MN             FR     TxPort Address      Slot Subsystem    Namespaces
-------- ------ -------- -------------- ------ ------ ------------ ---- ------------ -------------------------
nvme0    0      deadbeef QEMU NVMe Ctrl 9.2.92 pcie   0000:00:04.0 4    nvme-subsys1 nvme1n1, nvme1n2, nvme1n3
nvme1    1      deadbeef QEMU NVMe Ctrl 9.2.92 pcie   0000:00:05.0 5    nvme-subsys1 nvme1n1

Device        Generic       NSID Usage                  Format       Controllers
------------- ------------- ---- ---------------------- ------------ ------------
/dev/nvme1n1  /dev/ng1n1    0x3  268.44 GB / 268.44 GB  512 B + 0 B  nvme0, nvme1
/dev/nvme1n2  /dev/ng1n2    0x2  268.44 GB / 268.44 GB  512 B + 0 B  nvme0
/dev/nvme1n3  /dev/ng1n3    0x1  268.44 GB / 268.44 GB  512 B + 0 B  nvme0
[root@localhost ~]# nvme id-ctrl /dev/nvme1n2 | grep awupf
awupf : 15
[root@localhost ~]# nvme id-ctrl /dev/nvme1n3 | grep awupf
awupf : 15
[root@localhost ~]#
===================
Both private namespaces are being attached to the same controller (nvme0).
Fix
===
We need to keep track of which controller registers a namespace with the
subsystem, and only allow other controllers to attach to that namespace if it
is shared. A non-shared namespace can only be attached to the controller that
registered it.
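
To make the rule concrete, here is a small stand-alone sketch of the attach
decision (simplified types for illustration only, not the actual QEMU
structures; the real change is in the patch below):

/*
 * Simplified, self-contained illustration of the attach rule.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct Ctrl { int cntlid; } Ctrl;
typedef struct Ns   { bool shared; bool detached; } Ns;
/* Mirrors the idea of the new per-slot record: the namespace plus the
 * controller that registered it with the subsystem. */
typedef struct Slot { Ctrl *ctrl; Ns *ns; } Slot;

/* Return true if controller c is allowed to attach the namespace in slot. */
static bool may_attach(const Slot *slot, const Ctrl *c)
{
    if (!slot->ns || slot->ns->detached) {
        return false;
    }
    /* A non-shared namespace only attaches to its registering controller. */
    if (!slot->ns->shared && slot->ctrl != c) {
        return false;
    }
    return true;
}

int main(void)
{
    Ctrl nvme0 = { 0 }, nvme1 = { 1 };
    Ns priv = { .shared = false }, shrd = { .shared = true };
    Slot nsid1 = { &nvme0, &priv };   /* private, registered by nvme0 */
    Slot nsid3 = { &nvme1, &shrd };   /* shared, registered by nvme1  */

    printf("nvme1 -> nsid1 (private to nvme0): %d\n", may_attach(&nsid1, &nvme1)); /* 0 */
    printf("nvme0 -> nsid3 (shared):           %d\n", may_attach(&nsid3, &nvme0)); /* 1 */
    return 0;
}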
Proposal
========
commit f4b9f10a80c30f1d6b9850d476eb8226bda3f553
Author: Alan Adamson <alan.adam...@oracle.com>
Date: 2025-04-03 14:10:30 -0400
subsys multi controller shared fix
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 518d02dc6670..883d8d4722fd 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5063,7 +5063,8 @@ static uint16_t nvme_endgrp_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
}
for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+ NvmeNamespace *ns = nvme_subsys_ns(n, i);
+
if (!ns) {
continue;
}
@@ -5700,7 +5701,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
ns = nvme_ns(n, nsid);
if (unlikely(!ns)) {
if (!active) {
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
return nvme_rpt_empty_id_struct(n, req);
}
@@ -5739,7 +5740,7 @@ static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
return NVME_INVALID_FIELD | NVME_DNR;
}
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
return NVME_INVALID_FIELD | NVME_DNR;
}
@@ -5809,7 +5810,7 @@ static uint16_t nvme_identify_ns_ind(NvmeCtrl *n, NvmeRequest *req, bool alloc)
ns = nvme_ns(n, nsid);
if (unlikely(!ns)) {
if (alloc) {
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
return nvme_rpt_empty_id_struct(n, req);
}
@@ -5837,7 +5838,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
ns = nvme_ns(n, nsid);
if (unlikely(!ns)) {
if (!active) {
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
return nvme_rpt_empty_id_struct(n, req);
}
@@ -5884,7 +5885,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req,
ns = nvme_ns(n, i);
if (!ns) {
if (!active) {
- ns = nvme_subsys_ns(n->subsys, i);
+ ns = nvme_subsys_ns(n, i);
if (!ns) {
continue;
}
@@ -5932,7 +5933,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req,
ns = nvme_ns(n, i);
if (!ns) {
if (!active) {
- ns = nvme_subsys_ns(n->subsys, i);
+ ns = nvme_subsys_ns(n, i);
if (!ns) {
continue;
}
@@ -6793,7 +6794,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
return NVME_INVALID_NSID | NVME_DNR;
}
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
return NVME_INVALID_FIELD | NVME_DNR;
}
@@ -7751,17 +7752,23 @@ static int nvme_start_ctrl(NvmeCtrl *n)
nvme_set_timestamp(n, 0ULL);
- /* verify that the command sets of attached namespaces are supported */
- for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+ if (n->subsys) {
+ for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ NvmeNamespace *ns = n->subsys->namespaces[i].namespace;
- if (ns && nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
- if (!ns->attached || ns->params.shared) {
- nvme_attach_ns(n, ns);
+ if (!ns) {
+ continue;
}
+ if (!(n->subsys->namespaces[i].ctrl == n) && !ns->params.shared) {
+ continue;
+ }
+ if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
+ if (!ns->attached || ns->params.shared) {
+ nvme_attach_ns(n, ns);
+ }
+ }
}
}
-
nvme_update_dsm_limits(n, NULL);
return 0;
@@ -8993,7 +9000,8 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
return;
}
- n->subsys->namespaces[ns->params.nsid] = ns;
+ n->subsys->namespaces[ns->params.nsid].namespace = ns;
+ n->subsys->namespaces[ns->params.nsid].ctrl = n;
}
}
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 98c1e75a5d29..9cca699b354f 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -742,7 +742,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
if (!nsid) {
for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- if (nvme_subsys_ns(subsys, i)) {
+ if (nvme_subsys_ns(n, i)) {
continue;
}
@@ -754,12 +754,13 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
error_setg(errp, "no free namespace id");
return;
}
- } else if (nvme_subsys_ns(subsys, nsid)) {
+ } else if (nvme_subsys_ns(n, nsid)) {
error_setg(errp, "namespace id '%d' already allocated", nsid);
return;
}
- subsys->namespaces[nsid] = ns;
+ subsys->namespaces[nsid].namespace = ns;
+ subsys->namespaces[nsid].ctrl = n;
ns->id_ns.endgid = cpu_to_le16(0x1);
ns->id_ns_ind.endgrpid = cpu_to_le16(0x1);
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 6f782ba18826..bea3b96a6dfa 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -97,6 +97,11 @@ typedef struct NvmeEnduranceGroup {
} fdp;
} NvmeEnduranceGroup;
+typedef struct Namespaces {
+ NvmeCtrl *ctrl;
+ NvmeNamespace *namespace;
+} Namespaces;
+
typedef struct NvmeSubsystem {
DeviceState parent_obj;
NvmeBus bus;
@@ -104,7 +109,7 @@ typedef struct NvmeSubsystem {
char *serial;
NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
- NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
+ Namespaces namespaces[NVME_MAX_NAMESPACES + 1];
NvmeEnduranceGroup endgrp;
struct {
@@ -136,16 +141,6 @@ static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
return subsys->ctrls[cntlid];
}
-static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
- uint32_t nsid)
-{
- if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
- return NULL;
- }
-
- return subsys->namespaces[nsid];
-}
-
#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
@@ -711,6 +706,14 @@ static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
return NULL;
}
+static inline NvmeNamespace *nvme_subsys_ns(NvmeCtrl *n, uint32_t nsid)
+{
+ if (!n->subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
+ return NULL;
+ }
+ return n->subsys->namespaces[nsid].namespace;
+}
+
void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
NvmeTxDirection dir, NvmeRequest *req);
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 2ae56f12a596..d5751564c05c 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -92,13 +92,19 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
subsys->ctrls[cntlid] = n;
- for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) {
- NvmeNamespace *ns = subsys->namespaces[nsid];
- if (ns && ns->params.shared && !ns->params.detached) {
- nvme_attach_ns(n, ns);
+ for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
+ NvmeNamespace *ns = subsys->namespaces[nsid].namespace;
+
+ if (!ns) {
+ continue;
+ }
+ if (!(subsys->namespaces[nsid].ctrl == n) && !ns->params.shared) {
+ continue;
}
+ if (ns->params.shared && !ns->params.detached) {
+ nvme_attach_ns(n, ns);
+ }
}
-
return cntlid;
}
With this fix:
==============
[root@localhost ~]# nvme list -v
Subsystem        Subsystem-NQN                  Controllers
---------------- ------------------------------ ----------------
nvme-subsys1     nqn.2019-08.org.qemu:subsys0   nvme0, nvme1

Device   Cntlid SN       MN             FR     TxPort Address      Slot Subsystem    Namespaces
-------- ------ -------- -------------- ------ ------ ------------ ---- ------------ -------------------------
nvme0    0      deadbeef QEMU NVMe Ctrl 9.2.92 pcie   0000:00:04.0 4    nvme-subsys1 nvme1n2, nvme1n3
nvme1    1      deadbeef QEMU NVMe Ctrl 9.2.92 pcie   0000:00:05.0 5    nvme-subsys1 nvme1n1, nvme1n3

Device        Generic       NSID Usage                  Format       Controllers
------------- ------------- ---- ---------------------- ------------ ------------
/dev/nvme1n1  /dev/ng1n1    0x2  268.44 GB / 268.44 GB  512 B + 0 B  nvme1
/dev/nvme1n2  /dev/ng1n2    0x1  268.44 GB / 268.44 GB  512 B + 0 B  nvme0
/dev/nvme1n3  /dev/ng1n3    0x3  268.44 GB / 268.44 GB  512 B + 0 B  nvme0, nvme1
[root@localhost ~]# nvme id-ctrl /dev/nvme1n1 | grep awupf
awupf : 63
[root@localhost ~]# nvme id-ctrl /dev/nvme1n2 | grep awupf
awupf : 15
[root@localhost ~]#
Each private namespace is now attached to its own controller, which the AWUPF
values confirm: awupf 63 matches nvme1 (atomic.awupf=63) and awupf 15 matches
nvme0 (atomic.awupf=15).