On 5/6/26 09:52, Neil Armstrong wrote:
On 4/30/26 16:54, Torsten Duwe wrote:
From: Torsten Duwe <[email protected]>

U-Boot currently does not account for PCIe bridges with a non-zero
inbound access offset when talking NVMe, it only works on platforms
where this offset happens to be zero.

This patch enhances the NVMe driver with the ability to also handle
these cases. The fix is required to boot e.g. the Raspberry Pi5 from
NVMe.

Signed-off-by: Torsten Duwe <[email protected]>
---

Kudos for the idea of case discrimination go to Andrea; this eases
things a lot.

I do consider this a bug fix. The problem has popped up during RPi5
development but really is a generic major defect IMHO. Please apply
wherever appropriate.

    Torsten

---
  drivers/nvme/nvme.c | 39 ++++++++++++++++++++++++++-------------
  1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c
index 2b14437f69c..8056f52fb50 100644
--- a/drivers/nvme/nvme.c
+++ b/drivers/nvme/nvme.c
@@ -12,6 +12,7 @@
  #include <log.h>
  #include <malloc.h>
  #include <memalign.h>
+#include <phys2bus.h>
  #include <time.h>
  #include <dm/device-internal.h>
  #include <linux/compat.h>
@@ -27,6 +28,18 @@
  #define IO_TIMEOUT        30
  #define MAX_PRP_POOL        512
+/*
+ * This macro detects the correct inbound DMA offset in both cases:
+ *   1) the NVMe is mentioned in the device tree, along with its parent,
+ *    and no intermediate PCIe bus node in between.
+ *   2) the NVMe was found dynamically by PCI enumeration which has created
+ *    an intermediate PCIe bus node. That node will be skipped and the offset
+ *    looked up in the grandparent.
+ */
+#define DEV_ADDR(a)        (dev_has_ofnode(dev->udev) ? \
+                    dev_phys_to_bus(dev->udev, (a)) : \
+                    dev_phys_to_bus(dev->udev->parent, (a)))

This looks subspicious, what is the state of the rpi5 here ? with a DT node or 
without ?

AFAIK NVMes are always dynamically detected, it seems the dma_offset is not 
correctly
copied to the nvme udev when created.

Could you look into this direction instead ?

Ok so I think the problem is from this test:

static int device_get_dma_constraints(struct udevice *dev)
{
...
        if (!CONFIG_IS_ENABLED(DM_DMA) || !parent || !dev_has_ofnode(parent))
                                                            /\
                return 0;

The NVMe is a child of the root bus, but it doesn't seem tre RPI5 has a node
for the root bus for PCI1 so the dma constraints are applied to the root port
but not to the following devices.

So to check could you try adding something like:

```
diff --git a/dts/upstream/src/arm64/broadcom/bcm2712-rpi-5-b-base.dtsi 
b/dts/upstream/src/arm64/broadcom/bcm2712-rpi-5-b-base.dtsi
index 04738bf281e..640c5987077 100644
--- a/dts/upstream/src/arm64/broadcom/bcm2712-rpi-5-b-base.dtsi
+++ b/dts/upstream/src/arm64/broadcom/bcm2712-rpi-5-b-base.dtsi
@@ -247,6 +247,15 @@

 &pcie1 {
        status = "okay";
+
+       pci@0,0 {
+               reg = <0x0 0x0 0x0 0x0 0x0>;
+               ranges;
+               bus-range = <0 1>;
+               device_type = "pci";
+               #address-cells = <3>;
+               #size-cells = <2>;
+       };
 };

 &pcie2 {
```

or:

```
--- a/drivers/core/device.c
+++ b/drivers/core/device.c
@@ -459,7 +459,19 @@ static int device_get_dma_constraints(struct udevice *dev)
        u64 size = 0;
        int ret;

-       if (!CONFIG_IS_ENABLED(DM_DMA) || !parent || !dev_has_ofnode(parent))
+       if (!CONFIG_IS_ENABLED(DM_DMA) || !parent)
+               return 0;
+
+       /* Look for the first node in the parent chain */
+       while (parent) {
+               if (dev_has_ofnode(parent))
+                       break;
+
+               parent = dev_get_parent(parent);
+       }
+
+       /* No parents have a node, bail out */
+       if (!parent)
                return 0;

        /*
```
Which will try to find the node higher in the parent chain.

Neil


+
  static int nvme_wait_csts(struct nvme_dev *dev, u32 mask, u32 val)
  {
      int timeout;
@@ -91,12 +104,12 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
      i = 0;
      while (nprps) {
          if ((i == (prps_per_page - 1)) && nprps > 1) {
-            *(prp_pool + i) = cpu_to_le64((ulong)prp_pool +
-                    page_size);
+            *(prp_pool + i) = cpu_to_le64(DEV_ADDR((ulong)prp_pool +
+                                page_size));
              i = 0;
              prp_pool += page_size;
          }
-        *(prp_pool + i++) = cpu_to_le64(dma_addr);
+        *(prp_pool + i++) = cpu_to_le64(DEV_ADDR(dma_addr));
          dma_addr += page_size;
          nprps--;
      }
@@ -393,8 +406,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
      dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
      writel(aqa, &dev->bar->aqa);
-    nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
-    nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);
+    nvme_writeq(DEV_ADDR((ulong)nvmeq->sq_cmds), &dev->bar->asq);
+    nvme_writeq(DEV_ADDR((ulong)nvmeq->cqes), &dev->bar->acq);
      result = nvme_enable_ctrl(dev);
      if (result)
@@ -420,7 +433,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
      memset(&c, 0, sizeof(c));
      c.create_cq.opcode = nvme_admin_create_cq;
-    c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
+    c.create_cq.prp1 = cpu_to_le64(DEV_ADDR((ulong)nvmeq->cqes));
      c.create_cq.cqid = cpu_to_le16(qid);
      c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
      c.create_cq.cq_flags = cpu_to_le16(flags);
@@ -437,7 +450,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
      memset(&c, 0, sizeof(c));
      c.create_sq.opcode = nvme_admin_create_sq;
-    c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
+    c.create_sq.prp1 = cpu_to_le64(DEV_ADDR((ulong)nvmeq->sq_cmds));
      c.create_sq.sqid = cpu_to_le16(qid);
      c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
      c.create_sq.sq_flags = cpu_to_le16(flags);
@@ -458,14 +471,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid,
      memset(&c, 0, sizeof(c));
      c.identify.opcode = nvme_admin_identify;
      c.identify.nsid = cpu_to_le32(nsid);
-    c.identify.prp1 = cpu_to_le64(dma_addr);
+    c.identify.prp1 = cpu_to_le64(DEV_ADDR(dma_addr));
      length -= (page_size - offset);
      if (length <= 0) {
          c.identify.prp2 = 0;
      } else {
          dma_addr += (page_size - offset);
-        c.identify.prp2 = cpu_to_le64(dma_addr);
+        c.identify.prp2 = cpu_to_le64(DEV_ADDR(dma_addr));
      }
      c.identify.cns = cpu_to_le32(cns);
@@ -490,7 +503,7 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, 
unsigned nsid,
      memset(&c, 0, sizeof(c));
      c.features.opcode = nvme_admin_get_features;
      c.features.nsid = cpu_to_le32(nsid);
-    c.features.prp1 = cpu_to_le64(dma_addr);
+    c.features.prp1 = cpu_to_le64(DEV_ADDR(dma_addr));
      c.features.fid = cpu_to_le32(fid);
      ret = nvme_submit_admin_cmd(dev, &c, result);
@@ -516,7 +529,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, 
unsigned dword11,
      memset(&c, 0, sizeof(c));
      c.features.opcode = nvme_admin_set_features;
-    c.features.prp1 = cpu_to_le64(dma_addr);
+    c.features.prp1 = cpu_to_le64(DEV_ADDR(dma_addr));
      c.features.fid = cpu_to_le32(fid);
      c.features.dword11 = cpu_to_le32(dword11);
@@ -785,8 +798,8 @@ static ulong nvme_blk_rw(struct udevice *udev, lbaint_t 
blknr,
          c.rw.slba = cpu_to_le64(slba);
          slba += lbas;
          c.rw.length = cpu_to_le16(lbas - 1);
-        c.rw.prp1 = cpu_to_le64(temp_buffer);
-        c.rw.prp2 = cpu_to_le64(prp2);
+        c.rw.prp1 = cpu_to_le64(DEV_ADDR(temp_buffer));
+        c.rw.prp2 = cpu_to_le64(DEV_ADDR(prp2));
          status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
                  &c, NULL, IO_TIMEOUT);
          if (status)


Reply via email to