Hi all, while working on the RPi5 I found that the U-Boot NVMe code is lacking a significant feature. The NVMe protocol relies heavily on the device accessing main memory; in the medieval times of computing this was called "bus master DMA". The NVMe device must generate memory addresses and read or write the contents there. The current U-Boot code in no way accounts for PCIe bridges in the path that add an offset to such "inbound accesses". Consequently, U-Boot can only boot from NVMe on platforms where this address offset is zero. (There are surprisingly many of these!)
This proposed patch uses the already existing dev_phys_to_bus(), which can dig up the correct offset from the associated DT nodes and subtract it. I have no strong opinion on the macro naming, or on whether it should silently pull in "dev" from the outer scope or rather have it passed explicitly. Signed-off-by: Torsten Duwe <[email protected]> diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c index 2b14437f69c..98b9cec47d6 100644 --- a/drivers/nvme/nvme.c +++ b/drivers/nvme/nvme.c @@ -12,6 +12,7 @@ #include <log.h> #include <malloc.h> #include <memalign.h> +#include <phys2bus.h> #include <time.h> #include <dm/device-internal.h> #include <linux/compat.h> @@ -27,6 +28,8 @@ #define IO_TIMEOUT 30 #define MAX_PRP_POOL 512 +#define BUS_ADDR(a) dev_phys_to_bus(dev->udev, (a)) + static int nvme_wait_csts(struct nvme_dev *dev, u32 mask, u32 val) { int timeout; @@ -91,12 +100,12 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2, i = 0; while (nprps) { if ((i == (prps_per_page - 1)) && nprps > 1) { - *(prp_pool + i) = cpu_to_le64((ulong)prp_pool + - page_size); + *(prp_pool + i) = cpu_to_le64(BUS_ADDR((ulong)prp_pool + + page_size)); i = 0; prp_pool += page_size; } - *(prp_pool + i++) = cpu_to_le64(dma_addr); + *(prp_pool + i++) = cpu_to_le64(BUS_ADDR(dma_addr)); dma_addr += page_size; nprps--; } @@ -393,8 +406,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; writel(aqa, &dev->bar->aqa); - nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq); - nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq); + nvme_writeq(BUS_ADDR((ulong)nvmeq->sq_cmds), &dev->bar->asq); + nvme_writeq(BUS_ADDR((ulong)nvmeq->cqes), &dev->bar->acq); result = nvme_enable_ctrl(dev); if (result) @@ -420,7 +436,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid, memset(&c, 0, sizeof(c)); c.create_cq.opcode = nvme_admin_create_cq; - c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes); + c.create_cq.prp1 = 
cpu_to_le64(BUS_ADDR((ulong)nvmeq->cqes)); c.create_cq.cqid = cpu_to_le16(qid); c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_cq.cq_flags = cpu_to_le16(flags); @@ -437,7 +453,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid, memset(&c, 0, sizeof(c)); c.create_sq.opcode = nvme_admin_create_sq; - c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds); + c.create_sq.prp1 = cpu_to_le64(BUS_ADDR((ulong)nvmeq->sq_cmds)); c.create_sq.sqid = cpu_to_le16(qid); c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_sq.sq_flags = cpu_to_le16(flags); @@ -458,14 +474,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid, memset(&c, 0, sizeof(c)); c.identify.opcode = nvme_admin_identify; c.identify.nsid = cpu_to_le32(nsid); - c.identify.prp1 = cpu_to_le64(dma_addr); + c.identify.prp1 = cpu_to_le64(BUS_ADDR(dma_addr)); length -= (page_size - offset); if (length <= 0) { c.identify.prp2 = 0; } else { dma_addr += (page_size - offset); - c.identify.prp2 = cpu_to_le64(dma_addr); + c.identify.prp2 = cpu_to_le64(BUS_ADDR(dma_addr)); } c.identify.cns = cpu_to_le32(cns); @@ -490,7 +506,7 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_get_features; c.features.nsid = cpu_to_le32(nsid); - c.features.prp1 = cpu_to_le64(dma_addr); + c.features.prp1 = cpu_to_le64(BUS_ADDR(dma_addr)); c.features.fid = cpu_to_le32(fid); ret = nvme_submit_admin_cmd(dev, &c, result); @@ -516,7 +532,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_set_features; - c.features.prp1 = cpu_to_le64(dma_addr); + c.features.prp1 = cpu_to_le64(BUS_ADDR(dma_addr)); c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); @@ -785,8 +805,8 @@ static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr, c.rw.slba = cpu_to_le64(slba); slba += lbas; c.rw.length = cpu_to_le16(lbas - 1); - c.rw.prp1 
= cpu_to_le64(temp_buffer); - c.rw.prp2 = cpu_to_le64(prp2); + c.rw.prp1 = cpu_to_le64(BUS_ADDR(temp_buffer)); + c.rw.prp2 = cpu_to_le64(BUS_ADDR(prp2)); status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q], &c, NULL, IO_TIMEOUT); if (status)

