On 5/13/26 09:38, Christoph Hellwig wrote:
FYI, I really want SGL support before this get merged, but ignoring that
for now:

I was hoping to let Samsung guys to send a follow up they already have,
but I'll ask them to have about taking it into this patch set.

+struct nvme_dmabuf_map {
+       struct io_dmabuf_map base;
+       dma_addr_t *dma_list;
+       struct sg_table *sgt;
+       unsigned nr_entries;

I'd make dma_list a variable-sized array at the end of the struture to avoid
an extra allocation and pointer derefernece.

Ok

+static void nvme_dmabuf_map_sync(struct nvme_dev *nvme_dev, struct request 
*req,
+                                bool for_cpu)
+{
+       int length = blk_rq_payload_bytes(req);
+       struct device *dev = nvme_dev->dev;
+       enum dma_data_direction dma_dir;
+       struct bio *bio = req->bio;
+       struct nvme_dmabuf_map *map;
+       dma_addr_t *dma_list;
+       int offset, map_idx;
+
+       dma_dir = rq_data_dir(req) == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+       map = container_of(bio->dmabuf_map, struct nvme_dmabuf_map, base);
+       dma_list = map->dma_list;
+
+       offset = bio->bi_iter.bi_bvec_done;
+       map_idx = offset / NVME_CTRL_PAGE_SIZE;
+       length += offset & (NVME_CTRL_PAGE_SIZE - 1);

Please initialize the variable at declaration time and use or add proper
helpers to simplify this:

static inline struct nvme_dmabuf_map *
to_nvme_dmabuf_map(struct io_dmabuf_map *map)
{
        return container_of(map, struct nvme_dmabuf_map, base);
}

....

        enum dma_data_direction dma_dir = rq_dma_dir(req);
        struct device *dev = nvme_dev->dev;
        struct bio *bio = req->bio;
        struct nvme_dmabuf_map *map = to_nvme_dmabuf_map(bio->bi_dmabuf_map);
        dma_addr_t *dma_list = map->dma_list;
        int offset = bio->bi_iter.bi_bvec_done;
        int mmap_idx = offset / NVME_CTRL_PAGE_SIZE;
        int length = blk_rq_payload_bytes(req) +
                offset & (NVME_CTRL_PAGE_SIZE - 1);

Also a lot of these ints sound like they should be unsigned.

Ok

+
+       while (length > 0) {
+               u64 dma_addr = dma_list[map_idx++];
+
+               if (for_cpu)
+                       __dma_sync_single_for_cpu(dev, dma_addr,
+                                                 NVME_CTRL_PAGE_SIZE, dma_dir);
+               else
+                       __dma_sync_single_for_device(dev, dma_addr,
+                                                    NVME_CTRL_PAGE_SIZE,
+                                                    dma_dir);
+               length -= NVME_CTRL_PAGE_SIZE;
+       }
+}

Nothing should be using these __dma_sync helpers that are internal
details. Using them means you call into sync code that should be skipped
on most common server class systems.

Yeah, the kernel test robot already flagged it as well

Also the for_cpu argument is a bit ugly.  I'd rather have separate
routines as in the core dma-mapping code, even if that means a little bit
of code duplication.

+static blk_status_t nvme_rq_setup_dmabuf_map(struct request *req,
+                                            struct nvme_queue *nvmeq)
+{
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       int length = blk_rq_payload_bytes(req);
+       u64 dma_addr, prp1_dma, prp2_dma;
+       struct bio *bio = req->bio;
+       struct nvme_dmabuf_map *map;
+       dma_addr_t *dma_list;
+       dma_addr_t prp_dma;
+       __le64 *prp_list;
+       int i, map_idx;
+       int offset;
+
+       nvme_dmabuf_map_sync(nvmeq->dev, req, false);
+
+       map = container_of(bio->dmabuf_map, struct nvme_dmabuf_map, base);
+       dma_list = map->dma_list;
+
+       offset = bio->bi_iter.bi_bvec_done;
+       map_idx = offset / NVME_CTRL_PAGE_SIZE;
+       offset &= (NVME_CTRL_PAGE_SIZE - 1);
+       prp1_dma = dma_list[map_idx++] + offset;

Same comments as for the sync helper above.

+       length -= (NVME_CTRL_PAGE_SIZE - offset);
+       if (length <= 0) {
+               prp2_dma = 0;
+               goto done;
+       }
+
+       if (length <= NVME_CTRL_PAGE_SIZE) {
+               prp2_dma = dma_list[map_idx];
+               goto done;
+       }
+
+       if (DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE) <=
+           NVME_SMALL_POOL_SIZE / sizeof(__le64))
+               iod->flags |= IOD_SMALL_DESCRIPTOR;
+
+       prp_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC,
+                       &prp_dma);
+       if (!prp_list)
+               return BLK_STS_RESOURCE;
+
+       iod->descriptors[iod->nr_descriptors++] = prp_list;
+       prp2_dma = prp_dma;

And I really hate how this duplicates all the nasty PRP building logic,
although right now I don't have a good answer to that.

+static inline bool nvme_rq_is_dmabuf_attached(struct request *req)
+{
+       if (!IS_ENABLED(CONFIG_DMABUF_TOKEN))
+               return false;
+       return req->bio && bio_flagged(req->bio, BIO_DMABUF_MAP);
+}

This is something that should go into the block layer.

I'll move it

--
Pavel Begunkov

Reply via email to