On Thu,  6 Nov 2025 16:16:56 +0200
Leon Romanovsky <[email protected]> wrote:

> From: Jason Gunthorpe <[email protected]>
> 
> Call vfio_pci_core_fill_phys_vec() with the proper physical ranges for the
> synthetic BAR 2 and BAR 4 regions. Otherwise use the normal flow based on
> the PCI bar.
> 
> This demonstrates a DMABUF that follows the region info report to only
> allow mapping the parts of the region that are mmapable. Since the BAR is
> power of two sized and the "CXL" region is just page aligned, there can
> be a padding region at the end that is not mmapped or passed into the
> DMABUF.
> 
> The "CXL" ranges that are remapped into BAR 2 and BAR 4 areas are not PCI
> MMIO, they actually run over the CXL-like coherent interconnect and for
> the purposes of DMA behave identically to DRAM. We don't try to model this
> distinction between true PCI BAR memory that takes a real PCI path and the
> "CXL" memory that takes a different path in the p2p framework for now.
> 
> Signed-off-by: Jason Gunthorpe <[email protected]>
> Tested-by: Alex Mastro <[email protected]>
> Tested-by: Nicolin Chen <[email protected]>
> Signed-off-by: Leon Romanovsky <[email protected]>
> ---
>  drivers/vfio/pci/nvgrace-gpu/main.c | 56 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 56 insertions(+)
> 
> diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
> index e346392b72f6..7d7ab2c84018 100644
> --- a/drivers/vfio/pci/nvgrace-gpu/main.c
> +++ b/drivers/vfio/pci/nvgrace-gpu/main.c
> @@ -7,6 +7,7 @@
>  #include <linux/vfio_pci_core.h>
>  #include <linux/delay.h>
>  #include <linux/jiffies.h>
> +#include <linux/pci-p2pdma.h>
>  
>  /*
>   * The device memory usable to the workloads running in the VM is cached
> @@ -683,6 +684,54 @@ nvgrace_gpu_write(struct vfio_device *core_vdev,
>       return vfio_pci_core_write(core_vdev, buf, count, ppos);
>  }
>  
> +static int nvgrace_get_dmabuf_phys(struct vfio_pci_core_device *core_vdev,
> +                                struct p2pdma_provider **provider,
> +                                unsigned int region_index,
> +                                struct dma_buf_phys_vec *phys_vec,
> +                                struct vfio_region_dma_range *dma_ranges,
> +                                size_t nr_ranges)
> +{
> +     struct nvgrace_gpu_pci_core_device *nvdev = container_of(
> +             core_vdev, struct nvgrace_gpu_pci_core_device, core_device);
> +     struct pci_dev *pdev = core_vdev->pdev;
> +
> +     if (nvdev->resmem.memlength && region_index == RESMEM_REGION_INDEX) {
> +             /*
> +              * The P2P properties of the non-BAR memory are the same as the
> +              * BAR memory, so just use the provider for index 0. Someday
> +              * when CXL gets P2P support we could create CXLish providers
> +              * for the non-BAR memory.
> +              */
> +             *provider = pcim_p2pdma_provider(pdev, 0);
> +             if (!*provider)
> +                     return -EINVAL;
> +             return vfio_pci_core_fill_phys_vec(phys_vec, dma_ranges,
> +                                                nr_ranges,
> +                                                nvdev->resmem.memphys,
> +                                                nvdev->resmem.memlength);
> +     } else if (region_index == USEMEM_REGION_INDEX) {
> +             /*
> +              * This is actually cacheable memory and isn't treated as P2P in
> +              * the chip. For now we have no way to push cacheable memory
> +              * through everything and the Grace HW doesn't care what caching
> +              * attribute is programmed into the SMMU. So use BAR 0.
> +              */
> +             *provider = pcim_p2pdma_provider(pdev, 0);
> +             if (!*provider)
> +                     return -EINVAL;
> +             return vfio_pci_core_fill_phys_vec(phys_vec, dma_ranges,
> +                                                nr_ranges,
> +                                                nvdev->usemem.memphys,
> +                                                nvdev->usemem.memlength);
> +     }
> +     return vfio_pci_core_get_dmabuf_phys(core_vdev, provider, region_index,
> +                                          phys_vec, dma_ranges, nr_ranges);
> +}
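
A side note on the padding the commit message describes: the region is
reported power-of-two sized while only memlength bytes are backed, so the
tail has nothing behind it. A minimal sketch of that arithmetic, assuming
the region size is derived with roundup_pow_of_two() as elsewhere in this
driver:

        /*
         * Illustration only: with a page aligned memlength, the power of
         * two region size leaves an unbacked tail ("padding") that must
         * not be mmapped or passed into the DMABUF.
         */
        size_t region_size = roundup_pow_of_two(nvdev->usemem.memlength);
        size_t padding = region_size - nvdev->usemem.memlength;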


Unless my eyes deceive me, we could reduce the redundancy a bit:

        struct mem_region *mem_region = NULL;

        if (nvdev->resmem.memlength && region_index == RESMEM_REGION_INDEX) {
                /*
                 * The P2P properties of the non-BAR memory are the same as the
                 * BAR memory, so just use the provider for index 0. Someday
                 * when CXL gets P2P support we could create CXLish providers
                 * for the non-BAR memory.
                 */
                mem_region = &nvdev->resmem;
        } else if (region_index == USEMEM_REGION_INDEX) {
                /*
                 * This is actually cacheable memory and isn't treated as P2P in
                 * the chip. For now we have no way to push cacheable memory
                 * through everything and the Grace HW doesn't care what caching
                 * attribute is programmed into the SMMU. So use BAR 0.
                 */
                mem_region = &nvdev->usemem;
        }

        if (mem_region) {
                *provider = pcim_p2pdma_provider(pdev, 0);
                if (!*provider)
                        return -EINVAL;
                return vfio_pci_core_fill_phys_vec(phys_vec, dma_ranges,
                                                   nr_ranges,
                                                   mem_region->memphys,
                                                   mem_region->memlength);
        }

        return vfio_pci_core_get_dmabuf_phys(core_vdev, provider, region_index,
                                             phys_vec, dma_ranges, nr_ranges);
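
Both synthetic regions use the provider for index 0 today, so this also
collapses the pcim_p2pdma_provider() lookup and its -EINVAL handling into
one spot.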
                
Thanks,
Alex
