AMD General Ping on this patch series. Thanks. ________________________________ From: Kasiviswanathan, Harish <[email protected]> Sent: Saturday, May 2, 2026 5:28 PM To: [email protected] <[email protected]> Cc: Kasiviswanathan, Harish <[email protected]> Subject: [PATCH 2/2] drm/amdgpu: amdgpu_device_is_peer_accessible to support all BARs
amdgpu_device_is_peer_accessible was initially written to check peer VRAM access; however, it is also called for the DOORBELL and MMIO ranges. Add support for these as well. Signed-off-by: Harish Kasiviswanathan <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 77 +++++++++++++++---- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 4 files changed, 78 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6b9d103fbff1..b67d42d7f8a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1455,7 +1455,8 @@ enum amdgpu_pcie_bar { }; bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, - struct amdgpu_device *peer_adev); + struct amdgpu_device *peer_adev, + enum amdgpu_pcie_bar pcie_bar); int amdgpu_device_baco_enter(struct amdgpu_device *adev); int amdgpu_device_baco_exit(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7c01492e69dd..a9be242c6bfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -900,8 +900,19 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM) same_hive = amdgpu_xgmi_same_hive(adev, bo_adev); - if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev)) - return -EINVAL; + if (!same_hive) { + int bar; + + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) + bar = AMDGPU_PCIE_BAR_DOORBELL; + else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) + bar = AMDGPU_PCIE_BAR_MMIO; + else + bar = AMDGPU_PCIE_BAR_VRAM; + + if (!amdgpu_device_is_peer_accessible(bo_adev, adev, bar)) + return -EINVAL; + } } for (i = 0; i 
<= is_aql; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5c14fdbc1847..a5c1160cdd03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6185,36 +6185,83 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * @peer_adev: amdgpu_device pointer for peer device trying to access @adev + * @pcie_bar: PCIe BAR index to check accessibility for: + * 0 = VRAM aperture (BAR 0) + * 2 = Doorbell aperture (BAR 2) + * 5 = MMIO remap aperture (BAR 5) + * + * Return true if @peer_adev can access (DMA) @adev through the specified + * PCIe BAR. For VRAM (BAR 0), @adev must be "large BAR" and the BAR must + * match the DMA mask of @peer_adev. For doorbell and MMIO BARs, only the + * DMA addressability and P2P chipset support are checked. + * + * @note: CONFIG_HSA_AMD_P2P indicates support for P2P DMA mappings. Query + * P2PDMA distance only if the kernel has all the prerequisites for P2P DMA + * support. Otherwise fall back to the less reliable legacy P2P support to + * avoid regressions. * - * Return true if @peer_adev can access (DMA) @adev through the PCIe - * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of - * @peer_adev. 
*/ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, - struct amdgpu_device *peer_adev) + struct amdgpu_device *peer_adev, + enum amdgpu_pcie_bar pcie_bar) { #ifdef CONFIG_HSA_AMD_P2P - bool p2p_access = - !adev->gmc.xgmi.connected_to_cpu && + bool p2p_access = true; + bool p2p_addressable = false; + resource_size_t bar_base = 0, bar_size = 0; + + /* VRAM requires large BAR (full VRAM visible) for P2P access */ + if (pcie_bar == AMDGPU_PCIE_BAR_VRAM) { + if (!adev->gmc.visible_vram_size || + adev->gmc.real_vram_size != adev->gmc.visible_vram_size) + return false; + + /* VRAM on CPU-connected xGMI devices is accessed via + * coherent fabric, not PCIe BAR P2P + */ + if (adev->gmc.xgmi.connected_to_cpu) + return false; + } + + switch (pcie_bar) { + case AMDGPU_PCIE_BAR_DOORBELL: + bar_base = adev->doorbell.base; + bar_size = adev->doorbell.size; + break; + case AMDGPU_PCIE_BAR_MMIO: + bar_base = adev->rmmio_base; + bar_size = adev->rmmio_size; + break; + case AMDGPU_PCIE_BAR_VRAM: + default: + bar_base = adev->gmc.aper_base; + bar_size = adev->gmc.aper_size; + break; + } + + if (!bar_base || !bar_size) { + dev_dbg(adev->dev, "Invalid BAR%d configuration for P2P access\n", + pcie_bar); + return false; + } + + p2p_access = !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); if (!p2p_access) dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n", pci_name(peer_adev->pdev)); - - bool is_large_bar = adev->gmc.visible_vram_size && - adev->gmc.real_vram_size == adev->gmc.visible_vram_size; - bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev); + p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev); if (!p2p_addressable) { uint64_t address_mask = peer_adev->dev->dma_mask ? 
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); - resource_size_t aper_limit = - adev->gmc.aper_base + adev->gmc.aper_size - 1; + resource_size_t bar_limit = bar_base + bar_size - 1; - p2p_addressable = !(adev->gmc.aper_base & address_mask || - aper_limit & address_mask); + p2p_addressable = !(bar_base & address_mask || + bar_limit & address_mask); } - return pcie_p2p && is_large_bar && p2p_access && p2p_addressable; + + return p2p_access && p2p_addressable; #else return false; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 4dc9f9aa3a2e..0b50a35b79a5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1496,7 +1496,7 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev, if (!amdgpu_device_is_peer_accessible( kdev->gpu->adev, - peer->gpu->adev)) + peer->gpu->adev, AMDGPU_PCIE_BAR_VRAM)) return ret; if (list_empty(&kdev->io_link_props)) -- 2.43.0
