amdgpu_device_is_peer_accessible() was initially written to check peer VRAM access only; however, it is also called for the DOORBELL and MMIO ranges. Add support for checking peer accessibility of these BARs as well.
v2: Updated comments in function amdgpu_device_is_peer_accessible() Signed-off-by: Harish Kasiviswanathan <[email protected]> Reviewed-by: Felix Kuehling <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 77 +++++++++++++++---- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 4 files changed, 78 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 1944d1bece86..6004736a08a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1423,7 +1423,8 @@ enum amdgpu_pcie_bar { int amdgpu_get_bar_idx(struct amdgpu_device *adev, enum amdgpu_pcie_bar bar); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, - struct amdgpu_device *peer_adev); + struct amdgpu_device *peer_adev, + enum amdgpu_pcie_bar pcie_bar); int amdgpu_device_baco_enter(struct amdgpu_device *adev); int amdgpu_device_baco_exit(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7c01492e69dd..3df92b49d478 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -900,8 +900,19 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM) same_hive = amdgpu_xgmi_same_hive(adev, bo_adev); - if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev)) - return -EINVAL; + if (!same_hive) { + enum amdgpu_pcie_bar bar; + + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) + bar = AMDGPU_PCIE_BAR_DOORBELL; + else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) + bar = AMDGPU_PCIE_BAR_MMIO; + else + bar = AMDGPU_PCIE_BAR_VRAM; + + if (!amdgpu_device_is_peer_accessible(bo_adev, adev, 
bar)) + return -EINVAL; + } } for (i = 0; i <= is_aql; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2e1e5791f123..bd1f5cfd88c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6218,36 +6218,83 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * @peer_adev: amdgpu_device pointer for peer device trying to access @adev + * @pcie_bar: PCIe BAR role to check (enum amdgpu_pcie_bar) + * + * Return true if @peer_adev can access (DMA) @adev through the specified + * PCIe BAR. For VRAM, @adev must be "large BAR" and the BAR must match + * the DMA mask of @peer_adev. For doorbell and MMIO BARs, only the DMA + * addressability and P2P chipset support are checked. Doorbell P2P is + * only supported on Bonaire and newer ASICs. + * + * @note: CONFIG_HSA_AMD_P2P indicates support for P2P DMA mappings. Query + * P2PDMA distance only if the kernel has all the prerequisites for P2P DMA + * support. * - * Return true if @peer_adev can access (DMA) @adev through the PCIe - * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of - * @peer_adev. 
*/ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, - struct amdgpu_device *peer_adev) + struct amdgpu_device *peer_adev, + enum amdgpu_pcie_bar pcie_bar) { #ifdef CONFIG_HSA_AMD_P2P - bool p2p_access = - !adev->gmc.xgmi.connected_to_cpu && + bool p2p_access = true; + bool p2p_addressable = false; + resource_size_t bar_base = 0, bar_size = 0; + + /* VRAM requires large BAR (full VRAM visible) for P2P access */ + if (pcie_bar == AMDGPU_PCIE_BAR_VRAM) { + if (!adev->gmc.visible_vram_size || + adev->gmc.real_vram_size != adev->gmc.visible_vram_size) + return false; + + /* VRAM on CPU-connected xGMI devices is accessed via + * coherent fabric, not PCIe BAR P2P + */ + if (adev->gmc.xgmi.connected_to_cpu) + return false; + } + + switch (pcie_bar) { + case AMDGPU_PCIE_BAR_DOORBELL: + if (adev->asic_type < CHIP_BONAIRE) + return false; + bar_base = adev->doorbell.base; + bar_size = adev->doorbell.size; + break; + case AMDGPU_PCIE_BAR_MMIO: + bar_base = adev->rmmio_base; + bar_size = adev->rmmio_size; + break; + case AMDGPU_PCIE_BAR_VRAM: + default: + bar_base = adev->gmc.aper_base; + bar_size = adev->gmc.aper_size; + break; + } + + if (!bar_base || !bar_size) { + dev_dbg(adev->dev, + "Invalid BAR%d configuration for P2P access (role %d)\n", + amdgpu_get_bar_idx(adev, pcie_bar), pcie_bar); + return false; + } + + p2p_access = !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); if (!p2p_access) dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n", pci_name(peer_adev->pdev)); - - bool is_large_bar = adev->gmc.visible_vram_size && - adev->gmc.real_vram_size == adev->gmc.visible_vram_size; - bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev); + p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev); if (!p2p_addressable) { uint64_t address_mask = peer_adev->dev->dma_mask ? 
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); - resource_size_t aper_limit = - adev->gmc.aper_base + adev->gmc.aper_size - 1; + resource_size_t bar_limit = bar_base + bar_size - 1; - p2p_addressable = !(adev->gmc.aper_base & address_mask || - aper_limit & address_mask); + p2p_addressable = !(bar_base & address_mask || + bar_limit & address_mask); } - return pcie_p2p && is_large_bar && p2p_access && p2p_addressable; + + return p2p_access && p2p_addressable; #else return false; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index f57da088f1f8..b77acf97fbc8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1497,7 +1497,7 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev, if (!amdgpu_device_is_peer_accessible( kdev->gpu->adev, - peer->gpu->adev)) + peer->gpu->adev, AMDGPU_PCIE_BAR_VRAM)) return ret; if (list_empty(&kdev->io_link_props)) -- 2.43.0
