PASID resue could cause cache, TLBs and interrupt issues when process immediately runs into hw states left by previous process exited with the same PASID, to prevent the case, it introduces a freed list to store used ids to provid maximum safety avoiding resue.
Signed-off-by: Eric Huang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 87 +++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 9cab36322c16..0443b05ddb1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -40,6 +40,21 @@ */ static DEFINE_IDA(amdgpu_pasid_ida); +/* + * Freed PASID pool - PASIDs are stored here after being freed and only + * reused when no fresh PASIDs are available from the main allocator. + * This prevents immediate PASID reuse and gives hardware time to flush + * all references (caches, IOMMU TLBs and firmware state). + */ +struct amdgpu_pasid_freed { + struct list_head list; + u32 pasid; +}; + +static bool mpool_exhausted = false; +static LIST_HEAD(amdgpu_pasid_freed_list); +static DEFINE_SPINLOCK(amdgpu_pasid_freed_lock); + /* Helper to free pasid from a fence callback */ struct amdgpu_pasid_cb { struct dma_fence_cb cb; @@ -51,7 +66,8 @@ struct amdgpu_pasid_cb { * @bits: Maximum width of the PASID in bits, must be at least 1 * * Allocates a PASID of the given width while keeping smaller PASIDs - * available if possible. + * available if possible. Prefers to allocate fresh PASIDs from the + * main pool. Only reuses freed PASIDs when the main pool is exhausted. * * Returns a positive integer on success. Returns %-EINVAL if bits==0. * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on @@ -61,13 +77,34 @@ int amdgpu_pasid_alloc(unsigned int bits) { int pasid = -EINVAL; - for (bits = min(bits, 31U); bits > 0; bits--) { + /* First, try to allocate a fresh PASID from the main pool */ + for (bits = min(bits, 31U); bits > 0 && !mpool_exhausted; bits--) { pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1), (1U << bits) - 1, GFP_KERNEL); if (pasid != -ENOSPC) break; } + if (pasid == -ENOSPC && !mpool_exhausted) + mpool_exhausted = true; + + /* If main pool is exhausted, try to reuse a freed PASID */ + if (pasid < 0) { + struct amdgpu_pasid_freed *entry; + + spin_lock(&amdgpu_pasid_freed_lock); + if (!list_empty(&amdgpu_pasid_freed_list)) { + entry = list_first_entry(&amdgpu_pasid_freed_list, + struct amdgpu_pasid_freed, list); + pasid = entry->pasid; + list_del(&entry->list); + spin_unlock(&amdgpu_pasid_freed_lock); + kfree(entry); + } else { + spin_unlock(&amdgpu_pasid_freed_lock); + } + } + if (pasid >= 0) trace_amdgpu_pasid_allocated(pasid); @@ -75,13 +112,34 @@ int amdgpu_pasid_alloc(unsigned int bits) } /** - * amdgpu_pasid_free - Free a PASID + * amdgpu_pasid_free - Free a PASID to the freed pool * @pasid: PASID to free + * + * Add the PASID to the freed list instead of immediately returning it + * to the allocator. This PASID will only be reused when the main pool + * is exhausted, providing maximum delay before reuse and allowing time + * for hardware caches, IOMMU TLBs, and firmware to clear all references. */ void amdgpu_pasid_free(u32 pasid) { + struct amdgpu_pasid_freed *entry; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + /* Fallback: free directly to IDA if allocation fails */ + trace_amdgpu_pasid_freed(pasid); + ida_free(&amdgpu_pasid_ida, pasid); + mpool_exhausted = false; + return; + } + + entry->pasid = pasid; + + spin_lock(&amdgpu_pasid_freed_lock); + list_add_tail(&entry->list, &amdgpu_pasid_freed_list); + spin_unlock(&amdgpu_pasid_freed_lock); + trace_amdgpu_pasid_freed(pasid); - ida_free(&amdgpu_pasid_ida, pasid); } static void amdgpu_pasid_free_cb(struct dma_fence *fence, @@ -616,3 +674,24 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) } } } + +/** + * amdgpu_pasid_mgr_cleanup - cleanup PASID manager + * + * Free all PASIDs from the freed list back to the IDA pool. + * This should be called during driver cleanup. + */ +void amdgpu_pasid_mgr_cleanup(void) +{ + struct amdgpu_pasid_freed *entry, *tmp; + + spin_lock(&amdgpu_pasid_freed_lock); + list_for_each_entry_safe(entry, tmp, &amdgpu_pasid_freed_list, list) { + list_del(&entry->list); + /* Return PASID to the IDA pool during cleanup */ + ida_free(&amdgpu_pasid_ida, entry->pasid); + kfree(entry); + } + spin_unlock(&amdgpu_pasid_freed_lock); + mpool_exhausted = false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index b3649cd3af56..a57919478d3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits); void amdgpu_pasid_free(u32 pasid); void amdgpu_pasid_free_delayed(struct dma_resv *resv, u32 pasid); +void amdgpu_pasid_mgr_cleanup(void); bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b89013a6aa0b..5b9bdb79efcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2864,6 +2864,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) xa_destroy(&adev->vm_manager.pasids); amdgpu_vmid_mgr_fini(adev); + amdgpu_pasid_mgr_cleanup(); } /** -- 2.34.1
