On 2026-03-18 03:37, Christian König wrote:
On 3/17/26 19:58, Eric Huang wrote:
PASID resue could cause cache, TLBs and interrupt issues
when process immediately runs into hw states left by previous
process exited with the same PASID, to prevent the case, it
uses the same allocator as kernel pid's.
The implementation looks good now, but that is still not a good justification 
for the change.

What potential HW state do we have which could cause problems here?
We do have an issue reported by customer https://ontrack-internal.amd.com/browse/SWDEV-578010, on which there are two apps, first app intentionally overflows a buffer, that causes bunch of GPU page faults, and then second app runs immediately and get unexpected page faults with the same pasid, so we have internal discussion and Felix think a proper solution would be in ID manager to make sure pasids don't get reused when there could still be outstanding interrupts in the IH ring with that pasid. That is the motivation for this change.

Regards,
Eric

Regards,
Christian.

Signed-off-by: Eric Huang <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 45 ++++++++++++++++++-------
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  1 +
  3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 9cab36322c16..0801c023f5a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -35,10 +35,13 @@
   * PASIDs are global address space identifiers that can be shared
   * between the GPU, an IOMMU and the driver. VMs on different devices
   * may use the same PASID if they share the same address
- * space. Therefore PASIDs are allocated using a global IDA. VMs are
- * looked up from the PASID per amdgpu_device.
+ * space. Therefore PASIDs are allocated using IDR cyclic allocator
+ * (similar to kernel PID allocation) which naturally delays reuse.
+ * VMs are looked up from the PASID per amdgpu_device.
   */
-static DEFINE_IDA(amdgpu_pasid_ida);
+
+static DEFINE_IDR(amdgpu_pasid_idr);
+static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock);
/* Helper to free pasid from a fence callback */
  struct amdgpu_pasid_cb {
@@ -50,8 +53,8 @@ struct amdgpu_pasid_cb {
   * amdgpu_pasid_alloc - Allocate a PASID
   * @bits: Maximum width of the PASID in bits, must be at least 1
   *
- * Allocates a PASID of the given width while keeping smaller PASIDs
- * available if possible.
+ * Uses kernel's IDR cyclic allocator (same as PID allocation).
+ * Allocates sequentially with automatic wrap-around.
   *
   * Returns a positive integer on success. Returns %-EINVAL if bits==0.
   * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
@@ -59,14 +62,15 @@ struct amdgpu_pasid_cb {
   */
  int amdgpu_pasid_alloc(unsigned int bits)
  {
-       int pasid = -EINVAL;
+       int pasid;
- for (bits = min(bits, 31U); bits > 0; bits--) {
-               pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
-                                       (1U << bits) - 1, GFP_KERNEL);
-               if (pasid != -ENOSPC)
-                       break;
-       }
+       if (bits == 0)
+               return -EINVAL;
+
+       spin_lock(&amdgpu_pasid_idr_lock);
+       pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
+                                1U << bits, GFP_KERNEL);
+       spin_unlock(&amdgpu_pasid_idr_lock);
if (pasid >= 0)
                trace_amdgpu_pasid_allocated(pasid);
@@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits)
  void amdgpu_pasid_free(u32 pasid)
  {
        trace_amdgpu_pasid_freed(pasid);
-       ida_free(&amdgpu_pasid_ida, pasid);
+
+       spin_lock(&amdgpu_pasid_idr_lock);
+       idr_remove(&amdgpu_pasid_idr, pasid);
+       spin_unlock(&amdgpu_pasid_idr_lock);
  }
static void amdgpu_pasid_free_cb(struct dma_fence *fence,
@@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
                }
        }
  }
+
+/**
+ * amdgpu_pasid_mgr_cleanup - cleanup PASID manager
+ *
+ * Cleanup the IDR allocator.
+ */
+void amdgpu_pasid_mgr_cleanup(void)
+{
+       spin_lock(&amdgpu_pasid_idr_lock);
+       idr_destroy(&amdgpu_pasid_idr);
+       spin_unlock(&amdgpu_pasid_idr_lock);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index b3649cd3af56..a57919478d3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits);
  void amdgpu_pasid_free(u32 pasid);
  void amdgpu_pasid_free_delayed(struct dma_resv *resv,
                               u32 pasid);
+void amdgpu_pasid_mgr_cleanup(void);
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
                               struct amdgpu_vmid *id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b89013a6aa0b..5b9bdb79efcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2864,6 +2864,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
        xa_destroy(&adev->vm_manager.pasids);
amdgpu_vmid_mgr_fini(adev);
+       amdgpu_pasid_mgr_cleanup();
  }
/**

Reply via email to