On 3/16/26 20:15, Eric Huang wrote: > PASID resue could cause cache, TLBs and interrupt issues > when process immediately runs into hw states left by previous > process exited with the same PASID, to prevent the case, it > introduces a freed list to store used ids to provid maximum > safety avoiding resue.
Well absolutely clear NAK. This just hides problems instead of fixing them. Even if we want to not re-use PASIDs immediately there are the cycle functions for that. So this here is completely unecesssary. Regards, Christian. > > Signed-off-by: Eric Huang <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 87 +++++++++++++++++++++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + > 3 files changed, 85 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > index 9cab36322c16..0443b05ddb1d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > @@ -40,6 +40,21 @@ > */ > static DEFINE_IDA(amdgpu_pasid_ida); > > +/* > + * Freed PASID pool - PASIDs are stored here after being freed and only > + * reused when no fresh PASIDs are available from the main allocator. > + * This prevents immediate PASID reuse and gives hardware time to flush > + * all references (caches, IOMMU TLBs and firmware state). > + */ > +struct amdgpu_pasid_freed { > + struct list_head list; > + u32 pasid; > +}; > + > +static bool mpool_exhausted = false; > +static LIST_HEAD(amdgpu_pasid_freed_list); > +static DEFINE_SPINLOCK(amdgpu_pasid_freed_lock); > + > /* Helper to free pasid from a fence callback */ > struct amdgpu_pasid_cb { > struct dma_fence_cb cb; > @@ -51,7 +66,8 @@ struct amdgpu_pasid_cb { > * @bits: Maximum width of the PASID in bits, must be at least 1 > * > * Allocates a PASID of the given width while keeping smaller PASIDs > - * available if possible. > + * available if possible. Prefers to allocate fresh PASIDs from the > + * main pool. Only reuses freed PASIDs when the main pool is exhausted. > * > * Returns a positive integer on success. Returns %-EINVAL if bits==0. > * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on > @@ -61,13 +77,34 @@ int amdgpu_pasid_alloc(unsigned int bits) > { > int pasid = -EINVAL; > > - for (bits = min(bits, 31U); bits > 0; bits--) { > + /* First, try to allocate a fresh PASID from the main pool */ > + for (bits = min(bits, 31U); bits > 0 && !mpool_exhausted; bits--) { > pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1), > (1U << bits) - 1, GFP_KERNEL); > if (pasid != -ENOSPC) > break; > } > > + if (pasid == -ENOSPC && !mpool_exhausted) > + mpool_exhausted = true; > + > + /* If main pool is exhausted, try to reuse a freed PASID */ > + if (pasid < 0) { > + struct amdgpu_pasid_freed *entry; > + > + spin_lock(&amdgpu_pasid_freed_lock); > + if (!list_empty(&amdgpu_pasid_freed_list)) { > + entry = list_first_entry(&amdgpu_pasid_freed_list, > + struct amdgpu_pasid_freed, > list); > + pasid = entry->pasid; > + list_del(&entry->list); > + spin_unlock(&amdgpu_pasid_freed_lock); > + kfree(entry); > + } else { > + spin_unlock(&amdgpu_pasid_freed_lock); > + } > + } > + > if (pasid >= 0) > trace_amdgpu_pasid_allocated(pasid); > > @@ -75,13 +112,34 @@ int amdgpu_pasid_alloc(unsigned int bits) > } > > /** > - * amdgpu_pasid_free - Free a PASID > + * amdgpu_pasid_free - Free a PASID to the freed pool > * @pasid: PASID to free > + * > + * Add the PASID to the freed list instead of immediately returning it > + * to the allocator. This PASID will only be reused when the main pool > + * is exhausted, providing maximum delay before reuse and allowing time > + * for hardware caches, IOMMU TLBs, and firmware to clear all references. > */ > void amdgpu_pasid_free(u32 pasid) > { > + struct amdgpu_pasid_freed *entry; > + > + entry = kmalloc(sizeof(*entry), GFP_KERNEL); > + if (!entry) { > + /* Fallback: free directly to IDA if allocation fails */ > + trace_amdgpu_pasid_freed(pasid); > + ida_free(&amdgpu_pasid_ida, pasid); > + mpool_exhausted = false; > + return; > + } > + > + entry->pasid = pasid; > + > + spin_lock(&amdgpu_pasid_freed_lock); > + list_add_tail(&entry->list, &amdgpu_pasid_freed_list); > + spin_unlock(&amdgpu_pasid_freed_lock); > + > trace_amdgpu_pasid_freed(pasid); > - ida_free(&amdgpu_pasid_ida, pasid); > } > > static void amdgpu_pasid_free_cb(struct dma_fence *fence, > @@ -616,3 +674,24 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) > } > } > } > + > +/** > + * amdgpu_pasid_mgr_cleanup - cleanup PASID manager > + * > + * Free all PASIDs from the freed list back to the IDA pool. > + * This should be called during driver cleanup. > + */ > +void amdgpu_pasid_mgr_cleanup(void) > +{ > + struct amdgpu_pasid_freed *entry, *tmp; > + > + spin_lock(&amdgpu_pasid_freed_lock); > + list_for_each_entry_safe(entry, tmp, &amdgpu_pasid_freed_list, list) { > + list_del(&entry->list); > + /* Return PASID to the IDA pool during cleanup */ > + ida_free(&amdgpu_pasid_ida, entry->pasid); > + kfree(entry); > + } > + spin_unlock(&amdgpu_pasid_freed_lock); > + mpool_exhausted = false; > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > index b3649cd3af56..a57919478d3b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > @@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits); > void amdgpu_pasid_free(u32 pasid); > void amdgpu_pasid_free_delayed(struct dma_resv *resv, > u32 pasid); > +void amdgpu_pasid_mgr_cleanup(void); > > bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, > struct amdgpu_vmid *id); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index b89013a6aa0b..5b9bdb79efcf 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -2864,6 +2864,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) > xa_destroy(&adev->vm_manager.pasids); > > amdgpu_vmid_mgr_fini(adev); > + amdgpu_pasid_mgr_cleanup(); > } > > /**
