On 2026-06-05 05:13, Prike Liang wrote: > Replace the stack-allocated amdgpu_lockdep mutex with a heap allocation > via kmalloc to fix a stack overflow caused by the large struct size. > > Signed-off-by: Prike Liang <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c | 93 ++++++++++----------- > drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.h | 12 +++ > 2 files changed, 55 insertions(+), 50 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c > index d5d71fd7c70d..c13bfaa3dfa7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c > @@ -13,6 +13,7 @@ > > #include "amdgpu.h" > #include "amdgpu_reset.h" > +#include "amdgpu_lockdep.h" > > #ifdef CONFIG_LOCKDEP > > @@ -84,72 +85,65 @@ void amdgpu_lockdep_set_class(struct amdgpu_device *adev) > int amdgpu_lockdep_init(void) > { > struct amdgpu_reset_domain *reset_domain = NULL; > - struct amdgpu_reset_control reset_ctl; > - struct mutex userq_sch_mutex; > - struct mutex userq_mutex; > - struct mutex notifier_lock; > - struct mutex vram_lock; > - struct mutex srbm_mutex; > - struct mutex grbm_idx_mutex; > - spinlock_t mmio_idx_lock; > + struct amdgpu_lockdep_dummy_locks *locks; > unsigned long flags; > > + locks = kzalloc(sizeof(*locks), GFP_KERNEL); > + if (!locks) > + return -ENOMEM; > + > /* > * Initialize dummy reset domain > */ > reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, > "lockdep_test"); > - if (!reset_domain) > + if (!reset_domain) { > + kfree(locks); > return -ENOMEM; > - > + } > /* Initialize dummy locks */ > - mutex_init(&userq_sch_mutex); > - mutex_init(&userq_mutex); > - mutex_init(¬ifier_lock); > - mutex_init(&vram_lock); > - mutex_init(&reset_ctl.reset_lock); > - mutex_init(&srbm_mutex); > - mutex_init(&grbm_idx_mutex); > - spin_lock_init(&mmio_idx_lock); > + mutex_init(&locks->userq_sch_mutex); > + mutex_init(&locks->userq_mutex); > + mutex_init(&locks->notifier_lock); > + mutex_init(&locks->vram_lock); > + mutex_init(&locks->reset_lock); > + mutex_init(&locks->srbm_mutex); > + mutex_init(&locks->grbm_idx_mutex); > + spin_lock_init(&locks->mmio_idx_lock); > > /* > * Associate dummy locks with the same class keys used for real > * driver locks. This ensures lockdep connects the ordering learned > * here with the actual locks used at runtime. > */ > - lockdep_set_class(&userq_sch_mutex, &amdgpu_userq_sch_mutex_key); > - lockdep_set_class(&userq_mutex, &amdgpu_userq_mutex_key); > - lockdep_set_class(¬ifier_lock, &amdgpu_notifier_lock_key); > - lockdep_set_class(&vram_lock, &amdgpu_vram_lock_key); > + lockdep_set_class(&locks->userq_sch_mutex, &amdgpu_userq_sch_mutex_key); > + lockdep_set_class(&locks->userq_mutex, &amdgpu_userq_mutex_key); > + lockdep_set_class(&locks->notifier_lock, &amdgpu_notifier_lock_key); > + lockdep_set_class(&locks->vram_lock, &amdgpu_vram_lock_key); > lockdep_set_class(&reset_domain->sem, &amdgpu_reset_sem_key); > - lockdep_set_class(&reset_ctl.reset_lock, &amdgpu_reset_lock_key); > - lockdep_set_class(&srbm_mutex, &amdgpu_srbm_lock_key); > - lockdep_set_class(&grbm_idx_mutex, &amdgpu_grbm_lock_key); > - lockdep_set_class(&mmio_idx_lock, &amdgpu_mmio_lock_key); > - > + lockdep_set_class(&locks->reset_lock, &amdgpu_reset_lock_key); > + lockdep_set_class(&locks->srbm_mutex, &amdgpu_srbm_lock_key); > + lockdep_set_class(&locks->grbm_idx_mutex, &amdgpu_grbm_lock_key); > + lockdep_set_class(&locks->mmio_idx_lock, &amdgpu_mmio_lock_key); > /* > * Take locks in the correct order to train lockdep. > * This establishes the dependency chain. > */ > > /* Level 1: Global userq scheduler mutex (outermost) */ > - mutex_lock(&userq_sch_mutex); > + mutex_lock(&locks->userq_sch_mutex); > > /* Level 2: Per-context userq mutex */ > - mutex_lock(&userq_mutex); > - > + mutex_lock(&locks->userq_mutex); > /* Level 3: MMU notifier lock */ > - mutex_lock(¬ifier_lock); > - > + mutex_lock(&locks->notifier_lock); > /* Level 4: VRAM allocator lock */ > - mutex_lock(&vram_lock); > - > + mutex_lock(&locks->vram_lock); > /* Level 5: Reset domain semaphore */ > down_read(&reset_domain->sem); > > /* Level 6: Reset control lock */ > - mutex_lock(&reset_ctl.reset_lock); > - > + mutex_lock(&locks->reset_lock); > /* > * Mark potential memory reclaim boundary. > * GPU operations might trigger memory allocation/reclaim. > @@ -157,36 +151,35 @@ int amdgpu_lockdep_init(void) > fs_reclaim_acquire(GFP_KERNEL); > > /* Level 7: SRBM register access */ > - mutex_lock(&srbm_mutex); > - > + mutex_lock(&locks->srbm_mutex); > /* Level 8: GRBM index access */ > - mutex_lock(&grbm_idx_mutex); > + mutex_lock(&locks->grbm_idx_mutex); > > /* Level 9: MMIO index access (innermost lock, spinlock) */ > - spin_lock_irqsave(&mmio_idx_lock, flags); > - > + spin_lock_irqsave(&locks->mmio_idx_lock, flags); > /* > * All locks acquired in order. > * Lockdep has now learned the valid dependency chain. > */ > > /* Release in reverse order */ > - spin_unlock_irqrestore(&mmio_idx_lock, flags); > - mutex_unlock(&grbm_idx_mutex); > - mutex_unlock(&srbm_mutex); > - > + spin_unlock_irqrestore(&locks->mmio_idx_lock, flags); > + mutex_unlock(&locks->grbm_idx_mutex); > + mutex_unlock(&locks->srbm_mutex); > fs_reclaim_release(GFP_KERNEL); > > - mutex_unlock(&reset_ctl.reset_lock); > + mutex_unlock(&locks->reset_lock); > up_read(&reset_domain->sem); > - mutex_unlock(&vram_lock); > - mutex_unlock(¬ifier_lock); > - mutex_unlock(&userq_mutex); > - mutex_unlock(&userq_sch_mutex); > + > + mutex_unlock(&locks->vram_lock); > + mutex_unlock(&locks->notifier_lock); > + mutex_unlock(&locks->userq_mutex); > + mutex_unlock(&locks->userq_sch_mutex); > > /* Cleanup */ > amdgpu_reset_put_reset_domain(reset_domain); > > + kfree(locks); > pr_info("AMDGPU: Lockdep annotations initialized (9 lock levels)\n"); > > return 0; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.h > index 04adb58665bf..8bff09bd2dbb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.h > @@ -9,9 +9,21 @@ > #define __AMDGPU_LOCKDEP_H__ > > #include <linux/lockdep.h> > +#include <linux/mutex.h> > > struct amdgpu_device; > Hi Prike,
Thanks for the fix -- the heap allocation approach is correct and addresses the stack overflow cleanly. One minor suggestion: could we move struct amdgpu_lockdep_dummy_locks from the header into amdgpu_lockdep.c (inside the #ifdef CONFIG_LOCKDEP block)? Since it is only used locally in amdgpu_lockdep_init(), keeping it in the .c file avoids exposing implementation details in the header interface. With that change: Reviewed-by: Vitaly Prosyak <[email protected]> > +struct amdgpu_lockdep_dummy_locks { > + struct mutex reset_lock; > + struct mutex userq_sch_mutex; > + struct mutex userq_mutex; > + struct mutex notifier_lock; > + struct mutex vram_lock; > + struct mutex srbm_mutex; > + struct mutex grbm_idx_mutex; > + spinlock_t mmio_idx_lock; > +}; > + > #ifdef CONFIG_LOCKDEP > > /**
