On 6/17/26 03:15, [email protected] wrote: > From: Vitaly Prosyak <[email protected]> > > Add ddev->filelist_mutex as level 7 in the lockdep hierarchy, between > reset_lock (level 6) and srbm_mutex (now level 8). This teaches lockdep > the correct ordering for filelist_mutex relative to other amdgpu locks. > > The ordering evidence comes from the GPU reset path: gpu_recover() holds > reset_domain->sem (write) and calls amdgpu_device_eventfd_signal_gpu_reset() > which acquires filelist_mutex to iterate the DRM file list and signal > eventfds. This places filelist_mutex inner to reset_domain->sem.
Yeah I'm not sure if that is correct or not. That's why I asked. > > Existing filelist_mutex users (amdgpu_gem_force_release, debugfs vm_info) > do not hold any reset locks, so this ordering is consistent with all > current call paths. > > Updated hierarchy (10 lock levels): > 1. userq_sch_mutex - Global userq scheduler > 2. userq_mutex - Per-context userq > 3. notifier_lock - MMU notifier > 4. vram_lock - VRAM allocator > 5. reset_domain->sem - GPU reset synchronization > 6. reset_lock - Reset control > 7. filelist_mutex - DRM file list iteration (NEW) > 8. srbm_mutex - SRBM register access > 9. grbm_idx_mutex - GRBM index access > 10. mmio_idx_lock - MMIO index (spinlock, innermost) > > Requested-by: Christian König <[email protected]> > Cc: Christian König <[email protected]> > Cc: Alex Deucher <[email protected]> > Signed-off-by: Vitaly Prosyak <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c | 29 ++++++++++++++++----- > 1 file changed, 22 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c > index 61450af539a6..b251350b1fb2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c > @@ -18,6 +18,7 @@ > > struct amdgpu_lockdep_dummy_locks { > struct mutex reset_lock; > + struct mutex filelist_mutex; > struct mutex userq_sch_mutex; > struct mutex userq_mutex; > struct mutex notifier_lock; > @@ -34,6 +35,7 @@ static struct lock_class_key amdgpu_notifier_lock_key; > static struct lock_class_key amdgpu_vram_lock_key; > static struct lock_class_key amdgpu_reset_sem_key; > static struct lock_class_key amdgpu_reset_lock_key; > +static struct lock_class_key amdgpu_filelist_mutex_key; > static struct lock_class_key amdgpu_srbm_lock_key; > static struct lock_class_key amdgpu_grbm_lock_key; > static struct lock_class_key amdgpu_mmio_lock_key; > @@ -57,6 +59,9 @@ void amdgpu_lockdep_set_class(struct amdgpu_device *adev) > if (adev->reset_domain) > lockdep_set_class(&adev->reset_domain->sem, > &amdgpu_reset_sem_key); > + > + lockdep_set_class(&adev_to_drm(adev)->filelist_mutex, > + &amdgpu_filelist_mutex_key); I'm not sure if we can do this. Could be that I'm wrong but modifying the lockdep class of a DRM mutex is potentially a bit problematic. Adding a few people on CC for opinions/input. Regards, Christian. > } > > /** > @@ -74,9 +79,10 @@ void amdgpu_lockdep_set_class(struct amdgpu_device *adev) > * 4. vram_lock - VRAM allocator lock > * 5. reset_domain->sem - GPU reset synchronization > * 6. reset_lock - Reset control lock > - * 7. srbm_mutex - SRBM register access > - * 8. grbm_idx_mutex - GRBM index access > - * 9. mmio_idx_lock - MMIO index access (spinlock) > + * 7. filelist_mutex - DRM file list iteration (ddev->filelist_mutex) > + * 8. srbm_mutex - SRBM register access > + * 9. grbm_idx_mutex - GRBM index access > + * 10. mmio_idx_lock - MMIO index access (spinlock) > * > * Evidence: > * - userq_sch_mutex -> userq_mutex: amdgpu_gfx_kfd_sch_ctrl() calls > @@ -88,6 +94,9 @@ void amdgpu_lockdep_set_class(struct amdgpu_device *adev) > * must be outer to reset_domain->sem > * - vram_lock -> reset_domain->sem: VRAM management paths may need to > * wait for ongoing reset to complete > + * - reset_domain->sem -> filelist_mutex: GPU reset path > + * (amdgpu_device_gpu_recover) holds reset_domain->sem and calls > + * amdgpu_device_eventfd_signal_gpu_reset() which takes filelist_mutex > * > * Note: mmap_lock ordering relative to GPU locks is already taught > * by dma-resv (drivers/dma-buf/dma-resv.c). > @@ -117,6 +126,7 @@ int amdgpu_lockdep_init(void) > mutex_init(&locks->notifier_lock); > mutex_init(&locks->vram_lock); > mutex_init(&locks->reset_lock); > + mutex_init(&locks->filelist_mutex); > mutex_init(&locks->srbm_mutex); > mutex_init(&locks->grbm_idx_mutex); > spin_lock_init(&locks->mmio_idx_lock); > @@ -132,6 +142,7 @@ int amdgpu_lockdep_init(void) > lockdep_set_class(&locks->vram_lock, &amdgpu_vram_lock_key); > lockdep_set_class(&reset_domain->sem, &amdgpu_reset_sem_key); > lockdep_set_class(&locks->reset_lock, &amdgpu_reset_lock_key); > + lockdep_set_class(&locks->filelist_mutex, &amdgpu_filelist_mutex_key); > lockdep_set_class(&locks->srbm_mutex, &amdgpu_srbm_lock_key); > lockdep_set_class(&locks->grbm_idx_mutex, &amdgpu_grbm_lock_key); > lockdep_set_class(&locks->mmio_idx_lock, &amdgpu_mmio_lock_key); > @@ -154,18 +165,21 @@ int amdgpu_lockdep_init(void) > > /* Level 6: Reset control lock */ > mutex_lock(&locks->reset_lock); > + > + /* Level 7: DRM file list mutex */ > + mutex_lock(&locks->filelist_mutex); > /* > * Mark potential memory reclaim boundary. > * GPU operations might trigger memory allocation/reclaim. > */ > fs_reclaim_acquire(GFP_KERNEL); > > - /* Level 7: SRBM register access */ > + /* Level 8: SRBM register access */ > mutex_lock(&locks->srbm_mutex); > - /* Level 8: GRBM index access */ > + /* Level 9: GRBM index access */ > mutex_lock(&locks->grbm_idx_mutex); > > - /* Level 9: MMIO index access (innermost lock, spinlock) */ > + /* Level 10: MMIO index access (innermost lock, spinlock) */ > spin_lock_irqsave(&locks->mmio_idx_lock, flags); > /* > * All locks acquired in order. > @@ -178,6 +192,7 @@ int amdgpu_lockdep_init(void) > mutex_unlock(&locks->srbm_mutex); > fs_reclaim_release(GFP_KERNEL); > > + mutex_unlock(&locks->filelist_mutex); > mutex_unlock(&locks->reset_lock); > up_read(&reset_domain->sem); > > @@ -190,7 +205,7 @@ int amdgpu_lockdep_init(void) > amdgpu_reset_put_reset_domain(reset_domain); > > kfree(locks); > - pr_info("AMDGPU: Lockdep annotations initialized (9 lock levels)\n"); > + pr_info("AMDGPU: Lockdep annotations initialized (10 lock levels)\n"); > > return 0; > }
