On 6/17/26 03:15, [email protected] wrote:
> From: Vitaly Prosyak <[email protected]>
> 
> Add ddev->filelist_mutex as level 7 in the lockdep hierarchy, between
> reset_lock (level 6) and srbm_mutex (now level 8). This teaches lockdep
> the correct ordering for filelist_mutex relative to other amdgpu locks.
> 
> The ordering evidence comes from the GPU reset path: gpu_recover() holds
> reset_domain->sem (write) and calls amdgpu_device_eventfd_signal_gpu_reset()
> which acquires filelist_mutex to iterate the DRM file list and signal
> eventfds. This places filelist_mutex inner to reset_domain->sem.

Yeah I'm not sure if that is correct or not. That's why I asked.

> 
> Existing filelist_mutex users (amdgpu_gem_force_release, debugfs vm_info)
> do not hold any reset locks, so this ordering is consistent with all
> current call paths.
> 
> Updated hierarchy (10 lock levels):
>  1. userq_sch_mutex     - Global userq scheduler
>  2. userq_mutex         - Per-context userq
>  3. notifier_lock       - MMU notifier
>  4. vram_lock           - VRAM allocator
>  5. reset_domain->sem   - GPU reset synchronization
>  6. reset_lock          - Reset control
>  7. filelist_mutex      - DRM file list iteration (NEW)
>  8. srbm_mutex          - SRBM register access
>  9. grbm_idx_mutex      - GRBM index access
> 10. mmio_idx_lock       - MMIO index (spinlock, innermost)
> 
> Requested-by: Christian König <[email protected]>
> Cc: Christian König <[email protected]>
> Cc: Alex Deucher <[email protected]>
> Signed-off-by: Vitaly Prosyak <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c | 29 ++++++++++++++++-----
>  1 file changed, 22 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c
> index 61450af539a6..b251350b1fb2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lockdep.c
> @@ -18,6 +18,7 @@
>  
>  struct amdgpu_lockdep_dummy_locks {
>       struct mutex reset_lock;
> +     struct mutex filelist_mutex;
>       struct mutex userq_sch_mutex;
>       struct mutex userq_mutex;
>       struct mutex notifier_lock;
> @@ -34,6 +35,7 @@ static struct lock_class_key amdgpu_notifier_lock_key;
>  static struct lock_class_key amdgpu_vram_lock_key;
>  static struct lock_class_key amdgpu_reset_sem_key;
>  static struct lock_class_key amdgpu_reset_lock_key;
> +static struct lock_class_key amdgpu_filelist_mutex_key;
>  static struct lock_class_key amdgpu_srbm_lock_key;
>  static struct lock_class_key amdgpu_grbm_lock_key;
>  static struct lock_class_key amdgpu_mmio_lock_key;
> @@ -57,6 +59,9 @@ void amdgpu_lockdep_set_class(struct amdgpu_device *adev)
>       if (adev->reset_domain)
>               lockdep_set_class(&adev->reset_domain->sem,
>                                 &amdgpu_reset_sem_key);
> +
> +     lockdep_set_class(&adev_to_drm(adev)->filelist_mutex,
> +                       &amdgpu_filelist_mutex_key);

I'm not sure if we can do this.

Could be that I'm wrong but modifying the lockdep class of a DRM mutex is 
potentially a bit problematic.

Adding a few people on CC for opinions/input.

Regards,
Christian.
 

>  }
>  
>  /**
> @@ -74,9 +79,10 @@ void amdgpu_lockdep_set_class(struct amdgpu_device *adev)
>   * 4. vram_lock           - VRAM allocator lock
>   * 5. reset_domain->sem   - GPU reset synchronization
>   * 6. reset_lock          - Reset control lock
> - * 7. srbm_mutex          - SRBM register access
> - * 8. grbm_idx_mutex      - GRBM index access
> - * 9. mmio_idx_lock       - MMIO index access (spinlock)
> + * 7. filelist_mutex      - DRM file list iteration (ddev->filelist_mutex)
> + * 8. srbm_mutex          - SRBM register access
> + * 9. grbm_idx_mutex      - GRBM index access
> + * 10. mmio_idx_lock      - MMIO index access (spinlock)
>   *
>   * Evidence:
>   * - userq_sch_mutex -> userq_mutex: amdgpu_gfx_kfd_sch_ctrl() calls
> @@ -88,6 +94,9 @@ void amdgpu_lockdep_set_class(struct amdgpu_device *adev)
>   *   must be outer to reset_domain->sem
>   * - vram_lock -> reset_domain->sem: VRAM management paths may need to
>   *   wait for ongoing reset to complete
> + * - reset_domain->sem -> filelist_mutex: GPU reset path
> + *   (amdgpu_device_gpu_recover) holds reset_domain->sem and calls
> + *   amdgpu_device_eventfd_signal_gpu_reset() which takes filelist_mutex
>   *
>   * Note: mmap_lock ordering relative to GPU locks is already taught
>   * by dma-resv (drivers/dma-buf/dma-resv.c).
> @@ -117,6 +126,7 @@ int amdgpu_lockdep_init(void)
>       mutex_init(&locks->notifier_lock);
>       mutex_init(&locks->vram_lock);
>       mutex_init(&locks->reset_lock);
> +     mutex_init(&locks->filelist_mutex);
>       mutex_init(&locks->srbm_mutex);
>       mutex_init(&locks->grbm_idx_mutex);
>       spin_lock_init(&locks->mmio_idx_lock);
> @@ -132,6 +142,7 @@ int amdgpu_lockdep_init(void)
>       lockdep_set_class(&locks->vram_lock, &amdgpu_vram_lock_key);
>       lockdep_set_class(&reset_domain->sem, &amdgpu_reset_sem_key);
>       lockdep_set_class(&locks->reset_lock, &amdgpu_reset_lock_key);
> +     lockdep_set_class(&locks->filelist_mutex, &amdgpu_filelist_mutex_key);
>       lockdep_set_class(&locks->srbm_mutex, &amdgpu_srbm_lock_key);
>       lockdep_set_class(&locks->grbm_idx_mutex, &amdgpu_grbm_lock_key);
>       lockdep_set_class(&locks->mmio_idx_lock, &amdgpu_mmio_lock_key);
> @@ -154,18 +165,21 @@ int amdgpu_lockdep_init(void)
>  
>       /* Level 6: Reset control lock */
>       mutex_lock(&locks->reset_lock);
> +
> +     /* Level 7: DRM file list mutex */
> +     mutex_lock(&locks->filelist_mutex);
>       /*
>        * Mark potential memory reclaim boundary.
>        * GPU operations might trigger memory allocation/reclaim.
>        */
>       fs_reclaim_acquire(GFP_KERNEL);
>  
> -     /* Level 7: SRBM register access */
> +     /* Level 8: SRBM register access */
>       mutex_lock(&locks->srbm_mutex);
> -     /* Level 8: GRBM index access */
> +     /* Level 9: GRBM index access */
>       mutex_lock(&locks->grbm_idx_mutex);
>  
> -     /* Level 9: MMIO index access (innermost lock, spinlock) */
> +     /* Level 10: MMIO index access (innermost lock, spinlock) */
>       spin_lock_irqsave(&locks->mmio_idx_lock, flags);
>       /*
>        * All locks acquired in order.
> @@ -178,6 +192,7 @@ int amdgpu_lockdep_init(void)
>       mutex_unlock(&locks->srbm_mutex);
>       fs_reclaim_release(GFP_KERNEL);
>  
> +     mutex_unlock(&locks->filelist_mutex);
>       mutex_unlock(&locks->reset_lock);
>       up_read(&reset_domain->sem);
>  
> @@ -190,7 +205,7 @@ int amdgpu_lockdep_init(void)
>       amdgpu_reset_put_reset_domain(reset_domain);
>  
>       kfree(locks);
> -     pr_info("AMDGPU: Lockdep annotations initialized (9 lock levels)\n");
> +     pr_info("AMDGPU: Lockdep annotations initialized (10 lock levels)\n");
>  
>       return 0;
>  }

Reply via email to