On Mon, 18 May 2026 16:52:25 +0200
Boris Brezillon <[email protected]> wrote:

> +Mark for the silent conflict resolution needed to reconcile
> drm-misc-fixes and drm-next/drm-misc-next.
> 
> On Mon, 18 May 2026 13:41:45 +0200
> Boris Brezillon <[email protected]> wrote:
> 
> > Recently, a few races have been discovered in the GEM LRU logic, all
> > of them caused by the fact the LRU lock is accessed through
> > gem->lru->lock, and that very same lock also protects changes to
> > gem->lru, leading to situations where gem->lru needs to first be
> > accessed without the lock held, to then get the lru to access the lock
> > through and finally take the lock and do the expected operation.
> > 
> > Currently, the only driver making use of this API (MSM) declares a
> > device-wide lock, and the user we're about to add (panthor) will
> > do the same. There's no evidence that we will ever have a driver
> > that wants different pools of LRUs protected by different locks under
> > the same drm_device. So we're better off moving this lock to drm_device
> > and always locking it through obj->dev->gem_lru_mutex, or directly
> > through dev->gem_lru_mutex.
> > 
> > If anyone ever needs more fine-grained locking, this can be revisited
> > to pass some drm_gem_lru_pool object representing the pool of LRUs
> > under a specific lock, but for now, the per-device lock seems to be
> > enough.
> > 
> > Fixes: e7c2af13f811 ("drm/gem: Add LRU/shrinker helper")
> > Reported-by: Chia-I Wu <[email protected]>
> > Closes: https://gitlab.freedesktop.org/panfrost/linux/-/work_items/86
> > Reviewed-by: Rob Clark <[email protected]>
> > Reviewed-by: Liviu Dudau <[email protected]>
> > Reviewed-by: Steven Price <[email protected]>
> > Reviewed-by: Chia-I Wu <[email protected]>
> > Signed-off-by: Boris Brezillon <[email protected]>  
> 
> Queued to drm-misc-next.

s/drm-misc-next/drm-misc-fixes/, sorry for the typo.

> 
> Note for the linux-next maintainers: below is the conflict
> resolution currently stored in drm-tip.
> 
> Note for the drm-misc maintainers: we'll need a backmerge of the
> next -rc into drm-misc-next so we can resolve the conflict there.
> 
> --->8---  
> diff --git a/drivers/gpu/drm/panthor/panthor_device.h 
> b/drivers/gpu/drm/panthor/panthor_device.h
> index 4e4607bca7cc..a412a50eec76 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.h
> +++ b/drivers/gpu/drm/panthor/panthor_device.h
> @@ -187,9 +187,6 @@ struct panthor_device {
>               /** @reclaim.shrinker: Shrinker instance */
>               struct shrinker *shrinker;
>  
> -             /** @reclaim.lock: Lock protecting all LRUs */
> -             struct mutex lock;
> -
>               /**
>                * @reclaim.unused: BOs with unused pages
>                *
> diff --git a/drivers/gpu/drm/panthor/panthor_gem.c 
> b/drivers/gpu/drm/panthor/panthor_gem.c
> index 13295d7a593d..abe0c5bb1bca 100644
> --- a/drivers/gpu/drm/panthor/panthor_gem.c
> +++ b/drivers/gpu/drm/panthor/panthor_gem.c
> @@ -1495,13 +1495,13 @@ panthor_gem_shrinker_scan(struct shrinker *shrinker, 
> struct shrink_control *sc)
>       if (!can_swap())
>               goto out;
>  
> -     freed += drm_gem_lru_scan(&ptdev->reclaim.unused,
> +     freed += drm_gem_lru_scan(&ptdev->base, &ptdev->reclaim.unused,
>                                 sc->nr_to_scan - freed, &remaining,
>                                 panthor_gem_try_evict_no_resv_wait, NULL);
>       if (freed >= sc->nr_to_scan)
>               goto out;
>  
> -     freed += drm_gem_lru_scan(&ptdev->reclaim.mmapped,
> +     freed += drm_gem_lru_scan(&ptdev->base, &ptdev->reclaim.mmapped,
>                                 sc->nr_to_scan - freed, &remaining,
>                                 panthor_gem_try_evict_no_resv_wait, NULL);
>       if (freed >= sc->nr_to_scan)
> @@ -1515,7 +1515,7 @@ panthor_gem_shrinker_scan(struct shrinker *shrinker, 
> struct shrink_control *sc)
>       if (freed >= sc->nr_to_scan)
>               goto out;
>  
> -     freed += drm_gem_lru_scan(&ptdev->reclaim.gpu_mapped_shared,
> +     freed += drm_gem_lru_scan(&ptdev->base, 
> &ptdev->reclaim.gpu_mapped_shared,
>                                 sc->nr_to_scan - freed, &remaining,
>                                 panthor_gem_try_evict, NULL);
>  
> @@ -1544,21 +1544,16 @@ panthor_gem_shrinker_scan(struct shrinker *shrinker, 
> struct shrink_control *sc)
>  int panthor_gem_shrinker_init(struct panthor_device *ptdev)
>  {
>       struct shrinker *shrinker;
> -     int ret;
> -
> -     ret = drmm_mutex_init(&ptdev->base, &ptdev->reclaim.lock);
> -     if (ret)
> -             return ret;
>  
>       INIT_LIST_HEAD(&ptdev->reclaim.vms);
> -     drm_gem_lru_init(&ptdev->reclaim.unused, &ptdev->reclaim.lock);
> -     drm_gem_lru_init(&ptdev->reclaim.mmapped, &ptdev->reclaim.lock);
> -     drm_gem_lru_init(&ptdev->reclaim.gpu_mapped_shared, 
> &ptdev->reclaim.lock);
> +     drm_gem_lru_init(&ptdev->reclaim.unused);
> +     drm_gem_lru_init(&ptdev->reclaim.mmapped);
> +     drm_gem_lru_init(&ptdev->reclaim.gpu_mapped_shared);
>       ptdev->reclaim.gpu_mapped_count = 0;
>  
>       /* Teach lockdep about lock ordering wrt. shrinker: */
>       fs_reclaim_acquire(GFP_KERNEL);
> -     might_lock(&ptdev->reclaim.lock);
> +     might_lock(&ptdev->base.gem_lru_mutex);
>       fs_reclaim_release(GFP_KERNEL);
>  
>       shrinker = shrinker_alloc(0, "drm-panthor-gem");
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c 
> b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 452d0b6d4668..9d4500850561 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -715,10 +715,10 @@ int panthor_vm_active(struct panthor_vm *vm)
>        * never became active in the first place will be reclaimed last, but
>        * that's an acceptable trade-off.
>        */
> -     mutex_lock(&ptdev->reclaim.lock);
> +     mutex_lock(&ptdev->base.gem_lru_mutex);
>       if (vm->reclaim.lru.count)
>               list_move_tail(&vm->reclaim.lru_node, &ptdev->reclaim.vms);
> -     mutex_unlock(&ptdev->reclaim.lock);
> +     mutex_unlock(&ptdev->base.gem_lru_mutex);
>  
>       /* Make sure we don't race with lock/unlock_region() calls
>        * happening around VM bind operations.
> @@ -1962,9 +1962,9 @@ static void panthor_vm_free(struct drm_gpuvm *gpuvm)
>       struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base);
>       struct panthor_device *ptdev = vm->ptdev;
>  
> -     mutex_lock(&ptdev->reclaim.lock);
> +     mutex_lock(&ptdev->base.gem_lru_mutex);
>       list_del_init(&vm->reclaim.lru_node);
> -     mutex_unlock(&ptdev->reclaim.lock);
> +     mutex_unlock(&ptdev->base.gem_lru_mutex);
>  
>       mutex_lock(&vm->heaps.lock);
>       if (drm_WARN_ON(&ptdev->base, vm->heaps.pool))
> @@ -2360,11 +2360,11 @@ void panthor_vm_update_bo_reclaim_lru_locked(struct 
> panthor_gem_object *bo)
>               drm_WARN_ON(&ptdev->base, vm);
>               vm = container_of(vm_bo->vm, struct panthor_vm, base);
>  
> -             mutex_lock(&ptdev->reclaim.lock);
> +             mutex_lock(&ptdev->base.gem_lru_mutex);
>               drm_gem_lru_move_tail_locked(&vm->reclaim.lru, &bo->base);
>               if (list_empty(&vm->reclaim.lru_node))
>                       list_move(&vm->reclaim.lru_node, &ptdev->reclaim.vms);
> -             mutex_unlock(&ptdev->reclaim.lock);
> +             mutex_unlock(&ptdev->base.gem_lru_mutex);
>       }
>  }
>  
> @@ -2774,7 +2774,7 @@ panthor_vm_create(struct panthor_device *ptdev, bool 
> for_mcu,
>       vm->kernel_auto_va.start = auto_kernel_va_start;
>       vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size 
> - 1;
>  
> -     drm_gem_lru_init(&vm->reclaim.lru, &ptdev->reclaim.lock);
> +     drm_gem_lru_init(&vm->reclaim.lru);
>       INIT_LIST_HEAD(&vm->reclaim.lru_node);
>       INIT_LIST_HEAD(&vm->node);
>       INIT_LIST_HEAD(&vm->as.lru_node);
> @@ -3140,7 +3140,7 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device 
> *ptdev,
>       LIST_HEAD(remaining_vms);
>       LIST_HEAD(vms);
>  
> -     mutex_lock(&ptdev->reclaim.lock);
> +     mutex_lock(&ptdev->base.gem_lru_mutex);
>       list_splice_init(&ptdev->reclaim.vms, &vms);
>  
>       while (freed < nr_to_scan) {
> @@ -3156,12 +3156,13 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device 
> *ptdev,
>                       continue;
>               }
>  
> -             mutex_unlock(&ptdev->reclaim.lock);
> +             mutex_unlock(&ptdev->base.gem_lru_mutex);
>  
> -             freed += drm_gem_lru_scan(&vm->reclaim.lru, nr_to_scan - freed,
> +             freed += drm_gem_lru_scan(&ptdev->base, &vm->reclaim.lru,
> +                                       nr_to_scan - freed,
>                                         remaining, shrink, NULL);
>  
> -             mutex_lock(&ptdev->reclaim.lock);
> +             mutex_lock(&ptdev->base.gem_lru_mutex);
>  
>               /* If the VM is still in the temporary list, remove it so we
>                * can proceed with the next VM.
> @@ -3177,11 +3178,11 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device 
> *ptdev,
>                               list_add_tail(&vm->reclaim.lru_node, 
> &remaining_vms);
>               }
>  
> -             mutex_unlock(&ptdev->reclaim.lock);
> +             mutex_unlock(&ptdev->base.gem_lru_mutex);
>  
>               panthor_vm_put(vm);
>  
> -             mutex_lock(&ptdev->reclaim.lock);
> +             mutex_lock(&ptdev->base.gem_lru_mutex);
>       }
>  
>       /* Re-insert VMs with remaining data to reclaim at the beginning of
> @@ -3192,7 +3193,7 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device 
> *ptdev,
>        */
>       list_splice_tail(&vms, &remaining_vms);
>       list_splice(&remaining_vms, &ptdev->reclaim.vms);
> -     mutex_unlock(&ptdev->reclaim.lock);
> +     mutex_unlock(&ptdev->base.gem_lru_mutex);
>  
>       return freed;
>  }

Reply via email to