On Mon, 18 May 2026 16:52:25 +0200 Boris Brezillon <[email protected]> wrote:
> +Mark for the silent conflict resolution needed to reconcile > drm-misc-fixes and drm-next/drm-misc-next. > > On Mon, 18 May 2026 13:41:45 +0200 > Boris Brezillon <[email protected]> wrote: > > > Recently, a few races have been discovered in the GEM LRU logic, all > > of them caused by the fact the LRU lock is accessed through > > gem->lru->lock, and that very same lock also protects changes to > > gem->lru, leading to situations where gem->lru needs to first be > > accessed without the lock held, to then get the lru to access the lock > > through and finally take the lock and do the expected operation. > > > > Currently, the only driver making use of this API (MSM) declares a > > device-wide lock, and the user we're about to add (panthor) will > > do the same. There's no evidence that we will ever have a driver > > that wants different pools of LRUs protected by different locks under > > the same drm_device. So we're better off moving this lock to drm_device > > and always locking it through obj->dev->gem_lru_mutex, or directly > > through dev->gem_lru_mutex. > > > > If anyone ever needs more fine-grained locking, this can be revisited > > to pass some drm_gem_lru_pool object representing the pool of LRUs > > under a specific lock, but for now, the per-device lock seems to be > > enough. > > > > Fixes: e7c2af13f811 ("drm/gem: Add LRU/shrinker helper") > > Reported-by: Chia-I Wu <[email protected]> > > Closes: https://gitlab.freedesktop.org/panfrost/linux/-/work_items/86 > > Reviewed-by: Rob Clark <[email protected]> > > Reviewed-by: Liviu Dudau <[email protected]> > > Reviewed-by: Steven Price <[email protected]> > > Reviewed-by: Chia-I Wu <[email protected]> > > Signed-off-by: Boris Brezillon <[email protected]> > > Queued to drm-misc-next. s/drm-misc-next/drm-misc-fixes/, sorry for the typo. > > Note for the linux-next maintainers: below is the conflict > resolution currently stored in drm-tip. > > Note for the drm-misc maintainers: we'll need a backmerge of the > next -rc into drm-misc-next so we can resolve the conflict there. > > --->8--- > diff --git a/drivers/gpu/drm/panthor/panthor_device.h > b/drivers/gpu/drm/panthor/panthor_device.h > index 4e4607bca7cc..a412a50eec76 100644 > --- a/drivers/gpu/drm/panthor/panthor_device.h > +++ b/drivers/gpu/drm/panthor/panthor_device.h > @@ -187,9 +187,6 @@ struct panthor_device { > /** @reclaim.shrinker: Shrinker instance */ > struct shrinker *shrinker; > > - /** @reclaim.lock: Lock protecting all LRUs */ > - struct mutex lock; > - > /** > * @reclaim.unused: BOs with unused pages > * > diff --git a/drivers/gpu/drm/panthor/panthor_gem.c > b/drivers/gpu/drm/panthor/panthor_gem.c > index 13295d7a593d..abe0c5bb1bca 100644 > --- a/drivers/gpu/drm/panthor/panthor_gem.c > +++ b/drivers/gpu/drm/panthor/panthor_gem.c > @@ -1495,13 +1495,13 @@ panthor_gem_shrinker_scan(struct shrinker *shrinker, > struct shrink_control *sc) > if (!can_swap()) > goto out; > > - freed += drm_gem_lru_scan(&ptdev->reclaim.unused, > + freed += drm_gem_lru_scan(&ptdev->base, &ptdev->reclaim.unused, > sc->nr_to_scan - freed, &remaining, > panthor_gem_try_evict_no_resv_wait, NULL); > if (freed >= sc->nr_to_scan) > goto out; > > - freed += drm_gem_lru_scan(&ptdev->reclaim.mmapped, > + freed += drm_gem_lru_scan(&ptdev->base, &ptdev->reclaim.mmapped, > sc->nr_to_scan - freed, &remaining, > panthor_gem_try_evict_no_resv_wait, NULL); > if (freed >= sc->nr_to_scan) > @@ -1515,7 +1515,7 @@ panthor_gem_shrinker_scan(struct shrinker *shrinker, > struct shrink_control *sc) > if (freed >= sc->nr_to_scan) > goto out; > > - freed += drm_gem_lru_scan(&ptdev->reclaim.gpu_mapped_shared, > + freed += drm_gem_lru_scan(&ptdev->base, > &ptdev->reclaim.gpu_mapped_shared, > sc->nr_to_scan - freed, &remaining, > panthor_gem_try_evict, NULL); > > @@ -1544,21 +1544,16 @@ panthor_gem_shrinker_scan(struct shrinker *shrinker, > struct shrink_control *sc) > int panthor_gem_shrinker_init(struct panthor_device *ptdev) > { > struct shrinker *shrinker; > - int ret; > - > - ret = drmm_mutex_init(&ptdev->base, &ptdev->reclaim.lock); > - if (ret) > - return ret; > > INIT_LIST_HEAD(&ptdev->reclaim.vms); > - drm_gem_lru_init(&ptdev->reclaim.unused, &ptdev->reclaim.lock); > - drm_gem_lru_init(&ptdev->reclaim.mmapped, &ptdev->reclaim.lock); > - drm_gem_lru_init(&ptdev->reclaim.gpu_mapped_shared, > &ptdev->reclaim.lock); > + drm_gem_lru_init(&ptdev->reclaim.unused); > + drm_gem_lru_init(&ptdev->reclaim.mmapped); > + drm_gem_lru_init(&ptdev->reclaim.gpu_mapped_shared); > ptdev->reclaim.gpu_mapped_count = 0; > > /* Teach lockdep about lock ordering wrt. shrinker: */ > fs_reclaim_acquire(GFP_KERNEL); > - might_lock(&ptdev->reclaim.lock); > + might_lock(&ptdev->base.gem_lru_mutex); > fs_reclaim_release(GFP_KERNEL); > > shrinker = shrinker_alloc(0, "drm-panthor-gem"); > diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c > b/drivers/gpu/drm/panthor/panthor_mmu.c > index 452d0b6d4668..9d4500850561 100644 > --- a/drivers/gpu/drm/panthor/panthor_mmu.c > +++ b/drivers/gpu/drm/panthor/panthor_mmu.c > @@ -715,10 +715,10 @@ int panthor_vm_active(struct panthor_vm *vm) > * never became active in the first place will be reclaimed last, but > * that's an acceptable trade-off. > */ > - mutex_lock(&ptdev->reclaim.lock); > + mutex_lock(&ptdev->base.gem_lru_mutex); > if (vm->reclaim.lru.count) > list_move_tail(&vm->reclaim.lru_node, &ptdev->reclaim.vms); > - mutex_unlock(&ptdev->reclaim.lock); > + mutex_unlock(&ptdev->base.gem_lru_mutex); > > /* Make sure we don't race with lock/unlock_region() calls > * happening around VM bind operations. > @@ -1962,9 +1962,9 @@ static void panthor_vm_free(struct drm_gpuvm *gpuvm) > struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base); > struct panthor_device *ptdev = vm->ptdev; > > - mutex_lock(&ptdev->reclaim.lock); > + mutex_lock(&ptdev->base.gem_lru_mutex); > list_del_init(&vm->reclaim.lru_node); > - mutex_unlock(&ptdev->reclaim.lock); > + mutex_unlock(&ptdev->base.gem_lru_mutex); > > mutex_lock(&vm->heaps.lock); > if (drm_WARN_ON(&ptdev->base, vm->heaps.pool)) > @@ -2360,11 +2360,11 @@ void panthor_vm_update_bo_reclaim_lru_locked(struct > panthor_gem_object *bo) > drm_WARN_ON(&ptdev->base, vm); > vm = container_of(vm_bo->vm, struct panthor_vm, base); > > - mutex_lock(&ptdev->reclaim.lock); > + mutex_lock(&ptdev->base.gem_lru_mutex); > drm_gem_lru_move_tail_locked(&vm->reclaim.lru, &bo->base); > if (list_empty(&vm->reclaim.lru_node)) > list_move(&vm->reclaim.lru_node, &ptdev->reclaim.vms); > - mutex_unlock(&ptdev->reclaim.lock); > + mutex_unlock(&ptdev->base.gem_lru_mutex); > } > } > > @@ -2774,7 +2774,7 @@ panthor_vm_create(struct panthor_device *ptdev, bool > for_mcu, > vm->kernel_auto_va.start = auto_kernel_va_start; > vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size > - 1; > > - drm_gem_lru_init(&vm->reclaim.lru, &ptdev->reclaim.lock); > + drm_gem_lru_init(&vm->reclaim.lru); > INIT_LIST_HEAD(&vm->reclaim.lru_node); > INIT_LIST_HEAD(&vm->node); > INIT_LIST_HEAD(&vm->as.lru_node); > @@ -3140,7 +3140,7 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device > *ptdev, > LIST_HEAD(remaining_vms); > LIST_HEAD(vms); > > - mutex_lock(&ptdev->reclaim.lock); > + mutex_lock(&ptdev->base.gem_lru_mutex); > list_splice_init(&ptdev->reclaim.vms, &vms); > > while (freed < nr_to_scan) { > @@ -3156,12 +3156,13 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device > *ptdev, > continue; > } > > - mutex_unlock(&ptdev->reclaim.lock); > + mutex_unlock(&ptdev->base.gem_lru_mutex); > > - freed += drm_gem_lru_scan(&vm->reclaim.lru, nr_to_scan - freed, > + freed += drm_gem_lru_scan(&ptdev->base, &vm->reclaim.lru, > + nr_to_scan - freed, > remaining, shrink, NULL); > > - mutex_lock(&ptdev->reclaim.lock); > + mutex_lock(&ptdev->base.gem_lru_mutex); > > /* If the VM is still in the temporary list, remove it so we > * can proceed with the next VM. > @@ -3177,11 +3178,11 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device > *ptdev, > list_add_tail(&vm->reclaim.lru_node, > &remaining_vms); > } > > - mutex_unlock(&ptdev->reclaim.lock); > + mutex_unlock(&ptdev->base.gem_lru_mutex); > > panthor_vm_put(vm); > > - mutex_lock(&ptdev->reclaim.lock); > + mutex_lock(&ptdev->base.gem_lru_mutex); > } > > /* Re-insert VMs with remaining data to reclaim at the beginning of > @@ -3192,7 +3193,7 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device > *ptdev, > */ > list_splice_tail(&vms, &remaining_vms); > list_splice(&remaining_vms, &ptdev->reclaim.vms); > - mutex_unlock(&ptdev->reclaim.lock); > + mutex_unlock(&ptdev->base.gem_lru_mutex); > > return freed; > }
