On 6/13/25 17:18, Thomas Hellström wrote:
> To avoid duplicating the tricky bo locking implementation,
> implement ttm_lru_walk_for_evict() using the guarded bo LRU iteration.
> 
> To facilitate this, support ticketlocking from the guarded bo LRU
> iteration.

That looks mostly identical to a patch I have in my drm_exec branch.

A few questions below.

> 
> Signed-off-by: Thomas Hellström <thomas.hellst...@linux.intel.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo_util.c | 166 ++++++++++++------------------
>  drivers/gpu/drm/xe/xe_shrinker.c  |   7 +-
>  include/drm/ttm/ttm_bo.h          |   9 +-
>  3 files changed, 76 insertions(+), 106 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
> b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 62b76abac578..9bc17ea1adb2 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -821,12 +821,6 @@ static int ttm_lru_walk_ticketlock(struct 
> ttm_lru_walk_arg *arg,
>       return ret;
>  }
>  
> -static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
> -{
> -     if (locked)
> -             dma_resv_unlock(bo->base.resv);
> -}
> -
>  /**
>   * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
>   * valid items.
> @@ -861,64 +855,21 @@ static void ttm_lru_walk_unlock(struct 
> ttm_buffer_object *bo, bool locked)
>  s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device 
> *bdev,
>                          struct ttm_resource_manager *man, s64 target)
>  {
> -     struct ttm_resource_cursor cursor;
> -     struct ttm_resource *res;
> +     struct ttm_bo_lru_cursor cursor;
> +     struct ttm_buffer_object *bo;
>       s64 progress = 0;
>       s64 lret;
>  
> -     spin_lock(&bdev->lru_lock);
> -     ttm_resource_cursor_init(&cursor, man);
> -     ttm_resource_manager_for_each_res(&cursor, res) {
> -             struct ttm_buffer_object *bo = res->bo;
> -             bool bo_needs_unlock = false;
> -             bool bo_locked = false;
> -             int mem_type;
> -
> -             /*
> -              * Attempt a trylock before taking a reference on the bo,
> -              * since if we do it the other way around, and the trylock 
> fails,
> -              * we need to drop the lru lock to put the bo.
> -              */
> -             if (ttm_lru_walk_trylock(&walk->arg, bo, &bo_needs_unlock))
> -                     bo_locked = true;
> -             else if (!walk->arg.ticket || walk->arg.ctx->no_wait_gpu ||
> -                      walk->arg.trylock_only)
> -                     continue;
> -
> -             if (!ttm_bo_get_unless_zero(bo)) {
> -                     ttm_lru_walk_unlock(bo, bo_needs_unlock);
> -                     continue;
> -             }
> -
> -             mem_type = res->mem_type;
> -             spin_unlock(&bdev->lru_lock);
> -
> -             lret = 0;
> -             if (!bo_locked)
> -                     lret = ttm_lru_walk_ticketlock(&walk->arg, bo, 
> &bo_needs_unlock);
> -
> -             /*
> -              * Note that in between the release of the lru lock and the
> -              * ticketlock, the bo may have switched resource,
> -              * and also memory type, since the resource may have been
> -              * freed and allocated again with a different memory type.
> -              * In that case, just skip it.
> -              */
> -             if (!lret && bo->resource && bo->resource->mem_type == mem_type)
> -                     lret = walk->ops->process_bo(walk, bo);
> -
> -             ttm_lru_walk_unlock(bo, bo_needs_unlock);
> -             ttm_bo_put(bo);
> +     ttm_bo_lru_for_each_reserved_guarded(&cursor, man, &walk->arg, bo) {
> +             lret = walk->ops->process_bo(walk, bo);
>               if (lret == -EBUSY || lret == -EALREADY)
>                       lret = 0;
>               progress = (lret < 0) ? lret : progress + lret;
> -
> -             spin_lock(&bdev->lru_lock);
>               if (progress < 0 || progress >= target)
>                       break;
>       }
> -     ttm_resource_cursor_fini(&cursor);
> -     spin_unlock(&bdev->lru_lock);
> +     if (IS_ERR(bo))
> +             return PTR_ERR(bo);
>  
>       return progress;
>  }
> @@ -958,10 +909,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
>   * @man: The ttm resource_manager whose LRU lists to iterate over.
>   * @arg: The ttm_lru_walk_arg to govern the walk.
>   *
> - * Initialize a struct ttm_bo_lru_cursor. Currently only trylocking
> - * or prelocked buffer objects are available as detailed by
> - * @arg->ctx.resv and @arg->ctx.allow_res_evict. Ticketlocking is not
> - * supported.
> + * Initialize a struct ttm_bo_lru_cursor.
>   *
>   * Return: Pointer to @curs. The function does not fail.
>   */
> @@ -979,21 +927,65 @@ ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
>  EXPORT_SYMBOL(ttm_bo_lru_cursor_init);
>  
>  static struct ttm_buffer_object *
> -ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor 
> *curs)
> +__ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs, bool first)

Giving first as a bool parameter here looks really ugly. Isn't there another 
way to do this?

>  {
> -     struct ttm_buffer_object *bo = res->bo;
> +     spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
> +     struct ttm_resource *res = NULL;
> +     struct ttm_buffer_object *bo;
> +     struct ttm_lru_walk_arg *arg = curs->arg;
>  
> -     if (!ttm_lru_walk_trylock(curs->arg, bo, &curs->needs_unlock))
> -             return NULL;
> +     ttm_bo_lru_cursor_cleanup_bo(curs);
>  
> -     if (!ttm_bo_get_unless_zero(bo)) {
> -             if (curs->needs_unlock)
> -                     dma_resv_unlock(bo->base.resv);
> -             return NULL;
> +     spin_lock(lru_lock);
> +     for (;;) {
> +             int mem_type, ret;
> +             bool bo_locked = false;
> +
> +             if (first) {
> +                     res = ttm_resource_manager_first(&curs->res_curs);
> +                     first = false;
> +             } else {
> +                     res = ttm_resource_manager_next(&curs->res_curs);
> +             }
> +             if (!res)
> +                     break;
> +
> +             bo = res->bo;
> +             if (ttm_lru_walk_trylock(arg, bo, &curs->needs_unlock))

Could/should we move needs_unlock into arg as well?

Apart from that looks good to me.

Regards,
Christian.

> +                     bo_locked = true;
> +             else if (!arg->ticket || arg->ctx->no_wait_gpu || 
> arg->trylock_only)
> +                     continue;
> +
> +             if (!ttm_bo_get_unless_zero(bo)) {
> +                     if (curs->needs_unlock)
> +                             dma_resv_unlock(bo->base.resv);
> +                     continue;
> +             }
> +
> +             mem_type = res->mem_type;
> +             spin_unlock(lru_lock);
> +             if (!bo_locked)
> +                     ret = ttm_lru_walk_ticketlock(arg, bo, 
> &curs->needs_unlock);
> +             /*
> +              * Note that in between the release of the lru lock and the
> +              * ticketlock, the bo may have switched resource,
> +              * and also memory type, since the resource may have been
> +              * freed and allocated again with a different memory type.
> +              * In that case, just skip it.
> +              */
> +             curs->bo = bo;
> +             if (!ret && bo->resource && bo->resource->mem_type == mem_type)
> +                     return bo;
> +
> +             ttm_bo_lru_cursor_cleanup_bo(curs);
> +             if (ret)
> +                     return ERR_PTR(ret);
> +
> +             spin_lock(lru_lock);
>       }
>  
> -     curs->bo = bo;
> -     return bo;
> +     spin_unlock(lru_lock);
> +     return res ? bo : NULL;
>  }
>  
>  /**
> @@ -1007,25 +999,7 @@ ttm_bo_from_res_reserved(struct ttm_resource *res, 
> struct ttm_bo_lru_cursor *cur
>   */
>  struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor 
> *curs)
>  {
> -     spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
> -     struct ttm_resource *res = NULL;
> -     struct ttm_buffer_object *bo;
> -
> -     ttm_bo_lru_cursor_cleanup_bo(curs);
> -
> -     spin_lock(lru_lock);
> -     for (;;) {
> -             res = ttm_resource_manager_next(&curs->res_curs);
> -             if (!res)
> -                     break;
> -
> -             bo = ttm_bo_from_res_reserved(res, curs);
> -             if (bo)
> -                     break;
> -     }
> -
> -     spin_unlock(lru_lock);
> -     return res ? bo : NULL;
> +     return __ttm_bo_lru_cursor_next(curs, false);
>  }
>  EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
>  
> @@ -1039,21 +1013,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
>   */
>  struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct ttm_bo_lru_cursor 
> *curs)
>  {
> -     spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
> -     struct ttm_buffer_object *bo;
> -     struct ttm_resource *res;
> -
> -     spin_lock(lru_lock);
> -     res = ttm_resource_manager_first(&curs->res_curs);
> -     if (!res) {
> -             spin_unlock(lru_lock);
> -             return NULL;
> -     }
> -
> -     bo = ttm_bo_from_res_reserved(res, curs);
> -     spin_unlock(lru_lock);
> -
> -     return bo ? bo : ttm_bo_lru_cursor_next(curs);
> +     return __ttm_bo_lru_cursor_next(curs, true);
>  }
>  EXPORT_SYMBOL(ttm_bo_lru_cursor_first);
>  
> diff --git a/drivers/gpu/drm/xe/xe_shrinker.c 
> b/drivers/gpu/drm/xe/xe_shrinker.c
> index f8a1129da2c3..1c3c04d52f55 100644
> --- a/drivers/gpu/drm/xe/xe_shrinker.c
> +++ b/drivers/gpu/drm/xe/xe_shrinker.c
> @@ -66,7 +66,10 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
>               struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, 
> mem_type);
>               struct ttm_bo_lru_cursor curs;
>               struct ttm_buffer_object *ttm_bo;
> -             struct ttm_lru_walk_arg arg = {.ctx = ctx};
> +             struct ttm_lru_walk_arg arg = {
> +                     .ctx = ctx,
> +                     .trylock_only = true,
> +             };
>  
>               if (!man || !man->use_tt)
>                       continue;
> @@ -83,6 +86,8 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
>                       if (*scanned >= to_scan)
>                               break;
>               }
> +             /* Trylocks should never error, just fail. */
> +             xe_assert(xe, !IS_ERR(ttm_bo));
>       }
>  
>       return freed;
> diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> index 8f04fa48b332..d3a85d76aaff 100644
> --- a/include/drm/ttm/ttm_bo.h
> +++ b/include/drm/ttm/ttm_bo.h
> @@ -529,10 +529,15 @@ 
> class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
>   * up at looping termination, even if terminated prematurely by, for
>   * example a return or break statement. Exiting the loop will also unlock
>   * (if needed) and unreference @_bo.
> + *
> + * Return: If locking of a bo returns an error, then iteration is terminated
> + * and @_bo is set to a corresponding error pointer. It's illegal to
> + * dereference @_bo after loop exit.
>   */
>  #define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg, _bo)       
> \
>       scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _arg)            \
> -             for ((_bo) = ttm_bo_lru_cursor_first(_cursor); (_bo);   \
> -                  (_bo) = ttm_bo_lru_cursor_next(_cursor))
> +             for ((_bo) = ttm_bo_lru_cursor_first(_cursor);          \
> +                    !IS_ERR_OR_NULL(_bo);                            \
> +                    (_bo) = ttm_bo_lru_cursor_next(_cursor))
>  
>  #endif

Reply via email to