On 10/11/2025 12:37, Natalie Vock wrote:
ttm_bo_alloc_place is a new helper function to make an attempt at
allocating a bo's resource in a specific place. It also makes decisions
on whether eviction should be attempted: If -ENOSPC is returned,
allocation should not be retried.
At first I thought the new helper will get used from more than one call
site but it seems that is not the case? No objections to extract some
code to be clear, just that when I see a patch title of "making a
helper" I expect something different.
Is there any functional difference here or it is just prep to enable
easier extending in the following patch? If no functional difference it
is good to state that in the commit message. If functional difference
please explain what and why.
Also please explain that the patch is only adding a new struct parameter
as a preparation for it being extended.
Signed-off-by: Natalie Vock <[email protected]>
---
drivers/gpu/drm/ttm/ttm_bo.c | 98 +++++++++++++++++++++++++++++++++-----------
1 file changed, 73 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f4d9e68b21e70..829d994798835 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -489,6 +489,11 @@ int ttm_bo_evict_first(struct ttm_device *bdev, struct
ttm_resource_manager *man
return ret;
}
+struct ttm_bo_alloc_state {
+ /** @limit_pool: Which pool limit we should test against */
+ struct dmem_cgroup_pool_state *limit_pool;
+};
+
/**
* struct ttm_bo_evict_walk - Parameters for the evict walk.
*/
@@ -504,12 +509,13 @@ struct ttm_bo_evict_walk {
/** @evicted: Number of successful evictions. */
unsigned long evicted;
- /** @limit_pool: Which pool limit we should test against */
- struct dmem_cgroup_pool_state *limit_pool;
/** @try_low: Whether we should attempt to evict BO's with low
watermark threshold */
bool try_low;
/** @hit_low: If we cannot evict a bo when @try_low is false (first
pass) */
bool hit_low;
+
+ /** @alloc_state: */
+ struct ttm_bo_alloc_state *alloc_state;
};
static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
@@ -518,8 +524,9 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk,
struct ttm_buffer_object *
container_of(walk, typeof(*evict_walk), walk);
s64 lret;
- if (!dmem_cgroup_state_evict_valuable(evict_walk->limit_pool, bo->resource->css,
- evict_walk->try_low,
&evict_walk->hit_low))
+ if
(!dmem_cgroup_state_evict_valuable(evict_walk->alloc_state->limit_pool,
+ bo->resource->css,
evict_walk->try_low,
+ &evict_walk->hit_low))
return 0;
if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, evict_walk->place))
@@ -561,7 +568,7 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
struct ttm_operation_ctx *ctx,
struct ww_acquire_ctx *ticket,
struct ttm_resource **res,
- struct dmem_cgroup_pool_state *limit_pool)
+ struct ttm_bo_alloc_state *state)
{
struct ttm_bo_evict_walk evict_walk = {
.walk = {
@@ -574,7 +581,7 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
.place = place,
.evictor = evictor,
.res = res,
- .limit_pool = limit_pool,
+ .alloc_state = state,
};
s64 lret;
@@ -688,6 +695,47 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
return ret;
}
+
+/**
+ * ttm_bo_alloc_at_place - Attempt allocating a BO's backing store in a place
+ *
+ * @bo: The buffer to allocate the backing store of
+ * @place: The place to attempt allocation in
+ * @ctx: ttm_operation_ctx associated with this allocation
+ * @force_space: If we should evict buffers to force space
+ * @res: On allocation success, the resulting struct ttm_resource.
+ * @alloc_state: Object holding allocation state such as charged cgroups.
+ *
+ * Returns:
+ * -EBUSY: No space available, but allocation should be retried with eviction.
With or after eviction?
+ * -ENOSPC: No space available, allocation should not be retried.
+ * -ERESTARTSYS: An interruptible sleep was interrupted by a signal.
-EAGAIN cannot get out from ttm_resource_alloc()? In the current
codebase or only with this patch?
+ *
+ */
+static int ttm_bo_alloc_at_place(struct ttm_buffer_object *bo,
+ const struct ttm_place *place,
+ struct ttm_operation_ctx *ctx,
+ bool force_space,
+ struct ttm_resource **res,
+ struct ttm_bo_alloc_state *alloc_state)
Maybe you did not write this but I am curious and thinking out loud
here. The documentation for struct ttm_operation_ctx among other things
says:
"""
* Context for TTM operations like changing buffer placement or general
memory
* allocation.
"""
Hence I am wondering if the new alloc_state couldn't simply go in there?
Which would make the function prototype identical to the existing
ttm_bo_alloc_resource and is also already passed through the relevant
call chains. Which raises another question - why did
ttm_bo_evict_alloc() need to have struct dmem_cgroup_pool_state as a
separate argument and why it couldn't be passed in the context?
+{
+ bool may_evict;
+ int ret;
+
+ may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM);
+
+ ret = ttm_resource_alloc(bo, place, res,
+ force_space ? &alloc_state->limit_pool : NULL);
+
+ if (ret) {
+ if ((ret == -ENOSPC || ret == -EAGAIN) && may_evict)
+ ret = -EBUSY;
+ return ret;
+ }
+
+ return 0;
+}
+
/**
* ttm_bo_alloc_resource - Allocate backing store for a BO
*
@@ -713,7 +761,9 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object
*bo,
bool force_space,
struct ttm_resource **res)
{
+ struct ttm_bo_alloc_state alloc_state = {0};
= {}; is usually enough.
Any particular reason to move this and the manager outside of the loop?
struct ttm_device *bdev = bo->bdev;
+ struct ttm_resource_manager *man;
struct ww_acquire_ctx *ticket;
int i, ret;
@@ -724,9 +774,6 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
for (i = 0; i < placement->num_placement; ++i) {
const struct ttm_place *place = &placement->placement[i];
- struct dmem_cgroup_pool_state *limit_pool = NULL;
- struct ttm_resource_manager *man;
- bool may_evict;
man = ttm_manager_type(bdev, place->mem_type);
if (!man || !ttm_resource_manager_used(man))
@@ -736,25 +783,26 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object
*bo,
TTM_PL_FLAG_FALLBACK))
continue;
- may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM);
- ret = ttm_resource_alloc(bo, place, res, force_space ?
&limit_pool : NULL);
- if (ret) {
- if (ret != -ENOSPC && ret != -EAGAIN) {
- dmem_cgroup_pool_state_put(limit_pool);
- return ret;
- }
- if (!may_evict) {
- dmem_cgroup_pool_state_put(limit_pool);
- continue;
- }
+ ret = ttm_bo_alloc_at_place(bo, place, ctx, force_space,
+ res, &alloc_state);
+ if (ret == -ENOSPC) {
+ dmem_cgroup_pool_state_put(alloc_state.limit_pool);
+ continue;
I always disliked the TTM eviction logic and now I remember why. It
requires a brain size of a small planet to figure out the flow.. :) I'd
say this change makes it more readable.
+ } else if (ret == -EBUSY) {
ret = ttm_bo_evict_alloc(bdev, man, place, bo, ctx,
- ticket, res, limit_pool);
- dmem_cgroup_pool_state_put(limit_pool);
- if (ret == -EBUSY)
+ ticket, res, &alloc_state);
+
+ dmem_cgroup_pool_state_put(alloc_state.limit_pool);
+
+ if (ret) {
+ if (ret != -ENOSPC && ret != -EBUSY)
+ return ret;
continue;
Is this a functional change and why? Before only EBUSY went to the next
placement. Now ENOSPC does as well.
Regards,
Tvrtko
- if (ret)
- return ret;
+ }
+ } else if (ret) {
+ dmem_cgroup_pool_state_put(alloc_state.limit_pool);
+ return ret;
}
ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);