On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
> Until now ttm stored a single pipelined eviction fence, which meant
> drivers had to use a single entity for these evictions.
>
> To lift this requirement, this commit allows up to 8 entities to
> be used.
>
> Ideally a dma_resv object would have been used as a container for
> the eviction fences, but the locking rules make that complex:
> dma_resv objects all share the same ww_class, which means
> "Attempting to lock more mutexes after ww_acquire_done." is an error.
>
> One alternative considered was to introduce a 2nd ww_class for
> specific resvs holding a single "transient" lock (= the resv lock
> would only be held for a short period, without taking any other
> locks).
>
> The other option is to statically reserve a fence array and
> extend the existing code to deal with N fences instead of 1.
>
> The driver is still responsible for reserving the correct number
> of fence slots.
>
> ---
> v2:
> - simplified code
> - dropped n_fences
> - name changes
> v3: use ttm_resource_manager_cleanup
> ---
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
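The per-context slot rule at the heart of the patch is easy to see in
isolation. Below is a minimal standalone C sketch of that rule (illustrative
only: NUM_SLOTS, struct fake_fence and pick_slot() are made-up stand-ins for
TTM_NUM_MOVE_FENCES, struct dma_fence and the loop in
ttm_bo_move_pipeline_evict(); locking and the dma_fence_is_later() check that
decides whether an occupied same-context slot is actually overwritten are
elided):

#include <stddef.h>
#include <stdio.h>

#define NUM_SLOTS 8 /* stand-in for TTM_NUM_MOVE_FENCES */

struct fake_fence {
        unsigned long long context; /* stand-in for dma_fence->context */
        unsigned long long seqno;   /* stand-in for dma_fence->seqno */
};

/*
 * Return the slot index where @fence belongs: the first free slot, or the
 * slot already holding a fence from the same context. Returns -1 when all
 * slots are taken by other contexts (the patch WARNs and falls back to a
 * synchronous dma_fence_wait() in that case).
 */
static int pick_slot(struct fake_fence *slots[NUM_SLOTS],
                     const struct fake_fence *fence)
{
        int i;

        for (i = 0; i < NUM_SLOTS; i++) {
                if (!slots[i])
                        return i; /* first free slot */
                if (slots[i]->context == fence->context)
                        return i; /* slot already used by this context */
        }
        return -1;
}

int main(void)
{
        struct fake_fence *slots[NUM_SLOTS] = { NULL };
        struct fake_fence a = { .context = 1, .seqno = 10 };
        struct fake_fence b = { .context = 2, .seqno = 5 };
        struct fake_fence c = { .context = 1, .seqno = 11 };

        slots[pick_slot(slots, &a)] = &a; /* context 1 -> slot 0 */
        slots[pick_slot(slots, &b)] = &b; /* context 2 -> slot 1 */
        /* context 1 again: maps back to slot 0, where the older fence sits */
        printf("slot for c: %d\n", pick_slot(slots, &c));
        return 0;
}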
Reviewed-by: Christian König <[email protected]>

Going to push separately to drm-misc-next on Monday.

Regards,
Christian.

> ---
>  .../gpu/drm/ttm/tests/ttm_bo_validate_test.c  | 11 +++--
>  drivers/gpu/drm/ttm/tests/ttm_resource_test.c |  5 +-
>  drivers/gpu/drm/ttm/ttm_bo.c                  | 47 ++++++++++---------
>  drivers/gpu/drm/ttm/ttm_bo_util.c             | 38 ++++++++++++---
>  drivers/gpu/drm/ttm/ttm_resource.c            | 31 +++++++-----
>  include/drm/ttm/ttm_resource.h                | 29 ++++++++----
>  6 files changed, 104 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> index 3148f5d3dbd6..8f71906c4238 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> @@ -651,7 +651,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
>          int err;
>
>          man = ttm_manager_type(priv->ttm_dev, mem_type);
> -        man->move = dma_fence_get_stub();
> +        man->eviction_fences[0] = dma_fence_get_stub();
>
>          bo = ttm_bo_kunit_init(test, test->priv, size, NULL);
>          bo->type = bo_type;
> @@ -668,7 +668,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
>          KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
>
>          ttm_bo_put(bo);
> -        dma_fence_put(man->move);
> +        dma_fence_put(man->eviction_fences[0]);
>  }
>
>  static const struct ttm_bo_validate_test_case ttm_bo_validate_wait_cases[] = {
> @@ -732,9 +732,9 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
>
>          spin_lock_init(&fence_lock);
>          man = ttm_manager_type(priv->ttm_dev, fst_mem);
> -        man->move = alloc_mock_fence(test);
> +        man->eviction_fences[0] = alloc_mock_fence(test);
>
> -        task = kthread_create(threaded_fence_signal, man->move, "move-fence-signal");
> +        task = kthread_create(threaded_fence_signal, man->eviction_fences[0], "move-fence-signal");
>          if (IS_ERR(task))
>                  KUNIT_FAIL(test, "Couldn't create move fence signal task\n");
>
> @@ -742,7 +742,8 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
>          err = ttm_bo_validate(bo, placement_val, &ctx_val);
>          dma_resv_unlock(bo->base.resv);
>
> -        dma_fence_wait_timeout(man->move, false, MAX_SCHEDULE_TIMEOUT);
> +        dma_fence_wait_timeout(man->eviction_fences[0], false, MAX_SCHEDULE_TIMEOUT);
> +        man->eviction_fences[0] = NULL;
>
>          KUNIT_EXPECT_EQ(test, err, 0);
>          KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size);
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
> index e6ea2bd01f07..c0e4e35e0442 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
> @@ -207,6 +207,7 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
>          struct ttm_resource_test_priv *priv = test->priv;
>          struct ttm_resource_manager *man;
>          size_t size = SZ_16K;
> +        int i;
>
>          man = kunit_kzalloc(test, sizeof(*man), GFP_KERNEL);
>          KUNIT_ASSERT_NOT_NULL(test, man);
> @@ -216,8 +217,8 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
>          KUNIT_ASSERT_PTR_EQ(test, man->bdev, priv->devs->ttm_dev);
>          KUNIT_ASSERT_EQ(test, man->size, size);
>          KUNIT_ASSERT_EQ(test, man->usage, 0);
> -        KUNIT_ASSERT_NULL(test, man->move);
> -        KUNIT_ASSERT_NOT_NULL(test, &man->move_lock);
> +        for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
> +                KUNIT_ASSERT_NULL(test, man->eviction_fences[i]);
>
>          for (int i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>                  KUNIT_ASSERT_TRUE(test, list_empty(&man->lru[i]));
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index f4d9e68b21e7..0b3732ed6f6c 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -658,34 +658,35 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
>  EXPORT_SYMBOL(ttm_bo_unpin);
>
>  /*
> - * Add the last move fence to the BO as kernel dependency and reserve a new
> - * fence slot.
> + * Add the pipelined eviction fences to the BO as kernel dependency and
> + * reserve new fence slots.
>   */
> -static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
> -                                 struct ttm_resource_manager *man,
> -                                 bool no_wait_gpu)
> +static int ttm_bo_add_pipelined_eviction_fences(struct ttm_buffer_object *bo,
> +                                                struct ttm_resource_manager *man,
> +                                                bool no_wait_gpu)
>  {
>          struct dma_fence *fence;
> -        int ret;
> +        int i;
>
> -        spin_lock(&man->move_lock);
> -        fence = dma_fence_get(man->move);
> -        spin_unlock(&man->move_lock);
> +        spin_lock(&man->eviction_lock);
> +        for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
> +                fence = man->eviction_fences[i];
> +                if (!fence)
> +                        continue;
>
> -        if (!fence)
> -                return 0;
> -
> -        if (no_wait_gpu) {
> -                ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
> -                dma_fence_put(fence);
> -                return ret;
> +                if (no_wait_gpu) {
> +                        if (!dma_fence_is_signaled(fence)) {
> +                                spin_unlock(&man->eviction_lock);
> +                                return -EBUSY;
> +                        }
> +                } else {
> +                        dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
> +                }
>          }
> +        spin_unlock(&man->eviction_lock);
>
> -        dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
> -
> -        ret = dma_resv_reserve_fences(bo->base.resv, 1);
> -        dma_fence_put(fence);
> -        return ret;
> +        /* TODO: this call should be removed. */
> +        return dma_resv_reserve_fences(bo->base.resv, 1);
>  }
>
>  /**
> @@ -718,7 +719,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
>          int i, ret;
>
>          ticket = dma_resv_locking_ctx(bo->base.resv);
> -        ret = dma_resv_reserve_fences(bo->base.resv, 1);
> +        ret = dma_resv_reserve_fences(bo->base.resv, TTM_NUM_MOVE_FENCES);
>          if (unlikely(ret))
>                  return ret;
>
> @@ -757,7 +758,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
>                  return ret;
>          }
>
> -        ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
> +        ret = ttm_bo_add_pipelined_eviction_fences(bo, man, ctx->no_wait_gpu);
>          if (unlikely(ret)) {
>                  ttm_resource_free(bo, res);
>                  if (ret == -EBUSY)
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index acbbca9d5c92..2ff35d55e462 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -258,7 +258,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>          ret = dma_resv_trylock(&fbo->base.base._resv);
>          WARN_ON(!ret);
>
> -        ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
> +        ret = dma_resv_reserve_fences(&fbo->base.base._resv, TTM_NUM_MOVE_FENCES);
>          if (ret) {
>                  dma_resv_unlock(&fbo->base.base._resv);
>                  kfree(fbo);
> @@ -646,20 +646,44 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
>  {
>          struct ttm_device *bdev = bo->bdev;
>          struct ttm_resource_manager *from;
> +        struct dma_fence *tmp;
> +        int i;
>
>          from = ttm_manager_type(bdev, bo->resource->mem_type);
>
>          /**
>           * BO doesn't have a TTM we need to bind/unbind. Just remember
> -         * this eviction and free up the allocation
> +         * this eviction and free up the allocation.
> +         * The fence will be saved in the first free slot or in the slot
> +         * already used to store a fence from the same context. Since
> +         * drivers can't use more than TTM_NUM_MOVE_FENCES contexts for
> +         * evictions, we should always find a slot to use.
>           */
> -        spin_lock(&from->move_lock);
> -        if (!from->move || dma_fence_is_later(fence, from->move)) {
> -                dma_fence_put(from->move);
> -                from->move = dma_fence_get(fence);
> +        spin_lock(&from->eviction_lock);
> +        for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
> +                tmp = from->eviction_fences[i];
> +                if (!tmp)
> +                        break;
> +                if (fence->context != tmp->context)
> +                        continue;
> +                if (dma_fence_is_later(fence, tmp)) {
> +                        dma_fence_put(tmp);
> +                        break;
> +                }
> +                goto unlock;
> +        }
> +        if (i < TTM_NUM_MOVE_FENCES) {
> +                from->eviction_fences[i] = dma_fence_get(fence);
> +        } else {
> +                WARN(1, "not enough fence slots for all fence contexts");
> +                spin_unlock(&from->eviction_lock);
> +                dma_fence_wait(fence, false);
> +                goto end;
>          }
> -        spin_unlock(&from->move_lock);
>
> +unlock:
> +        spin_unlock(&from->eviction_lock);
> +end:
>          ttm_resource_free(bo, &bo->resource);
>  }
>
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index e2c82ad07eb4..62c34cafa387 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -523,14 +523,15 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
>  {
>          unsigned i;
>
> -        spin_lock_init(&man->move_lock);
>          man->bdev = bdev;
>          man->size = size;
>          man->usage = 0;
>
>          for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>                  INIT_LIST_HEAD(&man->lru[i]);
> -        man->move = NULL;
> +        spin_lock_init(&man->eviction_lock);
> +        for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
> +                man->eviction_fences[i] = NULL;
>  }
>  EXPORT_SYMBOL(ttm_resource_manager_init);
>
> @@ -551,7 +552,7 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>                  .no_wait_gpu = false,
>          };
>          struct dma_fence *fence;
> -        int ret;
> +        int ret, i;
>
>          do {
>                  ret = ttm_bo_evict_first(bdev, man, &ctx);
> @@ -561,18 +562,24 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>          if (ret && ret != -ENOENT)
>                  return ret;
>
> -        spin_lock(&man->move_lock);
> -        fence = dma_fence_get(man->move);
> -        spin_unlock(&man->move_lock);
> +        ret = 0;
>
> -        if (fence) {
> -                ret = dma_fence_wait(fence, false);
> -                dma_fence_put(fence);
> -                if (ret)
> -                        return ret;
> +        spin_lock(&man->eviction_lock);
> +        for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
> +                fence = man->eviction_fences[i];
> +                if (fence && !dma_fence_is_signaled(fence)) {
> +                        dma_fence_get(fence);
> +                        spin_unlock(&man->eviction_lock);
> +                        ret = dma_fence_wait(fence, false);
> +                        dma_fence_put(fence);
> +                        if (ret)
> +                                return ret;
> +                        spin_lock(&man->eviction_lock);
> +                }
>          }
> +        spin_unlock(&man->eviction_lock);
>
> -        return 0;
> +        return ret;
>  }
>  EXPORT_SYMBOL(ttm_resource_manager_evict_all);
>
> diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
> index f49daa504c36..50e6added509 100644
> --- a/include/drm/ttm/ttm_resource.h
> +++ b/include/drm/ttm/ttm_resource.h
> @@ -50,6 +50,15 @@ struct io_mapping;
>  struct sg_table;
>  struct scatterlist;
>
> +/**
> + * define TTM_NUM_MOVE_FENCES - How many entities can be used for evictions
> + *
> + * Pipelined evictions can be spread on multiple entities. This
> + * is the max number of entities that can be used by the driver
> + * for that purpose.
> + */
> +#define TTM_NUM_MOVE_FENCES 8
> +
>  /**
>   * enum ttm_lru_item_type - enumerate ttm_lru_item subclasses
>   */
> @@ -180,8 +189,8 @@ struct ttm_resource_manager_func {
>   * @size: Size of the managed region.
>   * @bdev: ttm device this manager belongs to
>   * @func: structure pointer implementing the range manager. See above
> - * @move_lock: lock for move fence
> - * @move: The fence of the last pipelined move operation.
> + * @eviction_lock: lock for eviction fences
> + * @eviction_fences: The fences of the last pipelined move operations.
>   * @lru: The lru list for this memory type.
>   *
>   * This structure is used to identify and manage memory types for a device.
> @@ -195,12 +204,12 @@ struct ttm_resource_manager {
>          struct ttm_device *bdev;
>          uint64_t size;
>          const struct ttm_resource_manager_func *func;
> -        spinlock_t move_lock;
>
> -        /*
> -         * Protected by @move_lock.
> +        /* This is very similar to a dma_resv object, but locking rules make
> +         * it difficult to use one in this context.
>           */
> -        struct dma_fence *move;
> +        spinlock_t eviction_lock;
> +        struct dma_fence *eviction_fences[TTM_NUM_MOVE_FENCES];
>
>          /*
>           * Protected by the bdev->lru_lock.
> @@ -421,8 +430,12 @@ static inline bool ttm_resource_manager_used(struct ttm_resource_manager *man)
>  static inline void
>  ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
>  {
> -        dma_fence_put(man->move);
> -        man->move = NULL;
> +        int i;
> +
> +        for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
> +                dma_fence_put(man->eviction_fences[i]);
> +                man->eviction_fences[i] = NULL;
> +        }
>  }
>
>  void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
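One practical note for driver authors: every distinct fence context used for
pipelined evictions occupies one of the TTM_NUM_MOVE_FENCES slots until the
manager is cleaned up, and overflowing them degrades to the WARN plus
synchronous dma_fence_wait() path in ttm_bo_move_pipeline_evict(). A driver
that dedicates a fixed set of entities to evictions could guard against that
at build time, e.g. (hypothetical snippet, not part of this patch;
DRIVER_NUM_EVICTION_ENTITIES is a made-up name):

#include <linux/build_bug.h>
#include <drm/ttm/ttm_resource.h>

/* Made-up driver-side constant: the number of scheduler entities (and thus
 * fence contexts) this driver uses for pipelined evictions. */
#define DRIVER_NUM_EVICTION_ENTITIES 4

/* Fail the build rather than hit the runtime WARN in
 * ttm_bo_move_pipeline_evict() when the fence slots run out. */
static_assert(DRIVER_NUM_EVICTION_ENTITIES <= TTM_NUM_MOVE_FENCES,
              "more eviction fence contexts than TTM fence slots");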
