On 5/12/25 08:12, Dave Airlie wrote: > From: Dave Airlie <airl...@redhat.com> > > Doing proper integration of TTM system memory allocations with > memcg is a difficult ask, primarily due to difficulties around > accounting for evictions properly. > > However there are systems where userspace will be allocating > objects in system memory and they won't be prone to migrating > or evicting and we should start with at least accounting those. > > This adds a memcg group to ttm bo and tt objects. > > This memcg is used when: > a) when a tt is populated (and unpopulated) > b) the TTM_PL_FLAG_MEMCG is set on the placement for the > bo when the tt is allocated. > > The placement flag is set for all non-eviction placements. > > This version moves back from the resource to the tt layer, > when accounting at the resource layer, if an object is swapped > out there was no way to remove it from the accounting, whereas > the tt layer has more info for this.
Good point, we should probably really come up with a SWAPED TTM domain. The nice thing about attaching it to the resource would have been that you could charge evicted BOs when they are re-validated by the driver. But that can also come much later. Regards, Christian. > > v4: move back to the tt layer from the resource layer to > handle swap, but keep the memcg charging hooks for now. > v3: moves from having a flags on the op ctx to the using a > placement flag. > v2: moved the charging up a level and also no longer used > __GFP_ACCOUNT, or attached the memcg to object pages, it instead > uses the same approach as socket memory and just charges/uncharges > at the object level. This was suggested by Christian. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > drivers/gpu/drm/ttm/ttm_bo.c | 6 ++++-- > drivers/gpu/drm/ttm/ttm_bo_util.c | 6 +++--- > drivers/gpu/drm/ttm/ttm_bo_vm.c | 4 +++- > drivers/gpu/drm/ttm/ttm_tt.c | 17 ++++++++++++++++- > include/drm/ttm/ttm_bo.h | 7 +++++++ > include/drm/ttm/ttm_placement.h | 3 +++ > include/drm/ttm/ttm_tt.h | 9 ++++++++- > 7 files changed, 44 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c > index 5bf3c969907c..1630ef28e5a8 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo.c > +++ b/drivers/gpu/drm/ttm/ttm_bo.c > @@ -140,7 +140,7 @@ static int ttm_bo_handle_move_mem(struct > ttm_buffer_object *bo, > goto out_err; > > if (mem->mem_type != TTM_PL_SYSTEM) { > - ret = ttm_bo_populate(bo, ctx); > + ret = ttm_bo_populate(bo, mem->placement & > TTM_PL_FLAG_MEMCG, ctx); > if (ret) > goto out_err; > } > @@ -1237,6 +1237,7 @@ void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) > /** > * ttm_bo_populate() - Ensure that a buffer object has backing pages > * @bo: The buffer object > + * @memcg_account: account this memory with memcg if needed > * @ctx: The ttm_operation_ctx governing the operation. > * > * For buffer objects in a memory type whose manager uses > @@ -1250,6 +1251,7 @@ void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) > * is set to true. > */ > int ttm_bo_populate(struct ttm_buffer_object *bo, > + bool memcg_account, > struct ttm_operation_ctx *ctx) > { > struct ttm_tt *tt = bo->ttm; > @@ -1262,7 +1264,7 @@ int ttm_bo_populate(struct ttm_buffer_object *bo, > return 0; > > swapped = ttm_tt_is_swapped(tt); > - ret = ttm_tt_populate(bo->bdev, tt, ctx); > + ret = ttm_tt_populate(bo->bdev, tt, memcg_account, ctx); > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c > b/drivers/gpu/drm/ttm/ttm_bo_util.c > index 15cab9bda17f..7d599d0707e4 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c > @@ -163,7 +163,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, > src_man = ttm_manager_type(bdev, src_mem->mem_type); > if (ttm && ((ttm->page_flags & TTM_TT_FLAG_SWAPPED) || > dst_man->use_tt)) { > - ret = ttm_bo_populate(bo, ctx); > + ret = ttm_bo_populate(bo, dst_mem->placement & > TTM_PL_FLAG_MEMCG, ctx); > if (ret) > return ret; > } > @@ -350,7 +350,7 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, > > BUG_ON(!ttm); > > - ret = ttm_bo_populate(bo, &ctx); > + ret = ttm_bo_populate(bo, mem->placement & TTM_PL_FLAG_MEMCG, &ctx); > if (ret) > return ret; > > @@ -507,7 +507,7 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct > iosys_map *map) > pgprot_t prot; > void *vaddr; > > - ret = ttm_bo_populate(bo, &ctx); > + ret = ttm_bo_populate(bo, mem->placement & TTM_PL_FLAG_MEMCG, > &ctx); > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > index a194db83421d..02aea23a34e7 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > @@ -224,7 +224,9 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > }; > > ttm = bo->ttm; > - err = ttm_bo_populate(bo, &ctx); > + err = ttm_bo_populate(bo, > + bo->resource->placement & > TTM_PL_FLAG_MEMCG, > + &ctx); > if (err) { > if (err == -EINTR || err == -ERESTARTSYS || > err == -EAGAIN) > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c > index 698cd4bf5e46..81c4cbbeb130 100644 > --- a/drivers/gpu/drm/ttm/ttm_tt.c > +++ b/drivers/gpu/drm/ttm/ttm_tt.c > @@ -161,6 +161,7 @@ static void ttm_tt_init_fields(struct ttm_tt *ttm, > ttm->caching = caching; > ttm->restore = NULL; > ttm->backup = NULL; > + ttm->memcg = bo->memcg; > } > > int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo, > @@ -365,7 +366,9 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt > *ttm, > EXPORT_SYMBOL_FOR_TESTS_ONLY(ttm_tt_swapout); > > int ttm_tt_populate(struct ttm_device *bdev, > - struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) > + struct ttm_tt *ttm, > + bool memcg_account_tt, > + struct ttm_operation_ctx *ctx) > { > int ret; > > @@ -376,6 +379,14 @@ int ttm_tt_populate(struct ttm_device *bdev, > return 0; > > if (!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) { > + if (ttm->memcg && memcg_account_tt) { > + gfp_t gfp_flags = GFP_USER; > + if (ctx->gfp_retry_mayfail) > + gfp_flags |= __GFP_RETRY_MAYFAIL; > + if (!mem_cgroup_charge_gpu(ttm->memcg, ttm->num_pages, > gfp_flags)) > + return -ENOMEM; > + ttm->page_flags |= TTM_TT_FLAG_ACCOUNTED; > + } > atomic_long_add(ttm->num_pages, &ttm_pages_allocated); > if (bdev->pool.use_dma32) > atomic_long_add(ttm->num_pages, > @@ -437,6 +448,10 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct > ttm_tt *ttm) > ttm_pool_free(&bdev->pool, ttm); > > if (!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) { > + if (ttm->page_flags & TTM_TT_FLAG_ACCOUNTED) { > + mem_cgroup_uncharge_gpu(ttm->memcg, ttm->num_pages); > + ttm->page_flags &= ~TTM_TT_FLAG_ACCOUNTED; > + } > atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); > if (bdev->pool.use_dma32) > atomic_long_sub(ttm->num_pages, > diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h > index 903cd1030110..d7c0dd9e0746 100644 > --- a/include/drm/ttm/ttm_bo.h > +++ b/include/drm/ttm/ttm_bo.h > @@ -135,6 +135,12 @@ struct ttm_buffer_object { > * reservation lock. > */ > struct sg_table *sg; > + > + /** > + * @memcg: memory cgroup to charge this to if it ends up using system > memory. > + * NULL means don't charge. > + */ > + struct mem_cgroup *memcg; > }; > > #define TTM_BO_MAP_IOMEM_MASK 0x80 > @@ -486,6 +492,7 @@ pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct > ttm_resource *res, > pgprot_t tmp); > void ttm_bo_tt_destroy(struct ttm_buffer_object *bo); > int ttm_bo_populate(struct ttm_buffer_object *bo, > + bool memcg_account, > struct ttm_operation_ctx *ctx); > > /* Driver LRU walk helpers initially targeted for shrinking. */ > diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h > index b510a4812609..668798072292 100644 > --- a/include/drm/ttm/ttm_placement.h > +++ b/include/drm/ttm/ttm_placement.h > @@ -70,6 +70,9 @@ > /* Placement is only used during eviction */ > #define TTM_PL_FLAG_FALLBACK (1 << 4) > > +/* Placement causes memcg accounting */ > +#define TTM_PL_FLAG_MEMCG (1 << 5) > + > /** > * struct ttm_place > * > diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h > index 406437ad674b..2790fc82edc3 100644 > --- a/include/drm/ttm/ttm_tt.h > +++ b/include/drm/ttm/ttm_tt.h > @@ -90,6 +90,8 @@ struct ttm_tt { > * TTM_TT_FLAG_BACKED_UP: TTM internal only. This is set if the > * struct ttm_tt has been (possibly partially) backed up. > * > + * TTM_TT_FLAG_ACCOUNTED: TTM internal. This tt has been accounted. > + * > * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is > * set by TTM after ttm_tt_populate() has successfully returned, and is > * then unset when TTM calls ttm_tt_unpopulate(). > @@ -101,8 +103,9 @@ struct ttm_tt { > #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) > #define TTM_TT_FLAG_DECRYPTED BIT(4) > #define TTM_TT_FLAG_BACKED_UP BIT(5) > +#define TTM_TT_FLAG_ACCOUNTED BIT(6) > > -#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) > +#define TTM_TT_FLAG_PRIV_POPULATED BIT(7) > uint32_t page_flags; > /** @num_pages: Number of pages in the page array. */ > uint32_t num_pages; > @@ -126,6 +129,8 @@ struct ttm_tt { > enum ttm_caching caching; > /** @restore: Partial restoration from backup state. TTM private */ > struct ttm_pool_tt_restore *restore; > + /** @memcg: Memory cgroup for this TT allocation */ > + struct mem_cgroup *memcg; > }; > > /** > @@ -245,11 +250,13 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct > ttm_tt *ttm, > * > * @bdev: the ttm_device this object belongs to > * @ttm: Pointer to the ttm_tt structure > + * @mem_account_tt: Account this population to the memcg > * @ctx: operation context for populating the tt object. > * > * Calls the driver method to allocate pages for a ttm > */ > int ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm, > + bool mem_account_tt, > struct ttm_operation_ctx *ctx); > > /**