On 5/19/26 10:22, Timur Kristóf wrote:
> UVD 4.x and older require that BOs don't cross 256M segments.
> We need to respect that in amdgpu_ttm_alloc_gart().
> We can't move the BOs later because GTT->GTT moves are
> not implemented. We also can't force all BOs to VRAM
> because that becomes very problematic in low VRAM scenarios.
>
> This fixes UVD CS BOs crossing 256M segments
> when they are placed in the GART.
Clear NAK for that approach.
This is the general TTM interface function and shouldn't have any HW generation
dependent code in it.
Regards,
Christian.
>
> Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/4799
> Signed-off-by: Timur Kristóf <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 56 ++++++++++++++++++++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 ++
> 2 files changed, 53 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 6c6ab4dd6ea9..a106c7e77e26 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -959,6 +959,40 @@ static int amdgpu_ttm_backend_bind(struct ttm_device
> *bdev,
> return 0;
> }
>
> +/**
> + * amdgpu_ttm_fill_gart_256M_placements() - Fill placements array with 256M
> GART segments
> + *
> + * @bo: TTM buffer objects whose placements should be filled
> + * @placements: Pointer to an array of placements
> + * @max_placements: Size of the placements array
> + *
> + * Fill the specified placements array with 256M GART segments,
> + * starting from the highest address in order to reduce the
> + * contention of the lowest segment.
> + *
> + * Returns the number of placements filled.
> + */
> +u32 amdgpu_ttm_fill_gart_256M_placements(struct ttm_buffer_object *bo,
> + struct ttm_place *placements,
> + u32 max_placements)
> +{
> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> + u32 i;
> +
> + /* Fill the placements array with 256M segments, starting from highest.
> */
> + for (i = 0; i < max_placements; ++i) {
> + if (i * SZ_256M >= adev->gmc.gart_size)
> + break;
> +
> + placements[i].lpfn = (adev->gmc.gart_size - i * SZ_256M) >>
> PAGE_SHIFT;
> + placements[i].fpfn = ALIGN_DOWN(placements[i].lpfn - 1, SZ_256M
> >> PAGE_SHIFT);
> + placements[i].mem_type = TTM_PL_TT;
> + placements[i].flags = bo->resource->placement;
> + }
> +
> + return i;
> +}
> +
> /*
> * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
> * through AGP or GART aperture.
> @@ -973,7 +1007,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
> struct ttm_operation_ctx ctx = { false, false };
> struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
> struct ttm_placement placement;
> - struct ttm_place placements;
> + struct ttm_place placements[AMDGPU_BO_MAX_PLACEMENTS];
> struct ttm_resource *tmp;
> uint64_t addr, flags;
> int r;
> @@ -987,11 +1021,21 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
>
> /* allocate GART space */
> placement.num_placement = 1;
> - placement.placement = &placements;
> - placements.fpfn = 0;
> - placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
> - placements.mem_type = TTM_PL_TT;
> - placements.flags = bo->resource->placement;
> + placement.placement = &placements[0];
> + placements[0].fpfn = 0;
> + placements[0].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
> + placements[0].mem_type = TTM_PL_TT;
> + placements[0].flags = bo->resource->placement;
> +
> + /*
> + * UVD 4.x and older require that BOs don't cross 256M segments.
> + * We need to respect that here. We can't move the BO later
> + * because GTT->GTT moves are not implemented.
> + */
> + if (bo->base.size < SZ_256M && adev->family <= AMDGPU_FAMILY_KV)
> + placement.num_placement =
> + amdgpu_ttm_fill_gart_256M_placements(bo, placements,
> +
> ARRAY_SIZE(placements));
>
> r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
> if (unlikely(r))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 2d72fa217274..e9de628c8d2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -202,6 +202,9 @@ int amdgpu_ttm_clear_buffer(struct
> amdgpu_ttm_buffer_entity *entity,
> u64 k_job_id);
> struct amdgpu_ttm_buffer_entity *amdgpu_ttm_next_clear_entity(struct
> amdgpu_device *adev);
>
> +u32 amdgpu_ttm_fill_gart_256M_placements(struct ttm_buffer_object *bo,
> + struct ttm_place *placements,
> + u32 max_placements);
> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
> uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);