On Tuesday, May 19, 2026 10:54:10 AM Central European Summer Time Christian 
König wrote:
> On 5/19/26 10:22, Timur Kristóf wrote:
> > UVD 4.x and older require that BOs don't cross 256M segments.
> > We need to respect that in amdgpu_ttm_alloc_gart().
> > We can't move the BOs later because GTT->GTT moves are
> > not implemented. We also can't force all BOs to VRAM
> > because that becomes very problematic in low VRAM scenarios.
> > 
> > This fixes UVD CS BOs crossing 256M segments
> > when they are placed in the GART.
> 
> Clear NAK for that approach.
> 
> This is the general TTM interface function and shouldn't have any HW
> generation dependent code in it.

I don't see how else to solve this, since GTT->GTT moves are not implemented,
so we can't move the BO to a suitable address later. We also can't move it to 
VRAM.

Please suggest a better approach if you don't like this one.


> 
> Regards,
> Christian.
> 
> > Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/4799
> > Signed-off-by: Timur Kristóf <[email protected]>
> > ---
> > 
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 56 ++++++++++++++++++++++---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  3 ++
> >  2 files changed, 53 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index
> > 6c6ab4dd6ea9..a106c7e77e26 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -959,6 +959,40 @@ static int amdgpu_ttm_backend_bind(struct ttm_device
> > *bdev,> 
> >     return 0;
> >  
> >  }
> > 
> > +/**
> > + * amdgpu_ttm_fill_gart_256M_placements() - Fill placements array with
> > 256M GART segments + *
> > + * @bo: TTM buffer objects whose placements should be filled
> > + * @placements: Pointer to an array of placements
> > + * @max_placements: Size of the placements array
> > + *
> > + * Fill the specified placements array with 256M GART segments,
> > + * starting from the highest address in order to reduce the
> > + * contention of the lowest segment.
> > + *
> > + * Returns the number of placements filled.
> > + */
> > +u32 amdgpu_ttm_fill_gart_256M_placements(struct ttm_buffer_object *bo,
> > +                                    struct ttm_place 
*placements,
> > +                                    u32 max_placements)
> > +{
> > +   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> > +   u32 i;
> > +
> > +   /* Fill the placements array with 256M segments, starting from 
highest.
> > */ +        for (i = 0; i < max_placements; ++i) {
> > +           if (i * SZ_256M >= adev->gmc.gart_size)
> > +                   break;
> > +
> > +           placements[i].lpfn = (adev->gmc.gart_size - i * 
SZ_256M) >> PAGE_SHIFT;
> > +           placements[i].fpfn = ALIGN_DOWN(placements[i].lpfn - 1, 
SZ_256M >>
> > PAGE_SHIFT); +              placements[i].mem_type = TTM_PL_TT;
> > +           placements[i].flags = bo->resource->placement;
> > +   }
> > +
> > +   return i;
> > +}
> > +
> > 
> >  /*
> >  
> >   * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
> >   * through AGP or GART aperture.
> > 
> > @@ -973,7 +1007,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object
> > *bo)> 
> >     struct ttm_operation_ctx ctx = { false, false };
> >     struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
> >     struct ttm_placement placement;
> > 
> > -   struct ttm_place placements;
> > +   struct ttm_place placements[AMDGPU_BO_MAX_PLACEMENTS];
> > 
> >     struct ttm_resource *tmp;
> >     uint64_t addr, flags;
> >     int r;
> > 
> > @@ -987,11 +1021,21 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object
> > *bo)> 
> >     /* allocate GART space */
> >     placement.num_placement = 1;
> > 
> > -   placement.placement = &placements;
> > -   placements.fpfn = 0;
> > -   placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
> > -   placements.mem_type = TTM_PL_TT;
> > -   placements.flags = bo->resource->placement;
> > +   placement.placement = &placements[0];
> > +   placements[0].fpfn = 0;
> > +   placements[0].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
> > +   placements[0].mem_type = TTM_PL_TT;
> > +   placements[0].flags = bo->resource->placement;
> > +
> > +   /*
> > +    * UVD 4.x and older require that BOs don't cross 256M segments.
> > +    * We need to respect that here. We can't move the BO later
> > +    * because GTT->GTT moves are not implemented.
> > +    */
> > +   if (bo->base.size < SZ_256M && adev->family <= AMDGPU_FAMILY_KV)
> > +           placement.num_placement =
> > +                   amdgpu_ttm_fill_gart_256M_placements(bo, 
placements,
> > +                                                        
ARRAY_SIZE(placements));
> > 
> >     r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
> >     if (unlikely(r))
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index
> > 2d72fa217274..e9de628c8d2d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > @@ -202,6 +202,9 @@ int amdgpu_ttm_clear_buffer(struct
> > amdgpu_ttm_buffer_entity *entity,> 
> >                         u64 k_job_id);
> >  
> >  struct amdgpu_ttm_buffer_entity *amdgpu_ttm_next_clear_entity(struct
> >  amdgpu_device *adev);> 
> > +u32 amdgpu_ttm_fill_gart_256M_placements(struct ttm_buffer_object *bo,
> > +                                    struct ttm_place 
*placements,
> > +                                    u32 max_placements);
> > 
> >  int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> >  void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
> >  uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t
> >  type);




Reply via email to