On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
> Entities' gart windows are contiguous, so when copying a buffer
> and src doesn't need a gart window, its window can be used to
> extend the dst one (and vice versa).
> 
> This doubles the gart window size and reduces the number of jobs
> required.
> 
> ---
> v2: pass adev instead of ring to amdgpu_ttm_needs_gart_window
> ---
> 
> Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78 ++++++++++++++++++-------
>  1 file changed, 58 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 164b49d768d8..489880b2fb8e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -177,6 +177,21 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct 
> amdgpu_ttm_buffer_entit
>       return amdgpu_job_submit(job);
>  }
>  
> +static bool amdgpu_ttm_needs_gart_window(struct amdgpu_device *adev,
> +                                      struct ttm_resource *mem,
> +                                      struct amdgpu_res_cursor *mm_cur,
> +                                      bool tmz,
> +                                      uint64_t *addr)
> +{
> +     /* Map only what can't be accessed directly */
> +     if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {

Checking mem->start here is actually a really bad idea.

IIRC Amar was once working on patches to fix that, but never finished them.

On the other hand, that is certainly not a problem for this patch here.

> +             *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
> +                     mm_cur->start;
> +             return false;
> +     }
> +     return true;
> +}
> +
>  /**
>   * amdgpu_ttm_map_buffer - Map memory into the GART windows
>   * @adev: the device being used
> @@ -185,6 +200,7 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct 
> amdgpu_ttm_buffer_entit
>   * @mem: memory object to map
>   * @mm_cur: range to map
>   * @window: which GART window to use
> + * @use_two_windows: if true, use a double window
>   * @tmz: if we should setup a TMZ enabled mapping
>   * @size: in number of bytes to map, out number of bytes mapped
>   * @addr: resulting address inside the MC address space
> @@ -198,6 +214,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_device 
> *adev,
>                                struct ttm_resource *mem,
>                                struct amdgpu_res_cursor *mm_cur,
>                                unsigned int window,
> +                              bool use_two_windows,
>                                bool tmz, uint64_t *size, uint64_t *addr)
>  {
>       unsigned int offset, num_pages, num_dw, num_bytes;
> @@ -213,13 +230,8 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_device 
> *adev,
>       if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
>               return -EINVAL;
>  
> -     /* Map only what can't be accessed directly */
> -     if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
> -             *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
> -                     mm_cur->start;
> +     if (!amdgpu_ttm_needs_gart_window(adev, mem, mm_cur, tmz, addr))
>               return 0;
> -     }
> -

When that is now checked outside of the function, this shouldn't be necessary
any more and we should be able to remove it here.

>  
>       /*
>        * If start begins at an offset inside the page, then adjust the size
> @@ -228,7 +240,8 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_device 
> *adev,
>       offset = mm_cur->start & ~PAGE_MASK;
>  
>       num_pages = PFN_UP(*size + offset);
> -     num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);
> +     num_pages = min_t(uint32_t,
> +             num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE * (use_two_windows ? 2 
> : 1));

Rather use two separate calls to amdgpu_ttm_map_buffer() and just increment the 
cursor before the second call.

>  
>       *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
>  
> @@ -300,7 +313,9 @@ static int amdgpu_ttm_copy_mem_to_mem(struct 
> amdgpu_device *adev,
>                                     struct dma_resv *resv,
>                                     struct dma_fence **f)
>  {
> +     bool src_needs_gart_window, dst_needs_gart_window, use_two_gart_windows;
>       struct amdgpu_res_cursor src_mm, dst_mm;
> +     int src_gart_window, dst_gart_window;
>       struct dma_fence *fence = NULL;
>       int r = 0;
>       uint32_t copy_flags = 0;
> @@ -324,18 +339,40 @@ static int amdgpu_ttm_copy_mem_to_mem(struct 
> amdgpu_device *adev,
>               /* Never copy more than 256MiB at once to avoid a timeout */
>               cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
>  
> -             /* Map src to window 0 and dst to window 1. */
> -             r = amdgpu_ttm_map_buffer(adev, entity,
> -                                       src->bo, src->mem, &src_mm,
> -                                       0, tmz, &cur_size, &from);
> -             if (r)
> -                     goto error;
> +             /* If only one direction needs a gart window to access memory, 
> use both
> +              * windows for it.
> +              */
> +             src_needs_gart_window =
> +                     amdgpu_ttm_needs_gart_window(adev, src->mem, &src_mm, 
> tmz, &from);
> +             dst_needs_gart_window =
> +                     amdgpu_ttm_needs_gart_window(adev, dst->mem, &dst_mm, 
> tmz, &to);

The coding style looks a bit odd.

Regards,
Christian.

>  
> -             r = amdgpu_ttm_map_buffer(adev, entity,
> -                                       dst->bo, dst->mem, &dst_mm,
> -                                       1, tmz, &cur_size, &to);
> -             if (r)
> -                     goto error;
> +             if (src_needs_gart_window) {
> +                     src_gart_window = 0;
> +                     use_two_gart_windows = !dst_needs_gart_window;
> +             }
> +             if (dst_needs_gart_window) {
> +                     dst_gart_window = src_needs_gart_window ? 1 : 0;
> +                     use_two_gart_windows = !src_needs_gart_window;
> +             }
> +
> +             if (src_needs_gart_window) {
> +                     r = amdgpu_ttm_map_buffer(adev, entity,
> +                                               src->bo, src->mem, &src_mm,
> +                                               src_gart_window, 
> use_two_gart_windows,
> +                                               tmz, &cur_size, &from);
> +                     if (r)
> +                             goto error;
> +             }
> +
> +             if (dst_needs_gart_window) {
> +                     r = amdgpu_ttm_map_buffer(adev, entity,
> +                                               dst->bo, dst->mem, &dst_mm,
> +                                               dst_gart_window, 
> use_two_gart_windows,
> +                                               tmz, &cur_size, &to);
> +                     if (r)
> +                             goto error;
> +             }
>  
>               abo_src = ttm_to_amdgpu_bo(src->bo);
>               abo_dst = ttm_to_amdgpu_bo(dst->bo);
> @@ -2434,7 +2471,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_device *adev,
>  
>               r = amdgpu_ttm_map_buffer(adev, entity,
>                                         &bo->tbo, bo->tbo.resource, &cursor,
> -                                       1, false, &size, &addr);
> +                                       1, false, false, &size, &addr);
>               if (r)
>                       goto err;
>  
> @@ -2485,7 +2522,8 @@ int amdgpu_fill_buffer(struct amdgpu_device *adev,
>  
>               r = amdgpu_ttm_map_buffer(adev, entity,
>                                         &bo->tbo, bo->tbo.resource, &dst,
> -                                       1, false, &cur_size, &to);
> +                                       1, false, false,
> +                                       &cur_size, &to);
>               if (r)
>                       goto error;
>  

Reply via email to