On 2024-08-12 02:59, Samuel Zhang wrote:
The requested access range may span 2 adjacent buddy blocks of a
BO. In this case, 2 sdma copy commands need to be issued to fully access
the data range. But the current implementation only issues 1 sdma copy
command, which results in incomplete access.

The fix is to loop over the res cursor when emitting copy commands so that
multiple (2) copy commands are issued when necessary.

Signed-off-by: Samuel Zhang <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 26 ++++++++++++++-----------
  1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a6e90eada367..c423574acd5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1484,7 +1484,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
        struct dma_fence *fence;
        uint64_t src_addr, dst_addr;
        unsigned int num_dw;
-       int r, idx;
+       int r, idx, count = 0;
if (len > PAGE_SIZE)
                return -EINVAL;
@@ -1498,7 +1498,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
        if (write)
                memcpy(adev->mman.sdma_access_ptr, buf, len);
- num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+       num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw * 2, 8);
        r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
                                     AMDGPU_FENCE_OWNER_UNDEFINED,
                                     num_dw * 4, AMDGPU_IB_POOL_DELAYED,
@@ -1507,15 +1507,19 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
                goto out;
amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
-       src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
-               src_mm.start;
-       dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
-       if (write)
-               swap(src_addr, dst_addr);
-
-       amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
-                               len, 0);
-
+       while (src_mm.remaining) {
+               src_addr = amdgpu_ttm_domain_start(adev, 
bo->resource->mem_type) +
+                       src_mm.start;
+               dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo) + 
count;
+               if (write)
+                       swap(src_addr, dst_addr);
+
+               amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
+                                       src_mm.size, 0);
+
+               count += src_mm.size;

You could just increment dst_addr instead and move the initialization of dst_addr outside the loop. Other than that, this patch is

Reviewed-by: Felix Kuehling <[email protected]>


+               amdgpu_res_next(&src_mm, src_mm.size);
+    }
        amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);

Reply via email to