RE: [PATCH v2 05/10] drm/amdgpu: Add multi-GPU DMA mapping helpers

2021-05-10 Thread Errabolu, Ramesh
[AMD Official Use Only - Internal Distribution Only]

Acked-by: Ramesh Errabolu 

-----Original Message-----
From: amd-gfx  On Behalf Of Kuehling, 
Felix
Sent: Monday, April 26, 2021 10:41 PM
To: Zeng, Oak ; amd-...@lists.freedesktop.org; 
dri-devel@lists.freedesktop.org
Subject: Re: [PATCH v2 05/10] drm/amdgpu: Add multi-GPU DMA mapping helpers

Am 2021-04-26 um 8:09 p.m. schrieb Zeng, Oak:
> As I understand it, when one GPU maps another GPU's VRAM, this VRAM should 
> also be mapped in the IOMMU page table. Normal GTT memory (versus userptr) 
> also needs to be mapped in the IOMMU, but I don't see this code below.

Right, I'm not solving all problems at once. The next patch is there to handle 
GTT BOs.

Peer mappings of doorbells, MMIO and VRAM still need to be handled in the 
future. I'm trying to fix the worst issues first. This series should get 99% of 
real world tests working.


>  I only see you map userptr in the IOMMU. Maybe you map them in the IOMMU at 
> some point other than memory attachment time?
>
> Also see a nit-pick inline
>
> Regards,
> Oak
>
>  
>
> On 2021-04-21, 9:31 PM, "dri-devel on behalf of Felix Kuehling" 
>  
> wrote:
>
> Add BO-type specific helper functions to DMA-map and unmap
> kfd_mem_attachments. Implement this functionality for userptrs by creating
> one SG BO per GPU and filling it with a DMA mapping of the pages from the
> original mem->bo.
>
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   8 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 146 +-
>  2 files changed, 145 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index c24b2478f445..63668433f5a6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -38,11 +38,17 @@ extern uint64_t amdgpu_amdkfd_total_mem_size;
>
>  struct amdgpu_device;
>
> +enum kfd_mem_attachment_type {
> + KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
> + KFD_MEM_ATT_USERPTR,/* SG bo to DMA map pages from a userptr bo */
> +};
> +
>  struct kfd_mem_attachment {
>   struct list_head list;
> + enum kfd_mem_attachment_type type;
> + bool is_mapped;
>   struct amdgpu_bo_va *bo_va;
>   struct amdgpu_device *adev;
> - bool is_mapped;
>   uint64_t va;
>   uint64_t pte_flags;
>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index fbd7e786b54e..49d1af4aa5f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -473,12 +473,117 @@ static uint64_t get_pte_flags(struct amdgpu_device 
> *adev, struct kgd_mem *mem)
>   return pte_flags;
>  }
>
> +static int
> +kfd_mem_dmamap_userptr(struct kgd_mem *mem,
> +struct kfd_mem_attachment *attachment)
> +{
> + enum dma_data_direction direction =
> + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
> + DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
> + struct ttm_operation_ctx ctx = {.interruptible = true};
> + struct amdgpu_bo *bo = attachment->bo_va->base.bo;
> + struct amdgpu_device *adev = attachment->adev;
> + struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
> + struct ttm_tt *ttm = bo->tbo.ttm;
> + int ret;
> +
> + ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
> + if (unlikely(!ttm->sg))
> + return -ENOMEM;
> +
> + if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
> + return -EINVAL;
> +
> + /* Same sequence as in amdgpu_ttm_tt_pin_userptr */
> + ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
> + ttm->num_pages, 0,
> + (u64)ttm->num_pages << PAGE_SHIFT,
> + GFP_KERNEL);
> + if (unlikely(ret))
> + goto release_sg;
> Should go to a label starting from kfree below?

Thanks, I'll fix that.

Regards,
  Felix


> +
> + ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
> + if (unlikely(ret))
> + goto release_sg;
> +
> + drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
> +ttm->num_pages);
> +
> + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
> 

Re: [PATCH v2 05/10] drm/amdgpu: Add multi-GPU DMA mapping helpers

2021-04-26 Thread Felix Kuehling
Am 2021-04-26 um 8:09 p.m. schrieb Zeng, Oak:
> As I understand it, when one GPU maps another GPU's VRAM, this VRAM should 
> also be mapped in the IOMMU page table. Normal GTT memory (versus userptr) 
> also needs to be mapped in the IOMMU, but I don't see this code below.

Right, I'm not solving all problems at once. The next patch is there to
handle GTT BOs.

Peer mappings of doorbells, MMIO and VRAM still need to be handled in
the future. I'm trying to fix the worst issues first. This series should
get 99% of real world tests working.


>  I only see you map userptr in the IOMMU. Maybe you map them in the IOMMU at 
> some point other than memory attachment time?
>
> Also see a nit-pick inline
>
> Regards,
> Oak 
>
>  
>
> On 2021-04-21, 9:31 PM, "dri-devel on behalf of Felix Kuehling" 
>  
> wrote:
>
> Add BO-type specific helper functions to DMA-map and unmap
> kfd_mem_attachments. Implement this functionality for userptrs by creating
> one SG BO per GPU and filling it with a DMA mapping of the pages from the
> original mem->bo.
>
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   8 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 146 +-
>  2 files changed, 145 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index c24b2478f445..63668433f5a6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -38,11 +38,17 @@ extern uint64_t amdgpu_amdkfd_total_mem_size;
>
>  struct amdgpu_device;
>
> +enum kfd_mem_attachment_type {
> + KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
> + KFD_MEM_ATT_USERPTR,/* SG bo to DMA map pages from a userptr bo */
> +};
> +
>  struct kfd_mem_attachment {
>   struct list_head list;
> + enum kfd_mem_attachment_type type;
> + bool is_mapped;
>   struct amdgpu_bo_va *bo_va;
>   struct amdgpu_device *adev;
> - bool is_mapped;
>   uint64_t va;
>   uint64_t pte_flags;
>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index fbd7e786b54e..49d1af4aa5f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -473,12 +473,117 @@ static uint64_t get_pte_flags(struct amdgpu_device 
> *adev, struct kgd_mem *mem)
>   return pte_flags;
>  }
>
> +static int
> +kfd_mem_dmamap_userptr(struct kgd_mem *mem,
> +struct kfd_mem_attachment *attachment)
> +{
> + enum dma_data_direction direction =
> + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
> + DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
> + struct ttm_operation_ctx ctx = {.interruptible = true};
> + struct amdgpu_bo *bo = attachment->bo_va->base.bo;
> + struct amdgpu_device *adev = attachment->adev;
> + struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
> + struct ttm_tt *ttm = bo->tbo.ttm;
> + int ret;
> +
> + ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
> + if (unlikely(!ttm->sg))
> + return -ENOMEM;
> +
> + if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
> + return -EINVAL;
> +
> + /* Same sequence as in amdgpu_ttm_tt_pin_userptr */
> + ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
> + ttm->num_pages, 0,
> + (u64)ttm->num_pages << PAGE_SHIFT,
> + GFP_KERNEL);
> + if (unlikely(ret))
> + goto release_sg;
> Should go to a label starting from kfree below?

Thanks, I'll fix that.

Regards,
  Felix


> +
> + ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
> + if (unlikely(ret))
> + goto release_sg;
> +
> + drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
> +ttm->num_pages);
> +
> + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
> + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> + if (ret)
> + goto release_sg;
> +
> + return 0;
> +
> +release_sg:
> + pr_err("DMA map userptr failed: %d\n", ret);
> + sg_free_table(ttm->sg);
> + kfree(ttm->sg);
> + ttm->sg = NULL;
> + return ret;
> +}
> +
> +static int
> +kfd_mem_dmamap_attachment(struct kgd_mem *mem,
> +   struct kfd_mem_attachment *attachment)
> +{
> + switch (attachment->type) {
> + case KFD_MEM_ATT_SHARED:
> + return 0;
> + case KFD_MEM_ATT_USERPTR:
> + return kfd_mem_dmamap_userptr(mem, attachment);
> + default:
> + WARN_ON_ONCE(1);
> + }
> + return -EINVAL;
> +}
> +

Re: [PATCH v2 05/10] drm/amdgpu: Add multi-GPU DMA mapping helpers

2021-04-26 Thread Zeng, Oak
As I understand it, when one GPU maps another GPU's VRAM, this VRAM should also 
be mapped in the IOMMU page table. Normal GTT memory (versus userptr) also 
needs to be mapped in the IOMMU, but I don't see this code below. I only see 
you map userptr in the IOMMU. Maybe you map them in the IOMMU at some point 
other than memory attachment time?

Also see a nit-pick inline

Regards,
Oak 

 

On 2021-04-21, 9:31 PM, "dri-devel on behalf of Felix Kuehling" 
 
wrote:

Add BO-type specific helper functions to DMA-map and unmap
kfd_mem_attachments. Implement this functionality for userptrs by creating
one SG BO per GPU and filling it with a DMA mapping of the pages from the
original mem->bo.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   8 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 146 +-
 2 files changed, 145 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index c24b2478f445..63668433f5a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -38,11 +38,17 @@ extern uint64_t amdgpu_amdkfd_total_mem_size;

 struct amdgpu_device;

+enum kfd_mem_attachment_type {
+   KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
+   KFD_MEM_ATT_USERPTR,/* SG bo to DMA map pages from a userptr bo */
+};
+
 struct kfd_mem_attachment {
struct list_head list;
+   enum kfd_mem_attachment_type type;
+   bool is_mapped;
struct amdgpu_bo_va *bo_va;
struct amdgpu_device *adev;
-   bool is_mapped;
uint64_t va;
uint64_t pte_flags;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index fbd7e786b54e..49d1af4aa5f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -473,12 +473,117 @@ static uint64_t get_pte_flags(struct amdgpu_device 
*adev, struct kgd_mem *mem)
return pte_flags;
 }

+static int
+kfd_mem_dmamap_userptr(struct kgd_mem *mem,
+  struct kfd_mem_attachment *attachment)
+{
+   enum dma_data_direction direction =
+   mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+   DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+   struct ttm_operation_ctx ctx = {.interruptible = true};
+   struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+   struct amdgpu_device *adev = attachment->adev;
+   struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
+   struct ttm_tt *ttm = bo->tbo.ttm;
+   int ret;
+
+   ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
+   if (unlikely(!ttm->sg))
+   return -ENOMEM;
+
+   if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
+   return -EINVAL;
+
+   /* Same sequence as in amdgpu_ttm_tt_pin_userptr */
+   ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
+   ttm->num_pages, 0,
+   (u64)ttm->num_pages << PAGE_SHIFT,
+   GFP_KERNEL);
+   if (unlikely(ret))
+   goto release_sg;
Should go to a label starting from kfree below?
+
+   ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
+   if (unlikely(ret))
+   goto release_sg;
+
+   drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
+  ttm->num_pages);
+
+   amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+   ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+   if (ret)
+   goto release_sg;
+
+   return 0;
+
+release_sg:
+   pr_err("DMA map userptr failed: %d\n", ret);
+   sg_free_table(ttm->sg);
+   kfree(ttm->sg);
+   ttm->sg = NULL;
+   return ret;
+}
+
+static int
+kfd_mem_dmamap_attachment(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+   switch (attachment->type) {
+   case KFD_MEM_ATT_SHARED:
+   return 0;
+   case KFD_MEM_ATT_USERPTR:
+   return kfd_mem_dmamap_userptr(mem, attachment);
+   default:
+   WARN_ON_ONCE(1);
+   }
+   return -EINVAL;
+}
+
+static void
+kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
+struct kfd_mem_attachment *attachment)
+{
+   enum dma_data_direction direction =
+   mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+   DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+   struct ttm_operation_ctx ctx = {.interruptible = false};
+   struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+   struct amdgpu_device *adev = attachment->adev;
+