RE: [PATCH 6/6] drm/amdgpu: Do bo_va ref counting for KFD BOs

2023-01-16 Thread Errabolu, Ramesh

Comment inline.

Regards,
Ramesh

-Original Message-
From: amd-gfx  On Behalf Of Chen, 
Xiaogang
Sent: Saturday, January 14, 2023 4:28 AM
To: Kuehling, Felix ; amd-...@lists.freedesktop.org; 
dri-devel@lists.freedesktop.org
Cc: Koenig, Christian 
Subject: Re: [PATCH 6/6] drm/amdgpu: Do bo_va ref counting for KFD BOs



Reviewed-by: Xiaogang Chen 

Regards

Xiaogang

On 1/11/2023 7:31 PM, Felix Kuehling wrote:
> This is needed to correctly handle BOs imported into the GEM API, 
> which would otherwise get added twice to the same VM.
>
> Signed-off-by: Felix Kuehling 
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++
>   1 file changed, 22 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index df08e84f01d7..8b5ba2e04a79 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -370,9 +370,17 @@ static int amdgpu_amdkfd_bo_validate_and_fence(struct 
> amdgpu_bo *bo,
>   return ret;
>
>   ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
> - if (!ret)
> - dma_resv_add_fence(bo->tbo.base.resv, fence,
> -DMA_RESV_USAGE_BOOKKEEP);
> + if (ret)
> + goto unreserve_out;
> +
> + ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
Ramesh: Could this statement that reserves space for the fence be applied in patch 4?

> + if (ret)
> + goto unreserve_out;
> +
> + dma_resv_add_fence(bo->tbo.base.resv, fence,
> +DMA_RESV_USAGE_BOOKKEEP);
> +
> +unreserve_out:
>   amdgpu_bo_unreserve(bo);
>
>   return ret;
> @@ -785,6 +793,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
> struct kgd_mem *mem,
>   uint64_t va = mem->va;
>   struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
>   struct amdgpu_bo *bo[2] = {NULL, NULL};
> + struct amdgpu_bo_va *bo_va;
>   bool same_hive = false;
>   int i, ret;
>
> @@ -871,7 +880,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
> struct kgd_mem *mem,
>   pr_debug("Unable to reserve BO during memory attach");
>   goto unwind;
>   }
> - attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
> + bo_va = amdgpu_vm_bo_find(vm, bo[i]);
> + if (!bo_va)
> + bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
> + else
> + ++bo_va->ref_count;
> + attachment[i]->bo_va = bo_va;
>   amdgpu_bo_unreserve(bo[i]);
>   if (unlikely(!attachment[i]->bo_va)) {
>   ret = -ENOMEM;
> @@ -895,7 +909,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
> struct kgd_mem *mem,
>   continue;
>   if (attachment[i]->bo_va) {
>   amdgpu_bo_reserve(bo[i], true);
> - amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
> + if (--attachment[i]->bo_va->ref_count == 0)
> + amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
>   amdgpu_bo_unreserve(bo[i]);
>   list_del(&attachment[i]->list);
>   }
> @@ -912,7 +927,8 @@ static void kfd_mem_detach(struct 
> kfd_mem_attachment *attachment)
>
>   pr_debug("\t remove VA 0x%llx in entry %p\n",
>   attachment->va, attachment);
> - amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
> + if (--attachment->bo_va->ref_count == 0)
> + amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
>   drm_gem_object_put(&bo->tbo.base);
>   list_del(&attachment->list);
>   kfree(attachment);
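
(For reference, the amdgpu_vm_bo_find() call used in this patch walks the BO's
singly linked list of per-VM bases and returns the bo_va belonging to the given
VM, which is what makes a per-(BO, VM) ref count workable. A paraphrased sketch
of that helper, based on amdgpu_vm.c; details may vary by kernel version:

	struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
					       struct amdgpu_bo *bo)
	{
		struct amdgpu_vm_bo_base *base;

		/* bo->vm_bo heads a singly linked list with one entry per
		 * VM this BO has been added to; match on the VM pointer.
		 */
		for (base = bo->vm_bo; base; base = base->next) {
			if (base->vm != vm)
				continue;

			return container_of(base, struct amdgpu_bo_va, base);
		}
		return NULL;
	}

Since there is exactly one bo_va per (BO, VM) pair, a second attach can bump
bo_va->ref_count instead of calling amdgpu_vm_bo_add() again.)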


RE: [PATCH 4/6] drm/amdgpu: Attach eviction fence on alloc

2023-01-16 Thread Errabolu, Ramesh

Comment inline.

Regards,
Ramesh

-Original Message-
From: amd-gfx  On Behalf Of Felix 
Kuehling
Sent: Thursday, January 12, 2023 7:02 AM
To: amd-...@lists.freedesktop.org; dri-devel@lists.freedesktop.org
Cc: Chen, Xiaogang ; Koenig, Christian 

Subject: [PATCH 4/6] drm/amdgpu: Attach eviction fence on alloc



Instead of attaching the eviction fence when a KFD BO is first mapped, attach 
it when it is allocated or imported. This is in preparation for allowing KFD 
BOs to be mapped using the render node API.

Signed-off-by: Felix Kuehling 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 63 ++-
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 5645103beed0..79213f476493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -360,6 +360,24 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, 
uint32_t domain,
return ret;
 }

+static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+  uint32_t domain,
+  struct dma_fence *fence) 
+{
+   int ret = amdgpu_bo_reserve(bo, false);
+
+   if (ret)
+   return ret;
+
+   ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+   if (!ret)
Ramesh: Should space for fences be reserved before adding one? (See the sketch below.)
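
(For reference, patch 6/6 earlier in this digest reworks this helper to do
exactly that: dma_resv_reserve_fences() can fail with -ENOMEM and must precede
dma_resv_add_fence(), which cannot fail once a slot is reserved. A sketch of
the helper as reworked by that patch follows.)

	static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
						       uint32_t domain,
						       struct dma_fence *fence)
	{
		int ret = amdgpu_bo_reserve(bo, false);

		if (ret)
			return ret;

		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
		if (ret)
			goto unreserve_out;

		/* Reserve a fence slot first; this step can fail. */
		ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
		if (ret)
			goto unreserve_out;

		/* Adding the fence cannot fail once the slot exists. */
		dma_resv_add_fence(bo->tbo.base.resv, fence,
				   DMA_RESV_USAGE_BOOKKEEP);

	unreserve_out:
		amdgpu_bo_unreserve(bo);

		return ret;
	}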

+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_BOOKKEEP);
+   amdgpu_bo_unreserve(bo);
+
+   return ret;
+}
+
 static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
 {
	return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
@@ -1720,6 +1738,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+   } else {
+   ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
+   &avm->process_info->eviction_fence->base);
+   if (ret)
+   goto err_validate_bo;
}

if (offset)
@@ -1729,6 +1752,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(

 allocate_init_user_pages_failed:
 err_pin_bo:
+err_validate_bo:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
drm_vma_node_revoke(&gobj->vma_node, drm_priv);
 err_node_allow:
@@ -1804,10 +1828,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
if (unlikely(ret))
return ret;

-   /* The eviction fence should be removed by the last unmap.
-* TODO: Log an error condition if the bo still has the eviction fence
-* attached
-*/
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence);
Ramesh: Is it correct to call remove_eviction_fence() unconditionally? This 
procedure applies to GTT and VRAM BOs only. Furthermore, the fence on these 
BOs has already been removed in the unmap_memory() call.

pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
@@ -1931,19 +1951,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
if (unlikely(ret))
goto out_unreserve;

-   if (mem->mapped_to_gpu_memory == 0 &&
-   !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
-   /* Validate BO only once. The eviction fence gets added to BO
-* the first time it is mapped. Validate will wait for all
-* background evictions to complete.
-*/
-   ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
-   if (ret) {
-   pr_debug("Validate failed\n");
-   goto out_unreserve;
-   }
-   }
-
list_for_each_entry(entry, &mem->attachments, list) {
if (entry->bo_va->base.vm != avm || entry->is_mapped)
continue;
@@ -1970,10 +1977,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 mem->mapped_to_gpu_memory);
}

-   if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
-   dma_resv_add_fence(bo->tbo.base.resv,
-  &avm->process_info->eviction_fence->base,
-  DMA_RESV_USAGE_BOOKKEEP);
ret = unreserve_bo_and_vms(&ctx, false, false);

goto out;
@@ -1990,7 +1993,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
-   struct amdkfd_process_info 

RE: [PATCH 1/6] drm/amdgpu: Generalize KFD dmabuf import

2023-01-16 Thread Errabolu, Ramesh

A minor comment, unrelated to the patch. The comments are inline.

Regards,
Ramesh

-Original Message-
From: amd-gfx  On Behalf Of Felix 
Kuehling
Sent: Thursday, January 12, 2023 7:02 AM
To: amd-...@lists.freedesktop.org; dri-devel@lists.freedesktop.org
Cc: Chen, Xiaogang ; Koenig, Christian 

Subject: [PATCH 1/6] drm/amdgpu: Generalize KFD dmabuf import



Use proper amdgpu_gem_prime_import function to handle all kinds of imports. 
Remember the dmabuf reference to enable proper multi-GPU attachment to multiple 
VMs without erroneously re-exporting the underlying BO multiple times.

Signed-off-by: Felix Kuehling 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 38 ++-
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6f236ded5f12..e13c3493b786 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2209,30 +2209,27 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct 
amdgpu_device *adev,
struct amdgpu_bo *bo;
int ret;

-   if (dma_buf->ops != &amdgpu_dmabuf_ops)
-   /* Can't handle non-graphics buffers */
-   return -EINVAL;
-
-   obj = dma_buf->priv;
-   if (drm_to_adev(obj->dev) != adev)
-   /* Can't handle buffers from other devices */
-   return -EINVAL;
+   obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf);
+   if (IS_ERR(obj))
+   return PTR_ERR(obj);

bo = gem_to_amdgpu_bo(obj);
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
-   AMDGPU_GEM_DOMAIN_GTT)))
+   AMDGPU_GEM_DOMAIN_GTT))) {
/* Only VRAM and GTT BOs are supported */
-   return -EINVAL;
+   ret = -EINVAL;
+   goto err_put_obj;
+   }

*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-   if (!*mem)
-   return -ENOMEM;
+   if (!*mem) {
+   ret = -ENOMEM;
+   goto err_put_obj;
+   }

ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
-   if (ret) {
-   kfree(*mem);
-   return ret;
-   }
+   if (ret)
+   goto err_free_mem;

if (size)
*size = amdgpu_bo_size(bo);
@@ -2249,7 +2246,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
Ramesh: Is it correct to assign WRITE & EXECUTABLE permissions to the 
alloc_flags variable?

-   drm_gem_object_get(&bo->tbo.base);
+   get_dma_buf(dma_buf);
+   (*mem)->dmabuf = dma_buf;
(*mem)->bo = bo;
(*mem)->va = va;
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
@@ -2261,6 +2259,12 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct 
amdgpu_device *adev,
(*mem)->is_imported = true;

return 0;
+
+err_free_mem:
+   kfree(*mem);
+err_put_obj:
+   drm_gem_object_put(obj);
+   return ret;
 }

 /* Evict a userptr BO by stopping the queues if necessary
--
2.34.1
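
(For completeness: the get_dma_buf() reference taken above implies a matching
dma_buf_put() on teardown. A hedged sketch of that release side - it is not
part of the hunk shown here, and its exact placement in
amdgpu_amdkfd_gpuvm_free_memory_of_gpu is an assumption:

	/* Drop the references taken at import time (assumed placement
	 * in the free path; not shown in the hunk above).
	 */
	if (mem->dmabuf) {
		dma_buf_put(mem->dmabuf);
		mem->dmabuf = NULL;
	}
	drm_gem_object_put(&mem->bo->tbo.base);

Without the dma_buf_put(), every import would leak a dma_buf reference.)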


RE: [PATCH] Whitelist AMD host bridge device(s) to enable P2P DMA

2021-08-11 Thread Errabolu, Ramesh
I will investigate it further. I am using the DKMS-5.11 branch. The codebase I 
am building from has the right definition, i.e. it allows P2PDMA for Zen CPUs.

Regards,
Ramesh

-Original Message-
From: Alex Deucher  
Sent: Wednesday, August 11, 2021 4:07 PM
To: Kuehling, Felix 
Cc: Errabolu, Ramesh ; amd-gfx list 
; Maling list - DRI developers 
; Bjorn Helgaas ; Linux 
PCI ; LKML 
Subject: Re: [PATCH] Whitelist AMD host bridge device(s) to enable P2P DMA


On Wed, Aug 11, 2021 at 4:50 PM Felix Kuehling  wrote:
>
>
> On 2021-08-11 at 3:29 p.m., Alex Deucher wrote:
> > On Wed, Aug 11, 2021 at 3:11 PM Ramesh Errabolu  
> > wrote:
> >> Current implementation will disallow P2P DMA if the participating 
> >> devices belong to different root complexes. Implementation allows 
> >> this default behavior to be overridden for whitelisted devices. The 
> >> patch adds an AMD host bridge to be whitelisted
> > Why do we need this?  cpu_supports_p2pdma() should return true for 
> > all AMD Zen CPUs.
>
> This is part of our on-going work to get P2P support upstream. We want 
> to use pci_p2pdma_distance_many to determine whether P2P is possible 
> between a pair of devices. This whitelist is used in this function. 
> This will affect the P2P links reported in the topology and it will be 
> double-checked in the BO mapping function to ensure a peer mapping is legal.
>
> I think this change is a bit free of context at the moment, as we're 
> still working on a few other loose ends for P2P support in our 
> internal branch. I'm hoping we'll have a bigger patch series for 
> upstreamable KFD P2P support ready in a few weeks. I also think we'll 
> probably want to add a few more PCI IDs for other supported AMD root 
> complexes.

We don't need to keep adding AMD root complexes.  You must be using an older 
kernel or something.  All root complexes on all Zen platforms support P2P DMA.  
See:

commit dea286bb71baded7d2fb4f090e3b9fd2c1ccac58
Author: Logan Gunthorpe 
Date:   Wed Jul 29 17:18:44 2020 -0600

PCI/P2PDMA: Allow P2PDMA on AMD Zen and newer CPUs

Allow P2PDMA if the CPU vendor is AMD and family is 0x17 (Zen) or greater.

[bhelgaas: commit log, simplify #if/#else/#endif]
Link: 
https://lore.kernel.org/r/20200729231844.4653-1-logang@deltatee.com
Signed-off-by: Logan Gunthorpe 
Signed-off-by: Bjorn Helgaas 
Reviewed-by: Alex Deucher 
Cc: Christian König 
Cc: Huang Rui 

Alex
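
(For context, that commit keys P2PDMA support off the CPU itself instead of
whitelisting each host bridge ID. Roughly, as sketched from the upstream
change - exact code may differ:

	#ifdef CONFIG_X86
	static bool cpu_supports_p2pdma(void)
	{
		struct cpuinfo_x86 *c = &cpu_data(0);

		/* Any AMD CPU whose family is Zen (0x17) or newer supports p2pdma */
		if (c->x86_vendor == X86_VENDOR_AMD && c->x86 >= 0x17)
			return true;

		return false;
	}
	#else
	static bool cpu_supports_p2pdma(void)
	{
		return false;
	}
	#endif

With this in place, pci_p2pdma_distance_many() succeeds on Zen systems without
any per-root-complex whitelist entries.)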


>
> Regards,
>   Felix
>
>
> >
> > Alex
> >
> >> Signed-off-by: Ramesh Errabolu 
> >> ---
> >>  drivers/pci/p2pdma.c | 2 ++
> >>  1 file changed, 2 insertions(+)
> >>
> >> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 
> >> 196382630363..7003bb9faf23 100644
> >> --- a/drivers/pci/p2pdma.c
> >> +++ b/drivers/pci/p2pdma.c
> >> @@ -305,6 +305,8 @@ static const struct pci_p2pdma_whitelist_entry {
> >> {PCI_VENDOR_ID_INTEL,   0x2032, 0},
> >> {PCI_VENDOR_ID_INTEL,   0x2033, 0},
> >> {PCI_VENDOR_ID_INTEL,   0x2020, 0},
> >> +   /* AMD Host Bridge Devices */
> >> +   {PCI_VENDOR_ID_AMD, 0x1480, 0},
> >> {}
> >>  };
> >>
> >> --
> >> 2.31.1
> >>


RE: [PATCH v2 10/10] drm/amdgpu: Move dmabuf attach/detach to backend_(un)bind

2021-05-10 Thread Errabolu, Ramesh

Acked-by: Ramesh Errabolu 

-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Thursday, April 22, 2021 6:20 AM
To: Kuehling, Felix ; amd-...@lists.freedesktop.org; 
dri-devel@lists.freedesktop.org
Subject: Re: [PATCH v2 10/10] drm/amdgpu: Move dmabuf attach/detach to 
backend_(un)bind

On 2021-04-22 at 03:30, Felix Kuehling wrote:
> The dmabuf attachment should be updated by moving the SG BO to 
> DOMAIN_CPU and back to DOMAIN_GTT. This does not necessarily invoke 
> the populate/unpopulate callbacks. Do this in backend_bind/unbind instead.
>
> Signed-off-by: Felix Kuehling 

Reviewed-by: Christian König 

> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  3 --
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 51 +--
>   2 files changed, 25 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 18a1f9222a59..68e6ce8dcf33 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -582,9 +582,6 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
>   
>   amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
>   ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> - /* FIXME: This does not guarantee that amdgpu_ttm_tt_unpopulate is
> -  * called
> -  */
>   }
>   
>   static void
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 7e7d8330d64b..fc2a8d681dbc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -910,7 +910,23 @@ static int amdgpu_ttm_backend_bind(struct ttm_device 
> *bdev,
>   DRM_ERROR("failed to pin userptr\n");
>   return r;
>   }
> + } else if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
> + if (!ttm->sg) {
> + struct dma_buf_attachment *attach;
> + struct sg_table *sgt;
> +
> + attach = gtt->gobj->import_attach;
> + sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
> + if (IS_ERR(sgt))
> + return PTR_ERR(sgt);
> +
> + ttm->sg = sgt;
> + }
> +
> + drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
> +ttm->num_pages);
>   }
> +
>   if (!ttm->num_pages) {
>   WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
>ttm->num_pages, bo_mem, ttm);
> @@ -1037,8 +1053,15 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
>   int r;
>   
>   /* if the pages have userptr pinning then clear that first */
> - if (gtt->userptr)
> + if (gtt->userptr) {
>   amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
> + } else if (ttm->sg && gtt->gobj->import_attach) {
> + struct dma_buf_attachment *attach;
> +
> + attach = gtt->gobj->import_attach;
> + dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
> + ttm->sg = NULL;
> + }
>   
>   if (!gtt->bound)
>   return;
> @@ -1125,23 +1148,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_device 
> *bdev,
>   return 0;
>   }
>   
> - if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
> - if (!ttm->sg) {
> - struct dma_buf_attachment *attach;
> - struct sg_table *sgt;
> -
> - attach = gtt->gobj->import_attach;
> - sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
> - if (IS_ERR(sgt))
> - return PTR_ERR(sgt);
> -
> - ttm->sg = sgt;
> - }
> -
> - drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
> -ttm->num_pages);
> + if (ttm->page_flags & TTM_PAGE_FLAG_SG)
>   return 0;
> - }
>   
>   return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
>   }
> @@ -1165,15 +1173,6 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device 
> *bdev,
>   return;
>   }
>   
> - if (ttm->sg && gtt->gobj->import_attach) {
> - struct dma_buf_attachment *attach;
> -
> - attach = gtt->gobj->import_attach;
> - dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
> - ttm->sg = NULL;
> - return;
> - }
> -
>   if (ttm->page_flags & TTM_PAGE_FLAG_SG)
>   return;
>   


RE: [PATCH v2 09/10] drm/ttm: Don't count pages in SG BOs against pages_limit

2021-05-10 Thread Errabolu, Ramesh

Acked-by: Ramesh Errabolu 

-Original Message-
From: amd-gfx  On Behalf Of Kuehling, 
Felix
Sent: Wednesday, April 21, 2021 8:31 PM
To: amd-...@lists.freedesktop.org; dri-devel@lists.freedesktop.org
Subject: [PATCH v2 09/10] drm/ttm: Don't count pages in SG BOs against 
pages_limit

Pages in SG BOs were not allocated by TTM. So don't count them against TTM's 
pages limit.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/ttm/ttm_tt.c | 27 ++-
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 
5d8820725b75..e8b8c3257392 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
if (ttm_tt_is_populated(ttm))
return 0;
 
-   atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
-   if (bdev->pool.use_dma32)
-   atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
+   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+   atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
+   if (bdev->pool.use_dma32)
+   atomic_long_add(ttm->num_pages,
+   &ttm_dma32_pages_allocated);
+   }
 
while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
   atomic_long_read(&ttm_dma32_pages_allocated) >
@@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
return 0;
 
 error:
-   atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
-   if (bdev->pool.use_dma32)
-   atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
+   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+   atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
+   if (bdev->pool.use_dma32)
+   atomic_long_sub(ttm->num_pages,
+   &ttm_dma32_pages_allocated);
+   }
return ret;
 }
 EXPORT_SYMBOL(ttm_tt_populate);
@@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
ttm_tt *ttm)
else
ttm_pool_free(&bdev->pool, ttm);
 
-   atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
-   if (bdev->pool.use_dma32)
-   atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
+   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+   atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
+   if (bdev->pool.use_dma32)
+   atomic_long_sub(ttm->num_pages,
+   &ttm_dma32_pages_allocated);
+   }
 
ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;  }
--
2.31.1



RE: [PATCH v2 08/10] drm/amdgpu: Add DMA mapping of GTT BOs

2021-05-10 Thread Errabolu, Ramesh

Acked-by: Ramesh Errabolu 

-Original Message-
From: amd-gfx  On Behalf Of Kuehling, 
Felix
Sent: Tuesday, April 27, 2021 10:09 AM
To: Zeng, Oak ; amd-...@lists.freedesktop.org; 
dri-devel@lists.freedesktop.org
Subject: Re: [PATCH v2 08/10] drm/amdgpu: Add DMA mapping of GTT BOs

On 2021-04-27 at 10:29 a.m., Zeng, Oak wrote:
> Regards,
> Oak
>
>  
>
> On 2021-04-26, 11:56 PM, "Kuehling, Felix"  wrote:
>
> On 2021-04-26 at 8:35 p.m., Zeng, Oak wrote:
> > Regards,
> > Oak 
> >
> >  
> >
> > On 2021-04-21, 9:31 PM, "amd-gfx on behalf of Felix Kuehling" 
>  
> wrote:
> >
> > Use DMABufs with dynamic attachment to DMA-map GTT BOs on other 
> GPUs.
> >
> > Signed-off-by: Felix Kuehling 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +
> >  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 76 
> ++-
> >  2 files changed, 77 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 63668433f5a6..b706e5a54782 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -41,6 +41,7 @@ struct amdgpu_device;
> >  enum kfd_mem_attachment_type {
> > KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another 
> attachment's */
> > KFD_MEM_ATT_USERPTR,/* SG bo to DMA map pages from a 
> userptr bo */
> > +   KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */
> >  };
> >
> >  struct kfd_mem_attachment {
> > @@ -56,6 +57,7 @@ struct kfd_mem_attachment {
> >  struct kgd_mem {
> > struct mutex lock;
> > struct amdgpu_bo *bo;
> > +   struct dma_buf *dmabuf;
> > struct list_head attachments;
> > /* protected by amdkfd_process_info.lock */
> > struct ttm_validate_buffer validate_list;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > index 9eeedd0c7920..18a1f9222a59 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > @@ -524,6 +524,16 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
> > return ret;
> >  }
> >
> > +static int
> > +kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
> > +{
> > +   struct ttm_operation_ctx ctx = {.interruptible = true};
> > +   struct amdgpu_bo *bo = attachment->bo_va->base.bo;
> > +
> > +   amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
> > +   return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> > How does this work? The function name says this is DMA-mapping a
> > buffer, but from the implementation it is just a placement and
> > validation.
>
> Conceptually, calling ttm_bo_validate ensures that the BO is in the
> specified domain, in this case GTT. Before calling validate, it can be
> in the CPU domain, which means it may be swapped to disk so it's not GPU
> accessible. For a DMABuf attachment, the CPU domain means, that the
> DMABuf is not attached because the underlying memory object may be on
> the move or swapped out.
>
> The actual implementation of the dmabuf attachment is currently in
> amdgpu_ttm_populate/unpopulate. This is incorrect. Patch 10 in this
> series fixes that to move the actual dmabuf attachment into
> amdgpu_ttm_backend_bind/unbind, which is called from amdgpu_bo_move when
> a BO is moved between the CPU and GTT domains.
>
> Thanks for the explanation. One more thing I don't quite understand: before 
> this series, GTT memory should already have been validated somewhere before 
> GTT memory is mapped to GPU. You added GTT memory validation here - will this 
> validation be duplicated?

When you have N GPUs there are now N BOs involved. Each GPU needs its own BO 
because it needs its own DMA mapping. There will be one actual GTT BO that 
allocates physical pages in TTM. The other BOs are dmabuf imports that DMA-map 
the same physical pages for access by the other GPUs.

The validate call here validates one of the dmabuf imports. This does not 
duplicate the validation of the underlying TTM BO with the actual physical 
memory allocation.


>
> The function naming kfd_mem_dmamap_dmabuf is still confusing since it seems 
> to me it is only some preparation work before dynamically DMA-mapping GTT 
> memory.

No, this series is not just preparation. It implements DMA mapping of BOs for 
multiple GPUs. TTM already handles DMA mapping of the memory for the device 
where the memory was allocated. (Yes, even GTT memory is associated with 

RE: [PATCH v2 07/10] drm/amdgpu: Move kfd_mem_attach outside reservation

2021-05-10 Thread Errabolu, Ramesh

Acked-by: Ramesh Errabolu 

-Original Message-
From: amd-gfx  On Behalf Of Kuehling, 
Felix
Sent: Wednesday, April 21, 2021 8:31 PM
To: amd-...@lists.freedesktop.org; dri-devel@lists.freedesktop.org
Subject: [PATCH v2 07/10] drm/amdgpu: Move kfd_mem_attach outside reservation

This is needed to avoid deadlocks with DMA buf import in the next patch.
Also move PT/PD validation out of kfd_mem_attach; that way the caller can do 
this unconditionally.

Signed-off-by: Felix Kuehling 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 75 +++
 1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7d25d886b98c..9eeedd0c7920 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -577,6 +577,34 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
}
 }
 
+static int
+kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
+  struct amdgpu_bo **bo)
+{
+   unsigned long bo_size = mem->bo->tbo.base.size;
+   struct drm_gem_object *gobj;
+   int ret;
+
+   ret = amdgpu_bo_reserve(mem->bo, false);
+   if (ret)
+   return ret;
+
+   ret = amdgpu_gem_object_create(adev, bo_size, 1,
+  AMDGPU_GEM_DOMAIN_CPU,
+  0, ttm_bo_type_sg,
+  mem->bo->tbo.base.resv,
+  &gobj);
+   if (ret)
+   return ret;
+
+   amdgpu_bo_unreserve(mem->bo);
+
+   *bo = gem_to_amdgpu_bo(gobj);
+   (*bo)->parent = amdgpu_bo_ref(mem->bo);
+
+   return 0;
+}
+
 /* kfd_mem_attach - Add a BO to a VM
  *
 * Everything that needs to be done only once when a BO is first added
@@ -598,7 +626,6 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
uint64_t va = mem->va;
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
struct amdgpu_bo *bo[2] = {NULL, NULL};
-   struct drm_gem_object *gobj;
int i, ret;
 
if (!va) {
@@ -632,15 +659,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
/* Create an SG BO to DMA-map userptrs on other GPUs */
attachment[i]->type = KFD_MEM_ATT_USERPTR;
-   ret = amdgpu_gem_object_create(adev, bo_size, 1,
-  AMDGPU_GEM_DOMAIN_CPU,
-  0, ttm_bo_type_sg,
-  mem->bo->tbo.base.resv,
-  &gobj);
+   ret = kfd_mem_attach_userptr(adev, mem, [i]);
if (ret)
goto unwind;
-   bo[i] = gem_to_amdgpu_bo(gobj);
-   bo[i]->parent = amdgpu_bo_ref(mem->bo);
} else {
/* FIXME: Need to DMA-map other BO types */
attachment[i]->type = KFD_MEM_ATT_SHARED;
@@ -665,13 +686,6 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
va += bo_size;
}
 
-   /* Allocate validate page tables if needed */
-   ret = vm_validate_pt_pd_bos(vm);
-   if (unlikely(ret)) {
-   pr_err("validate_pt_pd_bos() failed\n");
-   goto unwind;
-   }
-
return 0;
 
 unwind:
@@ -1478,12 +1492,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
mem->va + bo_size * (1 + mem->aql_queue));
 
+   ret = unreserve_bo_and_vms(&ctx, false, false);
+
/* Remove from VM internal data structures */
list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
kfd_mem_detach(entry);
 
-   ret = unreserve_bo_and_vms(&ctx, false, false);
-
/* Free the sync object */
amdgpu_sync_free(&mem->sync);
 
@@ -1560,6 +1574,12 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
mem->va + bo_size * (1 + mem->aql_queue),
avm, domain_string(domain));
 
+   if (!kfd_mem_is_attached(avm, mem)) {
+   ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
+   if (ret)
+   goto out;
+   }
+
ret = reserve_bo_and_vm(mem, avm, &ctx);
if (unlikely(ret))
goto out;
@@ -1573,15 +1593,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
is_invalid_userptr = true;
 
-   if (!kfd_mem_is_attached(avm, mem)) {
-   

RE: [PATCH v2 06/10] drm/amdgpu: DMA map/unmap when updating GPU mappings

2021-05-10 Thread Errabolu, Ramesh

Acked-by: Ramesh Errabolu 

-Original Message-
From: amd-gfx  On Behalf Of Kuehling, 
Felix
Sent: Monday, April 26, 2021 10:48 PM
To: Zeng, Oak ; amd-...@lists.freedesktop.org; 
dri-devel@lists.freedesktop.org
Subject: Re: [PATCH v2 06/10] drm/amdgpu: DMA map/unmap when updating GPU 
mappings

On 2021-04-26 at 8:23 p.m., Zeng, Oak wrote:
> Regards,
> Oak
>
>  
>
> On 2021-04-21, 9:31 PM, "dri-devel on behalf of Felix Kuehling" 
>  
> wrote:
>
> DMA map kfd_mem_attachments in update_gpuvm_pte. This function is called
> with the BO and page tables reserved, so we can safely update the DMA
> mapping.
>
> DMA unmap when a BO is unmapped from a GPU and before updating mappings
> in restore workers.
>
> Signed-off-by: Felix Kuehling 
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 56 ++-
>  1 file changed, 29 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 49d1af4aa5f1..7d25d886b98c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -961,11 +961,12 @@ static int unreserve_bo_and_vms(struct 
> bo_vm_reservation_context *ctx,
>   return ret;
>  }
>
> -static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
> +static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
>   struct kfd_mem_attachment *entry,
>   struct amdgpu_sync *sync)
>  {
>   struct amdgpu_bo_va *bo_va = entry->bo_va;
> + struct amdgpu_device *adev = entry->adev;
>   struct amdgpu_vm *vm = bo_va->base.vm;
>
>   amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
> @@ -974,15 +975,20 @@ static int unmap_bo_from_gpuvm(struct 
> amdgpu_device *adev,
>
>   amdgpu_sync_fence(sync, bo_va->last_pt_update);
>
> - return 0;
> + kfd_mem_dmaunmap_attachment(mem, entry);
>  }
>
> -static int update_gpuvm_pte(struct amdgpu_device *adev,
> - struct kfd_mem_attachment *entry,
> - struct amdgpu_sync *sync)
> +static int update_gpuvm_pte(struct kgd_mem *mem,
> + struct kfd_mem_attachment *entry,
> + struct amdgpu_sync *sync)
>  {
> - int ret;
>   struct amdgpu_bo_va *bo_va = entry->bo_va;
> + struct amdgpu_device *adev = entry->adev;
> + int ret;
> +
> + ret = kfd_mem_dmamap_attachment(mem, entry);
> Should the dma mapping be done in the kfd_mem_attach function when a memory 
> object is attached to a vm the first time? Since each memory object can be 
> mapped to many GPUs or many VMs, doing the dma mapping the first time it is 
> attached could simplify the logic. Or even simpler, maybe we can just dma map 
> when a memory object is created - it wastes some iommu page table entries but 
> really simplifies the logic in this patch series. I found this series is not 
> very easy to understand.

The DMA mapping must be updated every time the physical memory allocation 
changes, e.g. after a BO was evicted and restored. Basically, if the physical 
pages of the BO change, we need to update the DMA mapping to point to those new 
pages. Therefore I added this in the update_gpu_vm_pte function, which is 
called after a BO has been validated the first time, or revalidated after an 
eviction.

You'll also see that I call dmaunmap in the re-validation cases (in the restore 
workers below) to ensure that we don't leak DMA mappings.

Regards,
  Felix


> + if (ret)
> + return ret;
>
>   /* Update the page tables  */
>   ret = amdgpu_vm_bo_update(adev, bo_va, false);
> @@ -994,14 +1000,15 @@ static int update_gpuvm_pte(struct amdgpu_device 
> *adev,
>   return amdgpu_sync_fence(sync, bo_va->last_pt_update);
>  }
>
> -static int map_bo_to_gpuvm(struct amdgpu_device *adev,
> - struct kfd_mem_attachment *entry, struct amdgpu_sync *sync,
> - bool no_update_pte)
> +static int map_bo_to_gpuvm(struct kgd_mem *mem,
> +struct kfd_mem_attachment *entry,
> +struct amdgpu_sync *sync,
> +bool no_update_pte)
>  {
>   int ret;
>
>   /* Set virtual address for the allocation */
> - ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
> + ret = amdgpu_vm_bo_map(entry->adev, entry->bo_va, entry->va, 0,
>  amdgpu_bo_size(entry->bo_va->base.bo),
>  entry->pte_flags);
>   if (ret) {
> @@ -1013,7 +1020,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device 
> *adev,
>   if (no_update_pte)
>   return 0;
>
> - ret = update_gpuvm_pte(adev, entry, sync);
> + ret = update_gpuvm_pte(mem, entry, sync);
>   if (ret) {
>   

RE: [PATCH v2 05/10] drm/amdgpu: Add multi-GPU DMA mapping helpers

2021-05-10 Thread Errabolu, Ramesh

Acked-by: Ramesh Errabolu 

-Original Message-
From: amd-gfx  On Behalf Of Kuehling, 
Felix
Sent: Monday, April 26, 2021 10:41 PM
To: Zeng, Oak ; amd-...@lists.freedesktop.org; 
dri-devel@lists.freedesktop.org
Subject: Re: [PATCH v2 05/10] drm/amdgpu: Add multi-GPU DMA mapping helpers

On 2021-04-26 at 8:09 p.m., Zeng, Oak wrote:
> As I understand it, when one GPU maps another GPU's vram, this vram should 
> also be mapped in the iommu page table. Also normal GTT memory (versus userptr) 
> needs to be mapped in the iommu. But I don't see this code below.

Right, I'm not solving all problems at once. The next patch is there to handle 
GTT BOs.

Peer mappings of doorbells, MMIO and VRAM still need to be handled in the 
future. I'm trying to fix the worst issues first. This series should get 99% of 
real world tests working.


>  I only see you map userptrs in the iommu. Maybe you map them in the iommu at 
> some point other than memory attachment time?
>
> Also see a nit-pick inline
>
> Regards,
> Oak
>
>  
>
> On 2021-04-21, 9:31 PM, "dri-devel on behalf of Felix Kuehling" 
>  
> wrote:
>
> Add BO-type specific helpers functions to DMA-map and unmap
> kfd_mem_attachments. Implement this functionality for userptrs by creating
> one SG BO per GPU and filling it with a DMA mapping of the pages from the
> original mem->bo.
>
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   8 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 146 +-
>  2 files changed, 145 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index c24b2478f445..63668433f5a6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -38,11 +38,17 @@ extern uint64_t amdgpu_amdkfd_total_mem_size;
>
>  struct amdgpu_device;
>
> +enum kfd_mem_attachment_type {
> + KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
> + KFD_MEM_ATT_USERPTR,/* SG bo to DMA map pages from a userptr bo */
> +};
> +
>  struct kfd_mem_attachment {
>   struct list_head list;
> + enum kfd_mem_attachment_type type;
> + bool is_mapped;
>   struct amdgpu_bo_va *bo_va;
>   struct amdgpu_device *adev;
> - bool is_mapped;
>   uint64_t va;
>   uint64_t pte_flags;
>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index fbd7e786b54e..49d1af4aa5f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -473,12 +473,117 @@ static uint64_t get_pte_flags(struct amdgpu_device 
> *adev, struct kgd_mem *mem)
>   return pte_flags;
>  }
>
> +static int
> +kfd_mem_dmamap_userptr(struct kgd_mem *mem,
> +struct kfd_mem_attachment *attachment)
> +{
> + enum dma_data_direction direction =
> + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
> + DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
> + struct ttm_operation_ctx ctx = {.interruptible = true};
> + struct amdgpu_bo *bo = attachment->bo_va->base.bo;
> + struct amdgpu_device *adev = attachment->adev;
> + struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
> + struct ttm_tt *ttm = bo->tbo.ttm;
> + int ret;
> +
> + ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
> + if (unlikely(!ttm->sg))
> + return -ENOMEM;
> +
> + if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
> + return -EINVAL;
> +
> + /* Same sequence as in amdgpu_ttm_tt_pin_userptr */
> + ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
> + ttm->num_pages, 0,
> + (u64)ttm->num_pages << PAGE_SHIFT,
> + GFP_KERNEL);
> + if (unlikely(ret))
> + goto release_sg;
> Should this go to a label starting from the kfree below?

Thanks, I'll fix that.

Regards,
  Felix
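
(The acknowledged fix would roughly split the unwind into two labels, so
sg_free_table() runs only after the table was actually allocated. A sketch
under that assumption, with a reduced, hypothetical signature - the real
helper takes the kgd_mem and attachment:

	static int dmamap_userptr_sketch(struct device *dev, struct ttm_tt *ttm,
					 struct ttm_tt *src_ttm,
					 enum dma_data_direction direction)
	{
		int ret;

		ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
		if (unlikely(!ttm->sg))
			return -ENOMEM;

		ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
						ttm->num_pages, 0,
						(u64)ttm->num_pages << PAGE_SHIFT,
						GFP_KERNEL);
		if (unlikely(ret))
			goto free_sg;		/* table was never allocated */

		ret = dma_map_sgtable(dev, ttm->sg, direction, 0);
		if (unlikely(ret))
			goto release_sg;	/* table exists, free it too */

		return 0;

	release_sg:
		sg_free_table(ttm->sg);
	free_sg:
		kfree(ttm->sg);
		ttm->sg = NULL;
		return ret;
	}
)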


> +
> + ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
> + if (unlikely(ret))
> + goto release_sg;
> +
> + drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
> +ttm->num_pages);
> +
> + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
> + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> + if (ret)
> + goto release_sg;
> +
> + return 0;
> +
> +release_sg:
> + pr_err("DMA map userptr failed: %d\n", ret);
> + sg_free_table(ttm->sg);
> + kfree(ttm->sg);
> + ttm->sg = NULL;
> + return ret;
> +}
> +
> +static int
> +kfd_mem_dmamap_attachment(struct kgd_mem *mem,
> +   

RE: [PATCH v2 04/10] drm/amdgpu: Simplify AQL queue mapping

2021-05-10 Thread Errabolu, Ramesh

Acked-by: Ramesh Errabolu 

-Original Message-
From: amd-gfx  On Behalf Of Kuehling, 
Felix
Sent: Friday, April 23, 2021 2:23 AM
To: Zeng, Oak ; amd-...@lists.freedesktop.org; 
dri-devel@lists.freedesktop.org
Subject: Re: [PATCH v2 04/10] drm/amdgpu: Simplify AQL queue mapping

> On 2021-04-22 at 9:33 p.m., Zeng, Oak wrote:
> Regards,
> Oak
>
>  
>
> On 2021-04-21, 9:31 PM, "amd-gfx on behalf of Felix Kuehling" 
>  
> wrote:
>
> Do AQL queue double-mapping with a single attach call. That will make it
> easier to create per-GPU BOs later, to be shared between the two BO VA
> mappings on the same GPU.
>
> Freeing the attachments is not necessary if map_to_gpu fails. These will be
> cleaned up when the kgd_mem object is destroyed in
> amdgpu_amdkfd_gpuvm_free_memory_of_gpu.
>
> Signed-off-by: Felix Kuehling 
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 103 --
>  1 file changed, 48 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 34c9a2d0028e..fbd7e786b54e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -486,70 +486,76 @@ static uint64_t get_pte_flags(struct amdgpu_device 
> *adev, struct kgd_mem *mem)
>   * 4a.  Validate new page tables and directories
>   */
>  static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem 
> *mem,
> - struct amdgpu_vm *vm, bool is_aql,
> - struct kfd_mem_attachment **p_attachment)
> + struct amdgpu_vm *vm, bool is_aql)
>  {
>   unsigned long bo_size = mem->bo->tbo.base.size;
>   uint64_t va = mem->va;
> - struct kfd_mem_attachment *attachment;
> - struct amdgpu_bo *bo;
> - int ret;
> + struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
> + struct amdgpu_bo *bo[2] = {NULL, NULL};
> + int i, ret;
>
>   if (!va) {
>   pr_err("Invalid VA when adding BO to VM\n");
>   return -EINVAL;
>   }
>
> - if (is_aql)
> - va += bo_size;
> -
> - attachment = kzalloc(sizeof(*attachment), GFP_KERNEL);
> - if (!attachment)
> - return -ENOMEM;
> + for (i = 0; i <= is_aql; i++) {
> + attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
> + if (unlikely(!attachment[i])) {
> + ret = -ENOMEM;
> + goto unwind;
> + }
>
> - pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
> - va + bo_size, vm);
> + pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
> +  va + bo_size, vm);
>
> - /* FIXME: For now all attachments use the same BO. This is incorrect
> -  * because one BO can only have one DMA mapping for one GPU. We need
> -  * one BO per GPU, e.g. a DMABuf import with dynamic attachment. This
> -  * will be addressed one BO-type at a time in subsequent patches.
> -  */
> - bo = mem->bo;
> - drm_gem_object_get(&bo->tbo.base);
> + /* FIXME: For now all attachments use the same BO. This is
> +  * incorrect because one BO can only have one DMA mapping
> +  * for one GPU. We need one BO per GPU, e.g. a DMABuf
> +  * import with dynamic attachment. This will be addressed
> +  * one BO-type at a time in subsequent patches.
> +  */
> + bo[i] = mem->bo;
> + drm_gem_object_get(&bo[i]->tbo.base);
>
> - /* Add BO to VM internal data structures*/
> - attachment->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
> - if (!attachment->bo_va) {
> - ret = -EINVAL;
> - pr_err("Failed to add BO object to VM. ret == %d\n",
> - ret);
> - goto err_vmadd;
> - }
> + /* Add BO to VM internal data structures */
> + attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
> Just for discussion. Are we allowed to add one bo twice to a vm? When I 
> looked at amdgpu_vm_bo_base_init (called by amdgpu_vm_bo_add), line:
> bo->vm_bo = base;
> when you add the same bo to a vm the second time, bo->vm_bo will be 
> overwritten. I am not sure whether this will cause an issue later.
> This is not introduced by your code. The original code (calling 
> kfd_mem_attach twice for aql) has the same problem.

If you just add one more line of context, you'll see that bo->vm_bo is the 
start of a single linked list of struct amdgpu_vm_bo_base. So adding a BO to a 
VM multiple times just extends that single-linked list:

    base->next = bo->vm_bo;
    bo->vm_bo = base;

Regards,
  Felix


> + if (unlikely(!attachment[i]->bo_va)) {
> + ret = -ENOMEM;
> +   

RE: [PATCH v2 03/10] drm/amdgpu: Keep a bo-reference per-attachment

2021-05-10 Thread Errabolu, Ramesh

Acked-by: Ramesh Errabolu 

-Original Message-
From: amd-gfx  On Behalf Of Kuehling, 
Felix
Sent: Wednesday, April 21, 2021 8:31 PM
To: amd-...@lists.freedesktop.org; dri-devel@lists.freedesktop.org
Subject: [PATCH v2 03/10] drm/amdgpu: Keep a bo-reference per-attachment

For now they all reference the same BO. For correct DMA mappings they will 
refer to different BOs per-GPU.

Signed-off-by: Felix Kuehling 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 22 ++-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index fee4c64dd051..34c9a2d0028e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -489,11 +489,11 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
struct amdgpu_vm *vm, bool is_aql,
struct kfd_mem_attachment **p_attachment)  {
-   int ret;
-   struct kfd_mem_attachment *attachment;
-   struct amdgpu_bo *bo = mem->bo;
+   unsigned long bo_size = mem->bo->tbo.base.size;
uint64_t va = mem->va;
-   unsigned long bo_size = bo->tbo.base.size;
+   struct kfd_mem_attachment *attachment;
+   struct amdgpu_bo *bo;
+   int ret;
 
if (!va) {
pr_err("Invalid VA when adding BO to VM\n");
@@ -510,6 +510,14 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
va + bo_size, vm);
 
+   /* FIXME: For now all attachments use the same BO. This is incorrect
+* because one BO can only have one DMA mapping for one GPU. We need
+* one BO per GPU, e.g. a DMABuf import with dynamic attachment. This
+* will be addressed one BO-type at a time in subsequent patches.
+*/
+   bo = mem->bo;
+   drm_gem_object_get(&bo->tbo.base);
+
/* Add BO to VM internal data structures*/
attachment->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
if (!attachment->bo_va) {
@@ -529,7 +537,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
 
/* Allocate validate page tables if needed */
ret = vm_validate_pt_pd_bos(vm);
-   if (ret) {
+   if (unlikely(ret)) {
pr_err("validate_pt_pd_bos() failed\n");
goto err_alloc_pts;
}
@@ -540,15 +548,19 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
amdgpu_vm_bo_rmv(adev, attachment->bo_va);
list_del(>list);
 err_vmadd:
+   drm_gem_object_put(&bo->tbo.base);
kfree(attachment);
return ret;
 }
 
 static void kfd_mem_detach(struct kfd_mem_attachment *attachment)  {
+   struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
pr_debug("\t remove VA 0x%llx in entry %p\n",
attachment->va, attachment);
amdgpu_vm_bo_rmv(attachment->adev, attachment->bo_va);
+   drm_gem_object_put(&bo->tbo.base);
list_del(&attachment->list);
kfree(attachment);
 }
--
2.31.1



RE: [PATCH v2 02/10] drm/amdgpu: Rename kfd_bo_va_list to kfd_mem_attachment

2021-05-10 Thread Errabolu, Ramesh

Acked-by: Ramesh Errabolu 

-Original Message-
From: amd-gfx  On Behalf Of Kuehling, 
Felix
Sent: Wednesday, April 21, 2021 8:31 PM
To: amd-...@lists.freedesktop.org; dri-devel@lists.freedesktop.org
Subject: [PATCH v2 02/10] drm/amdgpu: Rename kfd_bo_va_list to 
kfd_mem_attachment

This name is more fitting, especially for the changes coming next to support 
multi-GPU systems with proper DMA mappings. Cleaned up the code and renamed 
some related functions and variables to improve readability.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   8 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 209 +-
 2 files changed, 104 insertions(+), 113 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 313ee49b9f17..c24b2478f445 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -38,10 +38,10 @@ extern uint64_t amdgpu_amdkfd_total_mem_size;
 
 struct amdgpu_device;
 
-struct kfd_bo_va_list {
-   struct list_head bo_list;
+struct kfd_mem_attachment {
+   struct list_head list;
struct amdgpu_bo_va *bo_va;
-   void *kgd_dev;
+   struct amdgpu_device *adev;
bool is_mapped;
uint64_t va;
uint64_t pte_flags;
@@ -50,7 +50,7 @@ struct kfd_bo_va_list {
 struct kgd_mem {
struct mutex lock;
struct amdgpu_bo *bo;
-   struct list_head bo_va_list;
+   struct list_head attachments;
/* protected by amdkfd_process_info.lock */
struct ttm_validate_buffer validate_list;
struct ttm_validate_buffer resv_list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index dfa025d694f8..fee4c64dd051 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -72,16 +72,16 @@ static inline struct amdgpu_device 
*get_amdgpu_device(struct kgd_dev *kgd)
return (struct amdgpu_device *)kgd;
 }
 
-static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
+static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
struct kgd_mem *mem)
 {
-   struct kfd_bo_va_list *entry;
+   struct kfd_mem_attachment *entry;
 
-   list_for_each_entry(entry, &mem->bo_va_list, bo_list)
+   list_for_each_entry(entry, &mem->attachments, list)
if (entry->bo_va->base.vm == avm)
-   return false;
+   return true;
 
-   return true;
+   return false;
 }
 
 /* Set memory usage limits. Current, limits are
@@ -473,7 +473,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
return pte_flags;
 }
 
-/* add_bo_to_vm - Add a BO to a VM
+/* kfd_mem_attach - Add a BO to a VM
  *
 * Everything that needs to be done only once when a BO is first added
 * to a VM. It can later be mapped and unmapped many times without
@@ -485,15 +485,14 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
  * 4. Alloc page tables and directories if needed
  * 4a.  Validate new page tables and directories
  */
-static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem 
+*mem,
struct amdgpu_vm *vm, bool is_aql,
-   struct kfd_bo_va_list **p_bo_va_entry)
+   struct kfd_mem_attachment **p_attachment)
 {
int ret;
-   struct kfd_bo_va_list *bo_va_entry;
+   struct kfd_mem_attachment *attachment;
struct amdgpu_bo *bo = mem->bo;
uint64_t va = mem->va;
-   struct list_head *list_bo_va = &mem->bo_va_list;
unsigned long bo_size = bo->tbo.base.size;
 
if (!va) {
@@ -504,29 +503,29 @@ static int add_bo_to_vm(struct amdgpu_device *adev, 
struct kgd_mem *mem,
if (is_aql)
va += bo_size;
 
-   bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
-   if (!bo_va_entry)
+   attachment = kzalloc(sizeof(*attachment), GFP_KERNEL);
+   if (!attachment)
return -ENOMEM;
 
pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
va + bo_size, vm);
 
/* Add BO to VM internal data structures*/
-   bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
-   if (!bo_va_entry->bo_va) {
+   attachment->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+   if (!attachment->bo_va) {
ret = -EINVAL;
pr_err("Failed to add BO object to VM. ret == %d\n",
ret);
goto err_vmadd;
}
 
-   bo_va_entry->va = va;
-   bo_va_entry->pte_flags = get_pte_flags(adev, mem);
-   bo_va_entry->kgd_dev = (void *)adev;
-   list_add(&bo_va_entry->bo_list, list_bo_va);
+   attachment->va