Re: [PATCH] nouveau/hmm: map pages after migration

2019-08-13 Thread Jerome Glisse
On Wed, Aug 07, 2019 at 08:02:14AM -0700, Ralph Campbell wrote:
> When memory is migrated to the GPU it is likely to be accessed by GPU
> code soon afterwards. Instead of waiting for a GPU fault, map the
> migrated memory into the GPU page tables with the same access permissions
> as the source CPU page table entries. This preserves copy on write
> semantics.
> 
> Signed-off-by: Ralph Campbell 
> Cc: Christoph Hellwig 
> Cc: Jason Gunthorpe 
> Cc: "Jérôme Glisse" 
> Cc: Ben Skeggs 

Sorry for the delay, I am swamped. A couple of issues:
- nouveau_pfns_map() is never called; it should be called after
  the dma copy is done (iirc it is lacking proper fencing,
  so that would need to be implemented first)

- the migrate ioctl is disconnected from the svm part and
  thus we would first need to implement svm reference counting
  and take a reference at the beginning of the migration process and
  release it at the end, i.e. struct nouveau_svmm needs refcounting
  of some sort. I'll let Ben decide what he likes best for that.

- I'd rather not have an extra pfns array; I am pretty sure we
  can directly feed what we get from the dma array to the svm
  code to update the GPU page table

Observation that can be delayed to later patches:
- I do not think we want to map directly if the dma engine
  is queued up and thus if the fence will take time to signal

  This is why I did not implement this in the first place.
  Maybe use a workqueue to schedule a pre-fill of the GPU
  page table and wake up the workqueue with the fence notify
  event.


> ---
> 
> This patch is based on top of Christoph Hellwig's 9 patch series
> https://lore.kernel.org/linux-mm/20190729234611.gc7...@redhat.com/T/#u
> "turn the hmm migrate_vma upside down" but without patch 9
> "mm: remove the unused MIGRATE_PFN_WRITE" and adds a use for the flag.
> 
> 
>  drivers/gpu/drm/nouveau/nouveau_dmem.c | 45 +-
>  drivers/gpu/drm/nouveau/nouveau_svm.c  | 86 ++
>  drivers/gpu/drm/nouveau/nouveau_svm.h  | 19 ++
>  3 files changed, 133 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
> b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> index ef9de82b0744..c83e6f118817 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> @@ -25,11 +25,13 @@
>  #include "nouveau_dma.h"
>  #include "nouveau_mem.h"
>  #include "nouveau_bo.h"
> +#include "nouveau_svm.h"
>  
>  #include 
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -560,11 +562,12 @@ nouveau_dmem_init(struct nouveau_drm *drm)
>  }
>  
>  static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
> - struct vm_area_struct *vma, unsigned long addr,
> - unsigned long src, dma_addr_t *dma_addr)
> + struct vm_area_struct *vma, unsigned long src,
> + dma_addr_t *dma_addr, u64 *pfn)
>  {
>   struct device *dev = drm->dev->dev;
>   struct page *dpage, *spage;
> + unsigned long paddr;
>  
>   spage = migrate_pfn_to_page(src);
>   if (!spage || !(src & MIGRATE_PFN_MIGRATE))
> @@ -572,17 +575,21 @@ static unsigned long 
> nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
>  
>   dpage = nouveau_dmem_page_alloc_locked(drm);
>   if (!dpage)
> - return 0;
> + goto out;
>  
>   *dma_addr = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
>   if (dma_mapping_error(dev, *dma_addr))
>   goto out_free_page;
>  
> + paddr = nouveau_dmem_page_addr(dpage);
>   if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_VRAM,
> - nouveau_dmem_page_addr(dpage), NOUVEAU_APER_HOST,
> - *dma_addr))
> + paddr, NOUVEAU_APER_HOST, *dma_addr))
>   goto out_dma_unmap;
>  
> + *pfn = NVIF_VMM_PFNMAP_V0_V | NVIF_VMM_PFNMAP_V0_VRAM |
> + ((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT);
> + if (src & MIGRATE_PFN_WRITE)
> + *pfn |= NVIF_VMM_PFNMAP_V0_W;
>   return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
>  
>  out_dma_unmap:
> @@ -590,18 +597,19 @@ static unsigned long 
> nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
>  out_free_page:
>   nouveau_dmem_page_free_locked(drm, dpage);
>  out:
> + *pfn = NVIF_VMM_PFNMAP_V0_NONE;
>   return 0;
>  }
>  
>  static void nouveau_dmem_migrate_chunk(struct migrate_vma *args,
> - struct nouveau_drm *drm, dma_addr_t *dma_addrs)
> + struct nouveau_drm *drm, dma_addr_t *dma_addrs, u64 *pfns)
>  {
>   struct nouveau_fence *fence;
>   unsigned long addr = args->start, nr_dma = 0, i;
>  
>   for (i = 0; addr < args->end; i++) {
>   args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->vma,
> - addr, args->src[i], &dma_addrs[nr_dma]);
> + args->src[i], &dma_addrs[nr_dma], &pfns[i]);

Re: [PATCH] nouveau/hmm: map pages after migration

2019-08-11 Thread Christoph Hellwig
On Thu, Aug 08, 2019 at 02:29:34PM -0700, Ralph Campbell wrote:
>>>   {
>>> struct nouveau_fence *fence;
>>> unsigned long addr = args->start, nr_dma = 0, i;
>>> for (i = 0; addr < args->end; i++) {
>>> args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->vma,
>>> -   addr, args->src[i], &dma_addrs[nr_dma]);
>>> +   args->src[i], &dma_addrs[nr_dma], &pfns[i]);
>>
>> Nit: I find the &pfns[i] way to pass the argument a little weird to read.
>> Why not "pfns + i"?
>
> OK, will do in v2.
> Should I convert to "dma_addrs + nr_dma" too?

I'll fix it up for v3 of the migrate_vma series.  This is a leftover
from passing an args structure.

On something vaguely related to this patch:

You use the NVIF_VMM_PFNMAP_V0_V* defines from nvif/if000c.h, which are
a little odd as we only ever set these bits, but they also don't seem
to appear to be in values that are directly fed to the hardware.

On the other hand mmu/vmm.h defines a set of NVIF_VMM_PFNMAP_V0_*
constants with similar names and identical values, and those are used
in mmu/vmmgp100.c and what appears to finally do the low-level dma
mapping and talking to the hardware.  Are these two sets of constants
supposed to be the same?  Are they actual hardware values, or just a
driver-internal interface?
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] nouveau/hmm: map pages after migration

2019-08-08 Thread Ralph Campbell


On 8/8/19 12:07 AM, Christoph Hellwig wrote:

On Wed, Aug 07, 2019 at 08:02:14AM -0700, Ralph Campbell wrote:

When memory is migrated to the GPU it is likely to be accessed by GPU
code soon afterwards. Instead of waiting for a GPU fault, map the
migrated memory into the GPU page tables with the same access permissions
as the source CPU page table entries. This preserves copy on write
semantics.

Signed-off-by: Ralph Campbell 
Cc: Christoph Hellwig 
Cc: Jason Gunthorpe 
Cc: "Jérôme Glisse" 
Cc: Ben Skeggs 
---

This patch is based on top of Christoph Hellwig's 9 patch series
https://lore.kernel.org/linux-mm/20190729234611.gc7...@redhat.com/T/#u
"turn the hmm migrate_vma upside down" but without patch 9
"mm: remove the unused MIGRATE_PFN_WRITE" and adds a use for the flag.


This looks useful.  I've already dropped that patch for the pending
resend.


Thanks.




  static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
-   struct vm_area_struct *vma, unsigned long addr,
-   unsigned long src, dma_addr_t *dma_addr)
+   struct vm_area_struct *vma, unsigned long src,
+   dma_addr_t *dma_addr, u64 *pfn)


I'll pick up the removal of the not needed addr argument for the patch
introducing nouveau_dmem_migrate_copy_one, thanks,


  static void nouveau_dmem_migrate_chunk(struct migrate_vma *args,
-   struct nouveau_drm *drm, dma_addr_t *dma_addrs)
+   struct nouveau_drm *drm, dma_addr_t *dma_addrs, u64 *pfns)
  {
struct nouveau_fence *fence;
unsigned long addr = args->start, nr_dma = 0, i;
  
  	for (i = 0; addr < args->end; i++) {

args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->vma,
-   addr, args->src[i], &dma_addrs[nr_dma]);
+   args->src[i], &dma_addrs[nr_dma], &pfns[i]);


Nit: I find the &pfns[i] way to pass the argument a little weird to read.
Why not "pfns + i"?


OK, will do in v2.
Should I convert to "dma_addrs + nr_dma" too?


+u64 *
+nouveau_pfns_alloc(unsigned long npages)
+{
+   struct nouveau_pfnmap_args *args;
+
+   args = kzalloc(sizeof(*args) + npages * sizeof(args->p.phys[0]),


Can we use struct_size here?


Yes, good suggestion.




+   int ret;
+
+   if (!svm)
+   return;
+
+   mutex_lock(&svm->mutex);
+   svmm = nouveau_find_svmm(svm, mm);
+   if (!svmm) {
+   mutex_unlock(&svm->mutex);
+   return;
+   }
+   mutex_unlock(&svm->mutex);


Given that nouveau_find_svmm doesn't take any kind of reference, what
guarantees svmm doesn't go away after dropping the lock?


I asked Ben and Jerome about this too.
I'm still looking into it.




@@ -44,5 +49,19 @@ static inline int nouveau_svmm_bind(struct drm_device 
*device, void *p,
  {
return -ENOSYS;
  }
+
+u64 *nouveau_pfns_alloc(unsigned long npages)
+{
+   return NULL;
+}
+
+void nouveau_pfns_free(u64 *pfns)
+{
+}
+
+void nouveau_pfns_map(struct nouveau_drm *drm, struct mm_struct *mm,
+ unsigned long addr, u64 *pfns, unsigned long npages)
+{
+}
  #endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */


nouveau_dmem.c and nouveau_svm.c are both built conditional on
CONFIG_DRM_NOUVEAU_SVM, so there is no need for stubs here.



Good point. I'll remove them in v2.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] nouveau/hmm: map pages after migration

2019-08-08 Thread Christoph Hellwig
On Wed, Aug 07, 2019 at 08:02:14AM -0700, Ralph Campbell wrote:
> When memory is migrated to the GPU it is likely to be accessed by GPU
> code soon afterwards. Instead of waiting for a GPU fault, map the
> migrated memory into the GPU page tables with the same access permissions
> as the source CPU page table entries. This preserves copy on write
> semantics.
> 
> Signed-off-by: Ralph Campbell 
> Cc: Christoph Hellwig 
> Cc: Jason Gunthorpe 
> Cc: "Jérôme Glisse" 
> Cc: Ben Skeggs 
> ---
> 
> This patch is based on top of Christoph Hellwig's 9 patch series
> https://lore.kernel.org/linux-mm/20190729234611.gc7...@redhat.com/T/#u
> "turn the hmm migrate_vma upside down" but without patch 9
> "mm: remove the unused MIGRATE_PFN_WRITE" and adds a use for the flag.

This looks useful.  I've already dropped that patch for the pending
resend.

>  static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
> - struct vm_area_struct *vma, unsigned long addr,
> - unsigned long src, dma_addr_t *dma_addr)
> + struct vm_area_struct *vma, unsigned long src,
> + dma_addr_t *dma_addr, u64 *pfn)

I'll pick up the removal of the not needed addr argument for the patch
introducing nouveau_dmem_migrate_copy_one, thanks,

>  static void nouveau_dmem_migrate_chunk(struct migrate_vma *args,
> - struct nouveau_drm *drm, dma_addr_t *dma_addrs)
> + struct nouveau_drm *drm, dma_addr_t *dma_addrs, u64 *pfns)
>  {
>   struct nouveau_fence *fence;
>   unsigned long addr = args->start, nr_dma = 0, i;
>  
>   for (i = 0; addr < args->end; i++) {
>   args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->vma,
> - addr, args->src[i], &dma_addrs[nr_dma]);
> + args->src[i], &dma_addrs[nr_dma], &pfns[i]);

Nit: I find the &pfns[i] way to pass the argument a little weird to read.
Why not "pfns + i"?

> +u64 *
> +nouveau_pfns_alloc(unsigned long npages)
> +{
> + struct nouveau_pfnmap_args *args;
> +
> + args = kzalloc(sizeof(*args) + npages * sizeof(args->p.phys[0]),

Can we use struct_size here?

> + int ret;
> +
> + if (!svm)
> + return;
> +
> + mutex_lock(&svm->mutex);
> + svmm = nouveau_find_svmm(svm, mm);
> + if (!svmm) {
> + mutex_unlock(&svm->mutex);
> + return;
> + }
> + mutex_unlock(&svm->mutex);

Given that nouveau_find_svmm doesn't take any kind of reference, what
guarantees svmm doesn't go away after dropping the lock?

> @@ -44,5 +49,19 @@ static inline int nouveau_svmm_bind(struct drm_device 
> *device, void *p,
>  {
>   return -ENOSYS;
>  }
> +
> +u64 *nouveau_pfns_alloc(unsigned long npages)
> +{
> + return NULL;
> +}
> +
> +void nouveau_pfns_free(u64 *pfns)
> +{
> +}
> +
> +void nouveau_pfns_map(struct nouveau_drm *drm, struct mm_struct *mm,
> +   unsigned long addr, u64 *pfns, unsigned long npages)
> +{
> +}
>  #endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */

nouveau_dmem.c and nouveau_svm.c are both built conditional on
CONFIG_DRM_NOUVEAU_SVM, so there is no need for stubs here.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx