RE: [PATCH 21/23] drm/xe/svm: GPU page fault support

2024-01-22 Thread Zeng, Oak


> -Original Message-
> From: Welty, Brian 
> Sent: Monday, January 22, 2024 9:06 PM
> To: Zeng, Oak ; dri-devel@lists.freedesktop.org;
> intel-x...@lists.freedesktop.org
> Cc: Bommu, Krishnaiah ; Ghimiray, Himal Prasad ;
> thomas.hellst...@linux.intel.com; Vishwanathapura, Niranjana ;
> Brost, Matthew 
> Subject: Re: [PATCH 21/23] drm/xe/svm: GPU page fault support

Re: [PATCH 21/23] drm/xe/svm: GPU page fault support

2024-01-22 Thread Welty, Brian



[PATCH 21/23] drm/xe/svm: GPU page fault support

2024-01-17 Thread Oak Zeng

On a GPU page fault of a virtual address, try to fault the virtual
address range into the GPU page table and let the HW retry on the
faulting address.

Right now, we always migrate the whole vma which contains the fault
address to the GPU. This is subject to change once a more sophisticated
migration policy is in place: deciding whether to migrate memory to the
GPU or map it in place with CPU memory, and choosing the migration
granularity.

There is a rather complicated locking strategy in this patch. See more
details in xe_svm_doc.h, lock design section.
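
As an illustration only (not part of this patch), the fault servicing
sequence described above condenses to roughly the following, based on
the handler added in xe_svm.c below. Error handling is trimmed and the
migrate/populate/map steps are summarized in a comment because this
excerpt is cut off before them; svm_fault_flow_sketch() is a
hypothetical name used purely for this sketch:

static int svm_fault_flow_sketch(struct xe_vm *vm, struct xe_gt *gt,
				 struct pagefault *pf)
{
	struct xe_svm *svm = vm->svm;
	struct mm_struct *mm = svm->mm;
	struct vm_area_struct *vma;
	struct xe_svm_range *range;

	mmap_read_lock(mm);

	/* 1. Resolve the CPU vma backing the faulting GPU virtual address. */
	vma = find_vma_intersection(mm, pf->page_addr, pf->page_addr + 4);

	/* 2. Reject the fault if the vma does not allow the attempted access. */
	if (!vma || !svm_access_allowed(vma, pf->access_type != ACCESS_TYPE_READ)) {
		mmap_read_unlock(mm);
		return vma ? -EPERM : -ENOENT;
	}

	/* 3. Look up, or lazily create, the SVM range covering the address. */
	range = xe_svm_range_from_addr(svm, pf->page_addr);
	if (!range)
		range = xe_svm_range_create(svm, vma);

	/*
	 * 4. Consult the migration policy, migrate the whole range to vram
	 *    when it says so, populate the range and map it to the GPU so
	 *    the HW can retry the access; see the rest of the handler below.
	 */
	mmap_read_unlock(mm);
	return 0;
}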

Signed-off-by: Oak Zeng 
Cc: Niranjana Vishwanathapura 
Cc: Matthew Brost 
Cc: Thomas Hellström 
Cc: Brian Welty 
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c |   7 ++
 drivers/gpu/drm/xe/xe_svm.c  | 116 +++
 drivers/gpu/drm/xe/xe_svm.h  |   6 ++
 drivers/gpu/drm/xe/xe_svm_range.c|  43 ++
 4 files changed, 172 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 467d68f8332e..462603abab8a 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -22,6 +22,7 @@
 #include "xe_pt.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
+#include "xe_svm.h"
 
 enum fault_type {
NOT_PRESENT = 0,
@@ -131,6 +132,11 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
if (!vm || !xe_vm_in_fault_mode(vm))
return -EINVAL;
 
+   if (vm->svm) {
+   ret = xe_svm_handle_gpu_fault(vm, gt, pf);
+   goto put_vm;
+   }
+
 retry_userptr:
/*
 * TODO: Avoid exclusive lock if VM doesn't have userptrs, or
@@ -219,6 +225,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
if (ret >= 0)
ret = 0;
}
+put_vm:
xe_vm_put(vm);
 
return ret;
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 0c13690a19f5..1ade8d7f0ab2 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -12,6 +12,7 @@
 #include "xe_svm.h"
 #include 
 #include 
+#include 
 #include "xe_pt.h"
 #include "xe_assert.h"
 #include "xe_vm_types.h"
@@ -206,3 +207,118 @@ static int svm_populate_range(struct xe_svm_range *svm_range,
kvfree(pfns);
return ret;
 }
+
+/**
+ * svm_access_allowed() - Determine whether read and/or write access to a
+ * vma is allowed
+ *
+ * @vma: the vm area to check
+ * @write: true means read and write access; false means read-only access
+ */
+static bool svm_access_allowed(struct vm_area_struct *vma, bool write)
+{
+   unsigned long access = VM_READ;
+
+   if (write)
+   access |= VM_WRITE;
+
+   return (vma->vm_flags & access) == access;
+}
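
/*
 * Example (illustrative, not part of the diff): the fault handler below maps
 * the GPU access type onto this permission check -- anything other than a
 * plain read is treated as needing write permission on the vma:
 *
 *	svm_access_allowed(vma, pf->access_type != ACCESS_TYPE_READ)
 */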
+
+/**
+ * svm_should_migrate() - Determine whether we should migrate a range to
+ * a destination memory region
+ *
+ * @range: The svm memory range to consider
+ * @dst_region: target destination memory region
+ * @is_atomic_fault: Is the intended migration triggered by an atomic access?
+ * On some platforms, we have to migrate memory to guarantee atomic correctness.
+ */
+static bool svm_should_migrate(struct xe_svm_range *range,
+   struct xe_mem_region *dst_region, bool is_atomic_fault)
+{
+   return true;
+}
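
/*
 * Illustrative sketch only, not part of the diff: the stub above always
 * returns true, i.e. every fault migrates the whole range to vram. A more
 * developed policy, as hinted at in the commit message, might weigh the
 * fault type and the cost of migration. svm_range_size() and the 2M
 * threshold below are hypothetical, used purely for illustration.
 */
static bool svm_should_migrate_sketch(struct xe_svm_range *range,
				      struct xe_mem_region *dst_region,
				      bool is_atomic_fault)
{
	/* Some platforms can only guarantee atomic correctness in vram. */
	if (is_atomic_fault)
		return true;

	/*
	 * Hypothetical heuristic: small ranges may be cheaper to map in
	 * place with CPU (system) memory than to migrate to vram.
	 */
	if (svm_range_size(range) < SZ_2M)
		return false;

	return true;
}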
+
+/**
+ * xe_svm_handle_gpu_fault() - gpu page fault handler for svm subsystem
+ *
+ * @vm: The vm of the fault.
+ * @gt: The gt hardware on which the fault happens.
+ * @pf: page fault descriptor
+ *
+ * Work out the backing memory for the fault address, migrate memory from
+ * system memory to GPU vram if necessary, and map the fault address to the
+ * GPU so the GPU HW can retry the last operation which caused the GPU
+ * page fault.
+ */
+int xe_svm_handle_gpu_fault(struct xe_vm *vm,
+   struct xe_gt *gt,
+   struct pagefault *pf)
+{
+   u8 access_type = pf->access_type;
+   u64 page_addr = pf->page_addr;
+   struct hmm_range hmm_range;
+   struct vm_area_struct *vma;
+   struct xe_svm_range *range;
+   struct mm_struct *mm;
+   struct xe_svm *svm;
+   int ret = 0;
+
+   svm = vm->svm;
+   if (!svm)
+   return -EINVAL;
+
+   mm = svm->mm;
+   mmap_read_lock(mm);
+   vma = find_vma_intersection(mm, page_addr, page_addr + 4);
+   if (!vma) {
+   mmap_read_unlock(mm);
+   return -ENOENT;
+   }
+
+   if (!svm_access_allowed(vma, access_type != ACCESS_TYPE_READ)) {
+   mmap_read_unlock(mm);
+   return -EPERM;
+   }
+
+   range = xe_svm_range_from_addr(svm, page_addr);
+   if (!range) {
+   range = xe_svm_range_create(svm, vma);
+   if (!range) {
+   mmap_read_unlock(mm);
+   return -ENOMEM;
+   }
+   }
+
+   if (svm_should_migrate(range, &gt->tile->mem.vram,
+   access_type == ACCESS_TYPE_ATOMIC))
+
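
/*
 * Usage note (illustrative, not part of the diff): xe_svm_handle_gpu_fault()
 * is only reached from handle_pagefault() when the VM was created in SVM
 * mode (vm->svm != NULL, see the xe_gt_pagefault.c hunk above). A 0 return
 * means the fault was serviced and the GPU HW can retry the faulting access.
 */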