Re: [PATCH 7/7] x86: mm: accelerate pagefault when badaccess

2024-04-03 Thread Suren Baghdasaryan
On Wed, Apr 3, 2024 at 12:58 AM Kefeng Wang  wrote:
>
>
>
> On 2024/4/3 13:59, Suren Baghdasaryan wrote:
> > On Tue, Apr 2, 2024 at 12:53 AM Kefeng Wang  
> > wrote:
> >>
> >> The vm_flags of vma already checked under per-VMA lock, if it is a
> >> bad access, directly handle error and return, there is no need to
> >> lock_mm_and_find_vma() and check vm_flags again.
> >>
> >> Signed-off-by: Kefeng Wang 
> >
> > Looks safe to me.
> > Using (mm != NULL) to indicate that we are holding mmap_lock is not
> > ideal but I guess that works.
> >
>
> Yes, I will add this part it into change too,
>
> The access_error() of vma already checked under per-VMA lock, if it
> is a bad access, directly handle error, no need to retry with mmap_lock
> again. In order to release the correct lock, pass the mm_struct into
> bad_area_access_error(), if mm is NULL, release vma lock, or release
> mmap_lock. Since the page faut is handled under per-VMA lock, count it
> as a vma lock event with VMA_LOCK_SUCCESS.

The part about passing mm_struct is unnecessary IMHO. It explains "how
you do things" but changelog should describe only "what you do" and
"why you do that". The rest we can see from the code.

>
> Thanks.
>
>
> > Reviewed-by: Suren Baghdasaryan 
> >
> >> ---
> >>   arch/x86/mm/fault.c | 23 ++-
> >>   1 file changed, 14 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> >> index a4cc20d0036d..67b18adc75dd 100644
> >> --- a/arch/x86/mm/fault.c
> >> +++ b/arch/x86/mm/fault.c
> >> @@ -866,14 +866,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned 
> >> long error_code,
> >>
> >>   static void
> >>   __bad_area(struct pt_regs *regs, unsigned long error_code,
> >> -  unsigned long address, u32 pkey, int si_code)
> >> +  unsigned long address, struct mm_struct *mm,
> >> +  struct vm_area_struct *vma, u32 pkey, int si_code)
> >>   {
> >> -   struct mm_struct *mm = current->mm;
> >>  /*
> >>   * Something tried to access memory that isn't in our memory map..
> >>   * Fix it, but check if it's kernel or user first..
> >>   */
> >> -   mmap_read_unlock(mm);
> >> +   if (mm)
> >> +   mmap_read_unlock(mm);
> >> +   else
> >> +   vma_end_read(vma);
> >>
> >>  __bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
> >>   }
> >> @@ -897,7 +900,8 @@ static inline bool bad_area_access_from_pkeys(unsigned 
> >> long error_code,
> >>
> >>   static noinline void
> >>   bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
> >> - unsigned long address, struct vm_area_struct *vma)
> >> + unsigned long address, struct mm_struct *mm,
> >> + struct vm_area_struct *vma)
> >>   {
> >>  /*
> >>   * This OSPKE check is not strictly necessary at runtime.
> >> @@ -927,9 +931,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned 
> >> long error_code,
> >>   */
> >>  u32 pkey = vma_pkey(vma);
> >>
> >> -   __bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
> >> +   __bad_area(regs, error_code, address, mm, vma, pkey, 
> >> SEGV_PKUERR);
> >>  } else {
> >> -   __bad_area(regs, error_code, address, 0, SEGV_ACCERR);
> >> +   __bad_area(regs, error_code, address, mm, vma, 0, 
> >> SEGV_ACCERR);
> >>  }
> >>   }
> >>
> >> @@ -1357,8 +1361,9 @@ void do_user_addr_fault(struct pt_regs *regs,
> >>  goto lock_mmap;
> >>
> >>  if (unlikely(access_error(error_code, vma))) {
> >> -   vma_end_read(vma);
> >> -   goto lock_mmap;
> >> +   bad_area_access_error(regs, error_code, address, NULL, 
> >> vma);
> >> +   count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> >> +   return;
> >>  }
> >>  fault = handle_mm_fault(vma, address, flags | 
> >> FAULT_FLAG_VMA_LOCK, regs);
> >>  if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> >> @@ -1394,7 +1399,7 @@ void do_user_addr_fault(struct pt_regs *regs,
> >>   * we can handle it..
> >>   */
> >>  if (unlikely(access_error(error_code, vma))) {
> >> -   bad_area_access_error(regs, error_code, address, vma);
> >> +   bad_area_access_error(regs, error_code, address, mm, vma);
> >>  return;
> >>  }
> >>
> >> --
> >> 2.27.0
> >>


Re: [PATCH 7/7] x86: mm: accelerate pagefault when badaccess

2024-04-03 Thread Kefeng Wang




On 2024/4/3 13:59, Suren Baghdasaryan wrote:

On Tue, Apr 2, 2024 at 12:53 AM Kefeng Wang  wrote:


The vm_flags of vma already checked under per-VMA lock, if it is a
bad access, directly handle error and return, there is no need to
lock_mm_and_find_vma() and check vm_flags again.

Signed-off-by: Kefeng Wang 


Looks safe to me.
Using (mm != NULL) to indicate that we are holding mmap_lock is not
ideal but I guess that works.



Yes, I will add this part it into change too,

The access_error() of vma already checked under per-VMA lock, if it
is a bad access, directly handle error, no need to retry with mmap_lock
again. In order to release the correct lock, pass the mm_struct into
bad_area_access_error(), if mm is NULL, release vma lock, or release
mmap_lock. Since the page faut is handled under per-VMA lock, count it
as a vma lock event with VMA_LOCK_SUCCESS.

Thanks.



Reviewed-by: Suren Baghdasaryan 


---
  arch/x86/mm/fault.c | 23 ++-
  1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a4cc20d0036d..67b18adc75dd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -866,14 +866,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long 
error_code,

  static void
  __bad_area(struct pt_regs *regs, unsigned long error_code,
-  unsigned long address, u32 pkey, int si_code)
+  unsigned long address, struct mm_struct *mm,
+  struct vm_area_struct *vma, u32 pkey, int si_code)
  {
-   struct mm_struct *mm = current->mm;
 /*
  * Something tried to access memory that isn't in our memory map..
  * Fix it, but check if it's kernel or user first..
  */
-   mmap_read_unlock(mm);
+   if (mm)
+   mmap_read_unlock(mm);
+   else
+   vma_end_read(vma);

 __bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
  }
@@ -897,7 +900,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long 
error_code,

  static noinline void
  bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma)
+ unsigned long address, struct mm_struct *mm,
+ struct vm_area_struct *vma)
  {
 /*
  * This OSPKE check is not strictly necessary at runtime.
@@ -927,9 +931,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long 
error_code,
  */
 u32 pkey = vma_pkey(vma);

-   __bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
+   __bad_area(regs, error_code, address, mm, vma, pkey, 
SEGV_PKUERR);
 } else {
-   __bad_area(regs, error_code, address, 0, SEGV_ACCERR);
+   __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR);
 }
  }

@@ -1357,8 +1361,9 @@ void do_user_addr_fault(struct pt_regs *regs,
 goto lock_mmap;

 if (unlikely(access_error(error_code, vma))) {
-   vma_end_read(vma);
-   goto lock_mmap;
+   bad_area_access_error(regs, error_code, address, NULL, vma);
+   count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+   return;
 }
 fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, 
regs);
 if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
@@ -1394,7 +1399,7 @@ void do_user_addr_fault(struct pt_regs *regs,
  * we can handle it..
  */
 if (unlikely(access_error(error_code, vma))) {
-   bad_area_access_error(regs, error_code, address, vma);
+   bad_area_access_error(regs, error_code, address, mm, vma);
 return;
 }

--
2.27.0



Re: [PATCH 7/7] x86: mm: accelerate pagefault when badaccess

2024-04-03 Thread Suren Baghdasaryan
On Tue, Apr 2, 2024 at 12:53 AM Kefeng Wang  wrote:
>
> The vm_flags of vma already checked under per-VMA lock, if it is a
> bad access, directly handle error and return, there is no need to
> lock_mm_and_find_vma() and check vm_flags again.
>
> Signed-off-by: Kefeng Wang 

Looks safe to me.
Using (mm != NULL) to indicate that we are holding mmap_lock is not
ideal but I guess that works.

Reviewed-by: Suren Baghdasaryan 

> ---
>  arch/x86/mm/fault.c | 23 ++-
>  1 file changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index a4cc20d0036d..67b18adc75dd 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -866,14 +866,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned 
> long error_code,
>
>  static void
>  __bad_area(struct pt_regs *regs, unsigned long error_code,
> -  unsigned long address, u32 pkey, int si_code)
> +  unsigned long address, struct mm_struct *mm,
> +  struct vm_area_struct *vma, u32 pkey, int si_code)
>  {
> -   struct mm_struct *mm = current->mm;
> /*
>  * Something tried to access memory that isn't in our memory map..
>  * Fix it, but check if it's kernel or user first..
>  */
> -   mmap_read_unlock(mm);
> +   if (mm)
> +   mmap_read_unlock(mm);
> +   else
> +   vma_end_read(vma);
>
> __bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
>  }
> @@ -897,7 +900,8 @@ static inline bool bad_area_access_from_pkeys(unsigned 
> long error_code,
>
>  static noinline void
>  bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
> - unsigned long address, struct vm_area_struct *vma)
> + unsigned long address, struct mm_struct *mm,
> + struct vm_area_struct *vma)
>  {
> /*
>  * This OSPKE check is not strictly necessary at runtime.
> @@ -927,9 +931,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long 
> error_code,
>  */
> u32 pkey = vma_pkey(vma);
>
> -   __bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
> +   __bad_area(regs, error_code, address, mm, vma, pkey, 
> SEGV_PKUERR);
> } else {
> -   __bad_area(regs, error_code, address, 0, SEGV_ACCERR);
> +   __bad_area(regs, error_code, address, mm, vma, 0, 
> SEGV_ACCERR);
> }
>  }
>
> @@ -1357,8 +1361,9 @@ void do_user_addr_fault(struct pt_regs *regs,
> goto lock_mmap;
>
> if (unlikely(access_error(error_code, vma))) {
> -   vma_end_read(vma);
> -   goto lock_mmap;
> +   bad_area_access_error(regs, error_code, address, NULL, vma);
> +   count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> +   return;
> }
> fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, 
> regs);
> if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> @@ -1394,7 +1399,7 @@ void do_user_addr_fault(struct pt_regs *regs,
>  * we can handle it..
>  */
> if (unlikely(access_error(error_code, vma))) {
> -   bad_area_access_error(regs, error_code, address, vma);
> +   bad_area_access_error(regs, error_code, address, mm, vma);
> return;
> }
>
> --
> 2.27.0
>


[PATCH 7/7] x86: mm: accelerate pagefault when badaccess

2024-04-02 Thread Kefeng Wang
The vm_flags of vma already checked under per-VMA lock, if it is a
bad access, directly handle error and return, there is no need to
lock_mm_and_find_vma() and check vm_flags again.

Signed-off-by: Kefeng Wang 
---
 arch/x86/mm/fault.c | 23 ++-
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a4cc20d0036d..67b18adc75dd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -866,14 +866,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long 
error_code,
 
 static void
 __bad_area(struct pt_regs *regs, unsigned long error_code,
-  unsigned long address, u32 pkey, int si_code)
+  unsigned long address, struct mm_struct *mm,
+  struct vm_area_struct *vma, u32 pkey, int si_code)
 {
-   struct mm_struct *mm = current->mm;
/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
-   mmap_read_unlock(mm);
+   if (mm)
+   mmap_read_unlock(mm);
+   else
+   vma_end_read(vma);
 
__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
 }
@@ -897,7 +900,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long 
error_code,
 
 static noinline void
 bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma)
+ unsigned long address, struct mm_struct *mm,
+ struct vm_area_struct *vma)
 {
/*
 * This OSPKE check is not strictly necessary at runtime.
@@ -927,9 +931,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long 
error_code,
 */
u32 pkey = vma_pkey(vma);
 
-   __bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
+   __bad_area(regs, error_code, address, mm, vma, pkey, 
SEGV_PKUERR);
} else {
-   __bad_area(regs, error_code, address, 0, SEGV_ACCERR);
+   __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR);
}
 }
 
@@ -1357,8 +1361,9 @@ void do_user_addr_fault(struct pt_regs *regs,
goto lock_mmap;
 
if (unlikely(access_error(error_code, vma))) {
-   vma_end_read(vma);
-   goto lock_mmap;
+   bad_area_access_error(regs, error_code, address, NULL, vma);
+   count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+   return;
}
fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, 
regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
@@ -1394,7 +1399,7 @@ void do_user_addr_fault(struct pt_regs *regs,
 * we can handle it..
 */
if (unlikely(access_error(error_code, vma))) {
-   bad_area_access_error(regs, error_code, address, vma);
+   bad_area_access_error(regs, error_code, address, mm, vma);
return;
}
 
-- 
2.27.0