Re: [PATCH v2 2/4] powerpc/mm/radix: Improve TLB/PWC flushes

2017-07-19 Thread Aneesh Kumar K.V
Benjamin Herrenschmidt  writes:

> At the moment we have two rather sub-optimal flushing behaviours:
>
>  - flush_tlb_mm() will flush the PWC which is unnecessary (for example
>    when doing a fork)
>
>  - A large unmap will call flush_tlb_pwc() multiple times causing us
>    to perform that fairly expensive operation repeatedly. This happens
>    often in batches of 3 on every new process.
>
> So we change flush_tlb_mm() to only flush the TLB, and we use the
> existing "need_flush_all" flag in struct mmu_gather to indicate
> that the PWC needs flushing.
>
> Unfortunately, flush_tlb_range() still needs to do a full flush
> for now as it's used by the THP collapsing. We will fix that later.
>
Reviewed-by: Aneesh Kumar K.V 

> Signed-off-by: Benjamin Herrenschmidt 
> ---
>  .../powerpc/include/asm/book3s/64/tlbflush-radix.h |  4 +-
>  arch/powerpc/mm/tlb-radix.c| 66 
> +-
>  2 files changed, 28 insertions(+), 42 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h 
> b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> index cc7fbde4f53c..7196999cdc82 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> @@ -22,22 +22,20 @@ extern void radix__flush_tlb_kernel_range(unsigned long 
> start, unsigned long end
>
>  extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
>  extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned 
> long vmaddr);
> -extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long 
> addr);
>  extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned 
> long vmaddr,
> int psize);
>  extern void radix__tlb_flush(struct mmu_gather *tlb);
>  #ifdef CONFIG_SMP
>  extern void radix__flush_tlb_mm(struct mm_struct *mm);
>  extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long 
> vmaddr);
> -extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
>  extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long 
> vmaddr,
>   int psize);
>  #else
>  #define radix__flush_tlb_mm(mm)  radix__local_flush_tlb_mm(mm)
>  #define radix__flush_tlb_page(vma,addr)  
> radix__local_flush_tlb_page(vma,addr)
>  #define radix__flush_tlb_page_psize(mm,addr,p) 
> radix__local_flush_tlb_page_psize(mm,addr,p)
> -#define radix__flush_tlb_pwc(tlb, addr)  radix__local_flush_tlb_pwc(tlb, 
> addr)
>  #endif
> +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
>  extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
>unsigned long page_size);
>  extern void radix__flush_tlb_lpid(unsigned long lpid);
> diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
> index 2f2967a2db93..28f339cdd836 100644
> --- a/arch/powerpc/mm/tlb-radix.c
> +++ b/arch/powerpc/mm/tlb-radix.c
> @@ -68,17 +68,6 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned 
> long ric)
>   asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
>  }
>
> -static inline void tlbiel_pwc(unsigned long pid)
> -{
> - asm volatile("ptesync": : :"memory");
> -
> - /* For PWC flush, we don't look at set number */
> - __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
> -
> - asm volatile("ptesync": : :"memory");
> - asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
> -}
> -
>  static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
>  {
>   unsigned long rb,rs,prs,r;
> @@ -149,31 +138,23 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
>   preempt_disable();
>   pid = mm->context.id;
>   if (pid != MMU_NO_CONTEXT)
> - _tlbiel_pid(pid, RIC_FLUSH_ALL);
> + _tlbiel_pid(pid, RIC_FLUSH_TLB);
>   preempt_enable();
>  }
>  EXPORT_SYMBOL(radix__local_flush_tlb_mm);
>
> -void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
> +#ifndef CONFIG_SMP
> +static void radix__local_flush_all_mm(struct mm_struct *mm)
>  {
>   unsigned long pid;
> - struct mm_struct *mm = tlb->mm;
> - /*
> -  * If we are doing a full mm flush, we will do a tlb flush
> -  * with RIC_FLUSH_ALL later.
> -  */
> - if (tlb->fullmm)
> - return;
>
>   preempt_disable();
> -
>   pid = mm->context.id;
>   if (pid != MMU_NO_CONTEXT)
> - tlbiel_pwc(pid);
> -
> + _tlbiel_pid(pid, RIC_FLUSH_ALL);
>   preempt_enable();
>  }
> -EXPORT_SYMBOL(radix__local_flush_tlb_pwc);
> +#endif /* CONFIG_SMP */
>
>  void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long 
> vmaddr,
>  int psize)
> @@ -211,38 +192,35 @@ void 

[PATCH v2 2/4] powerpc/mm/radix: Improve TLB/PWC flushes

2017-07-18 Thread Benjamin Herrenschmidt
At the moment we have two rather sub-optimal flushing behaviours:

 - flush_tlb_mm() will flush the PWC which is unnecessary (for example
   when doing a fork)

 - A large unmap will call flush_tlb_pwc() multiple times causing us
   to perform that fairly expensive operation repeatedly. This happens
   often in batches of 3 on every new process.

So we change flush_tlb_mm() to only flush the TLB, and we use the
existing "need_flush_all" flag in struct mmu_gather to indicate
that the PWC needs flushing.

Unfortunately, flush_tlb_range() still needs to do a full flush
for now as it's used by the THP collapsing. We will fix that later.

Signed-off-by: Benjamin Herrenschmidt 
---
 .../powerpc/include/asm/book3s/64/tlbflush-radix.h |  4 +-
 arch/powerpc/mm/tlb-radix.c| 66 +-
 2 files changed, 28 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h 
b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index cc7fbde4f53c..7196999cdc82 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -22,22 +22,20 @@ extern void radix__flush_tlb_kernel_range(unsigned long 
start, unsigned long end
 
 extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
 extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned 
long vmaddr);
-extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long 
addr);
 extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned 
long vmaddr,
  int psize);
 extern void radix__tlb_flush(struct mmu_gather *tlb);
 #ifdef CONFIG_SMP
 extern void radix__flush_tlb_mm(struct mm_struct *mm);
 extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long 
vmaddr);
-extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
 extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long 
vmaddr,
int psize);
 #else
 #define radix__flush_tlb_mm(mm)radix__local_flush_tlb_mm(mm)
 #define radix__flush_tlb_page(vma,addr)
radix__local_flush_tlb_page(vma,addr)
 #define radix__flush_tlb_page_psize(mm,addr,p) 
radix__local_flush_tlb_page_psize(mm,addr,p)
-#define radix__flush_tlb_pwc(tlb, addr)radix__local_flush_tlb_pwc(tlb, 
addr)
 #endif
+extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
 extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
 unsigned long page_size);
 extern void radix__flush_tlb_lpid(unsigned long lpid);
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 2f2967a2db93..28f339cdd836 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -68,17 +68,6 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned 
long ric)
asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
 }
 
-static inline void tlbiel_pwc(unsigned long pid)
-{
-   asm volatile("ptesync": : :"memory");
-
-   /* For PWC flush, we don't look at set number */
-   __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
-
-   asm volatile("ptesync": : :"memory");
-   asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
-}
-
 static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 {
unsigned long rb,rs,prs,r;
@@ -149,31 +138,23 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
preempt_disable();
pid = mm->context.id;
if (pid != MMU_NO_CONTEXT)
-   _tlbiel_pid(pid, RIC_FLUSH_ALL);
+   _tlbiel_pid(pid, RIC_FLUSH_TLB);
preempt_enable();
 }
 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
 
-void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+#ifndef CONFIG_SMP
+static void radix__local_flush_all_mm(struct mm_struct *mm)
 {
unsigned long pid;
-   struct mm_struct *mm = tlb->mm;
-   /*
-* If we are doing a full mm flush, we will do a tlb flush
-* with RIC_FLUSH_ALL later.
-*/
-   if (tlb->fullmm)
-   return;
 
preempt_disable();
-
pid = mm->context.id;
if (pid != MMU_NO_CONTEXT)
-   tlbiel_pwc(pid);
-
+   _tlbiel_pid(pid, RIC_FLUSH_ALL);
preempt_enable();
 }
-EXPORT_SYMBOL(radix__local_flush_tlb_pwc);
+#endif /* CONFIG_SMP */
 
 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long 
vmaddr,
   int psize)
@@ -211,38 +192,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
goto no_context;
 
if (!mm_is_thread_local(mm))
-   _tlbie_pid(pid, RIC_FLUSH_ALL);
+   _tlbie_pid(pid, RIC_FLUSH_TLB);
else
-   _tlbiel_pid(pid, RIC_FLUSH_ALL);
+