Re: [RFC PATCH] powerpc/mm: THP page cache support

2016-09-26 Thread Balbir Singh


On 27/09/16 01:53, Aneesh Kumar K.V wrote:
>>>  
>>> +void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
>>
>> static?
> 
> OK, I will fix that.

inline as well?
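
For reference, a minimal sketch of the helper with both review
suggestions applied (static and inline; an illustration only, not
necessarily what the final version will look like):

	static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
	{
		pgtable_t pgtable;

		/* Withdraw the table deposited when the huge PMD was mapped. */
		pgtable = pgtable_trans_huge_withdraw(mm, pmd);
		pte_free(mm, pgtable);
		/* Keep the page table accounting in sync. */
		atomic_long_dec(&mm->nr_ptes);
	}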

Balbir Singh.



Re: [RFC PATCH] powerpc/mm: THP page cache support

2016-09-26 Thread Aneesh Kumar K.V
"Kirill A. Shutemov"  writes:

> On Thu, Sep 22, 2016 at 09:32:40PM +0530, Aneesh Kumar K.V wrote:
>> Update the arch hook in the generic THP page cache code so that it
>> deposits and withdraws the preallocated page table. Archs like ppc64
>> use this preallocated table to store the hash PTE slot information.
>> 
>> This is an RFC patch and I am sharing it early to get feedback on the
>> approach taken. I used the stress-ng mmap-file operation, which
>> resulted in some thp_file_mapped events, as shown below.
>> 
>> [/mnt/stress]$ grep thp_file /proc/vmstat
>> thp_file_alloc 25403
>> thp_file_mapped 16967
>> [/mnt/stress]$
>> 
>> I did observe a wrong nr_ptes count once; I still need to reproduce
>> the problem.
>
> I don't see anything that could cause that.
>

I still need to debug this.

> The patch looks good to me (apart from the nr_ptes issue). A few minor
> nitpicks below.
>
>> Signed-off-by: Aneesh Kumar K.V 
>> ---
>>  arch/powerpc/include/asm/book3s/64/pgtable.h |  3 ++
>>  include/asm-generic/pgtable.h                |  8 +++-
>>  mm/Kconfig                                   |  6 +--
>>  mm/huge_memory.c                             | 19 +-
>>  mm/khugepaged.c                              | 21 ++-
>>  mm/memory.c                                  | 56 +++-
>>  6 files changed, 93 insertions(+), 20 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 263bf39ced40..1f45b06ce78e 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -1017,6 +1017,9 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
>>   */
>>  return true;
>>  }
>> +
>> +#define arch_needs_pgtable_deposit() (true)
>> +
>>  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>>  #endif /* __ASSEMBLY__ */
>>  #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
>> diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
>> index d4458b6dbfb4..0d1e400e82a2 100644
>> --- a/include/asm-generic/pgtable.h
>> +++ b/include/asm-generic/pgtable.h
>> @@ -660,11 +660,17 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
>>  /*
>>   * With split pmd lock we also need to move preallocated
>>   * PTE page table if new_pmd is on different PMD page table.
>> + *
>> + * We also don't deposit and withdraw tables for file pages.
>>   */
>> -return new_pmd_ptl != old_pmd_ptl;
>> +return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);
>>  }
>>  #endif
>>  
>> +#ifndef arch_needs_pgtable_deposit
>> +#define arch_needs_pgtable_deposit() (false)
>> +#endif
>> +
>>  /*
>>   * This function is meant to be used by sites walking pagetables with
>>   * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
>> diff --git a/mm/Kconfig b/mm/Kconfig
>> index be0ee11fa0d9..0a279d399722 100644
>> --- a/mm/Kconfig
>> +++ b/mm/Kconfig
>> @@ -447,13 +447,9 @@ choice
>>benefit.
>>  endchoice
>>  
>> -#
>> -# We don't deposit page tables on file THP mapping,
>> -# but Power makes use of them to address MMU quirk.
>> -#
>>  config  TRANSPARENT_HUGE_PAGECACHE
>>  def_bool y
>> -depends on TRANSPARENT_HUGEPAGE && !PPC
>> +depends on TRANSPARENT_HUGEPAGE
>>  
>>  #
>>  # UP and nommu archs use km based percpu allocator
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index a6abd76baa72..37176f455d16 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -1320,6 +1320,14 @@ out_unlocked:
>>  return ret;
>>  }
>>  
>> +void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
>
> static?

OK, I will fix that.
>
>> +{
>> +pgtable_t pgtable;
>> +pgtable = pgtable_trans_huge_withdraw(mm, pmd);
>> +pte_free(mm, pgtable);
>> +atomic_long_dec(&mm->nr_ptes);
>> +}
>> +
>>  int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>>   pmd_t *pmd, unsigned long addr)
>>  {
>> @@ -1359,6 +1367,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>>  atomic_long_dec(&tlb->mm->nr_ptes);
>>  add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
>>  } else {
>> +if (arch_needs_pgtable_deposit())
>
> Just hide the arch_needs_pgtable_deposit() check in zap_deposited_table().


ok.
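
Something like the below, perhaps (an untested sketch; since the file
mapping path only deposits a table when arch_needs_pgtable_deposit()
is true, the helper can simply bail out early):

	static void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
	{
		pgtable_t pgtable;

		/* No table was deposited for this mapping on this arch. */
		if (!arch_needs_pgtable_deposit())
			return;
		pgtable = pgtable_trans_huge_withdraw(mm, pmd);
		pte_free(mm, pgtable);
		atomic_long_dec(&mm->nr_ptes);
	}

The else branch in zap_huge_pmd() would then call
zap_deposited_table() unconditionally.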

>
>> +zap_deposited_table(tlb->mm, pmd);
>>  add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
>>  }
>>  spin_unlock(ptl);

-aneesh



Re: [RFC PATCH] powerpc/mm: THP page cache support

2016-09-26 Thread Kirill A. Shutemov
On Thu, Sep 22, 2016 at 09:32:40PM +0530, Aneesh Kumar K.V wrote:
> Update the arch hook in the generic THP page cache code so that it
> deposits and withdraws the preallocated page table. Archs like ppc64
> use this preallocated table to store the hash PTE slot information.
> 
> This is an RFC patch and I am sharing it early to get feedback on the
> approach taken. I used the stress-ng mmap-file operation, which
> resulted in some thp_file_mapped events, as shown below.
> 
> [/mnt/stress]$ grep thp_file /proc/vmstat
> thp_file_alloc 25403
> thp_file_mapped 16967
> [/mnt/stress]$
> 
> I did observe a wrong nr_ptes count once; I still need to reproduce
> the problem.

I don't see anything that could cause that.

The patch looks good to me (apart from the nr_ptes issue). A few minor
nitpicks below.

> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/include/asm/book3s/64/pgtable.h |  3 ++
>  include/asm-generic/pgtable.h                |  8 +++-
>  mm/Kconfig                                   |  6 +--
>  mm/huge_memory.c                             | 19 +-
>  mm/khugepaged.c                              | 21 ++-
>  mm/memory.c                                  | 56 +++-
>  6 files changed, 93 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 263bf39ced40..1f45b06ce78e 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -1017,6 +1017,9 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
>*/
>   return true;
>  }
> +
> +#define arch_needs_pgtable_deposit() (true)
> +
>  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>  #endif /* __ASSEMBLY__ */
>  #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
> diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
> index d4458b6dbfb4..0d1e400e82a2 100644
> --- a/include/asm-generic/pgtable.h
> +++ b/include/asm-generic/pgtable.h
> @@ -660,11 +660,17 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
>   /*
>* With split pmd lock we also need to move preallocated
>* PTE page table if new_pmd is on different PMD page table.
> +  *
> +  * We also don't deposit and withdraw tables for file pages.
>*/
> - return new_pmd_ptl != old_pmd_ptl;
> + return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);
>  }
>  #endif
>  
> +#ifndef arch_needs_pgtable_deposit
> +#define arch_needs_pgtable_deposit() (false)
> +#endif
> +
>  /*
>   * This function is meant to be used by sites walking pagetables with
>   * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
> diff --git a/mm/Kconfig b/mm/Kconfig
> index be0ee11fa0d9..0a279d399722 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -447,13 +447,9 @@ choice
> benefit.
>  endchoice
>  
> -#
> -# We don't deposit page tables on file THP mapping,
> -# but Power makes use of them to address MMU quirk.
> -#
>  config   TRANSPARENT_HUGE_PAGECACHE
>   def_bool y
> - depends on TRANSPARENT_HUGEPAGE && !PPC
> + depends on TRANSPARENT_HUGEPAGE
>  
>  #
>  # UP and nommu archs use km based percpu allocator
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index a6abd76baa72..37176f455d16 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -1320,6 +1320,14 @@ out_unlocked:
>   return ret;
>  }
>  
> +void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)

static?

> +{
> + pgtable_t pgtable;
> + pgtable = pgtable_trans_huge_withdraw(mm, pmd);
> + pte_free(mm, pgtable);
> + atomic_long_dec(&mm->nr_ptes);
> +}
> +
>  int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>pmd_t *pmd, unsigned long addr)
>  {
> @@ -1359,6 +1367,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   atomic_long_dec(&tlb->mm->nr_ptes);
>   add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
>   } else {
> + if (arch_needs_pgtable_deposit())

Just hide the arch_needs_pgtable_deposit() check in zap_deposited_table().

> + zap_deposited_table(tlb->mm, pmd);
>   add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
>   }
>   spin_unlock(ptl);
-- 
 Kirill A. Shutemov


[RFC PATCH] powerpc/mm: THP page cache support

2016-09-22 Thread Aneesh Kumar K.V
Update the arch hook in the generic THP page cache code so that it
deposits and withdraws the preallocated page table. Archs like ppc64
use this preallocated table to store the hash PTE slot information.

This is an RFC patch and I am sharing it early to get feedback on the
approach taken. I used the stress-ng mmap-file operation, which
resulted in some thp_file_mapped events, as shown below.

[/mnt/stress]$ grep thp_file /proc/vmstat
thp_file_alloc 25403
thp_file_mapped 16967
[/mnt/stress]$
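
(The exact stress-ng invocation is not recorded here; assuming the
mmap stressor with its file-backed mode on a THP-capable mount, it
would be along the lines of:

	$ cd /mnt/stress
	$ stress-ng --mmap 8 --mmap-file --timeout 60s
	$ grep thp_file /proc/vmstat

where --mmap-file asks the mmap stressor to use file-backed mappings.)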

I did observe a wrong nr_ptes count once; I still need to reproduce
the problem.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/pgtable.h |  3 ++
 include/asm-generic/pgtable.h                |  8 +++-
 mm/Kconfig                                   |  6 +--
 mm/huge_memory.c                             | 19 +-
 mm/khugepaged.c                              | 21 ++-
 mm/memory.c                                  | 56 +++-
 6 files changed, 93 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 263bf39ced40..1f45b06ce78e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1017,6 +1017,9 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
 */
return true;
 }
+
+#define arch_needs_pgtable_deposit() (true)
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index d4458b6dbfb4..0d1e400e82a2 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -660,11 +660,17 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
/*
 * With split pmd lock we also need to move preallocated
 * PTE page table if new_pmd is on different PMD page table.
+*
+* We also don't deposit and withdraw tables for file pages.
 */
-   return new_pmd_ptl != old_pmd_ptl;
+   return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);
 }
 #endif
 
+#ifndef arch_needs_pgtable_deposit
+#define arch_needs_pgtable_deposit() (false)
+#endif
+
 /*
  * This function is meant to be used by sites walking pagetables with
  * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
diff --git a/mm/Kconfig b/mm/Kconfig
index be0ee11fa0d9..0a279d399722 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -447,13 +447,9 @@ choice
  benefit.
 endchoice
 
-#
-# We don't deposit page tables on file THP mapping,
-# but Power makes use of them to address MMU quirk.
-#
 config TRANSPARENT_HUGE_PAGECACHE
def_bool y
-   depends on TRANSPARENT_HUGEPAGE && !PPC
+   depends on TRANSPARENT_HUGEPAGE
 
 #
 # UP and nommu archs use km based percpu allocator
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a6abd76baa72..37176f455d16 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1320,6 +1320,14 @@ out_unlocked:
return ret;
 }
 
+void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
+{
+   pgtable_t pgtable;
+   pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+   pte_free(mm, pgtable);
+   atomic_long_dec(&mm->nr_ptes);
+}
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 pmd_t *pmd, unsigned long addr)
 {
@@ -1359,6 +1367,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
	atomic_long_dec(&tlb->mm->nr_ptes);
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
} else {
+   if (arch_needs_pgtable_deposit())
+   zap_deposited_table(tlb->mm, pmd);
add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
}
spin_unlock(ptl);
@@ -1401,8 +1411,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
VM_BUG_ON(!pmd_none(*new_pmd));
 
-   if (pmd_move_must_withdraw(new_ptl, old_ptl) &&
-   vma_is_anonymous(vma)) {
+   if (pmd_move_must_withdraw(new_ptl, old_ptl)) {
pgtable_t pgtable;
pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
@@ -1525,6 +1534,12 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 
if (!vma_is_anonymous(vma)) {
_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+   /*
+* We are going to unmap this huge page. So
+* just go ahead and zap it
+*/
+   if (arch_needs_pgtable_deposit())
+   zap_deposited_table(mm, pmd);