Re: [PATCH 4/6] powerpc/mm: Add devmap support for ppc64

2017-05-23 Thread Oliver O'Halloran
On Tue, May 23, 2017 at 8:40 PM, Balbir Singh  wrote:
> On Tue, May 23, 2017 at 2:05 PM, Oliver O'Halloran  wrote:
>> Add support for the devmap bit on PTEs and PMDs for PPC64 Book3S.  This
>> is used to differentiate device backed memory from transparent huge
>> pages since they are handled in more or less the same manner by the core
>> mm code.
>>
>> Cc: Aneesh Kumar K.V 
>> Signed-off-by: Oliver O'Halloran 
>> ---
>> v1 -> v2: Properly differentiate THP and PMD Devmap entries. The
>> mm core assumes that pmd_trans_huge() and pmd_devmap() are mutually
>> exclusive and v1 had pmd_trans_huge() being true on a devmap pmd.
>>
>> Aneesh, this has been fleshed out substantially since v1. Can you
>> re-review it? Also no explicit gup support is required in this patch
>> since devmap support was added generic GUP as a part of making x86 use
>> the generic version.
>> ---
>>  arch/powerpc/include/asm/book3s/64/hash-64k.h |  2 +-
>>  arch/powerpc/include/asm/book3s/64/pgtable.h  | 37 
>> ++-
>>  arch/powerpc/include/asm/book3s/64/radix.h|  2 +-
>>  arch/powerpc/mm/hugetlbpage.c |  2 +-
>>  arch/powerpc/mm/pgtable-book3s64.c|  4 +--
>>  arch/powerpc/mm/pgtable-hash64.c  |  4 ++-
>>  arch/powerpc/mm/pgtable-radix.c   |  3 ++-
>>  arch/powerpc/mm/pgtable_64.c  |  2 +-
>>  8 files changed, 47 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
>> b/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> index 9732837aaae8..eaaf613c5347 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> @@ -180,7 +180,7 @@ static inline void mark_hpte_slot_valid(unsigned char 
>> *hpte_slot_array,
>>   */
>>  static inline int hash__pmd_trans_huge(pmd_t pmd)
>>  {
>> -   return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) ==
>> +   return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | 
>> _PAGE_DEVMAP)) ==
>>   (_PAGE_PTE | H_PAGE_THP_HUGE));
>>  }
>
> Like Aneesh suggested, I think we can probably skip this check here
>
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
>> b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 85bc9875c3be..24634e92dd0b 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -79,6 +79,9 @@
>>
>>  #define _PAGE_SOFT_DIRTY   _RPAGE_SW3 /* software: software dirty 
>> tracking */
>>  #define _PAGE_SPECIAL  _RPAGE_SW2 /* software: special page */
>> +#define _PAGE_DEVMAP   _RPAGE_SW1
>> +#define __HAVE_ARCH_PTE_DEVMAP
>> +
>>  /*
>>   * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
>>   * Instead of fixing all of them, add an alternate define which
>> @@ -599,6 +602,16 @@ static inline pte_t pte_mkhuge(pte_t pte)
>> return pte;
>>  }
>>
>> +static inline pte_t pte_mkdevmap(pte_t pte)
>> +{
>> +   return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
>> +}
>> +
>> +static inline int pte_devmap(pte_t pte)
>> +{
>> +   return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP));
>> +}
>> +
>>  static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
>>  {
>> /* FIXME!! check whether this need to be a conditional */
>> @@ -963,6 +976,9 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
>>  #define pmd_mk_savedwrite(pmd) pte_pmd(pte_mk_savedwrite(pmd_pte(pmd)))
>>  #define pmd_clear_savedwrite(pmd)  
>> pte_pmd(pte_clear_savedwrite(pmd_pte(pmd)))
>>
>> +#define pud_pfn(...) (0)
>> +#define pgd_pfn(...) (0)
>> +
>
> Don't get these bits.. why are they zero?

I think that was just hacking stuff until it worked. pud_pfn() needs
to exist for the kernel to build when __HAVE_ARCH_PTE_DEVMAP is set,
but we don't need it to do anything (yet) since pud_pfn() is only used
for handing devmap PUD faults. We currently support those so we will
never hit that code path. pgd_pfn() can die though.

>>  #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
>>  #define pmd_soft_dirty(pmd)pte_soft_dirty(pmd_pte(pmd))
>>  #define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
>> @@ -1137,7 +1153,6 @@ static inline int pmd_move_must_withdraw(struct 
>> spinlock *new_pmd_ptl,
>> return true;
>>  }
>>
>> -
>>  #define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
>>  static inline bool arch_needs_pgtable_deposit(void)
>>  {
>> @@ -1146,6 +1161,26 @@ static inline bool arch_needs_pgtable_deposit(void)
>> return true;
>>  }
>>
>> +static inline pmd_t pmd_mkdevmap(pmd_t pmd)
>> +{
>> +   return pte_pmd(pte_mkdevmap(pmd_pte(pmd)));
>> +}
>> +
>> +static inline int pmd_devmap(pmd_t pmd)
>> +{
>> +   return pte_devmap(pmd_pte(pmd));
>> +}
>
> This should be defined only if #ifdef __HAVE_ARCH_PTE_DEVMAP

ok

>
> The rest looks OK
>
> Balbir 

Re: [PATCH 4/6] powerpc/mm: Add devmap support for ppc64

2017-05-23 Thread Balbir Singh
On Tue, May 23, 2017 at 2:05 PM, Oliver O'Halloran  wrote:
> Add support for the devmap bit on PTEs and PMDs for PPC64 Book3S.  This
> is used to differentiate device backed memory from transparent huge
> pages since they are handled in more or less the same manner by the core
> mm code.
>
> Cc: Aneesh Kumar K.V 
> Signed-off-by: Oliver O'Halloran 
> ---
> v1 -> v2: Properly differentiate THP and PMD Devmap entries. The
> mm core assumes that pmd_trans_huge() and pmd_devmap() are mutually
> exclusive and v1 had pmd_trans_huge() being true on a devmap pmd.
>
> Aneesh, this has been fleshed out substantially since v1. Can you
> re-review it? Also no explicit gup support is required in this patch
> since devmap support was added generic GUP as a part of making x86 use
> the generic version.
> ---
>  arch/powerpc/include/asm/book3s/64/hash-64k.h |  2 +-
>  arch/powerpc/include/asm/book3s/64/pgtable.h  | 37 
> ++-
>  arch/powerpc/include/asm/book3s/64/radix.h|  2 +-
>  arch/powerpc/mm/hugetlbpage.c |  2 +-
>  arch/powerpc/mm/pgtable-book3s64.c|  4 +--
>  arch/powerpc/mm/pgtable-hash64.c  |  4 ++-
>  arch/powerpc/mm/pgtable-radix.c   |  3 ++-
>  arch/powerpc/mm/pgtable_64.c  |  2 +-
>  8 files changed, 47 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
> b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> index 9732837aaae8..eaaf613c5347 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> @@ -180,7 +180,7 @@ static inline void mark_hpte_slot_valid(unsigned char 
> *hpte_slot_array,
>   */
>  static inline int hash__pmd_trans_huge(pmd_t pmd)
>  {
> -   return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) ==
> +   return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | 
> _PAGE_DEVMAP)) ==
>   (_PAGE_PTE | H_PAGE_THP_HUGE));
>  }

Like Aneesh suggested, I think we can probably skip this check here

>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
> b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 85bc9875c3be..24634e92dd0b 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -79,6 +79,9 @@
>
>  #define _PAGE_SOFT_DIRTY   _RPAGE_SW3 /* software: software dirty 
> tracking */
>  #define _PAGE_SPECIAL  _RPAGE_SW2 /* software: special page */
> +#define _PAGE_DEVMAP   _RPAGE_SW1
> +#define __HAVE_ARCH_PTE_DEVMAP
> +
>  /*
>   * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
>   * Instead of fixing all of them, add an alternate define which
> @@ -599,6 +602,16 @@ static inline pte_t pte_mkhuge(pte_t pte)
> return pte;
>  }
>
> +static inline pte_t pte_mkdevmap(pte_t pte)
> +{
> +   return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
> +}
> +
> +static inline int pte_devmap(pte_t pte)
> +{
> +   return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP));
> +}
> +
>  static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
>  {
> /* FIXME!! check whether this need to be a conditional */
> @@ -963,6 +976,9 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
>  #define pmd_mk_savedwrite(pmd) pte_pmd(pte_mk_savedwrite(pmd_pte(pmd)))
>  #define pmd_clear_savedwrite(pmd)  
> pte_pmd(pte_clear_savedwrite(pmd_pte(pmd)))
>
> +#define pud_pfn(...) (0)
> +#define pgd_pfn(...) (0)
> +

Don't get these bits.. why are they zero?

>  #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
>  #define pmd_soft_dirty(pmd)pte_soft_dirty(pmd_pte(pmd))
>  #define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
> @@ -1137,7 +1153,6 @@ static inline int pmd_move_must_withdraw(struct 
> spinlock *new_pmd_ptl,
> return true;
>  }
>
> -
>  #define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
>  static inline bool arch_needs_pgtable_deposit(void)
>  {
> @@ -1146,6 +1161,26 @@ static inline bool arch_needs_pgtable_deposit(void)
> return true;
>  }
>
> +static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> +{
> +   return pte_pmd(pte_mkdevmap(pmd_pte(pmd)));
> +}
> +
> +static inline int pmd_devmap(pmd_t pmd)
> +{
> +   return pte_devmap(pmd_pte(pmd));
> +}

This should be defined only if #ifdef __HAVE_ARCH_PTE_DEVMAP

The rest looks OK

Balbir Singh.


Re: [PATCH 4/6] powerpc/mm: Add devmap support for ppc64

2017-05-23 Thread Oliver O'Halloran
On Tue, May 23, 2017 at 2:23 PM, Aneesh Kumar K.V
 wrote:
> Oliver O'Halloran  writes:
>
>> Add support for the devmap bit on PTEs and PMDs for PPC64 Book3S.  This
>> is used to differentiate device backed memory from transparent huge
>> pages since they are handled in more or less the same manner by the core
>> mm code.
>>
>> Cc: Aneesh Kumar K.V 
>> Signed-off-by: Oliver O'Halloran 
>> ---
>> v1 -> v2: Properly differentiate THP and PMD Devmap entries. The
>> mm core assumes that pmd_trans_huge() and pmd_devmap() are mutually
>> exclusive and v1 had pmd_trans_huge() being true on a devmap pmd.
>>
>> Aneesh, this has been fleshed out substantially since v1. Can you
>> re-review it? Also no explicit gup support is required in this patch
>> since devmap support was added generic GUP as a part of making x86 use
>> the generic version.
>> ---
>>  arch/powerpc/include/asm/book3s/64/hash-64k.h |  2 +-
>>  arch/powerpc/include/asm/book3s/64/pgtable.h  | 37 
>> ++-
>>  arch/powerpc/include/asm/book3s/64/radix.h|  2 +-
>>  arch/powerpc/mm/hugetlbpage.c |  2 +-
>>  arch/powerpc/mm/pgtable-book3s64.c|  4 +--
>>  arch/powerpc/mm/pgtable-hash64.c  |  4 ++-
>>  arch/powerpc/mm/pgtable-radix.c   |  3 ++-
>>  arch/powerpc/mm/pgtable_64.c  |  2 +-
>>  8 files changed, 47 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
>> b/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> index 9732837aaae8..eaaf613c5347 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> @@ -180,7 +180,7 @@ static inline void mark_hpte_slot_valid(unsigned char 
>> *hpte_slot_array,
>>   */
>>  static inline int hash__pmd_trans_huge(pmd_t pmd)
>>  {
>> - return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) ==
>> + return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | 
>> _PAGE_DEVMAP)) ==
>> (_PAGE_PTE | H_PAGE_THP_HUGE));
>>  }
>
> _PAGE_DEVMAP is not really needed here. We will set H_PAGE_THP_HUGE only
> for thp hugepage w.r.t hash. But putting it here also makes it clear
> that devmap entries are not considered trans huge.

Good point. I'll remove it.

>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
>> b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 85bc9875c3be..24634e92dd0b 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -79,6 +79,9 @@
>>
>>  #define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty 
>> tracking */
>>  #define _PAGE_SPECIAL_RPAGE_SW2 /* software: special page */
>> +#define _PAGE_DEVMAP _RPAGE_SW1
>> +#define __HAVE_ARCH_PTE_DEVMAP
>> +
>>  /*
>>   * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
>>   * Instead of fixing all of them, add an alternate define which
>> @@ -599,6 +602,16 @@ static inline pte_t pte_mkhuge(pte_t pte)
>>   return pte;
>>  }
>>
>> +static inline pte_t pte_mkdevmap(pte_t pte)
>> +{
>> + return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
>> +}
>> +
>> +static inline int pte_devmap(pte_t pte)
>> +{
>> + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP));
>> +}
>> +
>>  static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
>>  {
>>   /* FIXME!! check whether this need to be a conditional */
>> @@ -963,6 +976,9 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
>>  #define pmd_mk_savedwrite(pmd)   
>> pte_pmd(pte_mk_savedwrite(pmd_pte(pmd)))
>>  #define pmd_clear_savedwrite(pmd)
>> pte_pmd(pte_clear_savedwrite(pmd_pte(pmd)))
>>
>> +#define pud_pfn(...) (0)
>> +#define pgd_pfn(...) (0)
>> +
>>  #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
>>  #define pmd_soft_dirty(pmd)pte_soft_dirty(pmd_pte(pmd))
>>  #define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
>> @@ -1137,7 +1153,6 @@ static inline int pmd_move_must_withdraw(struct 
>> spinlock *new_pmd_ptl,
>>   return true;
>>  }
>>
>> -
>>  #define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
>>  static inline bool arch_needs_pgtable_deposit(void)
>>  {
>> @@ -1146,6 +1161,26 @@ static inline bool arch_needs_pgtable_deposit(void)
>>   return true;
>>  }
>>
>> +static inline pmd_t pmd_mkdevmap(pmd_t pmd)
>> +{
>> + return pte_pmd(pte_mkdevmap(pmd_pte(pmd)));
>> +}
>
>
> We avoided setting _PAGE_SPECIAL on pmd entries. This will set that, we
> may want to check if it is ok.  IIRC, we overloaded _PAGE_SPECIAL at some 
> point to indicate thp splitting. But good to double check.

I took a cursory look in arch/powerpc/ and mm/ for usages and didn't
see any usages of _PAGE_SPECIAL for pmds. There's no good reason to
set the flag though so I'll remove it.

>> +
>> +static inline int pmd_devmap(pmd_t pmd)
>> +{
>> + 

Re: [PATCH 4/6] powerpc/mm: Add devmap support for ppc64

2017-05-22 Thread Aneesh Kumar K.V
Oliver O'Halloran  writes:

> Add support for the devmap bit on PTEs and PMDs for PPC64 Book3S.  This
> is used to differentiate device backed memory from transparent huge
> pages since they are handled in more or less the same manner by the core
> mm code.
>
> Cc: Aneesh Kumar K.V 
> Signed-off-by: Oliver O'Halloran 
> ---
> v1 -> v2: Properly differentiate THP and PMD Devmap entries. The
> mm core assumes that pmd_trans_huge() and pmd_devmap() are mutually
> exclusive and v1 had pmd_trans_huge() being true on a devmap pmd.
>
> Aneesh, this has been fleshed out substantially since v1. Can you
> re-review it? Also no explicit gup support is required in this patch
> since devmap support was added generic GUP as a part of making x86 use
> the generic version.
> ---
>  arch/powerpc/include/asm/book3s/64/hash-64k.h |  2 +-
>  arch/powerpc/include/asm/book3s/64/pgtable.h  | 37 
> ++-
>  arch/powerpc/include/asm/book3s/64/radix.h|  2 +-
>  arch/powerpc/mm/hugetlbpage.c |  2 +-
>  arch/powerpc/mm/pgtable-book3s64.c|  4 +--
>  arch/powerpc/mm/pgtable-hash64.c  |  4 ++-
>  arch/powerpc/mm/pgtable-radix.c   |  3 ++-
>  arch/powerpc/mm/pgtable_64.c  |  2 +-
>  8 files changed, 47 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
> b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> index 9732837aaae8..eaaf613c5347 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> @@ -180,7 +180,7 @@ static inline void mark_hpte_slot_valid(unsigned char 
> *hpte_slot_array,
>   */
>  static inline int hash__pmd_trans_huge(pmd_t pmd)
>  {
> - return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) ==
> + return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)) 
> ==
> (_PAGE_PTE | H_PAGE_THP_HUGE));
>  }


_PAGE_DEVMAP is not really needed here. We will set H_PAGE_THP_HUGE only
for thp hugepage w.r.t hash. But putting it here also makes it clear
that devmap entries are not considered trans huge.

>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
> b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 85bc9875c3be..24634e92dd0b 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -79,6 +79,9 @@
>
>  #define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking 
> */
>  #define _PAGE_SPECIAL_RPAGE_SW2 /* software: special page */
> +#define _PAGE_DEVMAP _RPAGE_SW1
> +#define __HAVE_ARCH_PTE_DEVMAP
> +
>  /*
>   * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
>   * Instead of fixing all of them, add an alternate define which
> @@ -599,6 +602,16 @@ static inline pte_t pte_mkhuge(pte_t pte)
>   return pte;
>  }
>
> +static inline pte_t pte_mkdevmap(pte_t pte)
> +{
> + return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
> +}
> +
> +static inline int pte_devmap(pte_t pte)
> +{
> + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP));
> +}
> +
>  static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
>  {
>   /* FIXME!! check whether this need to be a conditional */
> @@ -963,6 +976,9 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
>  #define pmd_mk_savedwrite(pmd)   pte_pmd(pte_mk_savedwrite(pmd_pte(pmd)))
>  #define pmd_clear_savedwrite(pmd)
> pte_pmd(pte_clear_savedwrite(pmd_pte(pmd)))
>
> +#define pud_pfn(...) (0)
> +#define pgd_pfn(...) (0)
> +
>  #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
>  #define pmd_soft_dirty(pmd)pte_soft_dirty(pmd_pte(pmd))
>  #define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
> @@ -1137,7 +1153,6 @@ static inline int pmd_move_must_withdraw(struct 
> spinlock *new_pmd_ptl,
>   return true;
>  }
>
> -
>  #define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
>  static inline bool arch_needs_pgtable_deposit(void)
>  {
> @@ -1146,6 +1161,26 @@ static inline bool arch_needs_pgtable_deposit(void)
>   return true;
>  }
>
> +static inline pmd_t pmd_mkdevmap(pmd_t pmd)
> +{
> + return pte_pmd(pte_mkdevmap(pmd_pte(pmd)));
> +}


We avoided setting _PAGE_SPECIAL on pmd entries. This will set that, we
may want to check if it is ok.  IIRC, we overloaded _PAGE_SPECIAL at
some point to indicate thp splitting. But good to double check. 

> +
> +static inline int pmd_devmap(pmd_t pmd)
> +{
> + return pte_devmap(pmd_pte(pmd));
> +}
> +
> +static inline int pud_devmap(pud_t pud)
> +{
> + return 0;
> +}
> +
> +static inline int pgd_devmap(pgd_t pgd)
> +{
> + return 0;
> +}
> +
>  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>  #endif /* __ASSEMBLY__ */
>  #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
> b/arch/powerpc/include/asm/book3s/64/radix.h
> index 

[PATCH 4/6] powerpc/mm: Add devmap support for ppc64

2017-05-22 Thread Oliver O'Halloran
Add support for the devmap bit on PTEs and PMDs for PPC64 Book3S.  This
is used to differentiate device backed memory from transparent huge
pages since they are handled in more or less the same manner by the core
mm code.

Cc: Aneesh Kumar K.V 
Signed-off-by: Oliver O'Halloran 
---
v1 -> v2: Properly differentiate THP and PMD Devmap entries. The
mm core assumes that pmd_trans_huge() and pmd_devmap() are mutually
exclusive and v1 had pmd_trans_huge() being true on a devmap pmd.

Aneesh, this has been fleshed out substantially since v1. Can you
re-review it? Also no explicit gup support is required in this patch
since devmap support was added generic GUP as a part of making x86 use
the generic version.
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h |  2 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 37 ++-
 arch/powerpc/include/asm/book3s/64/radix.h|  2 +-
 arch/powerpc/mm/hugetlbpage.c |  2 +-
 arch/powerpc/mm/pgtable-book3s64.c|  4 +--
 arch/powerpc/mm/pgtable-hash64.c  |  4 ++-
 arch/powerpc/mm/pgtable-radix.c   |  3 ++-
 arch/powerpc/mm/pgtable_64.c  |  2 +-
 8 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 9732837aaae8..eaaf613c5347 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -180,7 +180,7 @@ static inline void mark_hpte_slot_valid(unsigned char 
*hpte_slot_array,
  */
 static inline int hash__pmd_trans_huge(pmd_t pmd)
 {
-   return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) ==
+   return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)) 
==
  (_PAGE_PTE | H_PAGE_THP_HUGE));
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 85bc9875c3be..24634e92dd0b 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -79,6 +79,9 @@
 
 #define _PAGE_SOFT_DIRTY   _RPAGE_SW3 /* software: software dirty tracking 
*/
 #define _PAGE_SPECIAL  _RPAGE_SW2 /* software: special page */
+#define _PAGE_DEVMAP   _RPAGE_SW1
+#define __HAVE_ARCH_PTE_DEVMAP
+
 /*
  * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
  * Instead of fixing all of them, add an alternate define which
@@ -599,6 +602,16 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
 }
 
+static inline pte_t pte_mkdevmap(pte_t pte)
+{
+   return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
+}
+
+static inline int pte_devmap(pte_t pte)
+{
+   return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP));
+}
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
/* FIXME!! check whether this need to be a conditional */
@@ -963,6 +976,9 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_mk_savedwrite(pmd) pte_pmd(pte_mk_savedwrite(pmd_pte(pmd)))
 #define pmd_clear_savedwrite(pmd)  
pte_pmd(pte_clear_savedwrite(pmd_pte(pmd)))
 
+#define pud_pfn(...) (0)
+#define pgd_pfn(...) (0)
+
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 #define pmd_soft_dirty(pmd)pte_soft_dirty(pmd_pte(pmd))
 #define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
@@ -1137,7 +1153,6 @@ static inline int pmd_move_must_withdraw(struct spinlock 
*new_pmd_ptl,
return true;
 }
 
-
 #define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
 static inline bool arch_needs_pgtable_deposit(void)
 {
@@ -1146,6 +1161,26 @@ static inline bool arch_needs_pgtable_deposit(void)
return true;
 }
 
+static inline pmd_t pmd_mkdevmap(pmd_t pmd)
+{
+   return pte_pmd(pte_mkdevmap(pmd_pte(pmd)));
+}
+
+static inline int pmd_devmap(pmd_t pmd)
+{
+   return pte_devmap(pmd_pte(pmd));
+}
+
+static inline int pud_devmap(pud_t pud)
+{
+   return 0;
+}
+
+static inline int pgd_devmap(pgd_t pgd)
+{
+   return 0;
+}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
b/arch/powerpc/include/asm/book3s/64/radix.h
index ac16d1943022..ba43754e96d2 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -252,7 +252,7 @@ static inline int radix__pgd_bad(pgd_t pgd)
 
 static inline int radix__pmd_trans_huge(pmd_t pmd)
 {
-   return !!(pmd_val(pmd) & _PAGE_PTE);
+   return (pmd_val(pmd) & (_PAGE_PTE | _PAGE_DEVMAP)) == _PAGE_PTE;
 }
 
 static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a4f33de4008e..d9958af5c98e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -963,7 +963,7 @@ pte_t